summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r--src/lib/libcrypto/bn/Makefile.ssl215
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.s1860
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.s.works533
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl84
-rw-r--r--src/lib/libcrypto/bn/asm/bn-alpha.pl571
-rw-r--r--src/lib/libcrypto/bn/asm/bn-win32.asm1441
-rw-r--r--src/lib/libcrypto/bn/asm/bn86unix.cpp752
-rw-r--r--src/lib/libcrypto/bn/asm/ca.pl33
-rw-r--r--src/lib/libcrypto/bn/asm/co-586.pl286
-rw-r--r--src/lib/libcrypto/bn/asm/co-alpha.pl116
-rw-r--r--src/lib/libcrypto/bn/asm/mips1.s539
-rw-r--r--src/lib/libcrypto/bn/asm/mips3.s2138
-rw-r--r--src/lib/libcrypto/bn/asm/sparc.s462
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8.S1458
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8plus.S1535
-rw-r--r--src/lib/libcrypto/bn/asm/vms.mar6695
-rw-r--r--src/lib/libcrypto/bn/asm/x86.pl28
-rw-r--r--src/lib/libcrypto/bn/asm/x86w16.asm6
-rw-r--r--src/lib/libcrypto/bn/asm/x86w32.asm34
-rw-r--r--src/lib/libcrypto/bn/bn.err27
-rw-r--r--src/lib/libcrypto/bn/bn.h (renamed from src/lib/libcrypto/bn/bn.org)337
-rw-r--r--src/lib/libcrypto/bn/bn.mul19
-rw-r--r--src/lib/libcrypto/bn/bn_add.c194
-rw-r--r--src/lib/libcrypto/bn/bn_asm.c802
-rw-r--r--src/lib/libcrypto/bn/bn_blind.c43
-rw-r--r--src/lib/libcrypto/bn/bn_comba.c345
-rw-r--r--src/lib/libcrypto/bn/bn_div.c180
-rw-r--r--src/lib/libcrypto/bn/bn_err.c123
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c210
-rw-r--r--src/lib/libcrypto/bn/bn_exp2.c195
-rw-r--r--src/lib/libcrypto/bn/bn_gcd.c53
-rw-r--r--src/lib/libcrypto/bn/bn_lcl.h107
-rw-r--r--src/lib/libcrypto/bn/bn_lib.c492
-rw-r--r--src/lib/libcrypto/bn/bn_m.c169
-rw-r--r--src/lib/libcrypto/bn/bn_mod.c97
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c441
-rw-r--r--src/lib/libcrypto/bn/bn_mpi.c11
-rw-r--r--src/lib/libcrypto/bn/bn_mul.c753
-rw-r--r--src/lib/libcrypto/bn/bn_mulw.c366
-rw-r--r--src/lib/libcrypto/bn/bn_opts.c324
-rw-r--r--src/lib/libcrypto/bn/bn_prime.c126
-rw-r--r--src/lib/libcrypto/bn/bn_prime.pl2
-rw-r--r--src/lib/libcrypto/bn/bn_print.c30
-rw-r--r--src/lib/libcrypto/bn/bn_rand.c10
-rw-r--r--src/lib/libcrypto/bn/bn_recp.c178
-rw-r--r--src/lib/libcrypto/bn/bn_shift.c18
-rw-r--r--src/lib/libcrypto/bn/bn_sqr.c205
-rw-r--r--src/lib/libcrypto/bn/bn_sub.c180
-rw-r--r--src/lib/libcrypto/bn/bn_word.c30
-rw-r--r--src/lib/libcrypto/bn/bnspeed.c67
-rw-r--r--src/lib/libcrypto/bn/bntest.c621
-rw-r--r--src/lib/libcrypto/bn/comba.pl285
-rw-r--r--src/lib/libcrypto/bn/d.c72
-rw-r--r--src/lib/libcrypto/bn/exp.c60
-rw-r--r--src/lib/libcrypto/bn/expspeed.c55
-rw-r--r--src/lib/libcrypto/bn/exptest.c60
-rw-r--r--src/lib/libcrypto/bn/new23
-rw-r--r--src/lib/libcrypto/bn/test.c241
-rw-r--r--src/lib/libcrypto/bn/todo3
-rw-r--r--src/lib/libcrypto/bn/vms-helper.c66
60 files changed, 22708 insertions, 3698 deletions
diff --git a/src/lib/libcrypto/bn/Makefile.ssl b/src/lib/libcrypto/bn/Makefile.ssl
index 9809d26cbc..fcabb62452 100644
--- a/src/lib/libcrypto/bn/Makefile.ssl
+++ b/src/lib/libcrypto/bn/Makefile.ssl
@@ -7,35 +7,35 @@ TOP= ../..
7CC= cc 7CC= cc
8INCLUDES= -I.. -I../../include 8INCLUDES= -I.. -I../../include
9CFLAG=-g 9CFLAG=-g
10INSTALL_PREFIX=
11OPENSSLDIR= /usr/local/ssl
10INSTALLTOP=/usr/local/ssl 12INSTALLTOP=/usr/local/ssl
11MAKE= make -f Makefile.ssl 13MAKE= make -f Makefile.ssl
12MAKEDEPEND= makedepend -f Makefile.ssl 14MAKEDEPEND= $(TOP)/util/domd $(TOP)
13MAKEFILE= Makefile.ssl 15MAKEFILE= Makefile.ssl
14AR= ar r 16AR= ar r
15 17
16BN_MULW= bn_mulw.o 18BN_ASM= bn_asm.o
17# or use 19# or use
18#BN_MULW= bn86-elf.o 20#BN_ASM= bn86-elf.o
19 21
20CFLAGS= $(INCLUDES) $(CFLAG) 22CFLAGS= $(INCLUDES) $(CFLAG)
23ASFLAGS=$(CFLAGS)
21 24
22ERR=bn
23ERRC=bn_err
24GENERAL=Makefile 25GENERAL=Makefile
25TEST=bntest.c exptest.c 26TEST=bntest.c exptest.c
26APPS= 27APPS=
27 28
28LIB=$(TOP)/libcrypto.a 29LIB=$(TOP)/libcrypto.a
29LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_mod.c bn_mul.c \ 30LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_mul.c \
30 bn_print.c bn_rand.c bn_shift.c bn_sub.c bn_word.c bn_blind.c \ 31 bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
31 bn_gcd.c bn_prime.c $(ERRC).c bn_sqr.c bn_mulw.c bn_recp.c bn_mont.c \ 32 bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c bn_recp.c bn_mont.c \
32 bn_mpi.c 33 bn_mpi.c bn_exp2.c
33
34LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_mod.o bn_mul.o \
35 bn_print.o bn_rand.o bn_shift.o bn_sub.o bn_word.o bn_blind.o \
36 bn_gcd.o bn_prime.o $(ERRC).o bn_sqr.o $(BN_MULW) bn_recp.o bn_mont.o \
37 bn_mpi.o
38 34
35LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_mul.o \
36 bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
37 bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) bn_recp.o bn_mont.o \
38 bn_mpi.o bn_exp2.o
39 39
40SRC= $(LIBSRC) 40SRC= $(LIBSRC)
41 41
@@ -58,53 +58,90 @@ knuth.fast: bn_knuth.c
58 58
59lib: $(LIBOBJ) 59lib: $(LIBOBJ)
60 $(AR) $(LIB) $(LIBOBJ) 60 $(AR) $(LIB) $(LIBOBJ)
61 sh $(TOP)/util/ranlib.sh $(LIB) 61 $(RANLIB) $(LIB)
62 @touch lib 62 @touch lib
63 63
64# elf 64# elf
65asm/bn86-elf.o: asm/bn86unix.cpp 65asm/bn86-elf.o: asm/bn86unix.cpp
66 $(CPP) -DELF asm/bn86unix.cpp | as -o asm/bn86-elf.o 66 $(CPP) -DELF asm/bn86unix.cpp | as -o asm/bn86-elf.o
67 67
68asm/co86-elf.o: asm/co86unix.cpp
69 $(CPP) -DELF asm/co86unix.cpp | as -o asm/co86-elf.o
70
68# solaris 71# solaris
69asm/bn86-sol.o: asm/bn86unix.cpp 72asm/bn86-sol.o: asm/bn86unix.cpp
70 $(CC) -E -DSOL asm/bn86unix.cpp | sed 's/^#.*//' > asm/bn86-sol.s 73 $(CC) -E -DSOL asm/bn86unix.cpp | sed 's/^#.*//' > asm/bn86-sol.s
71 as -o asm/bn86-sol.o asm/bn86-sol.s 74 as -o asm/bn86-sol.o asm/bn86-sol.s
72 rm -f asm/bn86-sol.s 75 rm -f asm/bn86-sol.s
73 76
77asm/co86-sol.o: asm/co86unix.cpp
78 $(CC) -E -DSOL asm/co86unix.cpp | sed 's/^#.*//' > asm/co86-sol.s
79 as -o asm/co86-sol.o asm/co86-sol.s
80 rm -f asm/co86-sol.s
81
74# a.out 82# a.out
75asm/bn86-out.o: asm/bn86unix.cpp 83asm/bn86-out.o: asm/bn86unix.cpp
76 $(CPP) -DOUT asm/bn86unix.cpp | as -o asm/bn86-out.o 84 $(CPP) -DOUT asm/bn86unix.cpp | as -o asm/bn86-out.o
77 85
86asm/co86-out.o: asm/co86unix.cpp
87 $(CPP) -DOUT asm/co86unix.cpp | as -o asm/co86-out.o
88
78# bsdi 89# bsdi
79asm/bn86bsdi.o: asm/bn86unix.cpp 90asm/bn86bsdi.o: asm/bn86unix.cpp
80 $(CPP) -DBSDI asm/bn86unix.cpp | as -o asm/bn86bsdi.o 91 $(CPP) -DBSDI asm/bn86unix.cpp | sed 's/ :/:/' | as -o asm/bn86bsdi.o
92
93asm/co86bsdi.o: asm/co86unix.cpp
94 $(CPP) -DBSDI asm/co86unix.cpp | sed 's/ :/:/' | as -o asm/co86bsdi.o
95
96asm/bn86unix.cpp: asm/bn-586.pl
97 (cd asm; $(PERL) bn-586.pl cpp >bn86unix.cpp )
98
99asm/co86unix.cpp: asm/co-586.pl
100 (cd asm; $(PERL) co-586.pl cpp >co86unix.cpp )
81 101
82asm/bn86unix.cpp: 102asm/sparcv8.o: asm/sparcv8.S
83 (cd asm; perl bn-586.pl cpp >bn86unix.cpp ) 103
104asm/sparcv8plus.o: asm/sparcv8plus.S
105
106# Old GNU assembler doesn't understand V9 instructions, so we
107# hire /usr/ccs/bin/as to do the job. Note that option is called
108# *-gcc27, but even gcc 2>=8 users may experience similar problem
109# if they didn't bother to upgrade GNU assembler. Such users should
110# not choose this option, but be advised to *remove* GNU assembler
111# or upgrade it.
112asm/sparcv8plus-gcc27.o: asm/sparcv8plus.S
113 $(CC) $(ASFLAGS) -E asm/sparcv8plus.S | \
114 /usr/ccs/bin/as -xarch=v8plus - -o asm/sparcv8plus-gcc27.o
115
116# MIPS 64 bit assembler
117asm/mips3.o: asm/mips3.s
118
119# MIPS 32 bit assembler
120asm/mips1.o: asm/mips1.s
121 /usr/bin/as -O2 -o asm/mips1.o asm/mips1.s
84 122
85files: 123files:
86 perl $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO 124 $(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
87 125
88links: 126links:
89 /bin/rm -f Makefile 127 @$(TOP)/util/point.sh Makefile.ssl Makefile
90 $(TOP)/util/point.sh Makefile.ssl Makefile ; 128 @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
91 $(TOP)/util/mklink.sh ../../include $(EXHEADER) 129 @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
92 $(TOP)/util/mklink.sh ../../test $(TEST) 130 @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
93 $(TOP)/util/mklink.sh ../../apps $(APPS)
94 131
95install: 132install:
96 @for i in $(EXHEADER) ; \ 133 @for i in $(EXHEADER) ; \
97 do \ 134 do \
98 (cp $$i $(INSTALLTOP)/include/$$i; \ 135 (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
99 chmod 644 $(INSTALLTOP)/include/$$i ); \ 136 chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
100 done; 137 done;
101 138
102exptest: 139exptest:
103 /bin/rm -f exptest 140 rm -f exptest
104 gcc -I../../include -g2 -ggdb -o exptest exptest.c ../../libcrypto.a 141 gcc -I../../include -g2 -ggdb -o exptest exptest.c ../../libcrypto.a
105 142
106div: 143div:
107 /bin/rm -f a.out 144 rm -f a.out
108 gcc -I.. -g div.c ../../libcrypto.a 145 gcc -I.. -g div.c ../../libcrypto.a
109 146
110tags: 147tags:
@@ -116,18 +153,124 @@ lint:
116 lint -DLINT $(INCLUDES) $(SRC)>fluff 153 lint -DLINT $(INCLUDES) $(SRC)>fluff
117 154
118depend: 155depend:
119 $(MAKEDEPEND) $(INCLUDES) $(PROGS) $(LIBSRC) 156 $(MAKEDEPEND) $(INCLUDES) $(DEPFLAG) $(PROGS) $(LIBSRC)
120 157
121dclean: 158dclean:
122 perl -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new 159 $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
123 mv -f Makefile.new $(MAKEFILE) 160 mv -f Makefile.new $(MAKEFILE)
124 161
125clean: 162clean:
126 /bin/rm -f *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_mulw.s 163 rm -f asm/co86unix.cpp asm/bn86unix.cpp *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s
127
128errors:
129 perl $(TOP)/util/err-ins.pl $(ERR).err $(ERR).org # special case .org
130 perl $(TOP)/util/err-ins.pl $(ERR).err $(ERR).h
131 perl ../err/err_genc.pl -s $(ERR).h $(ERRC).c
132 164
133# DO NOT DELETE THIS LINE -- make depend depends on it. 165# DO NOT DELETE THIS LINE -- make depend depends on it.
166
167bn_add.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
168bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
169bn_add.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
170bn_add.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
171bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
172bn_add.o: ../cryptlib.h bn_lcl.h
173bn_asm.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
174bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
175bn_asm.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
176bn_asm.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
177bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
178bn_asm.o: ../cryptlib.h bn_lcl.h
179bn_blind.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
180bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
181bn_blind.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
182bn_blind.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
183bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
184bn_blind.o: ../cryptlib.h bn_lcl.h
185bn_div.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
186bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
187bn_div.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
188bn_div.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
189bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
190bn_div.o: ../cryptlib.h bn_lcl.h
191bn_err.o: ../../include/openssl/bn.h ../../include/openssl/err.h
192bn_err.o: ../../include/openssl/opensslconf.h
193bn_exp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
194bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
195bn_exp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
196bn_exp.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
197bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
198bn_exp.o: ../cryptlib.h bn_lcl.h
199bn_exp2.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
200bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
201bn_exp2.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
202bn_exp2.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
203bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
204bn_exp2.o: ../cryptlib.h bn_lcl.h
205bn_gcd.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
206bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
207bn_gcd.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
208bn_gcd.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
209bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
210bn_gcd.o: ../cryptlib.h bn_lcl.h
211bn_lib.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
212bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
213bn_lib.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
214bn_lib.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
215bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
216bn_lib.o: ../cryptlib.h bn_lcl.h
217bn_mont.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
218bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
219bn_mont.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
220bn_mont.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
221bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
222bn_mont.o: ../cryptlib.h bn_lcl.h
223bn_mpi.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
224bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
225bn_mpi.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
226bn_mpi.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
227bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
228bn_mpi.o: ../cryptlib.h bn_lcl.h
229bn_mul.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
230bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
231bn_mul.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
232bn_mul.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
233bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
234bn_mul.o: ../cryptlib.h bn_lcl.h
235bn_prime.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
236bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
237bn_prime.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
238bn_prime.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
239bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h
240bn_prime.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h bn_prime.h
241bn_print.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
242bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
243bn_print.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
244bn_print.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
245bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
246bn_print.o: ../cryptlib.h bn_lcl.h
247bn_rand.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
248bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
249bn_rand.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
250bn_rand.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
251bn_rand.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h
252bn_rand.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h
253bn_recp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
254bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
255bn_recp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
256bn_recp.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
257bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
258bn_recp.o: ../cryptlib.h bn_lcl.h
259bn_shift.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
260bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
261bn_shift.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
262bn_shift.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
263bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
264bn_shift.o: ../cryptlib.h bn_lcl.h
265bn_sqr.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
266bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
267bn_sqr.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
268bn_sqr.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
269bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
270bn_sqr.o: ../cryptlib.h bn_lcl.h
271bn_word.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
272bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
273bn_word.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
274bn_word.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h
275bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/stack.h
276bn_word.o: ../cryptlib.h bn_lcl.h
diff --git a/src/lib/libcrypto/bn/asm/alpha.s b/src/lib/libcrypto/bn/asm/alpha.s
index 1d17b1d619..a351694ca2 100644
--- a/src/lib/libcrypto/bn/asm/alpha.s
+++ b/src/lib/libcrypto/bn/asm/alpha.s
@@ -1,8 +1,14 @@
1 # DEC Alpha assembler 1 # DEC Alpha assembler
2 # The bn_div64 is actually gcc output but the other parts are hand done. 2 # The bn_div_words is actually gcc output but the other parts are hand done.
3 # Thanks to tzeruch@ceddec.com for sending me the gcc output for 3 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
4 # bn_div64. 4 # bn_div_words.
5 .file 1 "bn_mulw.c" 5 # I've gone back and re-done most of routines.
6 # The key thing to remember for the 164 CPU is that while a
7 # multiply operation takes 8 cycles, another one can only be issued
8 # after 4 cycles have elapsed. I've done modification to help
9 # improve this. Also, normally, a ld instruction will not be available
10 # for about 3 cycles.
11 .file 1 "bn_asm.c"
6 .set noat 12 .set noat
7gcc2_compiled.: 13gcc2_compiled.:
8__gnu_compiled_c: 14__gnu_compiled_c:
@@ -14,65 +20,91 @@ bn_mul_add_words:
14bn_mul_add_words..ng: 20bn_mul_add_words..ng:
15 .frame $30,0,$26,0 21 .frame $30,0,$26,0
16 .prologue 0 22 .prologue 0
17 subq $18,2,$25 # num=-2
18 bis $31,$31,$0
19 blt $25,$42
20 .align 5 23 .align 5
21$142: 24 subq $18,4,$18
22 subq $18,2,$18 # num-=2 25 bis $31,$31,$0
23 subq $25,2,$25 # num-=2 26 blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
24 27 ldq $20,0($17) # 1 1
25 ldq $1,0($17) # a[0] 28 ldq $1,0($16) # 1 1
26 ldq $2,8($17) # a[1] 29 .align 3
27 30$42:
28 mulq $19,$1,$3 # a[0]*w low part r3 31 mulq $20,$19,$5 # 1 2 1 ######
29 umulh $19,$1,$1 # a[0]*w high part r1 32 ldq $21,8($17) # 2 1
30 mulq $19,$2,$4 # a[1]*w low part r4 33 ldq $2,8($16) # 2 1
31 umulh $19,$2,$2 # a[1]*w high part r2 34 umulh $20,$19,$20 # 1 2 ######
32 35 ldq $27,16($17) # 3 1
33 ldq $22,0($16) # r[0] r22 36 ldq $3,16($16) # 3 1
34 ldq $23,8($16) # r[1] r23 37 mulq $21,$19,$6 # 2 2 1 ######
35 38 ldq $28,24($17) # 4 1
36 addq $3,$22,$3 # a0 low part + r[0] 39 addq $1,$5,$1 # 1 2 2
37 addq $4,$23,$4 # a1 low part + r[1] 40 ldq $4,24($16) # 4 1
38 cmpult $3,$22,$5 # overflow? 41 umulh $21,$19,$21 # 2 2 ######
39 cmpult $4,$23,$6 # overflow? 42 cmpult $1,$5,$22 # 1 2 3 1
40 addq $5,$1,$1 # high part + overflow 43 addq $20,$22,$20 # 1 3 1
41 addq $6,$2,$2 # high part + overflow 44 addq $1,$0,$1 # 1 2 3 1
42 45 mulq $27,$19,$7 # 3 2 1 ######
43 addq $3,$0,$3 # add c 46 cmpult $1,$0,$0 # 1 2 3 2
44 cmpult $3,$0,$5 # overflow? 47 addq $2,$6,$2 # 2 2 2
45 stq $3,0($16) 48 addq $20,$0,$0 # 1 3 2
46 addq $5,$1,$0 # c=high part + overflow 49 cmpult $2,$6,$23 # 2 2 3 1
47 50 addq $21,$23,$21 # 2 3 1
48 addq $4,$0,$4 # add c 51 umulh $27,$19,$27 # 3 2 ######
49 cmpult $4,$0,$5 # overflow? 52 addq $2,$0,$2 # 2 2 3 1
50 stq $4,8($16) 53 cmpult $2,$0,$0 # 2 2 3 2
51 addq $5,$2,$0 # c=high part + overflow 54 subq $18,4,$18
55 mulq $28,$19,$8 # 4 2 1 ######
56 addq $21,$0,$0 # 2 3 2
57 addq $3,$7,$3 # 3 2 2
58 addq $16,32,$16
59 cmpult $3,$7,$24 # 3 2 3 1
60 stq $1,-32($16) # 1 2 4
61 umulh $28,$19,$28 # 4 2 ######
62 addq $27,$24,$27 # 3 3 1
63 addq $3,$0,$3 # 3 2 3 1
64 stq $2,-24($16) # 2 2 4
65 cmpult $3,$0,$0 # 3 2 3 2
66 stq $3,-16($16) # 3 2 4
67 addq $4,$8,$4 # 4 2 2
68 addq $27,$0,$0 # 3 3 2
69 cmpult $4,$8,$25 # 4 2 3 1
70 addq $17,32,$17
71 addq $28,$25,$28 # 4 3 1
72 addq $4,$0,$4 # 4 2 3 1
73 cmpult $4,$0,$0 # 4 2 3 2
74 stq $4,-8($16) # 4 2 4
75 addq $28,$0,$0 # 4 3 2
76 blt $18,$43
52 77
53 ble $18,$43 78 ldq $20,0($17) # 1 1
79 ldq $1,0($16) # 1 1
54 80
55 addq $16,16,$16 81 br $42
56 addq $17,16,$17
57 blt $25,$42
58 82
59 br $31,$142 83 .align 4
60$42: 84$45:
61 ldq $1,0($17) # a[0] 85 ldq $20,0($17) # 4 1
62 umulh $19,$1,$3 # a[0]*w high part 86 ldq $1,0($16) # 4 1
63 mulq $19,$1,$1 # a[0]*w low part 87 mulq $20,$19,$5 # 4 2 1
64 ldq $2,0($16) # r[0] 88 subq $18,1,$18
65 addq $1,$2,$1 # low part + r[0] 89 addq $16,8,$16
66 cmpult $1,$2,$4 # overflow? 90 addq $17,8,$17
67 addq $4,$3,$3 # high part + overflow 91 umulh $20,$19,$20 # 4 2
68 addq $1,$0,$1 # add c 92 addq $1,$5,$1 # 4 2 2
69 cmpult $1,$0,$4 # overflow? 93 cmpult $1,$5,$22 # 4 2 3 1
70 addq $4,$3,$0 # c=high part + overflow 94 addq $20,$22,$20 # 4 3 1
71 stq $1,0($16) 95 addq $1,$0,$1 # 4 2 3 1
96 cmpult $1,$0,$0 # 4 2 3 2
97 addq $20,$0,$0 # 4 3 2
98 stq $1,-8($16) # 4 2 4
99 bgt $18,$45
100 ret $31,($26),1 # else exit
72 101
73 .align 4 102 .align 4
74$43: 103$43:
75 ret $31,($26),1 104 addq $18,4,$18
105 bgt $18,$45 # goto tail code
106 ret $31,($26),1 # else exit
107
76 .end bn_mul_add_words 108 .end bn_mul_add_words
77 .align 3 109 .align 3
78 .globl bn_mul_words 110 .globl bn_mul_words
@@ -81,49 +113,75 @@ bn_mul_words:
81bn_mul_words..ng: 113bn_mul_words..ng:
82 .frame $30,0,$26,0 114 .frame $30,0,$26,0
83 .prologue 0 115 .prologue 0
84 subq $18,2,$25 # num=-2
85 bis $31,$31,$0
86 blt $25,$242
87 .align 5 116 .align 5
88$342: 117 subq $18,4,$18
89 subq $18,2,$18 # num-=2 118 bis $31,$31,$0
90 subq $25,2,$25 # num-=2 119 blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
91 120 ldq $20,0($17) # 1 1
92 ldq $1,0($17) # a[0] 121 .align 3
93 ldq $2,8($17) # a[1] 122$142:
94 123
95 mulq $19,$1,$3 # a[0]*w low part r3 124 mulq $20,$19,$5 # 1 2 1 #####
96 umulh $19,$1,$1 # a[0]*w high part r1 125 ldq $21,8($17) # 2 1
97 mulq $19,$2,$4 # a[1]*w low part r4 126 ldq $27,16($17) # 3 1
98 umulh $19,$2,$2 # a[1]*w high part r2 127 umulh $20,$19,$20 # 1 2 #####
99 128 ldq $28,24($17) # 4 1
100 addq $3,$0,$3 # add c 129 mulq $21,$19,$6 # 2 2 1 #####
101 cmpult $3,$0,$5 # overflow? 130 addq $5,$0,$5 # 1 2 3 1
102 stq $3,0($16) 131 subq $18,4,$18
103 addq $5,$1,$0 # c=high part + overflow 132 cmpult $5,$0,$0 # 1 2 3 2
104 133 umulh $21,$19,$21 # 2 2 #####
105 addq $4,$0,$4 # add c 134 addq $20,$0,$0 # 1 3 2
106 cmpult $4,$0,$5 # overflow? 135 addq $17,32,$17
107 stq $4,8($16) 136 addq $6,$0,$6 # 2 2 3 1
108 addq $5,$2,$0 # c=high part + overflow 137 mulq $27,$19,$7 # 3 2 1 #####
109 138 cmpult $6,$0,$0 # 2 2 3 2
110 ble $18,$243 139 addq $21,$0,$0 # 2 3 2
111 140 addq $16,32,$16
112 addq $16,16,$16 141 umulh $27,$19,$27 # 3 2 #####
113 addq $17,16,$17 142 stq $5,-32($16) # 1 2 4
114 blt $25,$242 143 mulq $28,$19,$8 # 4 2 1 #####
115 144 addq $7,$0,$7 # 3 2 3 1
116 br $31,$342 145 stq $6,-24($16) # 2 2 4
117$242: 146 cmpult $7,$0,$0 # 3 2 3 2
118 ldq $1,0($17) # a[0] 147 umulh $28,$19,$28 # 4 2 #####
119 umulh $19,$1,$3 # a[0]*w high part 148 addq $27,$0,$0 # 3 3 2
120 mulq $19,$1,$1 # a[0]*w low part 149 stq $7,-16($16) # 3 2 4
121 addq $1,$0,$1 # add c 150 addq $8,$0,$8 # 4 2 3 1
122 cmpult $1,$0,$4 # overflow? 151 cmpult $8,$0,$0 # 4 2 3 2
123 addq $4,$3,$0 # c=high part + overflow 152
124 stq $1,0($16) 153 addq $28,$0,$0 # 4 3 2
125$243: 154
126 ret $31,($26),1 155 stq $8,-8($16) # 4 2 4
156
157 blt $18,$143
158
159 ldq $20,0($17) # 1 1
160
161 br $142
162
163 .align 4
164$145:
165 ldq $20,0($17) # 4 1
166 mulq $20,$19,$5 # 4 2 1
167 subq $18,1,$18
168 umulh $20,$19,$20 # 4 2
169 addq $5,$0,$5 # 4 2 3 1
170 addq $16,8,$16
171 cmpult $5,$0,$0 # 4 2 3 2
172 addq $17,8,$17
173 addq $20,$0,$0 # 4 3 2
174 stq $5,-8($16) # 4 2 4
175
176 bgt $18,$145
177 ret $31,($26),1 # else exit
178
179 .align 4
180$143:
181 addq $18,4,$18
182 bgt $18,$145 # goto tail code
183 ret $31,($26),1 # else exit
184
127 .end bn_mul_words 185 .end bn_mul_words
128 .align 3 186 .align 3
129 .globl bn_sqr_words 187 .globl bn_sqr_words
@@ -132,44 +190,58 @@ bn_sqr_words:
132bn_sqr_words..ng: 190bn_sqr_words..ng:
133 .frame $30,0,$26,0 191 .frame $30,0,$26,0
134 .prologue 0 192 .prologue 0
135
136 subq $18,2,$25 # num=-2
137 blt $25,$442
138 .align 5
139$542:
140 subq $18,2,$18 # num-=2
141 subq $25,2,$25 # num-=2
142
143 ldq $1,0($17) # a[0]
144 ldq $4,8($17) # a[1]
145 193
146 mulq $1,$1,$2 # a[0]*w low part r2 194 subq $18,4,$18
147 umulh $1,$1,$3 # a[0]*w high part r3 195 blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
148 mulq $4,$4,$5 # a[1]*w low part r5 196 ldq $20,0($17) # 1 1
149 umulh $4,$4,$6 # a[1]*w high part r6 197 .align 3
150 198$542:
151 stq $2,0($16) # r[0] 199 mulq $20,$20,$5 ######
152 stq $3,8($16) # r[1] 200 ldq $21,8($17) # 1 1
153 stq $5,16($16) # r[3] 201 subq $18,4
154 stq $6,24($16) # r[4] 202 umulh $20,$20,$1 ######
203 ldq $27,16($17) # 1 1
204 mulq $21,$21,$6 ######
205 ldq $28,24($17) # 1 1
206 stq $5,0($16) # r[0]
207 umulh $21,$21,$2 ######
208 stq $1,8($16) # r[1]
209 mulq $27,$27,$7 ######
210 stq $6,16($16) # r[2]
211 umulh $27,$27,$3 ######
212 stq $2,24($16) # r[3]
213 mulq $28,$28,$8 ######
214 stq $7,32($16) # r[4]
215 umulh $28,$28,$4 ######
216 stq $3,40($16) # r[5]
155 217
156 ble $18,$443 218 addq $16,64,$16
219 addq $17,32,$17
220 stq $8,-16($16) # r[6]
221 stq $4,-8($16) # r[7]
157 222
158 addq $16,32,$16 223 blt $18,$543
159 addq $17,16,$17 224 ldq $20,0($17) # 1 1
160 blt $25,$442 225 br $542
161 br $31,$542
162 226
163$442: 227$442:
164 ldq $1,0($17) # a[0] 228 ldq $20,0($17) # a[0]
165 mulq $1,$1,$2 # a[0]*w low part r2 229 mulq $20,$20,$5 # a[0]*w low part r2
166 umulh $1,$1,$3 # a[0]*w high part r3 230 addq $16,16,$16
167 stq $2,0($16) # r[0] 231 addq $17,8,$17
168 stq $3,8($16) # r[1] 232 subq $18,1,$18
233 umulh $20,$20,$1 # a[0]*w high part r3
234 stq $5,-16($16) # r[0]
235 stq $1,-8($16) # r[1]
236
237 bgt $18,$442
238 ret $31,($26),1 # else exit
169 239
170 .align 4 240 .align 4
171$443: 241$543:
172 ret $31,($26),1 242 addq $18,4,$18
243 bgt $18,$442 # goto tail code
244 ret $31,($26),1 # else exit
173 .end bn_sqr_words 245 .end bn_sqr_words
174 246
175 .align 3 247 .align 3
@@ -180,31 +252,74 @@ bn_add_words..ng:
180 .frame $30,0,$26,0 252 .frame $30,0,$26,0
181 .prologue 0 253 .prologue 0
182 254
183 bis $31,$31,$8 # carry = 0 255 subq $19,4,$19
184 ble $19,$900 256 bis $31,$31,$0 # carry = 0
257 blt $19,$900
258 ldq $5,0($17) # a[0]
259 ldq $1,0($18) # b[1]
260 .align 3
185$901: 261$901:
186 ldq $0,0($17) # a[0] 262 addq $1,$5,$1 # r=a+b;
187 ldq $1,0($18) # a[1] 263 ldq $6,8($17) # a[1]
264 cmpult $1,$5,$22 # did we overflow?
265 ldq $2,8($18) # b[1]
266 addq $1,$0,$1 # c+= overflow
267 ldq $7,16($17) # a[2]
268 cmpult $1,$0,$0 # overflow?
269 ldq $3,16($18) # b[2]
270 addq $0,$22,$0
271 ldq $8,24($17) # a[3]
272 addq $2,$6,$2 # r=a+b;
273 ldq $4,24($18) # b[3]
274 cmpult $2,$6,$23 # did we overflow?
275 addq $3,$7,$3 # r=a+b;
276 addq $2,$0,$2 # c+= overflow
277 cmpult $3,$7,$24 # did we overflow?
278 cmpult $2,$0,$0 # overflow?
279 addq $4,$8,$4 # r=a+b;
280 addq $0,$23,$0
281 cmpult $4,$8,$25 # did we overflow?
282 addq $3,$0,$3 # c+= overflow
283 stq $1,0($16) # r[0]=c
284 cmpult $3,$0,$0 # overflow?
285 stq $2,8($16) # r[1]=c
286 addq $0,$24,$0
287 stq $3,16($16) # r[2]=c
288 addq $4,$0,$4 # c+= overflow
289 subq $19,4,$19 # loop--
290 cmpult $4,$0,$0 # overflow?
291 addq $17,32,$17 # a++
292 addq $0,$25,$0
293 stq $4,24($16) # r[3]=c
294 addq $18,32,$18 # b++
295 addq $16,32,$16 # r++
188 296
189 addq $0,$1,$3 # c=a+b; 297 blt $19,$900
298 ldq $5,0($17) # a[0]
299 ldq $1,0($18) # b[1]
300 br $901
301 .align 4
302$945:
303 ldq $5,0($17) # a[0]
304 ldq $1,0($18) # b[1]
305 addq $1,$5,$1 # r=a+b;
306 subq $19,1,$19 # loop--
307 addq $1,$0,$1 # c+= overflow
190 addq $17,8,$17 # a++ 308 addq $17,8,$17 # a++
309 cmpult $1,$5,$22 # did we overflow?
310 cmpult $1,$0,$0 # overflow?
311 addq $18,8,$18 # b++
312 stq $1,0($16) # r[0]=c
313 addq $0,$22,$0
314 addq $16,8,$16 # r++
191 315
192 cmpult $3,$1,$7 # did we overflow? 316 bgt $19,$945
193 addq $18,8,$18 # b++ 317 ret $31,($26),1 # else exit
194
195 addq $8,$3,$3 # c+=carry
196 318
197 cmpult $3,$8,$8 # did we overflow?
198 stq $3,($16) # r[0]=c
199
200 addq $7,$8,$8 # add into overflow
201 subq $19,1,$19 # loop--
202
203 addq $16,8,$16 # r++
204 bgt $19,$901
205$900: 319$900:
206 bis $8,$8,$0 # return carry 320 addq $19,4,$19
207 ret $31,($26),1 321 bgt $19,$945 # goto tail code
322 ret $31,($26),1 # else exit
208 .end bn_add_words 323 .end bn_add_words
209 324
210 # 325 #
@@ -213,11 +328,11 @@ $900:
213 # 328 #
214.text 329.text
215 .align 3 330 .align 3
216 .globl bn_div64 331 .globl bn_div_words
217 .ent bn_div64 332 .ent bn_div_words
218bn_div64: 333bn_div_words:
219 ldgp $29,0($27) 334 ldgp $29,0($27)
220bn_div64..ng: 335bn_div_words..ng:
221 lda $30,-48($30) 336 lda $30,-48($30)
222 .frame $30,48,$26,0 337 .frame $30,48,$26,0
223 stq $26,0($30) 338 stq $26,0($30)
@@ -338,7 +453,1446 @@ $136:
338 ldq $13,40($30) 453 ldq $13,40($30)
339 addq $30,48,$30 454 addq $30,48,$30
340 ret $31,($26),1 455 ret $31,($26),1
341 .end bn_div64 456 .end bn_div_words
342 .ident "GCC: (GNU) 2.7.2.1" 457
# bn_sub_words: subtract two arrays of 64-bit words, word by word, with borrow.
# As used below: $16 = result pointer, $17 = minuend pointer,
# $18 = subtrahend pointer, $19 = word count, $0 = running borrow
# (cleared on entry and left in $0 at return).
# $101 is the main loop, four words per pass; $102 finishes the remaining
# words one at a time.
458 .set noat
459 .text
460 .align 3
461 .globl bn_sub_words
462 .ent bn_sub_words
463bn_sub_words:
464bn_sub_words..ng:
465 .frame $30,0,$26,0
466 .prologue 0
467
468 subq $19, 4, $19 # count -= 4
469 bis $31, $31, $0 # borrow = 0
470 blt $19, $100 # fewer than 4 words: go to the tail setup
471 ldq $1, 0($17) # preload word 0 of both operands
472 ldq $2, 0($18)
473$101:
474 ldq $3, 8($17)
475 cmpult $1, $2, $4 # word 0: borrow from a < b
476 ldq $5, 8($18)
477 subq $1, $2, $1 # word 0: a - b
478 ldq $6, 16($17)
479 cmpult $1, $0, $2 # word 0: borrow from subtracting the incoming borrow
480 ldq $7, 16($18)
481 subq $1, $0, $23 # word 0 result
482 ldq $8, 24($17)
483 addq $2, $4, $0 # borrow out of word 0
484 cmpult $3, $5, $24 # word 1, same pattern
485 subq $3, $5, $3
486 ldq $22, 24($18)
487 cmpult $3, $0, $5
488 subq $3, $0, $25 # word 1 result
489 addq $5, $24, $0 # borrow out of word 1
490 cmpult $6, $7, $27 # word 2, same pattern
491 subq $6, $7, $6
492 stq $23, 0($16) # store word 0
493 cmpult $6, $0, $7
494 subq $6, $0, $28 # word 2 result
495 addq $7, $27, $0 # borrow out of word 2
496 cmpult $8, $22, $21 # word 3, same pattern
497 subq $8, $22, $8
498 stq $25, 8($16) # store word 1
499 cmpult $8, $0, $22
500 subq $8, $0, $20 # word 3 result
501 addq $22, $21, $0 # borrow out of word 3
502 stq $28, 16($16) # store word 2
503 subq $19, 4, $19 # count -= 4
504 stq $20, 24($16) # store word 3
505 addq $17, 32, $17 # advance all three pointers by four words
506 addq $18, 32, $18
507 addq $16, 32, $16
508 blt $19, $100 # fewer than 4 words left: go to the tail setup
509 ldq $1, 0($17) # preload word 0 for the next pass
510 ldq $2, 0($18)
511 br $101
512$102:
513 ldq $1, 0($17) # tail: one word per pass
514 ldq $2, 0($18)
515 cmpult $1, $2, $27 # borrow from a < b
516 subq $1, $2, $1
517 cmpult $1, $0, $2 # borrow from subtracting the incoming borrow
518 subq $1, $0, $1
519 stq $1, 0($16)
520 addq $2, $27, $0 # borrow out
521 addq $17, 8, $17
522 addq $18, 8, $18
523 addq $16, 8, $16
524 subq $19, 1, $19
525 bgt $19, $102
526 ret $31,($26),1
527$100:
528 addq $19, 4, $19 # undo the -4 bias to get the number of leftover words
529 bgt $19, $102 # leftovers present: run the one-word tail loop
530$103:
531 ret $31,($26),1
532 .end bn_sub_words
# bn_mul_comba4: multiply two 4-word operands into an 8-word result,
# one result word (column) at a time.
# As used below: $16 = result pointer, $17 = first operand (a),
# $18 = second operand (b). All eight input words are loaded up front into
# $0..$7 (a[i] in $0,$2,$4,$6 and b[i] in $1,$3,$5,$7).
# $22, $23 and $8 rotate as the low/middle/high accumulator words: after a
# result word is stored its register is cleared and becomes the new high word.
# For each product: the low half is added to the low accumulator, the
# resulting carry is folded into the high half, and that sum is added to the
# middle accumulator with its carry going to the high accumulator.
# $17, $18 and $19 are reused as scratch once the operands are loaded, and
# operand registers are reused as scratch after their last use.
533 .text
534 .align 3
535 .globl bn_mul_comba4
536 .ent bn_mul_comba4
537bn_mul_comba4:
538bn_mul_comba4..ng:
539 .frame $30,0,$26,0
540 .prologue 0
343 541
542 ldq $0, 0($17) # a[0]
543 ldq $1, 0($18) # b[0]
544 ldq $2, 8($17) # a[1]
545 ldq $3, 8($18) # b[1]
546 ldq $4, 16($17) # a[2]
547 ldq $5, 16($18) # b[2]
548 ldq $6, 24($17) # a[3]
549 ldq $7, 24($18) # b[3]
550 bis $31, $31, $23
551 mulq $0, $1, $8 # a[0]*b[0]
552 umulh $0, $1, $22
553 stq $8, 0($16) # r[0]
554 bis $31, $31, $8
555 mulq $0, $3, $24 # a[0]*b[1]
556 umulh $0, $3, $25
557 addq $22, $24, $22
558 cmpult $22, $24, $27
559 addq $27, $25, $25
560 addq $23, $25, $23
561 cmpult $23, $25, $28
562 addq $8, $28, $8
563 mulq $2, $1, $21 # a[1]*b[0]
564 umulh $2, $1, $20
565 addq $22, $21, $22
566 cmpult $22, $21, $19
567 addq $19, $20, $20
568 addq $23, $20, $23
569 cmpult $23, $20, $17
570 addq $8, $17, $8
571 stq $22, 8($16) # r[1]
572 bis $31, $31, $22
573 mulq $2, $3, $18 # a[1]*b[1]
574 umulh $2, $3, $24
575 addq $23, $18, $23
576 cmpult $23, $18, $27
577 addq $27, $24, $24
578 addq $8, $24, $8
579 cmpult $8, $24, $25
580 addq $22, $25, $22
581 mulq $0, $5, $28 # a[0]*b[2]
582 umulh $0, $5, $21
583 addq $23, $28, $23
584 cmpult $23, $28, $19
585 addq $19, $21, $21
586 addq $8, $21, $8
587 cmpult $8, $21, $20
588 addq $22, $20, $22
589 mulq $4, $1, $17 # a[2]*b[0]
590 umulh $4, $1, $18
591 addq $23, $17, $23
592 cmpult $23, $17, $27
593 addq $27, $18, $18
594 addq $8, $18, $8
595 cmpult $8, $18, $24
596 addq $22, $24, $22
597 stq $23, 16($16) # r[2]
598 bis $31, $31, $23
599 mulq $0, $7, $25 # a[0]*b[3]
600 umulh $0, $7, $28
601 addq $8, $25, $8
602 cmpult $8, $25, $19
603 addq $19, $28, $28
604 addq $22, $28, $22
605 cmpult $22, $28, $21
606 addq $23, $21, $23
607 mulq $2, $5, $20 # a[1]*b[2]
608 umulh $2, $5, $17
609 addq $8, $20, $8
610 cmpult $8, $20, $27
611 addq $27, $17, $17
612 addq $22, $17, $22
613 cmpult $22, $17, $18
614 addq $23, $18, $23
615 mulq $4, $3, $24 # a[2]*b[1]
616 umulh $4, $3, $25
617 addq $8, $24, $8
618 cmpult $8, $24, $19
619 addq $19, $25, $25
620 addq $22, $25, $22
621 cmpult $22, $25, $28
622 addq $23, $28, $23
623 mulq $6, $1, $21 # a[3]*b[0]
624 umulh $6, $1, $0
625 addq $8, $21, $8
626 cmpult $8, $21, $20
627 addq $20, $0, $0
628 addq $22, $0, $22
629 cmpult $22, $0, $27
630 addq $23, $27, $23
631 stq $8, 24($16) # r[3]
632 bis $31, $31, $8
633 mulq $2, $7, $17 # a[1]*b[3]
634 umulh $2, $7, $18
635 addq $22, $17, $22
636 cmpult $22, $17, $24
637 addq $24, $18, $18
638 addq $23, $18, $23
639 cmpult $23, $18, $19
640 addq $8, $19, $8
641 mulq $4, $5, $25 # a[2]*b[2]
642 umulh $4, $5, $28
643 addq $22, $25, $22
644 cmpult $22, $25, $21
645 addq $21, $28, $28
646 addq $23, $28, $23
647 cmpult $23, $28, $20
648 addq $8, $20, $8
649 mulq $6, $3, $0 # a[3]*b[1]
650 umulh $6, $3, $27
651 addq $22, $0, $22
652 cmpult $22, $0, $1
653 addq $1, $27, $27
654 addq $23, $27, $23
655 cmpult $23, $27, $17
656 addq $8, $17, $8
657 stq $22, 32($16) # r[4]
658 bis $31, $31, $22
659 mulq $4, $7, $24 # a[2]*b[3]
660 umulh $4, $7, $18
661 addq $23, $24, $23
662 cmpult $23, $24, $19
663 addq $19, $18, $18
664 addq $8, $18, $8
665 cmpult $8, $18, $2
666 addq $22, $2, $22
667 mulq $6, $5, $25 # a[3]*b[2]
668 umulh $6, $5, $21
669 addq $23, $25, $23
670 cmpult $23, $25, $28
671 addq $28, $21, $21
672 addq $8, $21, $8
673 cmpult $8, $21, $20
674 addq $22, $20, $22
675 stq $23, 40($16) # r[5]
676 bis $31, $31, $23
677 mulq $6, $7, $0 # a[3]*b[3]
678 umulh $6, $7, $1
679 addq $8, $0, $8
680 cmpult $8, $0, $27
681 addq $27, $1, $1
682 addq $22, $1, $22
683 cmpult $22, $1, $17
684 addq $23, $17, $23
685 stq $8, 48($16) # r[6]
686 stq $22, 56($16) # r[7]
687 ret $31,($26),1
688 .end bn_mul_comba4
# bn_mul_comba8: multiply two 8-word operands into a 16-word result,
# one result word (column) at a time.
# As used below: $16 = result pointer, $17 = first operand (a),
# $18 = second operand (b). All sixteen input words are loaded up front:
#   a[0..7] -> $0, $2, $4, $6, $8, $23, $25, $28
#   b[0..7] -> $1, $3, $5, $7, $22, $24, $27, $21
# FIX: words 4..7 of both operands were previously all loaded from offset 8
# (i.e. word 1); they are now loaded from offsets 32, 40, 48 and 56.
# $19, $9 and $20 rotate as the low/middle/high accumulator words: after a
# result word is stored its register is cleared and becomes the new high word.
# For each product: the low half is added to the low accumulator, the
# resulting carry is folded into the high half, and that sum is added to the
# middle accumulator with its carry going to the high accumulator.
# $9 and $10 are spilled to a 16-byte stack area on entry and reloaded before
# return. $17 and $18 are reused as scratch once the operands are loaded, and
# operand registers are reused as scratch after their last use.
# NOTE(review): .frame declares a 0-byte frame although 16 bytes of stack are
# allocated below -- confirm whether the unwind information needs updating.
689 .text
690 .align 3
691 .globl bn_mul_comba8
692 .ent bn_mul_comba8
693bn_mul_comba8:
694bn_mul_comba8..ng:
695 .frame $30,0,$26,0
696 .prologue 0
697
698 subq $30, 16, $30
699 ldq $0, 0($17) # a[0]
700 ldq $1, 0($18) # b[0]
701 stq $9, 0($30) # save $9
702 stq $10, 8($30) # save $10
703 ldq $2, 8($17) # a[1]
704 ldq $3, 8($18) # b[1]
705 ldq $4, 16($17) # a[2]
706 ldq $5, 16($18) # b[2]
707 ldq $6, 24($17) # a[3]
708 ldq $7, 24($18) # b[3]
709 ldq $8, 32($17) # a[4]
710 ldq $22, 32($18) # b[4]
711 ldq $23, 40($17) # a[5]
712 ldq $24, 40($18) # b[5]
713 ldq $25, 48($17) # a[6]
714 ldq $27, 48($18) # b[6]
715 ldq $28, 56($17) # a[7]
716 ldq $21, 56($18) # b[7]
717 bis $31, $31, $9
718 mulq $0, $1, $20 # a[0]*b[0]
719 umulh $0, $1, $19
720 stq $20, 0($16) # r[0]
721 bis $31, $31, $20
722 mulq $0, $3, $10 # a[0]*b[1]
723 umulh $0, $3, $17
724 addq $19, $10, $19
725 cmpult $19, $10, $18
726 addq $18, $17, $17
727 addq $9, $17, $9
728 cmpult $9, $17, $10
729 addq $20, $10, $20
730 mulq $2, $1, $18 # a[1]*b[0]
731 umulh $2, $1, $17
732 addq $19, $18, $19
733 cmpult $19, $18, $10
734 addq $10, $17, $17
735 addq $9, $17, $9
736 cmpult $9, $17, $18
737 addq $20, $18, $20
738 stq $19, 8($16) # r[1]
739 bis $31, $31, $19
740 mulq $0, $5, $10 # a[0]*b[2]
741 umulh $0, $5, $17
742 addq $9, $10, $9
743 cmpult $9, $10, $18
744 addq $18, $17, $17
745 addq $20, $17, $20
746 cmpult $20, $17, $10
747 addq $19, $10, $19
748 mulq $2, $3, $18 # a[1]*b[1]
749 umulh $2, $3, $17
750 addq $9, $18, $9
751 cmpult $9, $18, $10
752 addq $10, $17, $17
753 addq $20, $17, $20
754 cmpult $20, $17, $18
755 addq $19, $18, $19
756 mulq $4, $1, $10 # a[2]*b[0]
757 umulh $4, $1, $17
758 addq $9, $10, $9
759 cmpult $9, $10, $18
760 addq $18, $17, $17
761 addq $20, $17, $20
762 cmpult $20, $17, $10
763 addq $19, $10, $19
764 stq $9, 16($16) # r[2]
765 bis $31, $31, $9
766 mulq $0, $7, $18 # a[0]*b[3]
767 umulh $0, $7, $17
768 addq $20, $18, $20
769 cmpult $20, $18, $10
770 addq $10, $17, $17
771 addq $19, $17, $19
772 cmpult $19, $17, $18
773 addq $9, $18, $9
774 mulq $2, $5, $10 # a[1]*b[2]
775 umulh $2, $5, $17
776 addq $20, $10, $20
777 cmpult $20, $10, $18
778 addq $18, $17, $17
779 addq $19, $17, $19
780 cmpult $19, $17, $10
781 addq $9, $10, $9
782 mulq $4, $3, $18 # a[2]*b[1]
783 umulh $4, $3, $17
784 addq $20, $18, $20
785 cmpult $20, $18, $10
786 addq $10, $17, $17
787 addq $19, $17, $19
788 cmpult $19, $17, $18
789 addq $9, $18, $9
790 mulq $6, $1, $10 # a[3]*b[0]
791 umulh $6, $1, $17
792 addq $20, $10, $20
793 cmpult $20, $10, $18
794 addq $18, $17, $17
795 addq $19, $17, $19
796 cmpult $19, $17, $10
797 addq $9, $10, $9
798 stq $20, 24($16) # r[3]
799 bis $31, $31, $20
800 mulq $0, $22, $18 # a[0]*b[4]
801 umulh $0, $22, $17
802 addq $19, $18, $19
803 cmpult $19, $18, $10
804 addq $10, $17, $17
805 addq $9, $17, $9
806 cmpult $9, $17, $18
807 addq $20, $18, $20
808 mulq $2, $7, $10 # a[1]*b[3]
809 umulh $2, $7, $17
810 addq $19, $10, $19
811 cmpult $19, $10, $18
812 addq $18, $17, $17
813 addq $9, $17, $9
814 cmpult $9, $17, $10
815 addq $20, $10, $20
816 mulq $4, $5, $18 # a[2]*b[2]
817 umulh $4, $5, $17
818 addq $19, $18, $19
819 cmpult $19, $18, $10
820 addq $10, $17, $17
821 addq $9, $17, $9
822 cmpult $9, $17, $18
823 addq $20, $18, $20
824 mulq $6, $3, $10 # a[3]*b[1]
825 umulh $6, $3, $17
826 addq $19, $10, $19
827 cmpult $19, $10, $18
828 addq $18, $17, $17
829 addq $9, $17, $9
830 cmpult $9, $17, $10
831 addq $20, $10, $20
832 mulq $8, $1, $18 # a[4]*b[0]
833 umulh $8, $1, $17
834 addq $19, $18, $19
835 cmpult $19, $18, $10
836 addq $10, $17, $17
837 addq $9, $17, $9
838 cmpult $9, $17, $18
839 addq $20, $18, $20
840 stq $19, 32($16) # r[4]
841 bis $31, $31, $19
842 mulq $0, $24, $10 # a[0]*b[5]
843 umulh $0, $24, $17
844 addq $9, $10, $9
845 cmpult $9, $10, $18
846 addq $18, $17, $17
847 addq $20, $17, $20
848 cmpult $20, $17, $10
849 addq $19, $10, $19
850 mulq $2, $22, $18 # a[1]*b[4]
851 umulh $2, $22, $17
852 addq $9, $18, $9
853 cmpult $9, $18, $10
854 addq $10, $17, $17
855 addq $20, $17, $20
856 cmpult $20, $17, $18
857 addq $19, $18, $19
858 mulq $4, $7, $10 # a[2]*b[3]
859 umulh $4, $7, $17
860 addq $9, $10, $9
861 cmpult $9, $10, $18
862 addq $18, $17, $17
863 addq $20, $17, $20
864 cmpult $20, $17, $10
865 addq $19, $10, $19
866 mulq $6, $5, $18 # a[3]*b[2]
867 umulh $6, $5, $17
868 addq $9, $18, $9
869 cmpult $9, $18, $10
870 addq $10, $17, $17
871 addq $20, $17, $20
872 cmpult $20, $17, $18
873 addq $19, $18, $19
874 mulq $8, $3, $10 # a[4]*b[1]
875 umulh $8, $3, $17
876 addq $9, $10, $9
877 cmpult $9, $10, $18
878 addq $18, $17, $17
879 addq $20, $17, $20
880 cmpult $20, $17, $10
881 addq $19, $10, $19
882 mulq $23, $1, $18 # a[5]*b[0]
883 umulh $23, $1, $17
884 addq $9, $18, $9
885 cmpult $9, $18, $10
886 addq $10, $17, $17
887 addq $20, $17, $20
888 cmpult $20, $17, $18
889 addq $19, $18, $19
890 stq $9, 40($16) # r[5]
891 bis $31, $31, $9
892 mulq $0, $27, $10 # a[0]*b[6]
893 umulh $0, $27, $17
894 addq $20, $10, $20
895 cmpult $20, $10, $18
896 addq $18, $17, $17
897 addq $19, $17, $19
898 cmpult $19, $17, $10
899 addq $9, $10, $9
900 mulq $2, $24, $18 # a[1]*b[5]
901 umulh $2, $24, $17
902 addq $20, $18, $20
903 cmpult $20, $18, $10
904 addq $10, $17, $17
905 addq $19, $17, $19
906 cmpult $19, $17, $18
907 addq $9, $18, $9
908 mulq $4, $22, $10 # a[2]*b[4]
909 umulh $4, $22, $17
910 addq $20, $10, $20
911 cmpult $20, $10, $18
912 addq $18, $17, $17
913 addq $19, $17, $19
914 cmpult $19, $17, $10
915 addq $9, $10, $9
916 mulq $6, $7, $18 # a[3]*b[3]
917 umulh $6, $7, $17
918 addq $20, $18, $20
919 cmpult $20, $18, $10
920 addq $10, $17, $17
921 addq $19, $17, $19
922 cmpult $19, $17, $18
923 addq $9, $18, $9
924 mulq $8, $5, $10 # a[4]*b[2]
925 umulh $8, $5, $17
926 addq $20, $10, $20
927 cmpult $20, $10, $18
928 addq $18, $17, $17
929 addq $19, $17, $19
930 cmpult $19, $17, $10
931 addq $9, $10, $9
932 mulq $23, $3, $18 # a[5]*b[1]
933 umulh $23, $3, $17
934 addq $20, $18, $20
935 cmpult $20, $18, $10
936 addq $10, $17, $17
937 addq $19, $17, $19
938 cmpult $19, $17, $18
939 addq $9, $18, $9
940 mulq $25, $1, $10 # a[6]*b[0]
941 umulh $25, $1, $17
942 addq $20, $10, $20
943 cmpult $20, $10, $18
944 addq $18, $17, $17
945 addq $19, $17, $19
946 cmpult $19, $17, $10
947 addq $9, $10, $9
948 stq $20, 48($16) # r[6]
949 bis $31, $31, $20
950 mulq $0, $21, $18 # a[0]*b[7]
951 umulh $0, $21, $17
952 addq $19, $18, $19
953 cmpult $19, $18, $10
954 addq $10, $17, $17
955 addq $9, $17, $9
956 cmpult $9, $17, $18
957 addq $20, $18, $20
958 mulq $2, $27, $10 # a[1]*b[6]
959 umulh $2, $27, $17
960 addq $19, $10, $19
961 cmpult $19, $10, $18
962 addq $18, $17, $17
963 addq $9, $17, $9
964 cmpult $9, $17, $0
965 addq $20, $0, $20
966 mulq $4, $24, $10 # a[2]*b[5]
967 umulh $4, $24, $18
968 addq $19, $10, $19
969 cmpult $19, $10, $17
970 addq $17, $18, $18
971 addq $9, $18, $9
972 cmpult $9, $18, $0
973 addq $20, $0, $20
974 mulq $6, $22, $10 # a[3]*b[4]
975 umulh $6, $22, $17
976 addq $19, $10, $19
977 cmpult $19, $10, $18
978 addq $18, $17, $17
979 addq $9, $17, $9
980 cmpult $9, $17, $0
981 addq $20, $0, $20
982 mulq $8, $7, $10 # a[4]*b[3]
983 umulh $8, $7, $18
984 addq $19, $10, $19
985 cmpult $19, $10, $17
986 addq $17, $18, $18
987 addq $9, $18, $9
988 cmpult $9, $18, $0
989 addq $20, $0, $20
990 mulq $23, $5, $10 # a[5]*b[2]
991 umulh $23, $5, $17
992 addq $19, $10, $19
993 cmpult $19, $10, $18
994 addq $18, $17, $17
995 addq $9, $17, $9
996 cmpult $9, $17, $0
997 addq $20, $0, $20
998 mulq $25, $3, $10 # a[6]*b[1]
999 umulh $25, $3, $18
1000 addq $19, $10, $19
1001 cmpult $19, $10, $17
1002 addq $17, $18, $18
1003 addq $9, $18, $9
1004 cmpult $9, $18, $0
1005 addq $20, $0, $20
1006 mulq $28, $1, $10 # a[7]*b[0]
1007 umulh $28, $1, $17
1008 addq $19, $10, $19
1009 cmpult $19, $10, $18
1010 addq $18, $17, $17
1011 addq $9, $17, $9
1012 cmpult $9, $17, $0
1013 addq $20, $0, $20
1014 stq $19, 56($16) # r[7]
1015 bis $31, $31, $19
1016 mulq $2, $21, $10 # a[1]*b[7]
1017 umulh $2, $21, $18
1018 addq $9, $10, $9
1019 cmpult $9, $10, $17
1020 addq $17, $18, $18
1021 addq $20, $18, $20
1022 cmpult $20, $18, $0
1023 addq $19, $0, $19
1024 mulq $4, $27, $1 # a[2]*b[6]
1025 umulh $4, $27, $10
1026 addq $9, $1, $9
1027 cmpult $9, $1, $17
1028 addq $17, $10, $10
1029 addq $20, $10, $20
1030 cmpult $20, $10, $18
1031 addq $19, $18, $19
1032 mulq $6, $24, $0 # a[3]*b[5]
1033 umulh $6, $24, $2
1034 addq $9, $0, $9
1035 cmpult $9, $0, $1
1036 addq $1, $2, $2
1037 addq $20, $2, $20
1038 cmpult $20, $2, $17
1039 addq $19, $17, $19
1040 mulq $8, $22, $10 # a[4]*b[4]
1041 umulh $8, $22, $18
1042 addq $9, $10, $9
1043 cmpult $9, $10, $0
1044 addq $0, $18, $18
1045 addq $20, $18, $20
1046 cmpult $20, $18, $1
1047 addq $19, $1, $19
1048 mulq $23, $7, $2 # a[5]*b[3]
1049 umulh $23, $7, $17
1050 addq $9, $2, $9
1051 cmpult $9, $2, $10
1052 addq $10, $17, $17
1053 addq $20, $17, $20
1054 cmpult $20, $17, $0
1055 addq $19, $0, $19
1056 mulq $25, $5, $18 # a[6]*b[2]
1057 umulh $25, $5, $1
1058 addq $9, $18, $9
1059 cmpult $9, $18, $2
1060 addq $2, $1, $1
1061 addq $20, $1, $20
1062 cmpult $20, $1, $10
1063 addq $19, $10, $19
1064 mulq $28, $3, $17 # a[7]*b[1]
1065 umulh $28, $3, $0
1066 addq $9, $17, $9
1067 cmpult $9, $17, $18
1068 addq $18, $0, $0
1069 addq $20, $0, $20
1070 cmpult $20, $0, $2
1071 addq $19, $2, $19
1072 stq $9, 64($16) # r[8]
1073 bis $31, $31, $9
1074 mulq $4, $21, $1 # a[2]*b[7]
1075 umulh $4, $21, $10
1076 addq $20, $1, $20
1077 cmpult $20, $1, $17
1078 addq $17, $10, $10
1079 addq $19, $10, $19
1080 cmpult $19, $10, $18
1081 addq $9, $18, $9
1082 mulq $6, $27, $0 # a[3]*b[6]
1083 umulh $6, $27, $2
1084 addq $20, $0, $20
1085 cmpult $20, $0, $3
1086 addq $3, $2, $2
1087 addq $19, $2, $19
1088 cmpult $19, $2, $1
1089 addq $9, $1, $9
1090 mulq $8, $24, $17 # a[4]*b[5]
1091 umulh $8, $24, $10
1092 addq $20, $17, $20
1093 cmpult $20, $17, $18
1094 addq $18, $10, $10
1095 addq $19, $10, $19
1096 cmpult $19, $10, $4
1097 addq $9, $4, $9
1098 mulq $23, $22, $0 # a[5]*b[4]
1099 umulh $23, $22, $3
1100 addq $20, $0, $20
1101 cmpult $20, $0, $2
1102 addq $2, $3, $3
1103 addq $19, $3, $19
1104 cmpult $19, $3, $1
1105 addq $9, $1, $9
1106 mulq $25, $7, $17 # a[6]*b[3]
1107 umulh $25, $7, $18
1108 addq $20, $17, $20
1109 cmpult $20, $17, $10
1110 addq $10, $18, $18
1111 addq $19, $18, $19
1112 cmpult $19, $18, $4
1113 addq $9, $4, $9
1114 mulq $28, $5, $0 # a[7]*b[2]
1115 umulh $28, $5, $2
1116 addq $20, $0, $20
1117 cmpult $20, $0, $3
1118 addq $3, $2, $2
1119 addq $19, $2, $19
1120 cmpult $19, $2, $1
1121 addq $9, $1, $9
1122 stq $20, 72($16) # r[9]
1123 bis $31, $31, $20
1124 mulq $6, $21, $17 # a[3]*b[7]
1125 umulh $6, $21, $10
1126 addq $19, $17, $19
1127 cmpult $19, $17, $18
1128 addq $18, $10, $10
1129 addq $9, $10, $9
1130 cmpult $9, $10, $4
1131 addq $20, $4, $20
1132 mulq $8, $27, $0 # a[4]*b[6]
1133 umulh $8, $27, $3
1134 addq $19, $0, $19
1135 cmpult $19, $0, $2
1136 addq $2, $3, $3
1137 addq $9, $3, $9
1138 cmpult $9, $3, $1
1139 addq $20, $1, $20
1140 mulq $23, $24, $5 # a[5]*b[5]
1141 umulh $23, $24, $17
1142 addq $19, $5, $19
1143 cmpult $19, $5, $18
1144 addq $18, $17, $17
1145 addq $9, $17, $9
1146 cmpult $9, $17, $10
1147 addq $20, $10, $20
1148 mulq $25, $22, $4 # a[6]*b[4]
1149 umulh $25, $22, $6
1150 addq $19, $4, $19
1151 cmpult $19, $4, $0
1152 addq $0, $6, $6
1153 addq $9, $6, $9
1154 cmpult $9, $6, $2
1155 addq $20, $2, $20
1156 mulq $28, $7, $3 # a[7]*b[3]
1157 umulh $28, $7, $1
1158 addq $19, $3, $19
1159 cmpult $19, $3, $5
1160 addq $5, $1, $1
1161 addq $9, $1, $9
1162 cmpult $9, $1, $18
1163 addq $20, $18, $20
1164 stq $19, 80($16) # r[10]
1165 bis $31, $31, $19
1166 mulq $8, $21, $17 # a[4]*b[7]
1167 umulh $8, $21, $10
1168 addq $9, $17, $9
1169 cmpult $9, $17, $4
1170 addq $4, $10, $10
1171 addq $20, $10, $20
1172 cmpult $20, $10, $0
1173 addq $19, $0, $19
1174 mulq $23, $27, $6 # a[5]*b[6]
1175 umulh $23, $27, $2
1176 addq $9, $6, $9
1177 cmpult $9, $6, $3
1178 addq $3, $2, $2
1179 addq $20, $2, $20
1180 cmpult $20, $2, $5
1181 addq $19, $5, $19
1182 mulq $25, $24, $1 # a[6]*b[5]
1183 umulh $25, $24, $18
1184 addq $9, $1, $9
1185 cmpult $9, $1, $7
1186 addq $7, $18, $18
1187 addq $20, $18, $20
1188 cmpult $20, $18, $17
1189 addq $19, $17, $19
1190 mulq $28, $22, $4 # a[7]*b[4]
1191 umulh $28, $22, $10
1192 addq $9, $4, $9
1193 cmpult $9, $4, $0
1194 addq $0, $10, $10
1195 addq $20, $10, $20
1196 cmpult $20, $10, $8
1197 addq $19, $8, $19
1198 stq $9, 88($16) # r[11]
1199 bis $31, $31, $9
1200 mulq $23, $21, $6 # a[5]*b[7]
1201 umulh $23, $21, $3
1202 addq $20, $6, $20
1203 cmpult $20, $6, $2
1204 addq $2, $3, $3
1205 addq $19, $3, $19
1206 cmpult $19, $3, $5
1207 addq $9, $5, $9
1208 mulq $25, $27, $1 # a[6]*b[6]
1209 umulh $25, $27, $7
1210 addq $20, $1, $20
1211 cmpult $20, $1, $18
1212 addq $18, $7, $7
1213 addq $19, $7, $19
1214 cmpult $19, $7, $17
1215 addq $9, $17, $9
1216 mulq $28, $24, $4 # a[7]*b[5]
1217 umulh $28, $24, $0
1218 addq $20, $4, $20
1219 cmpult $20, $4, $10
1220 addq $10, $0, $0
1221 addq $19, $0, $19
1222 cmpult $19, $0, $8
1223 addq $9, $8, $9
1224 stq $20, 96($16) # r[12]
1225 bis $31, $31, $20
1226 mulq $25, $21, $22 # a[6]*b[7]
1227 umulh $25, $21, $6
1228 addq $19, $22, $19
1229 cmpult $19, $22, $2
1230 addq $2, $6, $6
1231 addq $9, $6, $9
1232 cmpult $9, $6, $3
1233 addq $20, $3, $20
1234 mulq $28, $27, $5 # a[7]*b[6]
1235 umulh $28, $27, $23
1236 addq $19, $5, $19
1237 cmpult $19, $5, $1
1238 addq $1, $23, $23
1239 addq $9, $23, $9
1240 cmpult $9, $23, $18
1241 addq $20, $18, $20
1242 stq $19, 104($16) # r[13]
1243 bis $31, $31, $19
1244 mulq $28, $21, $7 # a[7]*b[7]
1245 umulh $28, $21, $17
1246 addq $9, $7, $9
1247 cmpult $9, $7, $4
1248 addq $4, $17, $17
1249 addq $20, $17, $20
1250 cmpult $20, $17, $10
1251 addq $19, $10, $19
1252 stq $9, 112($16) # r[14]
1253 stq $20, 120($16) # r[15]
1254 ldq $9, 0($30) # restore $9
1255 ldq $10, 8($30) # restore $10
1256 addq $30, 16, $30
1257 ret $31,($26),1
1258 .end bn_mul_comba8
# bn_sqr_comba4: square a 4-word operand into an 8-word result,
# one result word (column) at a time.
# As used below: $16 = result pointer, $17 = operand pointer; a[0..3] are
# loaded into $0..$3. $5, $6 and $4 rotate as the low/middle/high
# accumulator words: after a result word is stored its register is cleared
# and becomes the new high word.
# Square terms a[i]*a[i] are added once. Cross terms a[i]*a[j] (i != j) are
# computed once and doubled: cmplt against $31 captures the top bit of each
# half before the half is added to itself; the top bit of the low half is
# added into the doubled high half and the top bit of the high half goes to
# the high accumulator.
# NOTE(review): in every term the carry from the low accumulator is added to
# the middle accumulator AFTER the middle accumulator's own carry-out has
# been computed, and the carry-out of that last addition is never checked.
# This looks like it can drop a carry when the middle accumulator is all
# ones at that point -- confirm against a reference implementation.
1259 .text
1260 .align 3
1261 .globl bn_sqr_comba4
1262 .ent bn_sqr_comba4
1263bn_sqr_comba4:
1264bn_sqr_comba4..ng:
1265 .frame $30,0,$26,0
1266 .prologue 0
1267
1268 ldq $0, 0($17) # a[0]
1269 ldq $1, 8($17) # a[1]
1270 ldq $2, 16($17) # a[2]
1271 ldq $3, 24($17) # a[3]
1272 bis $31, $31, $6
1273 mulq $0, $0, $4 # a[0]*a[0]
1274 umulh $0, $0, $5
1275 stq $4, 0($16) # r[0]
1276 bis $31, $31, $4
1277 mulq $0, $1, $7 # a[0]*a[1], doubled below
1278 umulh $0, $1, $8
1279 cmplt $7, $31, $22 # top bit of the low half
1280 cmplt $8, $31, $23 # top bit of the high half
1281 addq $7, $7, $7 # low half * 2
1282 addq $8, $8, $8 # high half * 2
1283 addq $8, $22, $8 # bit shifted out of the low half enters the high half
1284 addq $4, $23, $4 # bit shifted out of the high half goes to the high accumulator
1285 addq $5, $7, $5
1286 addq $6, $8, $6
1287 cmpult $5, $7, $24
1288 cmpult $6, $8, $25
1289 addq $6, $24, $6
1290 addq $4, $25, $4
1291 stq $5, 8($16) # r[1]
1292 bis $31, $31, $5
1293 mulq $1, $1, $27 # a[1]*a[1]
1294 umulh $1, $1, $28
1295 addq $6, $27, $6
1296 addq $4, $28, $4
1297 cmpult $6, $27, $21
1298 cmpult $4, $28, $20
1299 addq $4, $21, $4
1300 addq $5, $20, $5
1301 mulq $2, $0, $19 # a[2]*a[0], doubled below
1302 umulh $2, $0, $18
1303 cmplt $19, $31, $17
1304 cmplt $18, $31, $22
1305 addq $19, $19, $19
1306 addq $18, $18, $18
1307 addq $18, $17, $18
1308 addq $5, $22, $5
1309 addq $6, $19, $6
1310 addq $4, $18, $4
1311 cmpult $6, $19, $23
1312 cmpult $4, $18, $7
1313 addq $4, $23, $4
1314 addq $5, $7, $5
1315 stq $6, 16($16) # r[2]
1316 bis $31, $31, $6
1317 mulq $3, $0, $8 # a[3]*a[0], doubled below
1318 umulh $3, $0, $24
1319 cmplt $8, $31, $25
1320 cmplt $24, $31, $27
1321 addq $8, $8, $8
1322 addq $24, $24, $24
1323 addq $24, $25, $24
1324 addq $6, $27, $6
1325 addq $4, $8, $4
1326 addq $5, $24, $5
1327 cmpult $4, $8, $28
1328 cmpult $5, $24, $21
1329 addq $5, $28, $5
1330 addq $6, $21, $6
1331 mulq $2, $1, $20 # a[2]*a[1], doubled below
1332 umulh $2, $1, $17
1333 cmplt $20, $31, $22
1334 cmplt $17, $31, $19
1335 addq $20, $20, $20
1336 addq $17, $17, $17
1337 addq $17, $22, $17
1338 addq $6, $19, $6
1339 addq $4, $20, $4
1340 addq $5, $17, $5
1341 cmpult $4, $20, $18
1342 cmpult $5, $17, $23
1343 addq $5, $18, $5
1344 addq $6, $23, $6
1345 stq $4, 24($16) # r[3]
1346 bis $31, $31, $4
1347 mulq $2, $2, $7 # a[2]*a[2]
1348 umulh $2, $2, $25
1349 addq $5, $7, $5
1350 addq $6, $25, $6
1351 cmpult $5, $7, $27
1352 cmpult $6, $25, $8
1353 addq $6, $27, $6
1354 addq $4, $8, $4
1355 mulq $3, $1, $24 # a[3]*a[1], doubled below
1356 umulh $3, $1, $28
1357 cmplt $24, $31, $21
1358 cmplt $28, $31, $22
1359 addq $24, $24, $24
1360 addq $28, $28, $28
1361 addq $28, $21, $28
1362 addq $4, $22, $4
1363 addq $5, $24, $5
1364 addq $6, $28, $6
1365 cmpult $5, $24, $19
1366 cmpult $6, $28, $20
1367 addq $6, $19, $6
1368 addq $4, $20, $4
1369 stq $5, 32($16) # r[4]
1370 bis $31, $31, $5
1371 mulq $3, $2, $17 # a[3]*a[2], doubled below
1372 umulh $3, $2, $18
1373 cmplt $17, $31, $23
1374 cmplt $18, $31, $7
1375 addq $17, $17, $17
1376 addq $18, $18, $18
1377 addq $18, $23, $18
1378 addq $5, $7, $5
1379 addq $6, $17, $6
1380 addq $4, $18, $4
1381 cmpult $6, $17, $25
1382 cmpult $4, $18, $27
1383 addq $4, $25, $4
1384 addq $5, $27, $5
1385 stq $6, 40($16) # r[5]
1386 bis $31, $31, $6
1387 mulq $3, $3, $8 # a[3]*a[3]
1388 umulh $3, $3, $21
1389 addq $4, $8, $4
1390 addq $5, $21, $5
1391 cmpult $4, $8, $22
1392 cmpult $5, $21, $24
1393 addq $5, $22, $5
1394 addq $6, $24, $6
1395 stq $4, 48($16) # r[6]
1396 stq $5, 56($16) # r[7]
1397 ret $31,($26),1
1398 .end bn_sqr_comba4
# bn_sqr_comba8: square an 8-word operand into a 16-word result,
# one result word (column) at a time.
# As used below: $16 = result pointer, $17 = operand pointer; a[0..7] are
# loaded into $0..$7. $22, $23 and $8 rotate as the low/middle/high
# accumulator words: after a result word is stored its register is cleared
# and becomes the new high word.
# Square terms a[i]*a[i] are added once. Cross terms a[i]*a[j] (i != j) are
# computed once and doubled: cmplt against $31 captures the top bit of each
# half before the half is added to itself; the top bit of the low half is
# added into the doubled high half and the top bit of the high half goes to
# the high accumulator.
# NOTE(review): in every term the carry from the low accumulator is added to
# the middle accumulator AFTER the middle accumulator's own carry-out has
# been computed, and the carry-out of that last addition is never checked.
# This looks like it can drop a carry when the middle accumulator is all
# ones at that point -- confirm against a reference implementation.
1399 .text
1400 .align 3
1401 .globl bn_sqr_comba8
1402 .ent bn_sqr_comba8
1403bn_sqr_comba8:
1404bn_sqr_comba8..ng:
1405 .frame $30,0,$26,0
1406 .prologue 0
344 1407
1408 ldq $0, 0($17) # a[0]
1409 ldq $1, 8($17) # a[1]
1410 ldq $2, 16($17) # a[2]
1411 ldq $3, 24($17) # a[3]
1412 ldq $4, 32($17) # a[4]
1413 ldq $5, 40($17) # a[5]
1414 ldq $6, 48($17) # a[6]
1415 ldq $7, 56($17) # a[7]
1416 bis $31, $31, $23
1417 mulq $0, $0, $8 # a[0]*a[0]
1418 umulh $0, $0, $22
1419 stq $8, 0($16) # r[0]
1420 bis $31, $31, $8
1421 mulq $1, $0, $24 # a[1]*a[0], doubled below
1422 umulh $1, $0, $25
1423 cmplt $24, $31, $27 # top bit of the low half
1424 cmplt $25, $31, $28 # top bit of the high half
1425 addq $24, $24, $24 # low half * 2
1426 addq $25, $25, $25 # high half * 2
1427 addq $25, $27, $25 # bit shifted out of the low half enters the high half
1428 addq $8, $28, $8 # bit shifted out of the high half goes to the high accumulator
1429 addq $22, $24, $22
1430 addq $23, $25, $23
1431 cmpult $22, $24, $21
1432 cmpult $23, $25, $20
1433 addq $23, $21, $23
1434 addq $8, $20, $8
1435 stq $22, 8($16) # r[1]
1436 bis $31, $31, $22
1437 mulq $1, $1, $19 # a[1]*a[1]
1438 umulh $1, $1, $18
1439 addq $23, $19, $23
1440 addq $8, $18, $8
1441 cmpult $23, $19, $17
1442 cmpult $8, $18, $27
1443 addq $8, $17, $8
1444 addq $22, $27, $22
1445 mulq $2, $0, $28 # a[2]*a[0], doubled below
1446 umulh $2, $0, $24
1447 cmplt $28, $31, $25
1448 cmplt $24, $31, $21
1449 addq $28, $28, $28
1450 addq $24, $24, $24
1451 addq $24, $25, $24
1452 addq $22, $21, $22
1453 addq $23, $28, $23
1454 addq $8, $24, $8
1455 cmpult $23, $28, $20
1456 cmpult $8, $24, $19
1457 addq $8, $20, $8
1458 addq $22, $19, $22
1459 stq $23, 16($16) # r[2]
1460 bis $31, $31, $23
1461 mulq $2, $1, $18 # a[2]*a[1], doubled below
1462 umulh $2, $1, $17
1463 cmplt $18, $31, $27
1464 cmplt $17, $31, $25
1465 addq $18, $18, $18
1466 addq $17, $17, $17
1467 addq $17, $27, $17
1468 addq $23, $25, $23
1469 addq $8, $18, $8
1470 addq $22, $17, $22
1471 cmpult $8, $18, $21
1472 cmpult $22, $17, $28
1473 addq $22, $21, $22
1474 addq $23, $28, $23
1475 mulq $3, $0, $24 # a[3]*a[0], doubled below
1476 umulh $3, $0, $20
1477 cmplt $24, $31, $19
1478 cmplt $20, $31, $27
1479 addq $24, $24, $24
1480 addq $20, $20, $20
1481 addq $20, $19, $20
1482 addq $23, $27, $23
1483 addq $8, $24, $8
1484 addq $22, $20, $22
1485 cmpult $8, $24, $25
1486 cmpult $22, $20, $18
1487 addq $22, $25, $22
1488 addq $23, $18, $23
1489 stq $8, 24($16) # r[3]
1490 bis $31, $31, $8
1491 mulq $2, $2, $17 # a[2]*a[2]
1492 umulh $2, $2, $21
1493 addq $22, $17, $22
1494 addq $23, $21, $23
1495 cmpult $22, $17, $28
1496 cmpult $23, $21, $19
1497 addq $23, $28, $23
1498 addq $8, $19, $8
1499 mulq $3, $1, $27 # a[3]*a[1], doubled below
1500 umulh $3, $1, $24
1501 cmplt $27, $31, $20
1502 cmplt $24, $31, $25
1503 addq $27, $27, $27
1504 addq $24, $24, $24
1505 addq $24, $20, $24
1506 addq $8, $25, $8
1507 addq $22, $27, $22
1508 addq $23, $24, $23
1509 cmpult $22, $27, $18
1510 cmpult $23, $24, $17
1511 addq $23, $18, $23
1512 addq $8, $17, $8
1513 mulq $4, $0, $21 # a[4]*a[0], doubled below
1514 umulh $4, $0, $28
1515 cmplt $21, $31, $19
1516 cmplt $28, $31, $20
1517 addq $21, $21, $21
1518 addq $28, $28, $28
1519 addq $28, $19, $28
1520 addq $8, $20, $8
1521 addq $22, $21, $22
1522 addq $23, $28, $23
1523 cmpult $22, $21, $25
1524 cmpult $23, $28, $27
1525 addq $23, $25, $23
1526 addq $8, $27, $8
1527 stq $22, 32($16) # r[4]
1528 bis $31, $31, $22
1529 mulq $3, $2, $24 # a[3]*a[2], doubled below
1530 umulh $3, $2, $18
1531 cmplt $24, $31, $17
1532 cmplt $18, $31, $19
1533 addq $24, $24, $24
1534 addq $18, $18, $18
1535 addq $18, $17, $18
1536 addq $22, $19, $22
1537 addq $23, $24, $23
1538 addq $8, $18, $8
1539 cmpult $23, $24, $20
1540 cmpult $8, $18, $21
1541 addq $8, $20, $8
1542 addq $22, $21, $22
1543 mulq $4, $1, $28 # a[4]*a[1], doubled below
1544 umulh $4, $1, $25
1545 cmplt $28, $31, $27
1546 cmplt $25, $31, $17
1547 addq $28, $28, $28
1548 addq $25, $25, $25
1549 addq $25, $27, $25
1550 addq $22, $17, $22
1551 addq $23, $28, $23
1552 addq $8, $25, $8
1553 cmpult $23, $28, $19
1554 cmpult $8, $25, $24
1555 addq $8, $19, $8
1556 addq $22, $24, $22
1557 mulq $5, $0, $18 # a[5]*a[0], doubled below
1558 umulh $5, $0, $20
1559 cmplt $18, $31, $21
1560 cmplt $20, $31, $27
1561 addq $18, $18, $18
1562 addq $20, $20, $20
1563 addq $20, $21, $20
1564 addq $22, $27, $22
1565 addq $23, $18, $23
1566 addq $8, $20, $8
1567 cmpult $23, $18, $17
1568 cmpult $8, $20, $28
1569 addq $8, $17, $8
1570 addq $22, $28, $22
1571 stq $23, 40($16) # r[5]
1572 bis $31, $31, $23
1573 mulq $3, $3, $25 # a[3]*a[3]
1574 umulh $3, $3, $19
1575 addq $8, $25, $8
1576 addq $22, $19, $22
1577 cmpult $8, $25, $24
1578 cmpult $22, $19, $21
1579 addq $22, $24, $22
1580 addq $23, $21, $23
1581 mulq $4, $2, $27 # a[4]*a[2], doubled below
1582 umulh $4, $2, $18
1583 cmplt $27, $31, $20
1584 cmplt $18, $31, $17
1585 addq $27, $27, $27
1586 addq $18, $18, $18
1587 addq $18, $20, $18
1588 addq $23, $17, $23
1589 addq $8, $27, $8
1590 addq $22, $18, $22
1591 cmpult $8, $27, $28
1592 cmpult $22, $18, $25
1593 addq $22, $28, $22
1594 addq $23, $25, $23
1595 mulq $5, $1, $19 # a[5]*a[1], doubled below
1596 umulh $5, $1, $24
1597 cmplt $19, $31, $21
1598 cmplt $24, $31, $20
1599 addq $19, $19, $19
1600 addq $24, $24, $24
1601 addq $24, $21, $24
1602 addq $23, $20, $23
1603 addq $8, $19, $8
1604 addq $22, $24, $22
1605 cmpult $8, $19, $17
1606 cmpult $22, $24, $27
1607 addq $22, $17, $22
1608 addq $23, $27, $23
1609 mulq $6, $0, $18 # a[6]*a[0], doubled below
1610 umulh $6, $0, $28
1611 cmplt $18, $31, $25
1612 cmplt $28, $31, $21
1613 addq $18, $18, $18
1614 addq $28, $28, $28
1615 addq $28, $25, $28
1616 addq $23, $21, $23
1617 addq $8, $18, $8
1618 addq $22, $28, $22
1619 cmpult $8, $18, $20
1620 cmpult $22, $28, $19
1621 addq $22, $20, $22
1622 addq $23, $19, $23
1623 stq $8, 48($16) # r[6]
1624 bis $31, $31, $8
1625 mulq $4, $3, $24 # a[4]*a[3], doubled below
1626 umulh $4, $3, $17
1627 cmplt $24, $31, $27
1628 cmplt $17, $31, $25
1629 addq $24, $24, $24
1630 addq $17, $17, $17
1631 addq $17, $27, $17
1632 addq $8, $25, $8
1633 addq $22, $24, $22
1634 addq $23, $17, $23
1635 cmpult $22, $24, $21
1636 cmpult $23, $17, $18
1637 addq $23, $21, $23
1638 addq $8, $18, $8
1639 mulq $5, $2, $28 # a[5]*a[2], doubled below
1640 umulh $5, $2, $20
1641 cmplt $28, $31, $19
1642 cmplt $20, $31, $27
1643 addq $28, $28, $28
1644 addq $20, $20, $20
1645 addq $20, $19, $20
1646 addq $8, $27, $8
1647 addq $22, $28, $22
1648 addq $23, $20, $23
1649 cmpult $22, $28, $25
1650 cmpult $23, $20, $24
1651 addq $23, $25, $23
1652 addq $8, $24, $8
1653 mulq $6, $1, $17 # a[6]*a[1], doubled below
1654 umulh $6, $1, $21
1655 cmplt $17, $31, $18
1656 cmplt $21, $31, $19
1657 addq $17, $17, $17
1658 addq $21, $21, $21
1659 addq $21, $18, $21
1660 addq $8, $19, $8
1661 addq $22, $17, $22
1662 addq $23, $21, $23
1663 cmpult $22, $17, $27
1664 cmpult $23, $21, $28
1665 addq $23, $27, $23
1666 addq $8, $28, $8
1667 mulq $7, $0, $20 # a[7]*a[0], doubled below
1668 umulh $7, $0, $25
1669 cmplt $20, $31, $24
1670 cmplt $25, $31, $18
1671 addq $20, $20, $20
1672 addq $25, $25, $25
1673 addq $25, $24, $25
1674 addq $8, $18, $8
1675 addq $22, $20, $22
1676 addq $23, $25, $23
1677 cmpult $22, $20, $19
1678 cmpult $23, $25, $17
1679 addq $23, $19, $23
1680 addq $8, $17, $8
1681 stq $22, 56($16) # r[7]
1682 bis $31, $31, $22
1683 mulq $4, $4, $21 # a[4]*a[4]
1684 umulh $4, $4, $27
1685 addq $23, $21, $23
1686 addq $8, $27, $8
1687 cmpult $23, $21, $28
1688 cmpult $8, $27, $24
1689 addq $8, $28, $8
1690 addq $22, $24, $22
1691 mulq $5, $3, $18 # a[5]*a[3], doubled below
1692 umulh $5, $3, $20
1693 cmplt $18, $31, $25
1694 cmplt $20, $31, $19
1695 addq $18, $18, $18
1696 addq $20, $20, $20
1697 addq $20, $25, $20
1698 addq $22, $19, $22
1699 addq $23, $18, $23
1700 addq $8, $20, $8
1701 cmpult $23, $18, $17
1702 cmpult $8, $20, $21
1703 addq $8, $17, $8
1704 addq $22, $21, $22
1705 mulq $6, $2, $27 # a[6]*a[2], doubled below
1706 umulh $6, $2, $28
1707 cmplt $27, $31, $24
1708 cmplt $28, $31, $25
1709 addq $27, $27, $27
1710 addq $28, $28, $28
1711 addq $28, $24, $28
1712 addq $22, $25, $22
1713 addq $23, $27, $23
1714 addq $8, $28, $8
1715 cmpult $23, $27, $19
1716 cmpult $8, $28, $18
1717 addq $8, $19, $8
1718 addq $22, $18, $22
1719 mulq $7, $1, $20 # a[7]*a[1], doubled below
1720 umulh $7, $1, $17
1721 cmplt $20, $31, $21
1722 cmplt $17, $31, $24
1723 addq $20, $20, $20
1724 addq $17, $17, $17
1725 addq $17, $21, $17
1726 addq $22, $24, $22
1727 addq $23, $20, $23
1728 addq $8, $17, $8
1729 cmpult $23, $20, $25
1730 cmpult $8, $17, $27
1731 addq $8, $25, $8
1732 addq $22, $27, $22
1733 stq $23, 64($16) # r[8]
1734 bis $31, $31, $23
1735 mulq $5, $4, $28 # a[5]*a[4], doubled below
1736 umulh $5, $4, $19
1737 cmplt $28, $31, $18
1738 cmplt $19, $31, $21
1739 addq $28, $28, $28
1740 addq $19, $19, $19
1741 addq $19, $18, $19
1742 addq $23, $21, $23
1743 addq $8, $28, $8
1744 addq $22, $19, $22
1745 cmpult $8, $28, $24
1746 cmpult $22, $19, $20
1747 addq $22, $24, $22
1748 addq $23, $20, $23
1749 mulq $6, $3, $17 # a[6]*a[3], doubled below
1750 umulh $6, $3, $25
1751 cmplt $17, $31, $27
1752 cmplt $25, $31, $18
1753 addq $17, $17, $17
1754 addq $25, $25, $25
1755 addq $25, $27, $25
1756 addq $23, $18, $23
1757 addq $8, $17, $8
1758 addq $22, $25, $22
1759 cmpult $8, $17, $21
1760 cmpult $22, $25, $28
1761 addq $22, $21, $22
1762 addq $23, $28, $23
1763 mulq $7, $2, $19 # a[7]*a[2], doubled below
1764 umulh $7, $2, $24
1765 cmplt $19, $31, $20
1766 cmplt $24, $31, $27
1767 addq $19, $19, $19
1768 addq $24, $24, $24
1769 addq $24, $20, $24
1770 addq $23, $27, $23
1771 addq $8, $19, $8
1772 addq $22, $24, $22
1773 cmpult $8, $19, $18
1774 cmpult $22, $24, $17
1775 addq $22, $18, $22
1776 addq $23, $17, $23
1777 stq $8, 72($16) # r[9]
1778 bis $31, $31, $8
1779 mulq $5, $5, $25 # a[5]*a[5]
1780 umulh $5, $5, $21
1781 addq $22, $25, $22
1782 addq $23, $21, $23
1783 cmpult $22, $25, $28
1784 cmpult $23, $21, $20
1785 addq $23, $28, $23
1786 addq $8, $20, $8
1787 mulq $6, $4, $27 # a[6]*a[4], doubled below
1788 umulh $6, $4, $19
1789 cmplt $27, $31, $24
1790 cmplt $19, $31, $18
1791 addq $27, $27, $27
1792 addq $19, $19, $19
1793 addq $19, $24, $19
1794 addq $8, $18, $8
1795 addq $22, $27, $22
1796 addq $23, $19, $23
1797 cmpult $22, $27, $17
1798 cmpult $23, $19, $25
1799 addq $23, $17, $23
1800 addq $8, $25, $8
1801 mulq $7, $3, $21 # a[7]*a[3], doubled below
1802 umulh $7, $3, $28
1803 cmplt $21, $31, $20
1804 cmplt $28, $31, $24
1805 addq $21, $21, $21
1806 addq $28, $28, $28
1807 addq $28, $20, $28
1808 addq $8, $24, $8
1809 addq $22, $21, $22
1810 addq $23, $28, $23
1811 cmpult $22, $21, $18
1812 cmpult $23, $28, $27
1813 addq $23, $18, $23
1814 addq $8, $27, $8
1815 stq $22, 80($16) # r[10]
1816 bis $31, $31, $22
1817 mulq $6, $5, $19 # a[6]*a[5], doubled below
1818 umulh $6, $5, $17
1819 cmplt $19, $31, $25
1820 cmplt $17, $31, $20
1821 addq $19, $19, $19
1822 addq $17, $17, $17
1823 addq $17, $25, $17
1824 addq $22, $20, $22
1825 addq $23, $19, $23
1826 addq $8, $17, $8
1827 cmpult $23, $19, $24
1828 cmpult $8, $17, $21
1829 addq $8, $24, $8
1830 addq $22, $21, $22
1831 mulq $7, $4, $28 # a[7]*a[4], doubled below
1832 umulh $7, $4, $18
1833 cmplt $28, $31, $27
1834 cmplt $18, $31, $25
1835 addq $28, $28, $28
1836 addq $18, $18, $18
1837 addq $18, $27, $18
1838 addq $22, $25, $22
1839 addq $23, $28, $23
1840 addq $8, $18, $8
1841 cmpult $23, $28, $20
1842 cmpult $8, $18, $19
1843 addq $8, $20, $8
1844 addq $22, $19, $22
1845 stq $23, 88($16) # r[11]
1846 bis $31, $31, $23
1847 mulq $6, $6, $17 # a[6]*a[6]
1848 umulh $6, $6, $24
1849 addq $8, $17, $8
1850 addq $22, $24, $22
1851 cmpult $8, $17, $21
1852 cmpult $22, $24, $27
1853 addq $22, $21, $22
1854 addq $23, $27, $23
1855 mulq $7, $5, $25 # a[7]*a[5], doubled below
1856 umulh $7, $5, $28
1857 cmplt $25, $31, $18
1858 cmplt $28, $31, $20
1859 addq $25, $25, $25
1860 addq $28, $28, $28
1861 addq $28, $18, $28
1862 addq $23, $20, $23
1863 addq $8, $25, $8
1864 addq $22, $28, $22
1865 cmpult $8, $25, $19
1866 cmpult $22, $28, $17
1867 addq $22, $19, $22
1868 addq $23, $17, $23
1869 stq $8, 96($16) # r[12]
1870 bis $31, $31, $8
1871 mulq $7, $6, $24 # a[7]*a[6], doubled below
1872 umulh $7, $6, $21
1873 cmplt $24, $31, $27
1874 cmplt $21, $31, $18
1875 addq $24, $24, $24
1876 addq $21, $21, $21
1877 addq $21, $27, $21
1878 addq $8, $18, $8
1879 addq $22, $24, $22
1880 addq $23, $21, $23
1881 cmpult $22, $24, $20
1882 cmpult $23, $21, $25
1883 addq $23, $20, $23
1884 addq $8, $25, $8
1885 stq $22, 104($16) # r[13]
1886 bis $31, $31, $22
1887 mulq $7, $7, $28 # a[7]*a[7]
1888 umulh $7, $7, $19
1889 addq $23, $28, $23
1890 addq $8, $19, $8
1891 cmpult $23, $28, $17
1892 cmpult $8, $19, $27
1893 addq $8, $17, $8
1894 addq $22, $27, $22
1895 stq $23, 112($16) # r[14]
1896 stq $8, 120($16) # r[15]
1897 ret $31,($26),1
1898 .end bn_sqr_comba8
diff --git a/src/lib/libcrypto/bn/asm/alpha.s.works b/src/lib/libcrypto/bn/asm/alpha.s.works
new file mode 100644
index 0000000000..ee6c587809
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.s.works
@@ -0,0 +1,533 @@
1
2 # DEC Alpha assembler
3 # The bn_div64 is actually gcc output but the other parts are hand done.
4 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
5 # bn_div64.
6 # I've gone back and re-done most of the routines.
7 # The key thing to remember for the 164 CPU is that while a
8 # multiply operation takes 8 cycles, another one can only be issued
9 # after 4 cycles have elapsed. I've made modifications to help
10 # improve this. Also, normally, the result of a ld instruction will not be
11 # available for about 3 cycles.
12 .file 1 "bn_asm.c"
13 .set noat
14gcc2_compiled.:
15__gnu_compiled_c:
16 .text
17 .align 3
# bn_mul_add_words
#   $16 = pointer to the words that are read, updated and written back
#   $17 = pointer to the words that are multiplied
#   $18 = number of 64-bit words to process
#   $19 = the 64-bit multiplier
#   $0  = running carry; it is cleared on entry and still holds the last
#         carry when the routine executes `ret`
# For every word:  [$16] = low64([$16] + [$17]*$19 + carry), and the new carry
# is high64([$17]*$19) plus the two overflow bits from the additions.
# $42 is the 4-words-per-pass loop, $45 the one-word-per-pass tail loop and
# $43 the hand-off between them.
# NOTE(review): the digit columns in the trailing comments look like
# "word number / step" tags from the author; they are not defined in this file.
18 .globl bn_mul_add_words
19 .ent bn_mul_add_words
20bn_mul_add_words:
21bn_mul_add_words..ng:
22 .frame $30,0,$26,0
23 .prologue 0
24 .align 5
25 subq $18,4,$18 # bias the count by 4: negative means fewer than 4 words remain
26 bis $31,$31,$0 # carry = 0 ($31 is used as the zero source)
27 blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
28 ldq $20,0($17) # 1 1
29 ldq $1,0($16) # 1 1
30 .align 3
31$42:
32 mulq $20,$19,$5 # 1 2 1 ######
33 ldq $21,8($17) # 2 1
34 ldq $2,8($16) # 2 1
35 umulh $20,$19,$20 # 1 2 ######
36 ldq $27,16($17) # 3 1
37 ldq $3,16($16) # 3 1
38 mulq $21,$19,$6 # 2 2 1 ######
39 ldq $28,24($17) # 4 1
40 addq $1,$5,$1 # 1 2 2
41 ldq $4,24($16) # 4 1
42 umulh $21,$19,$21 # 2 2 ######
43 cmpult $1,$5,$22 # 1 2 3 1
44 addq $20,$22,$20 # 1 3 1
45 addq $1,$0,$1 # 1 2 3 1
46 mulq $27,$19,$7 # 3 2 1 ######
47 cmpult $1,$0,$0 # 1 2 3 2
48 addq $2,$6,$2 # 2 2 2
49 addq $20,$0,$0 # 1 3 2
50 cmpult $2,$6,$23 # 2 2 3 1
51 addq $21,$23,$21 # 2 3 1
52 umulh $27,$19,$27 # 3 2 ######
53 addq $2,$0,$2 # 2 2 3 1
54 cmpult $2,$0,$0 # 2 2 3 2
55 subq $18,4,$18 # four more words consumed
56 mulq $28,$19,$8 # 4 2 1 ######
57 addq $21,$0,$0 # 2 3 2
58 addq $3,$7,$3 # 3 2 2
59 addq $16,32,$16 # advance the output pointer; the stores below use negative offsets
60 cmpult $3,$7,$24 # 3 2 3 1
61 stq $1,-32($16) # 1 2 4
62 umulh $28,$19,$28 # 4 2 ######
63 addq $27,$24,$27 # 3 3 1
64 addq $3,$0,$3 # 3 2 3 1
65 stq $2,-24($16) # 2 2 4
66 cmpult $3,$0,$0 # 3 2 3 2
67 stq $3,-16($16) # 3 2 4
68 addq $4,$8,$4 # 4 2 2
69 addq $27,$0,$0 # 3 3 2
70 cmpult $4,$8,$25 # 4 2 3 1
71 addq $17,32,$17 # advance the input pointer (all four loads are already issued)
72 addq $28,$25,$28 # 4 3 1
73 addq $4,$0,$4 # 4 2 3 1
74 cmpult $4,$0,$0 # 4 2 3 2
75 stq $4,-8($16) # 4 2 4
76 addq $28,$0,$0 # 4 3 2
77 blt $18,$43 # fewer than 4 words left: leave the unrolled loop
78
79 ldq $20,0($17) # 1 1
80 ldq $1,0($16) # 1 1
81
82 br $42
83
84 .align 4
# Tail loop: one word per pass, same arithmetic as one lane of $42.
85$45:
86 ldq $20,0($17) # 4 1
87 ldq $1,0($16) # 4 1
88 mulq $20,$19,$5 # 4 2 1
89 subq $18,1,$18
90 addq $16,8,$16
91 addq $17,8,$17
92 umulh $20,$19,$20 # 4 2
93 addq $1,$5,$1 # 4 2 2
94 cmpult $1,$5,$22 # 4 2 3 1
95 addq $20,$22,$20 # 4 3 1
96 addq $1,$0,$1 # 4 2 3 1
97 cmpult $1,$0,$0 # 4 2 3 2
98 addq $20,$0,$0 # 4 3 2
99 stq $1,-8($16) # 4 2 4
100 bgt $18,$45
101 ret $31,($26),1 # else exit
102
103 .align 4
104$43:
105 addq $18,4,$18 # undo the bias: $18 is now the number of leftover words
106 bgt $18,$45 # goto tail code
107 ret $31,($26),1 # else exit
108
109 .end bn_mul_add_words
# bn_mul_words
#   $16 = pointer to the output words
#   $17 = pointer to the input words
#   $18 = number of 64-bit words to process
#   $19 = the 64-bit multiplier
#   $0  = running carry; cleared on entry and still holding the last carry
#         when the routine executes `ret`
# For every word:  [$16] = low64([$17]*$19) + carry, and the new carry is
# high64([$17]*$19) plus the overflow bit of that addition.
# Unlike bn_mul_add_words, the previous contents of [$16] are never loaded.
# $142 is the 4-words-per-pass loop, $145 the tail loop, $143 the hand-off.
110 .align 3
111 .globl bn_mul_words
112 .ent bn_mul_words
113bn_mul_words:
114bn_mul_words..ng:
115 .frame $30,0,$26,0
116 .prologue 0
117 .align 5
118 subq $18,4,$18 # bias the count by 4: negative means fewer than 4 words remain
119 bis $31,$31,$0 # carry = 0
120 blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
121 ldq $20,0($17) # 1 1
122 .align 3
123$142:
124
125 mulq $20,$19,$5 # 1 2 1 #####
126 ldq $21,8($17) # 2 1
127 ldq $27,16($17) # 3 1
128 umulh $20,$19,$20 # 1 2 #####
129 ldq $28,24($17) # 4 1
130 mulq $21,$19,$6 # 2 2 1 #####
131 addq $5,$0,$5 # 1 2 3 1
132 subq $18,4,$18 # four more words consumed
133 cmpult $5,$0,$0 # 1 2 3 2
134 umulh $21,$19,$21 # 2 2 #####
135 addq $20,$0,$0 # 1 3 2
136 addq $17,32,$17 # advance the input pointer (all four loads are already issued)
137 addq $6,$0,$6 # 2 2 3 1
138 mulq $27,$19,$7 # 3 2 1 #####
139 cmpult $6,$0,$0 # 2 2 3 2
140 addq $21,$0,$0 # 2 3 2
141 addq $16,32,$16 # advance the output pointer; the stores below use negative offsets
142 umulh $27,$19,$27 # 3 2 #####
143 stq $5,-32($16) # 1 2 4
144 mulq $28,$19,$8 # 4 2 1 #####
145 addq $7,$0,$7 # 3 2 3 1
146 stq $6,-24($16) # 2 2 4
147 cmpult $7,$0,$0 # 3 2 3 2
148 umulh $28,$19,$28 # 4 2 #####
149 addq $27,$0,$0 # 3 3 2
150 stq $7,-16($16) # 3 2 4
151 addq $8,$0,$8 # 4 2 3 1
152 cmpult $8,$0,$0 # 4 2 3 2
153
154 addq $28,$0,$0 # 4 3 2
155
156 stq $8,-8($16) # 4 2 4
157
158 blt $18,$143 # fewer than 4 words left: leave the unrolled loop
159
160 ldq $20,0($17) # 1 1
161
162 br $142
163
164 .align 4
# Tail loop: one word per pass.
165$145:
166 ldq $20,0($17) # 4 1
167 mulq $20,$19,$5 # 4 2 1
168 subq $18,1,$18
169 umulh $20,$19,$20 # 4 2
170 addq $5,$0,$5 # 4 2 3 1
171 addq $16,8,$16
172 cmpult $5,$0,$0 # 4 2 3 2
173 addq $17,8,$17
174 addq $20,$0,$0 # 4 3 2
175 stq $5,-8($16) # 4 2 4
176
177 bgt $18,$145
178 ret $31,($26),1 # else exit
179
180 .align 4
181$143:
182 addq $18,4,$18 # undo the bias: $18 is now the number of leftover words
183 bgt $18,$145 # goto tail code
184 ret $31,($26),1 # else exit
185
186 .end bn_mul_words
# bn_sqr_words
#   $16 = pointer to the output (two 64-bit words are written per input word)
#   $17 = pointer to the input words
#   $18 = number of input words
# For every input word a[i]:  r[2i] = low64(a[i]^2), r[2i+1] = high64(a[i]^2).
# There is no carry between words, so $0 is never written here.
# $542 squares four words per pass, $442 one word per pass, $543 is the
# hand-off between the two.
187 .align 3
188 .globl bn_sqr_words
189 .ent bn_sqr_words
190bn_sqr_words:
191bn_sqr_words..ng:
192 .frame $30,0,$26,0
193 .prologue 0
194
195 subq $18,4,$18 # bias the count by 4: negative means fewer than 4 words remain
196 blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
197 ldq $20,0($17) # a[0]
198 .align 3
199$542:
200 mulq $20,$20,$5 ######
201 ldq $21,8($17) # a[1]
202 subq $18,4,$18 # destination written out, as in every other decrement in this file
203 umulh $20,$20,$1 ######
204 ldq $27,16($17) # a[2]
205 mulq $21,$21,$6 ######
206 ldq $28,24($17) # a[3]
207 stq $5,0($16) # r[0] = low(a[0]^2)
208 umulh $21,$21,$2 ######
209 stq $1,8($16) # r[1] = high(a[0]^2)
210 mulq $27,$27,$7 ######
211 stq $6,16($16) # r[2] = low(a[1]^2)
212 umulh $27,$27,$3 ######
213 stq $2,24($16) # r[3] = high(a[1]^2)
214 mulq $28,$28,$8 ######
215 stq $7,32($16) # r[4] = low(a[2]^2)
216 umulh $28,$28,$4 ######
217 stq $3,40($16) # r[5] = high(a[2]^2)
218
219 addq $16,64,$16 # output advances 8 words per pass
220 addq $17,32,$17 # input advances 4 words per pass
221 stq $8,-16($16) # r[6] = low(a[3]^2)
222 stq $4,-8($16) # r[7] = high(a[3]^2)
223
224 blt $18,$543 # fewer than 4 words left: leave the unrolled loop
225 ldq $20,0($17) # a[0] of the next group
226 br $542
227
228$442:
229 ldq $20,0($17) # a[0]
230 mulq $20,$20,$5 # low 64 bits of a[0]^2
231 addq $16,16,$16
232 addq $17,8,$17
233 subq $18,1,$18
234 umulh $20,$20,$1 # high 64 bits of a[0]^2
235 stq $5,-16($16) # r[0]
236 stq $1,-8($16) # r[1]
237
238 bgt $18,$442
239 ret $31,($26),1 # else exit
240
241 .align 4
242$543:
243 addq $18,4,$18 # undo the bias: $18 is now the number of leftover words
244 bgt $18,$442 # goto tail code
245 ret $31,($26),1 # else exit
246 .end bn_sqr_words
247
# bn_add_words
#   $16 = pointer to the result words r
#   $17 = pointer to the first operand a
#   $18 = pointer to the second operand b
#   $19 = number of 64-bit words
#   $0  = running carry; cleared on entry and still holding the final carry
#         when the routine executes `ret`
# For every word:  r[i] = a[i] + b[i] + carry.
# The carry out of a word is the sum of two overflow bits:
#   (a+b) < a            -- overflow of the operand addition
#   (a+b+carry) < carry  -- overflow from adding the incoming carry
# The first test must be made on a+b *before* the carry is added.  The tail
# loop used to make it afterwards, which gave a wrong carry whenever the
# incoming carry was 1 and a+b was 2^64-1 or a-1 (mod 2^64); for example
# a=1, b=2^64-1, carry=1 produced carry-out 0 instead of 1.
# $901 is the 4-words-per-pass loop, $945 the tail loop, $900 the hand-off.
248 .align 3
249 .globl bn_add_words
250 .ent bn_add_words
251bn_add_words:
252bn_add_words..ng:
253 .frame $30,0,$26,0
254 .prologue 0
255
256 subq $19,4,$19 # bias the count by 4: negative means fewer than 4 words remain
257 bis $31,$31,$0 # carry = 0
258 blt $19,$900
259 ldq $5,0($17) # a[0]
260 ldq $1,0($18) # b[0]
261 .align 3
262$901:
263 addq $1,$5,$1 # r=a+b;
264 ldq $6,8($17) # a[1]
265 cmpult $1,$5,$22 # did we overflow?
266 ldq $2,8($18) # b[1]
267 addq $1,$0,$1 # c+= overflow
268 ldq $7,16($17) # a[2]
269 cmpult $1,$0,$0 # overflow?
270 ldq $3,16($18) # b[2]
271 addq $0,$22,$0
272 ldq $8,24($17) # a[3]
273 addq $2,$6,$2 # r=a+b;
274 ldq $4,24($18) # b[3]
275 cmpult $2,$6,$23 # did we overflow?
276 addq $3,$7,$3 # r=a+b;
277 addq $2,$0,$2 # c+= overflow
278 cmpult $3,$7,$24 # did we overflow?
279 cmpult $2,$0,$0 # overflow?
280 addq $4,$8,$4 # r=a+b;
281 addq $0,$23,$0
282 cmpult $4,$8,$25 # did we overflow?
283 addq $3,$0,$3 # c+= overflow
284 stq $1,0($16) # r[0]=c
285 cmpult $3,$0,$0 # overflow?
286 stq $2,8($16) # r[1]=c
287 addq $0,$24,$0
288 stq $3,16($16) # r[2]=c
289 addq $4,$0,$4 # c+= overflow
290 subq $19,4,$19 # loop--
291 cmpult $4,$0,$0 # overflow?
292 addq $17,32,$17 # a++
293 addq $0,$25,$0
294 stq $4,24($16) # r[3]=c
295 addq $18,32,$18 # b++
296 addq $16,32,$16 # r++
297
298 blt $19,$900
299 ldq $5,0($17) # a[0]
300 ldq $1,0($18) # b[0]
301 br $901
302 .align 4
303$945:
304 ldq $5,0($17) # a[0]
305 ldq $1,0($18) # b[0]
306 addq $1,$5,$1 # r=a+b;
307 subq $19,1,$19 # loop--
308 cmpult $1,$5,$22 # did a+b overflow? (tested before the carry is added)
309 addq $17,8,$17 # a++
310 addq $1,$0,$1 # r+=carry
311 cmpult $1,$0,$0 # did adding the carry overflow?
312 addq $18,8,$18 # b++
313 stq $1,0($16) # r[0]=r
314 addq $0,$22,$0 # carry out = sum of the two overflow bits
315 addq $16,8,$16 # r++
316
317 bgt $19,$945
318 ret $31,($26),1 # else exit
319
320$900:
321 addq $19,4,$19 # undo the bias: $19 is now the number of leftover words
322 bgt $19,$945 # goto tail code
323 ret $31,($26),1 # else exit
324 .end bn_add_words
325
326 #
327 # What follows was taken directly from the C compiler with a few
328 # hacks to redo the labels.
329 #
# bn_div64
#   $16 (copied to $9)  = high 64 bits of the dividend
#   $17 (copied to $10) = low 64 bits of the dividend
#   $18 (copied to $11) = 64-bit divisor
#   $0                  = result; -1 when the divisor is zero
# The quotient is built as two 32-bit digits ($12 counts the two rounds):
# the first digit is parked in the upper half of $13 and OR-ed with the second
# one at $124.  $9..$13 and $26 are saved on a 48-byte frame and restored at
# $136.  The routine calls BN_num_bits_word and, on a failed range check, abort.
330.text
331 .align 3
332 .globl bn_div64
333 .ent bn_div64
334bn_div64:
335 ldgp $29,0($27)
336bn_div64..ng:
337 lda $30,-48($30)
338 .frame $30,48,$26,0
339 stq $26,0($30)
340 stq $9,8($30)
341 stq $10,16($30)
342 stq $11,24($30)
343 stq $12,32($30)
344 stq $13,40($30)
345 .mask 0x4003e00,-48
346 .prologue 1
347 bis $16,$16,$9
348 bis $17,$17,$10
349 bis $18,$18,$11
350 bis $31,$31,$13 # accumulated quotient = 0
351 bis $31,2,$12 # two quotient digits to produce
352 bne $11,$119 # divisor != 0: carry on
353 lda $0,-1 # divisor == 0: return -1
354 br $31,$136
355 .align 4
356$119:
357 bis $11,$11,$16
358 jsr $26,BN_num_bits_word
359 ldgp $29,0($26)
360 subq $0,64,$1
361 beq $1,$120 # divisor uses all 64 bits: skip the range check
362 bis $31,1,$1
363 sll $1,$0,$1 # 1 << bits(divisor)
364 cmpule $9,$1,$1
365 bne $1,$120 # high word <= 1 << bits: accepted
366 # lda $16,_IO_stderr_
367 # lda $17,$C32
368 # bis $0,$0,$18
369 # jsr $26,fprintf
370 # ldgp $29,0($26)
371 jsr $26,abort
372 ldgp $29,0($26)
373 .align 4
# Normalise: shift divisor and dividend left by 64 - bits(divisor).
374$120:
375 bis $31,64,$3
376 cmpult $9,$11,$2
377 subq $3,$0,$1
378 addl $1,$31,$0 # $0 = shift count
379 subq $9,$11,$1
380 cmoveq $2,$1,$9 # high word >= divisor: subtract the divisor once
381 beq $0,$122 # shift count 0: nothing to shift
382 zapnot $0,15,$2
383 subq $3,$0,$1
384 sll $11,$2,$11
385 sll $9,$2,$3
386 srl $10,$1,$1
387 sll $10,$2,$10
388 bis $3,$1,$9 # high word takes the bits shifted out of the low word
389$122:
390 srl $11,32,$5 # $5 = upper 32 bits of the divisor
391 zapnot $11,15,$6 # $6 = lower 32 bits of the divisor
392 lda $7,-1
393 .align 5
# One round per quotient digit: estimate the digit in $27.
394$123:
395 srl $9,32,$1
396 subq $1,$5,$1
397 bne $1,$126
398 zapnot $7,15,$27 # top halves equal: start from 0xffffffff
399 br $31,$127
400 .align 4
401$126:
402 bis $9,$9,$24
403 bis $5,$5,$25
404 divqu $24,$25,$27 # estimate = high word / upper divisor half
405$127:
406 srl $10,32,$4
407 .align 5
# Lower the estimate until it passes the test against the lower divisor half.
408$128:
409 mulq $27,$5,$1
410 subq $9,$1,$3
411 zapnot $3,240,$1
412 bne $1,$129
413 mulq $6,$27,$2
414 sll $3,32,$1
415 addq $1,$4,$1
416 cmpule $2,$1,$2
417 bne $2,$129
418 subq $27,1,$27
419 br $31,$128
420 .align 4
# Subtract digit * divisor from the running remainder ($9:$10).
421$129:
422 mulq $27,$6,$1
423 mulq $27,$5,$4
424 srl $1,32,$3
425 sll $1,32,$1
426 addq $4,$3,$4
427 cmpult $10,$1,$2
428 subq $10,$1,$10
429 addq $2,$4,$2
430 cmpult $9,$2,$1
431 bis $2,$2,$4
432 beq $1,$134
433 addq $9,$11,$9 # estimate was one too large: add the divisor back
434 subq $27,1,$27
435$134:
436 subl $12,1,$12
437 subq $9,$4,$9
438 beq $12,$124 # both digits done
439 sll $27,32,$13 # park the first digit in the upper half
440 sll $9,32,$2
441 srl $10,32,$1
442 sll $10,32,$10
443 bis $2,$1,$9 # shift the remainder left by 32 for the next round
444 br $31,$123
445 .align 4
446$124:
447 bis $13,$27,$0 # result = first digit << 32 | second digit
448$136:
449 ldq $26,0($30)
450 ldq $9,8($30)
451 ldq $10,16($30)
452 ldq $11,24($30)
453 ldq $12,32($30)
454 ldq $13,40($30)
455 addq $30,48,$30
456 ret $31,($26),1
457 .end bn_div64
458
# bn_sub_words
#   $16 = pointer to the result words r
#   $17 = pointer to the minuend words a
#   $18 = pointer to the subtrahend words b
#   $19 = number of 64-bit words
#   $0  = running borrow; cleared on entry and still holding the final borrow
#         when the routine executes `ret`
# For every word:  r[i] = a[i] - b[i] - borrow.
# The borrow out of a word is the sum of two bits:
#   a < b             -- the operand subtraction wrapped
#   (a-b) < borrow    -- subtracting the incoming borrow wrapped
# Both tests are made on the values *before* the matching subq.
# $101 is the 4-words-per-pass loop, $102 the tail loop, $100 the hand-off.
459 .set noat
460 .text
461 .align 3
462 .globl bn_sub_words
463 .ent bn_sub_words
464bn_sub_words:
465bn_sub_words..ng:
466 .frame $30,0,$26,0
467 .prologue 0
468
469 subq $19, 4, $19 # bias the count by 4: negative means fewer than 4 words remain
470 bis $31, $31, $0 # borrow = 0
471 blt $19, $100
472 ldq $1, 0($17) # a[0]
473 ldq $2, 0($18) # b[0]
474$101:
475 ldq $3, 8($17) # a[1]
476 cmpult $1, $2, $4 # word 0: a < b ?
477 ldq $5, 8($18) # b[1]
478 subq $1, $2, $1
479 ldq $6, 16($17) # a[2]
480 cmpult $1, $0, $2 # word 0: (a-b) < borrow ?
481 ldq $7, 16($18) # b[2]
482 subq $1, $0, $23 # word 0 result
483 ldq $8, 24($17) # a[3]
484 addq $2, $4, $0 # borrow into word 1
485 cmpult $3, $5, $24
486 subq $3, $5, $3
487 ldq $22, 24($18) # b[3]
488 cmpult $3, $0, $5
489 subq $3, $0, $25 # word 1 result
490 addq $5, $24, $0 # borrow into word 2
491 cmpult $6, $7, $27
492 subq $6, $7, $6
493 stq $23, 0($16)
494 cmpult $6, $0, $7
495 subq $6, $0, $28 # word 2 result
496 addq $7, $27, $0 # borrow into word 3
497 cmpult $8, $22, $21
498 subq $8, $22, $8
499 stq $25, 8($16)
500 cmpult $8, $0, $22
501 subq $8, $0, $20 # word 3 result
502 addq $22, $21, $0 # borrow out of the group
503 stq $28, 16($16)
504 subq $19, 4, $19
505 stq $20, 24($16)
506 addq $17, 32, $17
507 addq $18, 32, $18
508 addq $16, 32, $16
509 blt $19, $100 # fewer than 4 words left: leave the unrolled loop
510 ldq $1, 0($17)
511 ldq $2, 0($18)
512 br $101
# Tail loop: one word per pass.
513$102:
514 ldq $1, 0($17)
515 ldq $2, 0($18)
516 cmpult $1, $2, $27 # a < b ?
517 subq $1, $2, $1
518 cmpult $1, $0, $2 # (a-b) < borrow ?
519 subq $1, $0, $1
520 stq $1, 0($16)
521 addq $2, $27, $0 # borrow out
522 addq $17, 8, $17
523 addq $18, 8, $18
524 addq $16, 8, $16
525 subq $19, 1, $19
526 bgt $19, $102
527 ret $31,($26),1
528$100:
529 addq $19, 4, $19 # undo the bias: $19 is now the number of leftover words
530 bgt $19, $102
531$103:
532 ret $31,($26),1
533 .end bn_sub_words
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
index 19d425ee96..5191bed273 100644
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ b/src/lib/libcrypto/bn/asm/bn-586.pl
@@ -1,18 +1,16 @@
1#!/usr/bin/perl
2#
3
4#!/usr/local/bin/perl 1#!/usr/local/bin/perl
5 2
6push(@INC,"perlasm","../../perlasm"); 3push(@INC,"perlasm","../../perlasm");
7require "x86asm.pl"; 4require "x86asm.pl";
8 5
9&asm_init($ARGV[0],"bn-586.pl"); 6&asm_init($ARGV[0],$0);
10 7
11&bn_mul_add_words("bn_mul_add_words"); 8&bn_mul_add_words("bn_mul_add_words");
12&bn_mul_words("bn_mul_words"); 9&bn_mul_words("bn_mul_words");
13&bn_sqr_words("bn_sqr_words"); 10&bn_sqr_words("bn_sqr_words");
14&bn_div64("bn_div64"); 11&bn_div_words("bn_div_words");
15&bn_add_words("bn_add_words"); 12&bn_add_words("bn_add_words");
13&bn_sub_words("bn_sub_words");
16 14
17&asm_finish(); 15&asm_finish();
18 16
@@ -228,7 +226,7 @@ sub bn_sqr_words
228 &function_end($name); 226 &function_end($name);
229 } 227 }
230 228
231sub bn_div64 229sub bn_div_words
232 { 230 {
233 local($name)=@_; 231 local($name)=@_;
234 232
@@ -307,7 +305,79 @@ sub bn_add_words
307 } 305 }
308 &set_label("aw_end",0); 306 &set_label("aw_end",0);
309 307
310 &mov("eax",$c); 308# &mov("eax",$c); # $c is "eax"
309
310 &function_end($name);
311 }
312
# Emits the x86 routine named $name, which subtracts two word arrays:
#   wparam(0) = r (result), wparam(1) = a, wparam(2) = b, wparam(3) = num.
# For every 32-bit word r[i] = a[i] - b[i] - borrow; the running borrow lives
# in eax ($c) and is therefore what the emitted routine leaves in eax.
# The main loop is unrolled 8 times; the remaining num & 7 words are handled
# by up to 7 straight-line "tail rounds".
# The label names (aw_loop / aw_finish / aw_end) are the same ones used by
# the bn_add_words generator.
313sub bn_sub_words
314 {
315 local($name)=@_;
316
317 &function_begin($name,"");
318
319 &comment("");
320 $a="esi";
321 $b="edi";
322 $c="eax";
323 $r="ebx";
324 $tmp1="ecx";
325 $tmp2="edx";
326 $num="ebp";
327
328 &mov($r,&wparam(0)); # get r
329 &mov($a,&wparam(1)); # get a
330 &mov($b,&wparam(2)); # get b
331 &mov($num,&wparam(3)); # get num
332 &xor($c,$c); # clear carry
333 &and($num,0xfffffff8); # num rounded down to a multiple of 8
334
335 &jz(&label("aw_finish"));
336
337 &set_label("aw_loop",0);
338 for ($i=0; $i<8; $i++)
339 {
340 &comment("Round $i");
341
342 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
343 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
344 &sub($tmp1,$c); # a -= incoming borrow
345 &mov($c,0); # written as mov so the flags from the sub survive
346 &adc($c,$c); # c = borrow from the first sub
347 &sub($tmp1,$tmp2); # a -= b
348 &adc($c,0); # c += borrow from the second sub
349 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
350 }
351
352 &comment("");
353 &add($a,32);
354 &add($b,32);
355 &add($r,32);
356 &sub($num,8);
357 &jnz(&label("aw_loop"));
358
359 &set_label("aw_finish",0);
360 &mov($num,&wparam(3)); # get num
361 &and($num,7); # words left over after the 8-way loop
362 &jz(&label("aw_end"));
363
364 for ($i=0; $i<7; $i++)
365 {
366 &comment("Tail Round $i");
367 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
368 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
369 &sub($tmp1,$c);
370 &mov($c,0);
371 &adc($c,$c);
372 &sub($tmp1,$tmp2);
373 &adc($c,0);
374 &dec($num) if ($i != 6); # sets the zero flag tested by the jz below
375 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
376 &jz(&label("aw_end")) if ($i != 6); # no words left: skip the remaining tail rounds
377 }
378 &set_label("aw_end",0);
379
380# &mov("eax",$c); # $c is "eax"
311 381
312 382 &function_end($name);
313 383 }
diff --git a/src/lib/libcrypto/bn/asm/bn-alpha.pl b/src/lib/libcrypto/bn/asm/bn-alpha.pl
new file mode 100644
index 0000000000..302edf2376
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/bn-alpha.pl
@@ -0,0 +1,571 @@
1#!/usr/local/bin/perl
2# I have this in perl so I can use more useful register names and then convert
3# them into alpha registers.
4#
# The assembler text returned by data() is rewritten with plain global
# substitutions and printed to standard output.  Each two-character name is
# replaced by a register number; in the template the names are written after
# a '$', so "$CC" becomes "$0", "$A3" becomes "$27", and so on.
# The substitutions run over the whole text, comments included, and are not
# anchored to the '$', so the template must not contain these letter/digit
# pairs anywhere else.
5
6$d=&data();
7$d =~ s/CC/0/g; # carry / borrow accumulator
8$d =~ s/R1/1/g;
9$d =~ s/R2/2/g;
10$d =~ s/R3/3/g;
11$d =~ s/R4/4/g;
12$d =~ s/L1/5/g;
13$d =~ s/L2/6/g;
14$d =~ s/L3/7/g;
15$d =~ s/L4/8/g;
16$d =~ s/O1/22/g;
17$d =~ s/O2/23/g;
18$d =~ s/O3/24/g;
19$d =~ s/O4/25/g;
20$d =~ s/A1/20/g;
21$d =~ s/A2/21/g;
22$d =~ s/A3/27/g;
23$d =~ s/A4/28/g;
# Empty, never-taken branch: it has no effect on the output.
24if (0){
25}
26
27print $d;
28
29sub data
30 {
31 local($data)=<<'EOF';
32
33 # DEC Alpha assember
34 # The bn_div_words is actually gcc output but the other parts are hand done.
35 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
36 # bn_div_words.
37 # I've gone back and re-done most of routines.
38 # The key thing to remeber for the 164 CPU is that while a
39 # multiply operation takes 8 cycles, another one can only be issued
40 # after 4 cycles have elapsed. I've done modification to help
41 # improve this. Also, normally, a ld instruction will not be available
42 # for about 3 cycles.
43 .file 1 "bn_asm.c"
44 .set noat
45gcc2_compiled.:
46__gnu_compiled_c:
47 .text
48 .align 3
49 .globl bn_mul_add_words
50 .ent bn_mul_add_words
51bn_mul_add_words:
52bn_mul_add_words..ng:
53 .frame $30,0,$26,0
54 .prologue 0
55 .align 5
56 subq $18,4,$18
57 bis $31,$31,$CC
58 blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
59 ldq $A1,0($17) # 1 1
60 ldq $R1,0($16) # 1 1
61 .align 3
62$42:
63 mulq $A1,$19,$L1 # 1 2 1 ######
64 ldq $A2,8($17) # 2 1
65 ldq $R2,8($16) # 2 1
66 umulh $A1,$19,$A1 # 1 2 ######
67 ldq $A3,16($17) # 3 1
68 ldq $R3,16($16) # 3 1
69 mulq $A2,$19,$L2 # 2 2 1 ######
70 ldq $A4,24($17) # 4 1
71 addq $R1,$L1,$R1 # 1 2 2
72 ldq $R4,24($16) # 4 1
73 umulh $A2,$19,$A2 # 2 2 ######
74 cmpult $R1,$L1,$O1 # 1 2 3 1
75 addq $A1,$O1,$A1 # 1 3 1
76 addq $R1,$CC,$R1 # 1 2 3 1
77 mulq $A3,$19,$L3 # 3 2 1 ######
78 cmpult $R1,$CC,$CC # 1 2 3 2
79 addq $R2,$L2,$R2 # 2 2 2
80 addq $A1,$CC,$CC # 1 3 2
81 cmpult $R2,$L2,$O2 # 2 2 3 1
82 addq $A2,$O2,$A2 # 2 3 1
83 umulh $A3,$19,$A3 # 3 2 ######
84 addq $R2,$CC,$R2 # 2 2 3 1
85 cmpult $R2,$CC,$CC # 2 2 3 2
86 subq $18,4,$18
87 mulq $A4,$19,$L4 # 4 2 1 ######
88 addq $A2,$CC,$CC # 2 3 2
89 addq $R3,$L3,$R3 # 3 2 2
90 addq $16,32,$16
91 cmpult $R3,$L3,$O3 # 3 2 3 1
92 stq $R1,-32($16) # 1 2 4
93 umulh $A4,$19,$A4 # 4 2 ######
94 addq $A3,$O3,$A3 # 3 3 1
95 addq $R3,$CC,$R3 # 3 2 3 1
96 stq $R2,-24($16) # 2 2 4
97 cmpult $R3,$CC,$CC # 3 2 3 2
98 stq $R3,-16($16) # 3 2 4
99 addq $R4,$L4,$R4 # 4 2 2
100 addq $A3,$CC,$CC # 3 3 2
101 cmpult $R4,$L4,$O4 # 4 2 3 1
102 addq $17,32,$17
103 addq $A4,$O4,$A4 # 4 3 1
104 addq $R4,$CC,$R4 # 4 2 3 1
105 cmpult $R4,$CC,$CC # 4 2 3 2
106 stq $R4,-8($16) # 4 2 4
107 addq $A4,$CC,$CC # 4 3 2
108 blt $18,$43
109
110 ldq $A1,0($17) # 1 1
111 ldq $R1,0($16) # 1 1
112
113 br $42
114
115 .align 4
116$45:
117 ldq $A1,0($17) # 4 1
118 ldq $R1,0($16) # 4 1
119 mulq $A1,$19,$L1 # 4 2 1
120 subq $18,1,$18
121 addq $16,8,$16
122 addq $17,8,$17
123 umulh $A1,$19,$A1 # 4 2
124 addq $R1,$L1,$R1 # 4 2 2
125 cmpult $R1,$L1,$O1 # 4 2 3 1
126 addq $A1,$O1,$A1 # 4 3 1
127 addq $R1,$CC,$R1 # 4 2 3 1
128 cmpult $R1,$CC,$CC # 4 2 3 2
129 addq $A1,$CC,$CC # 4 3 2
130 stq $R1,-8($16) # 4 2 4
131 bgt $18,$45
132 ret $31,($26),1 # else exit
133
134 .align 4
135$43:
136 addq $18,4,$18
137 bgt $18,$45 # goto tail code
138 ret $31,($26),1 # else exit
139
140 .end bn_mul_add_words
141 .align 3
142 .globl bn_mul_words
143 .ent bn_mul_words
144bn_mul_words:
145bn_mul_words..ng:
146 .frame $30,0,$26,0
147 .prologue 0
148 .align 5
149 subq $18,4,$18
150 bis $31,$31,$CC
151 blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
152 ldq $A1,0($17) # 1 1
153 .align 3
154$142:
155
156 mulq $A1,$19,$L1 # 1 2 1 #####
157 ldq $A2,8($17) # 2 1
158 ldq $A3,16($17) # 3 1
159 umulh $A1,$19,$A1 # 1 2 #####
160 ldq $A4,24($17) # 4 1
161 mulq $A2,$19,$L2 # 2 2 1 #####
162 addq $L1,$CC,$L1 # 1 2 3 1
163 subq $18,4,$18
164 cmpult $L1,$CC,$CC # 1 2 3 2
165 umulh $A2,$19,$A2 # 2 2 #####
166 addq $A1,$CC,$CC # 1 3 2
167 addq $17,32,$17
168 addq $L2,$CC,$L2 # 2 2 3 1
169 mulq $A3,$19,$L3 # 3 2 1 #####
170 cmpult $L2,$CC,$CC # 2 2 3 2
171 addq $A2,$CC,$CC # 2 3 2
172 addq $16,32,$16
173 umulh $A3,$19,$A3 # 3 2 #####
174 stq $L1,-32($16) # 1 2 4
175 mulq $A4,$19,$L4 # 4 2 1 #####
176 addq $L3,$CC,$L3 # 3 2 3 1
177 stq $L2,-24($16) # 2 2 4
178 cmpult $L3,$CC,$CC # 3 2 3 2
179 umulh $A4,$19,$A4 # 4 2 #####
180 addq $A3,$CC,$CC # 3 3 2
181 stq $L3,-16($16) # 3 2 4
182 addq $L4,$CC,$L4 # 4 2 3 1
183 cmpult $L4,$CC,$CC # 4 2 3 2
184
185 addq $A4,$CC,$CC # 4 3 2
186
187 stq $L4,-8($16) # 4 2 4
188
189 blt $18,$143
190
191 ldq $A1,0($17) # 1 1
192
193 br $142
194
195 .align 4
196$145:
197 ldq $A1,0($17) # 4 1
198 mulq $A1,$19,$L1 # 4 2 1
199 subq $18,1,$18
200 umulh $A1,$19,$A1 # 4 2
201 addq $L1,$CC,$L1 # 4 2 3 1
202 addq $16,8,$16
203 cmpult $L1,$CC,$CC # 4 2 3 2
204 addq $17,8,$17
205 addq $A1,$CC,$CC # 4 3 2
206 stq $L1,-8($16) # 4 2 4
207
208 bgt $18,$145
209 ret $31,($26),1 # else exit
210
211 .align 4
212$143:
213 addq $18,4,$18
214 bgt $18,$145 # goto tail code
215 ret $31,($26),1 # else exit
216
217 .end bn_mul_words
218 .align 3
219 .globl bn_sqr_words
220 .ent bn_sqr_words
221bn_sqr_words:
222bn_sqr_words..ng:
223 .frame $30,0,$26,0
224 .prologue 0
225
226 subq $18,4,$18
227 blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
228 ldq $A1,0($17) # 1 1
229 .align 3
230$542:
231 mulq $A1,$A1,$L1 ######
232 ldq $A2,8($17) # 1 1
233 subq $18,4
234 umulh $A1,$A1,$R1 ######
235 ldq $A3,16($17) # 1 1
236 mulq $A2,$A2,$L2 ######
237 ldq $A4,24($17) # 1 1
238 stq $L1,0($16) # r[0]
239 umulh $A2,$A2,$R2 ######
240 stq $R1,8($16) # r[1]
241 mulq $A3,$A3,$L3 ######
242 stq $L2,16($16) # r[0]
243 umulh $A3,$A3,$R3 ######
244 stq $R2,24($16) # r[1]
245 mulq $A4,$A4,$L4 ######
246 stq $L3,32($16) # r[0]
247 umulh $A4,$A4,$R4 ######
248 stq $R3,40($16) # r[1]
249
250 addq $16,64,$16
251 addq $17,32,$17
252 stq $L4,-16($16) # r[0]
253 stq $R4,-8($16) # r[1]
254
255 blt $18,$543
256 ldq $A1,0($17) # 1 1
257 br $542
258
259$442:
260 ldq $A1,0($17) # a[0]
261 mulq $A1,$A1,$L1 # a[0]*w low part r2
262 addq $16,16,$16
263 addq $17,8,$17
264 subq $18,1,$18
265 umulh $A1,$A1,$R1 # a[0]*w high part r3
266 stq $L1,-16($16) # r[0]
267 stq $R1,-8($16) # r[1]
268
269 bgt $18,$442
270 ret $31,($26),1 # else exit
271
272 .align 4
273$543:
274 addq $18,4,$18
275 bgt $18,$442 # goto tail code
276 ret $31,($26),1 # else exit
277 .end bn_sqr_words
278
279 .align 3
280 .globl bn_add_words
281 .ent bn_add_words
282bn_add_words:
283bn_add_words..ng:
284 .frame $30,0,$26,0
285 .prologue 0
286
287 subq $19,4,$19
288 bis $31,$31,$CC # carry = 0
289 blt $19,$900
290 ldq $L1,0($17) # a[0]
291 ldq $R1,0($18) # b[1]
292 .align 3
293$901:
294 addq $R1,$L1,$R1 # r=a+b;
295 ldq $L2,8($17) # a[1]
296 cmpult $R1,$L1,$O1 # did we overflow?
297 ldq $R2,8($18) # b[1]
298 addq $R1,$CC,$R1 # c+= overflow
299 ldq $L3,16($17) # a[2]
300 cmpult $R1,$CC,$CC # overflow?
301 ldq $R3,16($18) # b[2]
302 addq $CC,$O1,$CC
303 ldq $L4,24($17) # a[3]
304 addq $R2,$L2,$R2 # r=a+b;
305 ldq $R4,24($18) # b[3]
306 cmpult $R2,$L2,$O2 # did we overflow?
307 addq $R3,$L3,$R3 # r=a+b;
308 addq $R2,$CC,$R2 # c+= overflow
309 cmpult $R3,$L3,$O3 # did we overflow?
310 cmpult $R2,$CC,$CC # overflow?
311 addq $R4,$L4,$R4 # r=a+b;
312 addq $CC,$O2,$CC
313 cmpult $R4,$L4,$O4 # did we overflow?
314 addq $R3,$CC,$R3 # c+= overflow
315 stq $R1,0($16) # r[0]=c
316 cmpult $R3,$CC,$CC # overflow?
317 stq $R2,8($16) # r[1]=c
318 addq $CC,$O3,$CC
319 stq $R3,16($16) # r[2]=c
320 addq $R4,$CC,$R4 # c+= overflow
321 subq $19,4,$19 # loop--
322 cmpult $R4,$CC,$CC # overflow?
323 addq $17,32,$17 # a++
324 addq $CC,$O4,$CC
325 stq $R4,24($16) # r[3]=c
326 addq $18,32,$18 # b++
327 addq $16,32,$16 # r++
328
329 blt $19,$900
330 ldq $L1,0($17) # a[0]
331 ldq $R1,0($18) # b[1]
332 br $901
333 .align 4
334$945:
335 ldq $L1,0($17) # a[0]
336 ldq $R1,0($18) # b[1]
337 addq $R1,$L1,$R1 # r=a+b;
338 subq $19,1,$19 # loop--
339 addq $R1,$CC,$R1 # c+= overflow
340 addq $17,8,$17 # a++
341 cmpult $R1,$L1,$O1 # did we overflow?
342 cmpult $R1,$CC,$CC # overflow?
343 addq $18,8,$18 # b++
344 stq $R1,0($16) # r[0]=c
345 addq $CC,$O1,$CC
346 addq $16,8,$16 # r++
347
348 bgt $19,$945
349 ret $31,($26),1 # else exit
350
351$900:
352 addq $19,4,$19
353 bgt $19,$945 # goto tail code
354 ret $31,($26),1 # else exit
355 .end bn_add_words
356
357 .align 3
358 .globl bn_sub_words
359 .ent bn_sub_words
360bn_sub_words:
361bn_sub_words..ng:
362 .frame $30,0,$26,0
363 .prologue 0
364
365 subq $19,4,$19
366 bis $31,$31,$CC # carry = 0
367 br $800
368 blt $19,$800
369 ldq $L1,0($17) # a[0]
370 ldq $R1,0($18) # b[1]
371 .align 3
372$801:
373 addq $R1,$L1,$R1 # r=a+b;
374 ldq $L2,8($17) # a[1]
375 cmpult $R1,$L1,$O1 # did we overflow?
376 ldq $R2,8($18) # b[1]
377 addq $R1,$CC,$R1 # c+= overflow
378 ldq $L3,16($17) # a[2]
379 cmpult $R1,$CC,$CC # overflow?
380 ldq $R3,16($18) # b[2]
381 addq $CC,$O1,$CC
382 ldq $L4,24($17) # a[3]
383 addq $R2,$L2,$R2 # r=a+b;
384 ldq $R4,24($18) # b[3]
385 cmpult $R2,$L2,$O2 # did we overflow?
386 addq $R3,$L3,$R3 # r=a+b;
387 addq $R2,$CC,$R2 # c+= overflow
388 cmpult $R3,$L3,$O3 # did we overflow?
389 cmpult $R2,$CC,$CC # overflow?
390 addq $R4,$L4,$R4 # r=a+b;
391 addq $CC,$O2,$CC
392 cmpult $R4,$L4,$O4 # did we overflow?
393 addq $R3,$CC,$R3 # c+= overflow
394 stq $R1,0($16) # r[0]=c
395 cmpult $R3,$CC,$CC # overflow?
396 stq $R2,8($16) # r[1]=c
397 addq $CC,$O3,$CC
398 stq $R3,16($16) # r[2]=c
399 addq $R4,$CC,$R4 # c+= overflow
400 subq $19,4,$19 # loop--
401 cmpult $R4,$CC,$CC # overflow?
402 addq $17,32,$17 # a++
403 addq $CC,$O4,$CC
404 stq $R4,24($16) # r[3]=c
405 addq $18,32,$18 # b++
406 addq $16,32,$16 # r++
407
408 blt $19,$800
409 ldq $L1,0($17) # a[0]
410 ldq $R1,0($18) # b[1]
411 br $801
412 .align 4
413$845:
414 ldq $L1,0($17) # a[0]
415 ldq $R1,0($18) # b[1]
416 cmpult $L1,$R1,$O1 # will we borrow?
417 subq $L1,$R1,$R1 # r=a-b;
418 subq $19,1,$19 # loop--
419 cmpult $R1,$CC,$O2 # will we borrow?
420 subq $R1,$CC,$R1 # c+= overflow
421 addq $17,8,$17 # a++
422 addq $18,8,$18 # b++
423 stq $R1,0($16) # r[0]=c
424 addq $O2,$O1,$CC
425 addq $16,8,$16 # r++
426
427 bgt $19,$845
428 ret $31,($26),1 # else exit
429
430$800:
431 addq $19,4,$19
432 bgt $19,$845 # goto tail code
433 ret $31,($26),1 # else exit
434 .end bn_sub_words
435
436 #
437 # What follows was taken directly from the C compiler with a few
438 # hacks to redo the lables.
439 #
440.text
441 .align 3
442 .globl bn_div_words
443 .ent bn_div_words
444bn_div_words:
445 ldgp $29,0($27)
446bn_div_words..ng:
447 lda $30,-48($30)
448 .frame $30,48,$26,0
449 stq $26,0($30)
450 stq $9,8($30)
451 stq $10,16($30)
452 stq $11,24($30)
453 stq $12,32($30)
454 stq $13,40($30)
455 .mask 0x4003e00,-48
456 .prologue 1
457 bis $16,$16,$9
458 bis $17,$17,$10
459 bis $18,$18,$11
460 bis $31,$31,$13
461 bis $31,2,$12
462 bne $11,$119
463 lda $0,-1
464 br $31,$136
465 .align 4
466$119:
467 bis $11,$11,$16
468 jsr $26,BN_num_bits_word
469 ldgp $29,0($26)
470 subq $0,64,$1
471 beq $1,$120
472 bis $31,1,$1
473 sll $1,$0,$1
474 cmpule $9,$1,$1
475 bne $1,$120
476 # lda $16,_IO_stderr_
477 # lda $17,$C32
478 # bis $0,$0,$18
479 # jsr $26,fprintf
480 # ldgp $29,0($26)
481 jsr $26,abort
482 ldgp $29,0($26)
483 .align 4
484$120:
485 bis $31,64,$3
486 cmpult $9,$11,$2
487 subq $3,$0,$1
488 addl $1,$31,$0
489 subq $9,$11,$1
490 cmoveq $2,$1,$9
491 beq $0,$122
492 zapnot $0,15,$2
493 subq $3,$0,$1
494 sll $11,$2,$11
495 sll $9,$2,$3
496 srl $10,$1,$1
497 sll $10,$2,$10
498 bis $3,$1,$9
499$122:
500 srl $11,32,$5
501 zapnot $11,15,$6
502 lda $7,-1
503 .align 5
504$123:
505 srl $9,32,$1
506 subq $1,$5,$1
507 bne $1,$126
508 zapnot $7,15,$27
509 br $31,$127
510 .align 4
511$126:
512 bis $9,$9,$24
513 bis $5,$5,$25
514 divqu $24,$25,$27
515$127:
516 srl $10,32,$4
517 .align 5
518$128:
519 mulq $27,$5,$1
520 subq $9,$1,$3
521 zapnot $3,240,$1
522 bne $1,$129
523 mulq $6,$27,$2
524 sll $3,32,$1
525 addq $1,$4,$1
526 cmpule $2,$1,$2
527 bne $2,$129
528 subq $27,1,$27
529 br $31,$128
530 .align 4
531$129:
532 mulq $27,$6,$1
533 mulq $27,$5,$4
534 srl $1,32,$3
535 sll $1,32,$1
536 addq $4,$3,$4
537 cmpult $10,$1,$2
538 subq $10,$1,$10
539 addq $2,$4,$2
540 cmpult $9,$2,$1
541 bis $2,$2,$4
542 beq $1,$134
543 addq $9,$11,$9
544 subq $27,1,$27
545$134:
546 subl $12,1,$12
547 subq $9,$4,$9
548 beq $12,$124
549 sll $27,32,$13
550 sll $9,32,$2
551 srl $10,32,$1
552 sll $10,32,$10
553 bis $2,$1,$9
554 br $31,$123
555 .align 4
556$124:
557 bis $13,$27,$0
558$136:
559 ldq $26,0($30)
560 ldq $9,8($30)
561 ldq $10,16($30)
562 ldq $11,24($30)
563 ldq $12,32($30)
564 ldq $13,40($30)
565 addq $30,48,$30
566 ret $31,($26),1
567 .end bn_div_words
568EOF
569 return($data);
570 }
571
diff --git a/src/lib/libcrypto/bn/asm/bn-win32.asm b/src/lib/libcrypto/bn/asm/bn-win32.asm
index 017ea462b0..871bd88d77 100644
--- a/src/lib/libcrypto/bn/asm/bn-win32.asm
+++ b/src/lib/libcrypto/bn/asm/bn-win32.asm
@@ -485,9 +485,9 @@ $L010sw_end:
485_bn_sqr_words ENDP 485_bn_sqr_words ENDP
486_TEXT ENDS 486_TEXT ENDS
487_TEXT SEGMENT 487_TEXT SEGMENT
488PUBLIC _bn_div64 488PUBLIC _bn_div_words
489 489
490_bn_div64 PROC NEAR 490_bn_div_words PROC NEAR
491 push ebp 491 push ebp
492 push ebx 492 push ebx
493 push esi 493 push esi
@@ -501,7 +501,7 @@ _bn_div64 PROC NEAR
501 pop ebx 501 pop ebx
502 pop ebp 502 pop ebp
503 ret 503 ret
504_bn_div64 ENDP 504_bn_div_words ENDP
505_TEXT ENDS 505_TEXT ENDS
506_TEXT SEGMENT 506_TEXT SEGMENT
507PUBLIC _bn_add_words 507PUBLIC _bn_add_words
@@ -678,7 +678,6 @@ $L011aw_finish:
678 adc eax, 0 678 adc eax, 0
679 mov DWORD PTR 24[ebx],ecx 679 mov DWORD PTR 24[ebx],ecx
680$L013aw_end: 680$L013aw_end:
681 mov eax, eax
682 pop edi 681 pop edi
683 pop esi 682 pop esi
684 pop ebx 683 pop ebx
@@ -686,4 +685,1438 @@ $L013aw_end:
686 ret 685 ret
687_bn_add_words ENDP 686_bn_add_words ENDP
688_TEXT ENDS 687_TEXT ENDS
688_TEXT SEGMENT
689PUBLIC _bn_sub_words
690
691_bn_sub_words PROC NEAR
692 push ebp
693 push ebx
694 push esi
695 push edi
696 ;
697 mov ebx, DWORD PTR 20[esp]
698 mov esi, DWORD PTR 24[esp]
699 mov edi, DWORD PTR 28[esp]
700 mov ebp, DWORD PTR 32[esp]
701 xor eax, eax
702 and ebp, 4294967288
703 jz $L014aw_finish
704L015aw_loop:
705 ; Round 0
706 mov ecx, DWORD PTR [esi]
707 mov edx, DWORD PTR [edi]
708 sub ecx, eax
709 mov eax, 0
710 adc eax, eax
711 sub ecx, edx
712 adc eax, 0
713 mov DWORD PTR [ebx],ecx
714 ; Round 1
715 mov ecx, DWORD PTR 4[esi]
716 mov edx, DWORD PTR 4[edi]
717 sub ecx, eax
718 mov eax, 0
719 adc eax, eax
720 sub ecx, edx
721 adc eax, 0
722 mov DWORD PTR 4[ebx],ecx
723 ; Round 2
724 mov ecx, DWORD PTR 8[esi]
725 mov edx, DWORD PTR 8[edi]
726 sub ecx, eax
727 mov eax, 0
728 adc eax, eax
729 sub ecx, edx
730 adc eax, 0
731 mov DWORD PTR 8[ebx],ecx
732 ; Round 3
733 mov ecx, DWORD PTR 12[esi]
734 mov edx, DWORD PTR 12[edi]
735 sub ecx, eax
736 mov eax, 0
737 adc eax, eax
738 sub ecx, edx
739 adc eax, 0
740 mov DWORD PTR 12[ebx],ecx
741 ; Round 4
742 mov ecx, DWORD PTR 16[esi]
743 mov edx, DWORD PTR 16[edi]
744 sub ecx, eax
745 mov eax, 0
746 adc eax, eax
747 sub ecx, edx
748 adc eax, 0
749 mov DWORD PTR 16[ebx],ecx
750 ; Round 5
751 mov ecx, DWORD PTR 20[esi]
752 mov edx, DWORD PTR 20[edi]
753 sub ecx, eax
754 mov eax, 0
755 adc eax, eax
756 sub ecx, edx
757 adc eax, 0
758 mov DWORD PTR 20[ebx],ecx
759 ; Round 6
760 mov ecx, DWORD PTR 24[esi]
761 mov edx, DWORD PTR 24[edi]
762 sub ecx, eax
763 mov eax, 0
764 adc eax, eax
765 sub ecx, edx
766 adc eax, 0
767 mov DWORD PTR 24[ebx],ecx
768 ; Round 7
769 mov ecx, DWORD PTR 28[esi]
770 mov edx, DWORD PTR 28[edi]
771 sub ecx, eax
772 mov eax, 0
773 adc eax, eax
774 sub ecx, edx
775 adc eax, 0
776 mov DWORD PTR 28[ebx],ecx
777 ;
778 add esi, 32
779 add edi, 32
780 add ebx, 32
781 sub ebp, 8
782 jnz L015aw_loop
783$L014aw_finish:
784 mov ebp, DWORD PTR 32[esp]
785 and ebp, 7
786 jz $L016aw_end
787 ; Tail Round 0
788 mov ecx, DWORD PTR [esi]
789 mov edx, DWORD PTR [edi]
790 sub ecx, eax
791 mov eax, 0
792 adc eax, eax
793 sub ecx, edx
794 adc eax, 0
795 dec ebp
796 mov DWORD PTR [ebx],ecx
797 jz $L016aw_end
798 ; Tail Round 1
799 mov ecx, DWORD PTR 4[esi]
800 mov edx, DWORD PTR 4[edi]
801 sub ecx, eax
802 mov eax, 0
803 adc eax, eax
804 sub ecx, edx
805 adc eax, 0
806 dec ebp
807 mov DWORD PTR 4[ebx],ecx
808 jz $L016aw_end
809 ; Tail Round 2
810 mov ecx, DWORD PTR 8[esi]
811 mov edx, DWORD PTR 8[edi]
812 sub ecx, eax
813 mov eax, 0
814 adc eax, eax
815 sub ecx, edx
816 adc eax, 0
817 dec ebp
818 mov DWORD PTR 8[ebx],ecx
819 jz $L016aw_end
820 ; Tail Round 3
821 mov ecx, DWORD PTR 12[esi]
822 mov edx, DWORD PTR 12[edi]
823 sub ecx, eax
824 mov eax, 0
825 adc eax, eax
826 sub ecx, edx
827 adc eax, 0
828 dec ebp
829 mov DWORD PTR 12[ebx],ecx
830 jz $L016aw_end
831 ; Tail Round 4
832 mov ecx, DWORD PTR 16[esi]
833 mov edx, DWORD PTR 16[edi]
834 sub ecx, eax
835 mov eax, 0
836 adc eax, eax
837 sub ecx, edx
838 adc eax, 0
839 dec ebp
840 mov DWORD PTR 16[ebx],ecx
841 jz $L016aw_end
842 ; Tail Round 5
843 mov ecx, DWORD PTR 20[esi]
844 mov edx, DWORD PTR 20[edi]
845 sub ecx, eax
846 mov eax, 0
847 adc eax, eax
848 sub ecx, edx
849 adc eax, 0
850 dec ebp
851 mov DWORD PTR 20[ebx],ecx
852 jz $L016aw_end
853 ; Tail Round 6
854 mov ecx, DWORD PTR 24[esi]
855 mov edx, DWORD PTR 24[edi]
856 sub ecx, eax
857 mov eax, 0
858 adc eax, eax
859 sub ecx, edx
860 adc eax, 0
861 mov DWORD PTR 24[ebx],ecx
862$L016aw_end:
863 pop edi
864 pop esi
865 pop ebx
866 pop ebp
867 ret
868_bn_sub_words ENDP
869_TEXT ENDS
870_TEXT SEGMENT
871PUBLIC _bn_mul_comba8
872
873_bn_mul_comba8 PROC NEAR
874 push esi
875 mov esi, DWORD PTR 12[esp]
876 push edi
877 mov edi, DWORD PTR 20[esp]
878 push ebp
879 push ebx
880 xor ebx, ebx
881 mov eax, DWORD PTR [esi]
882 xor ecx, ecx
883 mov edx, DWORD PTR [edi]
884 ; ################## Calculate word 0
885 xor ebp, ebp
886 ; mul a[0]*b[0]
887 mul edx
888 add ebx, eax
889 mov eax, DWORD PTR 20[esp]
890 adc ecx, edx
891 mov edx, DWORD PTR [edi]
892 adc ebp, 0
893 mov DWORD PTR [eax],ebx
894 mov eax, DWORD PTR 4[esi]
895 ; saved r[0]
896 ; ################## Calculate word 1
897 xor ebx, ebx
898 ; mul a[1]*b[0]
899 mul edx
900 add ecx, eax
901 mov eax, DWORD PTR [esi]
902 adc ebp, edx
903 mov edx, DWORD PTR 4[edi]
904 adc ebx, 0
905 ; mul a[0]*b[1]
906 mul edx
907 add ecx, eax
908 mov eax, DWORD PTR 20[esp]
909 adc ebp, edx
910 mov edx, DWORD PTR [edi]
911 adc ebx, 0
912 mov DWORD PTR 4[eax],ecx
913 mov eax, DWORD PTR 8[esi]
914 ; saved r[1]
915 ; ################## Calculate word 2
916 xor ecx, ecx
917 ; mul a[2]*b[0]
918 mul edx
919 add ebp, eax
920 mov eax, DWORD PTR 4[esi]
921 adc ebx, edx
922 mov edx, DWORD PTR 4[edi]
923 adc ecx, 0
924 ; mul a[1]*b[1]
925 mul edx
926 add ebp, eax
927 mov eax, DWORD PTR [esi]
928 adc ebx, edx
929 mov edx, DWORD PTR 8[edi]
930 adc ecx, 0
931 ; mul a[0]*b[2]
932 mul edx
933 add ebp, eax
934 mov eax, DWORD PTR 20[esp]
935 adc ebx, edx
936 mov edx, DWORD PTR [edi]
937 adc ecx, 0
938 mov DWORD PTR 8[eax],ebp
939 mov eax, DWORD PTR 12[esi]
940 ; saved r[2]
941 ; ################## Calculate word 3
942 xor ebp, ebp
943 ; mul a[3]*b[0]
944 mul edx
945 add ebx, eax
946 mov eax, DWORD PTR 8[esi]
947 adc ecx, edx
948 mov edx, DWORD PTR 4[edi]
949 adc ebp, 0
950 ; mul a[2]*b[1]
951 mul edx
952 add ebx, eax
953 mov eax, DWORD PTR 4[esi]
954 adc ecx, edx
955 mov edx, DWORD PTR 8[edi]
956 adc ebp, 0
957 ; mul a[1]*b[2]
958 mul edx
959 add ebx, eax
960 mov eax, DWORD PTR [esi]
961 adc ecx, edx
962 mov edx, DWORD PTR 12[edi]
963 adc ebp, 0
964 ; mul a[0]*b[3]
965 mul edx
966 add ebx, eax
967 mov eax, DWORD PTR 20[esp]
968 adc ecx, edx
969 mov edx, DWORD PTR [edi]
970 adc ebp, 0
971 mov DWORD PTR 12[eax],ebx
972 mov eax, DWORD PTR 16[esi]
973 ; saved r[3]
974 ; ################## Calculate word 4
975 xor ebx, ebx
976 ; mul a[4]*b[0]
977 mul edx
978 add ecx, eax
979 mov eax, DWORD PTR 12[esi]
980 adc ebp, edx
981 mov edx, DWORD PTR 4[edi]
982 adc ebx, 0
983 ; mul a[3]*b[1]
984 mul edx
985 add ecx, eax
986 mov eax, DWORD PTR 8[esi]
987 adc ebp, edx
988 mov edx, DWORD PTR 8[edi]
989 adc ebx, 0
990 ; mul a[2]*b[2]
991 mul edx
992 add ecx, eax
993 mov eax, DWORD PTR 4[esi]
994 adc ebp, edx
995 mov edx, DWORD PTR 12[edi]
996 adc ebx, 0
997 ; mul a[1]*b[3]
998 mul edx
999 add ecx, eax
1000 mov eax, DWORD PTR [esi]
1001 adc ebp, edx
1002 mov edx, DWORD PTR 16[edi]
1003 adc ebx, 0
1004 ; mul a[0]*b[4]
1005 mul edx
1006 add ecx, eax
1007 mov eax, DWORD PTR 20[esp]
1008 adc ebp, edx
1009 mov edx, DWORD PTR [edi]
1010 adc ebx, 0
1011 mov DWORD PTR 16[eax],ecx
1012 mov eax, DWORD PTR 20[esi]
1013 ; saved r[4]
1014 ; ################## Calculate word 5
1015 xor ecx, ecx
1016 ; mul a[5]*b[0]
1017 mul edx
1018 add ebp, eax
1019 mov eax, DWORD PTR 16[esi]
1020 adc ebx, edx
1021 mov edx, DWORD PTR 4[edi]
1022 adc ecx, 0
1023 ; mul a[4]*b[1]
1024 mul edx
1025 add ebp, eax
1026 mov eax, DWORD PTR 12[esi]
1027 adc ebx, edx
1028 mov edx, DWORD PTR 8[edi]
1029 adc ecx, 0
1030 ; mul a[3]*b[2]
1031 mul edx
1032 add ebp, eax
1033 mov eax, DWORD PTR 8[esi]
1034 adc ebx, edx
1035 mov edx, DWORD PTR 12[edi]
1036 adc ecx, 0
1037 ; mul a[2]*b[3]
1038 mul edx
1039 add ebp, eax
1040 mov eax, DWORD PTR 4[esi]
1041 adc ebx, edx
1042 mov edx, DWORD PTR 16[edi]
1043 adc ecx, 0
1044 ; mul a[1]*b[4]
1045 mul edx
1046 add ebp, eax
1047 mov eax, DWORD PTR [esi]
1048 adc ebx, edx
1049 mov edx, DWORD PTR 20[edi]
1050 adc ecx, 0
1051 ; mul a[0]*b[5]
1052 mul edx
1053 add ebp, eax
1054 mov eax, DWORD PTR 20[esp]
1055 adc ebx, edx
1056 mov edx, DWORD PTR [edi]
1057 adc ecx, 0
1058 mov DWORD PTR 20[eax],ebp
1059 mov eax, DWORD PTR 24[esi]
1060 ; saved r[5]
1061 ; ################## Calculate word 6
1062 xor ebp, ebp
1063 ; mul a[6]*b[0]
1064 mul edx
1065 add ebx, eax
1066 mov eax, DWORD PTR 20[esi]
1067 adc ecx, edx
1068 mov edx, DWORD PTR 4[edi]
1069 adc ebp, 0
1070 ; mul a[5]*b[1]
1071 mul edx
1072 add ebx, eax
1073 mov eax, DWORD PTR 16[esi]
1074 adc ecx, edx
1075 mov edx, DWORD PTR 8[edi]
1076 adc ebp, 0
1077 ; mul a[4]*b[2]
1078 mul edx
1079 add ebx, eax
1080 mov eax, DWORD PTR 12[esi]
1081 adc ecx, edx
1082 mov edx, DWORD PTR 12[edi]
1083 adc ebp, 0
1084 ; mul a[3]*b[3]
1085 mul edx
1086 add ebx, eax
1087 mov eax, DWORD PTR 8[esi]
1088 adc ecx, edx
1089 mov edx, DWORD PTR 16[edi]
1090 adc ebp, 0
1091 ; mul a[2]*b[4]
1092 mul edx
1093 add ebx, eax
1094 mov eax, DWORD PTR 4[esi]
1095 adc ecx, edx
1096 mov edx, DWORD PTR 20[edi]
1097 adc ebp, 0
1098 ; mul a[1]*b[5]
1099 mul edx
1100 add ebx, eax
1101 mov eax, DWORD PTR [esi]
1102 adc ecx, edx
1103 mov edx, DWORD PTR 24[edi]
1104 adc ebp, 0
1105 ; mul a[0]*b[6]
1106 mul edx
1107 add ebx, eax
1108 mov eax, DWORD PTR 20[esp]
1109 adc ecx, edx
1110 mov edx, DWORD PTR [edi]
1111 adc ebp, 0
1112 mov DWORD PTR 24[eax],ebx
1113 mov eax, DWORD PTR 28[esi]
1114 ; saved r[6]
1115 ; ################## Calculate word 7
1116 xor ebx, ebx
1117 ; mul a[7]*b[0]
1118 mul edx
1119 add ecx, eax
1120 mov eax, DWORD PTR 24[esi]
1121 adc ebp, edx
1122 mov edx, DWORD PTR 4[edi]
1123 adc ebx, 0
1124 ; mul a[6]*b[1]
1125 mul edx
1126 add ecx, eax
1127 mov eax, DWORD PTR 20[esi]
1128 adc ebp, edx
1129 mov edx, DWORD PTR 8[edi]
1130 adc ebx, 0
1131 ; mul a[5]*b[2]
1132 mul edx
1133 add ecx, eax
1134 mov eax, DWORD PTR 16[esi]
1135 adc ebp, edx
1136 mov edx, DWORD PTR 12[edi]
1137 adc ebx, 0
1138 ; mul a[4]*b[3]
1139 mul edx
1140 add ecx, eax
1141 mov eax, DWORD PTR 12[esi]
1142 adc ebp, edx
1143 mov edx, DWORD PTR 16[edi]
1144 adc ebx, 0
1145 ; mul a[3]*b[4]
1146 mul edx
1147 add ecx, eax
1148 mov eax, DWORD PTR 8[esi]
1149 adc ebp, edx
1150 mov edx, DWORD PTR 20[edi]
1151 adc ebx, 0
1152 ; mul a[2]*b[5]
1153 mul edx
1154 add ecx, eax
1155 mov eax, DWORD PTR 4[esi]
1156 adc ebp, edx
1157 mov edx, DWORD PTR 24[edi]
1158 adc ebx, 0
1159 ; mul a[1]*b[6]
1160 mul edx
1161 add ecx, eax
1162 mov eax, DWORD PTR [esi]
1163 adc ebp, edx
1164 mov edx, DWORD PTR 28[edi]
1165 adc ebx, 0
1166 ; mul a[0]*b[7]
1167 mul edx
1168 add ecx, eax
1169 mov eax, DWORD PTR 20[esp]
1170 adc ebp, edx
1171 mov edx, DWORD PTR 4[edi]
1172 adc ebx, 0
1173 mov DWORD PTR 28[eax],ecx
1174 mov eax, DWORD PTR 28[esi]
1175 ; saved r[7]
1176 ; ################## Calculate word 8
1177 xor ecx, ecx
1178 ; mul a[7]*b[1]
1179 mul edx
1180 add ebp, eax
1181 mov eax, DWORD PTR 24[esi]
1182 adc ebx, edx
1183 mov edx, DWORD PTR 8[edi]
1184 adc ecx, 0
1185 ; mul a[6]*b[2]
1186 mul edx
1187 add ebp, eax
1188 mov eax, DWORD PTR 20[esi]
1189 adc ebx, edx
1190 mov edx, DWORD PTR 12[edi]
1191 adc ecx, 0
1192 ; mul a[5]*b[3]
1193 mul edx
1194 add ebp, eax
1195 mov eax, DWORD PTR 16[esi]
1196 adc ebx, edx
1197 mov edx, DWORD PTR 16[edi]
1198 adc ecx, 0
1199 ; mul a[4]*b[4]
1200 mul edx
1201 add ebp, eax
1202 mov eax, DWORD PTR 12[esi]
1203 adc ebx, edx
1204 mov edx, DWORD PTR 20[edi]
1205 adc ecx, 0
1206 ; mul a[3]*b[5]
1207 mul edx
1208 add ebp, eax
1209 mov eax, DWORD PTR 8[esi]
1210 adc ebx, edx
1211 mov edx, DWORD PTR 24[edi]
1212 adc ecx, 0
1213 ; mul a[2]*b[6]
1214 mul edx
1215 add ebp, eax
1216 mov eax, DWORD PTR 4[esi]
1217 adc ebx, edx
1218 mov edx, DWORD PTR 28[edi]
1219 adc ecx, 0
1220 ; mul a[1]*b[7]
1221 mul edx
1222 add ebp, eax
1223 mov eax, DWORD PTR 20[esp]
1224 adc ebx, edx
1225 mov edx, DWORD PTR 8[edi]
1226 adc ecx, 0
1227 mov DWORD PTR 32[eax],ebp
1228 mov eax, DWORD PTR 28[esi]
1229 ; saved r[8]
1230 ; ################## Calculate word 9
1231 xor ebp, ebp
1232 ; mul a[7]*b[2]
1233 mul edx
1234 add ebx, eax
1235 mov eax, DWORD PTR 24[esi]
1236 adc ecx, edx
1237 mov edx, DWORD PTR 12[edi]
1238 adc ebp, 0
1239 ; mul a[6]*b[3]
1240 mul edx
1241 add ebx, eax
1242 mov eax, DWORD PTR 20[esi]
1243 adc ecx, edx
1244 mov edx, DWORD PTR 16[edi]
1245 adc ebp, 0
1246 ; mul a[5]*b[4]
1247 mul edx
1248 add ebx, eax
1249 mov eax, DWORD PTR 16[esi]
1250 adc ecx, edx
1251 mov edx, DWORD PTR 20[edi]
1252 adc ebp, 0
1253 ; mul a[4]*b[5]
1254 mul edx
1255 add ebx, eax
1256 mov eax, DWORD PTR 12[esi]
1257 adc ecx, edx
1258 mov edx, DWORD PTR 24[edi]
1259 adc ebp, 0
1260 ; mul a[3]*b[6]
1261 mul edx
1262 add ebx, eax
1263 mov eax, DWORD PTR 8[esi]
1264 adc ecx, edx
1265 mov edx, DWORD PTR 28[edi]
1266 adc ebp, 0
1267 ; mul a[2]*b[7]
1268 mul edx
1269 add ebx, eax
1270 mov eax, DWORD PTR 20[esp]
1271 adc ecx, edx
1272 mov edx, DWORD PTR 12[edi]
1273 adc ebp, 0
1274 mov DWORD PTR 36[eax],ebx
1275 mov eax, DWORD PTR 28[esi]
1276 ; saved r[9]
1277 ; ################## Calculate word 10
1278 xor ebx, ebx
1279 ; mul a[7]*b[3]
1280 mul edx
1281 add ecx, eax
1282 mov eax, DWORD PTR 24[esi]
1283 adc ebp, edx
1284 mov edx, DWORD PTR 16[edi]
1285 adc ebx, 0
1286 ; mul a[6]*b[4]
1287 mul edx
1288 add ecx, eax
1289 mov eax, DWORD PTR 20[esi]
1290 adc ebp, edx
1291 mov edx, DWORD PTR 20[edi]
1292 adc ebx, 0
1293 ; mul a[5]*b[5]
1294 mul edx
1295 add ecx, eax
1296 mov eax, DWORD PTR 16[esi]
1297 adc ebp, edx
1298 mov edx, DWORD PTR 24[edi]
1299 adc ebx, 0
1300 ; mul a[4]*b[6]
1301 mul edx
1302 add ecx, eax
1303 mov eax, DWORD PTR 12[esi]
1304 adc ebp, edx
1305 mov edx, DWORD PTR 28[edi]
1306 adc ebx, 0
1307 ; mul a[3]*b[7]
1308 mul edx
1309 add ecx, eax
1310 mov eax, DWORD PTR 20[esp]
1311 adc ebp, edx
1312 mov edx, DWORD PTR 16[edi]
1313 adc ebx, 0
1314 mov DWORD PTR 40[eax],ecx
1315 mov eax, DWORD PTR 28[esi]
1316 ; saved r[10]
1317 ; ################## Calculate word 11
1318 xor ecx, ecx
1319 ; mul a[7]*b[4]
1320 mul edx
1321 add ebp, eax
1322 mov eax, DWORD PTR 24[esi]
1323 adc ebx, edx
1324 mov edx, DWORD PTR 20[edi]
1325 adc ecx, 0
1326 ; mul a[6]*b[5]
1327 mul edx
1328 add ebp, eax
1329 mov eax, DWORD PTR 20[esi]
1330 adc ebx, edx
1331 mov edx, DWORD PTR 24[edi]
1332 adc ecx, 0
1333 ; mul a[5]*b[6]
1334 mul edx
1335 add ebp, eax
1336 mov eax, DWORD PTR 16[esi]
1337 adc ebx, edx
1338 mov edx, DWORD PTR 28[edi]
1339 adc ecx, 0
1340 ; mul a[4]*b[7]
1341 mul edx
1342 add ebp, eax
1343 mov eax, DWORD PTR 20[esp]
1344 adc ebx, edx
1345 mov edx, DWORD PTR 20[edi]
1346 adc ecx, 0
1347 mov DWORD PTR 44[eax],ebp
1348 mov eax, DWORD PTR 28[esi]
1349 ; saved r[11]
1350 ; ################## Calculate word 12
1351 xor ebp, ebp
1352 ; mul a[7]*b[5]
1353 mul edx
1354 add ebx, eax
1355 mov eax, DWORD PTR 24[esi]
1356 adc ecx, edx
1357 mov edx, DWORD PTR 24[edi]
1358 adc ebp, 0
1359 ; mul a[6]*b[6]
1360 mul edx
1361 add ebx, eax
1362 mov eax, DWORD PTR 20[esi]
1363 adc ecx, edx
1364 mov edx, DWORD PTR 28[edi]
1365 adc ebp, 0
1366 ; mul a[5]*b[7]
1367 mul edx
1368 add ebx, eax
1369 mov eax, DWORD PTR 20[esp]
1370 adc ecx, edx
1371 mov edx, DWORD PTR 24[edi]
1372 adc ebp, 0
1373 mov DWORD PTR 48[eax],ebx
1374 mov eax, DWORD PTR 28[esi]
1375 ; saved r[12]
1376 ; ################## Calculate word 13
1377 xor ebx, ebx
1378 ; mul a[7]*b[6]
1379 mul edx
1380 add ecx, eax
1381 mov eax, DWORD PTR 24[esi]
1382 adc ebp, edx
1383 mov edx, DWORD PTR 28[edi]
1384 adc ebx, 0
1385 ; mul a[6]*b[7]
1386 mul edx
1387 add ecx, eax
1388 mov eax, DWORD PTR 20[esp]
1389 adc ebp, edx
1390 mov edx, DWORD PTR 28[edi]
1391 adc ebx, 0
1392 mov DWORD PTR 52[eax],ecx
1393 mov eax, DWORD PTR 28[esi]
1394 ; saved r[13]
1395 ; ################## Calculate word 14
1396 xor ecx, ecx
1397 ; mul a[7]*b[7]
1398 mul edx
1399 add ebp, eax
1400 mov eax, DWORD PTR 20[esp]
1401 adc ebx, edx
1402 adc ecx, 0
1403 mov DWORD PTR 56[eax],ebp
1404 ; saved r[14]
1405 ; save r[15]
1406 mov DWORD PTR 60[eax],ebx
1407 pop ebx
1408 pop ebp
1409 pop edi
1410 pop esi
1411 ret
1412_bn_mul_comba8 ENDP
1413_TEXT ENDS
1414_TEXT SEGMENT
1415PUBLIC _bn_mul_comba4
1416
1417_bn_mul_comba4 PROC NEAR
1418 push esi
1419 mov esi, DWORD PTR 12[esp]
1420 push edi
1421 mov edi, DWORD PTR 20[esp]
1422 push ebp
1423 push ebx
1424 xor ebx, ebx
1425 mov eax, DWORD PTR [esi]
1426 xor ecx, ecx
1427 mov edx, DWORD PTR [edi]
1428 ; ################## Calculate word 0
1429 xor ebp, ebp
1430 ; mul a[0]*b[0]
1431 mul edx
1432 add ebx, eax
1433 mov eax, DWORD PTR 20[esp]
1434 adc ecx, edx
1435 mov edx, DWORD PTR [edi]
1436 adc ebp, 0
1437 mov DWORD PTR [eax],ebx
1438 mov eax, DWORD PTR 4[esi]
1439 ; saved r[0]
1440 ; ################## Calculate word 1
1441 xor ebx, ebx
1442 ; mul a[1]*b[0]
1443 mul edx
1444 add ecx, eax
1445 mov eax, DWORD PTR [esi]
1446 adc ebp, edx
1447 mov edx, DWORD PTR 4[edi]
1448 adc ebx, 0
1449 ; mul a[0]*b[1]
1450 mul edx
1451 add ecx, eax
1452 mov eax, DWORD PTR 20[esp]
1453 adc ebp, edx
1454 mov edx, DWORD PTR [edi]
1455 adc ebx, 0
1456 mov DWORD PTR 4[eax],ecx
1457 mov eax, DWORD PTR 8[esi]
1458 ; saved r[1]
1459 ; ################## Calculate word 2
1460 xor ecx, ecx
1461 ; mul a[2]*b[0]
1462 mul edx
1463 add ebp, eax
1464 mov eax, DWORD PTR 4[esi]
1465 adc ebx, edx
1466 mov edx, DWORD PTR 4[edi]
1467 adc ecx, 0
1468 ; mul a[1]*b[1]
1469 mul edx
1470 add ebp, eax
1471 mov eax, DWORD PTR [esi]
1472 adc ebx, edx
1473 mov edx, DWORD PTR 8[edi]
1474 adc ecx, 0
1475 ; mul a[0]*b[2]
1476 mul edx
1477 add ebp, eax
1478 mov eax, DWORD PTR 20[esp]
1479 adc ebx, edx
1480 mov edx, DWORD PTR [edi]
1481 adc ecx, 0
1482 mov DWORD PTR 8[eax],ebp
1483 mov eax, DWORD PTR 12[esi]
1484 ; saved r[2]
1485 ; ################## Calculate word 3
1486 xor ebp, ebp
1487 ; mul a[3]*b[0]
1488 mul edx
1489 add ebx, eax
1490 mov eax, DWORD PTR 8[esi]
1491 adc ecx, edx
1492 mov edx, DWORD PTR 4[edi]
1493 adc ebp, 0
1494 ; mul a[2]*b[1]
1495 mul edx
1496 add ebx, eax
1497 mov eax, DWORD PTR 4[esi]
1498 adc ecx, edx
1499 mov edx, DWORD PTR 8[edi]
1500 adc ebp, 0
1501 ; mul a[1]*b[2]
1502 mul edx
1503 add ebx, eax
1504 mov eax, DWORD PTR [esi]
1505 adc ecx, edx
1506 mov edx, DWORD PTR 12[edi]
1507 adc ebp, 0
1508 ; mul a[0]*b[3]
1509 mul edx
1510 add ebx, eax
1511 mov eax, DWORD PTR 20[esp]
1512 adc ecx, edx
1513 mov edx, DWORD PTR 4[edi]
1514 adc ebp, 0
1515 mov DWORD PTR 12[eax],ebx
1516 mov eax, DWORD PTR 12[esi]
1517 ; saved r[3]
1518 ; ################## Calculate word 4
1519 xor ebx, ebx
1520 ; mul a[3]*b[1]
1521 mul edx
1522 add ecx, eax
1523 mov eax, DWORD PTR 8[esi]
1524 adc ebp, edx
1525 mov edx, DWORD PTR 8[edi]
1526 adc ebx, 0
1527 ; mul a[2]*b[2]
1528 mul edx
1529 add ecx, eax
1530 mov eax, DWORD PTR 4[esi]
1531 adc ebp, edx
1532 mov edx, DWORD PTR 12[edi]
1533 adc ebx, 0
1534 ; mul a[1]*b[3]
1535 mul edx
1536 add ecx, eax
1537 mov eax, DWORD PTR 20[esp]
1538 adc ebp, edx
1539 mov edx, DWORD PTR 8[edi]
1540 adc ebx, 0
1541 mov DWORD PTR 16[eax],ecx
1542 mov eax, DWORD PTR 12[esi]
1543 ; saved r[4]
1544 ; ################## Calculate word 5
1545 xor ecx, ecx
1546 ; mul a[3]*b[2]
1547 mul edx
1548 add ebp, eax
1549 mov eax, DWORD PTR 8[esi]
1550 adc ebx, edx
1551 mov edx, DWORD PTR 12[edi]
1552 adc ecx, 0
1553 ; mul a[2]*b[3]
1554 mul edx
1555 add ebp, eax
1556 mov eax, DWORD PTR 20[esp]
1557 adc ebx, edx
1558 mov edx, DWORD PTR 12[edi]
1559 adc ecx, 0
1560 mov DWORD PTR 20[eax],ebp
1561 mov eax, DWORD PTR 12[esi]
1562 ; saved r[5]
1563 ; ################## Calculate word 6
1564 xor ebp, ebp
1565 ; mul a[3]*b[3]
1566 mul edx
1567 add ebx, eax
1568 mov eax, DWORD PTR 20[esp]
1569 adc ecx, edx
1570 adc ebp, 0
1571 mov DWORD PTR 24[eax],ebx
1572 ; saved r[6]
1573 ; save r[7]
1574 mov DWORD PTR 28[eax],ecx
1575 pop ebx
1576 pop ebp
1577 pop edi
1578 pop esi
1579 ret
1580_bn_mul_comba4 ENDP
1581_TEXT ENDS
1582_TEXT SEGMENT
1583PUBLIC _bn_sqr_comba8
1584
1585_bn_sqr_comba8 PROC NEAR
1586 push esi
1587 push edi
1588 push ebp
1589 push ebx
1590 mov edi, DWORD PTR 20[esp]
1591 mov esi, DWORD PTR 24[esp]
1592 xor ebx, ebx
1593 xor ecx, ecx
1594 mov eax, DWORD PTR [esi]
1595 ; ############### Calculate word 0
1596 xor ebp, ebp
1597 ; sqr a[0]*a[0]
1598 mul eax
1599 add ebx, eax
1600 adc ecx, edx
1601 mov edx, DWORD PTR [esi]
1602 adc ebp, 0
1603 mov DWORD PTR [edi],ebx
1604 mov eax, DWORD PTR 4[esi]
1605 ; saved r[0]
1606 ; ############### Calculate word 1
1607 xor ebx, ebx
1608 ; sqr a[1]*a[0]
1609 mul edx
1610 add eax, eax
1611 adc edx, edx
1612 adc ebx, 0
1613 add ecx, eax
1614 adc ebp, edx
1615 mov eax, DWORD PTR 8[esi]
1616 adc ebx, 0
1617 mov DWORD PTR 4[edi],ecx
1618 mov edx, DWORD PTR [esi]
1619 ; saved r[1]
1620 ; ############### Calculate word 2
1621 xor ecx, ecx
1622 ; sqr a[2]*a[0]
1623 mul edx
1624 add eax, eax
1625 adc edx, edx
1626 adc ecx, 0
1627 add ebp, eax
1628 adc ebx, edx
1629 mov eax, DWORD PTR 4[esi]
1630 adc ecx, 0
1631 ; sqr a[1]*a[1]
1632 mul eax
1633 add ebp, eax
1634 adc ebx, edx
1635 mov edx, DWORD PTR [esi]
1636 adc ecx, 0
1637 mov DWORD PTR 8[edi],ebp
1638 mov eax, DWORD PTR 12[esi]
1639 ; saved r[2]
1640 ; ############### Calculate word 3
1641 xor ebp, ebp
1642 ; sqr a[3]*a[0]
1643 mul edx
1644 add eax, eax
1645 adc edx, edx
1646 adc ebp, 0
1647 add ebx, eax
1648 adc ecx, edx
1649 mov eax, DWORD PTR 8[esi]
1650 adc ebp, 0
1651 mov edx, DWORD PTR 4[esi]
1652 ; sqr a[2]*a[1]
1653 mul edx
1654 add eax, eax
1655 adc edx, edx
1656 adc ebp, 0
1657 add ebx, eax
1658 adc ecx, edx
1659 mov eax, DWORD PTR 16[esi]
1660 adc ebp, 0
1661 mov DWORD PTR 12[edi],ebx
1662 mov edx, DWORD PTR [esi]
1663 ; saved r[3]
1664 ; ############### Calculate word 4
1665 xor ebx, ebx
1666 ; sqr a[4]*a[0]
1667 mul edx
1668 add eax, eax
1669 adc edx, edx
1670 adc ebx, 0
1671 add ecx, eax
1672 adc ebp, edx
1673 mov eax, DWORD PTR 12[esi]
1674 adc ebx, 0
1675 mov edx, DWORD PTR 4[esi]
1676 ; sqr a[3]*a[1]
1677 mul edx
1678 add eax, eax
1679 adc edx, edx
1680 adc ebx, 0
1681 add ecx, eax
1682 adc ebp, edx
1683 mov eax, DWORD PTR 8[esi]
1684 adc ebx, 0
1685 ; sqr a[2]*a[2]
1686 mul eax
1687 add ecx, eax
1688 adc ebp, edx
1689 mov edx, DWORD PTR [esi]
1690 adc ebx, 0
1691 mov DWORD PTR 16[edi],ecx
1692 mov eax, DWORD PTR 20[esi]
1693 ; saved r[4]
1694 ; ############### Calculate word 5
1695 xor ecx, ecx
1696 ; sqr a[5]*a[0]
1697 mul edx
1698 add eax, eax
1699 adc edx, edx
1700 adc ecx, 0
1701 add ebp, eax
1702 adc ebx, edx
1703 mov eax, DWORD PTR 16[esi]
1704 adc ecx, 0
1705 mov edx, DWORD PTR 4[esi]
1706 ; sqr a[4]*a[1]
1707 mul edx
1708 add eax, eax
1709 adc edx, edx
1710 adc ecx, 0
1711 add ebp, eax
1712 adc ebx, edx
1713 mov eax, DWORD PTR 12[esi]
1714 adc ecx, 0
1715 mov edx, DWORD PTR 8[esi]
1716 ; sqr a[3]*a[2]
1717 mul edx
1718 add eax, eax
1719 adc edx, edx
1720 adc ecx, 0
1721 add ebp, eax
1722 adc ebx, edx
1723 mov eax, DWORD PTR 24[esi]
1724 adc ecx, 0
1725 mov DWORD PTR 20[edi],ebp
1726 mov edx, DWORD PTR [esi]
1727 ; saved r[5]
1728 ; ############### Calculate word 6
1729 xor ebp, ebp
1730 ; sqr a[6]*a[0]
1731 mul edx
1732 add eax, eax
1733 adc edx, edx
1734 adc ebp, 0
1735 add ebx, eax
1736 adc ecx, edx
1737 mov eax, DWORD PTR 20[esi]
1738 adc ebp, 0
1739 mov edx, DWORD PTR 4[esi]
1740 ; sqr a[5]*a[1]
1741 mul edx
1742 add eax, eax
1743 adc edx, edx
1744 adc ebp, 0
1745 add ebx, eax
1746 adc ecx, edx
1747 mov eax, DWORD PTR 16[esi]
1748 adc ebp, 0
1749 mov edx, DWORD PTR 8[esi]
1750 ; sqr a[4]*a[2]
1751 mul edx
1752 add eax, eax
1753 adc edx, edx
1754 adc ebp, 0
1755 add ebx, eax
1756 adc ecx, edx
1757 mov eax, DWORD PTR 12[esi]
1758 adc ebp, 0
1759 ; sqr a[3]*a[3]
1760 mul eax
1761 add ebx, eax
1762 adc ecx, edx
1763 mov edx, DWORD PTR [esi]
1764 adc ebp, 0
1765 mov DWORD PTR 24[edi],ebx
1766 mov eax, DWORD PTR 28[esi]
1767 ; saved r[6]
1768 ; ############### Calculate word 7
1769 xor ebx, ebx
1770 ; sqr a[7]*a[0]
1771 mul edx
1772 add eax, eax
1773 adc edx, edx
1774 adc ebx, 0
1775 add ecx, eax
1776 adc ebp, edx
1777 mov eax, DWORD PTR 24[esi]
1778 adc ebx, 0
1779 mov edx, DWORD PTR 4[esi]
1780 ; sqr a[6]*a[1]
1781 mul edx
1782 add eax, eax
1783 adc edx, edx
1784 adc ebx, 0
1785 add ecx, eax
1786 adc ebp, edx
1787 mov eax, DWORD PTR 20[esi]
1788 adc ebx, 0
1789 mov edx, DWORD PTR 8[esi]
1790 ; sqr a[5]*a[2]
1791 mul edx
1792 add eax, eax
1793 adc edx, edx
1794 adc ebx, 0
1795 add ecx, eax
1796 adc ebp, edx
1797 mov eax, DWORD PTR 16[esi]
1798 adc ebx, 0
1799 mov edx, DWORD PTR 12[esi]
1800 ; sqr a[4]*a[3]
1801 mul edx
1802 add eax, eax
1803 adc edx, edx
1804 adc ebx, 0
1805 add ecx, eax
1806 adc ebp, edx
1807 mov eax, DWORD PTR 28[esi]
1808 adc ebx, 0
1809 mov DWORD PTR 28[edi],ecx
1810 mov edx, DWORD PTR 4[esi]
1811 ; saved r[7]
1812 ; ############### Calculate word 8
1813 xor ecx, ecx
1814 ; sqr a[7]*a[1]
1815 mul edx
1816 add eax, eax
1817 adc edx, edx
1818 adc ecx, 0
1819 add ebp, eax
1820 adc ebx, edx
1821 mov eax, DWORD PTR 24[esi]
1822 adc ecx, 0
1823 mov edx, DWORD PTR 8[esi]
1824 ; sqr a[6]*a[2]
1825 mul edx
1826 add eax, eax
1827 adc edx, edx
1828 adc ecx, 0
1829 add ebp, eax
1830 adc ebx, edx
1831 mov eax, DWORD PTR 20[esi]
1832 adc ecx, 0
1833 mov edx, DWORD PTR 12[esi]
1834 ; sqr a[5]*a[3]
1835 mul edx
1836 add eax, eax
1837 adc edx, edx
1838 adc ecx, 0
1839 add ebp, eax
1840 adc ebx, edx
1841 mov eax, DWORD PTR 16[esi]
1842 adc ecx, 0
1843 ; sqr a[4]*a[4]
1844 mul eax
1845 add ebp, eax
1846 adc ebx, edx
1847 mov edx, DWORD PTR 8[esi]
1848 adc ecx, 0
1849 mov DWORD PTR 32[edi],ebp
1850 mov eax, DWORD PTR 28[esi]
1851 ; saved r[8]
1852 ; ############### Calculate word 9
1853 xor ebp, ebp
1854 ; sqr a[7]*a[2]
1855 mul edx
1856 add eax, eax
1857 adc edx, edx
1858 adc ebp, 0
1859 add ebx, eax
1860 adc ecx, edx
1861 mov eax, DWORD PTR 24[esi]
1862 adc ebp, 0
1863 mov edx, DWORD PTR 12[esi]
1864 ; sqr a[6]*a[3]
1865 mul edx
1866 add eax, eax
1867 adc edx, edx
1868 adc ebp, 0
1869 add ebx, eax
1870 adc ecx, edx
1871 mov eax, DWORD PTR 20[esi]
1872 adc ebp, 0
1873 mov edx, DWORD PTR 16[esi]
1874 ; sqr a[5]*a[4]
1875 mul edx
1876 add eax, eax
1877 adc edx, edx
1878 adc ebp, 0
1879 add ebx, eax
1880 adc ecx, edx
1881 mov eax, DWORD PTR 28[esi]
1882 adc ebp, 0
1883 mov DWORD PTR 36[edi],ebx
1884 mov edx, DWORD PTR 12[esi]
1885 ; saved r[9]
1886 ; ############### Calculate word 10
1887 xor ebx, ebx
1888 ; sqr a[7]*a[3]
1889 mul edx
1890 add eax, eax
1891 adc edx, edx
1892 adc ebx, 0
1893 add ecx, eax
1894 adc ebp, edx
1895 mov eax, DWORD PTR 24[esi]
1896 adc ebx, 0
1897 mov edx, DWORD PTR 16[esi]
1898 ; sqr a[6]*a[4]
1899 mul edx
1900 add eax, eax
1901 adc edx, edx
1902 adc ebx, 0
1903 add ecx, eax
1904 adc ebp, edx
1905 mov eax, DWORD PTR 20[esi]
1906 adc ebx, 0
1907 ; sqr a[5]*a[5]
1908 mul eax
1909 add ecx, eax
1910 adc ebp, edx
1911 mov edx, DWORD PTR 16[esi]
1912 adc ebx, 0
1913 mov DWORD PTR 40[edi],ecx
1914 mov eax, DWORD PTR 28[esi]
1915 ; saved r[10]
1916 ; ############### Calculate word 11
1917 xor ecx, ecx
1918 ; sqr a[7]*a[4]
1919 mul edx
1920 add eax, eax
1921 adc edx, edx
1922 adc ecx, 0
1923 add ebp, eax
1924 adc ebx, edx
1925 mov eax, DWORD PTR 24[esi]
1926 adc ecx, 0
1927 mov edx, DWORD PTR 20[esi]
1928 ; sqr a[6]*a[5]
1929 mul edx
1930 add eax, eax
1931 adc edx, edx
1932 adc ecx, 0
1933 add ebp, eax
1934 adc ebx, edx
1935 mov eax, DWORD PTR 28[esi]
1936 adc ecx, 0
1937 mov DWORD PTR 44[edi],ebp
1938 mov edx, DWORD PTR 20[esi]
1939 ; saved r[11]
1940 ; ############### Calculate word 12
1941 xor ebp, ebp
1942 ; sqr a[7]*a[5]
1943 mul edx
1944 add eax, eax
1945 adc edx, edx
1946 adc ebp, 0
1947 add ebx, eax
1948 adc ecx, edx
1949 mov eax, DWORD PTR 24[esi]
1950 adc ebp, 0
1951 ; sqr a[6]*a[6]
1952 mul eax
1953 add ebx, eax
1954 adc ecx, edx
1955 mov edx, DWORD PTR 24[esi]
1956 adc ebp, 0
1957 mov DWORD PTR 48[edi],ebx
1958 mov eax, DWORD PTR 28[esi]
1959 ; saved r[12]
1960 ; ############### Calculate word 13
1961 xor ebx, ebx
1962 ; sqr a[7]*a[6]
1963 mul edx
1964 add eax, eax
1965 adc edx, edx
1966 adc ebx, 0
1967 add ecx, eax
1968 adc ebp, edx
1969 mov eax, DWORD PTR 28[esi]
1970 adc ebx, 0
1971 mov DWORD PTR 52[edi],ecx
1972 ; saved r[13]
1973 ; ############### Calculate word 14
1974 xor ecx, ecx
1975 ; sqr a[7]*a[7]
1976 mul eax
1977 add ebp, eax
1978 adc ebx, edx
1979 adc ecx, 0
1980 mov DWORD PTR 56[edi],ebp
1981 ; saved r[14]
1982 mov DWORD PTR 60[edi],ebx
1983 pop ebx
1984 pop ebp
1985 pop edi
1986 pop esi
1987 ret
1988_bn_sqr_comba8 ENDP
1989_TEXT ENDS
1990_TEXT SEGMENT
1991PUBLIC _bn_sqr_comba4
1992
1993_bn_sqr_comba4 PROC NEAR
1994 push esi
1995 push edi
1996 push ebp
1997 push ebx
1998 mov edi, DWORD PTR 20[esp]
1999 mov esi, DWORD PTR 24[esp]
2000 xor ebx, ebx
2001 xor ecx, ecx
2002 mov eax, DWORD PTR [esi]
2003 ; ############### Calculate word 0
2004 xor ebp, ebp
2005 ; sqr a[0]*a[0]
2006 mul eax
2007 add ebx, eax
2008 adc ecx, edx
2009 mov edx, DWORD PTR [esi]
2010 adc ebp, 0
2011 mov DWORD PTR [edi],ebx
2012 mov eax, DWORD PTR 4[esi]
2013 ; saved r[0]
2014 ; ############### Calculate word 1
2015 xor ebx, ebx
2016 ; sqr a[1]*a[0]
2017 mul edx
2018 add eax, eax
2019 adc edx, edx
2020 adc ebx, 0
2021 add ecx, eax
2022 adc ebp, edx
2023 mov eax, DWORD PTR 8[esi]
2024 adc ebx, 0
2025 mov DWORD PTR 4[edi],ecx
2026 mov edx, DWORD PTR [esi]
2027 ; saved r[1]
2028 ; ############### Calculate word 2
2029 xor ecx, ecx
2030 ; sqr a[2]*a[0]
2031 mul edx
2032 add eax, eax
2033 adc edx, edx
2034 adc ecx, 0
2035 add ebp, eax
2036 adc ebx, edx
2037 mov eax, DWORD PTR 4[esi]
2038 adc ecx, 0
2039 ; sqr a[1]*a[1]
2040 mul eax
2041 add ebp, eax
2042 adc ebx, edx
2043 mov edx, DWORD PTR [esi]
2044 adc ecx, 0
2045 mov DWORD PTR 8[edi],ebp
2046 mov eax, DWORD PTR 12[esi]
2047 ; saved r[2]
2048 ; ############### Calculate word 3
2049 xor ebp, ebp
2050 ; sqr a[3]*a[0]
2051 mul edx
2052 add eax, eax
2053 adc edx, edx
2054 adc ebp, 0
2055 add ebx, eax
2056 adc ecx, edx
2057 mov eax, DWORD PTR 8[esi]
2058 adc ebp, 0
2059 mov edx, DWORD PTR 4[esi]
2060 ; sqr a[2]*a[1]
2061 mul edx
2062 add eax, eax
2063 adc edx, edx
2064 adc ebp, 0
2065 add ebx, eax
2066 adc ecx, edx
2067 mov eax, DWORD PTR 12[esi]
2068 adc ebp, 0
2069 mov DWORD PTR 12[edi],ebx
2070 mov edx, DWORD PTR 4[esi]
2071 ; saved r[3]
2072 ; ############### Calculate word 4
2073 xor ebx, ebx
2074 ; sqr a[3]*a[1]
2075 mul edx
2076 add eax, eax
2077 adc edx, edx
2078 adc ebx, 0
2079 add ecx, eax
2080 adc ebp, edx
2081 mov eax, DWORD PTR 8[esi]
2082 adc ebx, 0
2083 ; sqr a[2]*a[2]
2084 mul eax
2085 add ecx, eax
2086 adc ebp, edx
2087 mov edx, DWORD PTR 8[esi]
2088 adc ebx, 0
2089 mov DWORD PTR 16[edi],ecx
2090 mov eax, DWORD PTR 12[esi]
2091 ; saved r[4]
2092 ; ############### Calculate word 5
2093 xor ecx, ecx
2094 ; sqr a[3]*a[2]
2095 mul edx
2096 add eax, eax
2097 adc edx, edx
2098 adc ecx, 0
2099 add ebp, eax
2100 adc ebx, edx
2101 mov eax, DWORD PTR 12[esi]
2102 adc ecx, 0
2103 mov DWORD PTR 20[edi],ebp
2104 ; saved r[5]
2105 ; ############### Calculate word 6
2106 xor ebp, ebp
2107 ; sqr a[3]*a[3]
2108 mul eax
2109 add ebx, eax
2110 adc ecx, edx
2111 adc ebp, 0
2112 mov DWORD PTR 24[edi],ebx
2113 ; saved r[6]
2114 mov DWORD PTR 28[edi],ecx
2115 pop ebx
2116 pop ebp
2117 pop edi
2118 pop esi
2119 ret
2120_bn_sqr_comba4 ENDP
2121_TEXT ENDS
689END 2122END
diff --git a/src/lib/libcrypto/bn/asm/bn86unix.cpp b/src/lib/libcrypto/bn/asm/bn86unix.cpp
deleted file mode 100644
index 64702201ea..0000000000
--- a/src/lib/libcrypto/bn/asm/bn86unix.cpp
+++ /dev/null
@@ -1,752 +0,0 @@
1/* Run the C pre-processor over this file with one of the following defined
2 * ELF - elf object files,
3 * OUT - a.out object files,
4 * BSDI - BSDI style a.out object files
5 * SOL - Solaris style elf
6 */
7
8#define TYPE(a,b) .type a,b
9#define SIZE(a,b) .size a,b
10
11#if defined(OUT) || defined(BSDI)
12#define bn_mul_add_words _bn_mul_add_words
13#define bn_mul_words _bn_mul_words
14#define bn_sqr_words _bn_sqr_words
15#define bn_div64 _bn_div64
16#define bn_add_words _bn_add_words
17
18#endif
19
20#ifdef OUT
21#define OK 1
22#define ALIGN 4
23#endif
24
25#ifdef BSDI
26#define OK 1
27#define ALIGN 4
28#undef SIZE
29#undef TYPE
30#define SIZE(a,b)
31#define TYPE(a,b)
32#endif
33
34#if defined(ELF) || defined(SOL)
35#define OK 1
36#define ALIGN 16
37#endif
38
39#ifndef OK
40You need to define one of
41ELF - elf systems - linux-elf, NetBSD and DG-UX
42OUT - a.out systems - linux-a.out and FreeBSD
43SOL - solaris systems, which are elf with strange comment lines
44BSDI - a.out with a very primative version of as.
45#endif
46
47/* Let the Assembler begin :-) */
48 /* Don't even think of reading this code */
49 /* It was automatically generated by bn-586.pl */
50 /* Which is a perl program used to generate the x86 assember for */
51 /* any of elf, a.out, BSDI,Win32, or Solaris */
52 /* eric <eay@cryptsoft.com> */
53
54 .file "bn-586.s"
55 .version "01.01"
56gcc2_compiled.:
57.text
58 .align ALIGN
59.globl bn_mul_add_words
60 TYPE(bn_mul_add_words,@function)
61bn_mul_add_words:
62 pushl %ebp
63 pushl %ebx
64 pushl %esi
65 pushl %edi
66
67
68 xorl %esi, %esi
69 movl 20(%esp), %edi
70 movl 28(%esp), %ecx
71 movl 24(%esp), %ebx
72 andl $4294967288, %ecx
73 movl 32(%esp), %ebp
74 pushl %ecx
75 jz .L000maw_finish
76.L001maw_loop:
77 movl %ecx, (%esp)
78 /* Round 0 */
79 movl (%ebx), %eax
80 mull %ebp
81 addl %esi, %eax
82 movl (%edi), %esi
83 adcl $0, %edx
84 addl %esi, %eax
85 adcl $0, %edx
86 movl %eax, (%edi)
87 movl %edx, %esi
88 /* Round 4 */
89 movl 4(%ebx), %eax
90 mull %ebp
91 addl %esi, %eax
92 movl 4(%edi), %esi
93 adcl $0, %edx
94 addl %esi, %eax
95 adcl $0, %edx
96 movl %eax, 4(%edi)
97 movl %edx, %esi
98 /* Round 8 */
99 movl 8(%ebx), %eax
100 mull %ebp
101 addl %esi, %eax
102 movl 8(%edi), %esi
103 adcl $0, %edx
104 addl %esi, %eax
105 adcl $0, %edx
106 movl %eax, 8(%edi)
107 movl %edx, %esi
108 /* Round 12 */
109 movl 12(%ebx), %eax
110 mull %ebp
111 addl %esi, %eax
112 movl 12(%edi), %esi
113 adcl $0, %edx
114 addl %esi, %eax
115 adcl $0, %edx
116 movl %eax, 12(%edi)
117 movl %edx, %esi
118 /* Round 16 */
119 movl 16(%ebx), %eax
120 mull %ebp
121 addl %esi, %eax
122 movl 16(%edi), %esi
123 adcl $0, %edx
124 addl %esi, %eax
125 adcl $0, %edx
126 movl %eax, 16(%edi)
127 movl %edx, %esi
128 /* Round 20 */
129 movl 20(%ebx), %eax
130 mull %ebp
131 addl %esi, %eax
132 movl 20(%edi), %esi
133 adcl $0, %edx
134 addl %esi, %eax
135 adcl $0, %edx
136 movl %eax, 20(%edi)
137 movl %edx, %esi
138 /* Round 24 */
139 movl 24(%ebx), %eax
140 mull %ebp
141 addl %esi, %eax
142 movl 24(%edi), %esi
143 adcl $0, %edx
144 addl %esi, %eax
145 adcl $0, %edx
146 movl %eax, 24(%edi)
147 movl %edx, %esi
148 /* Round 28 */
149 movl 28(%ebx), %eax
150 mull %ebp
151 addl %esi, %eax
152 movl 28(%edi), %esi
153 adcl $0, %edx
154 addl %esi, %eax
155 adcl $0, %edx
156 movl %eax, 28(%edi)
157 movl %edx, %esi
158
159 movl (%esp), %ecx
160 addl $32, %ebx
161 addl $32, %edi
162 subl $8, %ecx
163 jnz .L001maw_loop
164.L000maw_finish:
165 movl 32(%esp), %ecx
166 andl $7, %ecx
167 jnz .L002maw_finish2
168 jmp .L003maw_end
169.align ALIGN
170.L002maw_finish2:
171 /* Tail Round 0 */
172 movl (%ebx), %eax
173 mull %ebp
174 addl %esi, %eax
175 movl (%edi), %esi
176 adcl $0, %edx
177 addl %esi, %eax
178 adcl $0, %edx
179 decl %ecx
180 movl %eax, (%edi)
181 movl %edx, %esi
182 jz .L003maw_end
183 /* Tail Round 1 */
184 movl 4(%ebx), %eax
185 mull %ebp
186 addl %esi, %eax
187 movl 4(%edi), %esi
188 adcl $0, %edx
189 addl %esi, %eax
190 adcl $0, %edx
191 decl %ecx
192 movl %eax, 4(%edi)
193 movl %edx, %esi
194 jz .L003maw_end
195 /* Tail Round 2 */
196 movl 8(%ebx), %eax
197 mull %ebp
198 addl %esi, %eax
199 movl 8(%edi), %esi
200 adcl $0, %edx
201 addl %esi, %eax
202 adcl $0, %edx
203 decl %ecx
204 movl %eax, 8(%edi)
205 movl %edx, %esi
206 jz .L003maw_end
207 /* Tail Round 3 */
208 movl 12(%ebx), %eax
209 mull %ebp
210 addl %esi, %eax
211 movl 12(%edi), %esi
212 adcl $0, %edx
213 addl %esi, %eax
214 adcl $0, %edx
215 decl %ecx
216 movl %eax, 12(%edi)
217 movl %edx, %esi
218 jz .L003maw_end
219 /* Tail Round 4 */
220 movl 16(%ebx), %eax
221 mull %ebp
222 addl %esi, %eax
223 movl 16(%edi), %esi
224 adcl $0, %edx
225 addl %esi, %eax
226 adcl $0, %edx
227 decl %ecx
228 movl %eax, 16(%edi)
229 movl %edx, %esi
230 jz .L003maw_end
231 /* Tail Round 5 */
232 movl 20(%ebx), %eax
233 mull %ebp
234 addl %esi, %eax
235 movl 20(%edi), %esi
236 adcl $0, %edx
237 addl %esi, %eax
238 adcl $0, %edx
239 decl %ecx
240 movl %eax, 20(%edi)
241 movl %edx, %esi
242 jz .L003maw_end
243 /* Tail Round 6 */
244 movl 24(%ebx), %eax
245 mull %ebp
246 addl %esi, %eax
247 movl 24(%edi), %esi
248 adcl $0, %edx
249 addl %esi, %eax
250 adcl $0, %edx
251 movl %eax, 24(%edi)
252 movl %edx, %esi
253.L003maw_end:
254 movl %esi, %eax
255 popl %ecx
256 popl %edi
257 popl %esi
258 popl %ebx
259 popl %ebp
260 ret
261.bn_mul_add_words_end:
262 SIZE(bn_mul_add_words,.bn_mul_add_words_end-bn_mul_add_words)
263.ident "bn_mul_add_words"
264.text
265 .align ALIGN
266.globl bn_mul_words
267 TYPE(bn_mul_words,@function)
268bn_mul_words:
269 pushl %ebp
270 pushl %ebx
271 pushl %esi
272 pushl %edi
273
274
275 xorl %esi, %esi
276 movl 20(%esp), %edi
277 movl 24(%esp), %ebx
278 movl 28(%esp), %ebp
279 movl 32(%esp), %ecx
280 andl $4294967288, %ebp
281 jz .L004mw_finish
282.L005mw_loop:
283 /* Round 0 */
284 movl (%ebx), %eax
285 mull %ecx
286 addl %esi, %eax
287 adcl $0, %edx
288 movl %eax, (%edi)
289 movl %edx, %esi
290 /* Round 4 */
291 movl 4(%ebx), %eax
292 mull %ecx
293 addl %esi, %eax
294 adcl $0, %edx
295 movl %eax, 4(%edi)
296 movl %edx, %esi
297 /* Round 8 */
298 movl 8(%ebx), %eax
299 mull %ecx
300 addl %esi, %eax
301 adcl $0, %edx
302 movl %eax, 8(%edi)
303 movl %edx, %esi
304 /* Round 12 */
305 movl 12(%ebx), %eax
306 mull %ecx
307 addl %esi, %eax
308 adcl $0, %edx
309 movl %eax, 12(%edi)
310 movl %edx, %esi
311 /* Round 16 */
312 movl 16(%ebx), %eax
313 mull %ecx
314 addl %esi, %eax
315 adcl $0, %edx
316 movl %eax, 16(%edi)
317 movl %edx, %esi
318 /* Round 20 */
319 movl 20(%ebx), %eax
320 mull %ecx
321 addl %esi, %eax
322 adcl $0, %edx
323 movl %eax, 20(%edi)
324 movl %edx, %esi
325 /* Round 24 */
326 movl 24(%ebx), %eax
327 mull %ecx
328 addl %esi, %eax
329 adcl $0, %edx
330 movl %eax, 24(%edi)
331 movl %edx, %esi
332 /* Round 28 */
333 movl 28(%ebx), %eax
334 mull %ecx
335 addl %esi, %eax
336 adcl $0, %edx
337 movl %eax, 28(%edi)
338 movl %edx, %esi
339
340 addl $32, %ebx
341 addl $32, %edi
342 subl $8, %ebp
343 jz .L004mw_finish
344 jmp .L005mw_loop
345.L004mw_finish:
346 movl 28(%esp), %ebp
347 andl $7, %ebp
348 jnz .L006mw_finish2
349 jmp .L007mw_end
350.align ALIGN
351.L006mw_finish2:
352 /* Tail Round 0 */
353 movl (%ebx), %eax
354 mull %ecx
355 addl %esi, %eax
356 adcl $0, %edx
357 movl %eax, (%edi)
358 movl %edx, %esi
359 decl %ebp
360 jz .L007mw_end
361 /* Tail Round 1 */
362 movl 4(%ebx), %eax
363 mull %ecx
364 addl %esi, %eax
365 adcl $0, %edx
366 movl %eax, 4(%edi)
367 movl %edx, %esi
368 decl %ebp
369 jz .L007mw_end
370 /* Tail Round 2 */
371 movl 8(%ebx), %eax
372 mull %ecx
373 addl %esi, %eax
374 adcl $0, %edx
375 movl %eax, 8(%edi)
376 movl %edx, %esi
377 decl %ebp
378 jz .L007mw_end
379 /* Tail Round 3 */
380 movl 12(%ebx), %eax
381 mull %ecx
382 addl %esi, %eax
383 adcl $0, %edx
384 movl %eax, 12(%edi)
385 movl %edx, %esi
386 decl %ebp
387 jz .L007mw_end
388 /* Tail Round 4 */
389 movl 16(%ebx), %eax
390 mull %ecx
391 addl %esi, %eax
392 adcl $0, %edx
393 movl %eax, 16(%edi)
394 movl %edx, %esi
395 decl %ebp
396 jz .L007mw_end
397 /* Tail Round 5 */
398 movl 20(%ebx), %eax
399 mull %ecx
400 addl %esi, %eax
401 adcl $0, %edx
402 movl %eax, 20(%edi)
403 movl %edx, %esi
404 decl %ebp
405 jz .L007mw_end
406 /* Tail Round 6 */
407 movl 24(%ebx), %eax
408 mull %ecx
409 addl %esi, %eax
410 adcl $0, %edx
411 movl %eax, 24(%edi)
412 movl %edx, %esi
413.L007mw_end:
414 movl %esi, %eax
415 popl %edi
416 popl %esi
417 popl %ebx
418 popl %ebp
419 ret
420.bn_mul_words_end:
421 SIZE(bn_mul_words,.bn_mul_words_end-bn_mul_words)
422.ident "bn_mul_words"
423.text
424 .align ALIGN
425.globl bn_sqr_words
426 TYPE(bn_sqr_words,@function)
427bn_sqr_words:
428 pushl %ebp
429 pushl %ebx
430 pushl %esi
431 pushl %edi
432
433
434 movl 20(%esp), %esi
435 movl 24(%esp), %edi
436 movl 28(%esp), %ebx
437 andl $4294967288, %ebx
438 jz .L008sw_finish
439.L009sw_loop:
440 /* Round 0 */
441 movl (%edi), %eax
442 mull %eax
443 movl %eax, (%esi)
444 movl %edx, 4(%esi)
445 /* Round 4 */
446 movl 4(%edi), %eax
447 mull %eax
448 movl %eax, 8(%esi)
449 movl %edx, 12(%esi)
450 /* Round 8 */
451 movl 8(%edi), %eax
452 mull %eax
453 movl %eax, 16(%esi)
454 movl %edx, 20(%esi)
455 /* Round 12 */
456 movl 12(%edi), %eax
457 mull %eax
458 movl %eax, 24(%esi)
459 movl %edx, 28(%esi)
460 /* Round 16 */
461 movl 16(%edi), %eax
462 mull %eax
463 movl %eax, 32(%esi)
464 movl %edx, 36(%esi)
465 /* Round 20 */
466 movl 20(%edi), %eax
467 mull %eax
468 movl %eax, 40(%esi)
469 movl %edx, 44(%esi)
470 /* Round 24 */
471 movl 24(%edi), %eax
472 mull %eax
473 movl %eax, 48(%esi)
474 movl %edx, 52(%esi)
475 /* Round 28 */
476 movl 28(%edi), %eax
477 mull %eax
478 movl %eax, 56(%esi)
479 movl %edx, 60(%esi)
480
481 addl $32, %edi
482 addl $64, %esi
483 subl $8, %ebx
484 jnz .L009sw_loop
485.L008sw_finish:
486 movl 28(%esp), %ebx
487 andl $7, %ebx
488 jz .L010sw_end
489 /* Tail Round 0 */
490 movl (%edi), %eax
491 mull %eax
492 movl %eax, (%esi)
493 decl %ebx
494 movl %edx, 4(%esi)
495 jz .L010sw_end
496 /* Tail Round 1 */
497 movl 4(%edi), %eax
498 mull %eax
499 movl %eax, 8(%esi)
500 decl %ebx
501 movl %edx, 12(%esi)
502 jz .L010sw_end
503 /* Tail Round 2 */
504 movl 8(%edi), %eax
505 mull %eax
506 movl %eax, 16(%esi)
507 decl %ebx
508 movl %edx, 20(%esi)
509 jz .L010sw_end
510 /* Tail Round 3 */
511 movl 12(%edi), %eax
512 mull %eax
513 movl %eax, 24(%esi)
514 decl %ebx
515 movl %edx, 28(%esi)
516 jz .L010sw_end
517 /* Tail Round 4 */
518 movl 16(%edi), %eax
519 mull %eax
520 movl %eax, 32(%esi)
521 decl %ebx
522 movl %edx, 36(%esi)
523 jz .L010sw_end
524 /* Tail Round 5 */
525 movl 20(%edi), %eax
526 mull %eax
527 movl %eax, 40(%esi)
528 decl %ebx
529 movl %edx, 44(%esi)
530 jz .L010sw_end
531 /* Tail Round 6 */
532 movl 24(%edi), %eax
533 mull %eax
534 movl %eax, 48(%esi)
535 movl %edx, 52(%esi)
536.L010sw_end:
537 popl %edi
538 popl %esi
539 popl %ebx
540 popl %ebp
541 ret
542.bn_sqr_words_end:
543 SIZE(bn_sqr_words,.bn_sqr_words_end-bn_sqr_words)
544.ident "bn_sqr_words"
545.text
546 .align ALIGN
547.globl bn_div64
548 TYPE(bn_div64,@function)
549bn_div64:
550 pushl %ebp
551 pushl %ebx
552 pushl %esi
553 pushl %edi
554
555 movl 20(%esp), %edx
556 movl 24(%esp), %eax
557 movl 28(%esp), %ebx
558 divl %ebx
559 popl %edi
560 popl %esi
561 popl %ebx
562 popl %ebp
563 ret
564.bn_div64_end:
565 SIZE(bn_div64,.bn_div64_end-bn_div64)
566.ident "bn_div64"
567.text
568 .align ALIGN
569.globl bn_add_words
570 TYPE(bn_add_words,@function)
571bn_add_words:
572 pushl %ebp
573 pushl %ebx
574 pushl %esi
575 pushl %edi
576
577
578 movl 20(%esp), %ebx
579 movl 24(%esp), %esi
580 movl 28(%esp), %edi
581 movl 32(%esp), %ebp
582 xorl %eax, %eax
583 andl $4294967288, %ebp
584 jz .L011aw_finish
585.L012aw_loop:
586 /* Round 0 */
587 movl (%esi), %ecx
588 movl (%edi), %edx
589 addl %eax, %ecx
590 movl $0, %eax
591 adcl %eax, %eax
592 addl %edx, %ecx
593 adcl $0, %eax
594 movl %ecx, (%ebx)
595 /* Round 1 */
596 movl 4(%esi), %ecx
597 movl 4(%edi), %edx
598 addl %eax, %ecx
599 movl $0, %eax
600 adcl %eax, %eax
601 addl %edx, %ecx
602 adcl $0, %eax
603 movl %ecx, 4(%ebx)
604 /* Round 2 */
605 movl 8(%esi), %ecx
606 movl 8(%edi), %edx
607 addl %eax, %ecx
608 movl $0, %eax
609 adcl %eax, %eax
610 addl %edx, %ecx
611 adcl $0, %eax
612 movl %ecx, 8(%ebx)
613 /* Round 3 */
614 movl 12(%esi), %ecx
615 movl 12(%edi), %edx
616 addl %eax, %ecx
617 movl $0, %eax
618 adcl %eax, %eax
619 addl %edx, %ecx
620 adcl $0, %eax
621 movl %ecx, 12(%ebx)
622 /* Round 4 */
623 movl 16(%esi), %ecx
624 movl 16(%edi), %edx
625 addl %eax, %ecx
626 movl $0, %eax
627 adcl %eax, %eax
628 addl %edx, %ecx
629 adcl $0, %eax
630 movl %ecx, 16(%ebx)
631 /* Round 5 */
632 movl 20(%esi), %ecx
633 movl 20(%edi), %edx
634 addl %eax, %ecx
635 movl $0, %eax
636 adcl %eax, %eax
637 addl %edx, %ecx
638 adcl $0, %eax
639 movl %ecx, 20(%ebx)
640 /* Round 6 */
641 movl 24(%esi), %ecx
642 movl 24(%edi), %edx
643 addl %eax, %ecx
644 movl $0, %eax
645 adcl %eax, %eax
646 addl %edx, %ecx
647 adcl $0, %eax
648 movl %ecx, 24(%ebx)
649 /* Round 7 */
650 movl 28(%esi), %ecx
651 movl 28(%edi), %edx
652 addl %eax, %ecx
653 movl $0, %eax
654 adcl %eax, %eax
655 addl %edx, %ecx
656 adcl $0, %eax
657 movl %ecx, 28(%ebx)
658
659 addl $32, %esi
660 addl $32, %edi
661 addl $32, %ebx
662 subl $8, %ebp
663 jnz .L012aw_loop
664.L011aw_finish:
665 movl 32(%esp), %ebp
666 andl $7, %ebp
667 jz .L013aw_end
668 /* Tail Round 0 */
669 movl (%esi), %ecx
670 movl (%edi), %edx
671 addl %eax, %ecx
672 movl $0, %eax
673 adcl %eax, %eax
674 addl %edx, %ecx
675 adcl $0, %eax
676 decl %ebp
677 movl %ecx, (%ebx)
678 jz .L013aw_end
679 /* Tail Round 1 */
680 movl 4(%esi), %ecx
681 movl 4(%edi), %edx
682 addl %eax, %ecx
683 movl $0, %eax
684 adcl %eax, %eax
685 addl %edx, %ecx
686 adcl $0, %eax
687 decl %ebp
688 movl %ecx, 4(%ebx)
689 jz .L013aw_end
690 /* Tail Round 2 */
691 movl 8(%esi), %ecx
692 movl 8(%edi), %edx
693 addl %eax, %ecx
694 movl $0, %eax
695 adcl %eax, %eax
696 addl %edx, %ecx
697 adcl $0, %eax
698 decl %ebp
699 movl %ecx, 8(%ebx)
700 jz .L013aw_end
701 /* Tail Round 3 */
702 movl 12(%esi), %ecx
703 movl 12(%edi), %edx
704 addl %eax, %ecx
705 movl $0, %eax
706 adcl %eax, %eax
707 addl %edx, %ecx
708 adcl $0, %eax
709 decl %ebp
710 movl %ecx, 12(%ebx)
711 jz .L013aw_end
712 /* Tail Round 4 */
713 movl 16(%esi), %ecx
714 movl 16(%edi), %edx
715 addl %eax, %ecx
716 movl $0, %eax
717 adcl %eax, %eax
718 addl %edx, %ecx
719 adcl $0, %eax
720 decl %ebp
721 movl %ecx, 16(%ebx)
722 jz .L013aw_end
723 /* Tail Round 5 */
724 movl 20(%esi), %ecx
725 movl 20(%edi), %edx
726 addl %eax, %ecx
727 movl $0, %eax
728 adcl %eax, %eax
729 addl %edx, %ecx
730 adcl $0, %eax
731 decl %ebp
732 movl %ecx, 20(%ebx)
733 jz .L013aw_end
734 /* Tail Round 6 */
735 movl 24(%esi), %ecx
736 movl 24(%edi), %edx
737 addl %eax, %ecx
738 movl $0, %eax
739 adcl %eax, %eax
740 addl %edx, %ecx
741 adcl $0, %eax
742 movl %ecx, 24(%ebx)
743.L013aw_end:
744 movl %eax, %eax
745 popl %edi
746 popl %esi
747 popl %ebx
748 popl %ebp
749 ret
750.bn_add_words_end:
751 SIZE(bn_add_words,.bn_add_words_end-bn_add_words)
752.ident "bn_add_words"
diff --git a/src/lib/libcrypto/bn/asm/ca.pl b/src/lib/libcrypto/bn/asm/ca.pl
new file mode 100644
index 0000000000..c1ce67a6b4
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/ca.pl
@@ -0,0 +1,33 @@
1#!/usr/local/bin/perl
2# Written in perl so that more useful register names can be used and then
3# converted into alpha registers.
4#
5
6push(@INC,"perlasm","../../perlasm");
7require "alpha.pl";
8require "alpha/mul_add.pl";
9require "alpha/mul.pl";
10require "alpha/sqr.pl";
11require "alpha/add.pl";
12require "alpha/sub.pl";
13require "alpha/mul_c8.pl";
14require "alpha/mul_c4.pl";
15require "alpha/sqr_c4.pl";
16require "alpha/sqr_c8.pl";
17require "alpha/div.pl";
18
19&asm_init($ARGV[0],$0);
20# NOTE(review): each call below passes the routine's symbol name; the subs themselves are presumably provided by the files required above - confirm.
21&bn_mul_words("bn_mul_words");
22&bn_sqr_words("bn_sqr_words");
23&bn_mul_add_words("bn_mul_add_words");
24&bn_add_words("bn_add_words");
25&bn_sub_words("bn_sub_words");
26&bn_div_words("bn_div_words");
27&bn_mul_comba8("bn_mul_comba8");
28&bn_mul_comba4("bn_mul_comba4");
29&bn_sqr_comba4("bn_sqr_comba4");
30&bn_sqr_comba8("bn_sqr_comba8");
31
32&asm_finish();
33
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
new file mode 100644
index 0000000000..5d962cb957
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/co-586.pl
@@ -0,0 +1,286 @@
1#!/usr/local/bin/perl
2# Driver: emits the comba multiply and square routines for 8- and 4-word operands.
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
6&asm_init($ARGV[0],$0);
7# Arguments below are (function name, operand length in words).
8&bn_mul_comba("bn_mul_comba8",8);
9&bn_mul_comba("bn_mul_comba4",4);
10&bn_sqr_comba("bn_sqr_comba8",8);
11&bn_sqr_comba("bn_sqr_comba4",4);
12
13&asm_finish();
14
15sub mul_add_c
16 {
17 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
18 # Emits one multiply-accumulate step: (c2:c1:c0) += eax*edx, with eax/edx already loaded.
19 # pos == 0: more products follow for this word, so preload a[$na] and b[$nb];
20 # pos > 0: last product of the word, so r[$i] is stored; pos == 1 also preloads the next operands.
21
22 &comment("mul a[$ai]*b[$bi]");
23
24 # "eax" and "edx" will always be pre-loaded.
25 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
26 # &mov("edx",&DWP($bi*4,$b,"",0));
27
28 &mul("edx");
29 &add($c0,"eax");
30 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
31 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
32 ###
33 &adc($c1,"edx");
34 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b
35 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b
36 ###
37 &adc($c2,0);
38 # if pos > 1, it means it is the last loop
39 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
40 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a (eax held the r pointer until the store above)
41 }
42
43sub sqr_add_c
44 {
45 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
46 # Emits one accumulate step (c2:c1:c0) += product, with the product taken once (not doubled).
47 # pos == 0: more products follow for this word, so preload a[$na] into eax;
48 # pos > 0: last product of the word, so r[$i] is stored; pos == 1 also preloads the next operands.
49
50 &comment("sqr a[$ai]*a[$bi]");
51
52 # "eax" and "edx" will always be pre-loaded.
53 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
54 # &mov("edx",&DWP($bi*4,$b,"",0));
55
56 if ($ai == $bi)
57 { &mul("eax");}
58 else
59 { &mul("edx");}
60 &add($c0,"eax");
61 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
62 ###
63 &adc($c1,"edx");
64 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); # second operand, also read from a[]
65 ###
66 &adc($c2,0);
67 # if pos > 1, it means it is the last loop
68 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
69 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a (a[$na] into eax)
70 }
71
72sub sqr_add_c2
73 {
74 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
75 # Emits one accumulate step (c2:c1:c0) += 2*product, with eax/edx already loaded.
76 # pos <= 1: preload a[$na] into eax (and a[$nb] into edx when $na != $nb);
77 # pos > 0: last product of the word, so r[$i] is stored.
78
79 &comment("sqr a[$ai]*a[$bi]");
80
81 # "eax" and "edx" will always be pre-loaded.
82 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
83 # &mov("edx",&DWP($bi*4,$a,"",0));
84
85 if ($ai == $bi)
86 { &mul("eax");}
87 else
88 { &mul("edx");}
89 &add("eax","eax"); # double the low half of the product
90 ###
91 &adc("edx","edx"); # double the high half, taking in the carry from the low half
92 ###
93 &adc($c2,0); # carry out of the doubling goes to c2
94 &add($c0,"eax");
95 &adc($c1,"edx");
96 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
97 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a (same load as for pos == 0)
98 &adc($c2,0);
99 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
100 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb); # second operand, also read from a[]
101 ###
102 }
103
104sub bn_mul_comba
105 {
106 local($name,$num)=@_;
107 local($a,$b,$c0,$c1,$c2);
108 local($i,$as,$ae,$bs,$be,$ai,$bi);
109 local($tot,$end);
110 # Emits function $name: a fully unrolled column-by-column multiply of two $num-word operands.
111 &function_begin_B($name,"");
112
113 $c0="ebx";
114 $c1="ecx";
115 $c2="ebp";
116 $a="esi";
117 $b="edi";
118
119 $as=0;
120 $ae=0;
121 $bs=0;
122 $be=0;
123 $tot=$num+$num-1; # result words produced inside the loop; the top word is stored after it
124
125 &push("esi");
126 &mov($a,&wparam(1));
127 &push("edi");
128 &mov($b,&wparam(2));
129 &push("ebp");
130 &push("ebx");
131
132 &xor($c0,$c0);
133 &mov("eax",&DWP(0,$a,"",0)); # load the first word of a
134 &xor($c1,$c1);
135 &mov("edx",&DWP(0,$b,"",0)); # load the first word of b
136
137 for ($i=0; $i<$tot; $i++)
138 {
139 $ai=$as;
140 $bi=$bs;
141 $end=$be+1;
142
143 &comment("################## Calculate word $i");
144
145 for ($j=$bs; $j<$end; $j++)
146 {
147 &xor($c2,$c2) if ($j == $bs);
148 if (($j+1) == $end)
149 {
150 $v=1; # last product of this result word
151 $v=2 if (($i+1) == $tot); # ... and also of the last word computed in the loop
152 }
153 else
154 { $v=0; }
155 if (($j+1) != $end)
156 {
157 $na=($ai-1);
158 $nb=($bi+1);
159 }
160 else
161 {
162 $na=$as+($i < ($num-1)); # starting indices of the next result word
163 $nb=$bs+($i >= ($num-1));
164 }
165#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
166 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
167 if ($v)
168 {
169 &comment("saved r[$i]");
170 # &mov("eax",&wparam(0));
171 # &mov(&DWP($i*4,"eax","",0),$c0);
172 ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the accumulator registers for the next word
173 }
174 $ai--;
175 $bi++;
176 }
177 $as++ if ($i < ($num-1));
178 $ae++ if ($i >= ($num-1));
179
180 $bs++ if ($i >= ($num-1));
181 $be++ if ($i < ($num-1));
182 }
183 &comment("save r[$i]");
184 # &mov("eax",&wparam(0));
185 &mov(&DWP($i*4,"eax","",0),$c0); # eax still holds the r pointer loaded by the final mul_add_c call
186
187 &pop("ebx");
188 &pop("ebp");
189 &pop("edi");
190 &pop("esi");
191 &ret();
192 &function_end_B($name);
193 }
194
195sub bn_sqr_comba
196 {
197 local($name,$num)=@_;
198 local($r,$a,$c0,$c1,$c2); # register names, assigned below (previously mis-initialised from @_)
199 local($i,$as,$ae,$bs,$be,$ai,$bi);
200 local($b,$tot,$end,$half);
201 # Emits function $name: a fully unrolled column-by-column square of a $num-word operand.
202 &function_begin_B($name,"");
203
204 $c0="ebx";
205 $c1="ecx";
206 $c2="ebp";
207 $a="esi";
208 $r="edi";
209
210 &push("esi");
211 &push("edi");
212 &push("ebp");
213 &push("ebx");
214 &mov($r,&wparam(0));
215 &mov($a,&wparam(1));
216 &xor($c0,$c0);
217 &xor($c1,$c1);
218 &mov("eax",&DWP(0,$a,"",0)); # load the first word
219
220 $as=0;
221 $ae=0;
222 $bs=0;
223 $be=0;
224 $tot=$num+$num-1; # result words produced inside the loop; the top word is stored after it
225
226 for ($i=0; $i<$tot; $i++)
227 {
228 $ai=$as;
229 $bi=$bs;
230 $end=$be+1;
231
232 &comment("############### Calculate word $i");
233 for ($j=$bs; $j<$end; $j++)
234 {
235 &xor($c2,$c2) if ($j == $bs);
236 if (($ai-1) < ($bi+1)) # the next index pair would cross the diagonal: last product of this word
237 {
238 $v=1;
239 $v=2 if ($i+1) == $tot;
240 }
241 else
242 { $v=0; }
243 if (!$v)
244 {
245 $na=$ai-1;
246 $nb=$bi+1;
247 }
248 else
249 {
250 $na=$as+($i < ($num-1)); # starting indices of the next result word
251 $nb=$bs+($i >= ($num-1));
252 }
253 if ($ai == $bi) # diagonal term: added once
254 {
255 &sqr_add_c($r,$a,$ai,$bi,
256 $c0,$c1,$c2,$v,$i,$na,$nb);
257 }
258 else # off-diagonal term: added doubled
259 {
260 &sqr_add_c2($r,$a,$ai,$bi,
261 $c0,$c1,$c2,$v,$i,$na,$nb);
262 }
263 if ($v)
264 {
265 &comment("saved r[$i]");
266 #&mov(&DWP($i*4,$r,"",0),$c0);
267 ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the accumulator registers for the next word
268 last;
269 }
270 $ai--;
271 $bi++;
272 }
273 $as++ if ($i < ($num-1));
274 $ae++ if ($i >= ($num-1));
275
276 $bs++ if ($i >= ($num-1));
277 $be++ if ($i < ($num-1));
278 }
279 &mov(&DWP($i*4,$r,"",0),$c0); # store the top result word
280 &pop("ebx");
281 &pop("ebp");
282 &pop("edi");
283 &pop("esi");
284 &ret();
285 &function_end_B($name);
286 }
diff --git a/src/lib/libcrypto/bn/asm/co-alpha.pl b/src/lib/libcrypto/bn/asm/co-alpha.pl
new file mode 100644
index 0000000000..67dad3e3d5
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/co-alpha.pl
@@ -0,0 +1,116 @@
1#!/usr/local/bin/perl
2# Written in perl so that more useful register names can be used and then
3# converted into alpha registers.
4#
5
6push(@INC,"perlasm","../../perlasm");
7require "alpha.pl";
8
9&asm_init($ARGV[0],$0);
10
11print &bn_sub_words("bn_sub_words");
12
13&asm_finish();
14
15sub bn_sub_words
16 {
17 local($name)=@_;
18 local($cc,$a,$b,$r); # NOTE(review): the register variables assigned below ($a0, $b0, $r0, $tmp, ...) are not localized
19 # Emits function $name: word-wise a[] - b[] into r[] with the borrow carried in $cc, 4 words per main-loop pass.
20 $cc="r0";
21 $a0="r1"; $b0="r5"; $r0="r9"; $tmp="r13";
22 $a1="r2"; $b1="r6"; $r1="r10"; $t1="r14";
23 $a2="r3"; $b2="r7"; $r2="r11";
24 $a3="r4"; $b3="r8"; $r3="r12"; $t3="r15";
25
26 $rp=&wparam(0);
27 $ap=&wparam(1);
28 $bp=&wparam(2);
29 $count=&wparam(3);
30
31 &function_begin($name,"");
32
33 &comment("");
34 &sub($count,4,$count);
35 &mov("zero",$cc);
36 &blt($count,&label("finish"));
37
38 &ld($a0,&QWPw(0,$ap));
39 &ld($b0,&QWPw(0,$bp));
40
41##########################################################
42 &set_label("loop");
43
44 &ld($a1,&QWPw(1,$ap));
45 &cmpult($a0,$b0,$tmp); # will we borrow?
46 &ld($b1,&QWPw(1,$bp));
47 &sub($a0,$b0,$a0); # do the subtract
48 &ld($a2,&QWPw(2,$ap));
49 &cmpult($a0,$cc,$b0); # will we borrow?
50 &ld($b2,&QWPw(2,$bp));
51 &sub($a0,$cc,$a0); # subtract the incoming borrow
52 &ld($a3,&QWPw(3,$ap));
53 &add($b0,$tmp,$cc); # add the borrows
54
55 &cmpult($a1,$b1,$t1); # will we borrow?
56 &sub($a1,$b1,$a1); # do the subtract
57 &ld($b3,&QWPw(3,$bp));
58 &cmpult($a1,$cc,$b1); # will we borrow?
59 &sub($a1,$cc,$a1); # subtract the incoming borrow
60 &add($b1,$t1,$cc); # add the borrows
61
62 &cmpult($a2,$b2,$tmp); # will we borrow?
63 &sub($a2,$b2,$a2); # do the subtract
64 &st($a0,&QWPw(0,$rp)); # save
65 &cmpult($a2,$cc,$b2); # will we borrow?
66 &sub($a2,$cc,$a2); # subtract the incoming borrow
67 &add($b2,$tmp,$cc); # add the borrows
68
69 &cmpult($a3,$b3,$t3); # will we borrow?
70 &sub($a3,$b3,$a3); # do the subtract
71 &st($a1,&QWPw(1,$rp)); # save
72 &cmpult($a3,$cc,$b3); # will we borrow?
73 &sub($a3,$cc,$a3); # subtract the incoming borrow
74 &add($b3,$t3,$cc); # add the borrows
75
76 &st($a2,&QWPw(2,$rp)); # save
77 &sub($count,4,$count); # count-=4
78 &st($a3,&QWPw(3,$rp)); # save
79 &add($ap,4*$QWS,$ap); # ap += 4 words
80 &add($bp,4*$QWS,$bp); # bp += 4 words
81 &add($rp,4*$QWS,$rp); # rp += 4 words
82
83 &blt($count,&label("finish"));
84 &ld($a0,&QWPw(0,$ap));
85 &ld($b0,&QWPw(0,$bp));
86 &br(&label("loop"));
87##################################################
88 # Do the last 0..3 words
89
90 &set_label("last_loop");
91
92 &ld($a0,&QWPw(0,$ap)); # get a
93 &ld($b0,&QWPw(0,$bp)); # get b
94 &cmpult($a0,$b0,$tmp); # will we borrow?
95 &sub($a0,$b0,$a0); # do the subtract
96 &cmpult($a0,$cc,$b0); # will we borrow?
97 &sub($a0,$cc,$a0); # subtract the incoming borrow
98 &st($a0,&QWPw(0,$rp)); # save
99 &add($b0,$tmp,$cc); # add the borrows
100
101 &add($ap,$QWS,$ap);
102 &add($bp,$QWS,$bp);
103 &add($rp,$QWS,$rp);
104 &sub($count,1,$count);
105 &bgt($count,&label("last_loop"));
106 &function_end_A($name); # NOTE(review): presumably emits the return sequence here, ahead of the "finish" code - confirm in alpha.pl
107
108######################################################
109 &set_label("finish");
110 &add($count,4,$count); # undo the -4 bias; what is left is the number of remaining words
111 &bgt($count,&label("last_loop"));
112
113 &set_label("end");
114 &function_end($name);
115 }
116
diff --git a/src/lib/libcrypto/bn/asm/mips1.s b/src/lib/libcrypto/bn/asm/mips1.s
new file mode 100644
index 0000000000..44fa1254c7
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/mips1.s
@@ -0,0 +1,539 @@
1/* This assembler is for R2000/R3000 machines, or higher ones that do
2 * not want to do any 64 bit arithmetic.
3 * Make sure that the SSLeay bignum library is compiled with
4 * THIRTY_TWO_BIT set.
5 * This must either be compiled with the system CC, or, if you use GNU gas,
6 * cc -E mips1.s|gas -o mips1.o
7 */
8 .set reorder
9 .set noat
10
11#define R1 $1
12#define CC $2
13#define R2 $3
14#define R3 $8
15#define R4 $9
16#define L1 $10
17#define L2 $11
18#define L3 $12
19#define L4 $13
20#define H1 $14
21#define H2 $15
22#define H3 $24
23#define H4 $25
24
25#define P1 $4
26#define P2 $5
27#define P3 $6
28#define P4 $7
29
30 .align 2
31 .ent bn_mul_add_words
32 .globl bn_mul_add_words
33.text
34bn_mul_add_words: # P1 = words updated in place, P2 = source words, P3 = word count, P4 = multiplier
35 .frame $sp,0,$31
36 .mask 0x00000000,0
37 .fmask 0x00000000,0
38
39 #blt P3,4,$lab34
40
41 subu R1,P3,4
42 move CC,$0 # running carry starts at zero
43 bltz R1,$lab34 # fewer than 4 words: single-word path only
44$lab2: # main loop, 4 words per pass
45 lw R1,0(P1)
46 lw L1,0(P2)
47 lw R2,4(P1)
48 lw L2,4(P2)
49 lw R3,8(P1)
50 lw L3,8(P2)
51 lw R4,12(P1)
52 lw L4,12(P2)
53 multu L1,P4
54 addu R1,R1,CC # add the incoming carry
55 mflo L1
56 sltu CC,R1,CC # carry out of that add
57 addu R1,R1,L1 # add the low product word
58 mfhi H1
59 sltu L1,R1,L1 # carry out of that add
60 sw R1,0(P1)
61 addu CC,CC,L1
62 multu L2,P4
63 addu CC,H1,CC # next carry = high product word + both carries
64 mflo L2
65 addu R2,R2,CC
66 sltu CC,R2,CC
67 mfhi H2
68 addu R2,R2,L2
69 addu P2,P2,16
70 sltu L2,R2,L2
71 sw R2,4(P1)
72 addu CC,CC,L2
73 multu L3,P4
74 addu CC,H2,CC
75 mflo L3
76 addu R3,R3,CC
77 sltu CC,R3,CC
78 mfhi H3
79 addu R3,R3,L3
80 addu P1,P1,16 # P1 advanced early; the remaining stores use negative offsets
81 sltu L3,R3,L3
82 sw R3,-8(P1)
83 addu CC,CC,L3
84 multu L4,P4
85 addu CC,H3,CC
86 mflo L4
87 addu R4,R4,CC
88 sltu CC,R4,CC
89 mfhi H4
90 addu R4,R4,L4
91 subu P3,P3,4
92 sltu L4,R4,L4
93 addu CC,CC,L4
94 addu CC,H4,CC
95
96 subu R1,P3,4
97 sw R4,-4(P1) # delay slot
98 bgez R1,$lab2 # at least 4 words left: another pass
99
100 bleu P3,0,$lab3
101 .align 2
102$lab33: # single-word loop for the remaining words
103 lw L1,0(P2)
104 lw R1,0(P1)
105 multu L1,P4
106 addu R1,R1,CC
107 sltu CC,R1,CC
108 addu P1,P1,4
109 mflo L1
110 mfhi H1
111 addu R1,R1,L1
112 addu P2,P2,4
113 sltu L1,R1,L1
114 subu P3,P3,1
115 addu CC,CC,L1
116 sw R1,-4(P1)
117 addu CC,H1,CC
118 bgtz P3,$lab33
119 j $31 # return; the final carry is left in CC
120 .align 2
121$lab3:
122 j $31
123 .align 2
124$lab34:
125 bgt P3,0,$lab33
126 j $31
127 .end bn_mul_add_words
128
129 .align 2
130 # Program Unit: bn_mul_words
131 .ent bn_mul_words
132 .globl bn_mul_words
133.text
134bn_mul_words: # P1 = destination words, P2 = source words, P3 = word count, P4 = multiplier
135 .frame $sp,0,$31
136 .mask 0x00000000,0
137 .fmask 0x00000000,0
138
139 subu P3,P3,4 # bias the count by -4 so its sign tells whether a 4-word pass is possible
140 move CC,$0 # running carry starts at zero
141 bltz P3,$lab45
142$lab44: # main loop, 4 words per pass
143 lw L1,0(P2)
144 lw L2,4(P2)
145 lw L3,8(P2)
146 lw L4,12(P2)
147 multu L1,P4
148 subu P3,P3,4
149 mflo L1
150 mfhi H1
151 addu L1,L1,CC # low product word + incoming carry
152 multu L2,P4
153 sltu CC,L1,CC # carry out of that add
154 sw L1,0(P1)
155 addu CC,H1,CC # next carry = high product word + carry out
156 mflo L2
157 mfhi H2
158 addu L2,L2,CC
159 multu L3,P4
160 sltu CC,L2,CC
161 sw L2,4(P1)
162 addu CC,H2,CC
163 mflo L3
164 mfhi H3
165 addu L3,L3,CC
166 multu L4,P4
167 sltu CC,L3,CC
168 sw L3,8(P1)
169 addu CC,H3,CC
170 mflo L4
171 mfhi H4
172 addu L4,L4,CC
173 addu P1,P1,16
174 sltu CC,L4,CC
175 addu P2,P2,16
176 addu CC,H4,CC
177 sw L4,-4(P1)
178
179 bgez P3,$lab44
180 b $lab45
181$lab46: # single-word loop for the remaining words
182 lw L1,0(P2)
183 addu P1,P1,4
184 multu L1,P4
185 addu P2,P2,4
186 mflo L1
187 mfhi H1
188 addu L1,L1,CC
189 subu P3,P3,1
190 sltu CC,L1,CC
191 sw L1,-4(P1)
192 addu CC,H1,CC
193 bgtz P3,$lab46
194 j $31 # return; the final carry is left in CC
195$lab45:
196 addu P3,P3,4 # undo the -4 bias: number of words still to do
197 bgtz P3,$lab46
198 j $31
199 .align 2
200 .end bn_mul_words
201
202 # Program Unit: bn_sqr_words
203 .ent bn_sqr_words
204 .globl bn_sqr_words
205.text
206bn_sqr_words: # P1 = destination (two words written per source word), P2 = source words, P3 = word count
207 .frame $sp,0,$31
208 .mask 0x00000000,0
209 .fmask 0x00000000,0
210
211 subu P3,P3,4 # bias the count by -4 so its sign tells whether a 4-word pass is possible
212 bltz P3,$lab55
213$lab54: # main loop, 4 source words per pass
214 lw L1,0(P2)
215 lw L2,4(P2)
216 lw L3,8(P2)
217 lw L4,12(P2)
218
219 multu L1,L1
220 subu P3,P3,4
221 mflo L1
222 mfhi H1
223 sw L1,0(P1) # low word of the square
224 sw H1,4(P1) # high word of the square
225
226 multu L2,L2
227 addu P1,P1,32 # P1 advanced early; the remaining stores use negative offsets
228 mflo L2
229 mfhi H2
230 sw L2,-24(P1)
231 sw H2,-20(P1)
232
233 multu L3,L3
234 addu P2,P2,16
235 mflo L3
236 mfhi H3
237 sw L3,-16(P1)
238 sw H3,-12(P1)
239
240 multu L4,L4
241
242 mflo L4
243 mfhi H4
244 sw L4,-8(P1)
245 sw H4,-4(P1)
246
247 bgtz P3,$lab54 # NOTE(review): bgtz (bn_mul_words uses bgez) leaves an exact final group of 4 words to the single-word loop
248 b $lab55
249$lab56: # single-word loop for the remaining words
250 lw L1,0(P2)
251 addu P1,P1,8
252 multu L1,L1
253 addu P2,P2,4
254 subu P3,P3,1
255 mflo L1
256 mfhi H1
257 sw L1,-8(P1)
258 sw H1,-4(P1)
259
260 bgtz P3,$lab56
261 j $31
262$lab55:
263 addu P3,P3,4 # undo the -4 bias: number of words still to do
264 bgtz P3,$lab56
265 j $31
266 .align 2
267 .end bn_sqr_words
268
269 # Program Unit: bn_add_words
270 .ent bn_add_words
271 .globl bn_add_words
272.text
273bn_add_words: # 0x590; P1 = destination words, P2 and P3 = the two source arrays, P4 = word count
274 .frame $sp,0,$31
275 .mask 0x00000000,0
276 .fmask 0x00000000,0
277
278 subu P4,P4,4 # bias the count by -4 so its sign tells whether a 4-word pass is possible
279 move CC,$0 # running carry starts at zero
280 bltz P4,$lab65
281$lab64: # main loop, 4 words per pass
282 lw L1,0(P2)
283 lw R1,0(P3)
284 lw L2,4(P2)
285 lw R2,4(P3)
286
287 addu L1,L1,CC # add the incoming carry
288 lw L3,8(P2)
289 sltu CC,L1,CC # carry out of that add
290 addu L1,L1,R1 # add the word from the second array
291 sltu R1,L1,R1 # carry out of that add
292 lw R3,8(P3)
293 addu CC,CC,R1 # combined carry into the next word
294 lw L4,12(P2)
295
296 addu L2,L2,CC
297 lw R4,12(P3)
298 sltu CC,L2,CC
299 addu L2,L2,R2
300 sltu R2,L2,R2
301 sw L1,0(P1)
302 addu CC,CC,R2
303 addu P1,P1,16 # P1 advanced early; the remaining stores use negative offsets
304 addu L3,L3,CC
305 sw L2,-12(P1)
306
307 sltu CC,L3,CC
308 addu L3,L3,R3
309 sltu R3,L3,R3
310 addu P2,P2,16
311 addu CC,CC,R3
312
313 addu L4,L4,CC
314 addu P3,P3,16
315 sltu CC,L4,CC
316 addu L4,L4,R4
317 subu P4,P4,4
318 sltu R4,L4,R4
319 sw L3,-8(P1)
320 addu CC,CC,R4
321 sw L4,-4(P1)
322
323 bgtz P4,$lab64 # NOTE(review): bgtz (bn_mul_words uses bgez) leaves an exact final group of 4 words to the single-word loop
324 b $lab65
325$lab66: # single-word loop for the remaining words
326 lw L1,0(P2)
327 lw R1,0(P3)
328 addu L1,L1,CC
329 addu P1,P1,4
330 sltu CC,L1,CC
331 addu P2,P2,4
332 addu P3,P3,4
333 addu L1,L1,R1
334 subu P4,P4,1
335 sltu R1,L1,R1
336 sw L1,-4(P1)
337 addu CC,CC,R1
338
339 bgtz P4,$lab66
340 j $31 # return; the final carry is left in CC
341$lab65:
342 addu P4,P4,4 # undo the -4 bias: number of words still to do
343 bgtz P4,$lab66
344 j $31
345 .end bn_add_words
346
347 # Program Unit: bn_div64
348 .set at
349 .set reorder
350 .text
351 .align 2
352 .globl bn_div64
353 # 321 {
354 .ent bn_div64 2
355bn_div64:
356 subu $sp, 64
357 sw $31, 56($sp) # save the return address; $31 is reused below for ret
358 sw $16, 48($sp)
359 .mask 0x80010000, -56
360 .frame $sp, 64, $31
361 move $9, $4 # $9 = h
362 move $12, $5 # $12 = l
363 move $16, $6 # $16 = d
364 # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t;
365 move $31, $0
366 # 323 int i,count=2;
367 li $13, 2
368 # 324
369 # 325 if (d == 0) return(BN_MASK2);
370 bne $16, 0, $80
371 li $2, -1
372 b $93
373$80:
374 # 326
375 # 327 i=BN_num_bits_word(d);
376 move $4, $16
377 sw $31, 16($sp) # spill ret, h, l and count around the call
378 sw $9, 24($sp)
379 sw $12, 32($sp)
380 sw $13, 40($sp)
381 .livereg 0x800ff0e,0xfff
382 jal BN_num_bits_word
383 li $4, 32 # $4 = 32, compared with i below and used for 32-i
384 lw $31, 16($sp)
385 lw $9, 24($sp)
386 lw $12, 32($sp)
387 lw $13, 40($sp)
388 move $3, $2
389 # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
390 beq $2, $4, $81
391 li $14, 1
392 sll $15, $14, $2
393 bleu $9, $15, $81
394 # 329 {
395 # 330 #if !defined(NO_STDIO) && !defined(WIN16)
396 # 331 fprintf(stderr,"Division would overflow (%d)\n",i);
397 # 332 #endif
398 # 333 abort();
399 sw $3, 8($sp)
400 sw $9, 24($sp)
401 sw $12, 32($sp)
402 sw $13, 40($sp)
403 sw $31, 16($sp) # was 26($sp): not word-aligned and overlapping the $9 slot at 24($sp)
404 .livereg 0xff0e,0xfff
405 jal abort
406 lw $3, 8($sp)
407 li $4, 32
408 lw $9, 24($sp)
409 lw $12, 32($sp)
410 lw $13, 40($sp)
411 lw $31, 16($sp) # matches the corrected store above
412 # 334 }
413$81:
414 # 335 i=BN_BITS2-i;
415 subu $3, $4, $3
416 # 336 if (h >= d) h-=d;
417 bltu $9, $16, $82
418 subu $9, $9, $16
419$82:
420 # 337
421 # 338 if (i)
422 beq $3, 0, $83
423 # 339 {
424 # 340 d<<=i;
425 sll $16, $16, $3
426 # 341 h=(h<<i)|(l>>(BN_BITS2-i));
427 sll $24, $9, $3
428 subu $25, $4, $3
429 srl $14, $12, $25
430 or $9, $24, $14
431 # 342 l<<=i;
432 sll $12, $12, $3
433 # 343 }
434$83:
435 # 344 dh=(d&BN_MASK2h)>>BN_BITS4;
436 # 345 dl=(d&BN_MASK2l);
437 and $8, $16, -65536
438 srl $8, $8, 16
439 and $10, $16, 65535
440 li $6, -65536 # $6 = high-half mask used in the loop below
441$84:
442 # 346 for (;;)
443 # 347 {
444 # 348 if ((h>>BN_BITS4) == dh)
445 srl $15, $9, 16
446 bne $8, $15, $85
447 # 349 q=BN_MASK2l;
448 li $5, 65535
449 b $86
450$85:
451 # 350 else
452 # 351 q=h/dh;
453 divu $5, $9, $8
454$86:
455 # 352
456 # 353 for (;;)
457 # 354 {
458 # 355 t=(h-q*dh);
459 mul $4, $5, $8 # $4 = q*dh, reused as th after the inner loop
460 subu $2, $9, $4
461 move $3, $2
462 # 356 if ((t&BN_MASK2h) ||
463 # 357 ((dl*q) <= (
464 # 358 (t<<BN_BITS4)+
465 # 359 ((l&BN_MASK2h)>>BN_BITS4))))
466 and $25, $2, $6
467 bne $25, $0, $87
468 mul $24, $10, $5
469 sll $14, $3, 16
470 and $15, $12, $6
471 srl $25, $15, 16
472 addu $15, $14, $25
473 bgtu $24, $15, $88
474$87:
475 # 360 break;
476 mul $3, $10, $5 # $3 = q*dl (tl) for the code after the loop
477 b $89
478$88:
479 # 361 q--;
480 addu $5, $5, -1
481 # 362 }
482 b $86
483$89:
484 # 363 th=q*dh;
485 # 364 tl=q*dl;
486 # 365 t=(tl>>BN_BITS4);
487 # 366 tl=(tl<<BN_BITS4)&BN_MASK2h;
488 sll $14, $3, 16
489 and $2, $14, $6
490 move $11, $2
491 # 367 th+=t;
492 srl $25, $3, 16
493 addu $7, $4, $25
494 # 368
495 # 369 if (l < tl) th++;
496 bgeu $12, $2, $90
497 addu $7, $7, 1
498$90:
499 # 370 l-=tl;
500 subu $12, $12, $11
501 # 371 if (h < th)
502 bgeu $9, $7, $91
503 # 372 {
504 # 373 h+=d;
505 addu $9, $9, $16
506 # 374 q--;
507 addu $5, $5, -1
508 # 375 }
509$91:
510 # 376 h-=th;
511 subu $9, $9, $7
512 # 377
513 # 378 if (--count == 0) break;
514 addu $13, $13, -1
515 beq $13, 0, $92
516 # 379
517 # 380 ret=q<<BN_BITS4;
518 sll $31, $5, 16
519 # 381 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
520 sll $24, $9, 16
521 srl $15, $12, 16
522 or $9, $24, $15
523 # 382 l=(l&BN_MASK2l)<<BN_BITS4;
524 and $12, $12, 65535
525 sll $12, $12, 16
526 # 383 }
527 b $84
528$92:
529 # 384 ret|=q;
530 or $31, $31, $5
531 # 385 return(ret);
532 move $2, $31
533$93:
534 lw $16, 48($sp)
535 lw $31, 56($sp) # restore the real return address
536 addu $sp, 64
537 j $31
538 .end bn_div64
539
diff --git a/src/lib/libcrypto/bn/asm/mips3.s b/src/lib/libcrypto/bn/asm/mips3.s
new file mode 100644
index 0000000000..191345d920
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/mips3.s
@@ -0,0 +1,2138 @@
1.rdata
2.asciiz "mips3.s, Version 1.0"
3.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
4
5/*
6 * ====================================================================
7 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
8 * project.
9 *
10 * Rights for redistribution and usage in source and binary forms are
11 * granted according to the OpenSSL license. Warranty of any kind is
12 * disclaimed.
13 * ====================================================================
14 */
15
16/*
17 * This is my modest contribution to the OpenSSL project (see
18 * http://www.openssl.org/ for more information about it) and is
19 * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
20 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
21 *
22 * The module is designed to work with either of the "new" MIPS ABI(5),
23 * namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
24 * IRIX 5.x not only because it doesn't support new ABIs but also
25 * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
26 * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
27 * cause illegal instruction exception:-(
28 *
29 * In addition the code depends on preprocessor flags set up by MIPSpro
30 * compiler driver (either as or cc) and therefore (probably?) can't be
31 * compiled by the GNU assembler. GNU C driver manages fine though...
32 * I mean as long as -mmips-as is specified or is the default option,
33 * because then it simply invokes /usr/bin/as which in turn takes
34 * perfect care of the preprocessor definitions. Another neat feature
35 * offered by the MIPSpro assembler is an optimization pass. This gave
36 * me the opportunity to have the code looking more regular as all those
37 * architecture dependent instruction rescheduling details were left to
38 * the assembler. Cool, huh?
39 *
40 * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
41 * goes way over 3 times faster!
42 *
43 * <appro@fy.chalmers.se>
44 */
45#include <asm.h>
46#include <regdef.h>
47
/*
 * MOVNZ(cond,dst,src): copy src into dst only when cond is non-zero.
 * With _MIPS_ISA >= 4 this is the native conditional move (movn).
 * Otherwise it is emulated with a branch-likely: the move sits in the
 * delay slot, which bnezl executes only when the branch is taken, and
 * the branch target (.+8) is simply the instruction following the slot.
 */
48#if _MIPS_ISA>=4
49#define MOVNZ(cond,dst,src) \
50 movn dst,src,cond
51#else
52#define MOVNZ(cond,dst,src) \
53 .set noreorder; \
54 bnezl cond,.+8; \
55 move dst,src; \
56 .set reorder
57#endif
58
59.text
60
61.set noat
62.set reorder
63
64#define MINUS4 v1
65
66.align 5
/*
 * bn_mul_add_words
 *   a0 - words that are read, added to and written back
 *   a1 - source words
 *   a2 - word count
 *   a3 - 64-bit multiplier
 * For every word: (carry, [a0]) = [a0] + [a1]*a3 + carry.
 * The final carry word is returned in v0; a count <= 0 returns 0
 * without touching memory. The main loop consumes four words per pass,
 * the tail handles the remaining one to three words.
 */
67LEAF(bn_mul_add_words)
68 .set noreorder
/* branch-likely: the load in the delay slot only runs when a2 > 0 */
69 bgtzl a2,.L_bn_mul_add_words_proceed
70 ld t0,0(a1)
71 jr ra
72 move v0,zero
73 .set reorder
74
75.L_bn_mul_add_words_proceed:
76 li MINUS4,-4
/* ta0 = count with the two low bits cleared; zero means fewer than 4 words */
77 and ta0,a2,MINUS4
78 move v0,zero
79 beqz ta0,.L_bn_mul_add_words_tail
80
81.L_bn_mul_add_words_loop:
/* word 0: t0 was preloaded by the delay slot of the branch that got us here */
82 dmultu t0,a3
83 ld t1,0(a0)
84 ld t2,8(a1)
85 ld t3,8(a0)
86 ld ta0,16(a1)
87 ld ta1,16(a0)
88 daddu t1,v0
89 sltu v0,t1,v0 /* All manuals say it "compares 32-bit
90 * values", but it seems to work fine
91 * even on 64-bit registers. */
92 mflo AT
93 mfhi t0
94 daddu t1,AT
95 daddu v0,t0
96 sltu AT,t1,AT
97 sd t1,0(a0)
98 daddu v0,AT
99
/* word 1 */
100 dmultu t2,a3
101 ld ta2,24(a1)
102 ld ta3,24(a0)
103 daddu t3,v0
104 sltu v0,t3,v0
105 mflo AT
106 mfhi t2
107 daddu t3,AT
108 daddu v0,t2
109 sltu AT,t3,AT
110 sd t3,8(a0)
111 daddu v0,AT
112
/* word 2; count and both pointers are advanced here, so the remaining
 * stores of this pass use negative offsets */
113 dmultu ta0,a3
114 subu a2,4
115 PTR_ADD a0,32
116 PTR_ADD a1,32
117 daddu ta1,v0
118 sltu v0,ta1,v0
119 mflo AT
120 mfhi ta0
121 daddu ta1,AT
122 daddu v0,ta0
123 sltu AT,ta1,AT
124 sd ta1,-16(a0)
125 daddu v0,AT
126
127
/* word 3; ta0 is recomputed as the "another full group of four?" flag */
128 dmultu ta2,a3
129 and ta0,a2,MINUS4
130 daddu ta3,v0
131 sltu v0,ta3,v0
132 mflo AT
133 mfhi ta2
134 daddu ta3,AT
135 daddu v0,ta2
136 sltu AT,ta3,AT
137 sd ta3,-8(a0)
138 daddu v0,AT
139 .set noreorder
/* both branches preload the next source word in their delay slot */
140 bgtzl ta0,.L_bn_mul_add_words_loop
141 ld t0,0(a1)
142
143 bnezl a2,.L_bn_mul_add_words_tail
144 ld t0,0(a1)
145 .set reorder
146
147.L_bn_mul_add_words_return:
148 jr ra
149
/* tail: up to three leftover words, unrolled; t0 already holds the first */
150.L_bn_mul_add_words_tail:
151 dmultu t0,a3
152 ld t1,0(a0)
153 subu a2,1
154 daddu t1,v0
155 sltu v0,t1,v0
156 mflo AT
157 mfhi t0
158 daddu t1,AT
159 daddu v0,t0
160 sltu AT,t1,AT
161 sd t1,0(a0)
162 daddu v0,AT
163 beqz a2,.L_bn_mul_add_words_return
164
165 ld t0,8(a1)
166 dmultu t0,a3
167 ld t1,8(a0)
168 subu a2,1
169 daddu t1,v0
170 sltu v0,t1,v0
171 mflo AT
172 mfhi t0
173 daddu t1,AT
174 daddu v0,t0
175 sltu AT,t1,AT
176 sd t1,8(a0)
177 daddu v0,AT
178 beqz a2,.L_bn_mul_add_words_return
179
180 ld t0,16(a1)
181 dmultu t0,a3
182 ld t1,16(a0)
183 daddu t1,v0
184 sltu v0,t1,v0
185 mflo AT
186 mfhi t0
187 daddu t1,AT
188 daddu v0,t0
189 sltu AT,t1,AT
190 sd t1,16(a0)
191 daddu v0,AT
192 jr ra
193END(bn_mul_add_words)
194
195.align 5
/*
 * bn_mul_words
 *   a0 - destination words
 *   a1 - source words
 *   a2 - word count
 *   a3 - 64-bit multiplier
 * For every word: (carry, [a0]) = [a1]*a3 + carry.
 * The final carry word is returned in v0; a count <= 0 returns 0.
 * Same structure as bn_mul_add_words: a four-way unrolled loop followed
 * by an unrolled tail for the last one to three words.
 */
196LEAF(bn_mul_words)
197 .set noreorder
/* branch-likely: the preload in the delay slot only runs when a2 > 0 */
198 bgtzl a2,.L_bn_mul_words_proceed
199 ld t0,0(a1)
200 jr ra
201 move v0,zero
202 .set reorder
203
204.L_bn_mul_words_proceed:
205 li MINUS4,-4
/* ta0 != 0 iff at least four words remain */
206 and ta0,a2,MINUS4
207 move v0,zero
208 beqz ta0,.L_bn_mul_words_tail
209
210.L_bn_mul_words_loop:
/* word 0: v0 += lo, store, then v0 = carry-out + hi */
211 dmultu t0,a3
212 ld t2,8(a1)
213 ld ta0,16(a1)
214 ld ta2,24(a1)
215 mflo AT
216 mfhi t0
217 daddu v0,AT
218 sltu t1,v0,AT
219 sd v0,0(a0)
220 daddu v0,t1,t0
221
/* word 1; pointers and count advance here, later stores use negative offsets */
222 dmultu t2,a3
223 subu a2,4
224 PTR_ADD a0,32
225 PTR_ADD a1,32
226 mflo AT
227 mfhi t2
228 daddu v0,AT
229 sltu t3,v0,AT
230 sd v0,-24(a0)
231 daddu v0,t3,t2
232
/* word 2 */
233 dmultu ta0,a3
234 mflo AT
235 mfhi ta0
236 daddu v0,AT
237 sltu ta1,v0,AT
238 sd v0,-16(a0)
239 daddu v0,ta1,ta0
240
241
/* word 3; ta0 becomes the loop-again flag */
242 dmultu ta2,a3
243 and ta0,a2,MINUS4
244 mflo AT
245 mfhi ta2
246 daddu v0,AT
247 sltu ta3,v0,AT
248 sd v0,-8(a0)
249 daddu v0,ta3,ta2
250 .set noreorder
/* delay slots preload the next source word for whichever path is taken */
251 bgtzl ta0,.L_bn_mul_words_loop
252 ld t0,0(a1)
253
254 bnezl a2,.L_bn_mul_words_tail
255 ld t0,0(a1)
256 .set reorder
257
258.L_bn_mul_words_return:
259 jr ra
260
/* tail: one to three leftover words; t0 already holds the first of them */
261.L_bn_mul_words_tail:
262 dmultu t0,a3
263 subu a2,1
264 mflo AT
265 mfhi t0
266 daddu v0,AT
267 sltu t1,v0,AT
268 sd v0,0(a0)
269 daddu v0,t1,t0
270 beqz a2,.L_bn_mul_words_return
271
272 ld t0,8(a1)
273 dmultu t0,a3
274 subu a2,1
275 mflo AT
276 mfhi t0
277 daddu v0,AT
278 sltu t1,v0,AT
279 sd v0,8(a0)
280 daddu v0,t1,t0
281 beqz a2,.L_bn_mul_words_return
282
283 ld t0,16(a1)
284 dmultu t0,a3
285 mflo AT
286 mfhi t0
287 daddu v0,AT
288 sltu t1,v0,AT
289 sd v0,16(a0)
290 daddu v0,t1,t0
291 jr ra
292END(bn_mul_words)
293
294.align 5
/*
 * bn_sqr_words
 *   a0 - destination, receives two words per source word
 *   a1 - source words
 *   a2 - word count
 * Each source word is squared; the low half of the square is stored
 * first and the high half in the following word. No carries run between
 * words. v0 is cleared on entry and never written afterwards, so every
 * exit path leaves v0 = 0.
 */
295LEAF(bn_sqr_words)
296 .set noreorder
/* branch-likely: the preload in the delay slot only runs when a2 > 0 */
297 bgtzl a2,.L_bn_sqr_words_proceed
298 ld t0,0(a1)
299 jr ra
300 move v0,zero
301 .set reorder
302
303.L_bn_sqr_words_proceed:
304 li MINUS4,-4
/* ta0 != 0 iff at least four words remain */
305 and ta0,a2,MINUS4
306 move v0,zero
307 beqz ta0,.L_bn_sqr_words_tail
308
309.L_bn_sqr_words_loop:
310 dmultu t0,t0
311 ld t2,8(a1)
312 ld ta0,16(a1)
313 ld ta2,24(a1)
314 mflo t1
315 mfhi t0
316 sd t1,0(a0)
317 sd t0,8(a0)
318
/* destination advances by 64 bytes per pass, source by 32 */
319 dmultu t2,t2
320 subu a2,4
321 PTR_ADD a0,64
322 PTR_ADD a1,32
323 mflo t3
324 mfhi t2
325 sd t3,-48(a0)
326 sd t2,-40(a0)
327
328 dmultu ta0,ta0
329 mflo ta1
330 mfhi ta0
331 sd ta1,-32(a0)
332 sd ta0,-24(a0)
333
334
/* ta0 is recomputed as the loop-again flag */
335 dmultu ta2,ta2
336 and ta0,a2,MINUS4
337 mflo ta3
338 mfhi ta2
339 sd ta3,-16(a0)
340 sd ta2,-8(a0)
341
342 .set noreorder
/* delay slots preload the next source word for whichever path is taken */
343 bgtzl ta0,.L_bn_sqr_words_loop
344 ld t0,0(a1)
345
346 bnezl a2,.L_bn_sqr_words_tail
347 ld t0,0(a1)
348 .set reorder
349
350.L_bn_sqr_words_return:
351 move v0,zero
352 jr ra
353
/* tail: one to three leftover words; t0 already holds the first of them */
354.L_bn_sqr_words_tail:
355 dmultu t0,t0
356 subu a2,1
357 mflo t1
358 mfhi t0
359 sd t1,0(a0)
360 sd t0,8(a0)
361 beqz a2,.L_bn_sqr_words_return
362
363 ld t0,8(a1)
364 dmultu t0,t0
365 subu a2,1
366 mflo t1
367 mfhi t0
368 sd t1,16(a0)
369 sd t0,24(a0)
370 beqz a2,.L_bn_sqr_words_return
371
372 ld t0,16(a1)
373 dmultu t0,t0
374 mflo t1
375 mfhi t0
376 sd t1,32(a0)
377 sd t0,40(a0)
378 jr ra
379END(bn_sqr_words)
380
381.align 5
/*
 * bn_add_words
 *   a0 - destination words
 *   a1 - first source
 *   a2 - second source
 *   a3 - word count
 * For every word: (carry, [a0]) = [a1] + [a2] + carry.
 * The final carry is returned in v0; a count <= 0 returns 0.
 * Each word needs two carry checks (t8/t9 for the operand sum, v0 for
 * the carry-in); at most one of them can be set, so they are added.
 */
382LEAF(bn_add_words)
383 .set noreorder
/* branch-likely: the preload in the delay slot only runs when a3 > 0 */
384 bgtzl a3,.L_bn_add_words_proceed
385 ld t0,0(a1)
386 jr ra
387 move v0,zero
388 .set reorder
389
390.L_bn_add_words_proceed:
391 li MINUS4,-4
/* AT != 0 iff at least four words remain */
392 and AT,a3,MINUS4
393 move v0,zero
394 beqz AT,.L_bn_add_words_tail
395
396.L_bn_add_words_loop:
397 ld ta0,0(a2)
398 ld t1,8(a1)
399 ld ta1,8(a2)
400 ld t2,16(a1)
401 ld ta2,16(a2)
402 ld t3,24(a1)
403 ld ta3,24(a2)
/* word 0; a0 is advanced mid-sequence, hence the negative store offsets */
404 daddu ta0,t0
405 subu a3,4
406 sltu t8,ta0,t0
407 daddu t0,ta0,v0
408 PTR_ADD a0,32
409 sltu v0,t0,ta0
410 sd t0,-32(a0)
411 daddu v0,t8
412
/* word 1 */
413 daddu ta1,t1
414 PTR_ADD a1,32
415 sltu t9,ta1,t1
416 daddu t1,ta1,v0
417 PTR_ADD a2,32
418 sltu v0,t1,ta1
419 sd t1,-24(a0)
420 daddu v0,t9
421
/* word 2; AT is recomputed as the loop-again flag */
422 daddu ta2,t2
423 and AT,a3,MINUS4
424 sltu t8,ta2,t2
425 daddu t2,ta2,v0
426 sltu v0,t2,ta2
427 sd t2,-16(a0)
428 daddu v0,t8
429
/* word 3 */
430 daddu ta3,t3
431 sltu t9,ta3,t3
432 daddu t3,ta3,v0
433 sltu v0,t3,ta3
434 sd t3,-8(a0)
435 daddu v0,t9
436
437 .set noreorder
/* delay slots preload the next word of the first source */
438 bgtzl AT,.L_bn_add_words_loop
439 ld t0,0(a1)
440
441 bnezl a3,.L_bn_add_words_tail
442 ld t0,0(a1)
443 .set reorder
444
445.L_bn_add_words_return:
446 jr ra
447
/* tail: one to three leftover words; t0 already holds the first of them */
448.L_bn_add_words_tail:
449 ld ta0,0(a2)
450 daddu ta0,t0
451 subu a3,1
452 sltu t8,ta0,t0
453 daddu t0,ta0,v0
454 sltu v0,t0,ta0
455 sd t0,0(a0)
456 daddu v0,t8
457 beqz a3,.L_bn_add_words_return
458
459 ld t1,8(a1)
460 ld ta1,8(a2)
461 daddu ta1,t1
462 subu a3,1
463 sltu t9,ta1,t1
464 daddu t1,ta1,v0
465 sltu v0,t1,ta1
466 sd t1,8(a0)
467 daddu v0,t9
468 beqz a3,.L_bn_add_words_return
469
470 ld t2,16(a1)
471 ld ta2,16(a2)
472 daddu ta2,t2
473 sltu t8,ta2,t2
474 daddu t2,ta2,v0
475 sltu v0,t2,ta2
476 sd t2,16(a0)
477 daddu v0,t8
478 jr ra
479END(bn_add_words)
480
481.align 5
/*
 * bn_sub_words
 *   a0 - destination words
 *   a1 - minuend words
 *   a2 - subtrahend words
 *   a3 - word count
 * For every word: [a0] = [a1] - [a2] - borrow.
 * The final borrow is returned in v0; a count <= 0 returns 0.
 *
 * Borrow rule used for each word: t8/t9 = ([a1] < [a2]) is the borrow of
 * the plain difference. If that difference is non-zero, subtracting the
 * incoming borrow cannot wrap, so the new borrow is t8/t9 (MOVNZ copies
 * it into v0). If the difference is zero, the incoming borrow simply
 * propagates and v0 is left as it is.
 */
482LEAF(bn_sub_words)
483 .set noreorder
/* branch-likely: the preload in the delay slot only runs when a3 > 0 */
484 bgtzl a3,.L_bn_sub_words_proceed
485 ld t0,0(a1)
486 jr ra
487 move v0,zero
488 .set reorder
489
490.L_bn_sub_words_proceed:
491 li MINUS4,-4
/* AT != 0 iff at least four words remain */
492 and AT,a3,MINUS4
493 move v0,zero
494 beqz AT,.L_bn_sub_words_tail
495
496.L_bn_sub_words_loop:
497 ld ta0,0(a2)
498 ld t1,8(a1)
499 ld ta1,8(a2)
500 ld t2,16(a1)
501 ld ta2,16(a2)
502 ld t3,24(a1)
503 ld ta3,24(a2)
/* word 0; AT is recomputed here as the loop-again flag */
504 sltu t8,t0,ta0
505 dsubu t0,ta0
506 subu a3,4
507 dsubu ta0,t0,v0
508 and AT,a3,MINUS4
509 sd ta0,0(a0)
510 MOVNZ (t0,v0,t8)
511
/* word 1; a0 and a1 advance here, later stores use negative offsets */
512 sltu t9,t1,ta1
513 dsubu t1,ta1
514 PTR_ADD a0,32
515 dsubu ta1,t1,v0
516 PTR_ADD a1,32
517 sd ta1,-24(a0)
518 MOVNZ (t1,v0,t9)
519
520
/* word 2 */
521 sltu t8,t2,ta2
522 dsubu t2,ta2
523 dsubu ta2,t2,v0
524 PTR_ADD a2,32
525 sd ta2,-16(a0)
526 MOVNZ (t2,v0,t8)
527
/* word 3 */
528 sltu t9,t3,ta3
529 dsubu t3,ta3
530 dsubu ta3,t3,v0
531 sd ta3,-8(a0)
532 MOVNZ (t3,v0,t9)
533
534 .set noreorder
/* delay slots preload the next minuend word */
535 bgtzl AT,.L_bn_sub_words_loop
536 ld t0,0(a1)
537
538 bnezl a3,.L_bn_sub_words_tail
539 ld t0,0(a1)
540 .set reorder
541
542.L_bn_sub_words_return:
543 jr ra
544
/* tail: one to three leftover words; t0 already holds the first of them */
545.L_bn_sub_words_tail:
546 ld ta0,0(a2)
547 subu a3,1
548 sltu t8,t0,ta0
549 dsubu t0,ta0
550 dsubu ta0,t0,v0
551 MOVNZ (t0,v0,t8)
552 sd ta0,0(a0)
553 beqz a3,.L_bn_sub_words_return
554
555 ld t1,8(a1)
556 subu a3,1
557 ld ta1,8(a2)
558 sltu t9,t1,ta1
559 dsubu t1,ta1
560 dsubu ta1,t1,v0
561 MOVNZ (t1,v0,t9)
562 sd ta1,8(a0)
563 beqz a3,.L_bn_sub_words_return
564
565 ld t2,16(a1)
566 ld ta2,16(a2)
567 sltu t8,t2,ta2
568 dsubu t2,ta2
569 dsubu ta2,t2,v0
570 MOVNZ (t2,v0,t8)
571 sd ta2,16(a0)
572 jr ra
573END(bn_sub_words)
574
575#undef MINUS4
576
577.align 5
/*
 * bn_div_words: divide the 128-bit value a0:a1 (a0 = high word) by a2.
 *   v0 - 64-bit quotient, or -1 when a2 is zero
 *   v1 - remainder (see the comment near the end)
 *   a2 - shifted during normalisation and shifted back before returning
 * Registers written: a0, a1, a2, t0-t3, t8, t9, ta0, ta1, v0, v1, AT.
 *
 * Outline:
 *   1. Normalise: shift a2 left until its top bit is set, counting the
 *      shifts in t9, and shift a0:a1 left by the same amount. If any of
 *      the bits shifted out of a0 is set, 'break 6' is executed.
 *   2. If a0 >= a2, subtract a2 from a0 once.
 *   3. Produce the quotient 32 bits at a time in two identical rounds:
 *      estimate QT from the high part, then decrement it while
 *      a2*QT is larger than the current 96-bit numerator chunk.
 */
578LEAF(bn_div_words)
579 .set noreorder
/* delay slot clears v1 (used as the initial shift count) when a2 != 0 */
580 bnezl a2,.L_bn_div_words_proceed
581 move v1,zero
582 jr ra
583 li v0,-1 /* I'd rather signal div-by-zero
584 * which can be done with 'break 7' */
585
586.L_bn_div_words_proceed:
/* top bit already set: skip normalisation, t9 = 0 via the delay slot */
587 bltz a2,.L_bn_div_words_body
588 move t9,v1
/* shift-and-count loop: '.-4' is the dsll just above the branch, and the
 * delay slot bumps t9 once per shift */
589 dsll a2,1
590 bgtz a2,.-4
591 addu t9,1
592
593 .set reorder
/* t1 = -t9: the variable shifts below use it as a shift of 64-t9
 * NOTE(review): relies on variable shifts using only the low six bits of
 * the amount - confirm against the ISA manual */
594 negu t1,t9
595 li t2,-1
596 dsll t2,t1
/* t2 = the t9 top bits of a0, i.e. what the normalising shift would drop */
597 and t2,a0
/* AT = bits of a1 that move up into a0 */
598 dsrl AT,a1,t1
599 .set noreorder
/* branch-likely to the next instruction: the break in the delay slot is
 * executed only when t2 != 0 */
600 bnezl t2,.+8
601 break 6 /* signal overflow */
602 .set reorder
603 dsll a0,t9
604 dsll a1,t9
605 or a0,AT
606
607#define QT ta0
608#define HH ta1
609#define DH v1
610.L_bn_div_words_body:
/* DH = upper 32 bits of the (normalised) divisor */
611 dsrl DH,a2,32
612 sgeu AT,a0,a2
613 .set noreorder
/* conditional subtract: the delay slot runs only when a0 >= a2 */
614 bnezl AT,.+8
615 dsubu a0,a2
616 .set reorder
617
/* ---- round 1: upper 32 bits of the quotient ---- */
618 li QT,-1
619 dsrl HH,a0,32
620 dsrl QT,32 /* q=0xffffffff */
/* when the high halves match, keep the saturated estimate and skip ddivu */
621 beq DH,HH,.L_bn_div_words_skip_div1
622 ddivu zero,a0,DH
623 mflo QT
624.L_bn_div_words_skip_div1:
/* t1:t0 = a2*QT, HH:t3 = current 96-bit numerator chunk */
625 dmultu a2,QT
626 dsll t3,a0,32
627 dsrl AT,a1,32
628 or t3,AT
629 mflo t0
630 mfhi t1
631.L_bn_div_words_inner_loop1:
/* AT = (t1:t0 > HH:t3), i.e. the estimate is still too large */
632 sltu t2,t3,t0
633 seq t8,HH,t1
634 sltu AT,HH,t1
635 and t2,t8
636 or AT,t2
637 .set noreorder
/* delay slot computes the borrow for the 128-bit subtraction below */
638 beqz AT,.L_bn_div_words_inner_loop1_done
639 sltu t2,t0,a2
640 .set reorder
/* QT--, and t1:t0 -= a2 to keep the product in step */
641 dsubu QT,1
642 dsubu t0,a2
643 dsubu t1,t2
644 b .L_bn_div_words_inner_loop1
645.L_bn_div_words_inner_loop1_done:
646
/* bring up the next 32 numerator bits, form the partial remainder and
 * place the first quotient half in the upper 32 bits of v0 */
647 dsll a1,32
648 dsubu a0,t3,t0
649 dsll v0,QT,32
650
/* ---- round 2: lower 32 bits of the quotient, same steps as round 1 ---- */
651 li QT,-1
652 dsrl HH,a0,32
653 dsrl QT,32 /* q=0xffffffff */
654 beq DH,HH,.L_bn_div_words_skip_div2
655 ddivu zero,a0,DH
656 mflo QT
657.L_bn_div_words_skip_div2:
658 dmultu a2,QT
659 dsll t3,a0,32
660 dsrl AT,a1,32
661 or t3,AT
662 mflo t0
663 mfhi t1
664.L_bn_div_words_inner_loop2:
665 sltu t2,t3,t0
666 seq t8,HH,t1
667 sltu AT,HH,t1
668 and t2,t8
669 or AT,t2
670 .set noreorder
671 beqz AT,.L_bn_div_words_inner_loop2_done
672 sltu t2,t0,a2
673 .set reorder
674 dsubu QT,1
675 dsubu t0,a2
676 dsubu t1,t2
677 b .L_bn_div_words_inner_loop2
678.L_bn_div_words_inner_loop2_done:
679
/* final remainder (still normalised) and combined quotient; both the
 * remainder and the divisor are shifted back by t9 before returning */
680 dsubu a0,t3,t0
681 or v0,QT
682 dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
683 dsrl a2,t9 /* restore a2 */
684 jr ra
685#undef HH
686#undef DH
687#undef QT
688END(bn_div_words)
689
690.align 5
/*
 * bn_div_3_words: quotient-digit estimate from three numerator words.
 *   a0 - pointer to the top numerator word; [a0], -8(a0) and -16(a0) are read
 *   a1 - divisor word used for the division itself
 *   a2 - divisor word used for the correction step
 *   v0 - resulting quotient digit
 * The top two numerator words are divided by a1 with bn_div_words
 * (skipped, with v0 = -1, when the top numerator word equals a1). The
 * estimate is then decremented while a2*v0 is still larger than
 * (remainder : third numerator word), stopping early if the running
 * remainder in v1 wraps around.
 *
 * NOTE(review): on the skip-division path the correction loop starts
 * from v1 = 0, i.e. it does not compute an actual remainder for q = -1.
 * Presumably intentional or a latent defect - verify against the C
 * reference implementation before relying on this path.
 * NOTE(review): which argument is the most significant divisor word is
 * not visible from this file - confirm against the C prototype.
 */
691LEAF(bn_div_3_words)
692 .set reorder
693 move a3,a0 /* we know that bn_div_words doesn't
694 * touch a3, ta2, ta3 and preserves a2
695 * so that we can save two arguments
696 * and return address in registers
697 * instead of stack:-)
698 */
/* set up bn_div_words([a3], -8(a3), a1); the original a2 is parked in ta2 */
699 ld a0,(a3)
700 move ta2,a2
701 move a2,a1
702 ld a1,-8(a3)
/* return address is kept in ta3 across the call */
703 move ta3,ra
/* defaults for the skip path: remainder 0, quotient all ones */
704 move v1,zero
705 li v0,-1
706 beq a0,a2,.L_bn_div_3_words_skip_div
707 jal bn_div_words
708 move ra,ta3
709.L_bn_div_3_words_skip_div:
/* t1:t0 = ta2 * estimate, t2 = third numerator word */
710 dmultu ta2,v0
711 ld t2,-16(a3)
712 mflo t0
713 mfhi t1
714.L_bn_div_3_words_inner_loop:
/* done when t1 < v1, or t1 == v1 and t0 <= t2 */
715 sgeu AT,t2,t0
716 seq t9,t1,v1
717 sltu t8,t1,v1
718 and AT,t9
719 or AT,t8
720 bnez AT,.L_bn_div_3_words_inner_loop_done
/* otherwise: v0--, v1 += a2 (AT flags wrap-around), t1:t0 -= ta2 */
721 daddu v1,a2
722 sltu t3,t0,ta2
723 sltu AT,v1,a2
724 dsubu v0,1
725 dsubu t0,ta2
726 dsubu t1,t3
/* loop again only while the remainder addition did not wrap */
727 beqz AT,.L_bn_div_3_words_inner_loop
728.L_bn_div_3_words_inner_loop_done:
729 jr ra
730END(bn_div_3_words)
731
/*
 * Register aliases used by the comba routines below.
 * a_0..a_3 / b_0..b_3 hold the low four words of each operand.
 */
732#define a_0 t0
733#define a_1 t1
734#define a_2 t2
735#define a_3 t3
736#define b_0 ta0
737#define b_1 ta1
738#define b_2 ta2
739#define b_3 ta3
740
/* upper four words: s0-s5 plus the two operand pointer registers, which
 * are reused once the last word has been loaded through them */
741#define a_4 s0
742#define a_5 s2
743#define a_6 s4
744#define a_7 a1 /* once we load a[7] we don't need a anymore */
745#define b_4 s1
746#define b_5 s3
747#define b_6 s5
748#define b_7 a2 /* once we load b[7] we don't need b anymore */
749
/* scratch pair receiving the low/high halves of each product */
750#define t_1 t8
751#define t_2 t9
752
/* three-word column accumulator; the roles rotate from column to column */
753#define c_1 v0
754#define c_2 v1
755#define c_3 a3
756
/* stack space for the six 8-byte registers s0..s5 saved by bn_mul_comba8 */
757#define FRAME_SIZE 48
758
759.align 5
/*
 * bn_mul_comba8: column-wise (comba) 8x8-word multiplication.
 *   a0 - destination, receives sixteen 64-bit words
 *   a1 - first operand, eight words
 *   a2 - second operand, eight words
 * c_1/c_2/c_3 rotate as a three-word column accumulator (low, middle,
 * top). s0-s5 are saved in a FRAME_SIZE-byte stack frame and restored
 * before returning.
 *
 * Carry handling: from column 3 on, the middle accumulator word may
 * already be large enough that adding the high half of the *first*
 * product of a column wraps around. That carry is therefore captured
 * into the top word (which is free at that point, its previous content
 * having just been stored), and the second product adds its carry to
 * the top word instead of overwriting it. In columns 1 and 2 the first
 * product cannot wrap, and the last column cannot wrap at all, so they
 * keep the shorter sequence.
 */
760LEAF(bn_mul_comba8)
761 .set noreorder
762 PTR_SUB sp,FRAME_SIZE
763 .frame sp,FRAME_SIZE,ra
764 .set reorder
765 ld a_0,0(a1) /* If compiled with -mips3 option on
766 * R5000 box assembler barks on this
767 * line with "shouldn't have mult/div
768 * as last instruction in bb (R10K
769 * bug)" warning. If anybody out there
770 * has a clue about how to circumvent
771 * this do send me a note.
772 * <appro@fy.chalmers.se>
773 */
774 ld b_0,0(a2)
775 ld a_1,8(a1)
776 ld a_2,16(a1)
777 ld a_3,24(a1)
778 ld b_1,8(a2)
779 ld b_2,16(a2)
780 ld b_3,24(a2)
781 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
782 sd s0,0(sp)
783 sd s1,8(sp)
784 sd s2,16(sp)
785 sd s3,24(sp)
786 sd s4,32(sp)
787 sd s5,40(sp)
788 mflo c_1
789 mfhi c_2
790
791 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
792 ld a_4,32(a1)
793 ld a_5,40(a1)
794 ld a_6,48(a1)
795 ld a_7,56(a1)
796 ld b_4,32(a2)
797 ld b_5,40(a2)
798 mflo t_1
799 mfhi t_2
800 daddu c_2,t_1
801 sltu AT,c_2,t_1
802 daddu c_3,t_2,AT
803 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
804 ld b_6,48(a2)
805 ld b_7,56(a2)
806 sd c_1,0(a0) /* r[0]=c1; */
807 mflo t_1
808 mfhi t_2
809 daddu c_2,t_1
810 sltu AT,c_2,t_1
811 daddu t_2,AT
812 daddu c_3,t_2
813 sltu c_1,c_3,t_2
814 sd c_2,8(a0) /* r[1]=c2; */
815
816 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
817 mflo t_1
818 mfhi t_2
819 daddu c_3,t_1
820 sltu AT,c_3,t_1
821 daddu t_2,AT
822 daddu c_1,t_2
823 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
824 mflo t_1
825 mfhi t_2
826 daddu c_3,t_1
827 sltu AT,c_3,t_1
828 daddu t_2,AT
829 daddu c_1,t_2
830 sltu c_2,c_1,t_2
831 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
832 mflo t_1
833 mfhi t_2
834 daddu c_3,t_1
835 sltu AT,c_3,t_1
836 daddu t_2,AT
837 daddu c_1,t_2
838 sltu AT,c_1,t_2
839 daddu c_2,AT
840 sd c_3,16(a0) /* r[2]=c3; */
841
842 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
843 mflo t_1
844 mfhi t_2
845 daddu c_1,t_1
846 sltu AT,c_1,t_1
847 daddu t_2,AT
848 daddu c_2,t_2
 sltu c_3,c_2,t_2 /* keep the carry of the 1st product */
849 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
850 mflo t_1
851 mfhi t_2
852 daddu c_1,t_1
853 sltu AT,c_1,t_1
854 daddu t_2,AT
855 daddu c_2,t_2
856 sltu AT,c_2,t_2
 daddu c_3,AT
857 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
858 mflo t_1
859 mfhi t_2
860 daddu c_1,t_1
861 sltu AT,c_1,t_1
862 daddu t_2,AT
863 daddu c_2,t_2
864 sltu AT,c_2,t_2
865 daddu c_3,AT
866 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
867 mflo t_1
868 mfhi t_2
869 daddu c_1,t_1
870 sltu AT,c_1,t_1
871 daddu t_2,AT
872 daddu c_2,t_2
873 sltu AT,c_2,t_2
874 daddu c_3,AT
875 sd c_1,24(a0) /* r[3]=c1; */
876
877 dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */
878 mflo t_1
879 mfhi t_2
880 daddu c_2,t_1
881 sltu AT,c_2,t_1
882 daddu t_2,AT
883 daddu c_3,t_2
 sltu c_1,c_3,t_2 /* keep the carry of the 1st product */
884 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
885 mflo t_1
886 mfhi t_2
887 daddu c_2,t_1
888 sltu AT,c_2,t_1
889 daddu t_2,AT
890 daddu c_3,t_2
891 sltu AT,c_3,t_2
 daddu c_1,AT
892 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
893 mflo t_1
894 mfhi t_2
895 daddu c_2,t_1
896 sltu AT,c_2,t_1
897 daddu t_2,AT
898 daddu c_3,t_2
899 sltu AT,c_3,t_2
900 daddu c_1,AT
901 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
902 mflo t_1
903 mfhi t_2
904 daddu c_2,t_1
905 sltu AT,c_2,t_1
906 daddu t_2,AT
907 daddu c_3,t_2
908 sltu AT,c_3,t_2
909 daddu c_1,AT
910 dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */
911 mflo t_1
912 mfhi t_2
913 daddu c_2,t_1
914 sltu AT,c_2,t_1
915 daddu t_2,AT
916 daddu c_3,t_2
917 sltu AT,c_3,t_2
918 daddu c_1,AT
919 sd c_2,32(a0) /* r[4]=c2; */
920
921 dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */
922 mflo t_1
923 mfhi t_2
924 daddu c_3,t_1
925 sltu AT,c_3,t_1
926 daddu t_2,AT
927 daddu c_1,t_2
 sltu c_2,c_1,t_2 /* keep the carry of the 1st product */
928 dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */
929 mflo t_1
930 mfhi t_2
931 daddu c_3,t_1
932 sltu AT,c_3,t_1
933 daddu t_2,AT
934 daddu c_1,t_2
935 sltu AT,c_1,t_2
 daddu c_2,AT
936 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
937 mflo t_1
938 mfhi t_2
939 daddu c_3,t_1
940 sltu AT,c_3,t_1
941 daddu t_2,AT
942 daddu c_1,t_2
943 sltu AT,c_1,t_2
944 daddu c_2,AT
945 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
946 mflo t_1
947 mfhi t_2
948 daddu c_3,t_1
949 sltu AT,c_3,t_1
950 daddu t_2,AT
951 daddu c_1,t_2
952 sltu AT,c_1,t_2
953 daddu c_2,AT
954 dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */
955 mflo t_1
956 mfhi t_2
957 daddu c_3,t_1
958 sltu AT,c_3,t_1
959 daddu t_2,AT
960 daddu c_1,t_2
961 sltu AT,c_1,t_2
962 daddu c_2,AT
963 dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */
964 mflo t_1
965 mfhi t_2
966 daddu c_3,t_1
967 sltu AT,c_3,t_1
968 daddu t_2,AT
969 daddu c_1,t_2
970 sltu AT,c_1,t_2
971 daddu c_2,AT
972 sd c_3,40(a0) /* r[5]=c3; */
973
974 dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */
975 mflo t_1
976 mfhi t_2
977 daddu c_1,t_1
978 sltu AT,c_1,t_1
979 daddu t_2,AT
980 daddu c_2,t_2
 sltu c_3,c_2,t_2 /* keep the carry of the 1st product */
981 dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */
982 mflo t_1
983 mfhi t_2
984 daddu c_1,t_1
985 sltu AT,c_1,t_1
986 daddu t_2,AT
987 daddu c_2,t_2
988 sltu AT,c_2,t_2
 daddu c_3,AT
989 dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */
990 mflo t_1
991 mfhi t_2
992 daddu c_1,t_1
993 sltu AT,c_1,t_1
994 daddu t_2,AT
995 daddu c_2,t_2
996 sltu AT,c_2,t_2
997 daddu c_3,AT
998 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
999 mflo t_1
1000 mfhi t_2
1001 daddu c_1,t_1
1002 sltu AT,c_1,t_1
1003 daddu t_2,AT
1004 daddu c_2,t_2
1005 sltu AT,c_2,t_2
1006 daddu c_3,AT
1007 dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */
1008 mflo t_1
1009 mfhi t_2
1010 daddu c_1,t_1
1011 sltu AT,c_1,t_1
1012 daddu t_2,AT
1013 daddu c_2,t_2
1014 sltu AT,c_2,t_2
1015 daddu c_3,AT
1016 dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */
1017 mflo t_1
1018 mfhi t_2
1019 daddu c_1,t_1
1020 sltu AT,c_1,t_1
1021 daddu t_2,AT
1022 daddu c_2,t_2
1023 sltu AT,c_2,t_2
1024 daddu c_3,AT
1025 dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */
1026 mflo t_1
1027 mfhi t_2
1028 daddu c_1,t_1
1029 sltu AT,c_1,t_1
1030 daddu t_2,AT
1031 daddu c_2,t_2
1032 sltu AT,c_2,t_2
1033 daddu c_3,AT
1034 sd c_1,48(a0) /* r[6]=c1; */
1035
1036 dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */
1037 mflo t_1
1038 mfhi t_2
1039 daddu c_2,t_1
1040 sltu AT,c_2,t_1
1041 daddu t_2,AT
1042 daddu c_3,t_2
 sltu c_1,c_3,t_2 /* keep the carry of the 1st product */
1043 dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */
1044 mflo t_1
1045 mfhi t_2
1046 daddu c_2,t_1
1047 sltu AT,c_2,t_1
1048 daddu t_2,AT
1049 daddu c_3,t_2
1050 sltu AT,c_3,t_2
 daddu c_1,AT
1051 dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */
1052 mflo t_1
1053 mfhi t_2
1054 daddu c_2,t_1
1055 sltu AT,c_2,t_1
1056 daddu t_2,AT
1057 daddu c_3,t_2
1058 sltu AT,c_3,t_2
1059 daddu c_1,AT
1060 dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */
1061 mflo t_1
1062 mfhi t_2
1063 daddu c_2,t_1
1064 sltu AT,c_2,t_1
1065 daddu t_2,AT
1066 daddu c_3,t_2
1067 sltu AT,c_3,t_2
1068 daddu c_1,AT
1069 dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */
1070 mflo t_1
1071 mfhi t_2
1072 daddu c_2,t_1
1073 sltu AT,c_2,t_1
1074 daddu t_2,AT
1075 daddu c_3,t_2
1076 sltu AT,c_3,t_2
1077 daddu c_1,AT
1078 dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */
1079 mflo t_1
1080 mfhi t_2
1081 daddu c_2,t_1
1082 sltu AT,c_2,t_1
1083 daddu t_2,AT
1084 daddu c_3,t_2
1085 sltu AT,c_3,t_2
1086 daddu c_1,AT
1087 dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */
1088 mflo t_1
1089 mfhi t_2
1090 daddu c_2,t_1
1091 sltu AT,c_2,t_1
1092 daddu t_2,AT
1093 daddu c_3,t_2
1094 sltu AT,c_3,t_2
1095 daddu c_1,AT
1096 dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */
1097 mflo t_1
1098 mfhi t_2
1099 daddu c_2,t_1
1100 sltu AT,c_2,t_1
1101 daddu t_2,AT
1102 daddu c_3,t_2
1103 sltu AT,c_3,t_2
1104 daddu c_1,AT
1105 sd c_2,56(a0) /* r[7]=c2; */
1106
1107 dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */
1108 mflo t_1
1109 mfhi t_2
1110 daddu c_3,t_1
1111 sltu AT,c_3,t_1
1112 daddu t_2,AT
1113 daddu c_1,t_2
 sltu c_2,c_1,t_2 /* keep the carry of the 1st product */
1114 dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */
1115 mflo t_1
1116 mfhi t_2
1117 daddu c_3,t_1
1118 sltu AT,c_3,t_1
1119 daddu t_2,AT
1120 daddu c_1,t_2
1121 sltu AT,c_1,t_2
 daddu c_2,AT
1122 dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */
1123 mflo t_1
1124 mfhi t_2
1125 daddu c_3,t_1
1126 sltu AT,c_3,t_1
1127 daddu t_2,AT
1128 daddu c_1,t_2
1129 sltu AT,c_1,t_2
1130 daddu c_2,AT
1131 dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1132 mflo t_1
1133 mfhi t_2
1134 daddu c_3,t_1
1135 sltu AT,c_3,t_1
1136 daddu t_2,AT
1137 daddu c_1,t_2
1138 sltu AT,c_1,t_2
1139 daddu c_2,AT
1140 dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */
1141 mflo t_1
1142 mfhi t_2
1143 daddu c_3,t_1
1144 sltu AT,c_3,t_1
1145 daddu t_2,AT
1146 daddu c_1,t_2
1147 sltu AT,c_1,t_2
1148 daddu c_2,AT
1149 dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */
1150 mflo t_1
1151 mfhi t_2
1152 daddu c_3,t_1
1153 sltu AT,c_3,t_1
1154 daddu t_2,AT
1155 daddu c_1,t_2
1156 sltu AT,c_1,t_2
1157 daddu c_2,AT
1158 dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */
1159 mflo t_1
1160 mfhi t_2
1161 daddu c_3,t_1
1162 sltu AT,c_3,t_1
1163 daddu t_2,AT
1164 daddu c_1,t_2
1165 sltu AT,c_1,t_2
1166 daddu c_2,AT
1167 sd c_3,64(a0) /* r[8]=c3; */
1168
1169 dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */
1170 mflo t_1
1171 mfhi t_2
1172 daddu c_1,t_1
1173 sltu AT,c_1,t_1
1174 daddu t_2,AT
1175 daddu c_2,t_2
 sltu c_3,c_2,t_2 /* keep the carry of the 1st product */
1176 dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */
1177 mflo t_1
1178 mfhi t_2
1179 daddu c_1,t_1
1180 sltu AT,c_1,t_1
1181 daddu t_2,AT
1182 daddu c_2,t_2
1183 sltu AT,c_2,t_2
 daddu c_3,AT
1184 dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */
1185 mflo t_1
1186 mfhi t_2
1187 daddu c_1,t_1
1188 sltu AT,c_1,t_1
1189 daddu t_2,AT
1190 daddu c_2,t_2
1191 sltu AT,c_2,t_2
1192 daddu c_3,AT
1193 dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */
1194 mflo t_1
1195 mfhi t_2
1196 daddu c_1,t_1
1197 sltu AT,c_1,t_1
1198 daddu t_2,AT
1199 daddu c_2,t_2
1200 sltu AT,c_2,t_2
1201 daddu c_3,AT
1202 dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */
1203 mflo t_1
1204 mfhi t_2
1205 daddu c_1,t_1
1206 sltu AT,c_1,t_1
1207 daddu t_2,AT
1208 daddu c_2,t_2
1209 sltu AT,c_2,t_2
1210 daddu c_3,AT
1211 dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */
1212 mflo t_1
1213 mfhi t_2
1214 daddu c_1,t_1
1215 sltu AT,c_1,t_1
1216 daddu t_2,AT
1217 daddu c_2,t_2
1218 sltu AT,c_2,t_2
1219 daddu c_3,AT
1220 sd c_1,72(a0) /* r[9]=c1; */
1221
1222 dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */
1223 mflo t_1
1224 mfhi t_2
1225 daddu c_2,t_1
1226 sltu AT,c_2,t_1
1227 daddu t_2,AT
1228 daddu c_3,t_2
 sltu c_1,c_3,t_2 /* keep the carry of the 1st product */
1229 dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */
1230 mflo t_1
1231 mfhi t_2
1232 daddu c_2,t_1
1233 sltu AT,c_2,t_1
1234 daddu t_2,AT
1235 daddu c_3,t_2
1236 sltu AT,c_3,t_2
 daddu c_1,AT
1237 dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1238 mflo t_1
1239 mfhi t_2
1240 daddu c_2,t_1
1241 sltu AT,c_2,t_1
1242 daddu t_2,AT
1243 daddu c_3,t_2
1244 sltu AT,c_3,t_2
1245 daddu c_1,AT
1246 dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */
1247 mflo t_1
1248 mfhi t_2
1249 daddu c_2,t_1
1250 sltu AT,c_2,t_1
1251 daddu t_2,AT
1252 daddu c_3,t_2
1253 sltu AT,c_3,t_2
1254 daddu c_1,AT
1255 dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */
1256 mflo t_1
1257 mfhi t_2
1258 daddu c_2,t_1
1259 sltu AT,c_2,t_1
1260 daddu t_2,AT
1261 daddu c_3,t_2
1262 sltu AT,c_3,t_2
1263 daddu c_1,AT
1264 sd c_2,80(a0) /* r[10]=c2; */
1265
1266 dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */
1267 mflo t_1
1268 mfhi t_2
1269 daddu c_3,t_1
1270 sltu AT,c_3,t_1
1271 daddu t_2,AT
1272 daddu c_1,t_2
 sltu c_2,c_1,t_2 /* keep the carry of the 1st product */
1273 dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */
1274 mflo t_1
1275 mfhi t_2
1276 daddu c_3,t_1
1277 sltu AT,c_3,t_1
1278 daddu t_2,AT
1279 daddu c_1,t_2
1280 sltu AT,c_1,t_2
 daddu c_2,AT
1281 dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */
1282 mflo t_1
1283 mfhi t_2
1284 daddu c_3,t_1
1285 sltu AT,c_3,t_1
1286 daddu t_2,AT
1287 daddu c_1,t_2
1288 sltu AT,c_1,t_2
1289 daddu c_2,AT
1290 dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */
1291 mflo t_1
1292 mfhi t_2
1293 daddu c_3,t_1
1294 sltu AT,c_3,t_1
1295 daddu t_2,AT
1296 daddu c_1,t_2
1297 sltu AT,c_1,t_2
1298 daddu c_2,AT
1299 sd c_3,88(a0) /* r[11]=c3; */
1300
1301 dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */
1302 mflo t_1
1303 mfhi t_2
1304 daddu c_1,t_1
1305 sltu AT,c_1,t_1
1306 daddu t_2,AT
1307 daddu c_2,t_2
 sltu c_3,c_2,t_2 /* keep the carry of the 1st product */
1308 dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
1309 mflo t_1
1310 mfhi t_2
1311 daddu c_1,t_1
1312 sltu AT,c_1,t_1
1313 daddu t_2,AT
1314 daddu c_2,t_2
1315 sltu AT,c_2,t_2
 daddu c_3,AT
1316 dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */
1317 mflo t_1
1318 mfhi t_2
1319 daddu c_1,t_1
1320 sltu AT,c_1,t_1
1321 daddu t_2,AT
1322 daddu c_2,t_2
1323 sltu AT,c_2,t_2
1324 daddu c_3,AT
1325 sd c_1,96(a0) /* r[12]=c1; */
1326
1327 dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */
1328 mflo t_1
1329 mfhi t_2
1330 daddu c_2,t_1
1331 sltu AT,c_2,t_1
1332 daddu t_2,AT
1333 daddu c_3,t_2
 sltu c_1,c_3,t_2 /* keep the carry of the 1st product */
1334 dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */
1335 mflo t_1
1336 mfhi t_2
1337 daddu c_2,t_1
1338 sltu AT,c_2,t_1
1339 daddu t_2,AT
1340 daddu c_3,t_2
1341 sltu AT,c_3,t_2
 daddu c_1,AT
1342 sd c_2,104(a0) /* r[13]=c2; */
1343
1344 dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
1345 ld s0,0(sp)
1346 ld s1,8(sp)
1347 ld s2,16(sp)
1348 ld s3,24(sp)
1349 ld s4,32(sp)
1350 ld s5,40(sp)
1351 mflo t_1
1352 mfhi t_2
1353 daddu c_3,t_1
1354 sltu AT,c_3,t_1
1355 daddu t_2,AT
1356 daddu c_1,t_2
1357 sd c_3,112(a0) /* r[14]=c3; */
1358 sd c_1,120(a0) /* r[15]=c1; */
1359
1360 PTR_ADD sp,FRAME_SIZE
1361
1362 jr ra
1364
1365.align 5
/*
 * bn_mul_comba4: column-wise (comba) 4x4-word multiplication.
 *   a0 - destination, receives eight 64-bit words
 *   a1 - first operand, four words
 *   a2 - second operand, four words
 * c_1/c_2/c_3 rotate as a three-word column accumulator (low, middle,
 * top). Only t-registers, v0, v1, a3 and AT are written; no stack frame.
 *
 * Carry handling: from column 3 on, the middle accumulator word may
 * already be large enough that adding the high half of the *first*
 * product of a column wraps around (all-ones operands do this in
 * column 3). That carry is therefore captured into the top word, which
 * is free at that point, and the second product adds its carry to the
 * top word instead of overwriting it. In columns 1 and 2 the first
 * product cannot wrap, and the last column cannot wrap at all.
 */
1366LEAF(bn_mul_comba4)
1367 .set reorder
1368 ld a_0,0(a1)
1369 ld b_0,0(a2)
1370 ld a_1,8(a1)
1371 ld a_2,16(a1)
1372 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1373 ld a_3,24(a1)
1374 ld b_1,8(a2)
1375 ld b_2,16(a2)
1376 ld b_3,24(a2)
1377 mflo c_1
1378 mfhi c_2
1379 sd c_1,0(a0)
1380
1381 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
1382 mflo t_1
1383 mfhi t_2
1384 daddu c_2,t_1
1385 sltu AT,c_2,t_1
1386 daddu c_3,t_2,AT
1387 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
1388 mflo t_1
1389 mfhi t_2
1390 daddu c_2,t_1
1391 sltu AT,c_2,t_1
1392 daddu t_2,AT
1393 daddu c_3,t_2
1394 sltu c_1,c_3,t_2
1395 sd c_2,8(a0)
1396
1397 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
1398 mflo t_1
1399 mfhi t_2
1400 daddu c_3,t_1
1401 sltu AT,c_3,t_1
1402 daddu t_2,AT
1403 daddu c_1,t_2
1404 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1405 mflo t_1
1406 mfhi t_2
1407 daddu c_3,t_1
1408 sltu AT,c_3,t_1
1409 daddu t_2,AT
1410 daddu c_1,t_2
1411 sltu c_2,c_1,t_2
1412 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
1413 mflo t_1
1414 mfhi t_2
1415 daddu c_3,t_1
1416 sltu AT,c_3,t_1
1417 daddu t_2,AT
1418 daddu c_1,t_2
1419 sltu AT,c_1,t_2
1420 daddu c_2,AT
1421 sd c_3,16(a0)
1422
1423 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
1424 mflo t_1
1425 mfhi t_2
1426 daddu c_1,t_1
1427 sltu AT,c_1,t_1
1428 daddu t_2,AT
1429 daddu c_2,t_2
 sltu c_3,c_2,t_2 /* keep the carry of the 1st product */
1430 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
1431 mflo t_1
1432 mfhi t_2
1433 daddu c_1,t_1
1434 sltu AT,c_1,t_1
1435 daddu t_2,AT
1436 daddu c_2,t_2
1437 sltu AT,c_2,t_2
 daddu c_3,AT
1438 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
1439 mflo t_1
1440 mfhi t_2
1441 daddu c_1,t_1
1442 sltu AT,c_1,t_1
1443 daddu t_2,AT
1444 daddu c_2,t_2
1445 sltu AT,c_2,t_2
1446 daddu c_3,AT
1447 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
1448 mflo t_1
1449 mfhi t_2
1450 daddu c_1,t_1
1451 sltu AT,c_1,t_1
1452 daddu t_2,AT
1453 daddu c_2,t_2
1454 sltu AT,c_2,t_2
1455 daddu c_3,AT
1456 sd c_1,24(a0)
1457
1458 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
1459 mflo t_1
1460 mfhi t_2
1461 daddu c_2,t_1
1462 sltu AT,c_2,t_1
1463 daddu t_2,AT
1464 daddu c_3,t_2
 sltu c_1,c_3,t_2 /* keep the carry of the 1st product */
1465 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1466 mflo t_1
1467 mfhi t_2
1468 daddu c_2,t_1
1469 sltu AT,c_2,t_1
1470 daddu t_2,AT
1471 daddu c_3,t_2
1472 sltu AT,c_3,t_2
 daddu c_1,AT
1473 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
1474 mflo t_1
1475 mfhi t_2
1476 daddu c_2,t_1
1477 sltu AT,c_2,t_1
1478 daddu t_2,AT
1479 daddu c_3,t_2
1480 sltu AT,c_3,t_2
1481 daddu c_1,AT
1482 sd c_2,32(a0)
1483
1484 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
1485 mflo t_1
1486 mfhi t_2
1487 daddu c_3,t_1
1488 sltu AT,c_3,t_1
1489 daddu t_2,AT
1490 daddu c_1,t_2
 sltu c_2,c_1,t_2 /* keep the carry of the 1st product */
1491 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
1492 mflo t_1
1493 mfhi t_2
1494 daddu c_3,t_1
1495 sltu AT,c_3,t_1
1496 daddu t_2,AT
1497 daddu c_1,t_2
1498 sltu AT,c_1,t_2
 daddu c_2,AT
1499 sd c_3,40(a0)
1500
1501 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1502 mflo t_1
1503 mfhi t_2
1504 daddu c_1,t_1
1505 sltu AT,c_1,t_1
1506 daddu t_2,AT
1507 daddu c_2,t_2
1508 sd c_1,48(a0)
1509 sd c_2,56(a0)
1510
1511 jr ra
1512END(bn_mul_comba4)
1513
/*
 * Re-map a_4..a_7 for the squaring routine that follows: they now name
 * the registers that were b_0..b_3 (ta0-ta3) instead of s0/s2/s4/a1.
 */
1514#undef a_4
1515#undef a_5
1516#undef a_6
1517#undef a_7
1518#define a_4 b_0
1519#define a_5 b_1
1520#define a_6 b_2
1521#define a_7 b_3
1522
/*
 * bn_sqr_comba8: squares the eight 64-bit words read from 0..56(a1) and
 * stores the sixteen result words at 0..120(a0).
 *
 * The result is produced one word at a time, lowest word first.  c_1, c_2
 * and c_3 rotate through the roles of low/middle/high accumulator word;
 * the last three arguments of each mul_add_c comment give the current
 * order.  mul_add_c adds the dmultu product (t_2:t_1) to the accumulator
 * once, mul_add_c2 adds it twice (the cross terms a[i]*a[j] and a[j]*a[i]
 * are equal).  After a daddu, "sltu AT,sum,addend" recovers the carry out
 * of that addition.  Register a2 is used as scratch for t_2 plus carry.
 * The "b[...]" indices in the inline comments refer to the same array as
 * "a[...]": only a1 is ever dereferenced for input.
 */
1523.align 5
1524LEAF(bn_sqr_comba8)
1525 .set reorder
1526 ld a_0,0(a1)
1527 ld a_1,8(a1)
1528 ld a_2,16(a1)
1529 ld a_3,24(a1)
1530
/* Result word 0.  The remaining loads are issued between the multiply and
   the mflo/mfhi that read its result. */
1531 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1532 ld a_4,32(a1)
1533 ld a_5,40(a1)
1534 ld a_6,48(a1)
1535 ld a_7,56(a1)
1536 mflo c_1
1537 mfhi c_2
1538 sd c_1,0(a0)
1539
/* Result word 1.  c_3 has no prior value, so the first addition writes it
   (t_2+AT) instead of accumulating into it. */
1540 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
1541 mflo t_1
1542 mfhi t_2
1543 daddu c_2,t_1
1544 sltu AT,c_2,t_1
1545 daddu c_3,t_2,AT
1546 daddu c_2,t_1
1547 sltu AT,c_2,t_1
1548 daddu t_2,AT
1549 daddu c_3,t_2
1550 sltu c_1,c_3,t_2
1551 sd c_2,8(a0)
1552
/* Result word 2.  For the first product of a column the new high word
   (here c_2) is written by sltu rather than added to.
   NOTE(review): the carry out of the first "daddu c_1,a2" is not examined
   in this short form, unlike in the later products of a column -- confirm
   that it cannot overflow. */
1553 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
1554 mflo t_1
1555 mfhi t_2
1556 daddu c_3,t_1
1557 sltu AT,c_3,t_1
1558 daddu a2,t_2,AT
1559 daddu c_1,a2
1560 daddu c_3,t_1
1561 sltu AT,c_3,t_1
1562 daddu t_2,AT
1563 daddu c_1,t_2
1564 sltu c_2,c_1,t_2
1565 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1566 mflo t_1
1567 mfhi t_2
1568 daddu c_3,t_1
1569 sltu AT,c_3,t_1
1570 daddu t_2,AT
1571 daddu c_1,t_2
1572 sltu AT,c_1,t_2
1573 daddu c_2,AT
1574 sd c_3,16(a0)
1575
/* Result word 3. */
1576 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
1577 mflo t_1
1578 mfhi t_2
1579 daddu c_1,t_1
1580 sltu AT,c_1,t_1
1581 daddu a2,t_2,AT
1582 daddu c_2,a2
1583 daddu c_1,t_1
1584 sltu AT,c_1,t_1
1585 daddu t_2,AT
1586 daddu c_2,t_2
1587 sltu c_3,c_2,t_2
/* Full mul_add_c2 form: both additions into the middle word have their
   carry folded into the high word. */
1588 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
1589 mflo t_1
1590 mfhi t_2
1591 daddu c_1,t_1
1592 sltu AT,c_1,t_1
1593 daddu a2,t_2,AT
1594 daddu c_2,a2
1595 sltu AT,c_2,a2
1596 daddu c_3,AT
1597 daddu c_1,t_1
1598 sltu AT,c_1,t_1
1599 daddu t_2,AT
1600 daddu c_2,t_2
1601 sltu AT,c_2,t_2
1602 daddu c_3,AT
1603 sd c_1,24(a0)
1604
/* Result word 4. */
1605 dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */
1606 mflo t_1
1607 mfhi t_2
1608 daddu c_2,t_1
1609 sltu AT,c_2,t_1
1610 daddu a2,t_2,AT
1611 daddu c_3,a2
1612 daddu c_2,t_1
1613 sltu AT,c_2,t_1
1614 daddu t_2,AT
1615 daddu c_3,t_2
1616 sltu c_1,c_3,t_2
1617 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
1618 mflo t_1
1619 mfhi t_2
1620 daddu c_2,t_1
1621 sltu AT,c_2,t_1
1622 daddu a2,t_2,AT
1623 daddu c_3,a2
1624 sltu AT,c_3,a2
1625 daddu c_1,AT
1626 daddu c_2,t_1
1627 sltu AT,c_2,t_1
1628 daddu t_2,AT
1629 daddu c_3,t_2
1630 sltu AT,c_3,t_2
1631 daddu c_1,AT
1632 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1633 mflo t_1
1634 mfhi t_2
1635 daddu c_2,t_1
1636 sltu AT,c_2,t_1
1637 daddu t_2,AT
1638 daddu c_3,t_2
1639 sltu AT,c_3,t_2
1640 daddu c_1,AT
1641 sd c_2,32(a0)
1642
/* Result word 5. */
1643 dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */
1644 mflo t_1
1645 mfhi t_2
1646 daddu c_3,t_1
1647 sltu AT,c_3,t_1
1648 daddu a2,t_2,AT
1649 daddu c_1,a2
1650 daddu c_3,t_1
1651 sltu AT,c_3,t_1
1652 daddu t_2,AT
1653 daddu c_1,t_2
1654 sltu c_2,c_1,t_2
1655 dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */
1656 mflo t_1
1657 mfhi t_2
1658 daddu c_3,t_1
1659 sltu AT,c_3,t_1
1660 daddu a2,t_2,AT
1661 daddu c_1,a2
1662 sltu AT,c_1,a2
1663 daddu c_2,AT
1664 daddu c_3,t_1
1665 sltu AT,c_3,t_1
1666 daddu t_2,AT
1667 daddu c_1,t_2
1668 sltu AT,c_1,t_2
1669 daddu c_2,AT
1670 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
1671 mflo t_1
1672 mfhi t_2
1673 daddu c_3,t_1
1674 sltu AT,c_3,t_1
1675 daddu a2,t_2,AT
1676 daddu c_1,a2
1677 sltu AT,c_1,a2
1678 daddu c_2,AT
1679 daddu c_3,t_1
1680 sltu AT,c_3,t_1
1681 daddu t_2,AT
1682 daddu c_1,t_2
1683 sltu AT,c_1,t_2
1684 daddu c_2,AT
1685 sd c_3,40(a0)
1686
/* Result word 6. */
1687 dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */
1688 mflo t_1
1689 mfhi t_2
1690 daddu c_1,t_1
1691 sltu AT,c_1,t_1
1692 daddu a2,t_2,AT
1693 daddu c_2,a2
1694 daddu c_1,t_1
1695 sltu AT,c_1,t_1
1696 daddu t_2,AT
1697 daddu c_2,t_2
1698 sltu c_3,c_2,t_2
1699 dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */
1700 mflo t_1
1701 mfhi t_2
1702 daddu c_1,t_1
1703 sltu AT,c_1,t_1
1704 daddu a2,t_2,AT
1705 daddu c_2,a2
1706 sltu AT,c_2,a2
1707 daddu c_3,AT
1708 daddu c_1,t_1
1709 sltu AT,c_1,t_1
1710 daddu t_2,AT
1711 daddu c_2,t_2
1712 sltu AT,c_2,t_2
1713 daddu c_3,AT
1714 dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */
1715 mflo t_1
1716 mfhi t_2
1717 daddu c_1,t_1
1718 sltu AT,c_1,t_1
1719 daddu a2,t_2,AT
1720 daddu c_2,a2
1721 sltu AT,c_2,a2
1722 daddu c_3,AT
1723 daddu c_1,t_1
1724 sltu AT,c_1,t_1
1725 daddu t_2,AT
1726 daddu c_2,t_2
1727 sltu AT,c_2,t_2
1728 daddu c_3,AT
1729 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1730 mflo t_1
1731 mfhi t_2
1732 daddu c_1,t_1
1733 sltu AT,c_1,t_1
1734 daddu t_2,AT
1735 daddu c_2,t_2
1736 sltu AT,c_2,t_2
1737 daddu c_3,AT
1738 sd c_1,48(a0)
1739
/* Result word 7. */
1740 dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */
1741 mflo t_1
1742 mfhi t_2
1743 daddu c_2,t_1
1744 sltu AT,c_2,t_1
1745 daddu a2,t_2,AT
1746 daddu c_3,a2
1747 daddu c_2,t_1
1748 sltu AT,c_2,t_1
1749 daddu t_2,AT
1750 daddu c_3,t_2
1751 sltu c_1,c_3,t_2
1752 dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */
1753 mflo t_1
1754 mfhi t_2
1755 daddu c_2,t_1
1756 sltu AT,c_2,t_1
1757 daddu a2,t_2,AT
1758 daddu c_3,a2
1759 sltu AT,c_3,a2
1760 daddu c_1,AT
1761 daddu c_2,t_1
1762 sltu AT,c_2,t_1
1763 daddu t_2,AT
1764 daddu c_3,t_2
1765 sltu AT,c_3,t_2
1766 daddu c_1,AT
1767 dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */
1768 mflo t_1
1769 mfhi t_2
1770 daddu c_2,t_1
1771 sltu AT,c_2,t_1
1772 daddu a2,t_2,AT
1773 daddu c_3,a2
1774 sltu AT,c_3,a2
1775 daddu c_1,AT
1776 daddu c_2,t_1
1777 sltu AT,c_2,t_1
1778 daddu t_2,AT
1779 daddu c_3,t_2
1780 sltu AT,c_3,t_2
1781 daddu c_1,AT
1782 dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */
1783 mflo t_1
1784 mfhi t_2
1785 daddu c_2,t_1
1786 sltu AT,c_2,t_1
1787 daddu a2,t_2,AT
1788 daddu c_3,a2
1789 sltu AT,c_3,a2
1790 daddu c_1,AT
1791 daddu c_2,t_1
1792 sltu AT,c_2,t_1
1793 daddu t_2,AT
1794 daddu c_3,t_2
1795 sltu AT,c_3,t_2
1796 daddu c_1,AT
1797 sd c_2,56(a0)
1798
/* Result word 8. */
1799 dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */
1800 mflo t_1
1801 mfhi t_2
1802 daddu c_3,t_1
1803 sltu AT,c_3,t_1
1804 daddu a2,t_2,AT
1805 daddu c_1,a2
1806 daddu c_3,t_1
1807 sltu AT,c_3,t_1
1808 daddu t_2,AT
1809 daddu c_1,t_2
1810 sltu c_2,c_1,t_2
1811 dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */
1812 mflo t_1
1813 mfhi t_2
1814 daddu c_3,t_1
1815 sltu AT,c_3,t_1
1816 daddu a2,t_2,AT
1817 daddu c_1,a2
1818 sltu AT,c_1,a2
1819 daddu c_2,AT
1820 daddu c_3,t_1
1821 sltu AT,c_3,t_1
1822 daddu t_2,AT
1823 daddu c_1,t_2
1824 sltu AT,c_1,t_2
1825 daddu c_2,AT
1826 dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */
1827 mflo t_1
1828 mfhi t_2
1829 daddu c_3,t_1
1830 sltu AT,c_3,t_1
1831 daddu a2,t_2,AT
1832 daddu c_1,a2
1833 sltu AT,c_1,a2
1834 daddu c_2,AT
1835 daddu c_3,t_1
1836 sltu AT,c_3,t_1
1837 daddu t_2,AT
1838 daddu c_1,t_2
1839 sltu AT,c_1,t_2
1840 daddu c_2,AT
1841 dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1842 mflo t_1
1843 mfhi t_2
1844 daddu c_3,t_1
1845 sltu AT,c_3,t_1
1846 daddu t_2,AT
1847 daddu c_1,t_2
1848 sltu AT,c_1,t_2
1849 daddu c_2,AT
1850 sd c_3,64(a0)
1851
/* Result word 9. */
1852 dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */
1853 mflo t_1
1854 mfhi t_2
1855 daddu c_1,t_1
1856 sltu AT,c_1,t_1
1857 daddu a2,t_2,AT
1858 daddu c_2,a2
1859 daddu c_1,t_1
1860 sltu AT,c_1,t_1
1861 daddu t_2,AT
1862 daddu c_2,t_2
1863 sltu c_3,c_2,t_2
1864 dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */
1865 mflo t_1
1866 mfhi t_2
1867 daddu c_1,t_1
1868 sltu AT,c_1,t_1
1869 daddu a2,t_2,AT
1870 daddu c_2,a2
1871 sltu AT,c_2,a2
1872 daddu c_3,AT
1873 daddu c_1,t_1
1874 sltu AT,c_1,t_1
1875 daddu t_2,AT
1876 daddu c_2,t_2
1877 sltu AT,c_2,t_2
1878 daddu c_3,AT
1879 dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */
1880 mflo t_1
1881 mfhi t_2
1882 daddu c_1,t_1
1883 sltu AT,c_1,t_1
1884 daddu a2,t_2,AT
1885 daddu c_2,a2
1886 sltu AT,c_2,a2
1887 daddu c_3,AT
1888 daddu c_1,t_1
1889 sltu AT,c_1,t_1
1890 daddu t_2,AT
1891 daddu c_2,t_2
1892 sltu AT,c_2,t_2
1893 daddu c_3,AT
1894 sd c_1,72(a0)
1895
/* Result word 10. */
1896 dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */
1897 mflo t_1
1898 mfhi t_2
1899 daddu c_2,t_1
1900 sltu AT,c_2,t_1
1901 daddu a2,t_2,AT
1902 daddu c_3,a2
1903 daddu c_2,t_1
1904 sltu AT,c_2,t_1
1905 daddu t_2,AT
1906 daddu c_3,t_2
1907 sltu c_1,c_3,t_2
1908 dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */
1909 mflo t_1
1910 mfhi t_2
1911 daddu c_2,t_1
1912 sltu AT,c_2,t_1
1913 daddu a2,t_2,AT
1914 daddu c_3,a2
1915 sltu AT,c_3,a2
1916 daddu c_1,AT
1917 daddu c_2,t_1
1918 sltu AT,c_2,t_1
1919 daddu t_2,AT
1920 daddu c_3,t_2
1921 sltu AT,c_3,t_2
1922 daddu c_1,AT
1923 dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1924 mflo t_1
1925 mfhi t_2
1926 daddu c_2,t_1
1927 sltu AT,c_2,t_1
1928 daddu t_2,AT
1929 daddu c_3,t_2
1930 sltu AT,c_3,t_2
1931 daddu c_1,AT
1932 sd c_2,80(a0)
1933
/* Result word 11. */
1934 dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */
1935 mflo t_1
1936 mfhi t_2
1937 daddu c_3,t_1
1938 sltu AT,c_3,t_1
1939 daddu a2,t_2,AT
1940 daddu c_1,a2
1941 daddu c_3,t_1
1942 sltu AT,c_3,t_1
1943 daddu t_2,AT
1944 daddu c_1,t_2
1945 sltu c_2,c_1,t_2
1946 dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */
1947 mflo t_1
1948 mfhi t_2
1949 daddu c_3,t_1
1950 sltu AT,c_3,t_1
1951 daddu a2,t_2,AT
1952 daddu c_1,a2
1953 sltu AT,c_1,a2
1954 daddu c_2,AT
1955 daddu c_3,t_1
1956 sltu AT,c_3,t_1
1957 daddu t_2,AT
1958 daddu c_1,t_2
1959 sltu AT,c_1,t_2
1960 daddu c_2,AT
1961 sd c_3,88(a0)
1962
/* Result word 12. */
1963 dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */
1964 mflo t_1
1965 mfhi t_2
1966 daddu c_1,t_1
1967 sltu AT,c_1,t_1
1968 daddu a2,t_2,AT
1969 daddu c_2,a2
1970 daddu c_1,t_1
1971 sltu AT,c_1,t_1
1972 daddu t_2,AT
1973 daddu c_2,t_2
1974 sltu c_3,c_2,t_2
1975 dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
1976 mflo t_1
1977 mfhi t_2
1978 daddu c_1,t_1
1979 sltu AT,c_1,t_1
1980 daddu t_2,AT
1981 daddu c_2,t_2
1982 sltu AT,c_2,t_2
1983 daddu c_3,AT
1984 sd c_1,96(a0)
1985
/* Result word 13. */
1986 dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */
1987 mflo t_1
1988 mfhi t_2
1989 daddu c_2,t_1
1990 sltu AT,c_2,t_1
1991 daddu a2,t_2,AT
1992 daddu c_3,a2
1993 daddu c_2,t_1
1994 sltu AT,c_2,t_1
1995 daddu t_2,AT
1996 daddu c_3,t_2
1997 sltu c_1,c_3,t_2
1998 sd c_2,104(a0)
1999
/* Result words 14 and 15: the last product; no further carry word is
   tracked after it. */
2000 dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
2001 mflo t_1
2002 mfhi t_2
2003 daddu c_3,t_1
2004 sltu AT,c_3,t_1
2005 daddu t_2,AT
2006 daddu c_1,t_2
2007 sd c_3,112(a0)
2008 sd c_1,120(a0)
2009
2010 jr ra
2011END(bn_sqr_comba8)
2012
/*
 * bn_sqr_comba4: squares the four 64-bit words read from 0..24(a1) and
 * stores the eight result words at 0..56(a0).
 *
 * Same scheme as bn_sqr_comba8: one result word per column, with c_1, c_2
 * and c_3 rotating as low/middle/high accumulator word.  mul_add_c adds
 * the dmultu product (t_2:t_1) once, mul_add_c2 adds it twice; each
 * "sltu AT,sum,addend" recovers the carry of the preceding daddu, and a2
 * is scratch.  The "b[...]" indices in the inline comments refer to the
 * same array as "a[...]".
 */
2013.align 5
2014LEAF(bn_sqr_comba4)
2015 .set reorder
2016 ld a_0,0(a1)
2017 ld a_1,8(a1)
2018 ld a_2,16(a1)
2019 ld a_3,24(a1)
/* Result word 0. */
2020 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
2021 mflo c_1
2022 mfhi c_2
2023 sd c_1,0(a0)
2024
/* Result word 1.  c_3 has no prior value, so it is written (t_2+AT)
   rather than accumulated into. */
2025 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
2026 mflo t_1
2027 mfhi t_2
2028 daddu c_2,t_1
2029 sltu AT,c_2,t_1
2030 daddu c_3,t_2,AT
2031 daddu c_2,t_1
2032 sltu AT,c_2,t_1
2033 daddu t_2,AT
2034 daddu c_3,t_2
2035 sltu c_1,c_3,t_2
2036 sd c_2,8(a0)
2037
/* Result word 2.  For the first product of a column the new high word is
   written by sltu rather than added to.
   NOTE(review): the carry out of the first "daddu c_1,a2" is not examined
   in this short form -- confirm that it cannot overflow. */
2038 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
2039 mflo t_1
2040 mfhi t_2
2041 daddu c_3,t_1
2042 sltu AT,c_3,t_1
2043 daddu a2,t_2,AT
2044 daddu c_1,a2
2045 daddu c_3,t_1
2046 sltu AT,c_3,t_1
2047 daddu t_2,AT
2048 daddu c_1,t_2
2049 sltu c_2,c_1,t_2
2050 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
2051 mflo t_1
2052 mfhi t_2
2053 daddu c_3,t_1
2054 sltu AT,c_3,t_1
2055 daddu t_2,AT
2056 daddu c_1,t_2
2057 sltu AT,c_1,t_2
2058 daddu c_2,AT
2059 sd c_3,16(a0)
2060
/* Result word 3. */
2061 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
2062 mflo t_1
2063 mfhi t_2
2064 daddu c_1,t_1
2065 sltu AT,c_1,t_1
2066 daddu a2,t_2,AT
2067 daddu c_2,a2
2068 daddu c_1,t_1
2069 sltu AT,c_1,t_1
2070 daddu t_2,AT
2071 daddu c_2,t_2
2072 sltu c_3,c_2,t_2
2073 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
2074 mflo t_1
2075 mfhi t_2
2076 daddu c_1,t_1
2077 sltu AT,c_1,t_1
2078 daddu a2,t_2,AT
2079 daddu c_2,a2
2080 sltu AT,c_2,a2
2081 daddu c_3,AT
2082 daddu c_1,t_1
2083 sltu AT,c_1,t_1
2084 daddu t_2,AT
2085 daddu c_2,t_2
2086 sltu AT,c_2,t_2
2087 daddu c_3,AT
2088 sd c_1,24(a0)
2089
/* Result word 4. */
2090 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
2091 mflo t_1
2092 mfhi t_2
2093 daddu c_2,t_1
2094 sltu AT,c_2,t_1
2095 daddu a2,t_2,AT
2096 daddu c_3,a2
2097 daddu c_2,t_1
2098 sltu AT,c_2,t_1
2099 daddu t_2,AT
2100 daddu c_3,t_2
2101 sltu c_1,c_3,t_2
2102 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
2103 mflo t_1
2104 mfhi t_2
2105 daddu c_2,t_1
2106 sltu AT,c_2,t_1
2107 daddu t_2,AT
2108 daddu c_3,t_2
2109 sltu AT,c_3,t_2
2110 daddu c_1,AT
2111 sd c_2,32(a0)
2112
/* Result word 5. */
2113 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
2114 mflo t_1
2115 mfhi t_2
2116 daddu c_3,t_1
2117 sltu AT,c_3,t_1
2118 daddu a2,t_2,AT
2119 daddu c_1,a2
2120 daddu c_3,t_1
2121 sltu AT,c_3,t_1
2122 daddu t_2,AT
2123 daddu c_1,t_2
2124 sltu c_2,c_1,t_2
2125 sd c_3,40(a0)
2126
/* Result words 6 and 7: the last product; no further carry word is
   tracked after it. */
2127 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
2128 mflo t_1
2129 mfhi t_2
2130 daddu c_1,t_1
2131 sltu AT,c_1,t_1
2132 daddu t_2,AT
2133 daddu c_2,t_2
2134 sd c_1,48(a0)
2135 sd c_2,56(a0)
2136
2137 jr ra
2138END(bn_sqr_comba4)
diff --git a/src/lib/libcrypto/bn/asm/sparc.s b/src/lib/libcrypto/bn/asm/sparc.s
deleted file mode 100644
index f9e533caa8..0000000000
--- a/src/lib/libcrypto/bn/asm/sparc.s
+++ /dev/null
@@ -1,462 +0,0 @@
1 .file "bn_mulw.c"
2gcc2_compiled.:
3.section ".text"
! bn_mul_add_words (compiler output, per the .file/.ident directives of
! this file).  For each word it loads one word through %o0 and one through
! %o2, multiplies the latter by %o1 (umul, high half read from %y), adds
! the former and the running carry %i4, stores the low word back through
! %o0 and keeps the high word as the next carry.
! Incoming %i0..%i3 are copied to %o0, %o2, %g1 (remaining word count) and
! %o1.  The loop body is unrolled four times; %g4 and %o7 hold %o0+12 and
! %o2+12 so the 2nd..4th copies address their words at -8/-4/0.
! The count is only tested after a word has been processed, so a zero
! count on entry is not handled specially.
! The final carry (%i4) is returned in the caller's %o0 by "restore".
4 .align 4
5 .global bn_mul_add_words
6 .type bn_mul_add_words,#function
7 .proc 016
8bn_mul_add_words:
9 !#PROLOGUE# 0
10 save %sp,-112,%sp
11 !#PROLOGUE# 1
12 mov %i0,%o0
13 mov %i1,%o2
14 mov %i2,%g1
15 mov %i3,%o1
16 mov 0,%i4
17 add %o0,12,%g4
18 add %o2,12,%o7
19.LL2:
! Word 0 of the current group of four.  %i2:%i3 is the carry and %i0:%i1
! the loaded word, each zero-extended to a 64-bit pair for the
! addcc/addx chain.
20 mov %i4,%i3
21 mov 0,%i2
22 ld [%o0],%g2
23 mov %g2,%i1
24 ld [%o2],%g2
25 mov 0,%i0
26 umul %o1,%g2,%g3
27 rd %y,%g2
28 addcc %g3,%i1,%g3
29 addx %g2,%i0,%g2
30 addcc %g3,%i3,%g3
31 addx %g2,%i2,%g2
32 st %g3,[%o0]
33 mov %g2,%i5
34 mov 0,%i4
35 addcc %g1,-1,%g1
36 be .LL3
! Delay slot: executes whether or not the branch is taken, so %i4 holds
! the new carry on both paths.
37 mov %i5,%i4
! Word 1.
38 mov %i4,%i3
39 mov 0,%i2
40 ld [%g4-8],%g2
41 mov %g2,%i1
42 ld [%o7-8],%g2
43 mov 0,%i0
44 umul %o1,%g2,%g3
45 rd %y,%g2
46 addcc %g3,%i1,%g3
47 addx %g2,%i0,%g2
48 addcc %g3,%i3,%g3
49 addx %g2,%i2,%g2
50 st %g3,[%g4-8]
51 mov %g2,%i5
52 mov 0,%i4
53 addcc %g1,-1,%g1
54 be .LL3
55 mov %i5,%i4
! Word 2.
56 mov %i4,%i3
57 mov 0,%i2
58 ld [%g4-4],%g2
59 mov %g2,%i1
60 ld [%o7-4],%g2
61 mov 0,%i0
62 umul %o1,%g2,%g3
63 rd %y,%g2
64 addcc %g3,%i1,%g3
65 addx %g2,%i0,%g2
66 addcc %g3,%i3,%g3
67 addx %g2,%i2,%g2
68 st %g3,[%g4-4]
69 mov %g2,%i5
70 mov 0,%i4
71 addcc %g1,-1,%g1
72 be .LL3
73 mov %i5,%i4
! Word 3.
74 mov %i4,%i3
75 mov 0,%i2
76 ld [%g4],%g2
77 mov %g2,%i1
78 ld [%o7],%g2
79 mov 0,%i0
80 umul %o1,%g2,%g3
81 rd %y,%g2
82 addcc %g3,%i1,%g3
83 addx %g2,%i0,%g2
84 addcc %g3,%i3,%g3
85 addx %g2,%i2,%g2
86 st %g3,[%g4]
87 mov %g2,%i5
88 mov 0,%i4
89 addcc %g1,-1,%g1
90 be .LL3
91 mov %i5,%i4
! Advance all four pointers by four words and repeat.
92 add %o7,16,%o7
93 add %o2,16,%o2
94 add %g4,16,%g4
95 b .LL2
96 add %o0,16,%o0
97.LL3:
98 ret
99 restore %g0,%i4,%o0
100.LLfe1:
101 .size bn_mul_add_words,.LLfe1-bn_mul_add_words
! bn_mul_words (compiler output).  For each word it loads one word through
! %o0, multiplies it by %g4 (umul, high half read from %y), adds the
! running carry %i0, stores the low word through %o7 and keeps the high
! word as the next carry.
! Incoming %i0..%i3 are copied to %o7 (store pointer), %o0 (load pointer),
! %i4 (remaining word count) and %g4 (multiplier).  The body is unrolled
! four times; %g1 and %i5 hold %o7+12 and %o0+12 so the 2nd..4th copies
! use offsets -8/-4/0.
! The count is only tested after a word has been processed.
! The carry is left in %i0, which the plain "restore" exposes as the
! caller's %o0.
102 .align 4
103 .global bn_mul_words
104 .type bn_mul_words,#function
105 .proc 016
106bn_mul_words:
107 !#PROLOGUE# 0
108 save %sp,-112,%sp
109 !#PROLOGUE# 1
110 mov %i0,%o7
111 mov %i1,%o0
112 mov %i2,%i4
113 mov %i3,%g4
114 mov 0,%i0
115 add %o7,12,%g1
116 add %o0,12,%i5
117.LL18:
! Word 0 of the current group of four; %g2:%g3 is the carry as a 64-bit
! pair.
118 mov %i0,%g3
119 mov 0,%g2
120 ld [%o0],%i2
121 umul %g4,%i2,%i3
122 rd %y,%i2
123 addcc %i3,%g3,%i3
124 addx %i2,%g2,%i2
125 st %i3,[%o7]
126 mov %i2,%i1
127 mov 0,%i0
128 addcc %i4,-1,%i4
129 be .LL19
! Delay slot: always executed, so %i0 holds the new carry on both paths.
130 mov %i1,%i0
! Word 1.
131 mov %i0,%g3
132 mov 0,%g2
133 ld [%i5-8],%i2
134 umul %g4,%i2,%i3
135 rd %y,%i2
136 addcc %i3,%g3,%i3
137 addx %i2,%g2,%i2
138 st %i3,[%g1-8]
139 mov %i2,%i1
140 mov 0,%i0
141 addcc %i4,-1,%i4
142 be .LL19
143 mov %i1,%i0
! Word 2.
144 mov %i0,%g3
145 mov 0,%g2
146 ld [%i5-4],%i2
147 umul %g4,%i2,%i3
148 rd %y,%i2
149 addcc %i3,%g3,%i3
150 addx %i2,%g2,%i2
151 st %i3,[%g1-4]
152 mov %i2,%i1
153 mov 0,%i0
154 addcc %i4,-1,%i4
155 be .LL19
156 mov %i1,%i0
! Word 3.
157 mov %i0,%g3
158 mov 0,%g2
159 ld [%i5],%i2
160 umul %g4,%i2,%i3
161 rd %y,%i2
162 addcc %i3,%g3,%i3
163 addx %i2,%g2,%i2
164 st %i3,[%g1]
165 mov %i2,%i1
166 mov 0,%i0
167 addcc %i4,-1,%i4
168 be .LL19
169 mov %i1,%i0
! Advance all four pointers by four words and repeat.
170 add %i5,16,%i5
171 add %o0,16,%o0
172 add %g1,16,%g1
173 b .LL18
174 add %o7,16,%o7
175.LL19:
176 ret
177 restore
178.LLfe2:
179 .size bn_mul_words,.LLfe2-bn_mul_words
! bn_sqr_words (compiler output).  Leaf routine (no register window is
! opened): for each word loaded through %o1 it stores the low and high
! halves of the word's square as two consecutive output words.
! %g4 is the output pointer (copied from %o0), %o3 = %g4+28 and
! %g1 = %o1+12 serve as bases for the unrolled copies, %o2 is the
! remaining word count.  The count is only tested after a word has been
! processed; the store of the high half sits in each branch delay slot and
! therefore always executes.
180 .align 4
181 .global bn_sqr_words
182 .type bn_sqr_words,#function
183 .proc 020
184bn_sqr_words:
185 !#PROLOGUE# 0
186 !#PROLOGUE# 1
187 mov %o0,%g4
188 add %g4,28,%o3
189 add %o1,12,%g1
190.LL34:
! Input word 0 -> output words 0 and 1 ([%o3-24] is %g4+4).
191 ld [%o1],%o0
192 addcc %o2,-1,%o2
193 umul %o0,%o0,%o5
194 rd %y,%o4
195 st %o5,[%g4]
196 mov %o4,%g3
197 mov 0,%g2
198 be .LL35
199 st %g3,[%o3-24]
! Input word 1 -> output words 2 and 3.
200 ld [%g1-8],%o0
201 addcc %o2,-1,%o2
202 umul %o0,%o0,%o5
203 rd %y,%o4
204 st %o5,[%o3-20]
205 mov %o4,%g3
206 mov 0,%g2
207 be .LL35
208 st %g3,[%o3-16]
! Input word 2 -> output words 4 and 5.
209 ld [%g1-4],%o0
210 addcc %o2,-1,%o2
211 umul %o0,%o0,%o5
212 rd %y,%o4
213 st %o5,[%o3-12]
214 mov %o4,%g3
215 mov 0,%g2
216 be .LL35
217 st %g3,[%o3-8]
! Input word 3 -> output words 6 and 7.
218 ld [%g1],%o0
219 addcc %o2,-1,%o2
220 umul %o0,%o0,%o5
221 rd %y,%o4
222 st %o5,[%o3-4]
223 mov %o4,%g3
224 mov 0,%g2
225 be .LL35
226 st %g3,[%o3]
! Advance: four input words, eight output words.
227 add %g1,16,%g1
228 add %o1,16,%o1
229 add %o3,32,%o3
230 b .LL34
231 add %g4,32,%g4
232.LL35:
233 retl
234 nop
235.LLfe3:
236 .size bn_sqr_words,.LLfe3-bn_sqr_words
! bn_add_words (compiler output).  Adds the words read through %o3 and
! %o4, word by word with carry, and stores each sum through %o2.
! Incoming %i0..%i3 are copied to %o2 (store pointer), %o3 and %o4 (load
! pointers) and %i5 (remaining word count).  %o0:%o1 is a 64-bit
! accumulator: after each word the low half is stored and the high half
! (the carry) is moved down into %o1 with %o0 cleared.
! Execution enters at .LL42 (first word of each group of four); .LL45
! handles the following three words using the +12 bases %g1, %g4, %o7.
! The carry (%o1) is returned in the caller's %o0 by "restore".
237 .align 4
238 .global bn_add_words
239 .type bn_add_words,#function
240 .proc 016
241bn_add_words:
242 !#PROLOGUE# 0
243 save %sp,-112,%sp
244 !#PROLOGUE# 1
245 mov %i0,%o2
246 mov %i1,%o3
247 mov %i2,%o4
248 mov %i3,%i5
249 mov 0,%o0
250 mov 0,%o1
251 add %o2,12,%o7
252 add %o4,12,%g4
253 b .LL42
254 add %o3,12,%g1
255.LL45:
! Word 1 of the group; its first operand was already loaded into %i4 by
! the annulled delay slot of the "bg,a .LL45" below.
256 add %i5,-1,%i5
257 mov %i4,%g3
258 ld [%g4-8],%i4
259 mov 0,%g2
260 mov %i4,%i1
261 mov 0,%i0
262 addcc %g3,%i1,%g3
263 addx %g2,%i0,%g2
264 addcc %o1,%g3,%o1
265 addx %o0,%g2,%o0
266 st %o1,[%o7-8]
267 mov %o0,%i3
268 mov 0,%i2
269 mov %i2,%o0
270 mov %i3,%o1
271 cmp %i5,0
272 ble .LL43
! Delay slot: the count is decremented for word 2 whether or not the
! branch is taken.
273 add %i5,-1,%i5
! Word 2.
274 ld [%g1-4],%i4
275 mov %i4,%g3
276 ld [%g4-4],%i4
277 mov 0,%g2
278 mov %i4,%i1
279 mov 0,%i0
280 addcc %g3,%i1,%g3
281 addx %g2,%i0,%g2
282 addcc %o1,%g3,%o1
283 addx %o0,%g2,%o0
284 st %o1,[%o7-4]
285 mov %o0,%i3
286 mov 0,%i2
287 mov %i2,%o0
288 mov %i3,%o1
289 cmp %i5,0
290 ble .LL43
291 add %i5,-1,%i5
! Word 3.
292 ld [%g1],%i4
293 mov %i4,%g3
294 ld [%g4],%i4
295 mov 0,%g2
296 mov %i4,%i1
297 mov 0,%i0
298 addcc %g3,%i1,%g3
299 addx %g2,%i0,%g2
300 addcc %o1,%g3,%o1
301 addx %o0,%g2,%o0
302 st %o1,[%o7]
303 mov %o0,%i3
304 mov 0,%i2
305 mov %i2,%o0
306 mov %i3,%o1
307 cmp %i5,0
308 ble .LL43
! Delay slot, then advance the remaining five pointers by four words.
309 add %g1,16,%g1
310 add %o3,16,%o3
311 add %g4,16,%g4
312 add %o4,16,%o4
313 add %o7,16,%o7
314 add %o2,16,%o2
315.LL42:
! Word 0 of the group (also the entry point of the loop).
316 ld [%o3],%i4
317 add %i5,-1,%i5
318 mov %i4,%g3
319 ld [%o4],%i4
320 mov 0,%g2
321 mov %i4,%i1
322 mov 0,%i0
323 addcc %g3,%i1,%g3
324 addx %g2,%i0,%g2
325 addcc %o1,%g3,%o1
326 addx %o0,%g2,%o0
327 st %o1,[%o2]
328 mov %o0,%i3
329 mov 0,%i2
330 mov %i2,%o0
331 mov %i3,%o1
332 cmp %i5,0
! Annulled branch: the load in the delay slot only executes when the
! branch to .LL45 is taken.
333 bg,a .LL45
334 ld [%g1-8],%i4
335.LL43:
336 ret
337 restore %g0,%o1,%o0
338.LLfe4:
339 .size bn_add_words,.LLfe4-bn_add_words
! bn_div64 (compiler output).  Inputs arrive in %i0, %i1 and %i2; the
! code treats %i0:%i1 as a two-word dividend (high:low) and %i2 as the
! divisor, and builds the quotient from two 16-bit digits.
!   - divisor == 0: returns -1.
!   - BN_num_bits_word(divisor) != 32 and %i0 > (1 << bits): prints the
!     .LLC0 message via fprintf(__iob+32, ...) and calls abort.
!   - otherwise the operands are shifted left by (32 - bits), and two
!     rounds (%l0 counts down from 2) estimate a 16-bit quotient digit in
!     %o3, correct it downwards, and subtract digit*divisor from the
!     running remainder.
! The result (%l1 | %o3, i.e. first digit << 16 combined with the second)
! is returned in %i0.
340.section ".rodata"
341 .align 8
342.LLC0:
343 .asciz "Division would overflow (%d)\n"
344.section ".text"
345 .align 4
346 .global bn_div64
347 .type bn_div64,#function
348 .proc 016
349bn_div64:
350 !#PROLOGUE# 0
351 save %sp,-112,%sp
352 !#PROLOGUE# 1
353 mov 0,%l1
354 cmp %i2,0
355 bne .LL51
! Delay slot (always executed): round counter = 2.
356 mov 2,%l0
357 b .LL68
358 mov -1,%i0
359.LL51:
360 call BN_num_bits_word,0
361 mov %i2,%o0
! %o2 = bit length of the divisor; it is also still in %o2 when fprintf
! is called below, where it supplies the %d argument.
362 mov %o0,%o2
363 cmp %o2,32
364 be .LL52
365 mov 1,%o0
366 sll %o0,%o2,%o0
367 cmp %i0,%o0
368 bleu .LL69
369 mov 32,%o0
370 sethi %hi(__iob+32),%o0
371 or %o0,%lo(__iob+32),%o0
372 sethi %hi(.LLC0),%o1
373 call fprintf,0
374 or %o1,%lo(.LLC0),%o1
375 call abort,0
376 nop
377.LL52:
378 mov 32,%o0
379.LL69:
! %o2 becomes the normalisation shift (32 - bits); if high >= divisor the
! divisor is subtracted from the high word first.
380 cmp %i0,%i2
381 blu .LL53
382 sub %o0,%o2,%o2
383 sub %i0,%i2,%i0
384.LL53:
! Shift divisor and the two-word dividend left by %o2 (skipped if zero).
385 cmp %o2,0
386 be .LL54
387 sll %i0,%o2,%o1
388 sll %i2,%o2,%i2
389 sub %o0,%o2,%o0
390 srl %i1,%o0,%o0
391 or %o1,%o0,%i0
392 sll %i1,%o2,%i1
393.LL54:
! %g2 = divisor >> 16, %g3 = divisor & 0xffff, %o7 = 0xffff0000 mask,
! %g1 = 0x0000ffff mask.
394 srl %i2,16,%g2
395 sethi %hi(65535),%o0
396 or %o0,%lo(65535),%o1
397 and %i2,%o1,%g3
398 mov %o0,%g4
399 sethi %hi(-65536),%o7
400 mov %o1,%g1
401.LL55:
! Digit estimate: 0xffff if the top halves are equal, otherwise
! high / (divisor >> 16).  %y is cleared before udiv and three nops
! separate the write from the divide.
402 srl %i0,16,%o0
403 cmp %o0,%g2
404 be .LL59
405 or %g4,%lo(65535),%o3
406 wr %g0,%g0,%y
407 nop
408 nop
409 nop
410 udiv %i0,%g2,%o3
411.LL59:
412 and %i1,%o7,%o0
413 srl %o0,16,%o5
414 smul %o3,%g3,%o4
415 smul %o3,%g2,%o2
416.LL60:
! Correction loop: decrement the digit estimate (%o3) until the partial
! products %o4/%o2 no longer exceed the remainder.
417 sub %i0,%o2,%o1
418 andcc %o1,%o7,%g0
419 bne .LL61
420 sll %o1,16,%o0
421 add %o0,%o5,%o0
422 cmp %o4,%o0
423 bleu .LL61
424 sub %o4,%g3,%o4
425 sub %o2,%g2,%o2
426 b .LL60
427 add %o3,-1,%o3
428.LL61:
! Recompute digit*divisor as a two-word value and subtract it from the
! running remainder, adding the divisor back (and decrementing the digit)
! if the subtraction would go negative.
429 smul %o3,%g2,%o2
430 smul %o3,%g3,%o0
431 srl %o0,16,%o1
432 sll %o0,16,%o0
433 and %o0,%o7,%o0
434 cmp %i1,%o0
435 bgeu .LL65
436 add %o2,%o1,%o2
437 add %o2,1,%o2
438.LL65:
439 cmp %i0,%o2
440 bgeu .LL66
441 sub %i1,%o0,%i1
442 add %i0,%i2,%i0
443 add %o3,-1,%o3
444.LL66:
445 addcc %l0,-1,%l0
446 be .LL56
447 sub %i0,%o2,%i0
! First round done: keep the digit in the upper half of %l1 and shift the
! remainder left by 16 bits for the second round.
448 sll %o3,16,%l1
449 sll %i0,16,%o0
450 srl %i1,16,%o1
451 or %o0,%o1,%i0
452 and %i1,%g1,%o0
453 b .LL55
454 sll %o0,16,%i1
455.LL56:
456 or %l1,%o3,%i0
457.LL68:
458 ret
459 restore
460.LLfe5:
461 .size bn_div64,.LLfe5-bn_div64
462 .ident "GCC: (GNU) 2.7.2.3"
diff --git a/src/lib/libcrypto/bn/asm/sparcv8.S b/src/lib/libcrypto/bn/asm/sparcv8.S
new file mode 100644
index 0000000000..88c5dc480a
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/sparcv8.S
@@ -0,0 +1,1458 @@
1.ident "sparcv8.s, Version 1.4"
2.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contribution to the OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * See bn_asm.sparc.v8plus.S for more details.
22 */
23
24/*
25 * Revision history.
26 *
27 * 1.1 - new loop unrolling model(*);
28 * 1.2 - made gas friendly;
29 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
30 * 1.4 - some retunes;
31 *
32 * (*) see bn_asm.sparc.v8plus.S for details
33 */
34
35.section ".text",#alloc,#execinstr
36.file "bn_asm.sparc.v8.S"
37
38.align 32
39
40.global bn_mul_add_words
41/*
42 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
43 * BN_ULONG *rp,*ap;
44 * int num;
45 * BN_ULONG w;
46 */
/*
 * Leaf routine working directly on %o0 (rp), %o1 (ap), %o2 (num) and
 * %o3 (w).  For each word: {carry, rp[i]} = rp[i] + ap[i]*w + carry.
 * %o5 is the running carry, %g2/%g3 hold ap words loaded ahead of use,
 * %g1 receives the high half of each product from %y.
 * Returns 0 immediately when num <= 0; otherwise returns the final carry.
 * Whole groups of four words go through the unrolled loop (taken while
 * num & -4 is non-zero); the remaining 1..3 words go through the tail.
 */
47bn_mul_add_words:
48 cmp %o2,0
/* Annulled branch: the load of ap[0] in the delay slot only executes
   when the branch is taken. */
49 bg,a .L_bn_mul_add_words_proceed
50 ld [%o1],%g2
51 retl
52 clr %o0
53
54.L_bn_mul_add_words_proceed:
55 andcc %o2,-4,%g0
56 bz .L_bn_mul_add_words_tail
/* Delay slot (always executed): carry = 0 for both loop and tail. */
57 clr %o5
58
59.L_bn_mul_add_words_loop:
60 ld [%o0],%o4
61 ld [%o1+4],%g3
62 umul %o3,%g2,%g2
63 rd %y,%g1
64 addcc %o4,%o5,%o4
65 addx %g1,0,%g1
66 addcc %o4,%g2,%o4
67 st %o4,[%o0]
68 addx %g1,0,%o5
69
70 ld [%o0+4],%o4
71 ld [%o1+8],%g2
72 umul %o3,%g3,%g3
73 dec 4,%o2
74 rd %y,%g1
75 addcc %o4,%o5,%o4
76 addx %g1,0,%g1
77 addcc %o4,%g3,%o4
78 st %o4,[%o0+4]
79 addx %g1,0,%o5
80
81 ld [%o0+8],%o4
82 ld [%o1+12],%g3
83 umul %o3,%g2,%g2
84 inc 16,%o1
85 rd %y,%g1
86 addcc %o4,%o5,%o4
87 addx %g1,0,%g1
88 addcc %o4,%g2,%o4
89 st %o4,[%o0+8]
90 addx %g1,0,%o5
91
92 ld [%o0+12],%o4
93 umul %o3,%g3,%g3
94 inc 16,%o0
95 rd %y,%g1
96 addcc %o4,%o5,%o4
97 addx %g1,0,%g1
98 addcc %o4,%g3,%o4
/* rp was already advanced by 16, hence the -4 offset for word 3. */
99 st %o4,[%o0-4]
100 addx %g1,0,%o5
101 andcc %o2,-4,%g0
102 bnz,a .L_bn_mul_add_words_loop
103 ld [%o1],%g2
104
105 tst %o2
106 bnz,a .L_bn_mul_add_words_tail
107 ld [%o1],%g2
108.L_bn_mul_add_words_return:
109 retl
110 mov %o5,%o0
111 nop
112
/* Tail: up to three remaining words; %g2 already holds the next ap word
   on entry. */
113.L_bn_mul_add_words_tail:
114 ld [%o0],%o4
115 umul %o3,%g2,%g2
116 addcc %o4,%o5,%o4
117 rd %y,%g1
118 addx %g1,0,%g1
119 addcc %o4,%g2,%o4
120 addx %g1,0,%o5
121 deccc %o2
122 bz .L_bn_mul_add_words_return
123 st %o4,[%o0]
124
125 ld [%o1+4],%g2
126 ld [%o0+4],%o4
127 umul %o3,%g2,%g2
128 rd %y,%g1
129 addcc %o4,%o5,%o4
130 addx %g1,0,%g1
131 addcc %o4,%g2,%o4
132 addx %g1,0,%o5
133 deccc %o2
134 bz .L_bn_mul_add_words_return
135 st %o4,[%o0+4]
136
137 ld [%o1+8],%g2
138 ld [%o0+8],%o4
139 umul %o3,%g2,%g2
140 rd %y,%g1
141 addcc %o4,%o5,%o4
142 addx %g1,0,%g1
143 addcc %o4,%g2,%o4
144 st %o4,[%o0+8]
145 retl
/* Delay slot: the final carry is formed directly in the return register. */
146 addx %g1,0,%o0
147
148.type bn_mul_add_words,#function
149.size bn_mul_add_words,(.-bn_mul_add_words)
150
151.align 32
152
153.global bn_mul_words
154/*
155 * BN_ULONG bn_mul_words(rp,ap,num,w)
156 * BN_ULONG *rp,*ap;
157 * int num;
158 * BN_ULONG w;
159 */
/*
 * Leaf routine working directly on %o0 (rp), %o1 (ap), %o2 (num) and
 * %o3 (w).  For each word: {carry, rp[i]} = ap[i]*w + carry.
 * %o5 is the running carry, %g2/%g3 hold ap words loaded ahead of use,
 * %g1 receives the high half of each product from %y.
 * Returns 0 immediately when num <= 0; otherwise returns the final carry.
 * Whole groups of four words go through the unrolled loop; the remaining
 * 1..3 words go through the tail.
 */
160bn_mul_words:
161 cmp %o2,0
/* Annulled branch: ap[0] is loaded only when the branch is taken. */
162 bg,a .L_bn_mul_words_proceeed
163 ld [%o1],%g2
164 retl
165 clr %o0
166
167.L_bn_mul_words_proceeed:
168 andcc %o2,-4,%g0
169 bz .L_bn_mul_words_tail
/* Delay slot (always executed): carry = 0 for both loop and tail. */
170 clr %o5
171
172.L_bn_mul_words_loop:
173 ld [%o1+4],%g3
174 umul %o3,%g2,%g2
175 addcc %g2,%o5,%g2
176 rd %y,%g1
177 addx %g1,0,%o5
178 st %g2,[%o0]
179
180 ld [%o1+8],%g2
181 umul %o3,%g3,%g3
182 addcc %g3,%o5,%g3
183 rd %y,%g1
184 dec 4,%o2
185 addx %g1,0,%o5
186 st %g3,[%o0+4]
187
188 ld [%o1+12],%g3
189 umul %o3,%g2,%g2
190 addcc %g2,%o5,%g2
191 rd %y,%g1
192 inc 16,%o1
193 st %g2,[%o0+8]
194 addx %g1,0,%o5
195
196 umul %o3,%g3,%g3
197 addcc %g3,%o5,%g3
198 rd %y,%g1
199 inc 16,%o0
200 addx %g1,0,%o5
/* rp was already advanced by 16, hence the -4 offset for word 3. */
201 st %g3,[%o0-4]
202 andcc %o2,-4,%g0
203 nop
204 bnz,a .L_bn_mul_words_loop
205 ld [%o1],%g2
206
207 tst %o2
208 bnz,a .L_bn_mul_words_tail
209 ld [%o1],%g2
210.L_bn_mul_words_return:
211 retl
212 mov %o5,%o0
213 nop
214
/* Tail: up to three remaining words; %g2 already holds the next ap word
   on entry. */
215.L_bn_mul_words_tail:
216 umul %o3,%g2,%g2
217 addcc %g2,%o5,%g2
218 rd %y,%g1
219 addx %g1,0,%o5
220 deccc %o2
221 bz .L_bn_mul_words_return
222 st %g2,[%o0]
223 nop
224
225 ld [%o1+4],%g2
226 umul %o3,%g2,%g2
227 addcc %g2,%o5,%g2
228 rd %y,%g1
229 addx %g1,0,%o5
230 deccc %o2
231 bz .L_bn_mul_words_return
232 st %g2,[%o0+4]
233
234 ld [%o1+8],%g2
235 umul %o3,%g2,%g2
236 addcc %g2,%o5,%g2
237 rd %y,%g1
238 st %g2,[%o0+8]
239 retl
/* Delay slot: the final carry is formed directly in the return register. */
240 addx %g1,0,%o0
241
242.type bn_mul_words,#function
243.size bn_mul_words,(.-bn_mul_words)
244
245.align 32
246.global bn_sqr_words
247/*
248 * void bn_sqr_words(r,a,n)
249 * BN_ULONG *r,*a;
250 * int n;
251 */
/*
 * Leaf routine working directly on %o0 (r), %o1 (a) and %o2 (n).
 * For each input word a[i] the 64-bit square is stored as r[2*i] (low
 * half, umul result) and r[2*i+1] (high half, read from %y).
 * Returns at once when n <= 0.  Whole groups of four input words go
 * through the unrolled loop; the remaining 1..3 words go through the
 * tail.  %g2/%g3 hold input words loaded ahead of use; %o4/%o5 hold the
 * two halves of the current square.  %o0 is cleared on every return path
 * although the prototype is void.
 */
252bn_sqr_words:
253 cmp %o2,0
/* Annulled branch: a[0] is loaded only when the branch is taken. */
254 bg,a .L_bn_sqr_words_proceeed
255 ld [%o1],%g2
256 retl
257 clr %o0
258
259.L_bn_sqr_words_proceeed:
260 andcc %o2,-4,%g0
261 bz .L_bn_sqr_words_tail
262 clr %o5
263
264.L_bn_sqr_words_loop:
265 ld [%o1+4],%g3
266 umul %g2,%g2,%o4
267 st %o4,[%o0]
268 rd %y,%o5
269 st %o5,[%o0+4]
270
271 ld [%o1+8],%g2
272 umul %g3,%g3,%o4
273 dec 4,%o2
274 st %o4,[%o0+8]
275 rd %y,%o5
276 st %o5,[%o0+12]
277 nop
278
279 ld [%o1+12],%g3
280 umul %g2,%g2,%o4
281 st %o4,[%o0+16]
282 rd %y,%o5
283 inc 16,%o1
284 st %o5,[%o0+20]
285
286 umul %g3,%g3,%o4
287 inc 32,%o0
/* r was already advanced by 32, hence the -8/-4 offsets for word 3. */
288 st %o4,[%o0-8]
289 rd %y,%o5
290 st %o5,[%o0-4]
/* Only the condition codes are needed here, so the result is discarded
   into %g0, as in the other loops of this file; %g2 is reloaded in the
   delay slot before its next use. */
291 andcc %o2,-4,%g0
292 bnz,a .L_bn_sqr_words_loop
293 ld [%o1],%g2
294
295 tst %o2
296 nop
297 bnz,a .L_bn_sqr_words_tail
298 ld [%o1],%g2
299.L_bn_sqr_words_return:
300 retl
301 clr %o0
302
/* Tail: up to three remaining words; %g2 already holds the next input
   word on entry. */
303.L_bn_sqr_words_tail:
304 umul %g2,%g2,%o4
305 st %o4,[%o0]
306 deccc %o2
307 rd %y,%o5
308 bz .L_bn_sqr_words_return
309 st %o5,[%o0+4]
310
311 ld [%o1+4],%g2
312 umul %g2,%g2,%o4
313 st %o4,[%o0+8]
314 deccc %o2
315 rd %y,%o5
316 nop
317 bz .L_bn_sqr_words_return
318 st %o5,[%o0+12]
319
320 ld [%o1+8],%g2
321 umul %g2,%g2,%o4
322 st %o4,[%o0+16]
323 rd %y,%o5
324 st %o5,[%o0+20]
325 retl
326 clr %o0
327
328.type bn_sqr_words,#function
329.size bn_sqr_words,(.-bn_sqr_words)
330
331.align 32
332
333.global bn_div_words
334/*
335 * BN_ULONG bn_div_words(h,l,d)
336 * BN_ULONG h,l,d;
337 */
/*
 * Divides the 64-bit value h:l (%o0:%o1) by d (%o2) with a single udiv,
 * which takes the upper 32 bits of its dividend from %y, and returns the
 * 32-bit quotient in %o0.
 *
 * WRY is a delayed-write instruction on SPARC V8: an instruction that
 * reads %y within three instructions of the write may see a stale value.
 * Three nops therefore separate the write from udiv (the same padding the
 * compiler emits around "wr ...,%y" / "udiv").
 */
338bn_div_words:
339 wr %o0,%y
 nop
 nop
 nop
340 udiv %o1,%o2,%o0
341 retl
342 nop
343
344.type bn_div_words,#function
345.size bn_div_words,(.-bn_div_words)
346
347.align 32
348
349.global bn_add_words
350/*
351 * BN_ULONG bn_add_words(rp,ap,bp,n)
352 * BN_ULONG *rp,*ap,*bp;
353 * int n;
354 */
/*
 * Leaf routine working directly on %o0 (rp), %o1 (ap), %o2 (bp) and
 * %o3 (n).  rp[i] = ap[i] + bp[i] + carry; returns the final carry
 * (0 or 1), or 0 immediately when n <= 0.
 * Inside a group of four words the carry lives in the condition codes
 * (addxcc chain; the ld/st/inc/dec in between do not touch them).
 * Across the loop test, which needs the condition codes itself, the carry
 * is parked in %g1 ("addx %g0,0,%g1") and put back with
 * "addcc %g1,-1,%g0", which sets the carry flag exactly when %g1 is 1.
 */
355bn_add_words:
356 cmp %o3,0
/* Annulled branch: ap[0] is loaded only when the branch is taken. */
357 bg,a .L_bn_add_words_proceed
358 ld [%o1],%o4
359 retl
360 clr %o0
361
362.L_bn_add_words_proceed:
363 andcc %o3,-4,%g0
364 bz .L_bn_add_words_tail
/* Delay slot (always executed): saved carry = 0. */
365 clr %g1
/* First pass enters the loop after its initial load, since ap[0] is
   already in %o4. */
366 ba .L_bn_add_words_warn_loop
367 addcc %g0,0,%g0 ! clear carry flag
368
369.L_bn_add_words_loop:
370 ld [%o1],%o4
371.L_bn_add_words_warn_loop:
372 ld [%o2],%o5
373 ld [%o1+4],%g3
374 ld [%o2+4],%g4
375 dec 4,%o3
376 addxcc %o5,%o4,%o5
377 st %o5,[%o0]
378
379 ld [%o1+8],%o4
380 ld [%o2+8],%o5
381 inc 16,%o1
382 addxcc %g3,%g4,%g3
383 st %g3,[%o0+4]
384
/* ap was already advanced by 16, hence the -4 offset for word 3. */
385 ld [%o1-4],%g3
386 ld [%o2+12],%g4
387 inc 16,%o2
388 addxcc %o5,%o4,%o5
389 st %o5,[%o0+8]
390
391 inc 16,%o0
392 addxcc %g3,%g4,%g3
393 st %g3,[%o0-4]
/* Save the carry before andcc overwrites the condition codes. */
394 addx %g0,0,%g1
395 andcc %o3,-4,%g0
396 bnz,a .L_bn_add_words_loop
/* Annulled delay slot: restore the carry flag when looping. */
397 addcc %g1,-1,%g0
398
399 tst %o3
400 bnz,a .L_bn_add_words_tail
401 ld [%o1],%o4
402.L_bn_add_words_return:
403 retl
404 mov %g1,%o0
405
/* Tail: up to three remaining words; %o4 already holds the next ap word
   and %g1 the saved carry on entry. */
406.L_bn_add_words_tail:
407 addcc %g1,-1,%g0
408 ld [%o2],%o5
409 addxcc %o5,%o4,%o5
410 addx %g0,0,%g1
411 deccc %o3
412 bz .L_bn_add_words_return
413 st %o5,[%o0]
414
415 ld [%o1+4],%o4
416 addcc %g1,-1,%g0
417 ld [%o2+4],%o5
418 addxcc %o5,%o4,%o5
419 addx %g0,0,%g1
420 deccc %o3
421 bz .L_bn_add_words_return
422 st %o5,[%o0+4]
423
424 ld [%o1+8],%o4
425 addcc %g1,-1,%g0
426 ld [%o2+8],%o5
427 addxcc %o5,%o4,%o5
428 st %o5,[%o0+8]
429 retl
/* Delay slot: the final carry is formed directly in the return register. */
430 addx %g0,0,%o0
431
432.type bn_add_words,#function
433.size bn_add_words,(.-bn_add_words)
434
435.align 32
436
437.global bn_sub_words
438/*
439 * BN_ULONG bn_sub_words(rp,ap,bp,n)
440 * BN_ULONG *rp,*ap,*bp;
441 * int n;
442 */
/*
 * Leaf routine working directly on %o0 (rp), %o1 (ap), %o2 (bp) and
 * %o3 (n).  rp[i] = ap[i] - bp[i] - borrow; returns the final borrow
 * (0 or 1), or 0 immediately when n <= 0.
 * Inside a group of four words the borrow lives in the carry flag
 * (subxcc chain; the ld/st/inc/dec in between do not touch it).  Across
 * the loop test it is parked in %g1 ("addx %g0,0,%g1") and put back with
 * "addcc %g1,-1,%g0", which sets the carry flag exactly when %g1 is 1.
 */
443bn_sub_words:
444 cmp %o3,0
/* Annulled branch: ap[0] is loaded only when the branch is taken. */
445 bg,a .L_bn_sub_words_proceed
446 ld [%o1],%o4
447 retl
448 clr %o0
449
450.L_bn_sub_words_proceed:
451 andcc %o3,-4,%g0
452 bz .L_bn_sub_words_tail
/* Delay slot (always executed): saved borrow = 0. */
453 clr %g1
/* First pass enters the loop after its initial load, since ap[0] is
   already in %o4. */
454 ba .L_bn_sub_words_warm_loop
455 addcc %g0,0,%g0 ! clear carry flag
456
457.L_bn_sub_words_loop:
458 ld [%o1],%o4
459.L_bn_sub_words_warm_loop:
460 ld [%o2],%o5
461 ld [%o1+4],%g3
462 ld [%o2+4],%g4
463 dec 4,%o3
464 subxcc %o4,%o5,%o5
465 st %o5,[%o0]
466
467 ld [%o1+8],%o4
468 ld [%o2+8],%o5
469 inc 16,%o1
470 subxcc %g3,%g4,%g4
471 st %g4,[%o0+4]
472
/* ap was already advanced by 16, hence the -4 offset for word 3. */
473 ld [%o1-4],%g3
474 ld [%o2+12],%g4
475 inc 16,%o2
476 subxcc %o4,%o5,%o5
477 st %o5,[%o0+8]
478
479 inc 16,%o0
480 subxcc %g3,%g4,%g4
481 st %g4,[%o0-4]
/* Save the borrow before andcc overwrites the condition codes. */
482 addx %g0,0,%g1
483 andcc %o3,-4,%g0
484 bnz,a .L_bn_sub_words_loop
/* Annulled delay slot: restore the borrow flag when looping. */
485 addcc %g1,-1,%g0
486
487 tst %o3
488 nop
489 bnz,a .L_bn_sub_words_tail
490 ld [%o1],%o4
491.L_bn_sub_words_return:
492 retl
493 mov %g1,%o0
494
/* Tail: up to three remaining words; %o4 already holds the next ap word
   and %g1 the saved borrow on entry. */
495.L_bn_sub_words_tail:
496 addcc %g1,-1,%g0
497 ld [%o2],%o5
498 subxcc %o4,%o5,%o5
499 addx %g0,0,%g1
500 deccc %o3
501 bz .L_bn_sub_words_return
502 st %o5,[%o0]
503 nop
504
505 ld [%o1+4],%o4
506 addcc %g1,-1,%g0
507 ld [%o2+4],%o5
508 subxcc %o4,%o5,%o5
509 addx %g0,0,%g1
510 deccc %o3
511 bz .L_bn_sub_words_return
512 st %o5,[%o0+4]
513
514 ld [%o1+8],%o4
515 addcc %g1,-1,%g0
516 ld [%o2+8],%o5
517 subxcc %o4,%o5,%o5
518 st %o5,[%o0+8]
519 retl
/* Delay slot: the final borrow is formed directly in the return register. */
520 addx %g0,0,%o0
521
522.type bn_sub_words,#function
523.size bn_sub_words,(.-bn_sub_words)
524
/* Immediate passed to "save %sp,FRAME_SIZE,%sp" on entry to
   bn_mul_comba8. */
525#define FRAME_SIZE -96
526
527/*
528 * Here is register usage map for *all* routines below.
529 */
/* t_1/t_2: low and high half of the current product (umul result and the
   value read back from %y). */
530#define t_1 %o0
531#define t_2 %o1
/* c_1..c_3: the three accumulator words that rotate from column to
   column. */
532#define c_1 %o2
533#define c_2 %o3
534#define c_3 %o4
535
/* Memory operand for 4-byte word I of the array addressed by %i1, %i2 and
   %i0 respectively. */
536#define ap(I) [%i1+4*I]
537#define bp(I) [%i2+4*I]
538#define rp(I) [%i0+4*I]
539
/* a_0..a_7 are kept in the local registers. */
540#define a_0 %l0
541#define a_1 %l1
542#define a_2 %l2
543#define a_3 %l3
544#define a_4 %l4
545#define a_5 %l5
546#define a_6 %l6
547#define a_7 %l7
548
/* b_0..b_2 use the remaining in registers, b_3 an out register and
   b_4..b_7 the global registers %g1..%g4. */
549#define b_0 %i3
550#define b_1 %i4
551#define b_2 %i5
552#define b_3 %o5
553#define b_4 %g1
554#define b_5 %g2
555#define b_6 %g3
556#define b_7 %g4
557
558.align 32
559.global bn_mul_comba8
560/*
561 * void bn_mul_comba8(r,a,b)
562 * BN_ULONG *r,*a,*b;
 *
 * Stores the 16-word product r[0..15] of the 8-word inputs a[0..7] and
 * b[0..7], one result column at a time.  Each mul_add_c step adds one
 * 64-bit partial product (t_1 = low word from umul, t_2 = high word
 * read from %y) into the three-word accumulator; the roles of
 * c_1/c_2/c_3 rotate after every stored column.  Input words are loaded
 * lazily, interleaved with the arithmetic.  The final `restore` leaves
 * zero in the caller's %o0.
563 */
564bn_mul_comba8:
565 save %sp,FRAME_SIZE,%sp
566 ld ap(0),a_0
567 ld bp(0),b_0
568 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
569 ld bp(1),b_1
570 rd %y,c_2
571 st c_1,rp(0) !r[0]=c1;
572
573 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
574 ld ap(1),a_1
575 addcc c_2,t_1,c_2
576 rd %y,t_2
577 addxcc %g0,t_2,c_3 !=
578 addx %g0,%g0,c_1
579 ld ap(2),a_2
580 umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1);
581 addcc c_2,t_1,c_2 !=
582 rd %y,t_2
583 addxcc c_3,t_2,c_3
584 st c_2,rp(1) !r[1]=c2;
585 addx c_1,%g0,c_1 !=
586
587 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
588 addcc c_3,t_1,c_3
589 rd %y,t_2
590 addxcc c_1,t_2,c_1 !=
591 addx %g0,%g0,c_2
592 ld bp(2),b_2
593 umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
594 addcc c_3,t_1,c_3 !=
595 rd %y,t_2
596 addxcc c_1,t_2,c_1
597 ld bp(3),b_3
598 addx c_2,%g0,c_2 !=
599 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
600 addcc c_3,t_1,c_3
601 rd %y,t_2
602 addxcc c_1,t_2,c_1 !=
603 addx c_2,%g0,c_2
604 st c_3,rp(2) !r[2]=c3;
605
606 umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
607 addcc c_1,t_1,c_1 !=
608 rd %y,t_2
609 addxcc c_2,t_2,c_2
610 addx %g0,%g0,c_3
611 umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
612 addcc c_1,t_1,c_1
613 rd %y,t_2
614 addxcc c_2,t_2,c_2
615 addx c_3,%g0,c_3 !=
616 ld ap(3),a_3
617 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
618 addcc c_1,t_1,c_1
619 rd %y,t_2 !=
620 addxcc c_2,t_2,c_2
621 addx c_3,%g0,c_3
622 ld ap(4),a_4
623 umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
624 addcc c_1,t_1,c_1
625 rd %y,t_2
626 addxcc c_2,t_2,c_2
627 addx c_3,%g0,c_3 !=
628 st c_1,rp(3) !r[3]=c1;
629
630 umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
631 addcc c_2,t_1,c_2
632 rd %y,t_2 !=
633 addxcc c_3,t_2,c_3
634 addx %g0,%g0,c_1
635 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
636 addcc c_2,t_1,c_2 !=
637 rd %y,t_2
638 addxcc c_3,t_2,c_3
639 addx c_1,%g0,c_1
640 umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
641 addcc c_2,t_1,c_2
642 rd %y,t_2
643 addxcc c_3,t_2,c_3
644 addx c_1,%g0,c_1 !=
645 ld bp(4),b_4
646 umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
647 addcc c_2,t_1,c_2
648 rd %y,t_2 !=
649 addxcc c_3,t_2,c_3
650 addx c_1,%g0,c_1
651 ld bp(5),b_5
652 umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1);
653 addcc c_2,t_1,c_2
654 rd %y,t_2
655 addxcc c_3,t_2,c_3
656 addx c_1,%g0,c_1 !=
657 st c_2,rp(4) !r[4]=c2;
658
659 umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
660 addcc c_3,t_1,c_3
661 rd %y,t_2 !=
662 addxcc c_1,t_2,c_1
663 addx %g0,%g0,c_2
664 umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
665 addcc c_3,t_1,c_3 !=
666 rd %y,t_2
667 addxcc c_1,t_2,c_1
668 addx c_2,%g0,c_2
669 umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2);
670 addcc c_3,t_1,c_3
671 rd %y,t_2
672 addxcc c_1,t_2,c_1
673 addx c_2,%g0,c_2 !=
674 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
675 addcc c_3,t_1,c_3
676 rd %y,t_2
677 addxcc c_1,t_2,c_1 !=
678 addx c_2,%g0,c_2
679 ld ap(5),a_5
680 umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
681 addcc c_3,t_1,c_3 !=
682 rd %y,t_2
683 addxcc c_1,t_2,c_1
684 ld ap(6),a_6
685 addx c_2,%g0,c_2 !=
686 umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2);
687 addcc c_3,t_1,c_3
688 rd %y,t_2
689 addxcc c_1,t_2,c_1 !=
690 addx c_2,%g0,c_2
691 st c_3,rp(5) !r[5]=c3;
692
693 umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
694 addcc c_1,t_1,c_1 !=
695 rd %y,t_2
696 addxcc c_2,t_2,c_2
697 addx %g0,%g0,c_3
698 umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
699 addcc c_1,t_1,c_1
700 rd %y,t_2
701 addxcc c_2,t_2,c_2
702 addx c_3,%g0,c_3 !=
703 umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3);
704 addcc c_1,t_1,c_1
705 rd %y,t_2
706 addxcc c_2,t_2,c_2 !=
707 addx c_3,%g0,c_3
708 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
709 addcc c_1,t_1,c_1
710 rd %y,t_2 !=
711 addxcc c_2,t_2,c_2
712 addx c_3,%g0,c_3
713 umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3);
714 addcc c_1,t_1,c_1 !=
715 rd %y,t_2
716 addxcc c_2,t_2,c_2
717 ld bp(6),b_6
718 addx c_3,%g0,c_3 !=
719 umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
720 addcc c_1,t_1,c_1
721 rd %y,t_2
722 addxcc c_2,t_2,c_2 !=
723 addx c_3,%g0,c_3
724 ld bp(7),b_7
725 umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
726 addcc c_1,t_1,c_1 !=
727 rd %y,t_2
728 addxcc c_2,t_2,c_2
729 st c_1,rp(6) !r[6]=c1;
730 addx c_3,%g0,c_3 !=
731
732 umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
733 addcc c_2,t_1,c_2
734 rd %y,t_2
735 addxcc c_3,t_2,c_3 !=
736 addx %g0,%g0,c_1
737 umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
738 addcc c_2,t_1,c_2
739 rd %y,t_2 !=
740 addxcc c_3,t_2,c_3
741 addx c_1,%g0,c_1
742 umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
743 addcc c_2,t_1,c_2 !=
744 rd %y,t_2
745 addxcc c_3,t_2,c_3
746 addx c_1,%g0,c_1
747 umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1);
748 addcc c_2,t_1,c_2
749 rd %y,t_2
750 addxcc c_3,t_2,c_3
751 addx c_1,%g0,c_1 !=
752 umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
753 addcc c_2,t_1,c_2
754 rd %y,t_2
755 addxcc c_3,t_2,c_3 !=
756 addx c_1,%g0,c_1
757 umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
758 addcc c_2,t_1,c_2
759 rd %y,t_2 !=
760 addxcc c_3,t_2,c_3
761 addx c_1,%g0,c_1
762 ld ap(7),a_7
763 umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
764 addcc c_2,t_1,c_2
765 rd %y,t_2
766 addxcc c_3,t_2,c_3
767 addx c_1,%g0,c_1 !=
768 umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1);
769 addcc c_2,t_1,c_2
770 rd %y,t_2
771 addxcc c_3,t_2,c_3 !=
772 addx c_1,%g0,c_1
773 st c_2,rp(7) !r[7]=c2;
774
775 umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2);
776 addcc c_3,t_1,c_3 !=
777 rd %y,t_2
778 addxcc c_1,t_2,c_1
779 addx %g0,%g0,c_2
780 umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2);
781 addcc c_3,t_1,c_3
782 rd %y,t_2
783 addxcc c_1,t_2,c_1
784 addx c_2,%g0,c_2 !=
785 umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
786 addcc c_3,t_1,c_3
787 rd %y,t_2
788 addxcc c_1,t_2,c_1 !=
789 addx c_2,%g0,c_2
790 umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
791 addcc c_3,t_1,c_3
792 rd %y,t_2 !=
793 addxcc c_1,t_2,c_1
794 addx c_2,%g0,c_2
795 umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
796 addcc c_3,t_1,c_3 !=
797 rd %y,t_2
798 addxcc c_1,t_2,c_1
799 addx c_2,%g0,c_2
800 umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2);
801 addcc c_3,t_1,c_3
802 rd %y,t_2
803 addxcc c_1,t_2,c_1
804 addx c_2,%g0,c_2 !=
805 umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
806 addcc c_3,t_1,c_3
807 rd %y,t_2
808 addxcc c_1,t_2,c_1 !
809 addx c_2,%g0,c_2
810 st c_3,rp(8) !r[8]=c3;
811
812 umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3);
813 addcc c_1,t_1,c_1 !=
814 rd %y,t_2
815 addxcc c_2,t_2,c_2
816 addx %g0,%g0,c_3
817 umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3);
818 addcc c_1,t_1,c_1
819 rd %y,t_2
820 addxcc c_2,t_2,c_2
821 addx c_3,%g0,c_3 !=
822 umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
823 addcc c_1,t_1,c_1
824 rd %y,t_2
825 addxcc c_2,t_2,c_2 !=
826 addx c_3,%g0,c_3
827 umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
828 addcc c_1,t_1,c_1
829 rd %y,t_2 !=
830 addxcc c_2,t_2,c_2
831 addx c_3,%g0,c_3
832 umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
833 addcc c_1,t_1,c_1 !=
834 rd %y,t_2
835 addxcc c_2,t_2,c_2
836 addx c_3,%g0,c_3
837 umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3);
838 addcc c_1,t_1,c_1
839 rd %y,t_2
840 addxcc c_2,t_2,c_2
841 addx c_3,%g0,c_3 !=
842 st c_1,rp(9) !r[9]=c1;
843
844 umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
845 addcc c_2,t_1,c_2
846 rd %y,t_2 !=
847 addxcc c_3,t_2,c_3
848 addx %g0,%g0,c_1
849 umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
850 addcc c_2,t_1,c_2 !=
851 rd %y,t_2
852 addxcc c_3,t_2,c_3
853 addx c_1,%g0,c_1
854 umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1);
855 addcc c_2,t_1,c_2
856 rd %y,t_2
857 addxcc c_3,t_2,c_3
858 addx c_1,%g0,c_1 !=
859 umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
860 addcc c_2,t_1,c_2
861 rd %y,t_2
862 addxcc c_3,t_2,c_3 !=
863 addx c_1,%g0,c_1
864 umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
865 addcc c_2,t_1,c_2
866 rd %y,t_2 !=
867 addxcc c_3,t_2,c_3
868 addx c_1,%g0,c_1
869 st c_2,rp(10) !r[10]=c2;
870
871 umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2);
872 addcc c_3,t_1,c_3
873 rd %y,t_2
874 addxcc c_1,t_2,c_1
875 addx %g0,%g0,c_2 !=
876 umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
877 addcc c_3,t_1,c_3
878 rd %y,t_2
879 addxcc c_1,t_2,c_1 !=
880 addx c_2,%g0,c_2
881 umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
882 addcc c_3,t_1,c_3
883 rd %y,t_2 !=
884 addxcc c_1,t_2,c_1
885 addx c_2,%g0,c_2
886 umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
887 addcc c_3,t_1,c_3 !=
888 rd %y,t_2
889 addxcc c_1,t_2,c_1
890 st c_3,rp(11) !r[11]=c3;
891 addx c_2,%g0,c_2 !=
892
893 umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
894 addcc c_1,t_1,c_1
895 rd %y,t_2
896 addxcc c_2,t_2,c_2 !=
897 addx %g0,%g0,c_3
898 umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
899 addcc c_1,t_1,c_1
900 rd %y,t_2 !=
901 addxcc c_2,t_2,c_2
902 addx c_3,%g0,c_3
903 umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
904 addcc c_1,t_1,c_1 !=
905 rd %y,t_2
906 addxcc c_2,t_2,c_2
907 st c_1,rp(12) !r[12]=c1;
908 addx c_3,%g0,c_3 !=
909
910 umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
911 addcc c_2,t_1,c_2
912 rd %y,t_2
913 addxcc c_3,t_2,c_3 !=
914 addx %g0,%g0,c_1
915 umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
916 addcc c_2,t_1,c_2
917 rd %y,t_2 !=
918 addxcc c_3,t_2,c_3
919 addx c_1,%g0,c_1
920 st c_2,rp(13) !r[13]=c2;
921
922 umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2);
923 addcc c_3,t_1,c_3
924 rd %y,t_2
925 addxcc c_1,t_2,c_1
926 nop !=
927 st c_3,rp(14) !r[14]=c3;
928 st c_1,rp(15) !r[15]=c1;
929
930 ret
931 restore %g0,%g0,%o0
932
933.type bn_mul_comba8,#function
934.size bn_mul_comba8,(.-bn_mul_comba8)
935
936.align 32
937
938.global bn_mul_comba4
939/*
940 * void bn_mul_comba4(r,a,b)
941 * BN_ULONG *r,*a,*b;
 *
 * Stores the 8-word product r[0..7] of the 4-word inputs a[0..3] and
 * b[0..3], one result column at a time.  Each mul_add_c step adds one
 * 64-bit partial product (t_1 = low word from umul, t_2 = high word
 * read from %y) into the three-word accumulator; the roles of
 * c_1/c_2/c_3 rotate after every stored column.  The final `restore`
 * leaves zero in the caller's %o0.
942 */
943bn_mul_comba4:
944 save %sp,FRAME_SIZE,%sp
945 ld ap(0),a_0
946 ld bp(0),b_0
947 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
948 ld bp(1),b_1
949 rd %y,c_2
950 st c_1,rp(0) !r[0]=c1;
951
952 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
953 ld ap(1),a_1
954 addcc c_2,t_1,c_2
955 rd %y,t_2 !=
956 addxcc %g0,t_2,c_3
957 addx %g0,%g0,c_1
958 ld ap(2),a_2
959 umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
960 addcc c_2,t_1,c_2
961 rd %y,t_2
962 addxcc c_3,t_2,c_3
963 addx c_1,%g0,c_1 !=
964 st c_2,rp(1) !r[1]=c2;
965
966 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
967 addcc c_3,t_1,c_3
968 rd %y,t_2 !=
969 addxcc c_1,t_2,c_1
970 addx %g0,%g0,c_2
971 ld bp(2),b_2
972 umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2);
973 addcc c_3,t_1,c_3
974 rd %y,t_2
975 addxcc c_1,t_2,c_1
976 addx c_2,%g0,c_2 !=
977 ld bp(3),b_3
978 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
979 addcc c_3,t_1,c_3
980 rd %y,t_2 !=
981 addxcc c_1,t_2,c_1
982 addx c_2,%g0,c_2
983 st c_3,rp(2) !r[2]=c3;
984
985 umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3);
986 addcc c_1,t_1,c_1
987 rd %y,t_2
988 addxcc c_2,t_2,c_2
989 addx %g0,%g0,c_3 !=
990 umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
991 addcc c_1,t_1,c_1
992 rd %y,t_2
993 addxcc c_2,t_2,c_2 !=
994 addx c_3,%g0,c_3
995 ld ap(3),a_3
996 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
997 addcc c_1,t_1,c_1 !=
998 rd %y,t_2
999 addxcc c_2,t_2,c_2
1000 addx c_3,%g0,c_3
1001 umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);
1002 addcc c_1,t_1,c_1
1003 rd %y,t_2
1004 addxcc c_2,t_2,c_2
1005 addx c_3,%g0,c_3 !=
1006 st c_1,rp(3) !r[3]=c1;
1007
1008 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1009 addcc c_2,t_1,c_2
1010 rd %y,t_2 !=
1011 addxcc c_3,t_2,c_3
1012 addx %g0,%g0,c_1
1013 umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1014 addcc c_2,t_1,c_2 !=
1015 rd %y,t_2
1016 addxcc c_3,t_2,c_3
1017 addx c_1,%g0,c_1
1018 umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1);
1019 addcc c_2,t_1,c_2
1020 rd %y,t_2
1021 addxcc c_3,t_2,c_3
1022 addx c_1,%g0,c_1 !=
1023 st c_2,rp(4) !r[4]=c2;
1024
1025 umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1026 addcc c_3,t_1,c_3
1027 rd %y,t_2 !=
1028 addxcc c_1,t_2,c_1
1029 addx %g0,%g0,c_2
1030 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1031 addcc c_3,t_1,c_3 !=
1032 rd %y,t_2
1033 addxcc c_1,t_2,c_1
1034 st c_3,rp(5) !r[5]=c3;
1035 addx c_2,%g0,c_2 !=
1036
1037 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1038 addcc c_1,t_1,c_1
1039 rd %y,t_2
1040 addxcc c_2,t_2,c_2 !=
1041 st c_1,rp(6) !r[6]=c1;
1042 st c_2,rp(7) !r[7]=c2;
1043
1044 ret
1045 restore %g0,%g0,%o0
1046
1047.type bn_mul_comba4,#function
1048.size bn_mul_comba4,(.-bn_mul_comba4)
1049
1050.align 32
1051
1052.global bn_sqr_comba8
/*
 * void bn_sqr_comba8(r,a)
 * BN_ULONG *r,*a;
 *
 * Stores the 16-word square r[0..15] of the 8-word input a[0..7], one
 * result column at a time.  sqr_add_c adds a[i]*a[i] once; sqr_add_c2
 * adds the cross product a[i]*a[j] twice by running the
 * addcc/addxcc/addx sequence a second time on the same t_1/t_2 pair.
 * The final `restore` leaves zero in the caller's %o0.
 */
1053bn_sqr_comba8:
1054 save %sp,FRAME_SIZE,%sp
1055 ld ap(0),a_0
1056 ld ap(1),a_1
1057 umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3);
1058 rd %y,c_2
1059 st c_1,rp(0) !r[0]=c1;
1060
1061 ld ap(2),a_2
1062 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1063 addcc c_2,t_1,c_2
1064 rd %y,t_2
1065 addxcc %g0,t_2,c_3
1066 addx %g0,%g0,c_1 !=
1067 addcc c_2,t_1,c_2
1068 addxcc c_3,t_2,c_3
1069 st c_2,rp(1) !r[1]=c2;
1070 addx c_1,%g0,c_1 !=
1071
1072 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1073 addcc c_3,t_1,c_3
1074 rd %y,t_2
1075 addxcc c_1,t_2,c_1 !=
1076 addx %g0,%g0,c_2
1077 addcc c_3,t_1,c_3
1078 addxcc c_1,t_2,c_1
1079 addx c_2,%g0,c_2 !=
1080 ld ap(3),a_3
1081 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1082 addcc c_3,t_1,c_3
1083 rd %y,t_2 !=
1084 addxcc c_1,t_2,c_1
1085 addx c_2,%g0,c_2
1086 st c_3,rp(2) !r[2]=c3;
1087
1088 umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3);
1089 addcc c_1,t_1,c_1
1090 rd %y,t_2
1091 addxcc c_2,t_2,c_2
1092 addx %g0,%g0,c_3 !=
1093 addcc c_1,t_1,c_1
1094 addxcc c_2,t_2,c_2
1095 ld ap(4),a_4
1096 addx c_3,%g0,c_3 !=
1097 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1098 addcc c_1,t_1,c_1
1099 rd %y,t_2
1100 addxcc c_2,t_2,c_2 !=
1101 addx c_3,%g0,c_3
1102 addcc c_1,t_1,c_1
1103 addxcc c_2,t_2,c_2
1104 addx c_3,%g0,c_3 !=
1105 st c_1,rp(3) !r[3]=c1;
1106
1107 umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1108 addcc c_2,t_1,c_2
1109 rd %y,t_2 !=
1110 addxcc c_3,t_2,c_3
1111 addx %g0,%g0,c_1
1112 addcc c_2,t_1,c_2
1113 addxcc c_3,t_2,c_3 !=
1114 addx c_1,%g0,c_1
1115 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1116 addcc c_2,t_1,c_2
1117 rd %y,t_2 !=
1118 addxcc c_3,t_2,c_3
1119 addx c_1,%g0,c_1
1120 addcc c_2,t_1,c_2
1121 addxcc c_3,t_2,c_3 !=
1122 addx c_1,%g0,c_1
1123 ld ap(5),a_5
1124 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1125 addcc c_2,t_1,c_2 !=
1126 rd %y,t_2
1127 addxcc c_3,t_2,c_3
1128 st c_2,rp(4) !r[4]=c2;
1129 addx c_1,%g0,c_1 !=
1130
1131 umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1132 addcc c_3,t_1,c_3
1133 rd %y,t_2
1134 addxcc c_1,t_2,c_1 !=
1135 addx %g0,%g0,c_2
1136 addcc c_3,t_1,c_3
1137 addxcc c_1,t_2,c_1
1138 addx c_2,%g0,c_2 !=
1139 umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1140 addcc c_3,t_1,c_3
1141 rd %y,t_2
1142 addxcc c_1,t_2,c_1 !=
1143 addx c_2,%g0,c_2
1144 addcc c_3,t_1,c_3
1145 addxcc c_1,t_2,c_1
1146 addx c_2,%g0,c_2 !=
1147 ld ap(6),a_6
1148 umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1149 addcc c_3,t_1,c_3
1150 rd %y,t_2 !=
1151 addxcc c_1,t_2,c_1
1152 addx c_2,%g0,c_2
1153 addcc c_3,t_1,c_3
1154 addxcc c_1,t_2,c_1 !=
1155 addx c_2,%g0,c_2
1156 st c_3,rp(5) !r[5]=c3;
1157
1158 umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1159 addcc c_1,t_1,c_1 !=
1160 rd %y,t_2
1161 addxcc c_2,t_2,c_2
1162 addx %g0,%g0,c_3
1163 addcc c_1,t_1,c_1 !=
1164 addxcc c_2,t_2,c_2
1165 addx c_3,%g0,c_3
1166 umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1167 addcc c_1,t_1,c_1 !=
1168 rd %y,t_2
1169 addxcc c_2,t_2,c_2
1170 addx c_3,%g0,c_3
1171 addcc c_1,t_1,c_1 !=
1172 addxcc c_2,t_2,c_2
1173 addx c_3,%g0,c_3
1174 umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1175 addcc c_1,t_1,c_1 !=
1176 rd %y,t_2
1177 addxcc c_2,t_2,c_2
1178 addx c_3,%g0,c_3
1179 addcc c_1,t_1,c_1 !=
1180 addxcc c_2,t_2,c_2
1181 addx c_3,%g0,c_3
1182 ld ap(7),a_7
1183 umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1184 addcc c_1,t_1,c_1
1185 rd %y,t_2
1186 addxcc c_2,t_2,c_2
1187 addx c_3,%g0,c_3 !=
1188 st c_1,rp(6) !r[6]=c1;
1189
1190 umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1191 addcc c_2,t_1,c_2
1192 rd %y,t_2 !=
1193 addxcc c_3,t_2,c_3
1194 addx %g0,%g0,c_1
1195 addcc c_2,t_1,c_2
1196 addxcc c_3,t_2,c_3 !=
1197 addx c_1,%g0,c_1
1198 umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1199 addcc c_2,t_1,c_2
1200 rd %y,t_2 !=
1201 addxcc c_3,t_2,c_3
1202 addx c_1,%g0,c_1
1203 addcc c_2,t_1,c_2
1204 addxcc c_3,t_2,c_3 !=
1205 addx c_1,%g0,c_1
1206 umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1207 addcc c_2,t_1,c_2
1208 rd %y,t_2 !=
1209 addxcc c_3,t_2,c_3
1210 addx c_1,%g0,c_1
1211 addcc c_2,t_1,c_2
1212 addxcc c_3,t_2,c_3 !=
1213 addx c_1,%g0,c_1
1214 umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1215 addcc c_2,t_1,c_2
1216 rd %y,t_2 !=
1217 addxcc c_3,t_2,c_3
1218 addx c_1,%g0,c_1
1219 addcc c_2,t_1,c_2
1220 addxcc c_3,t_2,c_3 !=
1221 addx c_1,%g0,c_1
1222 st c_2,rp(7) !r[7]=c2;
1223
1224 umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1225 addcc c_3,t_1,c_3 !=
1226 rd %y,t_2
1227 addxcc c_1,t_2,c_1
1228 addx %g0,%g0,c_2
1229 addcc c_3,t_1,c_3 !=
1230 addxcc c_1,t_2,c_1
1231 addx c_2,%g0,c_2
1232 umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1233 addcc c_3,t_1,c_3 !=
1234 rd %y,t_2
1235 addxcc c_1,t_2,c_1
1236 addx c_2,%g0,c_2
1237 addcc c_3,t_1,c_3 !=
1238 addxcc c_1,t_2,c_1
1239 addx c_2,%g0,c_2
1240 umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1241 addcc c_3,t_1,c_3 !=
1242 rd %y,t_2
1243 addxcc c_1,t_2,c_1
1244 addx c_2,%g0,c_2
1245 addcc c_3,t_1,c_3 !=
1246 addxcc c_1,t_2,c_1
1247 addx c_2,%g0,c_2
1248 umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1249 addcc c_3,t_1,c_3 !=
1250 rd %y,t_2
1251 addxcc c_1,t_2,c_1
1252 st c_3,rp(8) !r[8]=c3;
1253 addx c_2,%g0,c_2 !=
1254
1255 umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1256 addcc c_1,t_1,c_1
1257 rd %y,t_2
1258 addxcc c_2,t_2,c_2 !=
1259 addx %g0,%g0,c_3
1260 addcc c_1,t_1,c_1
1261 addxcc c_2,t_2,c_2
1262 addx c_3,%g0,c_3 !=
1263 umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1264 addcc c_1,t_1,c_1
1265 rd %y,t_2
1266 addxcc c_2,t_2,c_2 !=
1267 addx c_3,%g0,c_3
1268 addcc c_1,t_1,c_1
1269 addxcc c_2,t_2,c_2
1270 addx c_3,%g0,c_3 !=
1271 umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1272 addcc c_1,t_1,c_1
1273 rd %y,t_2
1274 addxcc c_2,t_2,c_2 !=
1275 addx c_3,%g0,c_3
1276 addcc c_1,t_1,c_1
1277 addxcc c_2,t_2,c_2
1278 addx c_3,%g0,c_3 !=
1279 st c_1,rp(9) !r[9]=c1;
1280
1281 umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1282 addcc c_2,t_1,c_2
1283 rd %y,t_2 !=
1284 addxcc c_3,t_2,c_3
1285 addx %g0,%g0,c_1
1286 addcc c_2,t_1,c_2
1287 addxcc c_3,t_2,c_3 !=
1288 addx c_1,%g0,c_1
1289 umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1290 addcc c_2,t_1,c_2
1291 rd %y,t_2 !=
1292 addxcc c_3,t_2,c_3
1293 addx c_1,%g0,c_1
1294 addcc c_2,t_1,c_2
1295 addxcc c_3,t_2,c_3 !=
1296 addx c_1,%g0,c_1
1297 umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1298 addcc c_2,t_1,c_2
1299 rd %y,t_2 !=
1300 addxcc c_3,t_2,c_3
1301 addx c_1,%g0,c_1
1302 st c_2,rp(10) !r[10]=c2;
1303
1304 umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2);
1305 addcc c_3,t_1,c_3
1306 rd %y,t_2
1307 addxcc c_1,t_2,c_1
1308 addx %g0,%g0,c_2 !=
1309 addcc c_3,t_1,c_3
1310 addxcc c_1,t_2,c_1
1311 addx c_2,%g0,c_2
1312 umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2);
1313 addcc c_3,t_1,c_3
1314 rd %y,t_2
1315 addxcc c_1,t_2,c_1
1316 addx c_2,%g0,c_2 !=
1317 addcc c_3,t_1,c_3
1318 addxcc c_1,t_2,c_1
1319 st c_3,rp(11) !r[11]=c3;
1320 addx c_2,%g0,c_2 !=
1321
1322 umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1323 addcc c_1,t_1,c_1
1324 rd %y,t_2
1325 addxcc c_2,t_2,c_2 !=
1326 addx %g0,%g0,c_3
1327 addcc c_1,t_1,c_1
1328 addxcc c_2,t_2,c_2
1329 addx c_3,%g0,c_3 !=
1330 umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1331 addcc c_1,t_1,c_1
1332 rd %y,t_2
1333 addxcc c_2,t_2,c_2 !=
1334 addx c_3,%g0,c_3
1335 st c_1,rp(12) !r[12]=c1;
1336
1337 umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1338 addcc c_2,t_1,c_2 !=
1339 rd %y,t_2
1340 addxcc c_3,t_2,c_3
1341 addx %g0,%g0,c_1
1342 addcc c_2,t_1,c_2 !=
1343 addxcc c_3,t_2,c_3
1344 st c_2,rp(13) !r[13]=c2;
1345 addx c_1,%g0,c_1 !=
1346
1347 umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1348 addcc c_3,t_1,c_3
1349 rd %y,t_2
1350 addxcc c_1,t_2,c_1 !=
1351 st c_3,rp(14) !r[14]=c3;
1352 st c_1,rp(15) !r[15]=c1;
1353
1354 ret
1355 restore %g0,%g0,%o0
1356
1357.type bn_sqr_comba8,#function
1358.size bn_sqr_comba8,(.-bn_sqr_comba8)
1359
1360.align 32
1361
1362.global bn_sqr_comba4
1363/*
1364 * void bn_sqr_comba4(r,a)
1365 * BN_ULONG *r,*a;
 *
 * Stores the 8-word square r[0..7] of the 4-word input a[0..3], one
 * result column at a time.  sqr_add_c adds a[i]*a[i] once; sqr_add_c2
 * adds the cross product a[i]*a[j] twice by running the
 * addcc/addxcc/addx sequence a second time on the same t_1/t_2 pair.
 * The final `restore` leaves zero in the caller's %o0.
1366 */
1367bn_sqr_comba4:
1368 save %sp,FRAME_SIZE,%sp
1369 ld ap(0),a_0
1370 umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3);
1371 ld ap(1),a_1 !=
1372 rd %y,c_2
1373 st c_1,rp(0) !r[0]=c1;
1374
1375 ld ap(2),a_2
1376 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1377 addcc c_2,t_1,c_2
1378 rd %y,t_2
1379 addxcc %g0,t_2,c_3
1380 addx %g0,%g0,c_1 !=
1381 addcc c_2,t_1,c_2
1382 addxcc c_3,t_2,c_3
1383 addx c_1,%g0,c_1 !=
1384 st c_2,rp(1) !r[1]=c2;
1385
1386 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1387 addcc c_3,t_1,c_3
1388 rd %y,t_2 !=
1389 addxcc c_1,t_2,c_1
1390 addx %g0,%g0,c_2
1391 addcc c_3,t_1,c_3
1392 addxcc c_1,t_2,c_1 !=
1393 addx c_2,%g0,c_2
1394 ld ap(3),a_3
1395 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1396 addcc c_3,t_1,c_3 !=
1397 rd %y,t_2
1398 addxcc c_1,t_2,c_1
1399 st c_3,rp(2) !r[2]=c3;
1400 addx c_2,%g0,c_2 !=
1401
1402 umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1403 addcc c_1,t_1,c_1
1404 rd %y,t_2
1405 addxcc c_2,t_2,c_2 !=
1406 addx %g0,%g0,c_3
1407 addcc c_1,t_1,c_1
1408 addxcc c_2,t_2,c_2
1409 addx c_3,%g0,c_3 !=
1410 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1411 addcc c_1,t_1,c_1
1412 rd %y,t_2
1413 addxcc c_2,t_2,c_2 !=
1414 addx c_3,%g0,c_3
1415 addcc c_1,t_1,c_1
1416 addxcc c_2,t_2,c_2
1417 addx c_3,%g0,c_3 !=
1418 st c_1,rp(3) !r[3]=c1;
1419
1420 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1421 addcc c_2,t_1,c_2
1422 rd %y,t_2 !=
1423 addxcc c_3,t_2,c_3
1424 addx %g0,%g0,c_1
1425 addcc c_2,t_1,c_2
1426 addxcc c_3,t_2,c_3 !=
1427 addx c_1,%g0,c_1
1428 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1429 addcc c_2,t_1,c_2
1430 rd %y,t_2 !=
1431 addxcc c_3,t_2,c_3
1432 addx c_1,%g0,c_1
1433 st c_2,rp(4) !r[4]=c2;
1434
1435 umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2);
1436 addcc c_3,t_1,c_3
1437 rd %y,t_2
1438 addxcc c_1,t_2,c_1
1439 addx %g0,%g0,c_2 !=
1440 addcc c_3,t_1,c_3
1441 addxcc c_1,t_2,c_1
1442 st c_3,rp(5) !r[5]=c3;
1443 addx c_2,%g0,c_2 !=
1444
1445 umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1446 addcc c_1,t_1,c_1
1447 rd %y,t_2
1448 addxcc c_2,t_2,c_2 !=
1449 st c_1,rp(6) !r[6]=c1;
1450 st c_2,rp(7) !r[7]=c2;
1451
1452 ret
1453 restore %g0,%g0,%o0
1454
1455.type bn_sqr_comba4,#function
1456.size bn_sqr_comba4,(.-bn_sqr_comba4)
1457
1458.align 32
diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S
new file mode 100644
index 0000000000..0074dfdb75
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/sparcv8plus.S
@@ -0,0 +1,1535 @@
1.ident "sparcv8plus.s, Version 1.4"
2.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contribution to OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * Questions-n-answers.
22 *
23 * Q. How to compile?
24 * A. With SC4.x/SC5.x:
25 *
26 * cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
27 *
28 * and with gcc:
29 *
30 * gcc -mcpu=ultrasparc -c bn_asm.sparc.v8plus.S -o bn_asm.o
31 *
32 * or if above fails (it does if you have gas installed):
33 *
34 * gcc -E bn_asm.sparc.v8plus.S | as -xarch=v8plus /dev/fd/0 -o bn_asm.o
35 *
36 * Quick-n-dirty way to fuse the module into the library.
37 * Provided that the library is already configured and built
38 * (in 0.9.2 case with no-asm option):
39 *
40 * # cd crypto/bn
41 * # cp /some/place/bn_asm.sparc.v8plus.S .
42 * # cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
43 * # make
44 * # cd ../..
45 * # make; make test
46 *
47 * Quick-n-dirty way to get rid of it:
48 *
49 * # cd crypto/bn
50 * # touch bn_asm.c
51 * # make
52 * # cd ../..
53 * # make; make test
54 *
55 * Q. V8plus architecture? What kind of beast is that?
56 * A. Well, it's rather a programming model than an architecture...
57 * It's actually v9-compliant, i.e. *any* UltraSPARC, CPU under
58 * special conditions, namely when kernel doesn't preserve upper
59 * 32 bits of otherwise 64-bit registers during a context switch.
60 *
61 * Q. Why just UltraSPARC? What about SuperSPARC?
62 * A. Original release did target UltraSPARC only. Now SuperSPARC
63 * version is provided along. Both versions share bn_*comba[48]
64 * implementations (see comment later in code for explanation).
65 * But what's so special about this UltraSPARC implementation?
66 * Why didn't I let compiler do the job? Trouble is that most of
67 * available compilers (well, SC5.0 is the only exception) don't
68 * attempt to take advantage of UltraSPARC's 64-bitness under
69 * 32-bit kernels even though it's perfectly possible (see next
70 * question).
71 *
72 * Q. 64-bit registers under 32-bit kernels? Didn't you just say it
73 * doesn't work?
74 * A. You can't address *all* registers as 64-bit wide:-( The catch is
75 * that you actually may rely upon %o0-%o5 and %g1-%g4 being fully
76 * preserved if you're in a leaf function, i.e. such never calling
77 * any other functions. All functions in this module are leaf and
78 * 10 registers is a handful. And as a matter of fact non-"comba"
79 * routines don't require even that much and I could even afford to
80 * not allocate own stack frame for 'em:-)
81 *
82 * Q. What about 64-bit kernels?
83 * A. What about 'em? Just kidding:-) Pure 64-bit version is currently
84 * under evaluation and development...
85 *
86 * Q. What about shared libraries?
87 * A. What about 'em? Kidding again:-) Code does *not* contain any
88 * code position dependencies and it's safe to include it into
89 * shared library as is.
90 *
91 * Q. How much faster does it go?
92 * A. Do you have a good benchmark? In either case below is what I
93 * experience with crypto/bn/expspeed.c test program:
94 *
95 * v8plus module on U10/300MHz against bn_asm.c compiled with:
96 *
97 * cc-5.0 -xarch=v8plus -xO5 -xdepend +7-12%
98 * cc-4.2 -xarch=v8plus -xO5 -xdepend +25-35%
99 * egcs-1.1.2 -mcpu=ultrasparc -O3 +35-45%
100 *
101 * v8 module on SS10/60MHz against bn_asm.c compiled with:
102 *
103 * cc-5.0 -xarch=v8 -xO5 -xdepend +7-10%
104 * cc-4.2 -xarch=v8 -xO5 -xdepend +10%
105 * egcs-1.1.2 -mv8 -O3 +35-45%
106 *
107 * As you can see it's damn hard to beat the new Sun C compiler
108 * and it's in first place GNU C users who will appreciate this
109 * assembler implementation:-)
110 */
111
112/*
113 * Revision history.
114 *
115 * 1.0 - initial release;
116 * 1.1 - new loop unrolling model(*);
117 * - some more fine tuning;
118 * 1.2 - made gas friendly;
119 * - updates to documentation concerning v9;
120 * - new performance comparison matrix;
121 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
122 * 1.4 - native V9 bn_*_comba[48] implementation (15% more efficient)
123 * resulting in slight overall performance kick;
124 * - some retunes;
125 * - support for GNU as added;
126 *
127 * (*) Originally unrolled loop looked like this:
128 * for (;;) {
129 * op(p+0); if (--n==0) break;
130 * op(p+1); if (--n==0) break;
131 * op(p+2); if (--n==0) break;
132 * op(p+3); if (--n==0) break;
133 * p+=4;
134 * }
135 * I unroll according to following:
136 * while (n&~3) {
137 * op(p+0); op(p+1); op(p+2); op(p+3);
138 * p+=4; n-=4;
139 * }
140 * if (n) {
141 * op(p+0); if (--n==0) return;
142 * op(p+1); if (--n==0) return;
143 * op(p+2); return;
144 * }
145 */
146
147/*
148 * GNU assembler can't stand stuw:-(
149 */
150#define stuw st
151
152.section ".text",#alloc,#execinstr
153.file "bn_asm.sparc.v8plus.S"
154
155.align 32
156
157.global bn_mul_add_words
158/*
159 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
160 * BN_ULONG *rp,*ap;
161 * int num;
162 * BN_ULONG w;
 *
 * For each of the num words: forms rp[i] + carry + ap[i]*w as one
 * 64-bit value, stores its low 32 bits back to rp[i] and keeps its
 * high 32 bits as the next carry (%o5).  Returns the final carry, or 0
 * straight away when num is not positive.  The main loop handles four
 * words per pass; the tail handles the remaining one to three.
163 */
164bn_mul_add_words:
165 brgz,a %o2,.L_bn_mul_add_words_proceed
166 lduw [%o1],%g2
167 retl
168 clr %o0
169
170.L_bn_mul_add_words_proceed:
171 srl %o3,%g0,%o3 ! clruw %o3
172 andcc %o2,-4,%g0
173 bz,pn %icc,.L_bn_mul_add_words_tail
174 clr %o5
175
176.L_bn_mul_add_words_loop: ! wow! 32 aligned!
177 lduw [%o0],%g1
178 lduw [%o1+4],%g3
179 mulx %o3,%g2,%g2
180 add %g1,%o5,%o4
181 nop
182 add %o4,%g2,%o4
183 stuw %o4,[%o0]
184 srlx %o4,32,%o5
185
186 lduw [%o0+4],%g1
187 lduw [%o1+8],%g2
188 mulx %o3,%g3,%g3
189 add %g1,%o5,%o4
190 dec 4,%o2
191 add %o4,%g3,%o4
192 stuw %o4,[%o0+4]
193 srlx %o4,32,%o5
194
195 lduw [%o0+8],%g1
196 lduw [%o1+12],%g3
197 mulx %o3,%g2,%g2
198 add %g1,%o5,%o4
199 inc 16,%o1
200 add %o4,%g2,%o4
201 stuw %o4,[%o0+8]
202 srlx %o4,32,%o5
203
204 lduw [%o0+12],%g1
205 mulx %o3,%g3,%g3
206 add %g1,%o5,%o4
207 inc 16,%o0
208 add %o4,%g3,%o4
209 andcc %o2,-4,%g0
210 stuw %o4,[%o0-4]
211 srlx %o4,32,%o5
212 bnz,a,pt %icc,.L_bn_mul_add_words_loop
213 lduw [%o1],%g2
214
215 brnz,a,pn %o2,.L_bn_mul_add_words_tail
216 lduw [%o1],%g2
217.L_bn_mul_add_words_return:
218 retl
219 mov %o5,%o0
220
221.L_bn_mul_add_words_tail:
222 lduw [%o0],%g1
223 mulx %o3,%g2,%g2
224 add %g1,%o5,%o4
225 dec %o2
226 add %o4,%g2,%o4
227 srlx %o4,32,%o5
228 brz,pt %o2,.L_bn_mul_add_words_return
229 stuw %o4,[%o0]
230
231 lduw [%o1+4],%g2
232 lduw [%o0+4],%g1
233 mulx %o3,%g2,%g2
234 add %g1,%o5,%o4
235 dec %o2
236 add %o4,%g2,%o4
237 srlx %o4,32,%o5
238 brz,pt %o2,.L_bn_mul_add_words_return
239 stuw %o4,[%o0+4]
240
241 lduw [%o1+8],%g2
242 lduw [%o0+8],%g1
243 mulx %o3,%g2,%g2
244 add %g1,%o5,%o4
245 add %o4,%g2,%o4
246 stuw %o4,[%o0+8]
247 retl
248 srlx %o4,32,%o0
249
250.type bn_mul_add_words,#function
251.size bn_mul_add_words,(.-bn_mul_add_words)
252
253.align 32
254
255.global bn_mul_words
256/*
257 * BN_ULONG bn_mul_words(rp,ap,num,w)
258 * BN_ULONG *rp,*ap;
259 * int num;
260 * BN_ULONG w;
 *
 * For each of the num words: forms ap[i]*w + carry as one 64-bit
 * value, stores its low 32 bits to rp[i] and keeps its high 32 bits as
 * the next carry (%o5).  Returns the final carry, or 0 straight away
 * when num is not positive.  The main loop handles four words per
 * pass; the tail handles the remaining one to three.
261 */
262bn_mul_words:
263 brgz,a %o2,.L_bn_mul_words_proceeed
264 lduw [%o1],%g2
265 retl
266 clr %o0
267
268.L_bn_mul_words_proceeed:
269 srl %o3,%g0,%o3 ! clruw %o3
270 andcc %o2,-4,%g0
271 bz,pn %icc,.L_bn_mul_words_tail
272 clr %o5
273
274.L_bn_mul_words_loop: ! wow! 32 aligned!
275 lduw [%o1+4],%g3
276 mulx %o3,%g2,%g2
277 add %g2,%o5,%o4
278 nop
279 stuw %o4,[%o0]
280 srlx %o4,32,%o5
281
282 lduw [%o1+8],%g2
283 mulx %o3,%g3,%g3
284 add %g3,%o5,%o4
285 dec 4,%o2
286 stuw %o4,[%o0+4]
287 srlx %o4,32,%o5
288
289 lduw [%o1+12],%g3
290 mulx %o3,%g2,%g2
291 add %g2,%o5,%o4
292 inc 16,%o1
293 stuw %o4,[%o0+8]
294 srlx %o4,32,%o5
295
296 mulx %o3,%g3,%g3
297 add %g3,%o5,%o4
298 inc 16,%o0
299 stuw %o4,[%o0-4]
300 srlx %o4,32,%o5
301 andcc %o2,-4,%g0
302 bnz,a,pt %icc,.L_bn_mul_words_loop
303 lduw [%o1],%g2
304 nop
305 nop
306
307 brnz,a,pn %o2,.L_bn_mul_words_tail
308 lduw [%o1],%g2
309.L_bn_mul_words_return:
310 retl
311 mov %o5,%o0
312
313.L_bn_mul_words_tail:
314 mulx %o3,%g2,%g2
315 add %g2,%o5,%o4
316 dec %o2
317 srlx %o4,32,%o5
318 brz,pt %o2,.L_bn_mul_words_return
319 stuw %o4,[%o0]
320
321 lduw [%o1+4],%g2
322 mulx %o3,%g2,%g2
323 add %g2,%o5,%o4
324 dec %o2
325 srlx %o4,32,%o5
326 brz,pt %o2,.L_bn_mul_words_return
327 stuw %o4,[%o0+4]
328
329 lduw [%o1+8],%g2
330 mulx %o3,%g2,%g2
331 add %g2,%o5,%o4
332 stuw %o4,[%o0+8]
333 retl
334 srlx %o4,32,%o0
335
336.type bn_mul_words,#function
337.size bn_mul_words,(.-bn_mul_words)
338
339.align 32
340.global bn_sqr_words
341/*
342 * void bn_sqr_words(r,a,n)
343 * BN_ULONG *r,*a;
344 * int n;
 *
 * For each of the n input words: squares a[i] with a 64-bit mulx and
 * stores the low 32 bits to r[2*i] and the high 32 bits to r[2*i+1].
 * Returns immediately when n is not positive.  Although the prototype
 * is void, every exit path clears %o0.  The main loop handles four
 * input words per pass; the tail handles the remaining one to three.
345 */
346bn_sqr_words:
347 brgz,a %o2,.L_bn_sqr_words_proceeed
348 lduw [%o1],%g2
349 retl
350 clr %o0
351
352.L_bn_sqr_words_proceeed:
353 andcc %o2,-4,%g0
354 nop
355 bz,pn %icc,.L_bn_sqr_words_tail
356 nop
357
358.L_bn_sqr_words_loop: ! wow! 32 aligned!
359 lduw [%o1+4],%g3
360 mulx %g2,%g2,%o4
361 stuw %o4,[%o0]
362 srlx %o4,32,%o5
363 stuw %o5,[%o0+4]
364 nop
365
366 lduw [%o1+8],%g2
367 mulx %g3,%g3,%o4
368 dec 4,%o2
369 stuw %o4,[%o0+8]
370 srlx %o4,32,%o5
371 stuw %o5,[%o0+12]
372
373 lduw [%o1+12],%g3
374 mulx %g2,%g2,%o4
375 srlx %o4,32,%o5
376 stuw %o4,[%o0+16]
377 inc 16,%o1
378 stuw %o5,[%o0+20]
379
380 mulx %g3,%g3,%o4
381 inc 32,%o0
382 stuw %o4,[%o0-8]
383 srlx %o4,32,%o5
/* The andcc below writes its result to %g2 (not %g0); this is harmless
 * because both continuing paths reload %g2 from [%o1] before using it. */
384 andcc %o2,-4,%g2
385 stuw %o5,[%o0-4]
386 bnz,a,pt %icc,.L_bn_sqr_words_loop
387 lduw [%o1],%g2
388 nop
389
390 brnz,a,pn %o2,.L_bn_sqr_words_tail
391 lduw [%o1],%g2
392.L_bn_sqr_words_return:
393 retl
394 clr %o0
395
396.L_bn_sqr_words_tail:
397 mulx %g2,%g2,%o4
398 dec %o2
399 stuw %o4,[%o0]
400 srlx %o4,32,%o5
401 brz,pt %o2,.L_bn_sqr_words_return
402 stuw %o5,[%o0+4]
403
404 lduw [%o1+4],%g2
405 mulx %g2,%g2,%o4
406 dec %o2
407 stuw %o4,[%o0+8]
408 srlx %o4,32,%o5
409 brz,pt %o2,.L_bn_sqr_words_return
410 stuw %o5,[%o0+12]
411
412 lduw [%o1+8],%g2
413 mulx %g2,%g2,%o4
414 srlx %o4,32,%o5
415 stuw %o4,[%o0+16]
416 stuw %o5,[%o0+20]
417 retl
418 clr %o0
419
420.type bn_sqr_words,#function
421.size bn_sqr_words,(.-bn_sqr_words)
422
423.align 32
424.global bn_div_words
425/*
426 * BN_ULONG bn_div_words(h,l,d)
427 * BN_ULONG h,l,d; returns the low 32 bits of ((h<<32)|l)/d
428 */
429bn_div_words:
430 sllx %o0,32,%o0 ! %o0 = h << 32
431 or %o0,%o1,%o0 ! %o0 = (h << 32) | l -- NOTE(review): assumes the upper half of %o1 is clear, confirm
432 udivx %o0,%o2,%o0 ! unsigned 64-bit divide by d
433 retl
434 srl %o0,%g0,%o0 ! clruw %o0
435
436.type bn_div_words,#function
437.size bn_div_words,(.-bn_div_words)
438
439.align 32
440
441.global bn_add_words
442/*
443 * BN_ULONG bn_add_words(rp,ap,bp,n)
444 * BN_ULONG *rp,*ap,*bp; rp[i] = ap[i] + bp[i] + carry for i = 0..n-1
445 * int n; returns the final carry (0 or 1), or 0 when n <= 0
446 */
447bn_add_words:
448 brgz,a %o3,.L_bn_add_words_proceed ! n > 0: go on, the delay slot preloads the first word of the 2nd argument
449 lduw [%o1],%o4
450 retl ! n <= 0: return 0
451 clr %o0
452
453.L_bn_add_words_proceed:
454 andcc %o3,-4,%g0 ! is there a full group of 4 words?
455 bz,pn %icc,.L_bn_add_words_tail ! no: only 1..3 words, use the tail
456 addcc %g0,0,%g0 ! clear carry flag
457 nop
458
459.L_bn_add_words_loop: ! wow! 32 aligned!
460 dec 4,%o3 ! n -= 4
461 lduw [%o2],%o5
462 lduw [%o1+4],%g1
463 lduw [%o2+4],%g2
464 lduw [%o1+8],%g3
465 lduw [%o2+8],%g4
466 addccc %o5,%o4,%o5 ! word 0: add with carry in, carry out
467 stuw %o5,[%o0]
468
469 lduw [%o1+12],%o4
470 lduw [%o2+12],%o5
471 inc 16,%o1
472 addccc %g1,%g2,%g1 ! word 1
473 stuw %g1,[%o0+4]
474
475 inc 16,%o2
476 addccc %g3,%g4,%g3 ! word 2
477 stuw %g3,[%o0+8]
478
479 inc 16,%o0
480 addccc %o5,%o4,%o5 ! word 3
481 stuw %o5,[%o0-4]
482 and %o3,-4,%g1 ! plain and (not andcc) so the carry flag survives into the next pass
483 brnz,a,pt %g1,.L_bn_add_words_loop
484 lduw [%o1],%o4
485
486 brnz,a,pn %o3,.L_bn_add_words_tail ! 1..3 words remain
487 lduw [%o1],%o4
488.L_bn_add_words_return:
489 clr %o0 ! result defaults to 0 ...
490 retl
491 movcs %icc,1,%o0 ! ... and becomes 1 if the last addccc carried
492 nop
493
494.L_bn_add_words_tail:
495 lduw [%o2],%o5
496 dec %o3 ! dec leaves the condition codes alone, the carry chain stays intact
497 addccc %o5,%o4,%o5
498 brz,pt %o3,.L_bn_add_words_return ! last word? (the store in the delay slot always runs)
499 stuw %o5,[%o0]
500
501 lduw [%o1+4],%o4
502 lduw [%o2+4],%o5
503 dec %o3
504 addccc %o5,%o4,%o5
505 brz,pt %o3,.L_bn_add_words_return
506 stuw %o5,[%o0+4]
507
508 lduw [%o1+8],%o4 ! third and final tail word
509 lduw [%o2+8],%o5
510 addccc %o5,%o4,%o5
511 stuw %o5,[%o0+8]
512 clr %o0
513 retl
514 movcs %icc,1,%o0 ! return the final carry
515
516.type bn_add_words,#function
517.size bn_add_words,(.-bn_add_words)
518
519.global bn_sub_words
520/*
521 * BN_ULONG bn_sub_words(rp,ap,bp,n)
522 * BN_ULONG *rp,*ap,*bp; rp[i] = ap[i] - bp[i] - borrow for i = 0..n-1
523 * int n; returns the final borrow (0 or 1), or 0 when n <= 0
524 */
525bn_sub_words:
526 brgz,a %o3,.L_bn_sub_words_proceed ! n > 0: go on, the delay slot preloads the first word of the 2nd argument
527 lduw [%o1],%o4
528 retl ! n <= 0: return 0
529 clr %o0
530
531.L_bn_sub_words_proceed:
532 andcc %o3,-4,%g0 ! is there a full group of 4 words?
533 bz,pn %icc,.L_bn_sub_words_tail ! no: only 1..3 words, use the tail
534 addcc %g0,0,%g0 ! clear carry flag
535 nop
536
537.L_bn_sub_words_loop: ! wow! 32 aligned!
538 dec 4,%o3 ! n -= 4
539 lduw [%o2],%o5
540 lduw [%o1+4],%g1
541 lduw [%o2+4],%g2
542 lduw [%o1+8],%g3
543 lduw [%o2+8],%g4
544 subccc %o4,%o5,%o5 ! word 0: subtract with borrow in, borrow out
545 stuw %o5,[%o0]
546
547 lduw [%o1+12],%o4
548 lduw [%o2+12],%o5
549 inc 16,%o1
550 subccc %g1,%g2,%g2 ! word 1
551 stuw %g2,[%o0+4]
552
553 inc 16,%o2
554 subccc %g3,%g4,%g4 ! word 2
555 stuw %g4,[%o0+8]
556
557 inc 16,%o0
558 subccc %o4,%o5,%o5 ! word 3
559 stuw %o5,[%o0-4]
560 and %o3,-4,%g1 ! plain and (not andcc) so the borrow flag survives into the next pass
561 brnz,a,pt %g1,.L_bn_sub_words_loop
562 lduw [%o1],%o4
563
564 brnz,a,pn %o3,.L_bn_sub_words_tail ! 1..3 words remain
565 lduw [%o1],%o4
566.L_bn_sub_words_return:
567 clr %o0 ! result defaults to 0 ...
568 retl
569 movcs %icc,1,%o0 ! ... and becomes 1 if the last subccc borrowed
570 nop
571
572.L_bn_sub_words_tail: ! wow! 32 aligned!
573 lduw [%o2],%o5
574 dec %o3 ! dec leaves the condition codes alone, the borrow chain stays intact
575 subccc %o4,%o5,%o5
576 brz,pt %o3,.L_bn_sub_words_return ! last word? (the store in the delay slot always runs)
577 stuw %o5,[%o0]
578
579 lduw [%o1+4],%o4
580 lduw [%o2+4],%o5
581 dec %o3
582 subccc %o4,%o5,%o5
583 brz,pt %o3,.L_bn_sub_words_return
584 stuw %o5,[%o0+4]
585
586 lduw [%o1+8],%o4 ! third and final tail word
587 lduw [%o2+8],%o5
588 subccc %o4,%o5,%o5
589 stuw %o5,[%o0+8]
590 clr %o0
591 retl
592 movcs %icc,1,%o0 ! return the final borrow
593
594.type bn_sub_words,#function
595.size bn_sub_words,(.-bn_sub_words)
596
597/*
598 * Code below depends on the fact that the upper parts of %l0-%l7
599 * and %i0-%i7 are zeroed by the kernel after a context switch. In
600 * previous versions this comment stated that "the trouble is that
601 * it's not feasible to implement the mumbo-jumbo in less V9
602 * instructions:-(" which apparently isn't true thanks to the
603 * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement
604 * results not from the shorter code, but from the elimination of
605 * multi-cycle, non-pairable 'rd %y,%rd' instructions.
606 *
607 * Andy.
608 */
609
610#define FRAME_SIZE -96 /* stack adjustment handed to save in every routine below */
611
612/*
613 * Here is the register usage map for *all* routines below.
614 */
615#define t_1 %o0 /* scratch: holds each product, then the finished column sum */
616#define t_2 %o1 /* constant 1<<32, built by mov 1 + sllx 32 in each prologue */
617#define c_12 %o2 /* 64-bit accumulator for the current result column */
618#define c_3 %o3 /* carries out of c_12, counted in units of 1<<32 */
619
620#define ap(I) [%i1+4*I] /* I-th 32-bit word addressed by the 2nd argument */
621#define bp(I) [%i2+4*I] /* I-th 32-bit word addressed by the 3rd argument */
622#define rp(I) [%i0+4*I] /* I-th 32-bit word addressed by the 1st argument (result) */
623
624#define a_0 %l0 /* a_0..a_7: input words loaded through ap() */
625#define a_1 %l1
626#define a_2 %l2
627#define a_3 %l3
628#define a_4 %l4
629#define a_5 %l5
630#define a_6 %l6
631#define a_7 %l7
632
633#define b_0 %i3 /* b_0..b_7: input words loaded through bp() */
634#define b_1 %i4
635#define b_2 %i5
636#define b_3 %o4
637#define b_4 %o5
638#define b_5 %o7
639#define b_6 %g1
640#define b_7 %g4
641
642.align 32
643.global bn_mul_comba8
644/*
645 * void bn_mul_comba8(r,a,b)
646 * BN_ULONG *r,*a,*b; r[0..15] = a[0..7] * b[0..7], one result word (column) at a time
647 */
648bn_mul_comba8:
649 save %sp,FRAME_SIZE,%sp
650 mov 1,t_2
651 lduw ap(0),a_0
652 sllx t_2,32,t_2 ! now 1<<32: the unit in which carries are counted
653 lduw bp(0),b_0 !=
654 lduw bp(1),b_1
655 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
656 srlx t_1,32,c_12 ! high half of the product seeds the next column
657 stuw t_1,rp(0) !=!r[0]=c1;
658
659 lduw ap(1),a_1
660 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
661 addcc c_12,t_1,c_12 ! 64-bit accumulate; the %xcc carry is tested below
662 clr c_3 !=
663 bcs,a %xcc,.+8 ! annulled branch to the next-but-one instruction: the delay slot runs only on carry
664 add c_3,t_2,c_3 ! count one carry (adds 1<<32)
665 lduw ap(2),a_2
666 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
667 addcc c_12,t_1,t_1
668 bcs,a %xcc,.+8
669 add c_3,t_2,c_3
670 srlx t_1,32,c_12 !=
671 stuw t_1,rp(1) !r[1]=c2;
672 or c_12,c_3,c_12 ! merge the counted carries above the 32 bits kept from the column sum
673
674 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
675 addcc c_12,t_1,c_12 !=
676 clr c_3
677 bcs,a %xcc,.+8
678 add c_3,t_2,c_3
679 lduw bp(2),b_2 !=
680 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
681 addcc c_12,t_1,c_12
682 bcs,a %xcc,.+8
683 add c_3,t_2,c_3 !=
684 lduw bp(3),b_3
685 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
686 addcc c_12,t_1,t_1
687 bcs,a %xcc,.+8 !=
688 add c_3,t_2,c_3
689 srlx t_1,32,c_12
690 stuw t_1,rp(2) !r[2]=c3;
691 or c_12,c_3,c_12 !=
692
693 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
694 addcc c_12,t_1,c_12
695 clr c_3
696 bcs,a %xcc,.+8 !=
697 add c_3,t_2,c_3
698 mulx a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
699 addcc c_12,t_1,c_12
700 bcs,a %xcc,.+8 !=
701 add c_3,t_2,c_3
702 lduw ap(3),a_3
703 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
704 addcc c_12,t_1,c_12 !=
705 bcs,a %xcc,.+8
706 add c_3,t_2,c_3
707 lduw ap(4),a_4
708 mulx a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);!=
709 addcc c_12,t_1,t_1
710 bcs,a %xcc,.+8
711 add c_3,t_2,c_3
712 srlx t_1,32,c_12 !=
713 stuw t_1,rp(3) !r[3]=c1;
714 or c_12,c_3,c_12
715
716 mulx a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
717 addcc c_12,t_1,c_12 !=
718 clr c_3
719 bcs,a %xcc,.+8
720 add c_3,t_2,c_3
721 mulx a_3,b_1,t_1 !=!mul_add_c(a[3],b[1],c2,c3,c1);
722 addcc c_12,t_1,c_12
723 bcs,a %xcc,.+8
724 add c_3,t_2,c_3
725 mulx a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
726 addcc c_12,t_1,c_12
727 bcs,a %xcc,.+8
728 add c_3,t_2,c_3
729 lduw bp(4),b_4 !=
730 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
731 addcc c_12,t_1,c_12
732 bcs,a %xcc,.+8
733 add c_3,t_2,c_3 !=
734 lduw bp(5),b_5
735 mulx a_0,b_4,t_1 !mul_add_c(a[0],b[4],c2,c3,c1);
736 addcc c_12,t_1,t_1
737 bcs,a %xcc,.+8 !=
738 add c_3,t_2,c_3
739 srlx t_1,32,c_12
740 stuw t_1,rp(4) !r[4]=c2;
741 or c_12,c_3,c_12 !=
742
743 mulx a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
744 addcc c_12,t_1,c_12
745 clr c_3
746 bcs,a %xcc,.+8 !=
747 add c_3,t_2,c_3
748 mulx a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
749 addcc c_12,t_1,c_12
750 bcs,a %xcc,.+8 !=
751 add c_3,t_2,c_3
752 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
753 addcc c_12,t_1,c_12
754 bcs,a %xcc,.+8 !=
755 add c_3,t_2,c_3
756 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
757 addcc c_12,t_1,c_12
758 bcs,a %xcc,.+8 !=
759 add c_3,t_2,c_3
760 lduw ap(5),a_5
761 mulx a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
762 addcc c_12,t_1,c_12 !=
763 bcs,a %xcc,.+8
764 add c_3,t_2,c_3
765 lduw ap(6),a_6
766 mulx a_5,b_0,t_1 !=!mul_add_c(a[5],b[0],c3,c1,c2);
767 addcc c_12,t_1,t_1
768 bcs,a %xcc,.+8
769 add c_3,t_2,c_3
770 srlx t_1,32,c_12 !=
771 stuw t_1,rp(5) !r[5]=c3;
772 or c_12,c_3,c_12
773
774 mulx a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
775 addcc c_12,t_1,c_12 !=
776 clr c_3
777 bcs,a %xcc,.+8
778 add c_3,t_2,c_3
779 mulx a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
780 addcc c_12,t_1,c_12
781 bcs,a %xcc,.+8
782 add c_3,t_2,c_3
783 mulx a_4,b_2,t_1 !=!mul_add_c(a[4],b[2],c1,c2,c3);
784 addcc c_12,t_1,c_12
785 bcs,a %xcc,.+8
786 add c_3,t_2,c_3
787 mulx a_3,b_3,t_1 !=!mul_add_c(a[3],b[3],c1,c2,c3);
788 addcc c_12,t_1,c_12
789 bcs,a %xcc,.+8
790 add c_3,t_2,c_3
791 mulx a_2,b_4,t_1 !=!mul_add_c(a[2],b[4],c1,c2,c3);
792 addcc c_12,t_1,c_12
793 bcs,a %xcc,.+8
794 add c_3,t_2,c_3
795 lduw bp(6),b_6 !=
796 mulx a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
797 addcc c_12,t_1,c_12
798 bcs,a %xcc,.+8
799 add c_3,t_2,c_3 !=
800 lduw bp(7),b_7
801 mulx a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
802 addcc c_12,t_1,t_1
803 bcs,a %xcc,.+8 !=
804 add c_3,t_2,c_3
805 srlx t_1,32,c_12
806 stuw t_1,rp(6) !r[6]=c1;
807 or c_12,c_3,c_12 !=
808
809 mulx a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
810 addcc c_12,t_1,c_12
811 clr c_3
812 bcs,a %xcc,.+8 !=
813 add c_3,t_2,c_3
814 mulx a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
815 addcc c_12,t_1,c_12
816 bcs,a %xcc,.+8 !=
817 add c_3,t_2,c_3
818 mulx a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
819 addcc c_12,t_1,c_12
820 bcs,a %xcc,.+8 !=
821 add c_3,t_2,c_3
822 mulx a_3,b_4,t_1 !mul_add_c(a[3],b[4],c2,c3,c1);
823 addcc c_12,t_1,c_12
824 bcs,a %xcc,.+8 !=
825 add c_3,t_2,c_3
826 mulx a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
827 addcc c_12,t_1,c_12
828 bcs,a %xcc,.+8 !=
829 add c_3,t_2,c_3
830 mulx a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
831 addcc c_12,t_1,c_12
832 bcs,a %xcc,.+8 !=
833 add c_3,t_2,c_3
834 lduw ap(7),a_7
835 mulx a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
836 addcc c_12,t_1,c_12
837 bcs,a %xcc,.+8
838 add c_3,t_2,c_3
839 mulx a_7,b_0,t_1 !=!mul_add_c(a[7],b[0],c2,c3,c1);
840 addcc c_12,t_1,t_1
841 bcs,a %xcc,.+8
842 add c_3,t_2,c_3
843 srlx t_1,32,c_12 !=
844 stuw t_1,rp(7) !r[7]=c2;
845 or c_12,c_3,c_12
846
847 mulx a_7,b_1,t_1 !=!mul_add_c(a[7],b[1],c3,c1,c2);
848 addcc c_12,t_1,c_12
849 clr c_3
850 bcs,a %xcc,.+8
851 add c_3,t_2,c_3 !=
852 mulx a_6,b_2,t_1 !mul_add_c(a[6],b[2],c3,c1,c2);
853 addcc c_12,t_1,c_12
854 bcs,a %xcc,.+8
855 add c_3,t_2,c_3 !=
856 mulx a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
857 addcc c_12,t_1,c_12
858 bcs,a %xcc,.+8
859 add c_3,t_2,c_3 !=
860 mulx a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
861 addcc c_12,t_1,c_12
862 bcs,a %xcc,.+8
863 add c_3,t_2,c_3 !=
864 mulx a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
865 addcc c_12,t_1,c_12
866 bcs,a %xcc,.+8
867 add c_3,t_2,c_3 !=
868 mulx a_2,b_6,t_1 !mul_add_c(a[2],b[6],c3,c1,c2);
869 addcc c_12,t_1,c_12
870 bcs,a %xcc,.+8
871 add c_3,t_2,c_3 !=
872 mulx a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
873 addcc c_12,t_1,t_1
874 bcs,a %xcc,.+8
875 add c_3,t_2,c_3 !=
876 srlx t_1,32,c_12
877 stuw t_1,rp(8) !r[8]=c3;
878 or c_12,c_3,c_12
879
880 mulx a_2,b_7,t_1 !=!mul_add_c(a[2],b[7],c1,c2,c3);
881 addcc c_12,t_1,c_12
882 clr c_3
883 bcs,a %xcc,.+8
884 add c_3,t_2,c_3 !=
885 mulx a_3,b_6,t_1 !mul_add_c(a[3],b[6],c1,c2,c3);
886 addcc c_12,t_1,c_12
887 bcs,a %xcc,.+8 !=
888 add c_3,t_2,c_3
889 mulx a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
890 addcc c_12,t_1,c_12
891 bcs,a %xcc,.+8 !=
892 add c_3,t_2,c_3
893 mulx a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
894 addcc c_12,t_1,c_12
895 bcs,a %xcc,.+8 !=
896 add c_3,t_2,c_3
897 mulx a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
898 addcc c_12,t_1,c_12
899 bcs,a %xcc,.+8 !=
900 add c_3,t_2,c_3
901 mulx a_7,b_2,t_1 !mul_add_c(a[7],b[2],c1,c2,c3);
902 addcc c_12,t_1,t_1
903 bcs,a %xcc,.+8 !=
904 add c_3,t_2,c_3
905 srlx t_1,32,c_12
906 stuw t_1,rp(9) !r[9]=c1;
907 or c_12,c_3,c_12 !=
908
909 mulx a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
910 addcc c_12,t_1,c_12
911 clr c_3
912 bcs,a %xcc,.+8 !=
913 add c_3,t_2,c_3
914 mulx a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
915 addcc c_12,t_1,c_12
916 bcs,a %xcc,.+8 !=
917 add c_3,t_2,c_3
918 mulx a_5,b_5,t_1 !mul_add_c(a[5],b[5],c2,c3,c1);
919 addcc c_12,t_1,c_12
920 bcs,a %xcc,.+8 !=
921 add c_3,t_2,c_3
922 mulx a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
923 addcc c_12,t_1,c_12
924 bcs,a %xcc,.+8 !=
925 add c_3,t_2,c_3
926 mulx a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
927 addcc c_12,t_1,t_1
928 bcs,a %xcc,.+8 !=
929 add c_3,t_2,c_3
930 srlx t_1,32,c_12
931 stuw t_1,rp(10) !r[10]=c2;
932 or c_12,c_3,c_12 !=
933
934 mulx a_4,b_7,t_1 !mul_add_c(a[4],b[7],c3,c1,c2);
935 addcc c_12,t_1,c_12
936 clr c_3
937 bcs,a %xcc,.+8 !=
938 add c_3,t_2,c_3
939 mulx a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
940 addcc c_12,t_1,c_12
941 bcs,a %xcc,.+8 !=
942 add c_3,t_2,c_3
943 mulx a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
944 addcc c_12,t_1,c_12
945 bcs,a %xcc,.+8 !=
946 add c_3,t_2,c_3
947 mulx a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
948 addcc c_12,t_1,t_1
949 bcs,a %xcc,.+8 !=
950 add c_3,t_2,c_3
951 srlx t_1,32,c_12
952 stuw t_1,rp(11) !r[11]=c3;
953 or c_12,c_3,c_12 !=
954
955 mulx a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
956 addcc c_12,t_1,c_12
957 clr c_3
958 bcs,a %xcc,.+8 !=
959 add c_3,t_2,c_3
960 mulx a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
961 addcc c_12,t_1,c_12
962 bcs,a %xcc,.+8 !=
963 add c_3,t_2,c_3
964 mulx a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
965 addcc c_12,t_1,t_1
966 bcs,a %xcc,.+8 !=
967 add c_3,t_2,c_3
968 srlx t_1,32,c_12
969 stuw t_1,rp(12) !r[12]=c1;
970 or c_12,c_3,c_12 !=
971
972 mulx a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
973 addcc c_12,t_1,c_12
974 clr c_3
975 bcs,a %xcc,.+8 !=
976 add c_3,t_2,c_3
977 mulx a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
978 addcc c_12,t_1,t_1
979 bcs,a %xcc,.+8 !=
980 add c_3,t_2,c_3
981 srlx t_1,32,c_12
982 st t_1,rp(13) !r[13]=c2;
983 or c_12,c_3,c_12 !=
984
985 mulx a_7,b_7,t_1 !mul_add_c(a[7],b[7],c3,c1,c2);
986 addcc c_12,t_1,t_1
987 srlx t_1,32,c_12 !=
988 stuw t_1,rp(14) !r[14]=c3;
989 stuw c_12,rp(15) !r[15]=c1;
990
991 ret ! the restore in the delay slot also zeroes the register the caller reads as its first out register
992 restore %g0,%g0,%o0 !=
993
994.type bn_mul_comba8,#function
995.size bn_mul_comba8,(.-bn_mul_comba8)
996
997.align 32
998
999.global bn_mul_comba4
1000/*
1001 * void bn_mul_comba4(r,a,b)
1002 * BN_ULONG *r,*a,*b; r[0..7] = a[0..3] * b[0..3], one result word (column) at a time
1003 */
1004bn_mul_comba4:
1005 save %sp,FRAME_SIZE,%sp
1006 lduw ap(0),a_0
1007 mov 1,t_2 ! becomes 1<<32 after the sllx below: the unit in which carries are counted
1008 lduw bp(0),b_0
1009 sllx t_2,32,t_2 !=
1010 lduw bp(1),b_1
1011 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
1012 srlx t_1,32,c_12 ! high half of the product seeds the next column
1013 stuw t_1,rp(0) !=!r[0]=c1;
1014
1015 lduw ap(1),a_1
1016 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
1017 addcc c_12,t_1,c_12 ! 64-bit accumulate; the %xcc carry is tested below
1018 clr c_3 !=
1019 bcs,a %xcc,.+8 ! annulled branch to the next-but-one instruction: the delay slot runs only on carry
1020 add c_3,t_2,c_3 ! count one carry (adds 1<<32)
1021 lduw ap(2),a_2
1022 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
1023 addcc c_12,t_1,t_1
1024 bcs,a %xcc,.+8
1025 add c_3,t_2,c_3
1026 srlx t_1,32,c_12 !=
1027 stuw t_1,rp(1) !r[1]=c2;
1028 or c_12,c_3,c_12 ! merge the counted carries above the 32 bits kept from the column sum
1029
1030 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
1031 addcc c_12,t_1,c_12 !=
1032 clr c_3
1033 bcs,a %xcc,.+8
1034 add c_3,t_2,c_3
1035 lduw bp(2),b_2 !=
1036 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
1037 addcc c_12,t_1,c_12
1038 bcs,a %xcc,.+8
1039 add c_3,t_2,c_3 !=
1040 lduw bp(3),b_3
1041 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
1042 addcc c_12,t_1,t_1
1043 bcs,a %xcc,.+8 !=
1044 add c_3,t_2,c_3
1045 srlx t_1,32,c_12
1046 stuw t_1,rp(2) !r[2]=c3;
1047 or c_12,c_3,c_12 !=
1048
1049 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
1050 addcc c_12,t_1,c_12
1051 clr c_3
1052 bcs,a %xcc,.+8 !=
1053 add c_3,t_2,c_3
1054 mulx a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
1055 addcc c_12,t_1,c_12
1056 bcs,a %xcc,.+8 !=
1057 add c_3,t_2,c_3
1058 lduw ap(3),a_3
1059 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
1060 addcc c_12,t_1,c_12 !=
1061 bcs,a %xcc,.+8
1062 add c_3,t_2,c_3
1063 mulx a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
1064 addcc c_12,t_1,t_1 !=
1065 bcs,a %xcc,.+8
1066 add c_3,t_2,c_3
1067 srlx t_1,32,c_12
1068 stuw t_1,rp(3) !=!r[3]=c1;
1069 or c_12,c_3,c_12
1070
1071 mulx a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1072 addcc c_12,t_1,c_12
1073 clr c_3 !=
1074 bcs,a %xcc,.+8
1075 add c_3,t_2,c_3
1076 mulx a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1077 addcc c_12,t_1,c_12 !=
1078 bcs,a %xcc,.+8
1079 add c_3,t_2,c_3
1080 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
1081 addcc c_12,t_1,t_1 !=
1082 bcs,a %xcc,.+8
1083 add c_3,t_2,c_3
1084 srlx t_1,32,c_12
1085 stuw t_1,rp(4) !=!r[4]=c2;
1086 or c_12,c_3,c_12
1087
1088 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1089 addcc c_12,t_1,c_12
1090 clr c_3 !=
1091 bcs,a %xcc,.+8
1092 add c_3,t_2,c_3
1093 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1094 addcc c_12,t_1,t_1 !=
1095 bcs,a %xcc,.+8
1096 add c_3,t_2,c_3
1097 srlx t_1,32,c_12
1098 stuw t_1,rp(5) !=!r[5]=c3;
1099 or c_12,c_3,c_12
1100
1101 mulx a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1102 addcc c_12,t_1,t_1
1103 srlx t_1,32,c_12 !=
1104 stuw t_1,rp(6) !r[6]=c1;
1105 stuw c_12,rp(7) !r[7]=c2;
1106
1107 ret
1108 restore %g0,%g0,%o0 ! pops the register window and leaves 0 in the caller-visible result register
1109
1110.type bn_mul_comba4,#function
1111.size bn_mul_comba4,(.-bn_mul_comba4)
1112
1113.align 32
1114
1115.global bn_sqr_comba8
1116bn_sqr_comba8: ! bn_sqr_comba8(r,a): r[0..15] = a[0..7] squared, one result word (column) at a time
1117 save %sp,FRAME_SIZE,%sp
1118 mov 1,t_2
1119 lduw ap(0),a_0
1120 sllx t_2,32,t_2 ! now 1<<32: the unit in which carries are counted
1121 lduw ap(1),a_1
1122 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1123 srlx t_1,32,c_12 ! high half of the product seeds the next column
1124 stuw t_1,rp(0) !r[0]=c1;
1125
1126 lduw ap(2),a_2
1127 mulx a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1128 addcc c_12,t_1,c_12 ! cross product added once ...
1129 clr c_3
1130 bcs,a %xcc,.+8 ! annulled branch to the next-but-one instruction: the delay slot runs only on carry
1131 add c_3,t_2,c_3 ! count one carry (adds 1<<32)
1132 addcc c_12,t_1,t_1 ! ... and a second time, which is how sqr_add_c2 doubles it
1133 bcs,a %xcc,.+8
1134 add c_3,t_2,c_3
1135 srlx t_1,32,c_12
1136 stuw t_1,rp(1) !r[1]=c2;
1137 or c_12,c_3,c_12 ! merge the counted carries above the 32 bits kept from the column sum
1138
1139 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1140 addcc c_12,t_1,c_12
1141 clr c_3
1142 bcs,a %xcc,.+8
1143 add c_3,t_2,c_3
1144 addcc c_12,t_1,c_12
1145 bcs,a %xcc,.+8
1146 add c_3,t_2,c_3
1147 lduw ap(3),a_3
1148 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1149 addcc c_12,t_1,t_1
1150 bcs,a %xcc,.+8
1151 add c_3,t_2,c_3
1152 srlx t_1,32,c_12
1153 stuw t_1,rp(2) !r[2]=c3;
1154 or c_12,c_3,c_12
1155
1156 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1157 addcc c_12,t_1,c_12
1158 clr c_3
1159 bcs,a %xcc,.+8
1160 add c_3,t_2,c_3
1161 addcc c_12,t_1,c_12
1162 bcs,a %xcc,.+8
1163 add c_3,t_2,c_3
1164 lduw ap(4),a_4
1165 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1166 addcc c_12,t_1,c_12
1167 bcs,a %xcc,.+8
1168 add c_3,t_2,c_3
1169 addcc c_12,t_1,t_1
1170 bcs,a %xcc,.+8
1171 add c_3,t_2,c_3
1172 srlx t_1,32,c_12
1173 st t_1,rp(3) !r[3]=c1;
1174 or c_12,c_3,c_12
1175
1176 mulx a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1177 addcc c_12,t_1,c_12
1178 clr c_3
1179 bcs,a %xcc,.+8
1180 add c_3,t_2,c_3
1181 addcc c_12,t_1,c_12
1182 bcs,a %xcc,.+8
1183 add c_3,t_2,c_3
1184 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1185 addcc c_12,t_1,c_12
1186 bcs,a %xcc,.+8
1187 add c_3,t_2,c_3
1188 addcc c_12,t_1,c_12
1189 bcs,a %xcc,.+8
1190 add c_3,t_2,c_3
1191 lduw ap(5),a_5
1192 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1193 addcc c_12,t_1,t_1
1194 bcs,a %xcc,.+8
1195 add c_3,t_2,c_3
1196 srlx t_1,32,c_12
1197 stuw t_1,rp(4) !r[4]=c2;
1198 or c_12,c_3,c_12
1199
1200 mulx a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1201 addcc c_12,t_1,c_12
1202 clr c_3
1203 bcs,a %xcc,.+8
1204 add c_3,t_2,c_3
1205 addcc c_12,t_1,c_12
1206 bcs,a %xcc,.+8
1207 add c_3,t_2,c_3
1208 mulx a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1209 addcc c_12,t_1,c_12
1210 bcs,a %xcc,.+8
1211 add c_3,t_2,c_3
1212 addcc c_12,t_1,c_12
1213 bcs,a %xcc,.+8
1214 add c_3,t_2,c_3
1215 lduw ap(6),a_6
1216 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1217 addcc c_12,t_1,c_12
1218 bcs,a %xcc,.+8
1219 add c_3,t_2,c_3
1220 addcc c_12,t_1,t_1
1221 bcs,a %xcc,.+8
1222 add c_3,t_2,c_3
1223 srlx t_1,32,c_12
1224 stuw t_1,rp(5) !r[5]=c3;
1225 or c_12,c_3,c_12
1226
1227 mulx a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1228 addcc c_12,t_1,c_12
1229 clr c_3
1230 bcs,a %xcc,.+8
1231 add c_3,t_2,c_3
1232 addcc c_12,t_1,c_12
1233 bcs,a %xcc,.+8
1234 add c_3,t_2,c_3
1235 mulx a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1236 addcc c_12,t_1,c_12
1237 bcs,a %xcc,.+8
1238 add c_3,t_2,c_3
1239 addcc c_12,t_1,c_12
1240 bcs,a %xcc,.+8
1241 add c_3,t_2,c_3
1242 mulx a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1243 addcc c_12,t_1,c_12
1244 bcs,a %xcc,.+8
1245 add c_3,t_2,c_3
1246 addcc c_12,t_1,c_12
1247 bcs,a %xcc,.+8
1248 add c_3,t_2,c_3
1249 lduw ap(7),a_7
1250 mulx a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1251 addcc c_12,t_1,t_1
1252 bcs,a %xcc,.+8
1253 add c_3,t_2,c_3
1254 srlx t_1,32,c_12
1255 stuw t_1,rp(6) !r[6]=c1;
1256 or c_12,c_3,c_12
1257
1258 mulx a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1259 addcc c_12,t_1,c_12
1260 clr c_3
1261 bcs,a %xcc,.+8
1262 add c_3,t_2,c_3
1263 addcc c_12,t_1,c_12
1264 bcs,a %xcc,.+8
1265 add c_3,t_2,c_3
1266 mulx a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1267 addcc c_12,t_1,c_12
1268 bcs,a %xcc,.+8
1269 add c_3,t_2,c_3
1270 addcc c_12,t_1,c_12
1271 bcs,a %xcc,.+8
1272 add c_3,t_2,c_3
1273 mulx a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1274 addcc c_12,t_1,c_12
1275 bcs,a %xcc,.+8
1276 add c_3,t_2,c_3
1277 addcc c_12,t_1,c_12
1278 bcs,a %xcc,.+8
1279 add c_3,t_2,c_3
1280 mulx a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1281 addcc c_12,t_1,c_12
1282 bcs,a %xcc,.+8
1283 add c_3,t_2,c_3
1284 addcc c_12,t_1,t_1
1285 bcs,a %xcc,.+8
1286 add c_3,t_2,c_3
1287 srlx t_1,32,c_12
1288 stuw t_1,rp(7) !r[7]=c2;
1289 or c_12,c_3,c_12
1290
1291 mulx a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1292 addcc c_12,t_1,c_12
1293 clr c_3
1294 bcs,a %xcc,.+8
1295 add c_3,t_2,c_3
1296 addcc c_12,t_1,c_12
1297 bcs,a %xcc,.+8
1298 add c_3,t_2,c_3
1299 mulx a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1300 addcc c_12,t_1,c_12
1301 bcs,a %xcc,.+8
1302 add c_3,t_2,c_3
1303 addcc c_12,t_1,c_12
1304 bcs,a %xcc,.+8
1305 add c_3,t_2,c_3
1306 mulx a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1307 addcc c_12,t_1,c_12
1308 bcs,a %xcc,.+8
1309 add c_3,t_2,c_3
1310 addcc c_12,t_1,c_12
1311 bcs,a %xcc,.+8
1312 add c_3,t_2,c_3
1313 mulx a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1314 addcc c_12,t_1,t_1
1315 bcs,a %xcc,.+8
1316 add c_3,t_2,c_3
1317 srlx t_1,32,c_12
1318 stuw t_1,rp(8) !r[8]=c3;
1319 or c_12,c_3,c_12
1320
1321 mulx a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1322 addcc c_12,t_1,c_12
1323 clr c_3
1324 bcs,a %xcc,.+8
1325 add c_3,t_2,c_3
1326 addcc c_12,t_1,c_12
1327 bcs,a %xcc,.+8
1328 add c_3,t_2,c_3
1329 mulx a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1330 addcc c_12,t_1,c_12
1331 bcs,a %xcc,.+8
1332 add c_3,t_2,c_3
1333 addcc c_12,t_1,c_12
1334 bcs,a %xcc,.+8
1335 add c_3,t_2,c_3
1336 mulx a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1337 addcc c_12,t_1,c_12
1338 bcs,a %xcc,.+8
1339 add c_3,t_2,c_3
1340 addcc c_12,t_1,t_1
1341 bcs,a %xcc,.+8
1342 add c_3,t_2,c_3
1343 srlx t_1,32,c_12
1344 stuw t_1,rp(9) !r[9]=c1;
1345 or c_12,c_3,c_12
1346
1347 mulx a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1348 addcc c_12,t_1,c_12
1349 clr c_3
1350 bcs,a %xcc,.+8
1351 add c_3,t_2,c_3
1352 addcc c_12,t_1,c_12
1353 bcs,a %xcc,.+8
1354 add c_3,t_2,c_3
1355 mulx a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1356 addcc c_12,t_1,c_12
1357 bcs,a %xcc,.+8
1358 add c_3,t_2,c_3
1359 addcc c_12,t_1,c_12
1360 bcs,a %xcc,.+8
1361 add c_3,t_2,c_3
1362 mulx a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1363 addcc c_12,t_1,t_1
1364 bcs,a %xcc,.+8
1365 add c_3,t_2,c_3
1366 srlx t_1,32,c_12
1367 stuw t_1,rp(10) !r[10]=c2;
1368 or c_12,c_3,c_12
1369
1370 mulx a_4,a_7,t_1 !sqr_add_c2(a,7,4,c3,c1,c2);
1371 addcc c_12,t_1,c_12
1372 clr c_3
1373 bcs,a %xcc,.+8
1374 add c_3,t_2,c_3
1375 addcc c_12,t_1,c_12
1376 bcs,a %xcc,.+8
1377 add c_3,t_2,c_3
1378 mulx a_5,a_6,t_1 !sqr_add_c2(a,6,5,c3,c1,c2);
1379 addcc c_12,t_1,c_12
1380 bcs,a %xcc,.+8
1381 add c_3,t_2,c_3
1382 addcc c_12,t_1,t_1
1383 bcs,a %xcc,.+8
1384 add c_3,t_2,c_3
1385 srlx t_1,32,c_12
1386 stuw t_1,rp(11) !r[11]=c3;
1387 or c_12,c_3,c_12
1388
1389 mulx a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1390 addcc c_12,t_1,c_12
1391 clr c_3
1392 bcs,a %xcc,.+8
1393 add c_3,t_2,c_3
1394 addcc c_12,t_1,c_12
1395 bcs,a %xcc,.+8
1396 add c_3,t_2,c_3
1397 mulx a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1398 addcc c_12,t_1,t_1
1399 bcs,a %xcc,.+8
1400 add c_3,t_2,c_3
1401 srlx t_1,32,c_12
1402 stuw t_1,rp(12) !r[12]=c1;
1403 or c_12,c_3,c_12
1404
1405 mulx a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1406 addcc c_12,t_1,c_12
1407 clr c_3
1408 bcs,a %xcc,.+8
1409 add c_3,t_2,c_3
1410 addcc c_12,t_1,t_1
1411 bcs,a %xcc,.+8
1412 add c_3,t_2,c_3
1413 srlx t_1,32,c_12
1414 stuw t_1,rp(13) !r[13]=c2;
1415 or c_12,c_3,c_12
1416
1417 mulx a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1418 addcc c_12,t_1,t_1
1419 srlx t_1,32,c_12
1420 stuw t_1,rp(14) !r[14]=c3;
1421 stuw c_12,rp(15) !r[15]=c1;
1422
1423 ret
1424 restore %g0,%g0,%o0 ! pops the register window and leaves 0 in the caller-visible result register
1425
1426.type bn_sqr_comba8,#function
1427.size bn_sqr_comba8,(.-bn_sqr_comba8)
1428
1429.align 32
1430
1431.global bn_sqr_comba4
1432/*
1433 * void bn_sqr_comba4(r,a)
1434 * BN_ULONG *r,*a; r[0..7] = a[0..3] squared, one result word (column) at a time
1435 */
1436bn_sqr_comba4:
1437 save %sp,FRAME_SIZE,%sp
1438 mov 1,t_2
1439 lduw ap(0),a_0
1440 sllx t_2,32,t_2 ! now 1<<32: the unit in which carries are counted
1441 lduw ap(1),a_1
1442 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1443 srlx t_1,32,c_12 ! high half of the product seeds the next column
1444 stuw t_1,rp(0) !r[0]=c1;
1445
1446 lduw ap(2),a_2
1447 mulx a_0,a_1,t_1 !sqr_add_c2(a,1,0,c2,c3,c1);
1448 addcc c_12,t_1,c_12 ! cross product added once ...
1449 clr c_3
1450 bcs,a %xcc,.+8 ! annulled branch to the next-but-one instruction: the delay slot runs only on carry
1451 add c_3,t_2,c_3 ! count one carry (adds 1<<32)
1452 addcc c_12,t_1,t_1 ! ... and a second time, which is how sqr_add_c2 doubles it
1453 bcs,a %xcc,.+8
1454 add c_3,t_2,c_3
1455 srlx t_1,32,c_12
1456 stuw t_1,rp(1) !r[1]=c2;
1457 or c_12,c_3,c_12 ! merge the counted carries above the 32 bits kept from the column sum
1458
1459 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1460 addcc c_12,t_1,c_12
1461 clr c_3
1462 bcs,a %xcc,.+8
1463 add c_3,t_2,c_3
1464 addcc c_12,t_1,c_12
1465 bcs,a %xcc,.+8
1466 add c_3,t_2,c_3
1467 lduw ap(3),a_3
1468 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1469 addcc c_12,t_1,t_1
1470 bcs,a %xcc,.+8
1471 add c_3,t_2,c_3
1472 srlx t_1,32,c_12
1473 stuw t_1,rp(2) !r[2]=c3;
1474 or c_12,c_3,c_12
1475
1476 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1477 addcc c_12,t_1,c_12
1478 clr c_3
1479 bcs,a %xcc,.+8
1480 add c_3,t_2,c_3
1481 addcc c_12,t_1,c_12
1482 bcs,a %xcc,.+8
1483 add c_3,t_2,c_3
1484 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1485 addcc c_12,t_1,c_12
1486 bcs,a %xcc,.+8
1487 add c_3,t_2,c_3
1488 addcc c_12,t_1,t_1
1489 bcs,a %xcc,.+8
1490 add c_3,t_2,c_3
1491 srlx t_1,32,c_12
1492 stuw t_1,rp(3) !r[3]=c1;
1493 or c_12,c_3,c_12
1494
1495 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1496 addcc c_12,t_1,c_12
1497 clr c_3
1498 bcs,a %xcc,.+8
1499 add c_3,t_2,c_3
1500 addcc c_12,t_1,c_12
1501 bcs,a %xcc,.+8
1502 add c_3,t_2,c_3
1503 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1504 addcc c_12,t_1,t_1
1505 bcs,a %xcc,.+8
1506 add c_3,t_2,c_3
1507 srlx t_1,32,c_12
1508 stuw t_1,rp(4) !r[4]=c2;
1509 or c_12,c_3,c_12
1510
1511 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1512 addcc c_12,t_1,c_12
1513 clr c_3
1514 bcs,a %xcc,.+8
1515 add c_3,t_2,c_3
1516 addcc c_12,t_1,t_1
1517 bcs,a %xcc,.+8
1518 add c_3,t_2,c_3
1519 srlx t_1,32,c_12
1520 stuw t_1,rp(5) !r[5]=c3;
1521 or c_12,c_3,c_12
1522
1523 mulx a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1524 addcc c_12,t_1,t_1
1525 srlx t_1,32,c_12
1526 stuw t_1,rp(6) !r[6]=c1;
1527 stuw c_12,rp(7) !r[7]=c2;
1528
1529 ret
1530 restore %g0,%g0,%o0 ! pops the register window and leaves 0 in the caller-visible result register
1531
1532.type bn_sqr_comba4,#function
1533.size bn_sqr_comba4,(.-bn_sqr_comba4)
1534
1535.align 32
diff --git a/src/lib/libcrypto/bn/asm/vms.mar b/src/lib/libcrypto/bn/asm/vms.mar
new file mode 100644
index 0000000000..ac9d57d7b0
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/vms.mar
@@ -0,0 +1,6695 @@
1 .title vax_bn_mul_add_word unsigned multiply & add, 32*32+32+32=>64
2;
3; w.j.m. 15-jan-1999
4;
5; it's magic ...
6;
7; ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
8; ULONG c = 0;
9; int i;
10; for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
11; return c;
12; }
13
14r=4 ;(AP)
15a=8 ;(AP)
16n=12 ;(AP) n by value (input)
17w=16 ;(AP) w by value (input)
18
19
20 .psect code,nowrt
21
22.entry bn_mul_add_words,^m<r2,r3,r4,r5,r6>
23
24 moval @r(ap),r2 ; r2 -> r[]
25 moval @a(ap),r3 ; r3 -> a[]
26 movl n(ap),r4 ; r4 = loop count (assumed >0 by C code)
27 movl w(ap),r5 ; r5 = w
28 clrl r6 ; c
29
300$:
31 emul r5,(r3),(r2),r0 ; w, a[], r[] considered signed
32
33 ; fixup for "negative" r[]
34 tstl (r2)
35 bgeq 10$
36 incl r1 ; the addend was sign-extended: add 1 back to the hi word
3710$:
38
39 ; add in c
40 addl2 r6,r0
41 adwc #0,r1 ; carry from the lo word goes into the hi word
42
43 ; combined fixup for "negative" w, a[]
44 tstl r5 ; w
45 bgeq 20$
46 addl2 (r3),r1 ; a[]
4720$:
48 tstl (r3) ; a[]
49 bgeq 30$
50 addl2 r5,r1 ; w
5130$:
52
53 movl r0,(r2)+ ; store lo result in r[] & advance
54 addl #4,r3 ; advance a[]
55 movl r1,r6 ; store hi result => c
56
57 sobgtr r4,0$ ; decrement the count and loop while it is still > 0
58
59 movl r6,r0 ; return c
60 ret
61
62 .title vax_bn_mul_word unsigned multiply & add, 32*32+32=>64
63;
64; w.j.m. 15-jan-1999
65;
66; it's magic ...
67;
68; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
69; ULONG c = 0;
70; int i;
71; for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;
72; return(c);
73; }
74
75r=4 ;(AP)
76a=8 ;(AP)
77n=12 ;(AP) n by value (input)
78w=16 ;(AP) w by value (input)
79
80
81 .psect code,nowrt
82
83.entry bn_mul_words,^m<r2,r3,r4,r5,r6>
84
85 moval @r(ap),r2 ; r2 -> r[]
86 moval @a(ap),r3 ; r3 -> a[]
87 movl n(ap),r4 ; r4 = loop count (assumed >0 by C code)
88 movl w(ap),r5 ; r5 = w
89 clrl r6 ; r6 = c
90
910$:
92 ; <r1,r0> := w * a[] + c
93 emul r5,(r3),r6,r0 ; w, a[], c considered signed
94
95 ; fixup for "negative" c
96 tstl r6 ; c
97 bgeq 10$
98 incl r1 ; the addend was sign-extended: add 1 back to the hi word
9910$:
100
101 ; combined fixup for "negative" w, a[]
102 tstl r5 ; w
103 bgeq 20$
104 addl2 (r3),r1 ; a[]
10520$:
106 tstl (r3) ; a[]
107 bgeq 30$
108 addl2 r5,r1 ; w
10930$:
110
111 movl r0,(r2)+ ; store lo result in r[] & advance
112 addl #4,r3 ; advance a[]
113 movl r1,r6 ; store hi result => c
114
115 sobgtr r4,0$ ; decrement the count and loop while it is still > 0
116
117 movl r6,r0 ; return c
118 ret
119
120 .title vax_bn_sqr_words unsigned square, 32*32=>64
121;
122; w.j.m. 15-jan-1999
123;
124; it's magic ...
125;
126; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
127; int i;
128; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
129; }
130
131r=4 ;(AP)
132a=8 ;(AP)
133n=12 ;(AP) n by value (input)
134
135
136 .psect code,nowrt
137
138.entry bn_sqr_words,^m<r2,r3,r4,r5>
139
140 moval @r(ap),r2 ; r2 -> r[]
141 moval @a(ap),r3 ; r3 -> a[]
142 movl n(ap),r4 ; r4 = n (assumed >0 by C code)
143
1440$:
145 movl (r3)+,r5 ; r5 = a[] & advance
146
147 ; <r1,r0> := a[] * a[]
148 emul r5,r5,#0,r0 ; a[] considered signed
149
150 ; fixup for "negative" a[]
151 tstl r5 ; a[]
152 bgeq 30$
153 addl2 r5,r1 ; a[]
154 addl2 r5,r1 ; a[] (added twice: both multiplicands were the same signed value)
15530$:
156
157 movl r0,(r2)+ ; store lo result in r[] & advance
158 movl r1,(r2)+ ; store hi result in r[] & advance
159
160 sobgtr r4,0$ ; decrement the count and loop while it is still > 0
161
162 movl #1,r0 ; return SS$_NORMAL
163 ret
164
165 .title (generated)
166
167 .psect code,nowrt
168
169.entry BN_DIV_WORDS,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10> ; NOTE(review): presumably (h,l,d) = 4(ap),8(ap),12(ap) -- confirm against the C caller
170 subl2 #4,sp
171
172 clrl r9 ; r9 collects the result (filled in at noname.17 and noname.18)
173 movl #2,r8 ; r8 counts the two passes through noname.7
174
175 tstl 12(ap) ; third argument zero?
176 bneq noname.2
177 mnegl #1,r10 ; yes: the return value is -1 (all ones)
178 brw noname.3
179 tstl r0 ; never reached (follows an unconditional brw)
180 nop
181noname.2:
182
183 pushl 12(ap)
184 calls #1,BN_NUM_BITS_WORD
185 movl r0,r7 ; r7 = BN_NUM_BITS_WORD(third argument)
186
187 cmpl r7,#32
188 beql noname.4
189 ashl r7,#1,r2 ; r2 = 1 << r7
190 cmpl 4(ap),r2
191 blequ noname.4
192
193 pushl r7
194 calls #1,BN_DIV_WORDS_ABORT ; reached when r7 != 32 and 4(ap) > (1 << r7), unsigned
195noname.4:
196
197 subl3 r7,#32,r7 ; r7 = 32 - r7, the shift count used below
198
199 movl 12(ap),r2
200 cmpl 4(ap),r2
201 blssu noname.5
202 subl2 r2,4(ap) ; 4(ap) -= 12(ap) when 4(ap) >= 12(ap) (unsigned)
203noname.5:
204
205 tstl r7
206 beql noname.6 ; shift count 0: skip the shifting
207
208 ashl r7,r2,12(ap) ; 12(ap) <<= r7
209
210 ashl r7,4(ap),r4
211 subl3 r7,#32,r3
212 subl3 r3,#32,r2
213 extzv r3,r2,8(ap),r2 ; r2 = the top r7 bits of 8(ap)
214 bisl3 r4,r2,4(ap) ; 4(ap) = (4(ap) << r7) | those bits
215
216 ashl r7,8(ap),8(ap) ; 8(ap) <<= r7
217noname.6:
218
219 bicl3 #65535,12(ap),r2
220 extzv #16,#16,r2,r5 ; r5 = high 16 bits of 12(ap)
221
222 bicl3 #-65536,12(ap),r6 ; r6 = low 16 bits of 12(ap)
223
224noname.7:
225
226 moval 4(ap),r2
227 movzwl 2(r2),r0 ; r0 = high 16 bits of 4(ap)
228 cmpl r0,r5
229 bneq noname.8
230
231 movzwl #65535,r4 ; high halves match: r4 = 65535
232 brb noname.9
233noname.8:
234
235 clrl r1
236 movl (r2),r0
237 movl r5,r2
238 bgeq vcg.1
239 cmpl r2,r0
240 bgtru vcg.2
241 incl r1
242 brb vcg.2
243 nop
244vcg.1:
245 ediv r2,r0,r1,r0 ; r1 = 4(ap) / r5, r0 = remainder
246vcg.2:
247 movl r1,r4 ; r4 = the quotient just computed
248noname.9:
249
250noname.10:
251
252 mull3 r5,r4,r0
253 subl3 r0,4(ap),r3 ; r3 = 4(ap) - r5*r4
254
255 bicl3 #65535,r3,r0
256 bneq noname.13
257 mull3 r6,r4,r2
258 ashl #16,r3,r1
259 bicl3 #65535,8(ap),r0
260 extzv #16,#16,r0,r0
261 addl2 r0,r1
262 cmpl r2,r1
263 bgtru noname.12
264noname.11:
265
266 brb noname.13
267 nop
268noname.12:
269
270 decl r4 ; r4 too large: lower it by one and test again
271 brb noname.10
272noname.13:
273
274 mull3 r5,r4,r1
275
276 mull3 r6,r4,r0
277
278 extzv #16,#16,r0,r3
279
280 ashl #16,r0,r2
281 bicl3 #65535,r2,r0
282
283 addl2 r3,r1
284
285 moval 8(ap),r3
286 cmpl (r3),r0
287 bgequ noname.15
288 incl r1 ; the subtraction below will borrow: pass it on to r1
289noname.15:
290
291 subl2 r0,(r3)
292
293 cmpl 4(ap),r1
294 bgequ noname.16
295
296 addl2 12(ap),4(ap)
297
298 decl r4
299noname.16:
300
301 subl2 r1,4(ap)
302
303 decl r8
304 beql noname.18 ; both passes done
305noname.17:
306
307 ashl #16,r4,r9 ; first pass: r4 becomes the upper 16 bits of the result
308
309 ashl #16,4(ap),r2
310 movzwl 2(r3),r0
311 bisl2 r0,r2
312 bicl3 #0,r2,4(ap) ; 4(ap) = (4(ap) << 16) | high 16 bits of 8(ap)
313
314 bicl3 #-65536,(r3),r0
315 ashl #16,r0,(r3) ; 8(ap) = low 16 bits of 8(ap), shifted up by 16
316 brw noname.7
317 nop
318noname.18:
319
320 bisl2 r4,r9 ; second pass: r4 is or-ed into the low bits of the result
321
322 movl r9,r10
323
324noname.3:
325 movl r10,r0 ; return value
326 ret
327 tstl r0 ; never reached (follows ret)
328
329
330 .psect code,nowrt
331
; BN_ADD_WORDS -- add two arrays of longwords with carry propagation,
; store the sums, and return the final carry.
;
; Arguments (offsets match the r/a/b/n legend used elsewhere in this file):
;    4(ap)  r  address the sums are stored to
;    8(ap)  a  address of the first addend array
;   12(ap)  b  address of the second addend array
;   16(ap)  n  number of longwords; used as a signed down-counter
;
; Returns in r0: the carry left after the last word, or 0 when n <= 0.
;
; The loop body is unrolled four times.  All four argument slots are
; overwritten as the routine runs: n is decremented in place, and r, a, b
; are advanced by 16 bytes after every group of four words.
;
; Register use inside the loop:
;   r6 = a pointer, r5 = b pointer, r3 = r pointer, r4 = carry,
;   r2 = a[i] + carry, r1 = r2 + b[i], r0 = scratch 0/1 flag
332.entry BN_ADD_WORDS,^m<r2,r3,r4,r5,r6,r7>
333
334 tstl 16(ap)
335 bgtr noname.21 ; n > 0: there is work to do
336 clrl r7 ; n <= 0: return value is 0
337 brw noname.22
338noname.21:
339
340 clrl r4 ; carry = 0
341
342 tstl r0 ; executed once on fall-through; only touches condition codes (looks like filler)
343noname.23: ; ---- top of the 4x-unrolled loop ----
344
345 movl 8(ap),r6 ; r6 = current a pointer
346 addl3 r4,(r6),r2 ; r2 = a[0] + carry
347
348 bicl2 #0,r2 ; clears no bits: r2 unchanged
349
350 clrl r0
351 cmpl r2,r4
352 bgequ vcg.3 ; r2 >= carry (unsigned): no wrap-around
353 incl r0 ; sum wrapped: flag = 1
354vcg.3:
355 movl r0,r4 ; carry = wrap-around of (a[0] + carry)
356
357 movl 12(ap),r5 ; r5 = current b pointer
358 addl3 (r5),r2,r1 ; r1 = r2 + b[0]
359 bicl2 #0,r1 ; clears no bits: r1 unchanged
360
361 clrl r0
362 cmpl r1,r2
363 bgequ vcg.4 ; r1 >= r2 (unsigned): no wrap-around
364 incl r0
365vcg.4:
366 addl2 r0,r4 ; carry += wrap-around of the second addition
367
368 movl 4(ap),r3 ; r3 = current r pointer
369 movl r1,(r3) ; r[0] = sum
370
371 decl 16(ap) ; --n
372 bgtr gen.1 ; more words: continue
373 brw noname.25 ; done (word-displacement branch to the exit)
374gen.1:
375noname.24: ; ---- second word of the group ----
376
377 addl3 r4,4(r6),r2 ; r2 = a[1] + carry
378
379 bicl2 #0,r2
380
381 clrl r0
382 cmpl r2,r4
383 bgequ vcg.5
384 incl r0
385vcg.5:
386 movl r0,r4 ; carry = wrap-around of (a[1] + carry)
387
388 addl3 4(r5),r2,r1 ; r1 = r2 + b[1]
389 bicl2 #0,r1
390
391 clrl r0
392 cmpl r1,r2
393 bgequ vcg.6
394 incl r0
395vcg.6:
396 addl2 r0,r4 ; carry += wrap-around of the second addition
397
398 movl r1,4(r3) ; r[1] = sum
399
400 decl 16(ap) ; --n
401 bleq noname.25 ; n exhausted: exit
402noname.26: ; ---- third word of the group ----
403
404 addl3 r4,8(r6),r2 ; r2 = a[2] + carry
405
406 bicl2 #0,r2
407
408 clrl r0
409 cmpl r2,r4
410 bgequ vcg.7
411 incl r0
412vcg.7:
413 movl r0,r4 ; carry = wrap-around of (a[2] + carry)
414
415 addl3 8(r5),r2,r1 ; r1 = r2 + b[2]
416 bicl2 #0,r1
417
418 clrl r0
419 cmpl r1,r2
420 bgequ vcg.8
421 incl r0
422vcg.8:
423 addl2 r0,r4 ; carry += wrap-around of the second addition
424
425 movl r1,8(r3) ; r[2] = sum
426
427 decl 16(ap) ; --n
428 bleq noname.25 ; n exhausted: exit
429noname.27: ; ---- fourth word of the group ----
430
431 addl3 r4,12(r6),r2 ; r2 = a[3] + carry
432
433 bicl2 #0,r2
434
435 clrl r0
436 cmpl r2,r4
437 bgequ vcg.9
438 incl r0
439vcg.9:
440 movl r0,r4 ; carry = wrap-around of (a[3] + carry)
441
442 addl3 12(r5),r2,r1 ; r1 = r2 + b[3]
443 bicl2 #0,r1
444
445 clrl r0
446 cmpl r1,r2
447 bgequ vcg.10
448 incl r0
449vcg.10:
450 addl2 r0,r4 ; carry += wrap-around of the second addition
451
452 movl r1,12(r3) ; r[3] = sum
453
454 decl 16(ap) ; --n
455 bleq noname.25 ; n exhausted: exit
456noname.28: ; ---- advance all three pointers by four longwords ----
457
458 addl3 #16,r6,8(ap) ; a += 16 bytes, stored back into the argument slot
459
460 addl3 #16,r5,12(ap) ; b += 16 bytes
461
462 addl3 #16,r3,4(ap) ; r += 16 bytes
463 brw noname.23 ; next group of four
464 tstl r0 ; not reached: follows an unconditional branch
465noname.25:
466
467 movl r4,r7 ; return value = final carry
468
469noname.22: ; common exit; r7 holds the value to return
470 movl r7,r0
471 ret
472 nop ; not reached: follows ret
473
474
475
476;r=4 ;(AP)
477;a=8 ;(AP)
478;b=12 ;(AP)
479;n=16 ;(AP) n by value (input)
480
481 .psect code,nowrt
482
; BN_SUB_WORDS -- subtract one array of longwords from another with borrow
; propagation, store the differences, and return the final borrow.
;
; Arguments (offsets as given by the r/a/b/n legend just above this routine):
;    4(ap)  r  address the differences are stored to
;    8(ap)  a  address of the minuend array
;   12(ap)  b  address of the subtrahend array
;   16(ap)  n  number of longwords; used as a signed down-counter
;
; Returns in r0: the borrow left after the last word, or 0 when n <= 0.
;
; Per word: r[i] = a[i] - b[i] - borrow.  The borrow is recomputed as
; (a[i] < b[i], unsigned) only when a[i] != b[i]; when the two words are
; equal the previous borrow is carried through unchanged.
;
; The loop body is unrolled four times.  All four argument slots are
; overwritten as the routine runs: n is decremented in place, and r, a, b
; are advanced by 16 bytes after every group of four words.
;
; Register use inside the loop:
;   r5 = a pointer, r4 = b pointer, r3 = r pointer, r6 = borrow,
;   r1/r2 = the two operand words, r0 = scratch
483.entry BN_SUB_WORDS,^m<r2,r3,r4,r5,r6,r7>
484
485 clrl r6 ; borrow = 0
486
487 tstl 16(ap)
488 bgtr noname.31 ; n > 0: there is work to do
489 clrl r7 ; n <= 0: return value is 0
490 brw noname.32
491 tstl r0 ; not reached: follows an unconditional branch
492noname.31:
493
494noname.33: ; ---- top of the 4x-unrolled loop ----
495
496 movl 8(ap),r5 ; r5 = current a pointer
497 movl (r5),r1 ; r1 = a[0]
498 movl 12(ap),r4 ; r4 = current b pointer
499 movl (r4),r2 ; r2 = b[0]
500
501 movl 4(ap),r3 ; r3 = current r pointer
502 subl3 r2,r1,r0 ; r0 = a[0] - b[0]
503 subl2 r6,r0 ; r0 -= borrow
504 bicl3 #0,r0,(r3) ; r[0] = r0 (clearing no bits makes this a plain store)
505
506 cmpl r1,r2
507 beql noname.34 ; a[0] == b[0]: borrow is unchanged
508 clrl r0
509 cmpl r1,r2
510 bgequ vcg.11 ; a[0] >= b[0] (unsigned): no borrow
511 incl r0 ; a[0] < b[0]: borrow
512vcg.11:
513 movl r0,r6 ; borrow = (a[0] < b[0])
514noname.34:
515
516 decl 16(ap) ; --n
517 bgtr gen.2 ; more words: continue
518 brw noname.36 ; done (word-displacement branch to the exit)
519gen.2:
520noname.35: ; ---- second word; note r2 holds a[1] and r1 holds b[1] here ----
521
522 movl 4(r5),r2 ; r2 = a[1]
523 movl 4(r4),r1 ; r1 = b[1]
524
525 subl3 r1,r2,r0 ; r0 = a[1] - b[1]
526 subl2 r6,r0 ; r0 -= borrow
527 bicl3 #0,r0,4(r3) ; r[1] = r0
528
529 cmpl r2,r1
530 beql noname.37 ; a[1] == b[1]: borrow is unchanged
531 clrl r0
532 cmpl r2,r1
533 bgequ vcg.12 ; a[1] >= b[1] (unsigned): no borrow
534 incl r0
535vcg.12:
536 movl r0,r6 ; borrow = (a[1] < b[1])
537noname.37:
538
539 decl 16(ap) ; --n
540 bleq noname.36 ; n exhausted: exit
541noname.38: ; ---- third word ----
542
543 movl 8(r5),r1 ; r1 = a[2]
544 movl 8(r4),r2 ; r2 = b[2]
545
546 subl3 r2,r1,r0 ; r0 = a[2] - b[2]
547 subl2 r6,r0 ; r0 -= borrow
548 bicl3 #0,r0,8(r3) ; r[2] = r0
549
550 cmpl r1,r2
551 beql noname.39 ; a[2] == b[2]: borrow is unchanged
552 clrl r0
553 cmpl r1,r2
554 bgequ vcg.13 ; a[2] >= b[2] (unsigned): no borrow
555 incl r0
556vcg.13:
557 movl r0,r6 ; borrow = (a[2] < b[2])
558noname.39:
559
560 decl 16(ap) ; --n
561 bleq noname.36 ; n exhausted: exit
562noname.40: ; ---- fourth word ----
563
564 movl 12(r5),r1 ; r1 = a[3]
565 movl 12(r4),r2 ; r2 = b[3]
566
567 subl3 r2,r1,r0 ; r0 = a[3] - b[3]
568 subl2 r6,r0 ; r0 -= borrow
569 bicl3 #0,r0,12(r3) ; r[3] = r0
570
571 cmpl r1,r2
572 beql noname.41 ; a[3] == b[3]: borrow is unchanged
573 clrl r0
574 cmpl r1,r2
575 bgequ vcg.14 ; a[3] >= b[3] (unsigned): no borrow
576 incl r0
577vcg.14:
578 movl r0,r6 ; borrow = (a[3] < b[3])
579noname.41:
580
581 decl 16(ap) ; --n
582 bleq noname.36 ; n exhausted: exit
583noname.42: ; ---- advance all three pointers by four longwords ----
584
585 addl3 #16,r5,8(ap) ; a += 16 bytes, stored back into the argument slot
586
587 addl3 #16,r4,12(ap) ; b += 16 bytes
588
589 addl3 #16,r3,4(ap) ; r += 16 bytes
590 brw noname.33 ; next group of four
591 tstl r0 ; not reached: follows an unconditional branch
592noname.36:
593
594 movl r6,r7 ; return value = final borrow
595
596noname.32: ; common exit; r7 holds the value to return
597 movl r7,r0
598 ret
599 nop ; not reached: follows ret
600
601
600
601
602
603;r=4 ;(AP)
604;a=8 ;(AP)
605;b=12 ;(AP)
606;n=16 ;(AP) n by value (input)
607
608 .psect code,nowrt
609
610.entry BN_MUL_COMBA8,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
611 movab -924(sp),sp
612 clrq r8
613
614 clrl r10
615
616 movl 8(ap),r6
617 movzwl 2(r6),r3
618 movl 12(ap),r7
619 bicl3 #-65536,(r7),r2
620 movzwl 2(r7),r0
621 bicl2 #-65536,r0
622 bicl3 #-65536,(r6),-12(fp)
623 bicl3 #-65536,r3,-16(fp)
624 mull3 r0,-12(fp),-4(fp)
625 mull2 r2,-12(fp)
626 mull3 r2,-16(fp),-8(fp)
627 mull2 r0,-16(fp)
628 addl3 -4(fp),-8(fp),r0
629 bicl3 #0,r0,-4(fp)
630 cmpl -4(fp),-8(fp)
631 bgequ noname.45
632 addl2 #65536,-16(fp)
633noname.45:
634 movzwl -2(fp),r0
635 bicl2 #-65536,r0
636 addl2 r0,-16(fp)
637 bicl3 #-65536,-4(fp),r0
638 ashl #16,r0,-8(fp)
639 addl3 -8(fp),-12(fp),r0
640 bicl3 #0,r0,-12(fp)
641 cmpl -12(fp),-8(fp)
642 bgequ noname.46
643 incl -16(fp)
644noname.46:
645 movl -12(fp),r1
646 movl -16(fp),r2
647 addl2 r1,r9
648 bicl2 #0,r9
649 cmpl r9,r1
650 bgequ noname.47
651 incl r2
652noname.47:
653 addl2 r2,r8
654 bicl2 #0,r8
655 cmpl r8,r2
656 bgequ noname.48
657 incl r10
658noname.48:
659
660 movl 4(ap),r11
661 movl r9,(r11)
662
663 clrl r9
664
665 movzwl 2(r6),r2
666 bicl3 #-65536,4(r7),r3
667 movzwl 6(r7),r0
668 bicl2 #-65536,r0
669 bicl3 #-65536,(r6),-28(fp)
670 bicl3 #-65536,r2,-32(fp)
671 mull3 r0,-28(fp),-20(fp)
672 mull2 r3,-28(fp)
673 mull3 r3,-32(fp),-24(fp)
674 mull2 r0,-32(fp)
675 addl3 -20(fp),-24(fp),r0
676 bicl3 #0,r0,-20(fp)
677 cmpl -20(fp),-24(fp)
678 bgequ noname.49
679 addl2 #65536,-32(fp)
680noname.49:
681 movzwl -18(fp),r0
682 bicl2 #-65536,r0
683 addl2 r0,-32(fp)
684 bicl3 #-65536,-20(fp),r0
685 ashl #16,r0,-24(fp)
686 addl3 -24(fp),-28(fp),r0
687 bicl3 #0,r0,-28(fp)
688 cmpl -28(fp),-24(fp)
689 bgequ noname.50
690 incl -32(fp)
691noname.50:
692 movl -28(fp),r1
693 movl -32(fp),r2
694 addl2 r1,r8
695 bicl2 #0,r8
696 cmpl r8,r1
697 bgequ noname.51
698 incl r2
699noname.51:
700 addl2 r2,r10
701 bicl2 #0,r10
702 cmpl r10,r2
703 bgequ noname.52
704 incl r9
705noname.52:
706
707 movzwl 6(r6),r2
708 bicl3 #-65536,(r7),r3
709 movzwl 2(r7),r0
710 bicl2 #-65536,r0
711 bicl3 #-65536,4(r6),-44(fp)
712 bicl3 #-65536,r2,-48(fp)
713 mull3 r0,-44(fp),-36(fp)
714 mull2 r3,-44(fp)
715 mull3 r3,-48(fp),-40(fp)
716 mull2 r0,-48(fp)
717 addl3 -36(fp),-40(fp),r0
718 bicl3 #0,r0,-36(fp)
719 cmpl -36(fp),-40(fp)
720 bgequ noname.53
721 addl2 #65536,-48(fp)
722noname.53:
723 movzwl -34(fp),r0
724 bicl2 #-65536,r0
725 addl2 r0,-48(fp)
726 bicl3 #-65536,-36(fp),r0
727 ashl #16,r0,-40(fp)
728 addl3 -40(fp),-44(fp),r0
729 bicl3 #0,r0,-44(fp)
730 cmpl -44(fp),-40(fp)
731 bgequ noname.54
732 incl -48(fp)
733noname.54:
734 movl -44(fp),r1
735 movl -48(fp),r2
736 addl2 r1,r8
737 bicl2 #0,r8
738 cmpl r8,r1
739 bgequ noname.55
740 incl r2
741noname.55:
742 addl2 r2,r10
743 bicl2 #0,r10
744 cmpl r10,r2
745 bgequ noname.56
746 incl r9
747noname.56:
748
749 movl r8,4(r11)
750
751 clrl r8
752
753 movzwl 10(r6),r2
754 bicl3 #-65536,(r7),r3
755 movzwl 2(r7),r0
756 bicl2 #-65536,r0
757 bicl3 #-65536,8(r6),-60(fp)
758 bicl3 #-65536,r2,-64(fp)
759 mull3 r0,-60(fp),-52(fp)
760 mull2 r3,-60(fp)
761 mull3 r3,-64(fp),-56(fp)
762 mull2 r0,-64(fp)
763 addl3 -52(fp),-56(fp),r0
764 bicl3 #0,r0,-52(fp)
765 cmpl -52(fp),-56(fp)
766 bgequ noname.57
767 addl2 #65536,-64(fp)
768noname.57:
769 movzwl -50(fp),r0
770 bicl2 #-65536,r0
771 addl2 r0,-64(fp)
772 bicl3 #-65536,-52(fp),r0
773 ashl #16,r0,-56(fp)
774 addl3 -56(fp),-60(fp),r0
775 bicl3 #0,r0,-60(fp)
776 cmpl -60(fp),-56(fp)
777 bgequ noname.58
778 incl -64(fp)
779noname.58:
780 movl -60(fp),r1
781 movl -64(fp),r2
782 addl2 r1,r10
783 bicl2 #0,r10
784 cmpl r10,r1
785 bgequ noname.59
786 incl r2
787noname.59:
788 addl2 r2,r9
789 bicl2 #0,r9
790 cmpl r9,r2
791 bgequ noname.60
792 incl r8
793noname.60:
794
795 movzwl 6(r6),r2
796 bicl3 #-65536,4(r7),r3
797 movzwl 6(r7),r0
798 bicl2 #-65536,r0
799 bicl3 #-65536,4(r6),-76(fp)
800 bicl3 #-65536,r2,-80(fp)
801 mull3 r0,-76(fp),-68(fp)
802 mull2 r3,-76(fp)
803 mull3 r3,-80(fp),-72(fp)
804 mull2 r0,-80(fp)
805 addl3 -68(fp),-72(fp),r0
806 bicl3 #0,r0,-68(fp)
807 cmpl -68(fp),-72(fp)
808 bgequ noname.61
809 addl2 #65536,-80(fp)
810noname.61:
811 movzwl -66(fp),r0
812 bicl2 #-65536,r0
813 addl2 r0,-80(fp)
814 bicl3 #-65536,-68(fp),r0
815 ashl #16,r0,-72(fp)
816 addl3 -72(fp),-76(fp),r0
817 bicl3 #0,r0,-76(fp)
818 cmpl -76(fp),-72(fp)
819 bgequ noname.62
820 incl -80(fp)
821noname.62:
822 movl -76(fp),r1
823 movl -80(fp),r2
824 addl2 r1,r10
825 bicl2 #0,r10
826 cmpl r10,r1
827 bgequ noname.63
828 incl r2
829noname.63:
830 addl2 r2,r9
831 bicl2 #0,r9
832 cmpl r9,r2
833 bgequ noname.64
834 incl r8
835noname.64:
836
837 movzwl 2(r6),r2
838 bicl3 #-65536,8(r7),r3
839 movzwl 10(r7),r0
840 bicl2 #-65536,r0
841 bicl3 #-65536,(r6),-92(fp)
842 bicl3 #-65536,r2,-96(fp)
843 mull3 r0,-92(fp),-84(fp)
844 mull2 r3,-92(fp)
845 mull3 r3,-96(fp),-88(fp)
846 mull2 r0,-96(fp)
847 addl3 -84(fp),-88(fp),r0
848 bicl3 #0,r0,-84(fp)
849 cmpl -84(fp),-88(fp)
850 bgequ noname.65
851 addl2 #65536,-96(fp)
852noname.65:
853 movzwl -82(fp),r0
854 bicl2 #-65536,r0
855 addl2 r0,-96(fp)
856 bicl3 #-65536,-84(fp),r0
857 ashl #16,r0,-88(fp)
858 addl3 -88(fp),-92(fp),r0
859 bicl3 #0,r0,-92(fp)
860 cmpl -92(fp),-88(fp)
861 bgequ noname.66
862 incl -96(fp)
863noname.66:
864 movl -92(fp),r1
865 movl -96(fp),r2
866 addl2 r1,r10
867 bicl2 #0,r10
868 cmpl r10,r1
869 bgequ noname.67
870 incl r2
871noname.67:
872 addl2 r2,r9
873 bicl2 #0,r9
874 cmpl r9,r2
875 bgequ noname.68
876 incl r8
877noname.68:
878
879 movl r10,8(r11)
880
881 clrl r10
882
883 movzwl 2(r6),r2
884 bicl3 #-65536,12(r7),r3
885 movzwl 14(r7),r0
886 bicl2 #-65536,r0
887 bicl3 #-65536,(r6),-108(fp)
888 bicl3 #-65536,r2,-112(fp)
889 mull3 r0,-108(fp),-100(fp)
890 mull2 r3,-108(fp)
891 mull3 r3,-112(fp),-104(fp)
892 mull2 r0,-112(fp)
893 addl3 -100(fp),-104(fp),r0
894 bicl3 #0,r0,-100(fp)
895 cmpl -100(fp),-104(fp)
896 bgequ noname.69
897 addl2 #65536,-112(fp)
898noname.69:
899 movzwl -98(fp),r0
900 bicl2 #-65536,r0
901 addl2 r0,-112(fp)
902 bicl3 #-65536,-100(fp),r0
903 ashl #16,r0,-104(fp)
904 addl3 -104(fp),-108(fp),r0
905 bicl3 #0,r0,-108(fp)
906 cmpl -108(fp),-104(fp)
907 bgequ noname.70
908 incl -112(fp)
909noname.70:
910 movl -108(fp),r1
911 movl -112(fp),r2
912 addl2 r1,r9
913 bicl2 #0,r9
914 cmpl r9,r1
915 bgequ noname.71
916 incl r2
917noname.71:
918 addl2 r2,r8
919 bicl2 #0,r8
920 cmpl r8,r2
921 bgequ noname.72
922 incl r10
923noname.72:
924
925 movzwl 6(r6),r2
926 bicl3 #-65536,8(r7),r3
927 movzwl 10(r7),r0
928 bicl2 #-65536,r0
929 bicl3 #-65536,4(r6),-124(fp)
930 bicl3 #-65536,r2,-128(fp)
931 mull3 r0,-124(fp),-116(fp)
932 mull2 r3,-124(fp)
933 mull3 r3,-128(fp),-120(fp)
934 mull2 r0,-128(fp)
935 addl3 -116(fp),-120(fp),r0
936 bicl3 #0,r0,-116(fp)
937 cmpl -116(fp),-120(fp)
938 bgequ noname.73
939 addl2 #65536,-128(fp)
940noname.73:
941 movzwl -114(fp),r0
942 bicl2 #-65536,r0
943 addl2 r0,-128(fp)
944 bicl3 #-65536,-116(fp),r0
945 ashl #16,r0,-120(fp)
946 addl3 -120(fp),-124(fp),r0
947 bicl3 #0,r0,-124(fp)
948 cmpl -124(fp),-120(fp)
949 bgequ noname.74
950 incl -128(fp)
951noname.74:
952 movl -124(fp),r1
953 movl -128(fp),r2
954 addl2 r1,r9
955 bicl2 #0,r9
956 cmpl r9,r1
957 bgequ noname.75
958 incl r2
959noname.75:
960 addl2 r2,r8
961 bicl2 #0,r8
962 cmpl r8,r2
963 bgequ noname.76
964 incl r10
965noname.76:
966
967 movzwl 10(r6),r2
968 bicl3 #-65536,4(r7),r3
969 movzwl 6(r7),r0
970 bicl2 #-65536,r0
971 bicl3 #-65536,8(r6),-140(fp)
972 bicl3 #-65536,r2,-144(fp)
973 mull3 r0,-140(fp),-132(fp)
974 mull2 r3,-140(fp)
975 mull3 r3,-144(fp),-136(fp)
976 mull2 r0,-144(fp)
977 addl3 -132(fp),-136(fp),r0
978 bicl3 #0,r0,-132(fp)
979 cmpl -132(fp),-136(fp)
980 bgequ noname.77
981 addl2 #65536,-144(fp)
982noname.77:
983 movzwl -130(fp),r0
984 bicl2 #-65536,r0
985 addl2 r0,-144(fp)
986 bicl3 #-65536,-132(fp),r0
987 ashl #16,r0,-136(fp)
988 addl3 -136(fp),-140(fp),r0
989 bicl3 #0,r0,-140(fp)
990 cmpl -140(fp),-136(fp)
991 bgequ noname.78
992 incl -144(fp)
993noname.78:
994 movl -140(fp),r1
995 movl -144(fp),r2
996 addl2 r1,r9
997 bicl2 #0,r9
998 cmpl r9,r1
999 bgequ noname.79
1000 incl r2
1001noname.79:
1002 addl2 r2,r8
1003 bicl2 #0,r8
1004 cmpl r8,r2
1005 bgequ noname.80
1006 incl r10
1007noname.80:
1008
1009 movzwl 14(r6),r2
1010 bicl3 #-65536,(r7),r3
1011 movzwl 2(r7),r0
1012 bicl2 #-65536,r0
1013 bicl3 #-65536,12(r6),-156(fp)
1014 bicl3 #-65536,r2,-160(fp)
1015 mull3 r0,-156(fp),-148(fp)
1016 mull2 r3,-156(fp)
1017 mull3 r3,-160(fp),-152(fp)
1018 mull2 r0,-160(fp)
1019 addl3 -148(fp),-152(fp),r0
1020 bicl3 #0,r0,-148(fp)
1021 cmpl -148(fp),-152(fp)
1022 bgequ noname.81
1023 addl2 #65536,-160(fp)
1024noname.81:
1025 movzwl -146(fp),r0
1026 bicl2 #-65536,r0
1027 addl2 r0,-160(fp)
1028 bicl3 #-65536,-148(fp),r0
1029 ashl #16,r0,-152(fp)
1030 addl3 -152(fp),-156(fp),r0
1031 bicl3 #0,r0,-156(fp)
1032 cmpl -156(fp),-152(fp)
1033 bgequ noname.82
1034 incl -160(fp)
1035noname.82:
1036 movl -156(fp),r1
1037 movl -160(fp),r2
1038 addl2 r1,r9
1039 bicl2 #0,r9
1040 cmpl r9,r1
1041 bgequ noname.83
1042 incl r2
1043noname.83:
1044 addl2 r2,r8
1045 bicl2 #0,r8
1046 cmpl r8,r2
1047 bgequ noname.84
1048 incl r10
1049noname.84:
1050
1051 movl r9,12(r11)
1052
1053 clrl r9
1054
1055 movzwl 18(r6),r2
1056 bicl3 #-65536,(r7),r3
1057 movzwl 2(r7),r0
1058 bicl2 #-65536,r0
1059 bicl3 #-65536,16(r6),-172(fp)
1060 bicl3 #-65536,r2,-176(fp)
1061 mull3 r0,-172(fp),-164(fp)
1062 mull2 r3,-172(fp)
1063 mull3 r3,-176(fp),-168(fp)
1064 mull2 r0,-176(fp)
1065 addl3 -164(fp),-168(fp),r0
1066 bicl3 #0,r0,-164(fp)
1067 cmpl -164(fp),-168(fp)
1068 bgequ noname.85
1069 addl2 #65536,-176(fp)
1070noname.85:
1071 movzwl -162(fp),r0
1072 bicl2 #-65536,r0
1073 addl2 r0,-176(fp)
1074 bicl3 #-65536,-164(fp),r0
1075 ashl #16,r0,-168(fp)
1076 addl3 -168(fp),-172(fp),r0
1077 bicl3 #0,r0,-172(fp)
1078 cmpl -172(fp),-168(fp)
1079 bgequ noname.86
1080 incl -176(fp)
1081noname.86:
1082 movl -172(fp),r1
1083 movl -176(fp),r2
1084 addl2 r1,r8
1085 bicl2 #0,r8
1086 cmpl r8,r1
1087 bgequ noname.87
1088 incl r2
1089noname.87:
1090 addl2 r2,r10
1091 bicl2 #0,r10
1092 cmpl r10,r2
1093 bgequ noname.88
1094 incl r9
1095noname.88:
1096
1097 movzwl 14(r6),r2
1098 bicl3 #-65536,4(r7),r3
1099 movzwl 6(r7),r0
1100 bicl2 #-65536,r0
1101 bicl3 #-65536,12(r6),-188(fp)
1102 bicl3 #-65536,r2,-192(fp)
1103 mull3 r0,-188(fp),-180(fp)
1104 mull2 r3,-188(fp)
1105 mull3 r3,-192(fp),-184(fp)
1106 mull2 r0,-192(fp)
1107 addl3 -180(fp),-184(fp),r0
1108 bicl3 #0,r0,-180(fp)
1109 cmpl -180(fp),-184(fp)
1110 bgequ noname.89
1111 addl2 #65536,-192(fp)
1112noname.89:
1113 movzwl -178(fp),r0
1114 bicl2 #-65536,r0
1115 addl2 r0,-192(fp)
1116 bicl3 #-65536,-180(fp),r0
1117 ashl #16,r0,-184(fp)
1118 addl3 -184(fp),-188(fp),r0
1119 bicl3 #0,r0,-188(fp)
1120 cmpl -188(fp),-184(fp)
1121 bgequ noname.90
1122 incl -192(fp)
1123noname.90:
1124 movl -188(fp),r1
1125 movl -192(fp),r2
1126 addl2 r1,r8
1127 bicl2 #0,r8
1128 cmpl r8,r1
1129 bgequ noname.91
1130 incl r2
1131noname.91:
1132 addl2 r2,r10
1133 bicl2 #0,r10
1134 cmpl r10,r2
1135 bgequ noname.92
1136 incl r9
1137noname.92:
1138
1139 movzwl 10(r6),r2
1140 bicl3 #-65536,8(r7),r3
1141 movzwl 10(r7),r0
1142 bicl2 #-65536,r0
1143 bicl3 #-65536,8(r6),-204(fp)
1144 bicl3 #-65536,r2,-208(fp)
1145 mull3 r0,-204(fp),-196(fp)
1146 mull2 r3,-204(fp)
1147 mull3 r3,-208(fp),-200(fp)
1148 mull2 r0,-208(fp)
1149 addl3 -196(fp),-200(fp),r0
1150 bicl3 #0,r0,-196(fp)
1151 cmpl -196(fp),-200(fp)
1152 bgequ noname.93
1153 addl2 #65536,-208(fp)
1154noname.93:
1155 movzwl -194(fp),r0
1156 bicl2 #-65536,r0
1157 addl2 r0,-208(fp)
1158 bicl3 #-65536,-196(fp),r0
1159 ashl #16,r0,-200(fp)
1160 addl3 -200(fp),-204(fp),r0
1161 bicl3 #0,r0,-204(fp)
1162 cmpl -204(fp),-200(fp)
1163 bgequ noname.94
1164 incl -208(fp)
1165noname.94:
1166 movl -204(fp),r1
1167 movl -208(fp),r2
1168 addl2 r1,r8
1169 bicl2 #0,r8
1170 cmpl r8,r1
1171 bgequ noname.95
1172 incl r2
1173noname.95:
1174 addl2 r2,r10
1175 bicl2 #0,r10
1176 cmpl r10,r2
1177 bgequ noname.96
1178 incl r9
1179noname.96:
1180
1181 movzwl 6(r6),r2
1182 bicl3 #-65536,12(r7),r3
1183 movzwl 14(r7),r0
1184 bicl2 #-65536,r0
1185 bicl3 #-65536,4(r6),-220(fp)
1186 bicl3 #-65536,r2,-224(fp)
1187 mull3 r0,-220(fp),-212(fp)
1188 mull2 r3,-220(fp)
1189 mull3 r3,-224(fp),-216(fp)
1190 mull2 r0,-224(fp)
1191 addl3 -212(fp),-216(fp),r0
1192 bicl3 #0,r0,-212(fp)
1193 cmpl -212(fp),-216(fp)
1194 bgequ noname.97
1195 addl2 #65536,-224(fp)
1196noname.97:
1197 movzwl -210(fp),r0
1198 bicl2 #-65536,r0
1199 addl2 r0,-224(fp)
1200 bicl3 #-65536,-212(fp),r0
1201 ashl #16,r0,-216(fp)
1202 addl3 -216(fp),-220(fp),r0
1203 bicl3 #0,r0,-220(fp)
1204 cmpl -220(fp),-216(fp)
1205 bgequ noname.98
1206 incl -224(fp)
1207noname.98:
1208 movl -220(fp),r1
1209 movl -224(fp),r2
1210 addl2 r1,r8
1211 bicl2 #0,r8
1212 cmpl r8,r1
1213 bgequ noname.99
1214 incl r2
1215noname.99:
1216 addl2 r2,r10
1217 bicl2 #0,r10
1218 cmpl r10,r2
1219 bgequ noname.100
1220 incl r9
1221noname.100:
1222
1223 movzwl 2(r6),r2
1224 bicl3 #-65536,16(r7),r3
1225 movzwl 18(r7),r0
1226 bicl2 #-65536,r0
1227 bicl3 #-65536,(r6),-236(fp)
1228 bicl3 #-65536,r2,-240(fp)
1229 mull3 r0,-236(fp),-228(fp)
1230 mull2 r3,-236(fp)
1231 mull3 r3,-240(fp),-232(fp)
1232 mull2 r0,-240(fp)
1233 addl3 -228(fp),-232(fp),r0
1234 bicl3 #0,r0,-228(fp)
1235 cmpl -228(fp),-232(fp)
1236 bgequ noname.101
1237 addl2 #65536,-240(fp)
1238noname.101:
1239 movzwl -226(fp),r0
1240 bicl2 #-65536,r0
1241 addl2 r0,-240(fp)
1242 bicl3 #-65536,-228(fp),r0
1243 ashl #16,r0,-232(fp)
1244 addl3 -232(fp),-236(fp),r0
1245 bicl3 #0,r0,-236(fp)
1246 cmpl -236(fp),-232(fp)
1247 bgequ noname.102
1248 incl -240(fp)
1249noname.102:
1250 movl -236(fp),r1
1251 movl -240(fp),r2
1252 addl2 r1,r8
1253 bicl2 #0,r8
1254 cmpl r8,r1
1255 bgequ noname.103
1256 incl r2
1257noname.103:
1258 addl2 r2,r10
1259 bicl2 #0,r10
1260 cmpl r10,r2
1261 bgequ noname.104
1262 incl r9
1263noname.104:
1264
1265 movl r8,16(r11)
1266
1267 clrl r8
1268
1269 movzwl 2(r6),r2
1270 bicl3 #-65536,20(r7),r3
1271 movzwl 22(r7),r0
1272 bicl2 #-65536,r0
1273 bicl3 #-65536,(r6),-252(fp)
1274 bicl3 #-65536,r2,-256(fp)
1275 mull3 r0,-252(fp),-244(fp)
1276 mull2 r3,-252(fp)
1277 mull3 r3,-256(fp),-248(fp)
1278 mull2 r0,-256(fp)
1279 addl3 -244(fp),-248(fp),r0
1280 bicl3 #0,r0,-244(fp)
1281 cmpl -244(fp),-248(fp)
1282 bgequ noname.105
1283 addl2 #65536,-256(fp)
1284noname.105:
1285 movzwl -242(fp),r0
1286 bicl2 #-65536,r0
1287 addl2 r0,-256(fp)
1288 bicl3 #-65536,-244(fp),r0
1289 ashl #16,r0,-248(fp)
1290 addl3 -248(fp),-252(fp),r0
1291 bicl3 #0,r0,-252(fp)
1292 cmpl -252(fp),-248(fp)
1293 bgequ noname.106
1294 incl -256(fp)
1295noname.106:
1296 movl -252(fp),r1
1297 movl -256(fp),r2
1298 addl2 r1,r10
1299 bicl2 #0,r10
1300 cmpl r10,r1
1301 bgequ noname.107
1302 incl r2
1303noname.107:
1304 addl2 r2,r9
1305 bicl2 #0,r9
1306 cmpl r9,r2
1307 bgequ noname.108
1308 incl r8
1309noname.108:
1310
1311 movzwl 6(r6),r2
1312 bicl3 #-65536,16(r7),r3
1313 movzwl 18(r7),r0
1314 bicl2 #-65536,r0
1315 bicl3 #-65536,4(r6),-268(fp)
1316 bicl3 #-65536,r2,-272(fp)
1317 mull3 r0,-268(fp),-260(fp)
1318 mull2 r3,-268(fp)
1319 mull3 r3,-272(fp),-264(fp)
1320 mull2 r0,-272(fp)
1321 addl3 -260(fp),-264(fp),r0
1322 bicl3 #0,r0,-260(fp)
1323 cmpl -260(fp),-264(fp)
1324 bgequ noname.109
1325 addl2 #65536,-272(fp)
1326noname.109:
1327 movzwl -258(fp),r0
1328 bicl2 #-65536,r0
1329 addl2 r0,-272(fp)
1330 bicl3 #-65536,-260(fp),r0
1331 ashl #16,r0,-264(fp)
1332 addl3 -264(fp),-268(fp),r0
1333 bicl3 #0,r0,-268(fp)
1334 cmpl -268(fp),-264(fp)
1335 bgequ noname.110
1336 incl -272(fp)
1337noname.110:
1338 movl -268(fp),r1
1339 movl -272(fp),r2
1340 addl2 r1,r10
1341 bicl2 #0,r10
1342 cmpl r10,r1
1343 bgequ noname.111
1344 incl r2
1345noname.111:
1346 addl2 r2,r9
1347 bicl2 #0,r9
1348 cmpl r9,r2
1349 bgequ noname.112
1350 incl r8
1351noname.112:
1352
1353 movzwl 10(r6),r2
1354 bicl3 #-65536,12(r7),r3
1355 movzwl 14(r7),r0
1356 bicl2 #-65536,r0
1357 bicl3 #-65536,8(r6),-284(fp)
1358 bicl3 #-65536,r2,-288(fp)
1359 mull3 r0,-284(fp),-276(fp)
1360 mull2 r3,-284(fp)
1361 mull3 r3,-288(fp),-280(fp)
1362 mull2 r0,-288(fp)
1363 addl3 -276(fp),-280(fp),r0
1364 bicl3 #0,r0,-276(fp)
1365 cmpl -276(fp),-280(fp)
1366 bgequ noname.113
1367 addl2 #65536,-288(fp)
1368noname.113:
1369 movzwl -274(fp),r0
1370 bicl2 #-65536,r0
1371 addl2 r0,-288(fp)
1372 bicl3 #-65536,-276(fp),r0
1373 ashl #16,r0,-280(fp)
1374 addl3 -280(fp),-284(fp),r0
1375 bicl3 #0,r0,-284(fp)
1376 cmpl -284(fp),-280(fp)
1377 bgequ noname.114
1378 incl -288(fp)
1379noname.114:
1380 movl -284(fp),r1
1381 movl -288(fp),r2
1382 addl2 r1,r10
1383 bicl2 #0,r10
1384 cmpl r10,r1
1385 bgequ noname.115
1386 incl r2
1387noname.115:
1388 addl2 r2,r9
1389 bicl2 #0,r9
1390 cmpl r9,r2
1391 bgequ noname.116
1392 incl r8
1393noname.116:
1394
1395 movzwl 14(r6),r2
1396 bicl3 #-65536,8(r7),r3
1397 movzwl 10(r7),r0
1398 bicl2 #-65536,r0
1399 bicl3 #-65536,12(r6),-300(fp)
1400 bicl3 #-65536,r2,-304(fp)
1401 mull3 r0,-300(fp),-292(fp)
1402 mull2 r3,-300(fp)
1403 mull3 r3,-304(fp),-296(fp)
1404 mull2 r0,-304(fp)
1405 addl3 -292(fp),-296(fp),r0
1406 bicl3 #0,r0,-292(fp)
1407 cmpl -292(fp),-296(fp)
1408 bgequ noname.117
1409 addl2 #65536,-304(fp)
1410noname.117:
1411 movzwl -290(fp),r0
1412 bicl2 #-65536,r0
1413 addl2 r0,-304(fp)
1414 bicl3 #-65536,-292(fp),r0
1415 ashl #16,r0,-296(fp)
1416 addl3 -296(fp),-300(fp),r0
1417 bicl3 #0,r0,-300(fp)
1418 cmpl -300(fp),-296(fp)
1419 bgequ noname.118
1420 incl -304(fp)
1421noname.118:
1422 movl -300(fp),r1
1423 movl -304(fp),r2
1424 addl2 r1,r10
1425 bicl2 #0,r10
1426 cmpl r10,r1
1427 bgequ noname.119
1428 incl r2
1429noname.119:
1430 addl2 r2,r9
1431 bicl2 #0,r9
1432 cmpl r9,r2
1433 bgequ noname.120
1434 incl r8
1435noname.120:
1436
1437 movzwl 18(r6),r2
1438 bicl3 #-65536,4(r7),r3
1439 movzwl 6(r7),r0
1440 bicl2 #-65536,r0
1441 bicl3 #-65536,16(r6),-316(fp)
1442 bicl3 #-65536,r2,-320(fp)
1443 mull3 r0,-316(fp),-308(fp)
1444 mull2 r3,-316(fp)
1445 mull3 r3,-320(fp),-312(fp)
1446 mull2 r0,-320(fp)
1447 addl3 -308(fp),-312(fp),r0
1448 bicl3 #0,r0,-308(fp)
1449 cmpl -308(fp),-312(fp)
1450 bgequ noname.121
1451 addl2 #65536,-320(fp)
1452noname.121:
1453 movzwl -306(fp),r0
1454 bicl2 #-65536,r0
1455 addl2 r0,-320(fp)
1456 bicl3 #-65536,-308(fp),r0
1457 ashl #16,r0,-312(fp)
1458 addl3 -312(fp),-316(fp),r0
1459 bicl3 #0,r0,-316(fp)
1460 cmpl -316(fp),-312(fp)
1461 bgequ noname.122
1462 incl -320(fp)
1463noname.122:
1464 movl -316(fp),r1
1465 movl -320(fp),r2
1466 addl2 r1,r10
1467 bicl2 #0,r10
1468 cmpl r10,r1
1469 bgequ noname.123
1470 incl r2
1471
1472noname.123:
1473 addl2 r2,r9
1474 bicl2 #0,r9
1475 cmpl r9,r2
1476 bgequ noname.124
1477 incl r8
1478noname.124:
1479
1480 movzwl 22(r6),r2
1481 bicl3 #-65536,(r7),r3
1482 movzwl 2(r7),r0
1483 bicl2 #-65536,r0
1484 bicl3 #-65536,20(r6),-332(fp)
1485 bicl3 #-65536,r2,-336(fp)
1486 mull3 r0,-332(fp),-324(fp)
1487 mull2 r3,-332(fp)
1488 mull3 r3,-336(fp),-328(fp)
1489 mull2 r0,-336(fp)
1490 addl3 -324(fp),-328(fp),r0
1491 bicl3 #0,r0,-324(fp)
1492 cmpl -324(fp),-328(fp)
1493 bgequ noname.125
1494 addl2 #65536,-336(fp)
1495noname.125:
1496 movzwl -322(fp),r0
1497 bicl2 #-65536,r0
1498 addl2 r0,-336(fp)
1499 bicl3 #-65536,-324(fp),r0
1500 ashl #16,r0,-328(fp)
1501 addl3 -328(fp),-332(fp),r0
1502 bicl3 #0,r0,-332(fp)
1503 cmpl -332(fp),-328(fp)
1504 bgequ noname.126
1505 incl -336(fp)
1506noname.126:
1507 movl -332(fp),r1
1508 movl -336(fp),r2
1509 addl2 r1,r10
1510 bicl2 #0,r10
1511 cmpl r10,r1
1512 bgequ noname.127
1513 incl r2
1514noname.127:
1515 addl2 r2,r9
1516 bicl2 #0,r9
1517 cmpl r9,r2
1518 bgequ noname.128
1519 incl r8
1520noname.128:
1521
1522 movl r10,20(r11)
1523
1524 clrl r10
1525
1526 movzwl 26(r6),r2
1527 bicl3 #-65536,(r7),r3
1528 movzwl 2(r7),r0
1529 bicl2 #-65536,r0
1530 bicl3 #-65536,24(r6),-348(fp)
1531 bicl3 #-65536,r2,-352(fp)
1532 mull3 r0,-348(fp),-340(fp)
1533 mull2 r3,-348(fp)
1534 mull3 r3,-352(fp),-344(fp)
1535 mull2 r0,-352(fp)
1536 addl3 -340(fp),-344(fp),r0
1537 bicl3 #0,r0,-340(fp)
1538 cmpl -340(fp),-344(fp)
1539 bgequ noname.129
1540 addl2 #65536,-352(fp)
1541noname.129:
1542 movzwl -338(fp),r0
1543 bicl2 #-65536,r0
1544 addl2 r0,-352(fp)
1545 bicl3 #-65536,-340(fp),r0
1546 ashl #16,r0,-344(fp)
1547 addl3 -344(fp),-348(fp),r0
1548 bicl3 #0,r0,-348(fp)
1549 cmpl -348(fp),-344(fp)
1550 bgequ noname.130
1551 incl -352(fp)
1552noname.130:
1553 movl -348(fp),r1
1554 movl -352(fp),r2
1555 addl2 r1,r9
1556 bicl2 #0,r9
1557 cmpl r9,r1
1558 bgequ noname.131
1559 incl r2
1560noname.131:
1561 addl2 r2,r8
1562 bicl2 #0,r8
1563 cmpl r8,r2
1564 bgequ noname.132
1565 incl r10
1566noname.132:
1567
1568 movzwl 22(r6),r2
1569 bicl3 #-65536,4(r7),r3
1570 movzwl 6(r7),r0
1571 bicl2 #-65536,r0
1572 bicl3 #-65536,20(r6),-364(fp)
1573 bicl3 #-65536,r2,-368(fp)
1574 mull3 r0,-364(fp),-356(fp)
1575 mull2 r3,-364(fp)
1576 mull3 r3,-368(fp),-360(fp)
1577 mull2 r0,-368(fp)
1578 addl3 -356(fp),-360(fp),r0
1579 bicl3 #0,r0,-356(fp)
1580 cmpl -356(fp),-360(fp)
1581 bgequ noname.133
1582 addl2 #65536,-368(fp)
1583noname.133:
1584 movzwl -354(fp),r0
1585 bicl2 #-65536,r0
1586 addl2 r0,-368(fp)
1587 bicl3 #-65536,-356(fp),r0
1588 ashl #16,r0,-360(fp)
1589 addl3 -360(fp),-364(fp),r0
1590 bicl3 #0,r0,-364(fp)
1591 cmpl -364(fp),-360(fp)
1592 bgequ noname.134
1593 incl -368(fp)
1594noname.134:
1595 movl -364(fp),r1
1596 movl -368(fp),r2
1597 addl2 r1,r9
1598 bicl2 #0,r9
1599 cmpl r9,r1
1600 bgequ noname.135
1601 incl r2
1602noname.135:
1603 addl2 r2,r8
1604 bicl2 #0,r8
1605 cmpl r8,r2
1606 bgequ noname.136
1607 incl r10
1608noname.136:
1609
1610 movzwl 18(r6),r2
1611 bicl3 #-65536,8(r7),r3
1612 movzwl 10(r7),r0
1613 bicl2 #-65536,r0
1614 bicl3 #-65536,16(r6),-380(fp)
1615 bicl3 #-65536,r2,-384(fp)
1616 mull3 r0,-380(fp),-372(fp)
1617 mull2 r3,-380(fp)
1618 mull3 r3,-384(fp),-376(fp)
1619 mull2 r0,-384(fp)
1620 addl3 -372(fp),-376(fp),r0
1621 bicl3 #0,r0,-372(fp)
1622 cmpl -372(fp),-376(fp)
1623 bgequ noname.137
1624 addl2 #65536,-384(fp)
1625noname.137:
1626 movzwl -370(fp),r0
1627 bicl2 #-65536,r0
1628 addl2 r0,-384(fp)
1629 bicl3 #-65536,-372(fp),r0
1630 ashl #16,r0,-376(fp)
1631 addl3 -376(fp),-380(fp),r0
1632 bicl3 #0,r0,-380(fp)
1633 cmpl -380(fp),-376(fp)
1634 bgequ noname.138
1635 incl -384(fp)
1636noname.138:
1637 movl -380(fp),r1
1638 movl -384(fp),r2
1639 addl2 r1,r9
1640 bicl2 #0,r9
1641 cmpl r9,r1
1642 bgequ noname.139
1643 incl r2
1644noname.139:
1645 addl2 r2,r8
1646 bicl2 #0,r8
1647 cmpl r8,r2
1648 bgequ noname.140
1649 incl r10
1650noname.140:
1651
1652 movzwl 14(r6),r2
1653 bicl3 #-65536,12(r7),r3
1654 movzwl 14(r7),r0
1655 bicl2 #-65536,r0
1656 bicl3 #-65536,12(r6),-396(fp)
1657 bicl3 #-65536,r2,-400(fp)
1658 mull3 r0,-396(fp),-388(fp)
1659 mull2 r3,-396(fp)
1660 mull3 r3,-400(fp),-392(fp)
1661 mull2 r0,-400(fp)
1662 addl3 -388(fp),-392(fp),r0
1663 bicl3 #0,r0,-388(fp)
1664 cmpl -388(fp),-392(fp)
1665 bgequ noname.141
1666 addl2 #65536,-400(fp)
1667noname.141:
1668 movzwl -386(fp),r0
1669 bicl2 #-65536,r0
1670 addl2 r0,-400(fp)
1671 bicl3 #-65536,-388(fp),r0
1672 ashl #16,r0,-392(fp)
1673 addl3 -392(fp),-396(fp),r0
1674 bicl3 #0,r0,-396(fp)
1675 cmpl -396(fp),-392(fp)
1676 bgequ noname.142
1677 incl -400(fp)
1678noname.142:
1679 movl -396(fp),r1
1680 movl -400(fp),r2
1681 addl2 r1,r9
1682 bicl2 #0,r9
1683 cmpl r9,r1
1684 bgequ noname.143
1685 incl r2
1686noname.143:
1687 addl2 r2,r8
1688 bicl2 #0,r8
1689 cmpl r8,r2
1690 bgequ noname.144
1691 incl r10
1692noname.144:
1693
1694 movzwl 10(r6),r2
1695 bicl3 #-65536,16(r7),r3
1696 movzwl 18(r7),r0
1697 bicl2 #-65536,r0
1698 bicl3 #-65536,8(r6),-412(fp)
1699 bicl3 #-65536,r2,-416(fp)
1700 mull3 r0,-412(fp),-404(fp)
1701 mull2 r3,-412(fp)
1702 mull3 r3,-416(fp),-408(fp)
1703 mull2 r0,-416(fp)
1704 addl3 -404(fp),-408(fp),r0
1705 bicl3 #0,r0,-404(fp)
1706 cmpl -404(fp),-408(fp)
1707 bgequ noname.145
1708 addl2 #65536,-416(fp)
1709noname.145:
1710 movzwl -402(fp),r0
1711 bicl2 #-65536,r0
1712 addl2 r0,-416(fp)
1713 bicl3 #-65536,-404(fp),r0
1714 ashl #16,r0,-408(fp)
1715 addl3 -408(fp),-412(fp),r0
1716 bicl3 #0,r0,-412(fp)
1717 cmpl -412(fp),-408(fp)
1718 bgequ noname.146
1719 incl -416(fp)
1720noname.146:
1721 movl -412(fp),r1
1722 movl -416(fp),r2
1723 addl2 r1,r9
1724 bicl2 #0,r9
1725 cmpl r9,r1
1726 bgequ noname.147
1727 incl r2
1728noname.147:
1729 addl2 r2,r8
1730 bicl2 #0,r8
1731 cmpl r8,r2
1732 bgequ noname.148
1733 incl r10
1734noname.148:
1735
1736 movzwl 6(r6),r2
1737 bicl3 #-65536,20(r7),r3
1738 movzwl 22(r7),r0
1739 bicl2 #-65536,r0
1740 bicl3 #-65536,4(r6),-428(fp)
1741 bicl3 #-65536,r2,-432(fp)
1742 mull3 r0,-428(fp),-420(fp)
1743 mull2 r3,-428(fp)
1744 mull3 r3,-432(fp),-424(fp)
1745 mull2 r0,-432(fp)
1746 addl3 -420(fp),-424(fp),r0
1747 bicl3 #0,r0,-420(fp)
1748 cmpl -420(fp),-424(fp)
1749 bgequ noname.149
1750 addl2 #65536,-432(fp)
1751noname.149:
1752 movzwl -418(fp),r0
1753 bicl2 #-65536,r0
1754 addl2 r0,-432(fp)
1755 bicl3 #-65536,-420(fp),r0
1756 ashl #16,r0,-424(fp)
1757 addl3 -424(fp),-428(fp),r0
1758 bicl3 #0,r0,-428(fp)
1759 cmpl -428(fp),-424(fp)
1760 bgequ noname.150
1761 incl -432(fp)
1762noname.150:
1763 movl -428(fp),r1
1764 movl -432(fp),r2
1765 addl2 r1,r9
1766 bicl2 #0,r9
1767 cmpl r9,r1
1768 bgequ noname.151
1769 incl r2
1770noname.151:
1771 addl2 r2,r8
1772 bicl2 #0,r8
1773 cmpl r8,r2
1774 bgequ noname.152
1775 incl r10
1776noname.152:
1777
1778 movzwl 2(r6),r2
1779 bicl3 #-65536,24(r7),r3
1780 movzwl 26(r7),r0
1781 bicl2 #-65536,r0
1782 bicl3 #-65536,(r6),-444(fp)
1783 bicl3 #-65536,r2,-448(fp)
1784 mull3 r0,-444(fp),-436(fp)
1785 mull2 r3,-444(fp)
1786 mull3 r3,-448(fp),-440(fp)
1787 mull2 r0,-448(fp)
1788 addl3 -436(fp),-440(fp),r0
1789 bicl3 #0,r0,-436(fp)
1790 cmpl -436(fp),-440(fp)
1791 bgequ noname.153
1792 addl2 #65536,-448(fp)
1793noname.153:
1794 movzwl -434(fp),r0
1795 bicl2 #-65536,r0
1796 addl2 r0,-448(fp)
1797 bicl3 #-65536,-436(fp),r0
1798 ashl #16,r0,-440(fp)
1799 addl3 -440(fp),-444(fp),r0
1800 bicl3 #0,r0,-444(fp)
1801 cmpl -444(fp),-440(fp)
1802 bgequ noname.154
1803 incl -448(fp)
1804noname.154:
1805 movl -444(fp),r1
1806 movl -448(fp),r2
1807 addl2 r1,r9
1808 bicl2 #0,r9
1809 cmpl r9,r1
1810 bgequ noname.155
1811 incl r2
1812noname.155:
1813 addl2 r2,r8
1814 bicl2 #0,r8
1815 cmpl r8,r2
1816 bgequ noname.156
1817 incl r10
1818noname.156:
1819
1820 movl r9,24(r11)
1821
1822 clrl r9
1823
1824 movzwl 2(r6),r2
1825 bicl3 #-65536,28(r7),r3
1826 movzwl 30(r7),r0
1827 bicl2 #-65536,r0
1828 bicl3 #-65536,(r6),-460(fp)
1829 bicl3 #-65536,r2,-464(fp)
1830 mull3 r0,-460(fp),-452(fp)
1831 mull2 r3,-460(fp)
1832 mull3 r3,-464(fp),-456(fp)
1833 mull2 r0,-464(fp)
1834 addl3 -452(fp),-456(fp),r0
1835 bicl3 #0,r0,-452(fp)
1836 cmpl -452(fp),-456(fp)
1837 bgequ noname.157
1838 addl2 #65536,-464(fp)
1839noname.157:
1840 movzwl -450(fp),r0
1841 bicl2 #-65536,r0
1842 addl2 r0,-464(fp)
1843 bicl3 #-65536,-452(fp),r0
1844 ashl #16,r0,-456(fp)
1845 addl3 -456(fp),-460(fp),r0
1846 bicl3 #0,r0,-460(fp)
1847 cmpl -460(fp),-456(fp)
1848 bgequ noname.158
1849 incl -464(fp)
1850noname.158:
1851 movl -460(fp),r1
1852 movl -464(fp),r2
1853 addl2 r1,r8
1854 bicl2 #0,r8
1855 cmpl r8,r1
1856 bgequ noname.159
1857 incl r2
1858noname.159:
1859 addl2 r2,r10
1860 bicl2 #0,r10
1861 cmpl r10,r2
1862 bgequ noname.160
1863 incl r9
1864noname.160:
1865
1866 movzwl 6(r6),r2
1867 bicl3 #-65536,24(r7),r3
1868 movzwl 26(r7),r0
1869 bicl2 #-65536,r0
1870 bicl3 #-65536,4(r6),-476(fp)
1871 bicl3 #-65536,r2,-480(fp)
1872 mull3 r0,-476(fp),-468(fp)
1873 mull2 r3,-476(fp)
1874 mull3 r3,-480(fp),-472(fp)
1875 mull2 r0,-480(fp)
1876 addl3 -468(fp),-472(fp),r0
1877 bicl3 #0,r0,-468(fp)
1878 cmpl -468(fp),-472(fp)
1879 bgequ noname.161
1880 addl2 #65536,-480(fp)
1881noname.161:
1882 movzwl -466(fp),r0
1883 bicl2 #-65536,r0
1884 addl2 r0,-480(fp)
1885 bicl3 #-65536,-468(fp),r0
1886 ashl #16,r0,-472(fp)
1887 addl3 -472(fp),-476(fp),r0
1888 bicl3 #0,r0,-476(fp)
1889 cmpl -476(fp),-472(fp)
1890 bgequ noname.162
1891 incl -480(fp)
1892noname.162:
1893 movl -476(fp),r1
1894 movl -480(fp),r2
1895 addl2 r1,r8
1896 bicl2 #0,r8
1897 cmpl r8,r1
1898 bgequ noname.163
1899 incl r2
1900noname.163:
1901 addl2 r2,r10
1902 bicl2 #0,r10
1903 cmpl r10,r2
1904 bgequ noname.164
1905 incl r9
1906noname.164:
1907
1908 movzwl 10(r6),r2
1909 bicl3 #-65536,20(r7),r3
1910 movzwl 22(r7),r0
1911 bicl2 #-65536,r0
1912 bicl3 #-65536,8(r6),-492(fp)
1913 bicl3 #-65536,r2,-496(fp)
1914 mull3 r0,-492(fp),-484(fp)
1915 mull2 r3,-492(fp)
1916 mull3 r3,-496(fp),-488(fp)
1917 mull2 r0,-496(fp)
1918 addl3 -484(fp),-488(fp),r0
1919 bicl3 #0,r0,-484(fp)
1920 cmpl -484(fp),-488(fp)
1921 bgequ noname.165
1922 addl2 #65536,-496(fp)
1923noname.165:
1924 movzwl -482(fp),r0
1925 bicl2 #-65536,r0
1926 addl2 r0,-496(fp)
1927 bicl3 #-65536,-484(fp),r0
1928 ashl #16,r0,-488(fp)
1929 addl3 -488(fp),-492(fp),r0
1930 bicl3 #0,r0,-492(fp)
1931 cmpl -492(fp),-488(fp)
1932 bgequ noname.166
1933 incl -496(fp)
1934noname.166:
1935 movl -492(fp),r1
1936 movl -496(fp),r2
1937 addl2 r1,r8
1938 bicl2 #0,r8
1939 cmpl r8,r1
1940 bgequ noname.167
1941 incl r2
1942noname.167:
1943 addl2 r2,r10
1944 bicl2 #0,r10
1945 cmpl r10,r2
1946 bgequ noname.168
1947 incl r9
1948noname.168:
1949
1950 movzwl 14(r6),r2
1951 bicl3 #-65536,16(r7),r3
1952 movzwl 18(r7),r0
1953 bicl2 #-65536,r0
1954 bicl3 #-65536,12(r6),-508(fp)
1955 bicl3 #-65536,r2,-512(fp)
1956 mull3 r0,-508(fp),-500(fp)
1957 mull2 r3,-508(fp)
1958 mull3 r3,-512(fp),-504(fp)
1959 mull2 r0,-512(fp)
1960 addl3 -500(fp),-504(fp),r0
1961 bicl3 #0,r0,-500(fp)
1962 cmpl -500(fp),-504(fp)
1963 bgequ noname.169
1964 addl2 #65536,-512(fp)
1965noname.169:
1966 movzwl -498(fp),r0
1967 bicl2 #-65536,r0
1968 addl2 r0,-512(fp)
1969 bicl3 #-65536,-500(fp),r0
1970 ashl #16,r0,-504(fp)
1971 addl3 -504(fp),-508(fp),r0
1972 bicl3 #0,r0,-508(fp)
1973 cmpl -508(fp),-504(fp)
1974 bgequ noname.170
1975 incl -512(fp)
1976noname.170:
1977 movl -508(fp),r1
1978 movl -512(fp),r2
1979 addl2 r1,r8
1980 bicl2 #0,r8
1981 cmpl r8,r1
1982 bgequ noname.171
1983 incl r2
1984noname.171:
1985 addl2 r2,r10
1986 bicl2 #0,r10
1987 cmpl r10,r2
1988 bgequ noname.172
1989 incl r9
1990noname.172:
1991
1992 movzwl 18(r6),r2
1993 bicl3 #-65536,12(r7),r3
1994 movzwl 14(r7),r0
1995 bicl2 #-65536,r0
1996 bicl3 #-65536,16(r6),-524(fp)
1997 bicl3 #-65536,r2,-528(fp)
1998 mull3 r0,-524(fp),-516(fp)
1999 mull2 r3,-524(fp)
2000 mull3 r3,-528(fp),-520(fp)
2001 mull2 r0,-528(fp)
2002 addl3 -516(fp),-520(fp),r0
2003 bicl3 #0,r0,-516(fp)
2004 cmpl -516(fp),-520(fp)
2005 bgequ noname.173
2006 addl2 #65536,-528(fp)
2007noname.173:
2008 movzwl -514(fp),r0
2009 bicl2 #-65536,r0
2010 addl2 r0,-528(fp)
2011 bicl3 #-65536,-516(fp),r0
2012 ashl #16,r0,-520(fp)
2013 addl3 -520(fp),-524(fp),r0
2014 bicl3 #0,r0,-524(fp)
2015 cmpl -524(fp),-520(fp)
2016 bgequ noname.174
2017 incl -528(fp)
2018noname.174:
2019 movl -524(fp),r1
2020 movl -528(fp),r2
2021 addl2 r1,r8
2022 bicl2 #0,r8
2023 cmpl r8,r1
2024 bgequ noname.175
2025 incl r2
2026noname.175:
2027 addl2 r2,r10
2028 bicl2 #0,r10
2029 cmpl r10,r2
2030 bgequ noname.176
2031 incl r9
2032noname.176:
2033
2034 movzwl 22(r6),r2
2035 bicl3 #-65536,8(r7),r3
2036 movzwl 10(r7),r0
2037 bicl2 #-65536,r0
2038 bicl3 #-65536,20(r6),-540(fp)
2039 bicl3 #-65536,r2,-544(fp)
2040 mull3 r0,-540(fp),-532(fp)
2041 mull2 r3,-540(fp)
2042 mull3 r3,-544(fp),-536(fp)
2043 mull2 r0,-544(fp)
2044 addl3 -532(fp),-536(fp),r0
2045 bicl3 #0,r0,-532(fp)
2046 cmpl -532(fp),-536(fp)
2047 bgequ noname.177
2048 addl2 #65536,-544(fp)
2049noname.177:
2050 movzwl -530(fp),r0
2051 bicl2 #-65536,r0
2052 addl2 r0,-544(fp)
2053 bicl3 #-65536,-532(fp),r0
2054 ashl #16,r0,-536(fp)
2055 addl3 -536(fp),-540(fp),r0
2056 bicl3 #0,r0,-540(fp)
2057 cmpl -540(fp),-536(fp)
2058 bgequ noname.178
2059 incl -544(fp)
2060noname.178:
2061 movl -540(fp),r1
2062 movl -544(fp),r2
2063 addl2 r1,r8
2064 bicl2 #0,r8
2065 cmpl r8,r1
2066 bgequ noname.179
2067 incl r2
2068noname.179:
2069 addl2 r2,r10
2070 bicl2 #0,r10
2071 cmpl r10,r2
2072 bgequ noname.180
2073 incl r9
2074noname.180:
2075
2076 movzwl 26(r6),r2
2077 bicl3 #-65536,4(r7),r3
2078 movzwl 6(r7),r0
2079 bicl2 #-65536,r0
2080 bicl3 #-65536,24(r6),-556(fp)
2081 bicl3 #-65536,r2,-560(fp)
2082 mull3 r0,-556(fp),-548(fp)
2083 mull2 r3,-556(fp)
2084 mull3 r3,-560(fp),-552(fp)
2085 mull2 r0,-560(fp)
2086 addl3 -548(fp),-552(fp),r0
2087 bicl3 #0,r0,-548(fp)
2088 cmpl -548(fp),-552(fp)
2089 bgequ noname.181
2090 addl2 #65536,-560(fp)
2091noname.181:
2092 movzwl -546(fp),r0
2093 bicl2 #-65536,r0
2094 addl2 r0,-560(fp)
2095 bicl3 #-65536,-548(fp),r0
2096 ashl #16,r0,-552(fp)
2097 addl3 -552(fp),-556(fp),r0
2098 bicl3 #0,r0,-556(fp)
2099 cmpl -556(fp),-552(fp)
2100 bgequ noname.182
2101 incl -560(fp)
2102noname.182:
2103 movl -556(fp),r1
2104 movl -560(fp),r2
2105 addl2 r1,r8
2106 bicl2 #0,r8
2107 cmpl r8,r1
2108 bgequ noname.183
2109 incl r2
2110noname.183:
2111 addl2 r2,r10
2112 bicl2 #0,r10
2113 cmpl r10,r2
2114 bgequ noname.184
2115 incl r9
2116noname.184:
2117
2118 movzwl 30(r6),r2
2119 bicl3 #-65536,(r7),r3
2120 movzwl 2(r7),r0
2121 bicl2 #-65536,r0
2122 bicl3 #-65536,28(r6),-572(fp)
2123 bicl3 #-65536,r2,-576(fp)
2124 mull3 r0,-572(fp),-564(fp)
2125 mull2 r3,-572(fp)
2126 mull3 r3,-576(fp),-568(fp)
2127 mull2 r0,-576(fp)
2128 addl3 -564(fp),-568(fp),r0
2129 bicl3 #0,r0,-564(fp)
2130 cmpl -564(fp),-568(fp)
2131 bgequ noname.185
2132 addl2 #65536,-576(fp)
2133noname.185:
2134 movzwl -562(fp),r0
2135 bicl2 #-65536,r0
2136 addl2 r0,-576(fp)
2137 bicl3 #-65536,-564(fp),r0
2138 ashl #16,r0,-568(fp)
2139 addl3 -568(fp),-572(fp),r0
2140 bicl3 #0,r0,-572(fp)
2141 cmpl -572(fp),-568(fp)
2142 bgequ noname.186
2143 incl -576(fp)
2144noname.186:
2145 movl -572(fp),r1
2146 movl -576(fp),r2
2147 addl2 r1,r8
2148 bicl2 #0,r8
2149 cmpl r8,r1
2150 bgequ noname.187
2151 incl r2
2152noname.187:
2153 addl2 r2,r10
2154 bicl2 #0,r10
2155 cmpl r10,r2
2156 bgequ noname.188
2157 incl r9
2158noname.188:
2159
2160 movl r8,28(r11)
2161
2162 clrl r8
2163
2164 movzwl 30(r6),r2
2165 bicl3 #-65536,4(r7),r3
2166 movzwl 6(r7),r0
2167 bicl2 #-65536,r0
2168 bicl3 #-65536,28(r6),-588(fp)
2169 bicl3 #-65536,r2,-592(fp)
2170 mull3 r0,-588(fp),-580(fp)
2171 mull2 r3,-588(fp)
2172 mull3 r3,-592(fp),-584(fp)
2173 mull2 r0,-592(fp)
2174 addl3 -580(fp),-584(fp),r0
2175 bicl3 #0,r0,-580(fp)
2176 cmpl -580(fp),-584(fp)
2177 bgequ noname.189
2178 addl2 #65536,-592(fp)
2179noname.189:
2180 movzwl -578(fp),r0
2181 bicl2 #-65536,r0
2182 addl2 r0,-592(fp)
2183 bicl3 #-65536,-580(fp),r0
2184 ashl #16,r0,-584(fp)
2185 addl3 -584(fp),-588(fp),r0
2186 bicl3 #0,r0,-588(fp)
2187 cmpl -588(fp),-584(fp)
2188 bgequ noname.190
2189 incl -592(fp)
2190noname.190:
2191 movl -588(fp),r1
2192 movl -592(fp),r2
2193 addl2 r1,r10
2194 bicl2 #0,r10
2195 cmpl r10,r1
2196 bgequ noname.191
2197 incl r2
2198noname.191:
2199 addl2 r2,r9
2200 bicl2 #0,r9
2201 cmpl r9,r2
2202 bgequ noname.192
2203 incl r8
2204noname.192:
2205
2206 movzwl 26(r6),r2
2207 bicl3 #-65536,8(r7),r3
2208 movzwl 10(r7),r0
2209 bicl2 #-65536,r0
2210 bicl3 #-65536,24(r6),-604(fp)
2211 bicl3 #-65536,r2,-608(fp)
2212 mull3 r0,-604(fp),-596(fp)
2213 mull2 r3,-604(fp)
2214 mull3 r3,-608(fp),-600(fp)
2215 mull2 r0,-608(fp)
2216 addl3 -596(fp),-600(fp),r0
2217 bicl3 #0,r0,-596(fp)
2218 cmpl -596(fp),-600(fp)
2219 bgequ noname.193
2220 addl2 #65536,-608(fp)
2221noname.193:
2222 movzwl -594(fp),r0
2223 bicl2 #-65536,r0
2224 addl2 r0,-608(fp)
2225 bicl3 #-65536,-596(fp),r0
2226 ashl #16,r0,-600(fp)
2227 addl3 -600(fp),-604(fp),r0
2228 bicl3 #0,r0,-604(fp)
2229 cmpl -604(fp),-600(fp)
2230 bgequ noname.194
2231 incl -608(fp)
2232noname.194:
2233 movl -604(fp),r1
2234 movl -608(fp),r2
2235 addl2 r1,r10
2236 bicl2 #0,r10
2237 cmpl r10,r1
2238 bgequ noname.195
2239 incl r2
2240noname.195:
2241 addl2 r2,r9
2242 bicl2 #0,r9
2243 cmpl r9,r2
2244 bgequ noname.196
2245 incl r8
2246noname.196:
2247
2248 movzwl 22(r6),r2
2249 bicl3 #-65536,12(r7),r3
2250 movzwl 14(r7),r0
2251 bicl2 #-65536,r0
2252 bicl3 #-65536,20(r6),-620(fp)
2253 bicl3 #-65536,r2,-624(fp)
2254 mull3 r0,-620(fp),-612(fp)
2255 mull2 r3,-620(fp)
2256 mull3 r3,-624(fp),-616(fp)
2257 mull2 r0,-624(fp)
2258 addl3 -612(fp),-616(fp),r0
2259 bicl3 #0,r0,-612(fp)
2260 cmpl -612(fp),-616(fp)
2261 bgequ noname.197
2262 addl2 #65536,-624(fp)
2263noname.197:
2264 movzwl -610(fp),r0
2265 bicl2 #-65536,r0
2266 addl2 r0,-624(fp)
2267 bicl3 #-65536,-612(fp),r0
2268 ashl #16,r0,-616(fp)
2269 addl3 -616(fp),-620(fp),r0
2270 bicl3 #0,r0,-620(fp)
2271 cmpl -620(fp),-616(fp)
2272 bgequ noname.198
2273 incl -624(fp)
2274noname.198:
2275 movl -620(fp),r1
2276 movl -624(fp),r2
2277 addl2 r1,r10
2278 bicl2 #0,r10
2279 cmpl r10,r1
2280 bgequ noname.199
2281 incl r2
2282noname.199:
2283 addl2 r2,r9
2284 bicl2 #0,r9
2285 cmpl r9,r2
2286 bgequ noname.200
2287 incl r8
2288noname.200:
2289
2290 movzwl 18(r6),r2
2291 bicl3 #-65536,16(r7),r3
2292 movzwl 18(r7),r0
2293 bicl2 #-65536,r0
2294 bicl3 #-65536,16(r6),-636(fp)
2295 bicl3 #-65536,r2,-640(fp)
2296 mull3 r0,-636(fp),-628(fp)
2297 mull2 r3,-636(fp)
2298 mull3 r3,-640(fp),-632(fp)
2299 mull2 r0,-640(fp)
2300 addl3 -628(fp),-632(fp),r0
2301 bicl3 #0,r0,-628(fp)
2302 cmpl -628(fp),-632(fp)
2303 bgequ noname.201
2304 addl2 #65536,-640(fp)
2305noname.201:
2306 movzwl -626(fp),r0
2307 bicl2 #-65536,r0
2308 addl2 r0,-640(fp)
2309 bicl3 #-65536,-628(fp),r0
2310 ashl #16,r0,-632(fp)
2311 addl3 -632(fp),-636(fp),r0
2312 bicl3 #0,r0,-636(fp)
2313 cmpl -636(fp),-632(fp)
2314 bgequ noname.202
2315 incl -640(fp)
2316noname.202:
2317 movl -636(fp),r1
2318 movl -640(fp),r2
2319 addl2 r1,r10
2320 bicl2 #0,r10
2321 cmpl r10,r1
2322 bgequ noname.203
2323 incl r2
2324noname.203:
2325 addl2 r2,r9
2326 bicl2 #0,r9
2327 cmpl r9,r2
2328 bgequ noname.204
2329 incl r8
2330noname.204:
2331
2332 movzwl 14(r6),r2
2333 bicl3 #-65536,20(r7),r3
2334 movzwl 22(r7),r0
2335 bicl2 #-65536,r0
2336 bicl3 #-65536,12(r6),-652(fp)
2337 bicl3 #-65536,r2,-656(fp)
2338 mull3 r0,-652(fp),-644(fp)
2339 mull2 r3,-652(fp)
2340 mull3 r3,-656(fp),-648(fp)
2341 mull2 r0,-656(fp)
2342 addl3 -644(fp),-648(fp),r0
2343 bicl3 #0,r0,-644(fp)
2344 cmpl -644(fp),-648(fp)
2345 bgequ noname.205
2346 addl2 #65536,-656(fp)
2347noname.205:
2348 movzwl -642(fp),r0
2349 bicl2 #-65536,r0
2350 addl2 r0,-656(fp)
2351 bicl3 #-65536,-644(fp),r0
2352 ashl #16,r0,-648(fp)
2353 addl3 -648(fp),-652(fp),r0
2354 bicl3 #0,r0,-652(fp)
2355 cmpl -652(fp),-648(fp)
2356 bgequ noname.206
2357 incl -656(fp)
2358noname.206:
2359 movl -652(fp),r1
2360 movl -656(fp),r2
2361 addl2 r1,r10
2362 bicl2 #0,r10
2363 cmpl r10,r1
2364 bgequ noname.207
2365 incl r2
2366noname.207:
2367 addl2 r2,r9
2368 bicl2 #0,r9
2369 cmpl r9,r2
2370 bgequ noname.208
2371 incl r8
2372noname.208:
2373
2374 movzwl 10(r6),r2
2375 bicl3 #-65536,24(r7),r3
2376 movzwl 26(r7),r0
2377 bicl2 #-65536,r0
2378 bicl3 #-65536,8(r6),-668(fp)
2379 bicl3 #-65536,r2,-672(fp)
2380 mull3 r0,-668(fp),-660(fp)
2381 mull2 r3,-668(fp)
2382 mull3 r3,-672(fp),-664(fp)
2383 mull2 r0,-672(fp)
2384 addl3 -660(fp),-664(fp),r0
2385 bicl3 #0,r0,-660(fp)
2386 cmpl -660(fp),-664(fp)
2387 bgequ noname.209
2388 addl2 #65536,-672(fp)
2389noname.209:
2390 movzwl -658(fp),r0
2391 bicl2 #-65536,r0
2392 addl2 r0,-672(fp)
2393 bicl3 #-65536,-660(fp),r0
2394 ashl #16,r0,-664(fp)
2395 addl3 -664(fp),-668(fp),r0
2396 bicl3 #0,r0,-668(fp)
2397 cmpl -668(fp),-664(fp)
2398 bgequ noname.210
2399 incl -672(fp)
2400noname.210:
2401 movl -668(fp),r1
2402 movl -672(fp),r2
2403 addl2 r1,r10
2404 bicl2 #0,r10
2405 cmpl r10,r1
2406 bgequ noname.211
2407 incl r2
2408noname.211:
2409 addl2 r2,r9
2410 bicl2 #0,r9
2411 cmpl r9,r2
2412 bgequ noname.212
2413 incl r8
2414noname.212:
2415
2416 movzwl 6(r6),r2
2417 bicl3 #-65536,28(r7),r3
2418 movzwl 30(r7),r0
2419 bicl2 #-65536,r0
2420 bicl3 #-65536,4(r6),-684(fp)
2421 bicl3 #-65536,r2,-688(fp)
2422 mull3 r0,-684(fp),-676(fp)
2423 mull2 r3,-684(fp)
2424 mull3 r3,-688(fp),-680(fp)
2425 mull2 r0,-688(fp)
2426 addl3 -676(fp),-680(fp),r0
2427 bicl3 #0,r0,-676(fp)
2428 cmpl -676(fp),-680(fp)
2429 bgequ noname.213
2430 addl2 #65536,-688(fp)
2431noname.213:
2432 movzwl -674(fp),r0
2433 bicl2 #-65536,r0
2434 addl2 r0,-688(fp)
2435 bicl3 #-65536,-676(fp),r0
2436 ashl #16,r0,-680(fp)
2437 addl3 -680(fp),-684(fp),r0
2438 bicl3 #0,r0,-684(fp)
2439 cmpl -684(fp),-680(fp)
2440 bgequ noname.214
2441 incl -688(fp)
2442noname.214:
2443 movl -684(fp),r1
2444 movl -688(fp),r2
2445 addl2 r1,r10
2446 bicl2 #0,r10
2447 cmpl r10,r1
2448 bgequ noname.215
2449 incl r2
2450noname.215:
2451 addl2 r2,r9
2452 bicl2 #0,r9
2453 cmpl r9,r2
2454 bgequ noname.216
2455 incl r8
2456noname.216:
2457
2458 movl r10,32(r11)
2459
2460 clrl r10
2461
2462 movzwl 10(r6),r2
2463 bicl3 #-65536,28(r7),r3
2464 movzwl 30(r7),r0
2465 bicl2 #-65536,r0
2466 bicl3 #-65536,8(r6),-700(fp)
2467 bicl3 #-65536,r2,-704(fp)
2468 mull3 r0,-700(fp),-692(fp)
2469 mull2 r3,-700(fp)
2470 mull3 r3,-704(fp),-696(fp)
2471 mull2 r0,-704(fp)
2472 addl3 -692(fp),-696(fp),r0
2473 bicl3 #0,r0,-692(fp)
2474 cmpl -692(fp),-696(fp)
2475 bgequ noname.217
2476 addl2 #65536,-704(fp)
2477noname.217:
2478 movzwl -690(fp),r0
2479 bicl2 #-65536,r0
2480 addl2 r0,-704(fp)
2481 bicl3 #-65536,-692(fp),r0
2482 ashl #16,r0,-696(fp)
2483 addl3 -696(fp),-700(fp),r0
2484 bicl3 #0,r0,-700(fp)
2485 cmpl -700(fp),-696(fp)
2486 bgequ noname.218
2487 incl -704(fp)
2488noname.218:
2489 movl -700(fp),r1
2490 movl -704(fp),r2
2491 addl2 r1,r9
2492 bicl2 #0,r9
2493 cmpl r9,r1
2494 bgequ noname.219
2495 incl r2
2496noname.219:
2497 addl2 r2,r8
2498 bicl2 #0,r8
2499 cmpl r8,r2
2500 bgequ noname.220
2501 incl r10
2502noname.220:
2503
2504 movzwl 14(r6),r2
2505 bicl3 #-65536,24(r7),r3
2506 movzwl 26(r7),r0
2507 bicl2 #-65536,r0
2508 bicl3 #-65536,12(r6),-716(fp)
2509 bicl3 #-65536,r2,-720(fp)
2510 mull3 r0,-716(fp),-708(fp)
2511 mull2 r3,-716(fp)
2512 mull3 r3,-720(fp),-712(fp)
2513 mull2 r0,-720(fp)
2514 addl3 -708(fp),-712(fp),r0
2515 bicl3 #0,r0,-708(fp)
2516 cmpl -708(fp),-712(fp)
2517 bgequ noname.221
2518 addl2 #65536,-720(fp)
2519noname.221:
2520 movzwl -706(fp),r0
2521 bicl2 #-65536,r0
2522 addl2 r0,-720(fp)
2523 bicl3 #-65536,-708(fp),r0
2524 ashl #16,r0,-712(fp)
2525 addl3 -712(fp),-716(fp),r0
2526 bicl3 #0,r0,-716(fp)
2527 cmpl -716(fp),-712(fp)
2528 bgequ noname.222
2529 incl -720(fp)
2530noname.222:
2531 movl -716(fp),r1
2532 movl -720(fp),r2
2533 addl2 r1,r9
2534 bicl2 #0,r9
2535 cmpl r9,r1
2536 bgequ noname.223
2537 incl r2
2538noname.223:
2539 addl2 r2,r8
2540 bicl2 #0,r8
2541 cmpl r8,r2
2542 bgequ noname.224
2543 incl r10
2544noname.224:
2545
2546 movzwl 18(r6),r2
2547 bicl3 #-65536,20(r7),r3
2548 movzwl 22(r7),r0
2549 bicl2 #-65536,r0
2550 bicl3 #-65536,16(r6),-732(fp)
2551 bicl3 #-65536,r2,-736(fp)
2552 mull3 r0,-732(fp),-724(fp)
2553 mull2 r3,-732(fp)
2554 mull3 r3,-736(fp),-728(fp)
2555 mull2 r0,-736(fp)
2556 addl3 -724(fp),-728(fp),r0
2557 bicl3 #0,r0,-724(fp)
2558 cmpl -724(fp),-728(fp)
2559 bgequ noname.225
2560 addl2 #65536,-736(fp)
2561noname.225:
2562 movzwl -722(fp),r0
2563 bicl2 #-65536,r0
2564 addl2 r0,-736(fp)
2565 bicl3 #-65536,-724(fp),r0
2566 ashl #16,r0,-728(fp)
2567 addl3 -728(fp),-732(fp),r0
2568 bicl3 #0,r0,-732(fp)
2569 cmpl -732(fp),-728(fp)
2570 bgequ noname.226
2571 incl -736(fp)
2572noname.226:
2573 movl -732(fp),r1
2574 movl -736(fp),r2
2575 addl2 r1,r9
2576 bicl2 #0,r9
2577 cmpl r9,r1
2578 bgequ noname.227
2579 incl r2
2580noname.227:
2581 addl2 r2,r8
2582 bicl2 #0,r8
2583 cmpl r8,r2
2584 bgequ noname.228
2585 incl r10
2586noname.228:
2587
2588 movzwl 22(r6),r2
2589 bicl3 #-65536,16(r7),r3
2590 movzwl 18(r7),r0
2591 bicl2 #-65536,r0
2592 bicl3 #-65536,20(r6),-748(fp)
2593 bicl3 #-65536,r2,-752(fp)
2594 mull3 r0,-748(fp),-740(fp)
2595 mull2 r3,-748(fp)
2596 mull3 r3,-752(fp),-744(fp)
2597 mull2 r0,-752(fp)
2598 addl3 -740(fp),-744(fp),r0
2599 bicl3 #0,r0,-740(fp)
2600 cmpl -740(fp),-744(fp)
2601 bgequ noname.229
2602 addl2 #65536,-752(fp)
2603noname.229:
2604 movzwl -738(fp),r0
2605 bicl2 #-65536,r0
2606 addl2 r0,-752(fp)
2607 bicl3 #-65536,-740(fp),r0
2608 ashl #16,r0,-744(fp)
2609 addl3 -744(fp),-748(fp),r0
2610 bicl3 #0,r0,-748(fp)
2611 cmpl -748(fp),-744(fp)
2612 bgequ noname.230
2613 incl -752(fp)
2614noname.230:
2615 movl -748(fp),r1
2616 movl -752(fp),r2
2617 addl2 r1,r9
2618 bicl2 #0,r9
2619 cmpl r9,r1
2620 bgequ noname.231
2621 incl r2
2622noname.231:
2623 addl2 r2,r8
2624 bicl2 #0,r8
2625 cmpl r8,r2
2626 bgequ noname.232
2627 incl r10
2628noname.232:
2629
2630 movzwl 26(r6),r2
2631 bicl3 #-65536,12(r7),r3
2632 movzwl 14(r7),r0
2633 bicl2 #-65536,r0
2634 bicl3 #-65536,24(r6),-764(fp)
2635 bicl3 #-65536,r2,-768(fp)
2636 mull3 r0,-764(fp),-756(fp)
2637 mull2 r3,-764(fp)
2638 mull3 r3,-768(fp),-760(fp)
2639 mull2 r0,-768(fp)
2640 addl3 -756(fp),-760(fp),r0
2641 bicl3 #0,r0,-756(fp)
2642 cmpl -756(fp),-760(fp)
2643 bgequ noname.233
2644 addl2 #65536,-768(fp)
2645noname.233:
2646 movzwl -754(fp),r0
2647 bicl2 #-65536,r0
2648 addl2 r0,-768(fp)
2649 bicl3 #-65536,-756(fp),r0
2650 ashl #16,r0,-760(fp)
2651 addl3 -760(fp),-764(fp),r0
2652 bicl3 #0,r0,-764(fp)
2653 cmpl -764(fp),-760(fp)
2654 bgequ noname.234
2655 incl -768(fp)
2656noname.234:
2657 movl -764(fp),r1
2658 movl -768(fp),r2
2659 addl2 r1,r9
2660 bicl2 #0,r9
2661 cmpl r9,r1
2662 bgequ noname.235
2663 incl r2
2664noname.235:
2665 addl2 r2,r8
2666 bicl2 #0,r8
2667 cmpl r8,r2
2668 bgequ noname.236
2669 incl r10
2670noname.236:
2671
2672 bicl3 #-65536,28(r6),r3
2673 movzwl 30(r6),r1
2674 bicl2 #-65536,r1
2675 bicl3 #-65536,8(r7),r2
2676 movzwl 10(r7),r0
2677 bicl2 #-65536,r0
2678 movl r3,r5
2679 movl r1,r4
2680 mull3 r0,r5,-772(fp)
2681 mull2 r2,r5
2682 mull3 r2,r4,-776(fp)
2683 mull2 r0,r4
2684 addl3 -772(fp),-776(fp),r0
2685 bicl3 #0,r0,-772(fp)
2686 cmpl -772(fp),-776(fp)
2687 bgequ noname.237
2688 addl2 #65536,r4
2689noname.237:
2690 movzwl -770(fp),r0
2691 bicl2 #-65536,r0
2692 addl2 r0,r4
2693 bicl3 #-65536,-772(fp),r0
2694 ashl #16,r0,-776(fp)
2695 addl2 -776(fp),r5
2696 bicl2 #0,r5
2697 cmpl r5,-776(fp)
2698 bgequ noname.238
2699 incl r4
2700noname.238:
2701 movl r5,r1
2702 movl r4,r2
2703 addl2 r1,r9
2704 bicl2 #0,r9
2705 cmpl r9,r1
2706 bgequ noname.239
2707 incl r2
2708noname.239:
2709 addl2 r2,r8
2710 bicl2 #0,r8
2711 cmpl r8,r2
2712 bgequ noname.240
2713 incl r10
2714noname.240:
2715
2716 movl r9,36(r11)
2717
2718 clrl r9
2719
2720 bicl3 #-65536,28(r6),r3
2721 movzwl 30(r6),r1
2722 bicl2 #-65536,r1
2723 bicl3 #-65536,12(r7),r2
2724 movzwl 14(r7),r0
2725 bicl2 #-65536,r0
2726 movl r3,r5
2727 movl r1,r4
2728 mull3 r0,r5,-780(fp)
2729 mull2 r2,r5
2730 mull3 r2,r4,-784(fp)
2731 mull2 r0,r4
2732 addl3 -780(fp),-784(fp),r0
2733 bicl3 #0,r0,-780(fp)
2734 cmpl -780(fp),-784(fp)
2735 bgequ noname.241
2736 addl2 #65536,r4
2737noname.241:
2738 movzwl -778(fp),r0
2739 bicl2 #-65536,r0
2740 addl2 r0,r4
2741 bicl3 #-65536,-780(fp),r0
2742 ashl #16,r0,-784(fp)
2743 addl2 -784(fp),r5
2744 bicl2 #0,r5
2745 cmpl r5,-784(fp)
2746 bgequ noname.242
2747 incl r4
2748noname.242:
2749 movl r5,r1
2750 movl r4,r2
2751 addl2 r1,r8
2752 bicl2 #0,r8
2753 cmpl r8,r1
2754 bgequ noname.243
2755 incl r2
2756noname.243:
2757 addl2 r2,r10
2758 bicl2 #0,r10
2759 cmpl r10,r2
2760 bgequ noname.244
2761 incl r9
2762noname.244:
2763
2764 bicl3 #-65536,24(r6),r3
2765 movzwl 26(r6),r1
2766 bicl2 #-65536,r1
2767 bicl3 #-65536,16(r7),r2
2768 movzwl 18(r7),r0
2769 bicl2 #-65536,r0
2770 movl r3,r5
2771 movl r1,r4
2772 mull3 r0,r5,-788(fp)
2773 mull2 r2,r5
2774 mull3 r2,r4,-792(fp)
2775 mull2 r0,r4
2776 addl3 -788(fp),-792(fp),r0
2777 bicl3 #0,r0,-788(fp)
2778 cmpl -788(fp),-792(fp)
2779 bgequ noname.245
2780 addl2 #65536,r4
2781noname.245:
2782 movzwl -786(fp),r0
2783 bicl2 #-65536,r0
2784 addl2 r0,r4
2785 bicl3 #-65536,-788(fp),r0
2786 ashl #16,r0,-792(fp)
2787 addl2 -792(fp),r5
2788 bicl2 #0,r5
2789 cmpl r5,-792(fp)
2790 bgequ noname.246
2791 incl r4
2792noname.246:
2793 movl r5,r1
2794 movl r4,r2
2795 addl2 r1,r8
2796 bicl2 #0,r8
2797 cmpl r8,r1
2798 bgequ noname.247
2799 incl r2
2800noname.247:
2801 addl2 r2,r10
2802 bicl2 #0,r10
2803 cmpl r10,r2
2804 bgequ noname.248
2805 incl r9
2806noname.248:
2807
2808 bicl3 #-65536,20(r6),r3
2809 movzwl 22(r6),r1
2810 bicl2 #-65536,r1
2811 bicl3 #-65536,20(r7),r2
2812 movzwl 22(r7),r0
2813 bicl2 #-65536,r0
2814 movl r3,r5
2815 movl r1,r4
2816 mull3 r0,r5,-796(fp)
2817 mull2 r2,r5
2818 mull3 r2,r4,-800(fp)
2819 mull2 r0,r4
2820 addl3 -796(fp),-800(fp),r0
2821 bicl3 #0,r0,-796(fp)
2822 cmpl -796(fp),-800(fp)
2823 bgequ noname.249
2824 addl2 #65536,r4
2825noname.249:
2826 movzwl -794(fp),r0
2827 bicl2 #-65536,r0
2828 addl2 r0,r4
2829 bicl3 #-65536,-796(fp),r0
2830 ashl #16,r0,-800(fp)
2831 addl2 -800(fp),r5
2832 bicl2 #0,r5
2833 cmpl r5,-800(fp)
2834 bgequ noname.250
2835 incl r4
2836noname.250:
2837 movl r5,r1
2838 movl r4,r2
2839 addl2 r1,r8
2840 bicl2 #0,r8
2841 cmpl r8,r1
2842 bgequ noname.251
2843 incl r2
2844noname.251:
2845 addl2 r2,r10
2846 bicl2 #0,r10
2847 cmpl r10,r2
2848 bgequ noname.252
2849 incl r9
2850noname.252:
2851
2852 bicl3 #-65536,16(r6),r3
2853 movzwl 18(r6),r1
2854 bicl2 #-65536,r1
2855 bicl3 #-65536,24(r7),r2
2856 movzwl 26(r7),r0
2857 bicl2 #-65536,r0
2858 movl r3,r5
2859 movl r1,r4
2860 mull3 r0,r5,-804(fp)
2861 mull2 r2,r5
2862 mull3 r2,r4,-808(fp)
2863 mull2 r0,r4
2864 addl3 -804(fp),-808(fp),r0
2865 bicl3 #0,r0,-804(fp)
2866 cmpl -804(fp),-808(fp)
2867 bgequ noname.253
2868 addl2 #65536,r4
2869noname.253:
2870 movzwl -802(fp),r0
2871 bicl2 #-65536,r0
2872 addl2 r0,r4
2873 bicl3 #-65536,-804(fp),r0
2874 ashl #16,r0,-808(fp)
2875 addl2 -808(fp),r5
2876 bicl2 #0,r5
2877 cmpl r5,-808(fp)
2878 bgequ noname.254
2879 incl r4
2880noname.254:
2881 movl r5,r1
2882 movl r4,r2
2883 addl2 r1,r8
2884 bicl2 #0,r8
2885 cmpl r8,r1
2886 bgequ noname.255
2887 incl r2
2888noname.255:
2889 addl2 r2,r10
2890 bicl2 #0,r10
2891 cmpl r10,r2
2892 bgequ noname.256
2893 incl r9
2894noname.256:
2895
2896 bicl3 #-65536,12(r6),r3
2897 movzwl 14(r6),r1
2898 bicl2 #-65536,r1
2899 bicl3 #-65536,28(r7),r2
2900 movzwl 30(r7),r0
2901 bicl2 #-65536,r0
2902 movl r3,r5
2903 movl r1,r4
2904 mull3 r0,r5,-812(fp)
2905 mull2 r2,r5
2906 mull3 r2,r4,-816(fp)
2907 mull2 r0,r4
2908 addl3 -812(fp),-816(fp),r0
2909 bicl3 #0,r0,-812(fp)
2910 cmpl -812(fp),-816(fp)
2911 bgequ noname.257
2912 addl2 #65536,r4
2913noname.257:
2914 movzwl -810(fp),r0
2915 bicl2 #-65536,r0
2916 addl2 r0,r4
2917 bicl3 #-65536,-812(fp),r0
2918 ashl #16,r0,-816(fp)
2919 addl2 -816(fp),r5
2920 bicl2 #0,r5
2921 cmpl r5,-816(fp)
2922 bgequ noname.258
2923 incl r4
2924noname.258:
2925 movl r5,r1
2926 movl r4,r2
2927 addl2 r1,r8
2928 bicl2 #0,r8
2929 cmpl r8,r1
2930 bgequ noname.259
2931 incl r2
2932noname.259:
2933 addl2 r2,r10
2934 bicl2 #0,r10
2935 cmpl r10,r2
2936 bgequ noname.260
2937 incl r9
2938noname.260:
2939
2940 movl r8,40(r11)
2941
2942 clrl r8
2943
2944 bicl3 #-65536,16(r6),r3
2945 movzwl 18(r6),r2
2946 bicl3 #-65536,28(r7),r1
2947 movzwl 30(r7),r0
2948 bicl2 #-65536,r0
2949 movl r3,r4
2950 bicl3 #-65536,r2,-828(fp)
2951 mull3 r0,r4,-820(fp)
2952 mull2 r1,r4
2953 mull3 r1,-828(fp),-824(fp)
2954 mull2 r0,-828(fp)
2955 addl3 -820(fp),-824(fp),r0
2956 bicl3 #0,r0,-820(fp)
2957 cmpl -820(fp),-824(fp)
2958 bgequ noname.261
2959 addl2 #65536,-828(fp)
2960noname.261:
2961 movzwl -818(fp),r0
2962 bicl2 #-65536,r0
2963 addl2 r0,-828(fp)
2964 bicl3 #-65536,-820(fp),r0
2965 ashl #16,r0,-824(fp)
2966 addl2 -824(fp),r4
2967 bicl2 #0,r4
2968 cmpl r4,-824(fp)
2969 bgequ noname.262
2970 incl -828(fp)
2971noname.262:
2972 movl r4,r1
2973 movl -828(fp),r2
2974 addl2 r1,r10
2975 bicl2 #0,r10
2976 cmpl r10,r1
2977 bgequ noname.263
2978 incl r2
2979noname.263:
2980 addl2 r2,r9
2981 bicl2 #0,r9
2982 cmpl r9,r2
2983 bgequ noname.264
2984 incl r8
2985noname.264:
2986
2987 movzwl 22(r6),r2
2988 bicl3 #-65536,24(r7),r3
2989 movzwl 26(r7),r0
2990 bicl2 #-65536,r0
2991 bicl3 #-65536,20(r6),-840(fp)
2992 bicl3 #-65536,r2,-844(fp)
2993 mull3 r0,-840(fp),-832(fp)
2994 mull2 r3,-840(fp)
2995 mull3 r3,-844(fp),-836(fp)
2996 mull2 r0,-844(fp)
2997 addl3 -832(fp),-836(fp),r0
2998 bicl3 #0,r0,-832(fp)
2999 cmpl -832(fp),-836(fp)
3000 bgequ noname.265
3001 addl2 #65536,-844(fp)
3002noname.265:
3003 movzwl -830(fp),r0
3004 bicl2 #-65536,r0
3005 addl2 r0,-844(fp)
3006 bicl3 #-65536,-832(fp),r0
3007 ashl #16,r0,-836(fp)
3008 addl3 -836(fp),-840(fp),r0
3009 bicl3 #0,r0,-840(fp)
3010 cmpl -840(fp),-836(fp)
3011 bgequ noname.266
3012 incl -844(fp)
3013noname.266:
3014 movl -840(fp),r1
3015 movl -844(fp),r2
3016 addl2 r1,r10
3017 bicl2 #0,r10
3018 cmpl r10,r1
3019 bgequ noname.267
3020 incl r2
3021noname.267:
3022 addl2 r2,r9
3023 bicl2 #0,r9
3024 cmpl r9,r2
3025 bgequ noname.268
3026 incl r8
3027noname.268:
3028
3029 bicl3 #-65536,24(r6),r3
3030 movzwl 26(r6),r1
3031 bicl2 #-65536,r1
3032 bicl3 #-65536,20(r7),r2
3033 movzwl 22(r7),r0
3034 bicl2 #-65536,r0
3035 movl r3,r5
3036 movl r1,r4
3037 mull3 r0,r5,-848(fp)
3038 mull2 r2,r5
3039 mull3 r2,r4,-852(fp)
3040 mull2 r0,r4
3041 addl3 -848(fp),-852(fp),r0
3042 bicl3 #0,r0,-848(fp)
3043 cmpl -848(fp),-852(fp)
3044 bgequ noname.269
3045 addl2 #65536,r4
3046noname.269:
3047 movzwl -846(fp),r0
3048 bicl2 #-65536,r0
3049 addl2 r0,r4
3050 bicl3 #-65536,-848(fp),r0
3051 ashl #16,r0,-852(fp)
3052 addl2 -852(fp),r5
3053 bicl2 #0,r5
3054 cmpl r5,-852(fp)
3055 bgequ noname.270
3056 incl r4
3057noname.270:
3058 movl r5,r1
3059 movl r4,r2
3060 addl2 r1,r10
3061 bicl2 #0,r10
3062 cmpl r10,r1
3063 bgequ noname.271
3064 incl r2
3065noname.271:
3066 addl2 r2,r9
3067 bicl2 #0,r9
3068 cmpl r9,r2
3069 bgequ noname.272
3070 incl r8
3071noname.272:
3072
3073 bicl3 #-65536,28(r6),r3
3074 movzwl 30(r6),r1
3075 bicl2 #-65536,r1
3076 bicl3 #-65536,16(r7),r2
3077 movzwl 18(r7),r0
3078 bicl2 #-65536,r0
3079 movl r3,r5
3080 movl r1,r4
3081 mull3 r0,r5,-856(fp)
3082 mull2 r2,r5
3083 mull3 r2,r4,-860(fp)
3084 mull2 r0,r4
3085 addl3 -856(fp),-860(fp),r0
3086 bicl3 #0,r0,-856(fp)
3087 cmpl -856(fp),-860(fp)
3088 bgequ noname.273
3089 addl2 #65536,r4
3090noname.273:
3091 movzwl -854(fp),r0
3092 bicl2 #-65536,r0
3093 addl2 r0,r4
3094 bicl3 #-65536,-856(fp),r0
3095 ashl #16,r0,-860(fp)
3096 addl2 -860(fp),r5
3097 bicl2 #0,r5
3098 cmpl r5,-860(fp)
3099 bgequ noname.274
3100 incl r4
3101noname.274:
3102 movl r5,r1
3103 movl r4,r2
3104 addl2 r1,r10
3105 bicl2 #0,r10
3106 cmpl r10,r1
3107 bgequ noname.275
3108 incl r2
3109noname.275:
3110 addl2 r2,r9
3111 bicl2 #0,r9
3112 cmpl r9,r2
3113 bgequ noname.276
3114 incl r8
3115noname.276:
3116
3117 movl r10,44(r11)
3118
3119 clrl r10
3120
3121 bicl3 #-65536,28(r6),r3
3122 movzwl 30(r6),r1
3123 bicl2 #-65536,r1
3124 bicl3 #-65536,20(r7),r2
3125 movzwl 22(r7),r0
3126 bicl2 #-65536,r0
3127 movl r3,r5
3128 movl r1,r4
3129 mull3 r0,r5,-864(fp)
3130 mull2 r2,r5
3131 mull3 r2,r4,-868(fp)
3132 mull2 r0,r4
3133 addl3 -864(fp),-868(fp),r0
3134 bicl3 #0,r0,-864(fp)
3135 cmpl -864(fp),-868(fp)
3136 bgequ noname.277
3137 addl2 #65536,r4
3138noname.277:
3139 movzwl -862(fp),r0
3140 bicl2 #-65536,r0
3141 addl2 r0,r4
3142 bicl3 #-65536,-864(fp),r0
3143 ashl #16,r0,-868(fp)
3144 addl2 -868(fp),r5
3145 bicl2 #0,r5
3146 cmpl r5,-868(fp)
3147 bgequ noname.278
3148 incl r4
3149noname.278:
3150 movl r5,r1
3151 movl r4,r2
3152 addl2 r1,r9
3153 bicl2 #0,r9
3154 cmpl r9,r1
3155 bgequ noname.279
3156 incl r2
3157noname.279:
3158 addl2 r2,r8
3159 bicl2 #0,r8
3160 cmpl r8,r2
3161 bgequ noname.280
3162 incl r10
3163noname.280:
3164
3165 bicl3 #-65536,24(r6),r3
3166 movzwl 26(r6),r1
3167 bicl2 #-65536,r1
3168 bicl3 #-65536,24(r7),r2
3169 movzwl 26(r7),r0
3170 bicl2 #-65536,r0
3171 movl r3,r5
3172 movl r1,r4
3173 mull3 r0,r5,-872(fp)
3174 mull2 r2,r5
3175 mull3 r2,r4,-876(fp)
3176 mull2 r0,r4
3177 addl3 -872(fp),-876(fp),r0
3178 bicl3 #0,r0,-872(fp)
3179 cmpl -872(fp),-876(fp)
3180 bgequ noname.281
3181 addl2 #65536,r4
3182noname.281:
3183 movzwl -870(fp),r0
3184 bicl2 #-65536,r0
3185 addl2 r0,r4
3186 bicl3 #-65536,-872(fp),r0
3187 ashl #16,r0,-876(fp)
3188 addl2 -876(fp),r5
3189 bicl2 #0,r5
3190 cmpl r5,-876(fp)
3191 bgequ noname.282
3192 incl r4
3193noname.282:
3194 movl r5,r1
3195 movl r4,r2
3196 addl2 r1,r9
3197 bicl2 #0,r9
3198 cmpl r9,r1
3199 bgequ noname.283
3200 incl r2
3201noname.283:
3202 addl2 r2,r8
3203 bicl2 #0,r8
3204 cmpl r8,r2
3205 bgequ noname.284
3206 incl r10
3207noname.284:
3208
3209 bicl3 #-65536,20(r6),r3
3210 movzwl 22(r6),r1
3211 bicl2 #-65536,r1
3212 bicl3 #-65536,28(r7),r2
3213 movzwl 30(r7),r0
3214 bicl2 #-65536,r0
3215 movl r3,r5
3216 movl r1,r4
3217 mull3 r0,r5,-880(fp)
3218 mull2 r2,r5
3219 mull3 r2,r4,-884(fp)
3220 mull2 r0,r4
3221 addl3 -880(fp),-884(fp),r0
3222 bicl3 #0,r0,-880(fp)
3223 cmpl -880(fp),-884(fp)
3224 bgequ noname.285
3225 addl2 #65536,r4
3226noname.285:
3227 movzwl -878(fp),r0
3228 bicl2 #-65536,r0
3229 addl2 r0,r4
3230 bicl3 #-65536,-880(fp),r0
3231 ashl #16,r0,-884(fp)
3232 addl2 -884(fp),r5
3233 bicl2 #0,r5
3234 cmpl r5,-884(fp)
3235 bgequ noname.286
3236 incl r4
3237noname.286:
3238 movl r5,r1
3239 movl r4,r2
3240 addl2 r1,r9
3241 bicl2 #0,r9
3242 cmpl r9,r1
3243 bgequ noname.287
3244 incl r2
3245noname.287:
3246 addl2 r2,r8
3247 bicl2 #0,r8
3248 cmpl r8,r2
3249 bgequ noname.288
3250 incl r10
3251noname.288:
3252
3253 movl r9,48(r11)
3254
3255 clrl r9
3256
3257 bicl3 #-65536,24(r6),r3
3258 movzwl 26(r6),r1
3259 bicl2 #-65536,r1
3260 bicl3 #-65536,28(r7),r2
3261 movzwl 30(r7),r0
3262 bicl2 #-65536,r0
3263 movl r3,r5
3264 movl r1,r4
3265 mull3 r0,r5,-888(fp)
3266 mull2 r2,r5
3267 mull3 r2,r4,-892(fp)
3268 mull2 r0,r4
3269 addl3 -888(fp),-892(fp),r0
3270 bicl3 #0,r0,-888(fp)
3271 cmpl -888(fp),-892(fp)
3272 bgequ noname.289
3273 addl2 #65536,r4
3274noname.289:
3275 movzwl -886(fp),r0
3276 bicl2 #-65536,r0
3277 addl2 r0,r4
3278 bicl3 #-65536,-888(fp),r0
3279 ashl #16,r0,-892(fp)
3280 addl2 -892(fp),r5
3281 bicl2 #0,r5
3282 cmpl r5,-892(fp)
3283 bgequ noname.290
3284 incl r4
3285noname.290:
3286 movl r5,r1
3287 movl r4,r2
3288 addl2 r1,r8
3289 bicl2 #0,r8
3290 cmpl r8,r1
3291 bgequ noname.291
3292 incl r2
3293noname.291:
3294 addl2 r2,r10
3295 bicl2 #0,r10
3296 cmpl r10,r2
3297 bgequ noname.292
3298 incl r9
3299noname.292:
3300
3301 movzwl 30(r6),r2
3302 bicl3 #-65536,24(r7),r3
3303 movzwl 26(r7),r0
3304 bicl2 #-65536,r0
3305 bicl3 #-65536,28(r6),-904(fp)
3306 bicl3 #-65536,r2,-908(fp)
3307 mull3 r0,-904(fp),-896(fp)
3308 mull2 r3,-904(fp)
3309 mull3 r3,-908(fp),-900(fp)
3310 mull2 r0,-908(fp)
3311 addl3 -896(fp),-900(fp),r0
3312 bicl3 #0,r0,-896(fp)
3313 cmpl -896(fp),-900(fp)
3314 bgequ noname.293
3315 addl2 #65536,-908(fp)
3316noname.293:
3317 movzwl -894(fp),r0
3318 bicl2 #-65536,r0
3319 addl2 r0,-908(fp)
3320 bicl3 #-65536,-896(fp),r0
3321 ashl #16,r0,-900(fp)
3322 addl3 -900(fp),-904(fp),r0
3323 bicl3 #0,r0,-904(fp)
3324 cmpl -904(fp),-900(fp)
3325 bgequ noname.294
3326 incl -908(fp)
3327noname.294:
3328 movl -904(fp),r1
3329 movl -908(fp),r2
3330 addl2 r1,r8
3331 bicl2 #0,r8
3332 cmpl r8,r1
3333 bgequ noname.295
3334 incl r2
3335noname.295:
3336 addl2 r2,r10
3337 bicl2 #0,r10
3338 cmpl r10,r2
3339 bgequ noname.296
3340 incl r9
3341noname.296:
3342
3343 movl r8,52(r11)
3344
3345 clrl r8
3346
3347 movzwl 30(r6),r2
3348 bicl3 #-65536,28(r7),r3
3349 movzwl 30(r7),r0
3350 bicl2 #-65536,r0
3351 bicl3 #-65536,28(r6),-920(fp)
3352 bicl3 #-65536,r2,-924(fp)
3353 mull3 r0,-920(fp),-912(fp)
3354 mull2 r3,-920(fp)
3355 mull3 r3,-924(fp),-916(fp)
3356 mull2 r0,-924(fp)
3357 addl3 -912(fp),-916(fp),r0
3358 bicl3 #0,r0,-912(fp)
3359 cmpl -912(fp),-916(fp)
3360 bgequ noname.297
3361 addl2 #65536,-924(fp)
3362noname.297:
3363 movzwl -910(fp),r0
3364 bicl2 #-65536,r0
3365 addl2 r0,-924(fp)
3366 bicl3 #-65536,-912(fp),r0
3367 ashl #16,r0,-916(fp)
3368 addl3 -916(fp),-920(fp),r0
3369 bicl3 #0,r0,-920(fp)
3370 cmpl -920(fp),-916(fp)
3371 bgequ noname.298
3372 incl -924(fp)
3373noname.298:
3374 movl -920(fp),r1
3375 movl -924(fp),r2
3376 addl2 r1,r10
3377 bicl2 #0,r10
3378 cmpl r10,r1
3379 bgequ noname.299
3380 incl r2
3381noname.299:
3382 addl2 r2,r9
3383 bicl2 #0,r9
3384 cmpl r9,r2
3385 bgequ noname.300
3386 incl r8
3387noname.300:
3388
3389 movl r10,56(r11)
3390
3391 movl r9,60(r11)
3392
3393 ret
3394
3395
3396
3397;r=4 ;(AP)
3398;a=8 ;(AP)
3399;b=12 ;(AP)
3400;n=16 ;(AP) n by value (input) -- unused below: BN_MUL_COMBA4 never reads 16(ap)
3401
3402 .psect code,nowrt
3403
; BN_MUL_COMBA4
;
; Fully unrolled 4 x 4 longword multiply.  Reads four 32-bit longwords from
; each of the two arrays addressed by 8(ap) and 12(ap) and stores the eight
; longword product through the pointer in 4(ap).  16(ap) is never read.
;
; Register use, as established by the code below:
;   r6  = 8(ap)   first operand array  ("a"), longwords at 0,4,8,12(r6)
;   r7  = 12(ap)  second operand array ("b"), longwords at 0,4,8,12(r7)
;   r11 = 4(ap)   result array         ("r"), longwords at 0..28(r11)
;   r8, r9, r10   three-longword column accumulator; which register is the
;                 low / middle / high word rotates after every result store
;   r0-r5         scratch
;   -4(fp)..-156(fp)  scratch longwords for the partial products
;
; Every multiply-accumulate step has the same shape:
;   1. split both 32-bit operands into 16-bit halves (al,ah / bl,bh);
;   2. form the four 16x16 products al*bl, al*bh, ah*bl, ah*bh;
;   3. add the two cross products, fold their high and low halves into the
;      high and low words of the 64-bit product, detecting each carry with an
;      unsigned compare (sum < addend => carry);
;   4. add the 64-bit product (low in r1, high in r2) into the accumulator,
;      propagating carries the same way.
; The "bicl #0" instructions clear no bits; they are no-op masks left in by
; the code generator.  Likewise "bicl2 #-65536" after a movzwl is redundant
; because movzwl already zero-extends.
3404.entry BN_MUL_COMBA4,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
; Reserve 156 bytes of stack; they are addressed as -4(fp)..-156(fp).
3405 movab -156(sp),sp
3406
; Zero the accumulator: clrq clears the register pair r9/r10, clrl clears r8.
3407 clrq r9
3408
3409 clrl r8
3410
; ---- a[0] * b[0]  ->  accumulator (low=r10, mid=r9, high=r8) ----
3411 movl 8(ap),r6
; r3 = low 16 bits of a[0], r2 = high 16 bits of a[0]
3412 bicl3 #-65536,(r6),r3
3413 movzwl 2(r6),r2
3414 bicl2 #-65536,r2
3415 movl 12(ap),r7
; r1 = low 16 bits of b[0], r0 = high 16 bits of b[0]
3416 bicl3 #-65536,(r7),r1
3417 movzwl 2(r7),r0
3418 bicl2 #-65536,r0
3419 movl r3,r5
3420 movl r2,r4
; Four 16x16 products: -4(fp)=al*bh, r5=al*bl, -8(fp)=ah*bl, r4=ah*bh
3421 mull3 r0,r5,-4(fp)
3422 mull2 r1,r5
3423 mull3 r1,r4,-8(fp)
3424 mull2 r0,r4
; Sum the two cross products; a wrap-around is worth 2^16 in the high word.
3425 addl3 -4(fp),-8(fp),r0
3426 bicl3 #0,r0,-4(fp)
3427 cmpl -4(fp),-8(fp)
3428 bgequ noname.303
3429 addl2 #65536,r4
3430noname.303:
; -2(fp) is the upper word of the longword at -4(fp): add (cross >> 16) to
; the high word of the product.
3431 movzwl -2(fp),r0
3432 bicl2 #-65536,r0
3433 addl2 r0,r4
; Add (cross & 0xffff) << 16 to the low word, carrying into the high word.
3434 bicl3 #-65536,-4(fp),r0
3435 ashl #16,r0,-8(fp)
3436 addl2 -8(fp),r5
3437 bicl2 #0,r5
3438 cmpl r5,-8(fp)
3439 bgequ noname.304
3440 incl r4
3441noname.304:
; r1 = low longword of the product, r2 = high longword of the product
3442 movl r5,r1
3443 movl r4,r2
; Accumulate: r10 += r1 (carry into r2), r9 += r2 (carry into r8).
3444 addl2 r1,r10
3445 bicl2 #0,r10
3446 cmpl r10,r1
3447 bgequ noname.305
3448 incl r2
3449noname.305:
3450 addl2 r2,r9
3451 bicl2 #0,r9
3452 cmpl r9,r2
3453 bgequ noname.306
3454 incl r8
3455noname.306:
3456
; Store r[0] from r10, then recycle r10 as the new high accumulator word.
3457 movl 4(ap),r11
3458 movl r10,(r11)
3459
3460 clrl r10
3461
; ---- a[0] * b[1]  ->  accumulator (low=r9, mid=r8, high=r10) ----
3462 bicl3 #-65536,(r6),r3
3463 movzwl 2(r6),r1
3464 bicl2 #-65536,r1
3465 bicl3 #-65536,4(r7),r2
3466 movzwl 6(r7),r0
3467 bicl2 #-65536,r0
3468 movl r3,r5
3469 movl r1,r4
3470 mull3 r0,r5,-12(fp)
3471 mull2 r2,r5
3472 mull3 r2,r4,-16(fp)
3473 mull2 r0,r4
3474 addl3 -12(fp),-16(fp),r0
3475 bicl3 #0,r0,-12(fp)
3476 cmpl -12(fp),-16(fp)
3477 bgequ noname.307
3478 addl2 #65536,r4
3479noname.307:
3480 movzwl -10(fp),r0
3481 bicl2 #-65536,r0
3482 addl2 r0,r4
3483 bicl3 #-65536,-12(fp),r0
3484 ashl #16,r0,-16(fp)
3485 addl2 -16(fp),r5
3486 bicl2 #0,r5
3487 cmpl r5,-16(fp)
3488 bgequ noname.308
3489 incl r4
3490noname.308:
3491 movl r5,r1
3492 movl r4,r2
3493 addl2 r1,r9
3494 bicl2 #0,r9
3495 cmpl r9,r1
3496 bgequ noname.309
3497 incl r2
3498noname.309:
3499 addl2 r2,r8
3500 bicl2 #0,r8
3501 cmpl r8,r2
3502 bgequ noname.310
3503 incl r10
3504noname.310:
3505
; ---- a[1] * b[0]  ->  accumulator (low=r9, mid=r8, high=r10) ----
3506 bicl3 #-65536,4(r6),r3
3507 movzwl 6(r6),r1
3508 bicl2 #-65536,r1
3509 bicl3 #-65536,(r7),r2
3510 movzwl 2(r7),r0
3511 bicl2 #-65536,r0
3512 movl r3,r5
3513 movl r1,r4
3514 mull3 r0,r5,-20(fp)
3515 mull2 r2,r5
3516 mull3 r2,r4,-24(fp)
3517 mull2 r0,r4
3518 addl3 -20(fp),-24(fp),r0
3519 bicl3 #0,r0,-20(fp)
3520 cmpl -20(fp),-24(fp)
3521 bgequ noname.311
3522 addl2 #65536,r4
3523noname.311:
3524 movzwl -18(fp),r0
3525 bicl2 #-65536,r0
3526 addl2 r0,r4
3527 bicl3 #-65536,-20(fp),r0
3528 ashl #16,r0,-24(fp)
3529 addl2 -24(fp),r5
3530 bicl2 #0,r5
3531 cmpl r5,-24(fp)
3532 bgequ noname.312
3533 incl r4
3534noname.312:
3535 movl r5,r1
3536 movl r4,r2
3537 addl2 r1,r9
3538 bicl2 #0,r9
3539 cmpl r9,r1
3540 bgequ noname.313
3541 incl r2
3542noname.313:
3543 addl2 r2,r8
3544 bicl2 #0,r8
3545 cmpl r8,r2
3546 bgequ noname.314
3547 incl r10
3548noname.314:
3549
; Store r[1] from r9, then recycle r9 as the new high accumulator word.
3550 movl r9,4(r11)
3551
3552 clrl r9
3553
; ---- a[2] * b[0]  ->  accumulator (low=r8, mid=r10, high=r9) ----
3554 bicl3 #-65536,8(r6),r3
3555 movzwl 10(r6),r1
3556 bicl2 #-65536,r1
3557 bicl3 #-65536,(r7),r2
3558 movzwl 2(r7),r0
3559 bicl2 #-65536,r0
3560 movl r3,r5
3561 movl r1,r4
3562 mull3 r0,r5,-28(fp)
3563 mull2 r2,r5
3564 mull3 r2,r4,-32(fp)
3565 mull2 r0,r4
3566 addl3 -28(fp),-32(fp),r0
3567 bicl3 #0,r0,-28(fp)
3568 cmpl -28(fp),-32(fp)
3569 bgequ noname.315
3570 addl2 #65536,r4
3571noname.315:
3572 movzwl -26(fp),r0
3573 bicl2 #-65536,r0
3574 addl2 r0,r4
3575 bicl3 #-65536,-28(fp),r0
3576 ashl #16,r0,-32(fp)
3577 addl2 -32(fp),r5
3578 bicl2 #0,r5
3579 cmpl r5,-32(fp)
3580 bgequ noname.316
3581 incl r4
3582noname.316:
3583 movl r5,r1
3584 movl r4,r2
3585 addl2 r1,r8
3586 bicl2 #0,r8
3587 cmpl r8,r1
3588 bgequ noname.317
3589 incl r2
3590noname.317:
3591 addl2 r2,r10
3592 bicl2 #0,r10
3593 cmpl r10,r2
3594 bgequ noname.318
3595 incl r9
3596noname.318:
3597
; ---- a[1] * b[1]  ->  accumulator (low=r8, mid=r10, high=r9) ----
3598 bicl3 #-65536,4(r6),r3
3599 movzwl 6(r6),r1
3600 bicl2 #-65536,r1
3601 bicl3 #-65536,4(r7),r2
3602 movzwl 6(r7),r0
3603 bicl2 #-65536,r0
3604 movl r3,r5
3605 movl r1,r4
3606 mull3 r0,r5,-36(fp)
3607 mull2 r2,r5
3608 mull3 r2,r4,-40(fp)
3609 mull2 r0,r4
3610 addl3 -36(fp),-40(fp),r0
3611 bicl3 #0,r0,-36(fp)
3612 cmpl -36(fp),-40(fp)
3613 bgequ noname.319
3614 addl2 #65536,r4
3615noname.319:
3616 movzwl -34(fp),r0
3617 bicl2 #-65536,r0
3618 addl2 r0,r4
3619 bicl3 #-65536,-36(fp),r0
3620 ashl #16,r0,-40(fp)
3621 addl2 -40(fp),r5
3622 bicl2 #0,r5
3623 cmpl r5,-40(fp)
3624 bgequ noname.320
3625 incl r4
3626noname.320:
3627 movl r5,r1
3628 movl r4,r2
3629 addl2 r1,r8
3630 bicl2 #0,r8
3631 cmpl r8,r1
3632 bgequ noname.321
3633 incl r2
3634noname.321:
3635 addl2 r2,r10
3636 bicl2 #0,r10
3637 cmpl r10,r2
3638 bgequ noname.322
3639 incl r9
3640noname.322:
3641
; ---- a[0] * b[2]  ->  accumulator (low=r8, mid=r10, high=r9) ----
3642 bicl3 #-65536,(r6),r3
3643 movzwl 2(r6),r1
3644 bicl2 #-65536,r1
3645 bicl3 #-65536,8(r7),r2
3646 movzwl 10(r7),r0
3647 bicl2 #-65536,r0
3648 movl r3,r5
3649 movl r1,r4
3650 mull3 r0,r5,-44(fp)
3651 mull2 r2,r5
3652 mull3 r2,r4,-48(fp)
3653 mull2 r0,r4
3654 addl3 -44(fp),-48(fp),r0
3655 bicl3 #0,r0,-44(fp)
3656 cmpl -44(fp),-48(fp)
3657 bgequ noname.323
3658 addl2 #65536,r4
3659noname.323:
3660 movzwl -42(fp),r0
3661 bicl2 #-65536,r0
3662 addl2 r0,r4
3663 bicl3 #-65536,-44(fp),r0
3664 ashl #16,r0,-48(fp)
3665 addl2 -48(fp),r5
3666 bicl2 #0,r5
3667 cmpl r5,-48(fp)
3668 bgequ noname.324
3669 incl r4
3670noname.324:
3671 movl r5,r1
3672 movl r4,r2
3673 addl2 r1,r8
3674 bicl2 #0,r8
3675 cmpl r8,r1
3676 bgequ noname.325
3677 incl r2
3678noname.325:
3679 addl2 r2,r10
3680 bicl2 #0,r10
3681 cmpl r10,r2
3682 bgequ noname.326
3683 incl r9
3684noname.326:
3685
; Store r[2] from r8, then recycle r8 as the new high accumulator word.
3686 movl r8,8(r11)
3687
3688 clrl r8
3689
; ---- a[0] * b[3]  ->  accumulator (low=r10, mid=r9, high=r8) ----
; Same step as above, but the low product word lives in r4 and the high
; product word in the stack slot -60(fp) instead of a register.
3690 bicl3 #-65536,(r6),r3
3691 movzwl 2(r6),r2
3692 bicl3 #-65536,12(r7),r1
3693 movzwl 14(r7),r0
3694 bicl2 #-65536,r0
3695 movl r3,r4
3696 bicl3 #-65536,r2,-60(fp)
3697 mull3 r0,r4,-52(fp)
3698 mull2 r1,r4
3699 mull3 r1,-60(fp),-56(fp)
3700 mull2 r0,-60(fp)
3701 addl3 -52(fp),-56(fp),r0
3702 bicl3 #0,r0,-52(fp)
3703 cmpl -52(fp),-56(fp)
3704 bgequ noname.327
3705 addl2 #65536,-60(fp)
3706noname.327:
3707 movzwl -50(fp),r0
3708 bicl2 #-65536,r0
3709 addl2 r0,-60(fp)
3710 bicl3 #-65536,-52(fp),r0
3711 ashl #16,r0,-56(fp)
3712 addl2 -56(fp),r4
3713 bicl2 #0,r4
3714 cmpl r4,-56(fp)
3715 bgequ noname.328
3716 incl -60(fp)
3717noname.328:
3718 movl r4,r1
3719 movl -60(fp),r2
3720 addl2 r1,r10
3721 bicl2 #0,r10
3722 cmpl r10,r1
3723 bgequ noname.329
3724 incl r2
3725noname.329:
3726 addl2 r2,r9
3727 bicl2 #0,r9
3728 cmpl r9,r2
3729 bgequ noname.330
3730 incl r8
3731noname.330:
3732
; ---- a[1] * b[2]  ->  accumulator (low=r10, mid=r9, high=r8) ----
; Here both product words are kept on the stack: low in -72(fp), high in
; -76(fp); -64(fp) and -68(fp) hold the cross products.
3733 movzwl 6(r6),r2
3734 bicl3 #-65536,8(r7),r3
3735 movzwl 10(r7),r0
3736 bicl2 #-65536,r0
3737 bicl3 #-65536,4(r6),-72(fp)
3738 bicl3 #-65536,r2,-76(fp)
3739 mull3 r0,-72(fp),-64(fp)
3740 mull2 r3,-72(fp)
3741 mull3 r3,-76(fp),-68(fp)
3742 mull2 r0,-76(fp)
3743 addl3 -64(fp),-68(fp),r0
3744 bicl3 #0,r0,-64(fp)
3745 cmpl -64(fp),-68(fp)
3746 bgequ noname.331
3747 addl2 #65536,-76(fp)
3748noname.331:
3749 movzwl -62(fp),r0
3750 bicl2 #-65536,r0
3751 addl2 r0,-76(fp)
3752 bicl3 #-65536,-64(fp),r0
3753 ashl #16,r0,-68(fp)
3754 addl3 -68(fp),-72(fp),r0
3755 bicl3 #0,r0,-72(fp)
3756 cmpl -72(fp),-68(fp)
3757 bgequ noname.332
3758 incl -76(fp)
3759noname.332:
3760 movl -72(fp),r1
3761 movl -76(fp),r2
3762 addl2 r1,r10
3763 bicl2 #0,r10
3764 cmpl r10,r1
3765 bgequ noname.333
3766 incl r2
3767noname.333:
3768 addl2 r2,r9
3769 bicl2 #0,r9
3770 cmpl r9,r2
3771 bgequ noname.334
3772 incl r8
3773noname.334:
3774
; ---- a[2] * b[1]  ->  accumulator (low=r10, mid=r9, high=r8) ----
3775 bicl3 #-65536,8(r6),r3
3776 movzwl 10(r6),r1
3777 bicl2 #-65536,r1
3778 bicl3 #-65536,4(r7),r2
3779 movzwl 6(r7),r0
3780 bicl2 #-65536,r0
3781 movl r3,r5
3782 movl r1,r4
3783 mull3 r0,r5,-80(fp)
3784 mull2 r2,r5
3785 mull3 r2,r4,-84(fp)
3786 mull2 r0,r4
3787 addl3 -80(fp),-84(fp),r0
3788 bicl3 #0,r0,-80(fp)
3789 cmpl -80(fp),-84(fp)
3790 bgequ noname.335
3791 addl2 #65536,r4
3792noname.335:
3793 movzwl -78(fp),r0
3794 bicl2 #-65536,r0
3795 addl2 r0,r4
3796 bicl3 #-65536,-80(fp),r0
3797 ashl #16,r0,-84(fp)
3798 addl2 -84(fp),r5
3799 bicl2 #0,r5
3800 cmpl r5,-84(fp)
3801 bgequ noname.336
3802 incl r4
3803noname.336:
3804 movl r5,r1
3805 movl r4,r2
3806 addl2 r1,r10
3807 bicl2 #0,r10
3808 cmpl r10,r1
3809 bgequ noname.337
3810 incl r2
3811noname.337:
3812 addl2 r2,r9
3813 bicl2 #0,r9
3814 cmpl r9,r2
3815 bgequ noname.338
3816 incl r8
3817noname.338:
3818
; ---- a[3] * b[0]  ->  accumulator (low=r10, mid=r9, high=r8) ----
3819 bicl3 #-65536,12(r6),r3
3820 movzwl 14(r6),r1
3821 bicl2 #-65536,r1
3822 bicl3 #-65536,(r7),r2
3823 movzwl 2(r7),r0
3824 bicl2 #-65536,r0
3825 movl r3,r5
3826 movl r1,r4
3827 mull3 r0,r5,-88(fp)
3828 mull2 r2,r5
3829 mull3 r2,r4,-92(fp)
3830 mull2 r0,r4
3831 addl3 -88(fp),-92(fp),r0
3832 bicl3 #0,r0,-88(fp)
3833 cmpl -88(fp),-92(fp)
3834 bgequ noname.339
3835 addl2 #65536,r4
3836noname.339:
3837 movzwl -86(fp),r0
3838 bicl2 #-65536,r0
3839 addl2 r0,r4
3840 bicl3 #-65536,-88(fp),r0
3841 ashl #16,r0,-92(fp)
3842 addl2 -92(fp),r5
3843 bicl2 #0,r5
3844 cmpl r5,-92(fp)
3845 bgequ noname.340
3846 incl r4
3847noname.340:
3848 movl r5,r1
3849 movl r4,r2
3850 addl2 r1,r10
3851 bicl2 #0,r10
3852 cmpl r10,r1
3853 bgequ noname.341
3854 incl r2
3855noname.341:
3856 addl2 r2,r9
3857 bicl2 #0,r9
3858 cmpl r9,r2
3859 bgequ noname.342
3860 incl r8
3861noname.342:
3862
; Store r[3] from r10, then recycle r10 as the new high accumulator word.
3863 movl r10,12(r11)
3864
3865 clrl r10
3866
; ---- a[3] * b[1]  ->  accumulator (low=r9, mid=r8, high=r10) ----
3867 bicl3 #-65536,12(r6),r3
3868 movzwl 14(r6),r1
3869 bicl2 #-65536,r1
3870 bicl3 #-65536,4(r7),r2
3871 movzwl 6(r7),r0
3872 bicl2 #-65536,r0
3873 movl r3,r5
3874 movl r1,r4
3875 mull3 r0,r5,-96(fp)
3876 mull2 r2,r5
3877 mull3 r2,r4,-100(fp)
3878 mull2 r0,r4
3879 addl3 -96(fp),-100(fp),r0
3880 bicl3 #0,r0,-96(fp)
3881 cmpl -96(fp),-100(fp)
3882 bgequ noname.343
3883 addl2 #65536,r4
3884noname.343:
3885 movzwl -94(fp),r0
3886 bicl2 #-65536,r0
3887 addl2 r0,r4
3888 bicl3 #-65536,-96(fp),r0
3889 ashl #16,r0,-100(fp)
3890 addl2 -100(fp),r5
3891 bicl2 #0,r5
3892 cmpl r5,-100(fp)
3893 bgequ noname.344
3894 incl r4
3895noname.344:
3896 movl r5,r1
3897 movl r4,r2
3898 addl2 r1,r9
3899 bicl2 #0,r9
3900 cmpl r9,r1
3901 bgequ noname.345
3902 incl r2
3903noname.345:
3904 addl2 r2,r8
3905 bicl2 #0,r8
3906 cmpl r8,r2
3907 bgequ noname.346
3908 incl r10
3909noname.346:
3910
; ---- a[2] * b[2]  ->  accumulator (low=r9, mid=r8, high=r10) ----
3911 bicl3 #-65536,8(r6),r3
3912 movzwl 10(r6),r1
3913 bicl2 #-65536,r1
3914 bicl3 #-65536,8(r7),r2
3915 movzwl 10(r7),r0
3916 bicl2 #-65536,r0
3917 movl r3,r5
3918 movl r1,r4
3919 mull3 r0,r5,-104(fp)
3920 mull2 r2,r5
3921 mull3 r2,r4,-108(fp)
3922 mull2 r0,r4
3923 addl3 -104(fp),-108(fp),r0
3924 bicl3 #0,r0,-104(fp)
3925 cmpl -104(fp),-108(fp)
3926 bgequ noname.347
3927 addl2 #65536,r4
3928noname.347:
3929 movzwl -102(fp),r0
3930 bicl2 #-65536,r0
3931 addl2 r0,r4
3932 bicl3 #-65536,-104(fp),r0
3933 ashl #16,r0,-108(fp)
3934 addl2 -108(fp),r5
3935 bicl2 #0,r5
3936 cmpl r5,-108(fp)
3937 bgequ noname.348
3938 incl r4
3939noname.348:
3940 movl r5,r1
3941 movl r4,r2
3942 addl2 r1,r9
3943 bicl2 #0,r9
3944 cmpl r9,r1
3945 bgequ noname.349
3946 incl r2
3947noname.349:
3948 addl2 r2,r8
3949 bicl2 #0,r8
3950 cmpl r8,r2
3951 bgequ noname.350
3952 incl r10
3953noname.350:
3954
; ---- a[1] * b[3]  ->  accumulator (low=r9, mid=r8, high=r10) ----
3955 bicl3 #-65536,4(r6),r3
3956 movzwl 6(r6),r1
3957 bicl2 #-65536,r1
3958 bicl3 #-65536,12(r7),r2
3959 movzwl 14(r7),r0
3960 bicl2 #-65536,r0
3961 movl r3,r5
3962 movl r1,r4
3963 mull3 r0,r5,-112(fp)
3964 mull2 r2,r5
3965 mull3 r2,r4,-116(fp)
3966 mull2 r0,r4
3967 addl3 -112(fp),-116(fp),r0
3968 bicl3 #0,r0,-112(fp)
3969 cmpl -112(fp),-116(fp)
3970 bgequ noname.351
3971 addl2 #65536,r4
3972noname.351:
3973 movzwl -110(fp),r0
3974 bicl2 #-65536,r0
3975 addl2 r0,r4
3976 bicl3 #-65536,-112(fp),r0
3977 ashl #16,r0,-116(fp)
3978 addl2 -116(fp),r5
3979 bicl2 #0,r5
3980 cmpl r5,-116(fp)
3981 bgequ noname.352
3982 incl r4
3983noname.352:
3984 movl r5,r1
3985 movl r4,r2
3986 addl2 r1,r9
3987 bicl2 #0,r9
3988 cmpl r9,r1
3989 bgequ noname.353
3990 incl r2
3991noname.353:
3992 addl2 r2,r8
3993 bicl2 #0,r8
3994 cmpl r8,r2
3995 bgequ noname.354
3996 incl r10
3997noname.354:
3998
; Store r[4] from r9, then recycle r9 as the new high accumulator word.
3999 movl r9,16(r11)
4000
4001 clrl r9
4002
; ---- a[2] * b[3]  ->  accumulator (low=r8, mid=r10, high=r9) ----
4003 bicl3 #-65536,8(r6),r3
4004 movzwl 10(r6),r1
4005 bicl2 #-65536,r1
4006 bicl3 #-65536,12(r7),r2
4007 movzwl 14(r7),r0
4008 bicl2 #-65536,r0
4009 movl r3,r5
4010 movl r1,r4
4011 mull3 r0,r5,-120(fp)
4012 mull2 r2,r5
4013 mull3 r2,r4,-124(fp)
4014 mull2 r0,r4
4015 addl3 -120(fp),-124(fp),r0
4016 bicl3 #0,r0,-120(fp)
4017 cmpl -120(fp),-124(fp)
4018 bgequ noname.355
4019 addl2 #65536,r4
4020noname.355:
4021 movzwl -118(fp),r0
4022 bicl2 #-65536,r0
4023 addl2 r0,r4
4024 bicl3 #-65536,-120(fp),r0
4025 ashl #16,r0,-124(fp)
4026 addl2 -124(fp),r5
4027 bicl2 #0,r5
4028 cmpl r5,-124(fp)
4029 bgequ noname.356
4030 incl r4
4031noname.356:
4032 movl r5,r1
4033 movl r4,r2
4034 addl2 r1,r8
4035 bicl2 #0,r8
4036 cmpl r8,r1
4037 bgequ noname.357
4038 incl r2
4039noname.357:
4040 addl2 r2,r10
4041 bicl2 #0,r10
4042 cmpl r10,r2
4043 bgequ noname.358
4044 incl r9
4045noname.358:
4046
; ---- a[3] * b[2]  ->  accumulator (low=r8, mid=r10, high=r9) ----
; Product words kept on the stack: low in -136(fp), high in -140(fp).
4047 movzwl 14(r6),r2
4048 bicl3 #-65536,8(r7),r3
4049 movzwl 10(r7),r0
4050 bicl2 #-65536,r0
4051 bicl3 #-65536,12(r6),-136(fp)
4052 bicl3 #-65536,r2,-140(fp)
4053 mull3 r0,-136(fp),-128(fp)
4054 mull2 r3,-136(fp)
4055 mull3 r3,-140(fp),-132(fp)
4056 mull2 r0,-140(fp)
4057 addl3 -128(fp),-132(fp),r0
4058 bicl3 #0,r0,-128(fp)
4059 cmpl -128(fp),-132(fp)
4060 bgequ noname.359
4061 addl2 #65536,-140(fp)
4062noname.359:
4063 movzwl -126(fp),r0
4064 bicl2 #-65536,r0
4065 addl2 r0,-140(fp)
4066 bicl3 #-65536,-128(fp),r0
4067 ashl #16,r0,-132(fp)
4068 addl3 -132(fp),-136(fp),r0
4069 bicl3 #0,r0,-136(fp)
4070 cmpl -136(fp),-132(fp)
4071 bgequ noname.360
4072 incl -140(fp)
4073noname.360:
4074 movl -136(fp),r1
4075 movl -140(fp),r2
4076 addl2 r1,r8
4077 bicl2 #0,r8
4078 cmpl r8,r1
4079 bgequ noname.361
4080 incl r2
4081noname.361:
4082 addl2 r2,r10
4083 bicl2 #0,r10
4084 cmpl r10,r2
4085 bgequ noname.362
4086 incl r9
4087noname.362:
4088
; Store r[5] from r8, then recycle r8 as the new high accumulator word.
4089 movl r8,20(r11)
4090
4091 clrl r8
4092
; ---- a[3] * b[3]  ->  accumulator (low=r10, mid=r9, high=r8) ----
; Product words kept on the stack: low in -152(fp), high in -156(fp).
4093 movzwl 14(r6),r2
4094 bicl3 #-65536,12(r7),r3
4095 movzwl 14(r7),r0
4096 bicl2 #-65536,r0
4097 bicl3 #-65536,12(r6),-152(fp)
4098 bicl3 #-65536,r2,-156(fp)
4099 mull3 r0,-152(fp),-144(fp)
4100 mull2 r3,-152(fp)
4101 mull3 r3,-156(fp),-148(fp)
4102 mull2 r0,-156(fp)
4103 addl3 -144(fp),-148(fp),r0
4104 bicl3 #0,r0,-144(fp)
4105 cmpl -144(fp),-148(fp)
4106 bgequ noname.363
4107 addl2 #65536,-156(fp)
4108noname.363:
4109 movzwl -142(fp),r0
4110 bicl2 #-65536,r0
4111 addl2 r0,-156(fp)
4112 bicl3 #-65536,-144(fp),r0
4113 ashl #16,r0,-148(fp)
4114 addl3 -148(fp),-152(fp),r0
4115 bicl3 #0,r0,-152(fp)
4116 cmpl -152(fp),-148(fp)
4117 bgequ noname.364
4118 incl -156(fp)
4119noname.364:
4120 movl -152(fp),r1
4121 movl -156(fp),r2
4122 addl2 r1,r10
4123 bicl2 #0,r10
4124 cmpl r10,r1
4125 bgequ noname.365
4126 incl r2
4127noname.365:
4128 addl2 r2,r9
4129 bicl2 #0,r9
4130 cmpl r9,r2
4131 bgequ noname.366
4132 incl r8
4133noname.366:
4134
; Store the last two result longwords: r[6] from r10 and r[7] from r9.
; The final high word in r8 is not stored.
4135 movl r10,24(r11)
4136
4137 movl r9,28(r11)
4138
4139 ret
4140
4141
4142
4143;r=4 ;(AP)
4144;a=8 ;(AP)
4145;b=12 ;(AP)
4146;n=16 ;(AP) n by value (input)
4147
4148 .psect code,nowrt
4149
4150.entry BN_SQR_COMBA8,^m<r2,r3,r4,r5,r6,r7,r8,r9>
4151 movab -444(sp),sp
4152
4153 clrq r8
4154
4155 clrl r7
4156
4157 movl 8(ap),r4
4158 movl (r4),r3
4159 bicl3 #-65536,r3,-4(fp)
4160 extzv #16,#16,r3,r0
4161 bicl3 #-65536,r0,r3
4162 movl -4(fp),r0
4163 mull3 r0,r3,-8(fp)
4164 mull3 r0,r0,-4(fp)
4165 mull2 r3,r3
4166 bicl3 #32767,-8(fp),r0
4167 extzv #15,#17,r0,r0
4168 addl2 r0,r3
4169 bicl3 #-65536,-8(fp),r0
4170 ashl #17,r0,-8(fp)
4171 addl3 -4(fp),-8(fp),r0
4172 bicl3 #0,r0,-4(fp)
4173 cmpl -4(fp),-8(fp)
4174 bgequ noname.369
4175 incl r3
4176noname.369:
4177 movl -4(fp),r1
4178 movl r3,r2
4179 addl2 r1,r9
4180 bicl2 #0,r9
4181 cmpl r9,r1
4182 bgequ noname.370
4183 incl r2
4184noname.370:
4185 addl2 r2,r8
4186 bicl2 #0,r8
4187 cmpl r8,r2
4188 bgequ noname.371
4189 incl r7
4190noname.371:
4191
4192 movl r9,@4(ap)
4193
4194 clrl r9
4195
4196 movzwl 6(r4),r2
4197 bicl3 #-65536,(r4),r3
4198 movzwl 2(r4),r0
4199 bicl2 #-65536,r0
4200 bicl3 #-65536,4(r4),-20(fp)
4201 bicl3 #-65536,r2,-24(fp)
4202 mull3 r0,-20(fp),-12(fp)
4203 mull2 r3,-20(fp)
4204 mull3 r3,-24(fp),-16(fp)
4205 mull2 r0,-24(fp)
4206 addl3 -12(fp),-16(fp),r0
4207 bicl3 #0,r0,-12(fp)
4208 cmpl -12(fp),-16(fp)
4209 bgequ noname.372
4210 addl2 #65536,-24(fp)
4211noname.372:
4212 movzwl -10(fp),r0
4213 bicl2 #-65536,r0
4214 addl2 r0,-24(fp)
4215 bicl3 #-65536,-12(fp),r0
4216 ashl #16,r0,-16(fp)
4217 addl3 -16(fp),-20(fp),r0
4218 bicl3 #0,r0,-20(fp)
4219 cmpl -20(fp),-16(fp)
4220 bgequ noname.373
4221 incl -24(fp)
4222noname.373:
4223 movl -20(fp),r3
4224 movl -24(fp),r2
4225 bbc #31,r2,noname.374
4226 incl r9
4227noname.374:
4228 addl2 r2,r2
4229 bicl2 #0,r2
4230 bbc #31,r3,noname.375
4231 incl r2
4232noname.375:
4233 addl2 r3,r3
4234 bicl2 #0,r3
4235 addl2 r3,r8
4236 bicl2 #0,r8
4237 cmpl r8,r3
4238 bgequ noname.376
4239 incl r2
4240 bicl3 #0,r2,r0
4241 bneq noname.376
4242 incl r9
4243noname.376:
4244 addl2 r2,r7
4245 bicl2 #0,r7
4246 cmpl r7,r2
4247 bgequ noname.377
4248 incl r9
4249noname.377:
4250
4251 movl 4(ap),r0
4252 movl r8,4(r0)
4253
4254 clrl r8
4255
4256 movl 8(ap),r4
4257 movl 4(r4),r3
4258 bicl3 #-65536,r3,-28(fp)
4259 extzv #16,#16,r3,r0
4260 bicl3 #-65536,r0,r3
4261 movl -28(fp),r0
4262 mull3 r0,r3,-32(fp)
4263 mull3 r0,r0,-28(fp)
4264 mull2 r3,r3
4265 bicl3 #32767,-32(fp),r0
4266 extzv #15,#17,r0,r0
4267 addl2 r0,r3
4268 bicl3 #-65536,-32(fp),r0
4269 ashl #17,r0,-32(fp)
4270 addl3 -28(fp),-32(fp),r0
4271 bicl3 #0,r0,-28(fp)
4272 cmpl -28(fp),-32(fp)
4273 bgequ noname.378
4274 incl r3
4275noname.378:
4276 movl -28(fp),r1
4277 movl r3,r2
4278 addl2 r1,r7
4279 bicl2 #0,r7
4280 cmpl r7,r1
4281 bgequ noname.379
4282 incl r2
4283noname.379:
4284 addl2 r2,r9
4285 bicl2 #0,r9
4286 cmpl r9,r2
4287 bgequ noname.380
4288 incl r8
4289noname.380:
4290
4291 movzwl 10(r4),r2
4292 bicl3 #-65536,(r4),r3
4293 movzwl 2(r4),r0
4294 bicl2 #-65536,r0
4295 bicl3 #-65536,8(r4),-44(fp)
4296 bicl3 #-65536,r2,-48(fp)
4297 mull3 r0,-44(fp),-36(fp)
4298 mull2 r3,-44(fp)
4299 mull3 r3,-48(fp),-40(fp)
4300 mull2 r0,-48(fp)
4301 addl3 -36(fp),-40(fp),r0
4302 bicl3 #0,r0,-36(fp)
4303 cmpl -36(fp),-40(fp)
4304 bgequ noname.381
4305 addl2 #65536,-48(fp)
4306noname.381:
4307 movzwl -34(fp),r0
4308 bicl2 #-65536,r0
4309 addl2 r0,-48(fp)
4310 bicl3 #-65536,-36(fp),r0
4311 ashl #16,r0,-40(fp)
4312 addl3 -40(fp),-44(fp),r0
4313 bicl3 #0,r0,-44(fp)
4314 cmpl -44(fp),-40(fp)
4315 bgequ noname.382
4316 incl -48(fp)
4317noname.382:
4318 movl -44(fp),r3
4319 movl -48(fp),r2
4320 bbc #31,r2,noname.383
4321 incl r8
4322noname.383:
4323 addl2 r2,r2
4324 bicl2 #0,r2
4325 bbc #31,r3,noname.384
4326 incl r2
4327noname.384:
4328 addl2 r3,r3
4329 bicl2 #0,r3
4330 addl2 r3,r7
4331 bicl2 #0,r7
4332 cmpl r7,r3
4333 bgequ noname.385
4334 incl r2
4335 bicl3 #0,r2,r0
4336 bneq noname.385
4337 incl r8
4338noname.385:
4339 addl2 r2,r9
4340 bicl2 #0,r9
4341 cmpl r9,r2
4342 bgequ noname.386
4343 incl r8
4344noname.386:
4345
4346 movl 4(ap),r0
4347 movl r7,8(r0)
4348
4349 clrl r7
4350
4351 movl 8(ap),r0
4352 movzwl 14(r0),r2
4353 bicl3 #-65536,(r0),r3
4354 movzwl 2(r0),r1
4355 bicl2 #-65536,r1
4356 bicl3 #-65536,12(r0),-60(fp)
4357 bicl3 #-65536,r2,-64(fp)
4358 mull3 r1,-60(fp),-52(fp)
4359 mull2 r3,-60(fp)
4360 mull3 r3,-64(fp),-56(fp)
4361 mull2 r1,-64(fp)
4362 addl3 -52(fp),-56(fp),r0
4363 bicl3 #0,r0,-52(fp)
4364 cmpl -52(fp),-56(fp)
4365 bgequ noname.387
4366 addl2 #65536,-64(fp)
4367noname.387:
4368 movzwl -50(fp),r0
4369 bicl2 #-65536,r0
4370 addl2 r0,-64(fp)
4371 bicl3 #-65536,-52(fp),r0
4372 ashl #16,r0,-56(fp)
4373 addl3 -56(fp),-60(fp),r0
4374 bicl3 #0,r0,-60(fp)
4375 cmpl -60(fp),-56(fp)
4376 bgequ noname.388
4377 incl -64(fp)
4378noname.388:
4379 movl -60(fp),r3
4380 movl -64(fp),r2
4381 bbc #31,r2,noname.389
4382 incl r7
4383noname.389:
4384 addl2 r2,r2
4385 bicl2 #0,r2
4386 bbc #31,r3,noname.390
4387 incl r2
4388noname.390:
4389 addl2 r3,r3
4390 bicl2 #0,r3
4391 addl2 r3,r9
4392 bicl2 #0,r9
4393 cmpl r9,r3
4394 bgequ noname.391
4395 incl r2
4396 bicl3 #0,r2,r0
4397 bneq noname.391
4398 incl r7
4399noname.391:
4400 addl2 r2,r8
4401 bicl2 #0,r8
4402 cmpl r8,r2
4403 bgequ noname.392
4404 incl r7
4405noname.392:
4406
4407 movl 8(ap),r0
4408 movzwl 10(r0),r2
4409 bicl3 #-65536,4(r0),r3
4410 movzwl 6(r0),r1
4411 bicl2 #-65536,r1
4412 bicl3 #-65536,8(r0),-76(fp)
4413 bicl3 #-65536,r2,-80(fp)
4414 mull3 r1,-76(fp),-68(fp)
4415 mull2 r3,-76(fp)
4416 mull3 r3,-80(fp),-72(fp)
4417 mull2 r1,-80(fp)
4418 addl3 -68(fp),-72(fp),r0
4419 bicl3 #0,r0,-68(fp)
4420 cmpl -68(fp),-72(fp)
4421 bgequ noname.393
4422 addl2 #65536,-80(fp)
4423noname.393:
4424 movzwl -66(fp),r0
4425 bicl2 #-65536,r0
4426 addl2 r0,-80(fp)
4427 bicl3 #-65536,-68(fp),r0
4428 ashl #16,r0,-72(fp)
4429 addl3 -72(fp),-76(fp),r0
4430 bicl3 #0,r0,-76(fp)
4431 cmpl -76(fp),-72(fp)
4432 bgequ noname.394
4433 incl -80(fp)
4434noname.394:
4435 movl -76(fp),r3
4436 movl -80(fp),r2
4437 bbc #31,r2,noname.395
4438 incl r7
4439noname.395:
4440 addl2 r2,r2
4441 bicl2 #0,r2
4442 bbc #31,r3,noname.396
4443 incl r2
4444noname.396:
4445 addl2 r3,r3
4446 bicl2 #0,r3
4447 addl2 r3,r9
4448 bicl2 #0,r9
4449 cmpl r9,r3
4450 bgequ noname.397
4451 incl r2
4452 bicl3 #0,r2,r0
4453 bneq noname.397
4454 incl r7
4455noname.397:
4456 addl2 r2,r8
4457 bicl2 #0,r8
4458 cmpl r8,r2
4459 bgequ noname.398
4460 incl r7
4461noname.398:
4462
4463 movl 4(ap),r0
4464 movl r9,12(r0)
4465
4466 clrl r9
4467
4468 movl 8(ap),r2
4469 movl 8(r2),r4
4470 bicl3 #-65536,r4,-84(fp)
4471 extzv #16,#16,r4,r0
4472 bicl3 #-65536,r0,r4
4473 movl -84(fp),r0
4474 mull3 r0,r4,-88(fp)
4475 mull3 r0,r0,-84(fp)
4476 mull2 r4,r4
4477 bicl3 #32767,-88(fp),r0
4478 extzv #15,#17,r0,r0
4479 addl2 r0,r4
4480 bicl3 #-65536,-88(fp),r0
4481 ashl #17,r0,-88(fp)
4482 addl3 -84(fp),-88(fp),r0
4483 bicl3 #0,r0,-84(fp)
4484 cmpl -84(fp),-88(fp)
4485 bgequ noname.399
4486 incl r4
4487noname.399:
4488 movl -84(fp),r1
4489 movl r4,r3
4490 addl2 r1,r8
4491 bicl2 #0,r8
4492 cmpl r8,r1
4493 bgequ noname.400
4494 incl r3
4495noname.400:
4496 addl2 r3,r7
4497 bicl2 #0,r7
4498 cmpl r7,r3
4499 bgequ noname.401
4500 incl r9
4501noname.401:
4502
4503 movzwl 14(r2),r3
4504 bicl3 #-65536,4(r2),r1
4505 movzwl 6(r2),r0
4506 bicl2 #-65536,r0
4507 bicl3 #-65536,12(r2),-100(fp)
4508 bicl3 #-65536,r3,-104(fp)
4509 mull3 r0,-100(fp),-92(fp)
4510 mull2 r1,-100(fp)
4511 mull3 r1,-104(fp),-96(fp)
4512 mull2 r0,-104(fp)
4513 addl3 -92(fp),-96(fp),r0
4514 bicl3 #0,r0,-92(fp)
4515 cmpl -92(fp),-96(fp)
4516 bgequ noname.402
4517 addl2 #65536,-104(fp)
4518noname.402:
4519 movzwl -90(fp),r0
4520 bicl2 #-65536,r0
4521 addl2 r0,-104(fp)
4522 bicl3 #-65536,-92(fp),r0
4523 ashl #16,r0,-96(fp)
4524 addl3 -96(fp),-100(fp),r0
4525 bicl3 #0,r0,-100(fp)
4526 cmpl -100(fp),-96(fp)
4527 bgequ noname.403
4528 incl -104(fp)
4529noname.403:
4530 movl -100(fp),r3
4531 movl -104(fp),r2
4532 bbc #31,r2,noname.404
4533 incl r9
4534noname.404:
4535 addl2 r2,r2
4536 bicl2 #0,r2
4537 bbc #31,r3,noname.405
4538 incl r2
4539noname.405:
4540 addl2 r3,r3
4541 bicl2 #0,r3
4542 addl2 r3,r8
4543 bicl2 #0,r8
4544 cmpl r8,r3
4545 bgequ noname.406
4546 incl r2
4547 bicl3 #0,r2,r0
4548 bneq noname.406
4549 incl r9
4550noname.406:
4551 addl2 r2,r7
4552 bicl2 #0,r7
4553 cmpl r7,r2
4554 bgequ noname.407
4555 incl r9
4556noname.407:
4557
4558 movl 8(ap),r0
4559 movzwl 18(r0),r2
4560 bicl3 #-65536,(r0),r3
4561 movzwl 2(r0),r1
4562 bicl2 #-65536,r1
4563 bicl3 #-65536,16(r0),-116(fp)
4564 bicl3 #-65536,r2,-120(fp)
4565 mull3 r1,-116(fp),-108(fp)
4566 mull2 r3,-116(fp)
4567 mull3 r3,-120(fp),-112(fp)
4568 mull2 r1,-120(fp)
4569 addl3 -108(fp),-112(fp),r0
4570 bicl3 #0,r0,-108(fp)
4571 cmpl -108(fp),-112(fp)
4572 bgequ noname.408
4573 addl2 #65536,-120(fp)
4574noname.408:
4575 movzwl -106(fp),r0
4576 bicl2 #-65536,r0
4577 addl2 r0,-120(fp)
4578 bicl3 #-65536,-108(fp),r0
4579 ashl #16,r0,-112(fp)
4580 addl3 -112(fp),-116(fp),r0
4581 bicl3 #0,r0,-116(fp)
4582 cmpl -116(fp),-112(fp)
4583 bgequ noname.409
4584 incl -120(fp)
4585noname.409:
4586 movl -116(fp),r3
4587 movl -120(fp),r2
4588 bbc #31,r2,noname.410
4589 incl r9
4590noname.410:
4591 addl2 r2,r2
4592 bicl2 #0,r2
4593 bbc #31,r3,noname.411
4594 incl r2
4595noname.411:
4596 addl2 r3,r3
4597 bicl2 #0,r3
4598 addl2 r3,r8
4599 bicl2 #0,r8
4600 cmpl r8,r3
4601 bgequ noname.412
4602 incl r2
4603 bicl3 #0,r2,r0
4604 bneq noname.412
4605 incl r9
4606noname.412:
4607 addl2 r2,r7
4608 bicl2 #0,r7
4609 cmpl r7,r2
4610 bgequ noname.413
4611 incl r9
4612noname.413:
4613
4614 movl 4(ap),r0
4615 movl r8,16(r0)
4616
4617 clrl r8
4618
4619 movl 8(ap),r0
4620 movzwl 22(r0),r2
4621 bicl3 #-65536,(r0),r3
4622 movzwl 2(r0),r1
4623 bicl2 #-65536,r1
4624 bicl3 #-65536,20(r0),-132(fp)
4625 bicl3 #-65536,r2,-136(fp)
4626 mull3 r1,-132(fp),-124(fp)
4627 mull2 r3,-132(fp)
4628 mull3 r3,-136(fp),-128(fp)
4629 mull2 r1,-136(fp)
4630 addl3 -124(fp),-128(fp),r0
4631 bicl3 #0,r0,-124(fp)
4632 cmpl -124(fp),-128(fp)
4633 bgequ noname.414
4634 addl2 #65536,-136(fp)
4635noname.414:
4636 movzwl -122(fp),r0
4637 bicl2 #-65536,r0
4638 addl2 r0,-136(fp)
4639 bicl3 #-65536,-124(fp),r0
4640 ashl #16,r0,-128(fp)
4641 addl3 -128(fp),-132(fp),r0
4642 bicl3 #0,r0,-132(fp)
4643 cmpl -132(fp),-128(fp)
4644 bgequ noname.415
4645 incl -136(fp)
4646noname.415:
4647 movl -132(fp),r3
4648 movl -136(fp),r2
4649 bbc #31,r2,noname.416
4650 incl r8
4651noname.416:
4652 addl2 r2,r2
4653 bicl2 #0,r2
4654 bbc #31,r3,noname.417
4655 incl r2
4656noname.417:
4657 addl2 r3,r3
4658 bicl2 #0,r3
4659 addl2 r3,r7
4660 bicl2 #0,r7
4661 cmpl r7,r3
4662 bgequ noname.418
4663 incl r2
4664 bicl3 #0,r2,r0
4665 bneq noname.418
4666 incl r8
4667noname.418:
4668 addl2 r2,r9
4669 bicl2 #0,r9
4670 cmpl r9,r2
4671 bgequ noname.419
4672 incl r8
4673noname.419:
4674
4675 movl 8(ap),r0
4676 movzwl 18(r0),r2
4677 bicl3 #-65536,4(r0),r3
4678 movzwl 6(r0),r1
4679 bicl2 #-65536,r1
4680 bicl3 #-65536,16(r0),-148(fp)
4681 bicl3 #-65536,r2,-152(fp)
4682 mull3 r1,-148(fp),-140(fp)
4683 mull2 r3,-148(fp)
4684 mull3 r3,-152(fp),-144(fp)
4685 mull2 r1,-152(fp)
4686 addl3 -140(fp),-144(fp),r0
4687 bicl3 #0,r0,-140(fp)
4688 cmpl -140(fp),-144(fp)
4689 bgequ noname.420
4690 addl2 #65536,-152(fp)
4691noname.420:
4692 movzwl -138(fp),r0
4693 bicl2 #-65536,r0
4694 addl2 r0,-152(fp)
4695 bicl3 #-65536,-140(fp),r0
4696 ashl #16,r0,-144(fp)
4697 addl3 -144(fp),-148(fp),r0
4698 bicl3 #0,r0,-148(fp)
4699 cmpl -148(fp),-144(fp)
4700 bgequ noname.421
4701 incl -152(fp)
4702noname.421:
4703 movl -148(fp),r3
4704 movl -152(fp),r2
4705 bbc #31,r2,noname.422
4706 incl r8
4707noname.422:
4708 addl2 r2,r2
4709 bicl2 #0,r2
4710 bbc #31,r3,noname.423
4711 incl r2
4712noname.423:
4713 addl2 r3,r3
4714 bicl2 #0,r3
4715 addl2 r3,r7
4716 bicl2 #0,r7
4717 cmpl r7,r3
4718 bgequ noname.424
4719 incl r2
4720 bicl3 #0,r2,r0
4721 bneq noname.424
4722 incl r8
4723noname.424:
4724 addl2 r2,r9
4725 bicl2 #0,r9
4726 cmpl r9,r2
4727 bgequ noname.425
4728 incl r8
4729noname.425:
4730
4731 movl 8(ap),r0
4732 movzwl 14(r0),r2
4733 bicl3 #-65536,8(r0),r3
4734 movzwl 10(r0),r1
4735 bicl2 #-65536,r1
4736 bicl3 #-65536,12(r0),-164(fp)
4737 bicl3 #-65536,r2,-168(fp)
4738 mull3 r1,-164(fp),-156(fp)
4739 mull2 r3,-164(fp)
4740 mull3 r3,-168(fp),-160(fp)
4741 mull2 r1,-168(fp)
4742 addl3 -156(fp),-160(fp),r0
4743 bicl3 #0,r0,-156(fp)
4744 cmpl -156(fp),-160(fp)
4745 bgequ noname.426
4746 addl2 #65536,-168(fp)
4747noname.426:
4748 movzwl -154(fp),r0
4749 bicl2 #-65536,r0
4750 addl2 r0,-168(fp)
4751 bicl3 #-65536,-156(fp),r0
4752 ashl #16,r0,-160(fp)
4753 addl3 -160(fp),-164(fp),r0
4754 bicl3 #0,r0,-164(fp)
4755 cmpl -164(fp),-160(fp)
4756 bgequ noname.427
4757 incl -168(fp)
4758noname.427:
4759 movl -164(fp),r3
4760 movl -168(fp),r2
4761 bbc #31,r2,noname.428
4762 incl r8
4763noname.428:
4764 addl2 r2,r2
4765 bicl2 #0,r2
4766 bbc #31,r3,noname.429
4767 incl r2
4768noname.429:
4769 addl2 r3,r3
4770 bicl2 #0,r3
4771 addl2 r3,r7
4772 bicl2 #0,r7
4773 cmpl r7,r3
4774 bgequ noname.430
4775 incl r2
4776 bicl3 #0,r2,r0
4777 bneq noname.430
4778 incl r8
4779noname.430:
4780 addl2 r2,r9
4781 bicl2 #0,r9
4782 cmpl r9,r2
4783 bgequ noname.431
4784 incl r8
4785noname.431:
4786
4787 movl 4(ap),r0
4788 movl r7,20(r0)
4789
4790 clrl r7
4791
4792 movl 8(ap),r2
4793 movl 12(r2),r4
4794 bicl3 #-65536,r4,-172(fp)
4795 extzv #16,#16,r4,r0
4796 bicl3 #-65536,r0,r4
4797 movl -172(fp),r0
4798 mull3 r0,r4,-176(fp)
4799 mull3 r0,r0,-172(fp)
4800 mull2 r4,r4
4801 bicl3 #32767,-176(fp),r0
4802 extzv #15,#17,r0,r0
4803 addl2 r0,r4
4804 bicl3 #-65536,-176(fp),r0
4805 ashl #17,r0,-176(fp)
4806 addl3 -172(fp),-176(fp),r0
4807 bicl3 #0,r0,-172(fp)
4808 cmpl -172(fp),-176(fp)
4809 bgequ noname.432
4810 incl r4
4811noname.432:
4812 movl -172(fp),r1
4813 movl r4,r3
4814 addl2 r1,r9
4815 bicl2 #0,r9
4816 cmpl r9,r1
4817 bgequ noname.433
4818 incl r3
4819noname.433:
4820 addl2 r3,r8
4821 bicl2 #0,r8
4822 cmpl r8,r3
4823 bgequ noname.434
4824 incl r7
4825noname.434:
4826
4827 movzwl 18(r2),r3
4828 bicl3 #-65536,8(r2),r1
4829 movzwl 10(r2),r0
4830 bicl2 #-65536,r0
4831 bicl3 #-65536,16(r2),-188(fp)
4832 bicl3 #-65536,r3,-192(fp)
4833 mull3 r0,-188(fp),-180(fp)
4834 mull2 r1,-188(fp)
4835 mull3 r1,-192(fp),-184(fp)
4836 mull2 r0,-192(fp)
4837 addl3 -180(fp),-184(fp),r0
4838 bicl3 #0,r0,-180(fp)
4839 cmpl -180(fp),-184(fp)
4840 bgequ noname.435
4841 addl2 #65536,-192(fp)
4842noname.435:
4843 movzwl -178(fp),r0
4844 bicl2 #-65536,r0
4845 addl2 r0,-192(fp)
4846 bicl3 #-65536,-180(fp),r0
4847 ashl #16,r0,-184(fp)
4848 addl3 -184(fp),-188(fp),r0
4849 bicl3 #0,r0,-188(fp)
4850 cmpl -188(fp),-184(fp)
4851 bgequ noname.436
4852 incl -192(fp)
4853noname.436:
4854 movl -188(fp),r3
4855 movl -192(fp),r2
4856 bbc #31,r2,noname.437
4857 incl r7
4858noname.437:
4859 addl2 r2,r2
4860 bicl2 #0,r2
4861 bbc #31,r3,noname.438
4862 incl r2
4863noname.438:
4864 addl2 r3,r3
4865 bicl2 #0,r3
4866 addl2 r3,r9
4867 bicl2 #0,r9
4868 cmpl r9,r3
4869 bgequ noname.439
4870 incl r2
4871 bicl3 #0,r2,r0
4872 bneq noname.439
4873 incl r7
4874noname.439:
4875 addl2 r2,r8
4876 bicl2 #0,r8
4877 cmpl r8,r2
4878 bgequ noname.440
4879 incl r7
4880noname.440:
4881
4882 movl 8(ap),r0
4883 movzwl 22(r0),r2
4884 bicl3 #-65536,4(r0),r3
4885 movzwl 6(r0),r1
4886 bicl2 #-65536,r1
4887 bicl3 #-65536,20(r0),-204(fp)
4888 bicl3 #-65536,r2,-208(fp)
4889 mull3 r1,-204(fp),-196(fp)
4890 mull2 r3,-204(fp)
4891 mull3 r3,-208(fp),-200(fp)
4892 mull2 r1,-208(fp)
4893 addl3 -196(fp),-200(fp),r0
4894 bicl3 #0,r0,-196(fp)
4895 cmpl -196(fp),-200(fp)
4896 bgequ noname.441
4897 addl2 #65536,-208(fp)
4898noname.441:
4899 movzwl -194(fp),r0
4900 bicl2 #-65536,r0
4901 addl2 r0,-208(fp)
4902 bicl3 #-65536,-196(fp),r0
4903 ashl #16,r0,-200(fp)
4904 addl3 -200(fp),-204(fp),r0
4905 bicl3 #0,r0,-204(fp)
4906 cmpl -204(fp),-200(fp)
4907 bgequ noname.442
4908 incl -208(fp)
4909noname.442:
4910 movl -204(fp),r3
4911 movl -208(fp),r2
4912 bbc #31,r2,noname.443
4913 incl r7
4914noname.443:
4915 addl2 r2,r2
4916 bicl2 #0,r2
4917 bbc #31,r3,noname.444
4918 incl r2
4919noname.444:
4920 addl2 r3,r3
4921 bicl2 #0,r3
4922 addl2 r3,r9
4923 bicl2 #0,r9
4924 cmpl r9,r3
4925 bgequ noname.445
4926 incl r2
4927 bicl3 #0,r2,r0
4928 bneq noname.445
4929 incl r7
4930noname.445:
4931 addl2 r2,r8
4932 bicl2 #0,r8
4933 cmpl r8,r2
4934 bgequ noname.446
4935 incl r7
4936noname.446:
4937
4938 movl 8(ap),r0
4939 movzwl 26(r0),r2
4940 bicl3 #-65536,(r0),r3
4941 movzwl 2(r0),r1
4942 bicl2 #-65536,r1
4943 bicl3 #-65536,24(r0),-220(fp)
4944 bicl3 #-65536,r2,-224(fp)
4945 mull3 r1,-220(fp),-212(fp)
4946 mull2 r3,-220(fp)
4947 mull3 r3,-224(fp),-216(fp)
4948 mull2 r1,-224(fp)
4949 addl3 -212(fp),-216(fp),r0
4950 bicl3 #0,r0,-212(fp)
4951 cmpl -212(fp),-216(fp)
4952 bgequ noname.447
4953 addl2 #65536,-224(fp)
4954noname.447:
4955 movzwl -210(fp),r0
4956 bicl2 #-65536,r0
4957 addl2 r0,-224(fp)
4958 bicl3 #-65536,-212(fp),r0
4959 ashl #16,r0,-216(fp)
4960 addl3 -216(fp),-220(fp),r0
4961 bicl3 #0,r0,-220(fp)
4962 cmpl -220(fp),-216(fp)
4963 bgequ noname.448
4964 incl -224(fp)
4965noname.448:
4966 movl -220(fp),r3
4967 movl -224(fp),r2
4968 bbc #31,r2,noname.449
4969 incl r7
4970noname.449:
4971 addl2 r2,r2
4972 bicl2 #0,r2
4973 bbc #31,r3,noname.450
4974 incl r2
4975noname.450:
4976 addl2 r3,r3
4977 bicl2 #0,r3
4978 addl2 r3,r9
4979 bicl2 #0,r9
4980 cmpl r9,r3
4981 bgequ noname.451
4982 incl r2
4983 bicl3 #0,r2,r0
4984 bneq noname.451
4985 incl r7
4986noname.451:
4987 addl2 r2,r8
4988 bicl2 #0,r8
4989 cmpl r8,r2
4990 bgequ noname.452
4991 incl r7
4992noname.452:
4993
4994 movl 4(ap),r0
4995 movl r9,24(r0)
4996
4997 clrl r9
4998
4999 movl 8(ap),r0
5000 movzwl 30(r0),r2
5001 bicl3 #-65536,(r0),r3
5002 movzwl 2(r0),r1
5003 bicl2 #-65536,r1
5004 bicl3 #-65536,28(r0),-236(fp)
5005 bicl3 #-65536,r2,-240(fp)
5006 mull3 r1,-236(fp),-228(fp)
5007 mull2 r3,-236(fp)
5008 mull3 r3,-240(fp),-232(fp)
5009 mull2 r1,-240(fp)
5010 addl3 -228(fp),-232(fp),r0
5011 bicl3 #0,r0,-228(fp)
5012 cmpl -228(fp),-232(fp)
5013 bgequ noname.453
5014 addl2 #65536,-240(fp)
5015noname.453:
5016 movzwl -226(fp),r0
5017 bicl2 #-65536,r0
5018 addl2 r0,-240(fp)
5019 bicl3 #-65536,-228(fp),r0
5020 ashl #16,r0,-232(fp)
5021 addl3 -232(fp),-236(fp),r0
5022 bicl3 #0,r0,-236(fp)
5023 cmpl -236(fp),-232(fp)
5024 bgequ noname.454
5025 incl -240(fp)
5026noname.454:
5027 movl -236(fp),r3
5028 movl -240(fp),r2
5029 bbc #31,r2,noname.455
5030 incl r9
5031noname.455:
5032 addl2 r2,r2
5033 bicl2 #0,r2
5034 bbc #31,r3,noname.456
5035 incl r2
5036noname.456:
5037 addl2 r3,r3
5038 bicl2 #0,r3
5039 addl2 r3,r8
5040 bicl2 #0,r8
5041 cmpl r8,r3
5042 bgequ noname.457
5043 incl r2
5044 bicl3 #0,r2,r0
5045 bneq noname.457
5046 incl r9
5047noname.457:
5048 addl2 r2,r7
5049 bicl2 #0,r7
5050 cmpl r7,r2
5051 bgequ noname.458
5052 incl r9
5053noname.458:
5054
5055 movl 8(ap),r0
5056 movzwl 26(r0),r2
5057 bicl3 #-65536,4(r0),r3
5058 movzwl 6(r0),r1
5059 bicl2 #-65536,r1
5060 bicl3 #-65536,24(r0),-252(fp)
5061 bicl3 #-65536,r2,-256(fp)
5062 mull3 r1,-252(fp),-244(fp)
5063 mull2 r3,-252(fp)
5064 mull3 r3,-256(fp),-248(fp)
5065 mull2 r1,-256(fp)
5066 addl3 -244(fp),-248(fp),r0
5067 bicl3 #0,r0,-244(fp)
5068 cmpl -244(fp),-248(fp)
5069 bgequ noname.459
5070 addl2 #65536,-256(fp)
5071noname.459:
5072 movzwl -242(fp),r0
5073 bicl2 #-65536,r0
5074 addl2 r0,-256(fp)
5075 bicl3 #-65536,-244(fp),r0
5076 ashl #16,r0,-248(fp)
5077 addl3 -248(fp),-252(fp),r0
5078 bicl3 #0,r0,-252(fp)
5079 cmpl -252(fp),-248(fp)
5080 bgequ noname.460
5081 incl -256(fp)
5082noname.460:
5083 movl -252(fp),r3
5084 movl -256(fp),r2
5085 bbc #31,r2,noname.461
5086 incl r9
5087noname.461:
5088 addl2 r2,r2
5089 bicl2 #0,r2
5090 bbc #31,r3,noname.462
5091 incl r2
5092noname.462:
5093 addl2 r3,r3
5094 bicl2 #0,r3
5095 addl2 r3,r8
5096 bicl2 #0,r8
5097 cmpl r8,r3
5098 bgequ noname.463
5099 incl r2
5100 bicl3 #0,r2,r0
5101 bneq noname.463
5102 incl r9
5103noname.463:
5104 addl2 r2,r7
5105 bicl2 #0,r7
5106 cmpl r7,r2
5107 bgequ noname.464
5108 incl r9
5109noname.464:
5110
5111 movl 8(ap),r0
5112 movzwl 22(r0),r2
5113 bicl3 #-65536,8(r0),r3
5114 movzwl 10(r0),r1
5115 bicl2 #-65536,r1
5116 bicl3 #-65536,20(r0),-268(fp)
5117 bicl3 #-65536,r2,-272(fp)
5118 mull3 r1,-268(fp),-260(fp)
5119 mull2 r3,-268(fp)
5120 mull3 r3,-272(fp),-264(fp)
5121 mull2 r1,-272(fp)
5122 addl3 -260(fp),-264(fp),r0
5123 bicl3 #0,r0,-260(fp)
5124 cmpl -260(fp),-264(fp)
5125 bgequ noname.465
5126 addl2 #65536,-272(fp)
5127noname.465:
5128 movzwl -258(fp),r0
5129 bicl2 #-65536,r0
5130 addl2 r0,-272(fp)
5131 bicl3 #-65536,-260(fp),r0
5132 ashl #16,r0,-264(fp)
5133 addl3 -264(fp),-268(fp),r0
5134 bicl3 #0,r0,-268(fp)
5135 cmpl -268(fp),-264(fp)
5136 bgequ noname.466
5137 incl -272(fp)
5138noname.466:
5139 movl -268(fp),r3
5140 movl -272(fp),r2
5141 bbc #31,r2,noname.467
5142 incl r9
5143noname.467:
5144 addl2 r2,r2
5145 bicl2 #0,r2
5146 bbc #31,r3,noname.468
5147 incl r2
5148noname.468:
5149 addl2 r3,r3
5150 bicl2 #0,r3
5151 addl2 r3,r8
5152 bicl2 #0,r8
5153 cmpl r8,r3
5154 bgequ noname.469
5155 incl r2
5156 bicl3 #0,r2,r0
5157 bneq noname.469
5158 incl r9
5159noname.469:
5160 addl2 r2,r7
5161 bicl2 #0,r7
5162 cmpl r7,r2
5163 bgequ noname.470
5164 incl r9
5165noname.470:
5166
5167 movl 8(ap),r0
5168 movzwl 18(r0),r2
5169 bicl3 #-65536,12(r0),r3
5170 movzwl 14(r0),r1
5171 bicl2 #-65536,r1
5172 bicl3 #-65536,16(r0),-284(fp)
5173 bicl3 #-65536,r2,-288(fp)
5174 mull3 r1,-284(fp),-276(fp)
5175 mull2 r3,-284(fp)
5176 mull3 r3,-288(fp),-280(fp)
5177 mull2 r1,-288(fp)
5178 addl3 -276(fp),-280(fp),r0
5179 bicl3 #0,r0,-276(fp)
5180 cmpl -276(fp),-280(fp)
5181 bgequ noname.471
5182 addl2 #65536,-288(fp)
5183noname.471:
5184 movzwl -274(fp),r0
5185 bicl2 #-65536,r0
5186 addl2 r0,-288(fp)
5187 bicl3 #-65536,-276(fp),r0
5188 ashl #16,r0,-280(fp)
5189 addl3 -280(fp),-284(fp),r0
5190 bicl3 #0,r0,-284(fp)
5191 cmpl -284(fp),-280(fp)
5192 bgequ noname.472
5193 incl -288(fp)
5194noname.472:
5195 movl -284(fp),r3
5196 movl -288(fp),r2
5197 bbc #31,r2,noname.473
5198 incl r9
5199noname.473:
5200 addl2 r2,r2
5201 bicl2 #0,r2
5202 bbc #31,r3,noname.474
5203 incl r2
5204noname.474:
5205 addl2 r3,r3
5206 bicl2 #0,r3
5207 addl2 r3,r8
5208 bicl2 #0,r8
5209 cmpl r8,r3
5210 bgequ noname.475
5211 incl r2
5212 bicl3 #0,r2,r0
5213 bneq noname.475
5214 incl r9
5215noname.475:
5216 addl2 r2,r7
5217 bicl2 #0,r7
5218 cmpl r7,r2
5219 bgequ noname.476
5220 incl r9
5221noname.476:
5222
5223 movl 4(ap),r0
5224 movl r8,28(r0)
5225
5226 clrl r8
5227
5228 movl 8(ap),r3
5229 movl 16(r3),r4
5230 bicl3 #-65536,r4,r5
5231 extzv #16,#16,r4,r0
5232 bicl3 #-65536,r0,r4
5233 mull3 r5,r4,-292(fp)
5234 mull2 r5,r5
5235 mull2 r4,r4
5236 bicl3 #32767,-292(fp),r0
5237 extzv #15,#17,r0,r0
5238 addl2 r0,r4
5239 bicl3 #-65536,-292(fp),r0
5240 ashl #17,r0,-292(fp)
5241 addl2 -292(fp),r5
5242 bicl2 #0,r5
5243 cmpl r5,-292(fp)
5244 bgequ noname.477
5245 incl r4
5246noname.477:
5247 movl r5,r1
5248 movl r4,r2
5249 addl2 r1,r7
5250 bicl2 #0,r7
5251 cmpl r7,r1
5252 bgequ noname.478
5253 incl r2
5254noname.478:
5255 addl2 r2,r9
5256 bicl2 #0,r9
5257 cmpl r9,r2
5258 bgequ noname.479
5259 incl r8
5260noname.479:
5261
5262 bicl3 #-65536,20(r3),r4
5263 movzwl 22(r3),r1
5264 bicl2 #-65536,r1
5265 bicl3 #-65536,12(r3),r2
5266 movzwl 14(r3),r0
5267 bicl2 #-65536,r0
5268 movl r4,r6
5269 movl r1,r5
5270 mull3 r0,r6,-296(fp)
5271 mull2 r2,r6
5272 mull3 r2,r5,-300(fp)
5273 mull2 r0,r5
5274 addl3 -296(fp),-300(fp),r0
5275 bicl3 #0,r0,-296(fp)
5276 cmpl -296(fp),-300(fp)
5277 bgequ noname.480
5278 addl2 #65536,r5
5279noname.480:
5280 movzwl -294(fp),r0
5281 bicl2 #-65536,r0
5282 addl2 r0,r5
5283 bicl3 #-65536,-296(fp),r0
5284 ashl #16,r0,-300(fp)
5285 addl2 -300(fp),r6
5286 bicl2 #0,r6
5287 cmpl r6,-300(fp)
5288 bgequ noname.481
5289 incl r5
5290noname.481:
5291 movl r6,r3
5292 movl r5,r2
5293 bbc #31,r2,noname.482
5294 incl r8
5295noname.482:
5296 addl2 r2,r2
5297 bicl2 #0,r2
5298 bbc #31,r3,noname.483
5299 incl r2
5300noname.483:
5301 addl2 r3,r3
5302 bicl2 #0,r3
5303 addl2 r3,r7
5304 bicl2 #0,r7
5305 cmpl r7,r3
5306 bgequ noname.484
5307 incl r2
5308 bicl3 #0,r2,r0
5309 bneq noname.484
5310 incl r8
5311noname.484:
5312 addl2 r2,r9
5313 bicl2 #0,r9
5314 cmpl r9,r2
5315 bgequ noname.485
5316 incl r8
5317noname.485:
5318
5319 movl 8(ap),r0
5320 bicl3 #-65536,24(r0),r3
5321 movzwl 26(r0),r1
5322 bicl2 #-65536,r1
5323 bicl3 #-65536,8(r0),r2
5324 movzwl 10(r0),r0
5325 bicl2 #-65536,r0
5326 movl r3,r5
5327 movl r1,r4
5328 mull3 r0,r5,-304(fp)
5329 mull2 r2,r5
5330 mull3 r2,r4,-308(fp)
5331 mull2 r0,r4
5332 addl3 -304(fp),-308(fp),r0
5333 bicl3 #0,r0,-304(fp)
5334 cmpl -304(fp),-308(fp)
5335 bgequ noname.486
5336 addl2 #65536,r4
5337noname.486:
5338 movzwl -302(fp),r0
5339 bicl2 #-65536,r0
5340 addl2 r0,r4
5341 bicl3 #-65536,-304(fp),r0
5342 ashl #16,r0,-308(fp)
5343 addl2 -308(fp),r5
5344 bicl2 #0,r5
5345 cmpl r5,-308(fp)
5346 bgequ noname.487
5347 incl r4
5348noname.487:
5349 movl r5,r3
5350 movl r4,r2
5351 bbc #31,r2,noname.488
5352 incl r8
5353noname.488:
5354 addl2 r2,r2
5355 bicl2 #0,r2
5356 bbc #31,r3,noname.489
5357 incl r2
5358noname.489:
5359 addl2 r3,r3
5360 bicl2 #0,r3
5361 addl2 r3,r7
5362 bicl2 #0,r7
5363 cmpl r7,r3
5364 bgequ noname.490
5365 incl r2
5366 bicl3 #0,r2,r0
5367 bneq noname.490
5368 incl r8
5369noname.490:
5370 addl2 r2,r9
5371 bicl2 #0,r9
5372 cmpl r9,r2
5373 bgequ noname.491
5374 incl r8
5375noname.491:
5376
5377 movl 8(ap),r0
5378 bicl3 #-65536,28(r0),r3
5379 movzwl 30(r0),r1
5380 bicl2 #-65536,r1
5381 bicl3 #-65536,4(r0),r2
5382 movzwl 6(r0),r0
5383 bicl2 #-65536,r0
5384 movl r3,r5
5385 movl r1,r4
5386 mull3 r0,r5,-312(fp)
5387 mull2 r2,r5
5388 mull3 r2,r4,-316(fp)
5389 mull2 r0,r4
5390 addl3 -312(fp),-316(fp),r0
5391 bicl3 #0,r0,-312(fp)
5392 cmpl -312(fp),-316(fp)
5393 bgequ noname.492
5394 addl2 #65536,r4
5395noname.492:
5396 movzwl -310(fp),r0
5397 bicl2 #-65536,r0
5398 addl2 r0,r4
5399 bicl3 #-65536,-312(fp),r0
5400 ashl #16,r0,-316(fp)
5401 addl2 -316(fp),r5
5402 bicl2 #0,r5
5403 cmpl r5,-316(fp)
5404 bgequ noname.493
5405 incl r4
5406noname.493:
5407 movl r5,r3
5408 movl r4,r2
5409 bbc #31,r2,noname.494
5410 incl r8
5411noname.494:
5412 addl2 r2,r2
5413 bicl2 #0,r2
5414 bbc #31,r3,noname.495
5415 incl r2
5416noname.495:
5417 addl2 r3,r3
5418 bicl2 #0,r3
5419 addl2 r3,r7
5420 bicl2 #0,r7
5421 cmpl r7,r3
5422 bgequ noname.496
5423 incl r2
5424 bicl3 #0,r2,r0
5425 bneq noname.496
5426 incl r8
5427noname.496:
5428 addl2 r2,r9
5429 bicl2 #0,r9
5430 cmpl r9,r2
5431 bgequ noname.497
5432 incl r8
5433noname.497:
5434
5435 movl 4(ap),r0
5436 movl r7,32(r0)
5437
5438 clrl r7
5439
5440 movl 8(ap),r0
5441 bicl3 #-65536,28(r0),r3
5442 movzwl 30(r0),r2
5443 bicl3 #-65536,8(r0),r1
5444 movzwl 10(r0),r0
5445 bicl2 #-65536,r0
5446 movl r3,r4
5447 bicl3 #-65536,r2,-328(fp)
5448 mull3 r0,r4,-320(fp)
5449 mull2 r1,r4
5450 mull3 r1,-328(fp),-324(fp)
5451 mull2 r0,-328(fp)
5452 addl3 -320(fp),-324(fp),r0
5453 bicl3 #0,r0,-320(fp)
5454 cmpl -320(fp),-324(fp)
5455 bgequ noname.498
5456 addl2 #65536,-328(fp)
5457noname.498:
5458 movzwl -318(fp),r0
5459 bicl2 #-65536,r0
5460 addl2 r0,-328(fp)
5461 bicl3 #-65536,-320(fp),r0
5462 ashl #16,r0,-324(fp)
5463 addl2 -324(fp),r4
5464 bicl2 #0,r4
5465 cmpl r4,-324(fp)
5466 bgequ noname.499
5467 incl -328(fp)
5468noname.499:
5469 movl r4,r3
5470 movl -328(fp),r2
5471 bbc #31,r2,noname.500
5472 incl r7
5473noname.500:
5474 addl2 r2,r2
5475 bicl2 #0,r2
5476 bbc #31,r3,noname.501
5477 incl r2
5478noname.501:
5479 addl2 r3,r3
5480 bicl2 #0,r3
5481 addl2 r3,r9
5482 bicl2 #0,r9
5483 cmpl r9,r3
5484 bgequ noname.502
5485 incl r2
5486 bicl3 #0,r2,r0
5487 bneq noname.502
5488 incl r7
5489noname.502:
5490 addl2 r2,r8
5491 bicl2 #0,r8
5492 cmpl r8,r2
5493 bgequ noname.503
5494 incl r7
5495noname.503:
5496
5497 movl 8(ap),r0
5498 movzwl 26(r0),r2
5499 bicl3 #-65536,12(r0),r3
5500 movzwl 14(r0),r1
5501 bicl2 #-65536,r1
5502 bicl3 #-65536,24(r0),-340(fp)
5503 bicl3 #-65536,r2,-344(fp)
5504 mull3 r1,-340(fp),-332(fp)
5505 mull2 r3,-340(fp)
5506 mull3 r3,-344(fp),-336(fp)
5507 mull2 r1,-344(fp)
5508 addl3 -332(fp),-336(fp),r0
5509 bicl3 #0,r0,-332(fp)
5510 cmpl -332(fp),-336(fp)
5511 bgequ noname.504
5512 addl2 #65536,-344(fp)
5513noname.504:
5514 movzwl -330(fp),r0
5515 bicl2 #-65536,r0
5516 addl2 r0,-344(fp)
5517 bicl3 #-65536,-332(fp),r0
5518 ashl #16,r0,-336(fp)
5519 addl3 -336(fp),-340(fp),r0
5520 bicl3 #0,r0,-340(fp)
5521 cmpl -340(fp),-336(fp)
5522 bgequ noname.505
5523 incl -344(fp)
5524noname.505:
5525 movl -340(fp),r3
5526 movl -344(fp),r2
5527 bbc #31,r2,noname.506
5528 incl r7
5529noname.506:
5530 addl2 r2,r2
5531 bicl2 #0,r2
5532 bbc #31,r3,noname.507
5533 incl r2
5534noname.507:
5535 addl2 r3,r3
5536 bicl2 #0,r3
5537 addl2 r3,r9
5538 bicl2 #0,r9
5539 cmpl r9,r3
5540 bgequ noname.508
5541 incl r2
5542 bicl3 #0,r2,r0
5543 bneq noname.508
5544 incl r7
5545noname.508:
5546 addl2 r2,r8
5547 bicl2 #0,r8
5548 cmpl r8,r2
5549 bgequ noname.509
5550 incl r7
5551noname.509:
5552
5553 movl 8(ap),r0
5554 movzwl 22(r0),r2
5555 bicl3 #-65536,16(r0),r3
5556 movzwl 18(r0),r1
5557 bicl2 #-65536,r1
5558 bicl3 #-65536,20(r0),-356(fp)
5559 bicl3 #-65536,r2,-360(fp)
5560 mull3 r1,-356(fp),-348(fp)
5561 mull2 r3,-356(fp)
5562 mull3 r3,-360(fp),-352(fp)
5563 mull2 r1,-360(fp)
5564 addl3 -348(fp),-352(fp),r0
5565 bicl3 #0,r0,-348(fp)
5566 cmpl -348(fp),-352(fp)
5567 bgequ noname.510
5568 addl2 #65536,-360(fp)
5569noname.510:
5570 movzwl -346(fp),r0
5571 bicl2 #-65536,r0
5572 addl2 r0,-360(fp)
5573 bicl3 #-65536,-348(fp),r0
5574 ashl #16,r0,-352(fp)
5575 addl3 -352(fp),-356(fp),r0
5576 bicl3 #0,r0,-356(fp)
5577 cmpl -356(fp),-352(fp)
5578 bgequ noname.511
5579 incl -360(fp)
5580noname.511:
5581 movl -356(fp),r3
5582 movl -360(fp),r2
5583 bbc #31,r2,noname.512
5584 incl r7
5585noname.512:
5586 addl2 r2,r2
5587 bicl2 #0,r2
5588 bbc #31,r3,noname.513
5589 incl r2
5590noname.513:
5591 addl2 r3,r3
5592 bicl2 #0,r3
5593 addl2 r3,r9
5594 bicl2 #0,r9
5595 cmpl r9,r3
5596 bgequ noname.514
5597 incl r2
5598 bicl3 #0,r2,r0
5599 bneq noname.514
5600 incl r7
5601noname.514:
5602 addl2 r2,r8
5603 bicl2 #0,r8
5604 cmpl r8,r2
5605 bgequ noname.515
5606 incl r7
5607noname.515:
5608
5609 movl 4(ap),r0
5610 movl r9,36(r0)
5611
5612 clrl r9
5613
5614 movl 8(ap),r3
5615 movl 20(r3),r4
5616 bicl3 #-65536,r4,-364(fp)
5617 extzv #16,#16,r4,r0
5618 bicl3 #-65536,r0,r4
5619 movl -364(fp),r0
5620 mull3 r0,r4,-368(fp)
5621 mull3 r0,r0,-364(fp)
5622 mull2 r4,r4
5623 bicl3 #32767,-368(fp),r0
5624 extzv #15,#17,r0,r0
5625 addl2 r0,r4
5626 bicl3 #-65536,-368(fp),r0
5627 ashl #17,r0,-368(fp)
5628 addl3 -364(fp),-368(fp),r0
5629 bicl3 #0,r0,-364(fp)
5630 cmpl -364(fp),-368(fp)
5631 bgequ noname.516
5632 incl r4
5633noname.516:
5634 movl -364(fp),r1
5635 movl r4,r2
5636 addl2 r1,r8
5637 bicl2 #0,r8
5638 cmpl r8,r1
5639 bgequ noname.517
5640 incl r2
5641noname.517:
5642 addl2 r2,r7
5643 bicl2 #0,r7
5644 cmpl r7,r2
5645 bgequ noname.518
5646 incl r9
5647noname.518:
5648
5649 bicl3 #-65536,24(r3),r4
5650 movzwl 26(r3),r1
5651 bicl2 #-65536,r1
5652 bicl3 #-65536,16(r3),r2
5653 movzwl 18(r3),r0
5654 bicl2 #-65536,r0
5655 movl r4,r6
5656 movl r1,r5
5657 mull3 r0,r6,-372(fp)
5658 mull2 r2,r6
5659 mull3 r2,r5,-376(fp)
5660 mull2 r0,r5
5661 addl3 -372(fp),-376(fp),r0
5662 bicl3 #0,r0,-372(fp)
5663 cmpl -372(fp),-376(fp)
5664 bgequ noname.519
5665 addl2 #65536,r5
5666noname.519:
5667 movzwl -370(fp),r0
5668 bicl2 #-65536,r0
5669 addl2 r0,r5
5670 bicl3 #-65536,-372(fp),r0
5671 ashl #16,r0,-376(fp)
5672 addl2 -376(fp),r6
5673 bicl2 #0,r6
5674 cmpl r6,-376(fp)
5675 bgequ noname.520
5676 incl r5
5677noname.520:
5678 movl r6,r3
5679 movl r5,r2
5680 bbc #31,r2,noname.521
5681 incl r9
5682noname.521:
5683 addl2 r2,r2
5684 bicl2 #0,r2
5685 bbc #31,r3,noname.522
5686 incl r2
5687noname.522:
5688 addl2 r3,r3
5689 bicl2 #0,r3
5690 addl2 r3,r8
5691 bicl2 #0,r8
5692 cmpl r8,r3
5693 bgequ noname.523
5694 incl r2
5695 bicl3 #0,r2,r0
5696 bneq noname.523
5697 incl r9
5698noname.523:
5699 addl2 r2,r7
5700 bicl2 #0,r7
5701 cmpl r7,r2
5702 bgequ noname.524
5703 incl r9
5704noname.524:
5705
5706 movl 8(ap),r0
5707 bicl3 #-65536,28(r0),r3
5708 movzwl 30(r0),r1
5709 bicl2 #-65536,r1
5710 bicl3 #-65536,12(r0),r2
5711 movzwl 14(r0),r0
5712 bicl2 #-65536,r0
5713 movl r3,r5
5714 movl r1,r4
5715 mull3 r0,r5,-380(fp)
5716 mull2 r2,r5
5717 mull3 r2,r4,-384(fp)
5718 mull2 r0,r4
5719 addl3 -380(fp),-384(fp),r0
5720 bicl3 #0,r0,-380(fp)
5721 cmpl -380(fp),-384(fp)
5722 bgequ noname.525
5723 addl2 #65536,r4
5724noname.525:
5725 movzwl -378(fp),r0
5726 bicl2 #-65536,r0
5727 addl2 r0,r4
5728 bicl3 #-65536,-380(fp),r0
5729 ashl #16,r0,-384(fp)
5730 addl2 -384(fp),r5
5731 bicl2 #0,r5
5732 cmpl r5,-384(fp)
5733 bgequ noname.526
5734 incl r4
5735noname.526:
5736 movl r5,r3
5737 movl r4,r2
5738 bbc #31,r2,noname.527
5739 incl r9
5740noname.527:
5741 addl2 r2,r2
5742 bicl2 #0,r2
5743 bbc #31,r3,noname.528
5744 incl r2
5745noname.528:
5746 addl2 r3,r3
5747 bicl2 #0,r3
5748 addl2 r3,r8
5749 bicl2 #0,r8
5750 cmpl r8,r3
5751 bgequ noname.529
5752 incl r2
5753 bicl3 #0,r2,r0
5754 bneq noname.529
5755 incl r9
5756noname.529:
5757 addl2 r2,r7
5758 bicl2 #0,r7
5759 cmpl r7,r2
5760 bgequ noname.530
5761 incl r9
5762noname.530:
5763 movl 4(ap),r0
5764 movl r8,40(r0)
5765
5766 clrl r8
5767
5768 movl 8(ap),r0
5769 bicl3 #-65536,28(r0),r3
5770 movzwl 30(r0),r1
5771 bicl2 #-65536,r1
5772 bicl3 #-65536,16(r0),r2
5773 movzwl 18(r0),r0
5774 bicl2 #-65536,r0
5775 movl r3,r5
5776 movl r1,r4
5777 mull3 r0,r5,-388(fp)
5778 mull2 r2,r5
5779 mull3 r2,r4,-392(fp)
5780 mull2 r0,r4
5781 addl3 -388(fp),-392(fp),r0
5782 bicl3 #0,r0,-388(fp)
5783 cmpl -388(fp),-392(fp)
5784 bgequ noname.531
5785 addl2 #65536,r4
5786noname.531:
5787 movzwl -386(fp),r0
5788 bicl2 #-65536,r0
5789 addl2 r0,r4
5790 bicl3 #-65536,-388(fp),r0
5791 ashl #16,r0,-392(fp)
5792 addl2 -392(fp),r5
5793 bicl2 #0,r5
5794 cmpl r5,-392(fp)
5795 bgequ noname.532
5796 incl r4
5797noname.532:
5798 movl r5,r3
5799 movl r4,r2
5800 bbc #31,r2,noname.533
5801 incl r8
5802noname.533:
5803 addl2 r2,r2
5804 bicl2 #0,r2
5805 bbc #31,r3,noname.534
5806 incl r2
5807noname.534:
5808 addl2 r3,r3
5809 bicl2 #0,r3
5810 addl2 r3,r7
5811 bicl2 #0,r7
5812 cmpl r7,r3
5813 bgequ noname.535
5814 incl r2
5815 bicl3 #0,r2,r0
5816 bneq noname.535
5817 incl r8
5818noname.535:
5819 addl2 r2,r9
5820 bicl2 #0,r9
5821 cmpl r9,r2
5822 bgequ noname.536
5823 incl r8
5824noname.536:
5825
5826 movl 8(ap),r0
5827 bicl3 #-65536,24(r0),r3
5828 movzwl 26(r0),r1
5829 bicl2 #-65536,r1
5830 bicl3 #-65536,20(r0),r2
5831 movzwl 22(r0),r0
5832 bicl2 #-65536,r0
5833 movl r3,r5
5834 movl r1,r4
5835 mull3 r0,r5,-396(fp)
5836 mull2 r2,r5
5837 mull3 r2,r4,-400(fp)
5838 mull2 r0,r4
5839 addl3 -396(fp),-400(fp),r0
5840 bicl3 #0,r0,-396(fp)
5841 cmpl -396(fp),-400(fp)
5842 bgequ noname.537
5843 addl2 #65536,r4
5844noname.537:
5845 movzwl -394(fp),r0
5846 bicl2 #-65536,r0
5847 addl2 r0,r4
5848 bicl3 #-65536,-396(fp),r0
5849 ashl #16,r0,-400(fp)
5850 addl2 -400(fp),r5
5851 bicl2 #0,r5
5852 cmpl r5,-400(fp)
5853 bgequ noname.538
5854 incl r4
5855noname.538:
5856 movl r5,r3
5857 movl r4,r2
5858 bbc #31,r2,noname.539
5859 incl r8
5860noname.539:
5861 addl2 r2,r2
5862 bicl2 #0,r2
5863 bbc #31,r3,noname.540
5864 incl r2
5865noname.540:
5866 addl2 r3,r3
5867 bicl2 #0,r3
5868 addl2 r3,r7
5869 bicl2 #0,r7
5870 cmpl r7,r3
5871 bgequ noname.541
5872 incl r2
5873 bicl3 #0,r2,r0
5874 bneq noname.541
5875 incl r8
5876noname.541:
5877 addl2 r2,r9
5878 bicl2 #0,r9
5879 cmpl r9,r2
5880 bgequ noname.542
5881 incl r8
5882noname.542:
5883
5884 movl 4(ap),r0
5885 movl r7,44(r0)
5886
5887 clrl r7
5888
5889 movl 8(ap),r3
5890 movl 24(r3),r4
5891 bicl3 #-65536,r4,r5
5892 extzv #16,#16,r4,r0
5893 bicl3 #-65536,r0,r4
5894 mull3 r5,r4,-404(fp)
5895 mull2 r5,r5
5896 mull2 r4,r4
5897 bicl3 #32767,-404(fp),r0
5898 extzv #15,#17,r0,r0
5899 addl2 r0,r4
5900 bicl3 #-65536,-404(fp),r0
5901 ashl #17,r0,-404(fp)
5902 addl2 -404(fp),r5
5903 bicl2 #0,r5
5904 cmpl r5,-404(fp)
5905 bgequ noname.543
5906 incl r4
5907noname.543:
5908 movl r5,r1
5909 movl r4,r2
5910 addl2 r1,r9
5911 bicl2 #0,r9
5912 cmpl r9,r1
5913 bgequ noname.544
5914 incl r2
5915noname.544:
5916 addl2 r2,r8
5917 bicl2 #0,r8
5918 cmpl r8,r2
5919 bgequ noname.545
5920 incl r7
5921noname.545:
5922
5923 movzwl 30(r3),r2
5924 bicl3 #-65536,20(r3),r1
5925 movzwl 22(r3),r0
5926 bicl2 #-65536,r0
5927 bicl3 #-65536,28(r3),-416(fp)
5928 bicl3 #-65536,r2,-420(fp)
5929 mull3 r0,-416(fp),-408(fp)
5930 mull2 r1,-416(fp)
5931 mull3 r1,-420(fp),-412(fp)
5932 mull2 r0,-420(fp)
5933 addl3 -408(fp),-412(fp),r0
5934 bicl3 #0,r0,-408(fp)
5935 cmpl -408(fp),-412(fp)
5936 bgequ noname.546
5937 addl2 #65536,-420(fp)
5938noname.546:
5939 movzwl -406(fp),r0
5940 bicl2 #-65536,r0
5941 addl2 r0,-420(fp)
5942 bicl3 #-65536,-408(fp),r0
5943 ashl #16,r0,-412(fp)
5944 addl3 -412(fp),-416(fp),r0
5945 bicl3 #0,r0,-416(fp)
5946 cmpl -416(fp),-412(fp)
5947 bgequ noname.547
5948 incl -420(fp)
5949noname.547:
5950 movl -416(fp),r3
5951 movl -420(fp),r2
5952 bbc #31,r2,noname.548
5953 incl r7
5954noname.548:
5955 addl2 r2,r2
5956 bicl2 #0,r2
5957 bbc #31,r3,noname.549
5958 incl r2
5959noname.549:
5960 addl2 r3,r3
5961 bicl2 #0,r3
5962 addl2 r3,r9
5963 bicl2 #0,r9
5964 cmpl r9,r3
5965 bgequ noname.550
5966 incl r2
5967 bicl3 #0,r2,r0
5968 bneq noname.550
5969 incl r7
5970noname.550:
5971 addl2 r2,r8
5972 bicl2 #0,r8
5973 cmpl r8,r2
5974 bgequ noname.551
5975 incl r7
5976noname.551:
5977
5978 movl 4(ap),r0
5979 movl r9,48(r0)
5980
5981 clrl r9
5982
5983 movl 8(ap),r0
5984 movzwl 30(r0),r2
5985 bicl3 #-65536,24(r0),r3
5986 movzwl 26(r0),r1
5987 bicl2 #-65536,r1
5988 bicl3 #-65536,28(r0),-432(fp)
5989 bicl3 #-65536,r2,-436(fp)
5990 mull3 r1,-432(fp),-424(fp)
5991 mull2 r3,-432(fp)
5992 mull3 r3,-436(fp),-428(fp)
5993 mull2 r1,-436(fp)
5994 addl3 -424(fp),-428(fp),r0
5995 bicl3 #0,r0,-424(fp)
5996 cmpl -424(fp),-428(fp)
5997 bgequ noname.552
5998 addl2 #65536,-436(fp)
5999noname.552:
6000 movzwl -422(fp),r0
6001 bicl2 #-65536,r0
6002 addl2 r0,-436(fp)
6003 bicl3 #-65536,-424(fp),r0
6004 ashl #16,r0,-428(fp)
6005 addl3 -428(fp),-432(fp),r0
6006 bicl3 #0,r0,-432(fp)
6007 cmpl -432(fp),-428(fp)
6008 bgequ noname.553
6009 incl -436(fp)
6010noname.553:
6011 movl -432(fp),r3
6012 movl -436(fp),r2
6013 bbc #31,r2,noname.554
6014 incl r9
6015noname.554:
6016 addl2 r2,r2
6017 bicl2 #0,r2
6018 bbc #31,r3,noname.555
6019 incl r2
6020noname.555:
6021 addl2 r3,r3
6022 bicl2 #0,r3
6023 addl2 r3,r8
6024 bicl2 #0,r8
6025 cmpl r8,r3
6026 bgequ noname.556
6027 incl r2
6028 bicl3 #0,r2,r0
6029 bneq noname.556
6030 incl r9
6031noname.556:
6032 addl2 r2,r7
6033 bicl2 #0,r7
6034 cmpl r7,r2
6035 bgequ noname.557
6036 incl r9
6037noname.557:
6038
6039 movl 4(ap),r4
6040 movl r8,52(r4)
6041
6042 clrl r8
6043
6044 movl 8(ap),r0
6045 movl 28(r0),r3
6046 bicl3 #-65536,r3,-440(fp)
6047 extzv #16,#16,r3,r0
6048 bicl3 #-65536,r0,r3
6049 movl -440(fp),r0
6050 mull3 r0,r3,-444(fp)
6051 mull3 r0,r0,-440(fp)
6052 mull2 r3,r3
6053 bicl3 #32767,-444(fp),r0
6054 extzv #15,#17,r0,r0
6055 addl2 r0,r3
6056 bicl3 #-65536,-444(fp),r0
6057 ashl #17,r0,-444(fp)
6058 addl3 -440(fp),-444(fp),r0
6059 bicl3 #0,r0,-440(fp)
6060 cmpl -440(fp),-444(fp)
6061 bgequ noname.558
6062 incl r3
6063noname.558:
6064 movl -440(fp),r1
6065 movl r3,r2
6066 addl2 r1,r7
6067 bicl2 #0,r7
6068 cmpl r7,r1
6069 bgequ noname.559
6070 incl r2
6071noname.559:
6072 addl2 r2,r9
6073 bicl2 #0,r9
6074 cmpl r9,r2
6075 bgequ noname.560
6076 incl r8
6077noname.560:
6078
6079 movl r7,56(r4)
6080
6081 movl r9,60(r4)
6082
6083 ret
6084
6085
6086
6087;r=4 ;(AP)
6088;a=8 ;(AP)
6089;b=12 ;(AP)
6090;n=16 ;(AP) n by value (input)
6091
6092 .psect code,nowrt
6093
; BN_SQR_COMBA4 -- square the four longwords addressed by 8(ap) ("a") and
; store the eight-longword result through the pointer in 4(ap) ("r").
;
; The result is produced one column (one output longword) at a time.
; Each 32x32 product is assembled from 16-bit halves of its operands with
; MULL.  Products are summed into a three-longword accumulator kept in
; r8/r9/r10; after each column the lowest accumulator longword is stored,
; cleared, and the three registers swap roles.  Cross products a[i]*a[j]
; with i != j are doubled (shifted left one bit) before being added.
; The recurring "bicl2 #0,reg" clears no bits and leaves reg unchanged.
; Frame slots -4(fp) .. -44(fp) hold intermediate partial products.
6094.entry BN_SQR_COMBA4,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10>
; Reserve 44 bytes of stack for the frame slots used below.
6095 subl2 #44,sp
6096
; clrq clears the register pair r8/r9; together with r10 the accumulator is zero.
6097 clrq r8
6098
6099 clrl r10
6100
; --- Column 0: a[0]^2.  Accumulator order low/mid/high = r9/r8/r10. ---
; Split a[0] into low half (r4) and high half (r3); m = low*high goes to
; -4(fp).  The square is low^2 + (m << 17) + (high^2 << 32): the top 17
; bits of m are added to the high longword, the low 16 bits shifted left
; by 17 are added to the low longword, with the carry moved into r3.
6101 movl 8(ap),r5
6102 movl (r5),r3
6103 bicl3 #-65536,r3,r4
6104 extzv #16,#16,r3,r0
6105 bicl3 #-65536,r0,r3
6106 mull3 r4,r3,-4(fp)
6107 mull2 r4,r4
6108 mull2 r3,r3
6109 bicl3 #32767,-4(fp),r0
6110 extzv #15,#17,r0,r0
6111 addl2 r0,r3
6112 bicl3 #-65536,-4(fp),r0
6113 ashl #17,r0,-4(fp)
6114 addl2 -4(fp),r4
6115 bicl2 #0,r4
6116 cmpl r4,-4(fp)
6117 bgequ noname.563
6118 incl r3
6119noname.563:
; Add the 64-bit product (r1 = low, r2 = high) into the accumulator; an
; unsigned "sum < addend" compare detects each carry.
6120 movl r4,r1
6121 movl r3,r2
6122 addl2 r1,r9
6123 bicl2 #0,r9
6124 cmpl r9,r1
6125 bgequ noname.564
6126 incl r2
6127noname.564:
6128 addl2 r2,r8
6129 bicl2 #0,r8
6130 cmpl r8,r2
6131 bgequ noname.565
6132 incl r10
6133noname.565:
6134
; r[0] = low accumulator longword.
6135 movl r9,@4(ap)
6136
6137 clrl r9
6138
; --- Column 1: 2 * a[1] * a[0].  Accumulator order r8/r10/r9. ---
; r3/r1 = low/high halves of a[1]; r2/r0 = low/high halves of a[0].
; Four 16x16 partial products are formed; the two middle ones are summed
; in -8(fp), and a carry out of that sum adds 65536 to the high product.
6139 bicl3 #-65536,4(r5),r3
6140 movzwl 6(r5),r1
6141 bicl2 #-65536,r1
6142 bicl3 #-65536,(r5),r2
6143 movzwl 2(r5),r0
6144 bicl2 #-65536,r0
6145 movl r3,r6
6146 movl r1,r4
6147 mull3 r0,r6,-8(fp)
6148 mull2 r2,r6
6149 mull2 r4,r2
6150 mull2 r0,r4
6151 addl3 -8(fp),r2,r0
6152 bicl3 #0,r0,-8(fp)
6153 cmpl -8(fp),r2
6154 bgequ noname.566
6155 addl2 #65536,r4
6156noname.566:
; Fold the middle sum in: its upper 16 bits go to the high longword, its
; lower 16 bits shifted left by 16 go to the low longword (with carry).
6157 movzwl -6(fp),r0
6158 bicl2 #-65536,r0
6159 addl2 r0,r4
6160 bicl3 #-65536,-8(fp),r0
6161 ashl #16,r0,r1
6162 addl2 r1,r6
6163 bicl2 #0,r6
6164 cmpl r6,r1
6165 bgequ noname.567
6166 incl r4
6167noname.567:
; Double the 64-bit product r2:r3.  Bit 31 of the high longword would be
; shifted out, so it is counted in the top accumulator longword; bit 31
; of the low longword becomes bit 0 of the high longword.
6168 movl r6,r3
6169 movl r4,r2
6170 bbc #31,r2,noname.568
6171 incl r9
6172noname.568:
6173 addl2 r2,r2
6174 bicl2 #0,r2
6175 bbc #31,r3,noname.569
6176 incl r2
6177noname.569:
6178 addl2 r3,r3
6179 bicl2 #0,r3
; Accumulate.  If the carry from the low add wraps r2 to zero, that carry
; is passed on to the top accumulator longword instead.
6180 addl2 r3,r8
6181 bicl2 #0,r8
6182 cmpl r8,r3
6183 bgequ noname.570
6184 incl r2
6185 bicl3 #0,r2,r0
6186 bneq noname.570
6187 incl r9
6188noname.570:
6189 addl2 r2,r10
6190 bicl2 #0,r10
6191 cmpl r10,r2
6192 bgequ noname.571
6193 incl r9
6194noname.571:
6195
; r[1] = low accumulator longword.
6196 movl 4(ap),r0
6197 movl r8,4(r0)
6198
6199 clrl r8
6200
; --- Column 2: a[1]^2 + 2 * a[2] * a[0].  Accumulator order r10/r9/r8. ---
; a[1]^2, same halves-and-shift scheme as column 0 (m kept in r1).
6201 movl 8(ap),r4
6202 movl 4(r4),r3
6203 bicl3 #-65536,r3,r5
6204 extzv #16,#16,r3,r0
6205 bicl3 #-65536,r0,r3
6206 mull3 r5,r3,r1
6207 mull2 r5,r5
6208 mull2 r3,r3
6209 bicl3 #32767,r1,r0
6210 extzv #15,#17,r0,r0
6211 addl2 r0,r3
6212 bicl2 #-65536,r1
6213 ashl #17,r1,r1
6214 addl2 r1,r5
6215 bicl2 #0,r5
6216 cmpl r5,r1
6217 bgequ noname.572
6218 incl r3
6219noname.572:
6220 movl r5,r1
6221 movl r3,r2
6222 addl2 r1,r10
6223 bicl2 #0,r10
6224 cmpl r10,r1
6225 bgequ noname.573
6226 incl r2
6227noname.573:
6228 addl2 r2,r9
6229 bicl2 #0,r9
6230 cmpl r9,r2
6231 bgequ noname.574
6232 incl r8
6233noname.574:
6234
; 2 * a[2] * a[0]; the middle partial-product sum is kept in r7.
6235 bicl3 #-65536,8(r4),r3
6236 movzwl 10(r4),r1
6237 bicl2 #-65536,r1
6238 bicl3 #-65536,(r4),r2
6239 movzwl 2(r4),r0
6240 bicl2 #-65536,r0
6241 movl r3,r6
6242 movl r1,r5
6243 mull3 r0,r6,r7
6244 mull2 r2,r6
6245 mull2 r5,r2
6246 mull2 r0,r5
6247 addl2 r2,r7
6248 bicl2 #0,r7
6249 cmpl r7,r2
6250 bgequ noname.575
6251 addl2 #65536,r5
6252noname.575:
6253 extzv #16,#16,r7,r0
6254 bicl2 #-65536,r0
6255 addl2 r0,r5
6256 bicl3 #-65536,r7,r0
6257 ashl #16,r0,r1
6258 addl2 r1,r6
6259 bicl2 #0,r6
6260 cmpl r6,r1
6261 bgequ noname.576
6262 incl r5
6263noname.576:
6264 movl r6,r3
6265 movl r5,r2
6266 bbc #31,r2,noname.577
6267 incl r8
6268noname.577:
6269 addl2 r2,r2
6270 bicl2 #0,r2
6271 bbc #31,r3,noname.578
6272 incl r2
6273noname.578:
6274 addl2 r3,r3
6275 bicl2 #0,r3
6276 addl2 r3,r10
6277 bicl2 #0,r10
6278 cmpl r10,r3
6279 bgequ noname.579
6280 incl r2
6281 bicl3 #0,r2,r0
6282 bneq noname.579
6283 incl r8
6284noname.579:
6285 addl2 r2,r9
6286 bicl2 #0,r9
6287 cmpl r9,r2
6288 bgequ noname.580
6289 incl r8
6290noname.580:
6291
; r[2] = low accumulator longword.
6292 movl 4(ap),r0
6293 movl r10,8(r0)
6294
6295 clrl r10
6296
; --- Column 3: 2 * a[3] * a[0] + 2 * a[2] * a[1].  Accumulator order r9/r8/r10. ---
; 2 * a[3] * a[0]; middle sum in r6, second middle product in -12(fp).
6297 movl 8(ap),r0
6298 bicl3 #-65536,12(r0),r3
6299 movzwl 14(r0),r1
6300 bicl2 #-65536,r1
6301 bicl3 #-65536,(r0),r2
6302 movzwl 2(r0),r0
6303 bicl2 #-65536,r0
6304 movl r3,r5
6305 movl r1,r4
6306 mull3 r0,r5,r6
6307 mull2 r2,r5
6308 mull3 r2,r4,-12(fp)
6309 mull2 r0,r4
6310 addl2 -12(fp),r6
6311 bicl2 #0,r6
6312 cmpl r6,-12(fp)
6313 bgequ noname.581
6314 addl2 #65536,r4
6315noname.581:
6316 extzv #16,#16,r6,r0
6317 bicl2 #-65536,r0
6318 addl2 r0,r4
6319 bicl3 #-65536,r6,r0
6320 ashl #16,r0,-12(fp)
6321 addl2 -12(fp),r5
6322 bicl2 #0,r5
6323 cmpl r5,-12(fp)
6324 bgequ noname.582
6325 incl r4
6326noname.582:
6327 movl r5,r3
6328 movl r4,r2
6329 bbc #31,r2,noname.583
6330 incl r10
6331noname.583:
6332 addl2 r2,r2
6333 bicl2 #0,r2
6334 bbc #31,r3,noname.584
6335 incl r2
6336noname.584:
6337 addl2 r3,r3
6338 bicl2 #0,r3
6339 addl2 r3,r9
6340 bicl2 #0,r9
6341 cmpl r9,r3
6342 bgequ noname.585
6343 incl r2
6344 bicl3 #0,r2,r0
6345 bneq noname.585
6346 incl r10
6347noname.585:
6348 addl2 r2,r8
6349 bicl2 #0,r8
6350 cmpl r8,r2
6351 bgequ noname.586
6352 incl r10
6353noname.586:
6354
; 2 * a[2] * a[1]; middle partial products in -16(fp) and -20(fp).
6355 movl 8(ap),r0
6356 bicl3 #-65536,8(r0),r3
6357 movzwl 10(r0),r1
6358 bicl2 #-65536,r1
6359 bicl3 #-65536,4(r0),r2
6360 movzwl 6(r0),r0
6361 bicl2 #-65536,r0
6362 movl r3,r5
6363 movl r1,r4
6364 mull3 r0,r5,-16(fp)
6365 mull2 r2,r5
6366 mull3 r2,r4,-20(fp)
6367 mull2 r0,r4
6368 addl3 -16(fp),-20(fp),r0
6369 bicl3 #0,r0,-16(fp)
6370 cmpl -16(fp),-20(fp)
6371 bgequ noname.587
6372 addl2 #65536,r4
6373noname.587:
6374 movzwl -14(fp),r0
6375 bicl2 #-65536,r0
6376 addl2 r0,r4
6377 bicl3 #-65536,-16(fp),r0
6378 ashl #16,r0,-20(fp)
6379 addl2 -20(fp),r5
6380 bicl2 #0,r5
6381 cmpl r5,-20(fp)
6382 bgequ noname.588
6383 incl r4
6384noname.588:
6385 movl r5,r3
6386 movl r4,r2
6387 bbc #31,r2,noname.589
6388 incl r10
6389noname.589:
6390 addl2 r2,r2
6391 bicl2 #0,r2
6392 bbc #31,r3,noname.590
6393 incl r2
6394noname.590:
6395 addl2 r3,r3
6396 bicl2 #0,r3
6397 addl2 r3,r9
6398 bicl2 #0,r9
6399 cmpl r9,r3
6400 bgequ noname.591
6401 incl r2
6402 bicl3 #0,r2,r0
6403 bneq noname.591
6404 incl r10
6405noname.591:
6406 addl2 r2,r8
6407 bicl2 #0,r8
6408 cmpl r8,r2
6409 bgequ noname.592
6410 incl r10
6411noname.592:
; r[3] = low accumulator longword.
6412 movl 4(ap),r0
6413 movl r9,12(r0)
6414
6415 clrl r9
6416
; --- Column 4: a[2]^2 + 2 * a[3] * a[1].  Accumulator order r8/r10/r9. ---
; a[2]^2; m is kept in -24(fp).
6417 movl 8(ap),r3
6418 movl 8(r3),r4
6419 bicl3 #-65536,r4,r5
6420 extzv #16,#16,r4,r0
6421 bicl3 #-65536,r0,r4
6422 mull3 r5,r4,-24(fp)
6423 mull2 r5,r5
6424 mull2 r4,r4
6425 bicl3 #32767,-24(fp),r0
6426 extzv #15,#17,r0,r0
6427 addl2 r0,r4
6428 bicl3 #-65536,-24(fp),r0
6429 ashl #17,r0,-24(fp)
6430 addl2 -24(fp),r5
6431 bicl2 #0,r5
6432 cmpl r5,-24(fp)
6433 bgequ noname.593
6434 incl r4
6435noname.593:
6436 movl r5,r1
6437 movl r4,r2
6438 addl2 r1,r8
6439 bicl2 #0,r8
6440 cmpl r8,r1
6441 bgequ noname.594
6442 incl r2
6443noname.594:
6444 addl2 r2,r10
6445 bicl2 #0,r10
6446 cmpl r10,r2
6447 bgequ noname.595
6448 incl r9
6449noname.595:
6450
; 2 * a[3] * a[1]; middle partial products in -28(fp) and -32(fp).
6451 bicl3 #-65536,12(r3),r4
6452 movzwl 14(r3),r1
6453 bicl2 #-65536,r1
6454 bicl3 #-65536,4(r3),r2
6455 movzwl 6(r3),r0
6456 bicl2 #-65536,r0
6457 movl r4,r6
6458 movl r1,r5
6459 mull3 r0,r6,-28(fp)
6460 mull2 r2,r6
6461 mull3 r2,r5,-32(fp)
6462 mull2 r0,r5
6463 addl3 -28(fp),-32(fp),r0
6464 bicl3 #0,r0,-28(fp)
6465 cmpl -28(fp),-32(fp)
6466 bgequ noname.596
6467 addl2 #65536,r5
6468noname.596:
6469 movzwl -26(fp),r0
6470 bicl2 #-65536,r0
6471 addl2 r0,r5
6472 bicl3 #-65536,-28(fp),r0
6473 ashl #16,r0,-32(fp)
6474 addl2 -32(fp),r6
6475 bicl2 #0,r6
6476 cmpl r6,-32(fp)
6477 bgequ noname.597
6478 incl r5
6479noname.597:
6480 movl r6,r3
6481 movl r5,r2
6482 bbc #31,r2,noname.598
6483 incl r9
6484noname.598:
6485 addl2 r2,r2
6486 bicl2 #0,r2
6487 bbc #31,r3,noname.599
6488 incl r2
6489noname.599:
6490 addl2 r3,r3
6491 bicl2 #0,r3
6492 addl2 r3,r8
6493 bicl2 #0,r8
6494 cmpl r8,r3
6495 bgequ noname.600
6496 incl r2
6497 bicl3 #0,r2,r0
6498 bneq noname.600
6499 incl r9
6500noname.600:
6501 addl2 r2,r10
6502 bicl2 #0,r10
6503 cmpl r10,r2
6504 bgequ noname.601
6505 incl r9
6506noname.601:
6507
; r[4] = low accumulator longword.
6508 movl 4(ap),r0
6509 movl r8,16(r0)
6510
6511 clrl r8
6512
; --- Column 5: 2 * a[3] * a[2].  Accumulator order r10/r9/r8. ---
; Middle partial products in -36(fp) and -40(fp).
6513 movl 8(ap),r0
6514 bicl3 #-65536,12(r0),r3
6515 movzwl 14(r0),r1
6516 bicl2 #-65536,r1
6517 bicl3 #-65536,8(r0),r2
6518 movzwl 10(r0),r0
6519 bicl2 #-65536,r0
6520 movl r3,r5
6521 movl r1,r4
6522 mull3 r0,r5,-36(fp)
6523 mull2 r2,r5
6524 mull3 r2,r4,-40(fp)
6525 mull2 r0,r4
6526 addl3 -36(fp),-40(fp),r0
6527 bicl3 #0,r0,-36(fp)
6528 cmpl -36(fp),-40(fp)
6529 bgequ noname.602
6530 addl2 #65536,r4
6531noname.602:
6532 movzwl -34(fp),r0
6533 bicl2 #-65536,r0
6534 addl2 r0,r4
6535 bicl3 #-65536,-36(fp),r0
6536 ashl #16,r0,-40(fp)
6537 addl2 -40(fp),r5
6538 bicl2 #0,r5
6539 cmpl r5,-40(fp)
6540 bgequ noname.603
6541 incl r4
6542noname.603:
6543 movl r5,r3
6544 movl r4,r2
6545 bbc #31,r2,noname.604
6546 incl r8
6547noname.604:
6548 addl2 r2,r2
6549 bicl2 #0,r2
6550 bbc #31,r3,noname.605
6551 incl r2
6552noname.605:
6553 addl2 r3,r3
6554 bicl2 #0,r3
6555 addl2 r3,r10
6556 bicl2 #0,r10
6557 cmpl r10,r3
6558 bgequ noname.606
6559 incl r2
6560 bicl3 #0,r2,r0
6561 bneq noname.606
6562 incl r8
6563noname.606:
6564 addl2 r2,r9
6565 bicl2 #0,r9
6566 cmpl r9,r2
6567 bgequ noname.607
6568 incl r8
6569noname.607:
6570
; r[5] = low accumulator longword; r4 keeps the result pointer for the
; two final stores.
6571 movl 4(ap),r4
6572 movl r10,20(r4)
6573
6574 clrl r10
6575
; --- Column 6: a[3]^2.  Accumulator order r9/r8/r10. ---
; m is kept in -44(fp).
6576 movl 8(ap),r0
6577 movl 12(r0),r3
6578 bicl3 #-65536,r3,r5
6579 extzv #16,#16,r3,r0
6580 bicl3 #-65536,r0,r3
6581 mull3 r5,r3,-44(fp)
6582 mull2 r5,r5
6583 mull2 r3,r3
6584 bicl3 #32767,-44(fp),r0
6585 extzv #15,#17,r0,r0
6586 addl2 r0,r3
6587 bicl3 #-65536,-44(fp),r0
6588 ashl #17,r0,-44(fp)
6589 addl2 -44(fp),r5
6590 bicl2 #0,r5
6591 cmpl r5,-44(fp)
6592 bgequ noname.608
6593 incl r3
6594noname.608:
6595 movl r5,r1
6596 movl r3,r2
6597 addl2 r1,r9
6598 bicl2 #0,r9
6599 cmpl r9,r1
6600 bgequ noname.609
6601 incl r2
6602noname.609:
6603 addl2 r2,r8
6604 bicl2 #0,r8
6605 cmpl r8,r2
6606 bgequ noname.610
6607 incl r10
6608noname.610:
6609
; r[6] and r[7] = the two remaining accumulator longwords.  The carry
; counted in r10 just above is not stored anywhere.
6610 movl r9,24(r4)
6611
6612 movl r8,28(r4)
6613
6614 ret
6615
6616; For now, the code below doesn't work, so I end this prematurely.
6617.end
6618
6619 .title vax_bn_div64 division 64/32=>32
6620;
6621; r.l. 16-jan-1998
6622;
6623; unsigned int bn_div64(unsigned long h, unsigned long l, unsigned long d)
6624; return <h,l>/d;
6625;
6626
6627 .psect code,nowrt
6628
6629h=4 ;(AP) by value (input)
6630l=8 ;(AP) by value (input)
6631d=12 ;(AP) by value (input)
6632
; bn_div64(h, l, d): divide the 64-bit value <h,l> by d and return the
; quotient in r0.  The arguments are read from h(ap), l(ap) and d(ap).
;
; Exits:
;   normal      r0 = quotient
;   90$         d == 0 on the non-"negative" path: r0 = -1
;   666$        EDIV overflow (V set) or carry out of the final add:
;               r0 = -1, the same value the 90$ exit returns
;
; EDIV is a signed divide, so a dividend whose high longword has bit 31
; set takes the separate 30$ path.  NOTE(review): the comment preceding
; this routine says the code does not work; only the missing return at
; 666$ is repaired here, the algorithm itself is unchanged.
6633.entry bn_div64,^m<r2,r3,r4,r5,r6,r7,r8,r9>
6634
6635 movl l(ap),r2 ; l
6636 movl h(ap),r3 ; h
6637 movl d(ap),r4 ; d
6638 clrl r5 ; q
6639 clrl r6 ; r
6640
6641 ; Treat "negative" specially
6642 tstl r3
6643 blss 30$
6644
6645 tstl r4
6646 beql 90$
6647
; Quadword dividend is the register pair r2/r3 (= <h,l>); quotient to r5,
; remainder to r6.
6648 ediv r4,r2,r5,r6
6649 bvs 666$
6650
6651 movl r5,r0
6652 ret
6653
665430$:
6655 ; The theory here is to do some harmless shifting and a little
6656 ; bit of rounding (brackets are to designate when decimals are
6657 ; cut off):
6658 ;
6659 ; result = 2 * [ ([<h,0>/2] + [d/2]) / d ] + [ l / d ]
6660
6661 movl #0,r7
6662 movl r3,r8 ; copy h
6663 ashq #-1,r7,r7 ; [<h,0>/2] => <r8,r7>
6664 bicl2 #^X80000000,r8 ; Remove "sign"
6665
6666 movl r4,r9 ; copy d
6667 ashl #-1,r9,r9 ; [d/2] => r9
6668 bicl2 #^X80000000,r9 ; Remove "sign"
6669
6670 addl2 r9,r7
6671 adwc #0,r8 ; [<h,0>/2] + [d/2] => <r8,r7>
6672
6673 ediv r4,r7,r5,r6 ; [ ([<h,0>/2] + [d/2]) / d ] => <r5,r6>
6674 bvs 666$
6675
6676 movl #0,r6
6677 ashq #1,r5,r5 ; 2 * [ ([<h,0>/2] + [d/2]) / d ] => r5
6678
6679 movl #0,r3
6680 ediv r4,r2,r8,r9 ; [ l / d ] => <r8,r9>
6681
6682 addl2 r8,r5 ;
6683 bcs 666$
6684
6685 movl r5,r0
6686 ret
6687
668890$:
6689 movl #-1,r0
6690 ret
6691
6692666$:
; Overflow exit.  This label previously had no instructions, so the three
; branches that target it ran off the end of the routine with r0 unset
; and no RET.  Return -1, matching the 90$ exit.
6693 movl #-1,r0
6694 ret
6695.end
diff --git a/src/lib/libcrypto/bn/asm/x86.pl b/src/lib/libcrypto/bn/asm/x86.pl
new file mode 100644
index 0000000000..1bc4f1bb27
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86.pl
@@ -0,0 +1,28 @@
1#!/usr/local/bin/perl
2 # Driver script: loads the x86 generator modules, then emits each bn_* routine between asm_init and asm_finish.
3push(@INC,"perlasm","../../perlasm"); # add both candidate perlasm directories to the module search path
4require "x86asm.pl";
5 # one generator module per routine family
6require("x86/mul_add.pl");
7require("x86/mul.pl");
8require("x86/sqr.pl");
9require("x86/div.pl");
10require("x86/add.pl");
11require("x86/sub.pl");
12require("x86/comba.pl");
13
14&asm_init($ARGV[0],$0); # passes the first command-line argument and this script's name; their meaning is defined in x86asm.pl (not visible here)
15 # each call below takes a routine name string (presumably the emitted symbol name -- confirm in the generator modules)
16&bn_mul_add_words("bn_mul_add_words");
17&bn_mul_words("bn_mul_words");
18&bn_sqr_words("bn_sqr_words");
19&bn_div_words("bn_div_words");
20&bn_add_words("bn_add_words");
21&bn_sub_words("bn_sub_words");
22&bn_mul_comba("bn_mul_comba8",8); # comba generators also take a numeric argument matching the name suffix (8 or 4)
23&bn_mul_comba("bn_mul_comba4",4);
24&bn_sqr_comba("bn_sqr_comba8",8);
25&bn_sqr_comba("bn_sqr_comba4",4);
26
27&asm_finish();
28
diff --git a/src/lib/libcrypto/bn/asm/x86w16.asm b/src/lib/libcrypto/bn/asm/x86w16.asm
index 74a933a8cd..80a9ed6eef 100644
--- a/src/lib/libcrypto/bn/asm/x86w16.asm
+++ b/src/lib/libcrypto/bn/asm/x86w16.asm
@@ -6,11 +6,11 @@ F_TEXT SEGMENT WORD PUBLIC 'CODE'
6F_TEXT ENDS 6F_TEXT ENDS
7_DATA SEGMENT WORD PUBLIC 'DATA' 7_DATA SEGMENT WORD PUBLIC 'DATA'
8_DATA ENDS 8_DATA ENDS
9CONST SEGMENT WORD PUBLIC 'CONST' 9_CONST SEGMENT WORD PUBLIC 'CONST'
10CONST ENDS 10_CONST ENDS
11_BSS SEGMENT WORD PUBLIC 'BSS' 11_BSS SEGMENT WORD PUBLIC 'BSS'
12_BSS ENDS 12_BSS ENDS
13DGROUP GROUP CONST, _BSS, _DATA 13DGROUP GROUP _CONST, _BSS, _DATA
14 ASSUME DS: DGROUP, SS: DGROUP 14 ASSUME DS: DGROUP, SS: DGROUP
15F_TEXT SEGMENT 15F_TEXT SEGMENT
16 ASSUME CS: F_TEXT 16 ASSUME CS: F_TEXT
diff --git a/src/lib/libcrypto/bn/asm/x86w32.asm b/src/lib/libcrypto/bn/asm/x86w32.asm
index fc6f917714..957d71e3b1 100644
--- a/src/lib/libcrypto/bn/asm/x86w32.asm
+++ b/src/lib/libcrypto/bn/asm/x86w32.asm
@@ -6,11 +6,11 @@ F_TEXT SEGMENT WORD USE16 PUBLIC 'CODE'
6F_TEXT ENDS 6F_TEXT ENDS
7_DATA SEGMENT WORD USE16 PUBLIC 'DATA' 7_DATA SEGMENT WORD USE16 PUBLIC 'DATA'
8_DATA ENDS 8_DATA ENDS
9CONST SEGMENT WORD USE16 PUBLIC 'CONST' 9_CONST SEGMENT WORD USE16 PUBLIC 'CONST'
10CONST ENDS 10_CONST ENDS
11_BSS SEGMENT WORD USE16 PUBLIC 'BSS' 11_BSS SEGMENT WORD USE16 PUBLIC 'BSS'
12_BSS ENDS 12_BSS ENDS
13DGROUP GROUP CONST, _BSS, _DATA 13DGROUP GROUP _CONST, _BSS, _DATA
14 ASSUME DS: DGROUP, SS: DGROUP 14 ASSUME DS: DGROUP, SS: DGROUP
15F_TEXT SEGMENT 15F_TEXT SEGMENT
16 ASSUME CS: F_TEXT 16 ASSUME CS: F_TEXT
@@ -89,7 +89,7 @@ $L555:
89 mov bp,WORD PTR [bp+26] ; load num 89 mov bp,WORD PTR [bp+26] ; load num
90 and bp,3 90 and bp,3
91 dec bp 91 dec bp
92 js $L547 92 js $L547m
93 93
94 mov eax,ecx 94 mov eax,ecx
95 mul DWORD PTR es:[bx] ; w* *a 95 mul DWORD PTR es:[bx] ; w* *a
@@ -100,7 +100,7 @@ $L555:
100 mov DWORD PTR ds:[di],eax 100 mov DWORD PTR ds:[di],eax
101 mov esi,edx 101 mov esi,edx
102 dec bp 102 dec bp
103 js $L547 ; Note that we are now testing for -1 103 js $L547m ; Note that we are now testing for -1
104 ; 104 ;
105 mov eax,ecx 105 mov eax,ecx
106 mul DWORD PTR es:[bx+4] ; w* *a 106 mul DWORD PTR es:[bx+4] ; w* *a
@@ -111,7 +111,7 @@ $L555:
111 mov DWORD PTR ds:[di+4],eax 111 mov DWORD PTR ds:[di+4],eax
112 mov esi,edx 112 mov esi,edx
113 dec bp 113 dec bp
114 js $L547 114 js $L547m
115 ; 115 ;
116 mov eax,ecx 116 mov eax,ecx
117 mul DWORD PTR es:[bx+8] ; w* *a 117 mul DWORD PTR es:[bx+8] ; w* *a
@@ -121,7 +121,7 @@ $L555:
121 adc edx,0 121 adc edx,0
122 mov DWORD PTR ds:[di+8],eax 122 mov DWORD PTR ds:[di+8],eax
123 mov esi,edx 123 mov esi,edx
124$L547: 124$L547m:
125 mov eax,esi 125 mov eax,esi
126 mov edx,esi 126 mov edx,esi
127 shr edx,16 127 shr edx,16
@@ -315,37 +315,35 @@ _bn_add_words PROC FAR
315; ap = 22 315; ap = 22
316; rp = 18 316; rp = 18
317 xor esi,esi ;c=0; 317 xor esi,esi ;c=0;
318 mov bx,WORD PTR [bp+18] ; load low r
318 mov si,WORD PTR [bp+22] ; load a 319 mov si,WORD PTR [bp+22] ; load a
319 mov es,WORD PTR [bp+24] ; load a 320 mov es,WORD PTR [bp+24] ; load a
320 mov di,WORD PTR [bp+26] ; load b 321 mov di,WORD PTR [bp+26] ; load b
321 mov ds,WORD PTR [bp+28] ; load b 322 mov ds,WORD PTR [bp+28] ; load b
322 323
323 mov dx,WORD PTR [bp+30] ; load num 324 mov dx,WORD PTR [bp+30] ; load num
324 dec dx
325 js $L547
326 xor ecx,ecx 325 xor ecx,ecx
326 dec dx
327 js $L547a
327 328
328$L5477: 329$L5477:
329 xor ebx,ebx
330 mov eax,DWORD PTR es:[si] ; *a 330 mov eax,DWORD PTR es:[si] ; *a
331 add eax,ecx 331 add eax,ecx
332 adc ebx,0 332 mov ecx,0
333 adc ecx,0
333 add si,4 ; a++ 334 add si,4 ; a++
334 add eax,DWORD PTR ds:[di] ; + *b 335 add eax,DWORD PTR ds:[di] ; + *b
335 mov ecx,ebx
336 adc ecx,0 336 adc ecx,0
337 add di,4
338 mov bx,WORD PTR [bp+18]
339 mov ds,WORD PTR [bp+20] 337 mov ds,WORD PTR [bp+20]
338 add di,4
340 mov DWORD PTR ds:[bx],eax 339 mov DWORD PTR ds:[bx],eax
341 add bx,4
342 mov ds,WORD PTR [bp+28] 340 mov ds,WORD PTR [bp+28]
343 mov WORD PTR [bp+18],bx 341 add bx,4
344 dec dx 342 dec dx
345 js $L547 ; Note that we are now testing for -1 343 js $L547a ; Note that we are now testing for -1
346 jmp $L5477 344 jmp $L5477
347 ; 345 ;
348$L547: 346$L547a:
349 mov eax,ecx 347 mov eax,ecx
350 mov edx,ecx 348 mov edx,ecx
351 shr edx,16 349 shr edx,16
diff --git a/src/lib/libcrypto/bn/bn.err b/src/lib/libcrypto/bn/bn.err
deleted file mode 100644
index 7ccc247c41..0000000000
--- a/src/lib/libcrypto/bn/bn.err
+++ /dev/null
@@ -1,27 +0,0 @@
1/* Error codes for the BN functions. */
2
3/* Function codes. */
4#define BN_F_BN_BLINDING_CONVERT 100
5#define BN_F_BN_BLINDING_INVERT 101
6#define BN_F_BN_BLINDING_NEW 102
7#define BN_F_BN_BLINDING_UPDATE 103
8#define BN_F_BN_BN2DEC 104
9#define BN_F_BN_BN2HEX 105
10#define BN_F_BN_CTX_NEW 106
11#define BN_F_BN_DIV 107
12#define BN_F_BN_EXPAND2 108
13#define BN_F_BN_MOD_EXP_MONT 109
14#define BN_F_BN_MOD_INVERSE 110
15#define BN_F_BN_MOD_MUL_RECIPROCAL 111
16#define BN_F_BN_MPI2BN 112
17#define BN_F_BN_NEW 113
18#define BN_F_BN_RAND 114
19
20/* Reason codes. */
21#define BN_R_BAD_RECIPROCAL 100
22#define BN_R_CALLED_WITH_EVEN_MODULUS 101
23#define BN_R_DIV_BY_ZERO 102
24#define BN_R_ENCODING_ERROR 103
25#define BN_R_INVALID_LENGTH 104
26#define BN_R_NOT_INITALISED 105
27#define BN_R_NO_INVERSE 106
diff --git a/src/lib/libcrypto/bn/bn.org b/src/lib/libcrypto/bn/bn.h
index 66dde285d6..f935e1ca79 100644
--- a/src/lib/libcrypto/bn/bn.org
+++ b/src/lib/libcrypto/bn/bn.h
@@ -1,4 +1,4 @@
1/* crypto/bn/bn.org */ 1/* crypto/bn/bn.h */
2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -56,27 +56,25 @@
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58 58
59/* WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
60 *
61 * Always modify bn.org since bn.h is automatically generated from
62 * it during SSLeay configuration.
63 *
64 * WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
65 */
66
67#ifndef HEADER_BN_H 59#ifndef HEADER_BN_H
68#define HEADER_BN_H 60#define HEADER_BN_H
69 61
62#ifndef WIN16
63#include <stdio.h> /* FILE */
64#endif
65#include <openssl/opensslconf.h>
66
70#ifdef __cplusplus 67#ifdef __cplusplus
71extern "C" { 68extern "C" {
72#endif 69#endif
73 70
74#undef BN_LLONG 71#ifdef VMS
75 72#undef BN_LLONG /* experimental, so far... */
76#ifdef WIN32
77#define BN_LLONG /* This comment stops Configure mutilating things */
78#endif 73#endif
79 74
75#define BN_MUL_COMBA
76#define BN_SQR_COMBA
77#define BN_RECURSION
80#define RECP_MUL_MOD 78#define RECP_MUL_MOD
81#define MONT_MUL_MOD 79#define MONT_MUL_MOD
82 80
@@ -95,16 +93,6 @@ extern "C" {
95#define BN_DIV2W 93#define BN_DIV2W
96#endif 94#endif
97 95
98/* Only one for the following should be defined */
99/* The prime number generation stuff may not work when
100 * EIGHT_BIT but I don't care since I've only used this mode
101 * for debuging the bignum libraries */
102#undef SIXTY_FOUR_BIT_LONG
103#undef SIXTY_FOUR_BIT
104#define THIRTY_TWO_BIT
105#undef SIXTEEN_BIT
106#undef EIGHT_BIT
107
108/* assuming long is 64bit - this is the DEC Alpha 96/* assuming long is 64bit - this is the DEC Alpha
109 * unsigned long long is only 64 bits :-(, don't define 97 * unsigned long long is only 64 bits :-(, don't define
110 * BN_LLONG for the DEC Alpha */ 98 * BN_LLONG for the DEC Alpha */
@@ -116,20 +104,26 @@ extern "C" {
116#define BN_BYTES 8 104#define BN_BYTES 8
117#define BN_BITS2 64 105#define BN_BITS2 64
118#define BN_BITS4 32 106#define BN_BITS4 32
107#define BN_MASK (0xffffffffffffffffffffffffffffffffLL)
119#define BN_MASK2 (0xffffffffffffffffL) 108#define BN_MASK2 (0xffffffffffffffffL)
120#define BN_MASK2l (0xffffffffL) 109#define BN_MASK2l (0xffffffffL)
121#define BN_MASK2h (0xffffffff00000000L) 110#define BN_MASK2h (0xffffffff00000000L)
122#define BN_MASK2h1 (0xffffffff80000000L) 111#define BN_MASK2h1 (0xffffffff80000000L)
123#define BN_TBIT (0x8000000000000000L) 112#define BN_TBIT (0x8000000000000000L)
124#define BN_DEC_CONV (10000000000000000000L) 113#define BN_DEC_CONV (10000000000000000000UL)
125#define BN_DEC_FMT1 "%lu" 114#define BN_DEC_FMT1 "%lu"
126#define BN_DEC_FMT2 "%019lu" 115#define BN_DEC_FMT2 "%019lu"
127#define BN_DEC_NUM 19 116#define BN_DEC_NUM 19
128#endif 117#endif
129 118
119/* This is where the long long data type is 64 bits, but long is 32.
120 * For machines where there are 64bit registers, this is the mode to use.
121 * IRIX, on R4000 and above should use this mode, along with the relevant
122 * assembler code :-). Do NOT define BN_LLONG.
123 */
130#ifdef SIXTY_FOUR_BIT 124#ifdef SIXTY_FOUR_BIT
131#undef BN_LLONG 125#undef BN_LLONG
132/* #define BN_ULLONG unsigned long long */ 126#undef BN_ULLONG
133#define BN_ULONG unsigned long long 127#define BN_ULONG unsigned long long
134#define BN_LONG long long 128#define BN_LONG long long
135#define BN_BITS 128 129#define BN_BITS 128
@@ -141,14 +135,14 @@ extern "C" {
141#define BN_MASK2h (0xffffffff00000000LL) 135#define BN_MASK2h (0xffffffff00000000LL)
142#define BN_MASK2h1 (0xffffffff80000000LL) 136#define BN_MASK2h1 (0xffffffff80000000LL)
143#define BN_TBIT (0x8000000000000000LL) 137#define BN_TBIT (0x8000000000000000LL)
144#define BN_DEC_CONV (10000000000000000000L) 138#define BN_DEC_CONV (10000000000000000000LL)
145#define BN_DEC_FMT1 "%lu" 139#define BN_DEC_FMT1 "%llu"
146#define BN_DEC_FMT2 "%019lu" 140#define BN_DEC_FMT2 "%019llu"
147#define BN_DEC_NUM 19 141#define BN_DEC_NUM 19
148#endif 142#endif
149 143
150#ifdef THIRTY_TWO_BIT 144#ifdef THIRTY_TWO_BIT
151#ifdef WIN32 145#if defined(WIN32) && !defined(__GNUC__)
152#define BN_ULLONG unsigned _int64 146#define BN_ULLONG unsigned _int64
153#else 147#else
154#define BN_ULLONG unsigned long long 148#define BN_ULLONG unsigned long long
@@ -159,6 +153,12 @@ extern "C" {
159#define BN_BYTES 4 153#define BN_BYTES 4
160#define BN_BITS2 32 154#define BN_BITS2 32
161#define BN_BITS4 16 155#define BN_BITS4 16
156#ifdef WIN32
157/* VC++ doesn't like the LL suffix */
158#define BN_MASK (0xffffffffffffffffL)
159#else
160#define BN_MASK (0xffffffffffffffffLL)
161#endif
162#define BN_MASK2 (0xffffffffL) 162#define BN_MASK2 (0xffffffffL)
163#define BN_MASK2l (0xffff) 163#define BN_MASK2l (0xffff)
164#define BN_MASK2h1 (0xffff8000L) 164#define BN_MASK2h1 (0xffff8000L)
@@ -181,6 +181,7 @@ extern "C" {
181#define BN_BYTES 2 181#define BN_BYTES 2
182#define BN_BITS2 16 182#define BN_BITS2 16
183#define BN_BITS4 8 183#define BN_BITS4 8
184#define BN_MASK (0xffffffff)
184#define BN_MASK2 (0xffff) 185#define BN_MASK2 (0xffff)
185#define BN_MASK2l (0xff) 186#define BN_MASK2l (0xff)
186#define BN_MASK2h1 (0xff80) 187#define BN_MASK2h1 (0xff80)
@@ -203,6 +204,7 @@ extern "C" {
203#define BN_BYTES 1 204#define BN_BYTES 1
204#define BN_BITS2 8 205#define BN_BITS2 8
205#define BN_BITS4 4 206#define BN_BITS4 4
207#define BN_MASK (0xffff)
206#define BN_MASK2 (0xff) 208#define BN_MASK2 (0xff)
207#define BN_MASK2l (0xf) 209#define BN_MASK2l (0xf)
208#define BN_MASK2h1 (0xf8) 210#define BN_MASK2h1 (0xf8)
@@ -220,6 +222,12 @@ extern "C" {
220#undef BIGNUM 222#undef BIGNUM
221#endif 223#endif
222 224
225#define BN_FLG_MALLOCED 0x01
226#define BN_FLG_STATIC_DATA 0x02
227#define BN_FLG_FREE 0x8000 /* used for debugging */
228#define BN_set_flags(b,n) ((b)->flags|=(n))
229#define BN_get_flags(b,n) ((b)->flags&(n))
230
223typedef struct bignum_st 231typedef struct bignum_st
224 { 232 {
225 BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */ 233 BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
@@ -227,6 +235,7 @@ typedef struct bignum_st
227 /* The next are internal book keeping for bn_expand. */ 235 /* The next are internal book keeping for bn_expand. */
228 int max; /* Size of the d array. */ 236 int max; /* Size of the d array. */
229 int neg; /* one if the number is negative */ 237 int neg; /* one if the number is negative */
238 int flags;
230 } BIGNUM; 239 } BIGNUM;
231 240
232/* Used for temp variables */ 241/* Used for temp variables */
@@ -234,7 +243,8 @@ typedef struct bignum_st
234typedef struct bignum_ctx 243typedef struct bignum_ctx
235 { 244 {
236 int tos; 245 int tos;
237 BIGNUM *bn[BN_CTX_NUM+1]; 246 BIGNUM bn[BN_CTX_NUM+1];
247 int flags;
238 } BN_CTX; 248 } BN_CTX;
239 249
240typedef struct bn_blinding_st 250typedef struct bn_blinding_st
@@ -248,130 +258,150 @@ typedef struct bn_blinding_st
248/* Used for montgomery multiplication */ 258/* Used for montgomery multiplication */
249typedef struct bn_mont_ctx_st 259typedef struct bn_mont_ctx_st
250 { 260 {
261 int use_word; /* 0 for word form, 1 for long form */
251 int ri; /* number of bits in R */ 262 int ri; /* number of bits in R */
252 BIGNUM *RR; /* used to convert to montgomery form */ 263 BIGNUM RR; /* used to convert to montgomery form */
253 BIGNUM *N; /* The modulus */ 264 BIGNUM N; /* The modulus */
254 BIGNUM *Ni; /* The inverse of N */ 265 BIGNUM Ni; /* The inverse of N */
255 BN_ULONG n0; /* word form of inverse, normally only one of 266 BN_ULONG n0; /* word form of inverse, normally only one of
256 * Ni or n0 is defined */ 267 * Ni or n0 is defined */
268 int flags;
257 } BN_MONT_CTX; 269 } BN_MONT_CTX;
258 270
271/* Used for reciprocal division/mod functions
272 * It cannot be shared between threads
273 */
274typedef struct bn_recp_ctx_st
275 {
276 BIGNUM N; /* the divisor */
277 BIGNUM Nr; /* the reciprocal */
278 int num_bits;
279 int shift;
280 int flags;
281 } BN_RECP_CTX;
282
259#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\ 283#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\
260 r,a,(mont)->RR,(mont),ctx) 284 r,a,&((mont)->RR),(mont),ctx)
261 285
262#define BN_prime_checks (5) 286#define BN_prime_checks (5)
263 287
264#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8) 288#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
265#define BN_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w))) 289#define BN_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w)))
266#define BN_is_zero(a) (((a)->top <= 1) && ((a)->d[0] == (BN_ULONG)0)) 290#define BN_is_zero(a) (((a)->top == 0) || BN_is_word(a,0))
267#define BN_is_one(a) (BN_is_word((a),1)) 291#define BN_is_one(a) (BN_is_word((a),1))
268#define BN_is_odd(a) ((a)->d[0] & 1) 292#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
269#define BN_one(a) (BN_set_word((a),1)) 293#define BN_one(a) (BN_set_word((a),1))
270#define BN_zero(a) (BN_set_word((a),0)) 294#define BN_zero(a) (BN_set_word((a),0))
271 295
272#define BN_ascii2bn(a) BN_hex2bn(a) 296/*#define BN_ascii2bn(a) BN_hex2bn(a) */
273#define BN_bn2ascii(a) BN_bn2hex(a) 297/*#define BN_bn2ascii(a) BN_bn2hex(a) */
274 298
275#define bn_fix_top(a) \ 299#define bn_expand(n,b) ((((((b+BN_BITS2-1))/BN_BITS2)) <= (n)->max)?\
276 { \ 300 (n):bn_expand2((n),(b)/BN_BITS2+1))
277 BN_ULONG *fix_top_l; \
278 for (fix_top_l= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
279 if (*(fix_top_l--)) break; \
280 }
281
282#define bn_expand(n,b) ((((b)/BN_BITS2) <= (n)->max)?\
283 (n):bn_expand2((n),(b)/BN_BITS2))
284#define bn_wexpand(n,b) (((b) <= (n)->max)?(n):bn_expand2((n),(b))) 301#define bn_wexpand(n,b) (((b) <= (n)->max)?(n):bn_expand2((n),(b)))
285 302
303#define bn_fix_top(a) \
304 { \
305 BN_ULONG *ftl; \
306 if ((a)->top > 0) \
307 { \
308 for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
309 if (*(ftl--)) break; \
310 } \
311 }
286 312
287#ifndef NOPROTO
288BIGNUM *BN_value_one(void); 313BIGNUM *BN_value_one(void);
289char * BN_options(void); 314char * BN_options(void);
290BN_CTX *BN_CTX_new(void); 315BN_CTX *BN_CTX_new(void);
316void BN_CTX_init(BN_CTX *c);
291void BN_CTX_free(BN_CTX *c); 317void BN_CTX_free(BN_CTX *c);
292int BN_rand(BIGNUM *rnd, int bits, int top,int bottom); 318int BN_rand(BIGNUM *rnd, int bits, int top,int bottom);
293int BN_num_bits(BIGNUM *a); 319int BN_num_bits(const BIGNUM *a);
294int BN_num_bits_word(BN_ULONG); 320int BN_num_bits_word(BN_ULONG);
295BIGNUM *BN_new(void); 321BIGNUM *BN_new(void);
322void BN_init(BIGNUM *);
296void BN_clear_free(BIGNUM *a); 323void BN_clear_free(BIGNUM *a);
297BIGNUM *BN_copy(BIGNUM *a, BIGNUM *b); 324BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b);
298BIGNUM *BN_bin2bn(unsigned char *s,int len,BIGNUM *ret); 325BIGNUM *BN_bin2bn(const unsigned char *s,int len,BIGNUM *ret);
299int BN_bn2bin(BIGNUM *a, unsigned char *to); 326int BN_bn2bin(const BIGNUM *a, unsigned char *to);
300BIGNUM *BN_mpi2bn(unsigned char *s,int len,BIGNUM *ret); 327BIGNUM *BN_mpi2bn(unsigned char *s,int len,BIGNUM *ret);
301int BN_bn2mpi(BIGNUM *a, unsigned char *to); 328int BN_bn2mpi(const BIGNUM *a, unsigned char *to);
302int BN_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b); 329int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
303void bn_qsub(BIGNUM *r, BIGNUM *a, BIGNUM *b); 330int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
304void bn_qadd(BIGNUM *r, BIGNUM *a, BIGNUM *b); 331int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
305int BN_add(BIGNUM *r, BIGNUM *a, BIGNUM *b); 332int BN_add(BIGNUM *r, BIGNUM *a, BIGNUM *b);
306int BN_mod(BIGNUM *rem, BIGNUM *m, BIGNUM *d, BN_CTX *ctx); 333int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx);
307int BN_div(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BIGNUM *d, BN_CTX *ctx); 334int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
308int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b); 335 BN_CTX *ctx);
336int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b,BN_CTX *ctx);
309int BN_sqr(BIGNUM *r, BIGNUM *a,BN_CTX *ctx); 337int BN_sqr(BIGNUM *r, BIGNUM *a,BN_CTX *ctx);
310BN_ULONG BN_mod_word(BIGNUM *a, unsigned long w); 338BN_ULONG BN_mod_word(BIGNUM *a, BN_ULONG w);
311BN_ULONG BN_div_word(BIGNUM *a, unsigned long w); 339BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w);
312int BN_mul_word(BIGNUM *a, unsigned long w); 340int BN_mul_word(BIGNUM *a, BN_ULONG w);
313int BN_add_word(BIGNUM *a, unsigned long w); 341int BN_add_word(BIGNUM *a, BN_ULONG w);
314int BN_sub_word(BIGNUM *a, unsigned long w); 342int BN_sub_word(BIGNUM *a, BN_ULONG w);
315int BN_set_word(BIGNUM *a, unsigned long w); 343int BN_set_word(BIGNUM *a, BN_ULONG w);
316unsigned long BN_get_word(BIGNUM *a); 344BN_ULONG BN_get_word(BIGNUM *a);
317int BN_cmp(BIGNUM *a, BIGNUM *b); 345int BN_cmp(const BIGNUM *a, const BIGNUM *b);
318void BN_free(BIGNUM *a); 346void BN_free(BIGNUM *a);
319int BN_is_bit_set(BIGNUM *a, int n); 347int BN_is_bit_set(const BIGNUM *a, int n);
320int BN_lshift(BIGNUM *r, BIGNUM *a, int n); 348int BN_lshift(BIGNUM *r, const BIGNUM *a, int n);
321int BN_lshift1(BIGNUM *r, BIGNUM *a); 349int BN_lshift1(BIGNUM *r, BIGNUM *a);
322int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p,BN_CTX *ctx); 350int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p,BN_CTX *ctx);
323int BN_mod_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx); 351int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p,
324int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx, 352 const BIGNUM *m,BN_CTX *ctx);
325 BN_MONT_CTX *m_ctx); 353int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, const BIGNUM *p,
326int BN_mod_exp_recp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx); 354 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
355int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2,
356 BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx);
327int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, 357int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p,
328 BIGNUM *m,BN_CTX *ctx); 358 BIGNUM *m,BN_CTX *ctx);
329int BN_mask_bits(BIGNUM *a,int n); 359int BN_mask_bits(BIGNUM *a,int n);
330int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y, BIGNUM *m, 360int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
331 BIGNUM *i, int nb, BN_CTX *ctx);
332int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *m,
333 BN_CTX *ctx);
334#ifndef WIN16 361#ifndef WIN16
335int BN_print_fp(FILE *fp, BIGNUM *a); 362int BN_print_fp(FILE *fp, BIGNUM *a);
336#endif 363#endif
337#ifdef HEADER_BIO_H 364#ifdef HEADER_BIO_H
338int BN_print(BIO *fp, BIGNUM *a); 365int BN_print(BIO *fp, const BIGNUM *a);
339#else 366#else
340int BN_print(char *fp, BIGNUM *a); 367int BN_print(char *fp, const BIGNUM *a);
341#endif 368#endif
342int BN_reciprocal(BIGNUM *r, BIGNUM *m, BN_CTX *ctx); 369int BN_reciprocal(BIGNUM *r, BIGNUM *m, int len, BN_CTX *ctx);
343int BN_rshift(BIGNUM *r, BIGNUM *a, int n); 370int BN_rshift(BIGNUM *r, BIGNUM *a, int n);
344int BN_rshift1(BIGNUM *r, BIGNUM *a); 371int BN_rshift1(BIGNUM *r, BIGNUM *a);
345void BN_clear(BIGNUM *a); 372void BN_clear(BIGNUM *a);
346BIGNUM *bn_expand2(BIGNUM *b, int bits); 373BIGNUM *bn_expand2(BIGNUM *b, int bits);
347BIGNUM *BN_dup(BIGNUM *a); 374BIGNUM *BN_dup(const BIGNUM *a);
348int BN_ucmp(BIGNUM *a, BIGNUM *b); 375int BN_ucmp(const BIGNUM *a, const BIGNUM *b);
349int BN_set_bit(BIGNUM *a, int n); 376int BN_set_bit(BIGNUM *a, int n);
350int BN_clear_bit(BIGNUM *a, int n); 377int BN_clear_bit(BIGNUM *a, int n);
351char * BN_bn2hex(BIGNUM *a); 378char * BN_bn2hex(const BIGNUM *a);
352char * BN_bn2dec(BIGNUM *a); 379char * BN_bn2dec(const BIGNUM *a);
353int BN_hex2bn(BIGNUM **a,char *str); 380int BN_hex2bn(BIGNUM **a, const char *str);
354int BN_dec2bn(BIGNUM **a,char *str); 381int BN_dec2bn(BIGNUM **a, const char *str);
355int BN_gcd(BIGNUM *r,BIGNUM *in_a,BIGNUM *in_b,BN_CTX *ctx); 382int BN_gcd(BIGNUM *r,BIGNUM *in_a,BIGNUM *in_b,BN_CTX *ctx);
356BIGNUM *BN_mod_inverse(BIGNUM *a, BIGNUM *n,BN_CTX *ctx); 383BIGNUM *BN_mod_inverse(BIGNUM *ret,BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
357BIGNUM *BN_generate_prime(int bits,int strong,BIGNUM *add, 384BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int strong,BIGNUM *add,
358 BIGNUM *rem,void (*callback)(int,int,char *),char *cb_arg); 385 BIGNUM *rem,void (*callback)(int,int,void *),void *cb_arg);
359int BN_is_prime(BIGNUM *p,int nchecks,void (*callback)(int,int,char *), 386int BN_is_prime(BIGNUM *p,int nchecks,void (*callback)(int,int,void *),
360 BN_CTX *ctx,char *cb_arg); 387 BN_CTX *ctx,void *cb_arg);
361void ERR_load_BN_strings(void ); 388void ERR_load_BN_strings(void );
362 389
363BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w); 390BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w);
364BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w); 391BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w);
365void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num); 392void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num);
366BN_ULONG bn_div64(BN_ULONG h, BN_ULONG l, BN_ULONG d); 393BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
367BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num); 394BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num);
395BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num);
368 396
369BN_MONT_CTX *BN_MONT_CTX_new(void ); 397BN_MONT_CTX *BN_MONT_CTX_new(void );
398void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
370int BN_mod_mul_montgomery(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_MONT_CTX *mont, 399int BN_mod_mul_montgomery(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_MONT_CTX *mont,
371 BN_CTX *ctx); 400 BN_CTX *ctx);
372int BN_from_montgomery(BIGNUM *r,BIGNUM *a,BN_MONT_CTX *mont,BN_CTX *ctx); 401int BN_from_montgomery(BIGNUM *r,BIGNUM *a,BN_MONT_CTX *mont,BN_CTX *ctx);
373void BN_MONT_CTX_free(BN_MONT_CTX *mont); 402void BN_MONT_CTX_free(BN_MONT_CTX *mont);
374int BN_MONT_CTX_set(BN_MONT_CTX *mont,BIGNUM *modulus,BN_CTX *ctx); 403int BN_MONT_CTX_set(BN_MONT_CTX *mont,const BIGNUM *modulus,BN_CTX *ctx);
404BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from);
375 405
376BN_BLINDING *BN_BLINDING_new(BIGNUM *A,BIGNUM *Ai,BIGNUM *mod); 406BN_BLINDING *BN_BLINDING_new(BIGNUM *A,BIGNUM *Ai,BIGNUM *mod);
377void BN_BLINDING_free(BN_BLINDING *b); 407void BN_BLINDING_free(BN_BLINDING *b);
@@ -379,94 +409,26 @@ int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
379int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *r, BN_CTX *ctx); 409int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *r, BN_CTX *ctx);
380int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); 410int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
381 411
382#else 412void BN_set_params(int mul,int high,int low,int mont);
413int BN_get_params(int which); /* 0, mul, 1 high, 2 low, 3 mont */
383 414
384BIGNUM *BN_value_one(); 415void BN_RECP_CTX_init(BN_RECP_CTX *recp);
385char * BN_options(); 416BN_RECP_CTX *BN_RECP_CTX_new(void);
386BN_CTX *BN_CTX_new(); 417void BN_RECP_CTX_free(BN_RECP_CTX *recp);
387void BN_CTX_free(); 418int BN_RECP_CTX_set(BN_RECP_CTX *recp,const BIGNUM *rdiv,BN_CTX *ctx);
388int BN_rand(); 419int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y,
389int BN_num_bits(); 420 BN_RECP_CTX *recp,BN_CTX *ctx);
390int BN_num_bits_word(); 421int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
391BIGNUM *BN_new(); 422 const BIGNUM *m, BN_CTX *ctx);
392void BN_clear_free(); 423int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m,
393BIGNUM *BN_copy(); 424 BN_RECP_CTX *recp, BN_CTX *ctx);
394BIGNUM *BN_bin2bn();
395int BN_bn2bin();
396BIGNUM *BN_mpi2bn();
397int BN_bn2mpi();
398int BN_sub();
399void bn_qsub();
400void bn_qadd();
401int BN_add();
402int BN_mod();
403int BN_div();
404int BN_mul();
405int BN_sqr();
406BN_ULONG BN_mod_word();
407BN_ULONG BN_div_word();
408int BN_add_word();
409int BN_sub_word();
410int BN_mul_word();
411int BN_set_word();
412unsigned long BN_get_word();
413int BN_cmp();
414void BN_free();
415int BN_is_bit_set();
416int BN_lshift();
417int BN_lshift1();
418int BN_exp();
419int BN_mod_exp();
420int BN_mod_exp_mont();
421int BN_mod_exp_recp();
422int BN_mod_exp_simple();
423int BN_mask_bits();
424int BN_mod_mul_reciprocal();
425int BN_mod_mul();
426#ifndef WIN16
427int BN_print_fp();
428#endif
429int BN_print();
430int BN_reciprocal();
431int BN_rshift();
432int BN_rshift1();
433void BN_clear();
434BIGNUM *bn_expand2();
435BIGNUM *BN_dup();
436int BN_ucmp();
437int BN_set_bit();
438int BN_clear_bit();
439char * BN_bn2hex();
440char * BN_bn2dec();
441int BN_hex2bn();
442int BN_dec2bn();
443int BN_gcd();
444BIGNUM *BN_mod_inverse();
445BIGNUM *BN_generate_prime();
446int BN_is_prime();
447void ERR_load_BN_strings();
448
449BN_ULONG bn_mul_add_words();
450BN_ULONG bn_mul_words();
451void bn_sqr_words();
452BN_ULONG bn_div64();
453BN_ULONG bn_add_words();
454
455int BN_mod_mul_montgomery();
456int BN_from_montgomery();
457BN_MONT_CTX *BN_MONT_CTX_new();
458void BN_MONT_CTX_free();
459int BN_MONT_CTX_set();
460
461BN_BLINDING *BN_BLINDING_new();
462void BN_BLINDING_free();
463int BN_BLINDING_update();
464int BN_BLINDING_convert();
465int BN_BLINDING_invert();
466 425
467#endif
468 426
469/* BEGIN ERROR CODES */ 427/* BEGIN ERROR CODES */
428/* The following lines are auto generated by the script mkerr.pl. Any changes
429 * made after this point may be overwritten when the script is next run.
430 */
431
470/* Error codes for the BN functions. */ 432/* Error codes for the BN functions. */
471 433
472/* Function codes. */ 434/* Function codes. */
@@ -485,16 +447,19 @@ int BN_BLINDING_invert();
485#define BN_F_BN_MPI2BN 112 447#define BN_F_BN_MPI2BN 112
486#define BN_F_BN_NEW 113 448#define BN_F_BN_NEW 113
487#define BN_F_BN_RAND 114 449#define BN_F_BN_RAND 114
450#define BN_F_BN_USUB 115
488 451
489/* Reason codes. */ 452/* Reason codes. */
490#define BN_R_BAD_RECIPROCAL 100 453#define BN_R_ARG2_LT_ARG3 100
491#define BN_R_CALLED_WITH_EVEN_MODULUS 101 454#define BN_R_BAD_RECIPROCAL 101
492#define BN_R_DIV_BY_ZERO 102 455#define BN_R_CALLED_WITH_EVEN_MODULUS 102
493#define BN_R_ENCODING_ERROR 103 456#define BN_R_DIV_BY_ZERO 103
494#define BN_R_INVALID_LENGTH 104 457#define BN_R_ENCODING_ERROR 104
495#define BN_R_NOT_INITALISED 105 458#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
496#define BN_R_NO_INVERSE 106 459#define BN_R_INVALID_LENGTH 106
497 460#define BN_R_NOT_INITIALIZED 107
461#define BN_R_NO_INVERSE 108
462
498#ifdef __cplusplus 463#ifdef __cplusplus
499} 464}
500#endif 465#endif
diff --git a/src/lib/libcrypto/bn/bn.mul b/src/lib/libcrypto/bn/bn.mul
new file mode 100644
index 0000000000..9728870d38
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn.mul
@@ -0,0 +1,19 @@
1We need
2
3* bn_mul_comba8
4* bn_mul_comba4
5* bn_mul_normal
6* bn_mul_recursive
7
8* bn_sqr_comba8
9* bn_sqr_comba4
10bn_sqr_normal -> BN_sqr
11* bn_sqr_recursive
12
13* bn_mul_low_recursive
14* bn_mul_low_normal
15* bn_mul_high
16
17* bn_mul_part_recursive # symmetric but not power of 2
18
19bn_mul_asymetric_recursive # uneven, but do the chop up.
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c
index efb2e312e8..c5ab066c9e 100644
--- a/src/lib/libcrypto/bn/bn_add.c
+++ b/src/lib/libcrypto/bn/bn_add.c
@@ -61,14 +61,13 @@
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63/* r can == a or b */ 63/* r can == a or b */
64int BN_add(r, a, b) 64int BN_add(BIGNUM *r, BIGNUM *a, BIGNUM *b)
65BIGNUM *r;
66BIGNUM *a;
67BIGNUM *b;
68 { 65 {
69 int i;
70 BIGNUM *tmp; 66 BIGNUM *tmp;
71 67
68 bn_check_top(a);
69 bn_check_top(b);
70
72 /* a + b a+b 71 /* a + b a+b
73 * a + -b a-b 72 * a + -b a-b
74 * -a + b b-a 73 * -a + b b-a
@@ -84,14 +83,12 @@ BIGNUM *b;
84 83
85 if (BN_ucmp(a,b) < 0) 84 if (BN_ucmp(a,b) < 0)
86 { 85 {
87 if (bn_wexpand(r,b->top) == NULL) return(0); 86 if (!BN_usub(r,b,a)) return(0);
88 bn_qsub(r,b,a);
89 r->neg=1; 87 r->neg=1;
90 } 88 }
91 else 89 else
92 { 90 {
93 if (bn_wexpand(r,a->top) == NULL) return(0); 91 if (!BN_usub(r,a,b)) return(0);
94 bn_qsub(r,a,b);
95 r->neg=0; 92 r->neg=0;
96 } 93 }
97 return(1); 94 return(1);
@@ -102,35 +99,32 @@ BIGNUM *b;
102 else 99 else
103 r->neg=0; 100 r->neg=0;
104 101
105 i=(a->top > b->top); 102 if (!BN_uadd(r,a,b)) return(0);
106
107 if (i)
108 {
109 if (bn_wexpand(r,a->top+1) == NULL) return(0);
110 bn_qadd(r,a,b);
111 }
112 else
113 {
114 if (bn_wexpand(r,b->top+1) == NULL) return(0);
115 bn_qadd(r,b,a);
116 }
117 return(1); 103 return(1);
118 } 104 }
119 105
120/* unsigned add of b to a, r must be large enough */ 106/* unsigned add of b to a, r must be large enough */
121void bn_qadd(r,a,b) 107int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
122BIGNUM *r;
123BIGNUM *a;
124BIGNUM *b;
125 { 108 {
126 register int i; 109 register int i;
127 int max,min; 110 int max,min;
128 BN_ULONG *ap,*bp,*rp,carry,t1; 111 BN_ULONG *ap,*bp,*rp,carry,t1;
112 const BIGNUM *tmp;
113
114 bn_check_top(a);
115 bn_check_top(b);
129 116
117 if (a->top < b->top)
118 { tmp=a; a=b; b=tmp; }
130 max=a->top; 119 max=a->top;
131 min=b->top; 120 min=b->top;
121
122 if (bn_wexpand(r,max+1) == NULL)
123 return(0);
124
132 r->top=max; 125 r->top=max;
133 126
127
134 ap=a->d; 128 ap=a->d;
135 bp=b->d; 129 bp=b->d;
136 rp=r->d; 130 rp=r->d;
@@ -160,8 +154,154 @@ BIGNUM *b;
160 r->top++; 154 r->top++;
161 } 155 }
162 } 156 }
163 for (; i<max; i++) 157 if (rp != ap)
164 *(rp++)= *(ap++); 158 {
159 for (; i<max; i++)
160 *(rp++)= *(ap++);
161 }
165 /* memcpy(rp,ap,sizeof(*ap)*(max-i));*/ 162 /* memcpy(rp,ap,sizeof(*ap)*(max-i));*/
163 return(1);
164 }
165
166/* unsigned subtraction of b from a, a must be larger than b. */
167int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
168 {
169 int max,min;
170 register BN_ULONG t1,t2,*ap,*bp,*rp;
171 int i,carry;
172#if defined(IRIX_CC_BUG) && !defined(LINT)
173 int dummy;
174#endif
175
176 bn_check_top(a);
177 bn_check_top(b);
178
179 if (a->top < b->top) /* hmm... should not be happening */
180 {
181 BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3);
182 return(0);
183 }
184
185 max=a->top;
186 min=b->top;
187 if (bn_wexpand(r,max) == NULL) return(0);
188
189 ap=a->d;
190 bp=b->d;
191 rp=r->d;
192
193#if 1
194 carry=0;
195 for (i=0; i<min; i++)
196 {
197 t1= *(ap++);
198 t2= *(bp++);
199 if (carry)
200 {
201 carry=(t1 <= t2);
202 t1=(t1-t2-1)&BN_MASK2;
203 }
204 else
205 {
206 carry=(t1 < t2);
207 t1=(t1-t2)&BN_MASK2;
208 }
209#if defined(IRIX_CC_BUG) && !defined(LINT)
210 dummy=t1;
211#endif
212 *(rp++)=t1&BN_MASK2;
213 }
214#else
215 carry=bn_sub_words(rp,ap,bp,min);
216 ap+=min;
217 bp+=min;
218 rp+=min;
219 i=min;
220#endif
221 if (carry) /* subtracted */
222 {
223 while (i < max)
224 {
225 i++;
226 t1= *(ap++);
227 t2=(t1-1)&BN_MASK2;
228 *(rp++)=t2;
229 if (t1 > t2) break;
230 }
231 }
232#if 0
233 memcpy(rp,ap,sizeof(*rp)*(max-i));
234#else
235 if (rp != ap)
236 {
237 for (;;)
238 {
239 if (i++ >= max) break;
240 rp[0]=ap[0];
241 if (i++ >= max) break;
242 rp[1]=ap[1];
243 if (i++ >= max) break;
244 rp[2]=ap[2];
245 if (i++ >= max) break;
246 rp[3]=ap[3];
247 rp+=4;
248 ap+=4;
249 }
250 }
251#endif
252
253 r->top=max;
254 bn_fix_top(r);
255 return(1);
256 }
257
258int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
259 {
260 int max;
261 int add=0,neg=0;
262 const BIGNUM *tmp;
263
264 bn_check_top(a);
265 bn_check_top(b);
266
267 /* a - b a-b
268 * a - -b a+b
269 * -a - b -(a+b)
270 * -a - -b b-a
271 */
272 if (a->neg)
273 {
274 if (b->neg)
275 { tmp=a; a=b; b=tmp; }
276 else
277 { add=1; neg=1; }
278 }
279 else
280 {
281 if (b->neg) { add=1; neg=0; }
282 }
283
284 if (add)
285 {
286 if (!BN_uadd(r,a,b)) return(0);
287 r->neg=neg;
288 return(1);
289 }
290
291 /* We are actually doing a - b :-) */
292
293 max=(a->top > b->top)?a->top:b->top;
294 if (bn_wexpand(r,max) == NULL) return(0);
295 if (BN_ucmp(a,b) < 0)
296 {
297 if (!BN_usub(r,b,a)) return(0);
298 r->neg=1;
299 }
300 else
301 {
302 if (!BN_usub(r,a,b)) return(0);
303 r->neg=0;
304 }
305 return(1);
166 } 306 }
167 307
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
new file mode 100644
index 0000000000..4d3da16a0c
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -0,0 +1,802 @@
1/* crypto/bn/bn_asm.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63#ifdef BN_LLONG
64
65BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
66 {
67 BN_ULONG c1=0;
68
69 bn_check_num(num);
70 if (num <= 0) return(c1);
71
72 for (;;)
73 {
74 mul_add(rp[0],ap[0],w,c1);
75 if (--num == 0) break;
76 mul_add(rp[1],ap[1],w,c1);
77 if (--num == 0) break;
78 mul_add(rp[2],ap[2],w,c1);
79 if (--num == 0) break;
80 mul_add(rp[3],ap[3],w,c1);
81 if (--num == 0) break;
82 ap+=4;
83 rp+=4;
84 }
85
86 return(c1);
87 }
88
89BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
90 {
91 BN_ULONG c1=0;
92
93 bn_check_num(num);
94 if (num <= 0) return(c1);
95
96 /* for (;;) */
97 while (1) /* circumvent egcs-1.1.2 bug */
98 {
99 mul(rp[0],ap[0],w,c1);
100 if (--num == 0) break;
101 mul(rp[1],ap[1],w,c1);
102 if (--num == 0) break;
103 mul(rp[2],ap[2],w,c1);
104 if (--num == 0) break;
105 mul(rp[3],ap[3],w,c1);
106 if (--num == 0) break;
107 ap+=4;
108 rp+=4;
109 }
110 return(c1);
111 }
112
113void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
114 {
115 bn_check_num(n);
116 if (n <= 0) return;
117 for (;;)
118 {
119 BN_ULLONG t;
120
121 t=(BN_ULLONG)(a[0])*(a[0]);
122 r[0]=Lw(t); r[1]=Hw(t);
123 if (--n == 0) break;
124
125 t=(BN_ULLONG)(a[1])*(a[1]);
126 r[2]=Lw(t); r[3]=Hw(t);
127 if (--n == 0) break;
128
129 t=(BN_ULLONG)(a[2])*(a[2]);
130 r[4]=Lw(t); r[5]=Hw(t);
131 if (--n == 0) break;
132
133 t=(BN_ULLONG)(a[3])*(a[3]);
134 r[6]=Lw(t); r[7]=Hw(t);
135 if (--n == 0) break;
136
137 a+=4;
138 r+=8;
139 }
140 }
141
142#else
143
144BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
145 {
146 BN_ULONG c=0;
147 BN_ULONG bl,bh;
148
149 bn_check_num(num);
150 if (num <= 0) return((BN_ULONG)0);
151
152 bl=LBITS(w);
153 bh=HBITS(w);
154
155 for (;;)
156 {
157 mul_add(rp[0],ap[0],bl,bh,c);
158 if (--num == 0) break;
159 mul_add(rp[1],ap[1],bl,bh,c);
160 if (--num == 0) break;
161 mul_add(rp[2],ap[2],bl,bh,c);
162 if (--num == 0) break;
163 mul_add(rp[3],ap[3],bl,bh,c);
164 if (--num == 0) break;
165 ap+=4;
166 rp+=4;
167 }
168 return(c);
169 }
170
171BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
172 {
173 BN_ULONG carry=0;
174 BN_ULONG bl,bh;
175
176 bn_check_num(num);
177 if (num <= 0) return((BN_ULONG)0);
178
179 bl=LBITS(w);
180 bh=HBITS(w);
181
182 for (;;)
183 {
184 mul(rp[0],ap[0],bl,bh,carry);
185 if (--num == 0) break;
186 mul(rp[1],ap[1],bl,bh,carry);
187 if (--num == 0) break;
188 mul(rp[2],ap[2],bl,bh,carry);
189 if (--num == 0) break;
190 mul(rp[3],ap[3],bl,bh,carry);
191 if (--num == 0) break;
192 ap+=4;
193 rp+=4;
194 }
195 return(carry);
196 }
197
198void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
199 {
200 bn_check_num(n);
201 if (n <= 0) return;
202 for (;;)
203 {
204 sqr64(r[0],r[1],a[0]);
205 if (--n == 0) break;
206
207 sqr64(r[2],r[3],a[1]);
208 if (--n == 0) break;
209
210 sqr64(r[4],r[5],a[2]);
211 if (--n == 0) break;
212
213 sqr64(r[6],r[7],a[3]);
214 if (--n == 0) break;
215
216 a+=4;
217 r+=8;
218 }
219 }
220
221#endif
222
223#if defined(BN_LLONG) && defined(BN_DIV2W)
224
225BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
226 {
227 return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
228 }
229
230#else
231
232/* Divide h-l by d and return the result. */
233/* I need to test this some more :-( */
234BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
235 {
236 BN_ULONG dh,dl,q,ret=0,th,tl,t;
237 int i,count=2;
238
239 if (d == 0) return(BN_MASK2);
240
241 i=BN_num_bits_word(d);
242 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
243 {
244#if !defined(NO_STDIO) && !defined(WIN16)
245 fprintf(stderr,"Division would overflow (%d)\n",i);
246#endif
247 abort();
248 }
249 i=BN_BITS2-i;
250 if (h >= d) h-=d;
251
252 if (i)
253 {
254 d<<=i;
255 h=(h<<i)|(l>>(BN_BITS2-i));
256 l<<=i;
257 }
258 dh=(d&BN_MASK2h)>>BN_BITS4;
259 dl=(d&BN_MASK2l);
260 for (;;)
261 {
262 if ((h>>BN_BITS4) == dh)
263 q=BN_MASK2l;
264 else
265 q=h/dh;
266
267 th=q*dh;
268 tl=dl*q;
269 for (;;)
270 {
271 t=h-th;
272 if ((t&BN_MASK2h) ||
273 ((tl) <= (
274 (t<<BN_BITS4)|
275 ((l&BN_MASK2h)>>BN_BITS4))))
276 break;
277 q--;
278 th-=dh;
279 tl-=dl;
280 }
281 t=(tl>>BN_BITS4);
282 tl=(tl<<BN_BITS4)&BN_MASK2h;
283 th+=t;
284
285 if (l < tl) th++;
286 l-=tl;
287 if (h < th)
288 {
289 h+=d;
290 q--;
291 }
292 h-=th;
293
294 if (--count == 0) break;
295
296 ret=q<<BN_BITS4;
297 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
298 l=(l&BN_MASK2l)<<BN_BITS4;
299 }
300 ret|=q;
301 return(ret);
302 }
303#endif
304
305#ifdef BN_LLONG
306BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
307 {
308 BN_ULLONG ll=0;
309
310 bn_check_num(n);
311 if (n <= 0) return((BN_ULONG)0);
312
313 for (;;)
314 {
315 ll+=(BN_ULLONG)a[0]+b[0];
316 r[0]=(BN_ULONG)ll&BN_MASK2;
317 ll>>=BN_BITS2;
318 if (--n <= 0) break;
319
320 ll+=(BN_ULLONG)a[1]+b[1];
321 r[1]=(BN_ULONG)ll&BN_MASK2;
322 ll>>=BN_BITS2;
323 if (--n <= 0) break;
324
325 ll+=(BN_ULLONG)a[2]+b[2];
326 r[2]=(BN_ULONG)ll&BN_MASK2;
327 ll>>=BN_BITS2;
328 if (--n <= 0) break;
329
330 ll+=(BN_ULLONG)a[3]+b[3];
331 r[3]=(BN_ULONG)ll&BN_MASK2;
332 ll>>=BN_BITS2;
333 if (--n <= 0) break;
334
335 a+=4;
336 b+=4;
337 r+=4;
338 }
339 return((BN_ULONG)ll);
340 }
341#else
342BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
343 {
344 BN_ULONG c,l,t;
345
346 bn_check_num(n);
347 if (n <= 0) return((BN_ULONG)0);
348
349 c=0;
350 for (;;)
351 {
352 t=a[0];
353 t=(t+c)&BN_MASK2;
354 c=(t < c);
355 l=(t+b[0])&BN_MASK2;
356 c+=(l < t);
357 r[0]=l;
358 if (--n <= 0) break;
359
360 t=a[1];
361 t=(t+c)&BN_MASK2;
362 c=(t < c);
363 l=(t+b[1])&BN_MASK2;
364 c+=(l < t);
365 r[1]=l;
366 if (--n <= 0) break;
367
368 t=a[2];
369 t=(t+c)&BN_MASK2;
370 c=(t < c);
371 l=(t+b[2])&BN_MASK2;
372 c+=(l < t);
373 r[2]=l;
374 if (--n <= 0) break;
375
376 t=a[3];
377 t=(t+c)&BN_MASK2;
378 c=(t < c);
379 l=(t+b[3])&BN_MASK2;
380 c+=(l < t);
381 r[3]=l;
382 if (--n <= 0) break;
383
384 a+=4;
385 b+=4;
386 r+=4;
387 }
388 return((BN_ULONG)c);
389 }
390#endif
391
392BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
393 {
394 BN_ULONG t1,t2;
395 int c=0;
396
397 bn_check_num(n);
398 if (n <= 0) return((BN_ULONG)0);
399
400 for (;;)
401 {
402 t1=a[0]; t2=b[0];
403 r[0]=(t1-t2-c)&BN_MASK2;
404 if (t1 != t2) c=(t1 < t2);
405 if (--n <= 0) break;
406
407 t1=a[1]; t2=b[1];
408 r[1]=(t1-t2-c)&BN_MASK2;
409 if (t1 != t2) c=(t1 < t2);
410 if (--n <= 0) break;
411
412 t1=a[2]; t2=b[2];
413 r[2]=(t1-t2-c)&BN_MASK2;
414 if (t1 != t2) c=(t1 < t2);
415 if (--n <= 0) break;
416
417 t1=a[3]; t2=b[3];
418 r[3]=(t1-t2-c)&BN_MASK2;
419 if (t1 != t2) c=(t1 < t2);
420 if (--n <= 0) break;
421
422 a+=4;
423 b+=4;
424 r+=4;
425 }
426 return(c);
427 }
428
429#ifdef BN_MUL_COMBA
430
431#undef bn_mul_comba8
432#undef bn_mul_comba4
433#undef bn_sqr_comba8
434#undef bn_sqr_comba4
435
436#ifdef BN_LLONG
437#define mul_add_c(a,b,c0,c1,c2) \
438 t=(BN_ULLONG)a*b; \
439 t1=(BN_ULONG)Lw(t); \
440 t2=(BN_ULONG)Hw(t); \
441 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
442 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
443
444#define mul_add_c2(a,b,c0,c1,c2) \
445 t=(BN_ULLONG)a*b; \
446 tt=(t+t)&BN_MASK; \
447 if (tt < t) c2++; \
448 t1=(BN_ULONG)Lw(tt); \
449 t2=(BN_ULONG)Hw(tt); \
450 c0=(c0+t1)&BN_MASK2; \
451 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
452 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
453
454#define sqr_add_c(a,i,c0,c1,c2) \
455 t=(BN_ULLONG)a[i]*a[i]; \
456 t1=(BN_ULONG)Lw(t); \
457 t2=(BN_ULONG)Hw(t); \
458 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
459 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
460
461#define sqr_add_c2(a,i,j,c0,c1,c2) \
462 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
463#else
464#define mul_add_c(a,b,c0,c1,c2) \
465 t1=LBITS(a); t2=HBITS(a); \
466 bl=LBITS(b); bh=HBITS(b); \
467 mul64(t1,t2,bl,bh); \
468 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
469 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
470
471#define mul_add_c2(a,b,c0,c1,c2) \
472 t1=LBITS(a); t2=HBITS(a); \
473 bl=LBITS(b); bh=HBITS(b); \
474 mul64(t1,t2,bl,bh); \
475 if (t2 & BN_TBIT) c2++; \
476 t2=(t2+t2)&BN_MASK2; \
477 if (t1 & BN_TBIT) t2++; \
478 t1=(t1+t1)&BN_MASK2; \
479 c0=(c0+t1)&BN_MASK2; \
480 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
481 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
482
483#define sqr_add_c(a,i,c0,c1,c2) \
484 sqr64(t1,t2,(a)[i]); \
485 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
486 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
487
488#define sqr_add_c2(a,i,j,c0,c1,c2) \
489 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
490#endif
491
492void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
493 {
494#ifdef BN_LLONG
495 BN_ULLONG t;
496#else
497 BN_ULONG bl,bh;
498#endif
499 BN_ULONG t1,t2;
500 BN_ULONG c1,c2,c3;
501
502 c1=0;
503 c2=0;
504 c3=0;
505 mul_add_c(a[0],b[0],c1,c2,c3);
506 r[0]=c1;
507 c1=0;
508 mul_add_c(a[0],b[1],c2,c3,c1);
509 mul_add_c(a[1],b[0],c2,c3,c1);
510 r[1]=c2;
511 c2=0;
512 mul_add_c(a[2],b[0],c3,c1,c2);
513 mul_add_c(a[1],b[1],c3,c1,c2);
514 mul_add_c(a[0],b[2],c3,c1,c2);
515 r[2]=c3;
516 c3=0;
517 mul_add_c(a[0],b[3],c1,c2,c3);
518 mul_add_c(a[1],b[2],c1,c2,c3);
519 mul_add_c(a[2],b[1],c1,c2,c3);
520 mul_add_c(a[3],b[0],c1,c2,c3);
521 r[3]=c1;
522 c1=0;
523 mul_add_c(a[4],b[0],c2,c3,c1);
524 mul_add_c(a[3],b[1],c2,c3,c1);
525 mul_add_c(a[2],b[2],c2,c3,c1);
526 mul_add_c(a[1],b[3],c2,c3,c1);
527 mul_add_c(a[0],b[4],c2,c3,c1);
528 r[4]=c2;
529 c2=0;
530 mul_add_c(a[0],b[5],c3,c1,c2);
531 mul_add_c(a[1],b[4],c3,c1,c2);
532 mul_add_c(a[2],b[3],c3,c1,c2);
533 mul_add_c(a[3],b[2],c3,c1,c2);
534 mul_add_c(a[4],b[1],c3,c1,c2);
535 mul_add_c(a[5],b[0],c3,c1,c2);
536 r[5]=c3;
537 c3=0;
538 mul_add_c(a[6],b[0],c1,c2,c3);
539 mul_add_c(a[5],b[1],c1,c2,c3);
540 mul_add_c(a[4],b[2],c1,c2,c3);
541 mul_add_c(a[3],b[3],c1,c2,c3);
542 mul_add_c(a[2],b[4],c1,c2,c3);
543 mul_add_c(a[1],b[5],c1,c2,c3);
544 mul_add_c(a[0],b[6],c1,c2,c3);
545 r[6]=c1;
546 c1=0;
547 mul_add_c(a[0],b[7],c2,c3,c1);
548 mul_add_c(a[1],b[6],c2,c3,c1);
549 mul_add_c(a[2],b[5],c2,c3,c1);
550 mul_add_c(a[3],b[4],c2,c3,c1);
551 mul_add_c(a[4],b[3],c2,c3,c1);
552 mul_add_c(a[5],b[2],c2,c3,c1);
553 mul_add_c(a[6],b[1],c2,c3,c1);
554 mul_add_c(a[7],b[0],c2,c3,c1);
555 r[7]=c2;
556 c2=0;
557 mul_add_c(a[7],b[1],c3,c1,c2);
558 mul_add_c(a[6],b[2],c3,c1,c2);
559 mul_add_c(a[5],b[3],c3,c1,c2);
560 mul_add_c(a[4],b[4],c3,c1,c2);
561 mul_add_c(a[3],b[5],c3,c1,c2);
562 mul_add_c(a[2],b[6],c3,c1,c2);
563 mul_add_c(a[1],b[7],c3,c1,c2);
564 r[8]=c3;
565 c3=0;
566 mul_add_c(a[2],b[7],c1,c2,c3);
567 mul_add_c(a[3],b[6],c1,c2,c3);
568 mul_add_c(a[4],b[5],c1,c2,c3);
569 mul_add_c(a[5],b[4],c1,c2,c3);
570 mul_add_c(a[6],b[3],c1,c2,c3);
571 mul_add_c(a[7],b[2],c1,c2,c3);
572 r[9]=c1;
573 c1=0;
574 mul_add_c(a[7],b[3],c2,c3,c1);
575 mul_add_c(a[6],b[4],c2,c3,c1);
576 mul_add_c(a[5],b[5],c2,c3,c1);
577 mul_add_c(a[4],b[6],c2,c3,c1);
578 mul_add_c(a[3],b[7],c2,c3,c1);
579 r[10]=c2;
580 c2=0;
581 mul_add_c(a[4],b[7],c3,c1,c2);
582 mul_add_c(a[5],b[6],c3,c1,c2);
583 mul_add_c(a[6],b[5],c3,c1,c2);
584 mul_add_c(a[7],b[4],c3,c1,c2);
585 r[11]=c3;
586 c3=0;
587 mul_add_c(a[7],b[5],c1,c2,c3);
588 mul_add_c(a[6],b[6],c1,c2,c3);
589 mul_add_c(a[5],b[7],c1,c2,c3);
590 r[12]=c1;
591 c1=0;
592 mul_add_c(a[6],b[7],c2,c3,c1);
593 mul_add_c(a[7],b[6],c2,c3,c1);
594 r[13]=c2;
595 c2=0;
596 mul_add_c(a[7],b[7],c3,c1,c2);
597 r[14]=c3;
598 r[15]=c1;
599 }
600
601void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
602 {
603#ifdef BN_LLONG
604 BN_ULLONG t;
605#else
606 BN_ULONG bl,bh;
607#endif
608 BN_ULONG t1,t2;
609 BN_ULONG c1,c2,c3;
610
611 c1=0;
612 c2=0;
613 c3=0;
614 mul_add_c(a[0],b[0],c1,c2,c3);
615 r[0]=c1;
616 c1=0;
617 mul_add_c(a[0],b[1],c2,c3,c1);
618 mul_add_c(a[1],b[0],c2,c3,c1);
619 r[1]=c2;
620 c2=0;
621 mul_add_c(a[2],b[0],c3,c1,c2);
622 mul_add_c(a[1],b[1],c3,c1,c2);
623 mul_add_c(a[0],b[2],c3,c1,c2);
624 r[2]=c3;
625 c3=0;
626 mul_add_c(a[0],b[3],c1,c2,c3);
627 mul_add_c(a[1],b[2],c1,c2,c3);
628 mul_add_c(a[2],b[1],c1,c2,c3);
629 mul_add_c(a[3],b[0],c1,c2,c3);
630 r[3]=c1;
631 c1=0;
632 mul_add_c(a[3],b[1],c2,c3,c1);
633 mul_add_c(a[2],b[2],c2,c3,c1);
634 mul_add_c(a[1],b[3],c2,c3,c1);
635 r[4]=c2;
636 c2=0;
637 mul_add_c(a[2],b[3],c3,c1,c2);
638 mul_add_c(a[3],b[2],c3,c1,c2);
639 r[5]=c3;
640 c3=0;
641 mul_add_c(a[3],b[3],c1,c2,c3);
642 r[6]=c1;
643 r[7]=c2;
644 }
645
646void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
647 {
648#ifdef BN_LLONG
649 BN_ULLONG t,tt;
650#else
651 BN_ULONG bl,bh;
652#endif
653 BN_ULONG t1,t2;
654 BN_ULONG c1,c2,c3;
655
656 c1=0;
657 c2=0;
658 c3=0;
659 sqr_add_c(a,0,c1,c2,c3);
660 r[0]=c1;
661 c1=0;
662 sqr_add_c2(a,1,0,c2,c3,c1);
663 r[1]=c2;
664 c2=0;
665 sqr_add_c(a,1,c3,c1,c2);
666 sqr_add_c2(a,2,0,c3,c1,c2);
667 r[2]=c3;
668 c3=0;
669 sqr_add_c2(a,3,0,c1,c2,c3);
670 sqr_add_c2(a,2,1,c1,c2,c3);
671 r[3]=c1;
672 c1=0;
673 sqr_add_c(a,2,c2,c3,c1);
674 sqr_add_c2(a,3,1,c2,c3,c1);
675 sqr_add_c2(a,4,0,c2,c3,c1);
676 r[4]=c2;
677 c2=0;
678 sqr_add_c2(a,5,0,c3,c1,c2);
679 sqr_add_c2(a,4,1,c3,c1,c2);
680 sqr_add_c2(a,3,2,c3,c1,c2);
681 r[5]=c3;
682 c3=0;
683 sqr_add_c(a,3,c1,c2,c3);
684 sqr_add_c2(a,4,2,c1,c2,c3);
685 sqr_add_c2(a,5,1,c1,c2,c3);
686 sqr_add_c2(a,6,0,c1,c2,c3);
687 r[6]=c1;
688 c1=0;
689 sqr_add_c2(a,7,0,c2,c3,c1);
690 sqr_add_c2(a,6,1,c2,c3,c1);
691 sqr_add_c2(a,5,2,c2,c3,c1);
692 sqr_add_c2(a,4,3,c2,c3,c1);
693 r[7]=c2;
694 c2=0;
695 sqr_add_c(a,4,c3,c1,c2);
696 sqr_add_c2(a,5,3,c3,c1,c2);
697 sqr_add_c2(a,6,2,c3,c1,c2);
698 sqr_add_c2(a,7,1,c3,c1,c2);
699 r[8]=c3;
700 c3=0;
701 sqr_add_c2(a,7,2,c1,c2,c3);
702 sqr_add_c2(a,6,3,c1,c2,c3);
703 sqr_add_c2(a,5,4,c1,c2,c3);
704 r[9]=c1;
705 c1=0;
706 sqr_add_c(a,5,c2,c3,c1);
707 sqr_add_c2(a,6,4,c2,c3,c1);
708 sqr_add_c2(a,7,3,c2,c3,c1);
709 r[10]=c2;
710 c2=0;
711 sqr_add_c2(a,7,4,c3,c1,c2);
712 sqr_add_c2(a,6,5,c3,c1,c2);
713 r[11]=c3;
714 c3=0;
715 sqr_add_c(a,6,c1,c2,c3);
716 sqr_add_c2(a,7,5,c1,c2,c3);
717 r[12]=c1;
718 c1=0;
719 sqr_add_c2(a,7,6,c2,c3,c1);
720 r[13]=c2;
721 c2=0;
722 sqr_add_c(a,7,c3,c1,c2);
723 r[14]=c3;
724 r[15]=c1;
725 }
726
727void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
728 {
729#ifdef BN_LLONG
730 BN_ULLONG t,tt;
731#else
732 BN_ULONG bl,bh;
733#endif
734 BN_ULONG t1,t2;
735 BN_ULONG c1,c2,c3;
736
737 c1=0;
738 c2=0;
739 c3=0;
740 sqr_add_c(a,0,c1,c2,c3);
741 r[0]=c1;
742 c1=0;
743 sqr_add_c2(a,1,0,c2,c3,c1);
744 r[1]=c2;
745 c2=0;
746 sqr_add_c(a,1,c3,c1,c2);
747 sqr_add_c2(a,2,0,c3,c1,c2);
748 r[2]=c3;
749 c3=0;
750 sqr_add_c2(a,3,0,c1,c2,c3);
751 sqr_add_c2(a,2,1,c1,c2,c3);
752 r[3]=c1;
753 c1=0;
754 sqr_add_c(a,2,c2,c3,c1);
755 sqr_add_c2(a,3,1,c2,c3,c1);
756 r[4]=c2;
757 c2=0;
758 sqr_add_c2(a,3,2,c3,c1,c2);
759 r[5]=c3;
760 c3=0;
761 sqr_add_c(a,3,c1,c2,c3);
762 r[6]=c1;
763 r[7]=c2;
764 }
765#else
766
767/* hmm... is it faster just to do a multiply? */
768#undef bn_sqr_comba4
769void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
770 {
771 BN_ULONG t[8];
772 bn_sqr_normal(r,a,4,t);
773 }
774
775#undef bn_sqr_comba8
776void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
777 {
778 BN_ULONG t[16];
779 bn_sqr_normal(r,a,8,t);
780 }
781
782void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
783 {
784 r[4]=bn_mul_words( &(r[0]),a,4,b[0]);
785 r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
786 r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
787 r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
788 }
789
790void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
791 {
792 r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
793 r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
794 r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
795 r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
796 r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
797 r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
798 r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
799 r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
800 }
801
802#endif /* BN_COMBA */
diff --git a/src/lib/libcrypto/bn/bn_blind.c b/src/lib/libcrypto/bn/bn_blind.c
index a7b34f0bf0..1b1bb06046 100644
--- a/src/lib/libcrypto/bn/bn_blind.c
+++ b/src/lib/libcrypto/bn/bn_blind.c
@@ -60,15 +60,18 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63BN_BLINDING *BN_BLINDING_new(A,Ai,mod) 63BN_BLINDING *BN_BLINDING_new(BIGNUM *A, BIGNUM *Ai, BIGNUM *mod)
64BIGNUM *A;
65BIGNUM *Ai;
66BIGNUM *mod;
67 { 64 {
68 BN_BLINDING *ret=NULL; 65 BN_BLINDING *ret=NULL;
69 66
67 bn_check_top(Ai);
68 bn_check_top(mod);
69
70 if ((ret=(BN_BLINDING *)Malloc(sizeof(BN_BLINDING))) == NULL) 70 if ((ret=(BN_BLINDING *)Malloc(sizeof(BN_BLINDING))) == NULL)
71 {
71 BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE); 72 BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE);
73 return(NULL);
74 }
72 memset(ret,0,sizeof(BN_BLINDING)); 75 memset(ret,0,sizeof(BN_BLINDING));
73 if ((ret->A=BN_new()) == NULL) goto err; 76 if ((ret->A=BN_new()) == NULL) goto err;
74 if ((ret->Ai=BN_new()) == NULL) goto err; 77 if ((ret->Ai=BN_new()) == NULL) goto err;
@@ -78,26 +81,26 @@ BIGNUM *mod;
78 return(ret); 81 return(ret);
79err: 82err:
80 if (ret != NULL) BN_BLINDING_free(ret); 83 if (ret != NULL) BN_BLINDING_free(ret);
81 return(ret); 84 return(NULL);
82 } 85 }
83 86
84void BN_BLINDING_free(r) 87void BN_BLINDING_free(BN_BLINDING *r)
85BN_BLINDING *r;
86 { 88 {
89 if(r == NULL)
90 return;
91
87 if (r->A != NULL) BN_free(r->A ); 92 if (r->A != NULL) BN_free(r->A );
88 if (r->Ai != NULL) BN_free(r->Ai); 93 if (r->Ai != NULL) BN_free(r->Ai);
89 Free(r); 94 Free(r);
90 } 95 }
91 96
92int BN_BLINDING_update(b,ctx) 97int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
93BN_BLINDING *b;
94BN_CTX *ctx;
95 { 98 {
96 int ret=0; 99 int ret=0;
97 100
98 if ((b->A == NULL) || (b->Ai == NULL)) 101 if ((b->A == NULL) || (b->Ai == NULL))
99 { 102 {
100 BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITALISED); 103 BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITIALIZED);
101 goto err; 104 goto err;
102 } 105 }
103 106
@@ -109,28 +112,26 @@ err:
109 return(ret); 112 return(ret);
110 } 113 }
111 114
112int BN_BLINDING_convert(n,b,ctx) 115int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
113BIGNUM *n;
114BN_BLINDING *b;
115BN_CTX *ctx;
116 { 116 {
117 bn_check_top(n);
118
117 if ((b->A == NULL) || (b->Ai == NULL)) 119 if ((b->A == NULL) || (b->Ai == NULL))
118 { 120 {
119 BNerr(BN_F_BN_BLINDING_CONVERT,BN_R_NOT_INITALISED); 121 BNerr(BN_F_BN_BLINDING_CONVERT,BN_R_NOT_INITIALIZED);
120 return(0); 122 return(0);
121 } 123 }
122 return(BN_mod_mul(n,n,b->A,b->mod,ctx)); 124 return(BN_mod_mul(n,n,b->A,b->mod,ctx));
123 } 125 }
124 126
125int BN_BLINDING_invert(n,b,ctx) 127int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
126BIGNUM *n;
127BN_BLINDING *b;
128BN_CTX *ctx;
129 { 128 {
130 int ret; 129 int ret;
130
131 bn_check_top(n);
131 if ((b->A == NULL) || (b->Ai == NULL)) 132 if ((b->A == NULL) || (b->Ai == NULL))
132 { 133 {
133 BNerr(BN_F_BN_BLINDING_INVERT,BN_R_NOT_INITALISED); 134 BNerr(BN_F_BN_BLINDING_INVERT,BN_R_NOT_INITIALIZED);
134 return(0); 135 return(0);
135 } 136 }
136 if ((ret=BN_mod_mul(n,n,b->Ai,b->mod,ctx)) >= 0) 137 if ((ret=BN_mod_mul(n,n,b->Ai,b->mod,ctx)) >= 0)
diff --git a/src/lib/libcrypto/bn/bn_comba.c b/src/lib/libcrypto/bn/bn_comba.c
new file mode 100644
index 0000000000..7ad09b4a6d
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_comba.c
@@ -0,0 +1,345 @@
1/* crypto/bn/bn_comba.c */
2#include <stdio.h>
3#include "bn_lcl.h"
4/* Auto generated from crypto/bn/comba.pl
5 */
6
7#undef bn_mul_comba8
8#undef bn_mul_comba4
9#undef bn_sqr_comba8
10#undef bn_sqr_comba4
11
12#ifdef BN_LLONG
13#define mul_add_c(a,b,c0,c1,c2) \
14 t=(BN_ULLONG)a*b; \
15 t1=(BN_ULONG)Lw(t); \
16 t2=(BN_ULONG)Hw(t); \
17 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
18 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
19
20#define mul_add_c2(a,b,c0,c1,c2) \
21 t=(BN_ULLONG)a*b; \
22 tt=(t+t)&BN_MASK; \
23 if (tt < t) c2++; \
24 t1=(BN_ULONG)Lw(tt); \
25 t2=(BN_ULONG)Hw(tt); \
26 c0=(c0+t1)&BN_MASK2; \
27 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
28 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
29
30#define sqr_add_c(a,i,c0,c1,c2) \
31 t=(BN_ULLONG)a[i]*a[i]; \
32 t1=(BN_ULONG)Lw(t); \
33 t2=(BN_ULONG)Hw(t); \
34 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
35 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
36
37#define sqr_add_c2(a,i,j,c0,c1,c2) \
38 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
39#else
40#define mul_add_c(a,b,c0,c1,c2) \
41 t1=LBITS(a); t2=HBITS(a); \
42 bl=LBITS(b); bh=HBITS(b); \
43 mul64(t1,t2,bl,bh); \
44 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
45 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
46
47#define mul_add_c2(a,b,c0,c1,c2) \
48 t1=LBITS(a); t2=HBITS(a); \
49 bl=LBITS(b); bh=HBITS(b); \
50 mul64(t1,t2,bl,bh); \
51 if (t2 & BN_TBIT) c2++; \
52 t2=(t2+t2)&BN_MASK2; \
53 if (t1 & BN_TBIT) t2++; \
54 t1=(t1+t1)&BN_MASK2; \
55 c0=(c0+t1)&BN_MASK2; \
56 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
57 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
58
59#define sqr_add_c(a,i,c0,c1,c2) \
60 sqr64(t1,t2,(a)[i]); \
61 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
62 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
63
64#define sqr_add_c2(a,i,j,c0,c1,c2) \
65 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
66#endif
67
68void bn_mul_comba88(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
69void bn_mul_comba44(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
70void bn_sqr_comba88(BN_ULONG *r,BN_ULONG *a);
71void bn_sqr_comba44(BN_ULONG *r,BN_ULONG *a);
72
73void bn_mul_comba88(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
74 {
75#ifdef BN_LLONG
76 BN_ULLONG t;
77#else
78 BN_ULONG bl,bh;
79#endif
80 BN_ULONG t1,t2;
81 BN_ULONG c1,c2,c3;
82
83 c1=0;
84 c2=0;
85 c3=0;
86 mul_add_c(a[0],b[0],c1,c2,c3);
87 r[0]=c1;
88 c1=0;
89 mul_add_c(a[0],b[1],c2,c3,c1);
90 mul_add_c(a[1],b[0],c2,c3,c1);
91 r[1]=c2;
92 c2=0;
93 mul_add_c(a[2],b[0],c3,c1,c2);
94 mul_add_c(a[1],b[1],c3,c1,c2);
95 mul_add_c(a[0],b[2],c3,c1,c2);
96 r[2]=c3;
97 c3=0;
98 mul_add_c(a[0],b[3],c1,c2,c3);
99 mul_add_c(a[1],b[2],c1,c2,c3);
100 mul_add_c(a[2],b[1],c1,c2,c3);
101 mul_add_c(a[3],b[0],c1,c2,c3);
102 r[3]=c1;
103 c1=0;
104 mul_add_c(a[4],b[0],c2,c3,c1);
105 mul_add_c(a[3],b[1],c2,c3,c1);
106 mul_add_c(a[2],b[2],c2,c3,c1);
107 mul_add_c(a[1],b[3],c2,c3,c1);
108 mul_add_c(a[0],b[4],c2,c3,c1);
109 r[4]=c2;
110 c2=0;
111 mul_add_c(a[0],b[5],c3,c1,c2);
112 mul_add_c(a[1],b[4],c3,c1,c2);
113 mul_add_c(a[2],b[3],c3,c1,c2);
114 mul_add_c(a[3],b[2],c3,c1,c2);
115 mul_add_c(a[4],b[1],c3,c1,c2);
116 mul_add_c(a[5],b[0],c3,c1,c2);
117 r[5]=c3;
118 c3=0;
119 mul_add_c(a[6],b[0],c1,c2,c3);
120 mul_add_c(a[5],b[1],c1,c2,c3);
121 mul_add_c(a[4],b[2],c1,c2,c3);
122 mul_add_c(a[3],b[3],c1,c2,c3);
123 mul_add_c(a[2],b[4],c1,c2,c3);
124 mul_add_c(a[1],b[5],c1,c2,c3);
125 mul_add_c(a[0],b[6],c1,c2,c3);
126 r[6]=c1;
127 c1=0;
128 mul_add_c(a[0],b[7],c2,c3,c1);
129 mul_add_c(a[1],b[6],c2,c3,c1);
130 mul_add_c(a[2],b[5],c2,c3,c1);
131 mul_add_c(a[3],b[4],c2,c3,c1);
132 mul_add_c(a[4],b[3],c2,c3,c1);
133 mul_add_c(a[5],b[2],c2,c3,c1);
134 mul_add_c(a[6],b[1],c2,c3,c1);
135 mul_add_c(a[7],b[0],c2,c3,c1);
136 r[7]=c2;
137 c2=0;
138 mul_add_c(a[7],b[1],c3,c1,c2);
139 mul_add_c(a[6],b[2],c3,c1,c2);
140 mul_add_c(a[5],b[3],c3,c1,c2);
141 mul_add_c(a[4],b[4],c3,c1,c2);
142 mul_add_c(a[3],b[5],c3,c1,c2);
143 mul_add_c(a[2],b[6],c3,c1,c2);
144 mul_add_c(a[1],b[7],c3,c1,c2);
145 r[8]=c3;
146 c3=0;
147 mul_add_c(a[2],b[7],c1,c2,c3);
148 mul_add_c(a[3],b[6],c1,c2,c3);
149 mul_add_c(a[4],b[5],c1,c2,c3);
150 mul_add_c(a[5],b[4],c1,c2,c3);
151 mul_add_c(a[6],b[3],c1,c2,c3);
152 mul_add_c(a[7],b[2],c1,c2,c3);
153 r[9]=c1;
154 c1=0;
155 mul_add_c(a[7],b[3],c2,c3,c1);
156 mul_add_c(a[6],b[4],c2,c3,c1);
157 mul_add_c(a[5],b[5],c2,c3,c1);
158 mul_add_c(a[4],b[6],c2,c3,c1);
159 mul_add_c(a[3],b[7],c2,c3,c1);
160 r[10]=c2;
161 c2=0;
162 mul_add_c(a[4],b[7],c3,c1,c2);
163 mul_add_c(a[5],b[6],c3,c1,c2);
164 mul_add_c(a[6],b[5],c3,c1,c2);
165 mul_add_c(a[7],b[4],c3,c1,c2);
166 r[11]=c3;
167 c3=0;
168 mul_add_c(a[7],b[5],c1,c2,c3);
169 mul_add_c(a[6],b[6],c1,c2,c3);
170 mul_add_c(a[5],b[7],c1,c2,c3);
171 r[12]=c1;
172 c1=0;
173 mul_add_c(a[6],b[7],c2,c3,c1);
174 mul_add_c(a[7],b[6],c2,c3,c1);
175 r[13]=c2;
176 c2=0;
177 mul_add_c(a[7],b[7],c3,c1,c2);
178 r[14]=c3;
179 r[15]=c1;
180 }
181
182void bn_mul_comba44(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
183 {
184#ifdef BN_LLONG
185 BN_ULLONG t;
186#else
187 BN_ULONG bl,bh;
188#endif
189 BN_ULONG t1,t2;
190 BN_ULONG c1,c2,c3;
191
192 c1=0;
193 c2=0;
194 c3=0;
195 mul_add_c(a[0],b[0],c1,c2,c3);
196 r[0]=c1;
197 c1=0;
198 mul_add_c(a[0],b[1],c2,c3,c1);
199 mul_add_c(a[1],b[0],c2,c3,c1);
200 r[1]=c2;
201 c2=0;
202 mul_add_c(a[2],b[0],c3,c1,c2);
203 mul_add_c(a[1],b[1],c3,c1,c2);
204 mul_add_c(a[0],b[2],c3,c1,c2);
205 r[2]=c3;
206 c3=0;
207 mul_add_c(a[0],b[3],c1,c2,c3);
208 mul_add_c(a[1],b[2],c1,c2,c3);
209 mul_add_c(a[2],b[1],c1,c2,c3);
210 mul_add_c(a[3],b[0],c1,c2,c3);
211 r[3]=c1;
212 c1=0;
213 mul_add_c(a[3],b[1],c2,c3,c1);
214 mul_add_c(a[2],b[2],c2,c3,c1);
215 mul_add_c(a[1],b[3],c2,c3,c1);
216 r[4]=c2;
217 c2=0;
218 mul_add_c(a[2],b[3],c3,c1,c2);
219 mul_add_c(a[3],b[2],c3,c1,c2);
220 r[5]=c3;
221 c3=0;
222 mul_add_c(a[3],b[3],c1,c2,c3);
223 r[6]=c1;
224 r[7]=c2;
225 }
226
227void bn_sqr_comba88(BN_ULONG *r, BN_ULONG *a)
228 {
229#ifdef BN_LLONG
230 BN_ULLONG t,tt;
231#else
232 BN_ULONG bl,bh;
233#endif
234 BN_ULONG t1,t2;
235 BN_ULONG c1,c2,c3;
236
237 c1=0;
238 c2=0;
239 c3=0;
240 sqr_add_c(a,0,c1,c2,c3);
241 r[0]=c1;
242 c1=0;
243 sqr_add_c2(a,1,0,c2,c3,c1);
244 r[1]=c2;
245 c2=0;
246 sqr_add_c(a,1,c3,c1,c2);
247 sqr_add_c2(a,2,0,c3,c1,c2);
248 r[2]=c3;
249 c3=0;
250 sqr_add_c2(a,3,0,c1,c2,c3);
251 sqr_add_c2(a,2,1,c1,c2,c3);
252 r[3]=c1;
253 c1=0;
254 sqr_add_c(a,2,c2,c3,c1);
255 sqr_add_c2(a,3,1,c2,c3,c1);
256 sqr_add_c2(a,4,0,c2,c3,c1);
257 r[4]=c2;
258 c2=0;
259 sqr_add_c2(a,5,0,c3,c1,c2);
260 sqr_add_c2(a,4,1,c3,c1,c2);
261 sqr_add_c2(a,3,2,c3,c1,c2);
262 r[5]=c3;
263 c3=0;
264 sqr_add_c(a,3,c1,c2,c3);
265 sqr_add_c2(a,4,2,c1,c2,c3);
266 sqr_add_c2(a,5,1,c1,c2,c3);
267 sqr_add_c2(a,6,0,c1,c2,c3);
268 r[6]=c1;
269 c1=0;
270 sqr_add_c2(a,7,0,c2,c3,c1);
271 sqr_add_c2(a,6,1,c2,c3,c1);
272 sqr_add_c2(a,5,2,c2,c3,c1);
273 sqr_add_c2(a,4,3,c2,c3,c1);
274 r[7]=c2;
275 c2=0;
276 sqr_add_c(a,4,c3,c1,c2);
277 sqr_add_c2(a,5,3,c3,c1,c2);
278 sqr_add_c2(a,6,2,c3,c1,c2);
279 sqr_add_c2(a,7,1,c3,c1,c2);
280 r[8]=c3;
281 c3=0;
282 sqr_add_c2(a,7,2,c1,c2,c3);
283 sqr_add_c2(a,6,3,c1,c2,c3);
284 sqr_add_c2(a,5,4,c1,c2,c3);
285 r[9]=c1;
286 c1=0;
287 sqr_add_c(a,5,c2,c3,c1);
288 sqr_add_c2(a,6,4,c2,c3,c1);
289 sqr_add_c2(a,7,3,c2,c3,c1);
290 r[10]=c2;
291 c2=0;
292 sqr_add_c2(a,7,4,c3,c1,c2);
293 sqr_add_c2(a,6,5,c3,c1,c2);
294 r[11]=c3;
295 c3=0;
296 sqr_add_c(a,6,c1,c2,c3);
297 sqr_add_c2(a,7,5,c1,c2,c3);
298 r[12]=c1;
299 c1=0;
300 sqr_add_c2(a,7,6,c2,c3,c1);
301 r[13]=c2;
302 c2=0;
303 sqr_add_c(a,7,c3,c1,c2);
304 r[14]=c3;
305 r[15]=c1;
306 }
307
308void bn_sqr_comba44(BN_ULONG *r, BN_ULONG *a)
309 {
310#ifdef BN_LLONG
311 BN_ULLONG t,tt;
312#else
313 BN_ULONG bl,bh;
314#endif
315 BN_ULONG t1,t2;
316 BN_ULONG c1,c2,c3;
317
318 c1=0;
319 c2=0;
320 c3=0;
321 sqr_add_c(a,0,c1,c2,c3);
322 r[0]=c1;
323 c1=0;
324 sqr_add_c2(a,1,0,c2,c3,c1);
325 r[1]=c2;
326 c2=0;
327 sqr_add_c(a,1,c3,c1,c2);
328 sqr_add_c2(a,2,0,c3,c1,c2);
329 r[2]=c3;
330 c3=0;
331 sqr_add_c2(a,3,0,c1,c2,c3);
332 sqr_add_c2(a,2,1,c1,c2,c3);
333 r[3]=c1;
334 c1=0;
335 sqr_add_c(a,2,c2,c3,c1);
336 sqr_add_c2(a,3,1,c2,c3,c1);
337 r[4]=c2;
338 c2=0;
339 sqr_add_c2(a,3,2,c3,c1,c2);
340 r[5]=c3;
341 c3=0;
342 sqr_add_c(a,3,c1,c2,c3);
343 r[6]=c1;
344 r[7]=c2;
345 }
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
index 2263bdc7da..150dd289a5 100644
--- a/src/lib/libcrypto/bn/bn_div.c
+++ b/src/lib/libcrypto/bn/bn_div.c
@@ -57,21 +57,19 @@
57 */ 57 */
58 58
59#include <stdio.h> 59#include <stdio.h>
60#include <openssl/bn.h>
60#include "cryptlib.h" 61#include "cryptlib.h"
61#include "bn_lcl.h" 62#include "bn_lcl.h"
62 63
63/* The old slow way */ 64/* The old slow way */
64#if 0 65#if 0
65int BN_div(dv, rem, m, d,ctx) 66int BN_div(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BIGNUM *d, BN_CTX *ctx)
66BIGNUM *dv;
67BIGNUM *rem;
68BIGNUM *m;
69BIGNUM *d;
70BN_CTX *ctx;
71 { 67 {
72 int i,nm,nd; 68 int i,nm,nd;
73 BIGNUM *D; 69 BIGNUM *D;
74 70
71 bn_check_top(m);
72 bn_check_top(d);
75 if (BN_is_zero(d)) 73 if (BN_is_zero(d))
76 { 74 {
77 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO); 75 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
@@ -86,9 +84,9 @@ BN_CTX *ctx;
86 return(1); 84 return(1);
87 } 85 }
88 86
89 D=ctx->bn[ctx->tos]; 87 D= &(ctx->bn[ctx->tos]);
90 if (dv == NULL) dv=ctx->bn[ctx->tos+1]; 88 if (dv == NULL) dv= &(ctx->bn[ctx->tos+1]);
91 if (rem == NULL) rem=ctx->bn[ctx->tos+2]; 89 if (rem == NULL) rem= &(ctx->bn[ctx->tos+2]);
92 90
93 nd=BN_num_bits(d); 91 nd=BN_num_bits(d);
94 nm=BN_num_bits(m); 92 nm=BN_num_bits(m);
@@ -98,6 +96,7 @@ BN_CTX *ctx;
98 /* The next 2 are needed so we can do a dv->d[0]|=1 later 96 /* The next 2 are needed so we can do a dv->d[0]|=1 later
99 * since BN_lshift1 will only work once there is a value :-) */ 97 * since BN_lshift1 will only work once there is a value :-) */
100 BN_zero(dv); 98 BN_zero(dv);
99 bn_wexpand(dv,1);
101 dv->top=1; 100 dv->top=1;
102 101
103 if (!BN_lshift(D,D,nm-nd)) return(0); 102 if (!BN_lshift(D,D,nm-nd)) return(0);
@@ -107,7 +106,7 @@ BN_CTX *ctx;
107 if (BN_ucmp(rem,D) >= 0) 106 if (BN_ucmp(rem,D) >= 0)
108 { 107 {
109 dv->d[0]|=1; 108 dv->d[0]|=1;
110 bn_qsub(rem,rem,D); 109 if (!BN_usub(rem,rem,D)) return(0);
111 } 110 }
112/* CAN IMPROVE (and have now :=) */ 111/* CAN IMPROVE (and have now :=) */
113 if (!BN_rshift1(D,D)) return(0); 112 if (!BN_rshift1(D,D)) return(0);
@@ -119,12 +118,8 @@ BN_CTX *ctx;
119 118
120#else 119#else
121 120
122int BN_div(dv, rm, num, divisor,ctx) 121int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
123BIGNUM *dv; 122 BN_CTX *ctx)
124BIGNUM *rm;
125BIGNUM *num;
126BIGNUM *divisor;
127BN_CTX *ctx;
128 { 123 {
129 int norm_shift,i,j,loop; 124 int norm_shift,i,j,loop;
130 BIGNUM *tmp,wnum,*snum,*sdiv,*res; 125 BIGNUM *tmp,wnum,*snum,*sdiv,*res;
@@ -132,6 +127,9 @@ BN_CTX *ctx;
132 BN_ULONG d0,d1; 127 BN_ULONG d0,d1;
133 int num_n,div_n; 128 int num_n,div_n;
134 129
130 bn_check_top(num);
131 bn_check_top(divisor);
132
135 if (BN_is_zero(divisor)) 133 if (BN_is_zero(divisor))
136 { 134 {
137 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO); 135 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
@@ -146,12 +144,12 @@ BN_CTX *ctx;
146 return(1); 144 return(1);
147 } 145 }
148 146
149 tmp=ctx->bn[ctx->tos]; 147 tmp= &(ctx->bn[ctx->tos]);
150 tmp->neg=0; 148 tmp->neg=0;
151 snum=ctx->bn[ctx->tos+1]; 149 snum= &(ctx->bn[ctx->tos+1]);
152 sdiv=ctx->bn[ctx->tos+2]; 150 sdiv= &(ctx->bn[ctx->tos+2]);
153 if (dv == NULL) 151 if (dv == NULL)
154 res=ctx->bn[ctx->tos+3]; 152 res= &(ctx->bn[ctx->tos+3]);
155 else res=dv; 153 else res=dv;
156 154
157 /* First we normalise the numbers */ 155 /* First we normalise the numbers */
@@ -168,10 +166,10 @@ BN_CTX *ctx;
168 /* Lets setup a 'window' into snum 166 /* Lets setup a 'window' into snum
169 * This is the part that corresponds to the current 167 * This is the part that corresponds to the current
170 * 'area' being divided */ 168 * 'area' being divided */
169 BN_init(&wnum);
171 wnum.d= &(snum->d[loop]); 170 wnum.d= &(snum->d[loop]);
172 wnum.top= div_n; 171 wnum.top= div_n;
173 wnum.max= snum->max; /* a bit of a lie */ 172 wnum.max= snum->max+1; /* a bit of a lie */
174 wnum.neg= 0;
175 173
176 /* Get the top 2 words of sdiv */ 174 /* Get the top 2 words of sdiv */
177 /* i=sdiv->top; */ 175 /* i=sdiv->top; */
@@ -183,8 +181,8 @@ BN_CTX *ctx;
183 181
184 /* Setup to 'res' */ 182 /* Setup to 'res' */
185 res->neg= (num->neg^divisor->neg); 183 res->neg= (num->neg^divisor->neg);
186 res->top=loop;
187 if (!bn_wexpand(res,(loop+1))) goto err; 184 if (!bn_wexpand(res,(loop+1))) goto err;
185 res->top=loop;
188 resp= &(res->d[loop-1]); 186 resp= &(res->d[loop-1]);
189 187
190 /* space for temp */ 188 /* space for temp */
@@ -192,7 +190,7 @@ BN_CTX *ctx;
192 190
193 if (BN_ucmp(&wnum,sdiv) >= 0) 191 if (BN_ucmp(&wnum,sdiv) >= 0)
194 { 192 {
195 bn_qsub(&wnum,&wnum,sdiv); 193 if (!BN_usub(&wnum,&wnum,sdiv)) goto err;
196 *resp=1; 194 *resp=1;
197 res->d[res->top-1]=1; 195 res->d[res->top-1]=1;
198 } 196 }
@@ -202,56 +200,98 @@ BN_CTX *ctx;
202 200
203 for (i=0; i<loop-1; i++) 201 for (i=0; i<loop-1; i++)
204 { 202 {
205 BN_ULONG q,n0,n1; 203 BN_ULONG q,l0;
206 BN_ULONG l0; 204#ifdef BN_DIV3W
205 q=bn_div_3_words(wnump,d0,d1);
206#else
207
208#if !defined(NO_ASM) && !defined(PEDANTIC)
209# if defined(__GNUC__) && __GNUC__>=2
210# if defined(__i386)
211 /*
212 * There were two reasons for implementing this template:
213 * - GNU C generates a call to a function (__udivdi3 to be exact)
214 * in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
215 * understand why...);
216 * - divl doesn't only calculate quotient, but also leaves
217 * remainder in %edx which we can definitely use here:-)
218 *
219 * <appro@fy.chalmers.se>
220 */
221# define bn_div_words(n0,n1,d0) \
222 ({ asm volatile ( \
223 "divl %4" \
224 : "=a"(q), "=d"(rem) \
225 : "a"(n1), "d"(n0), "g"(d0) \
226 : "cc"); \
227 q; \
228 })
229# define REMINDER_IS_ALREADY_CALCULATED
230# endif /* __<cpu> */
231# endif /* __GNUC__ */
232#endif /* NO_ASM */
233 BN_ULONG n0,n1,rem=0;
207 234
208 wnum.d--; wnum.top++;
209 n0=wnump[0]; 235 n0=wnump[0];
210 n1=wnump[-1]; 236 n1=wnump[-1];
211 if (n0 == d0) 237 if (n0 == d0)
212 q=BN_MASK2; 238 q=BN_MASK2;
213 else 239 else
214 q=bn_div64(n0,n1,d0); 240#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
241 q=((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0;
242#else
243 q=bn_div_words(n0,n1,d0);
244#endif
215 { 245 {
216#ifdef BN_LLONG 246#ifdef BN_LLONG
217 BN_ULLONG t1,t2,rem; 247 BN_ULLONG t2;
218 t1=((BN_ULLONG)n0<<BN_BITS2)|n1; 248
249#ifndef REMINDER_IS_ALREADY_CALCULATED
250 /*
251 * rem doesn't have to be BN_ULLONG. The least we
252 * know it's less that d0, isn't it?
253 */
254 rem=(n1-q*d0)&BN_MASK2;
255#endif
256 t2=(BN_ULLONG)d1*q;
257
219 for (;;) 258 for (;;)
220 { 259 {
221 t2=(BN_ULLONG)d1*q; 260 if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
222 rem=t1-(BN_ULLONG)q*d0;
223 if ((rem>>BN_BITS2) ||
224 (t2 <= ((BN_ULLONG)(rem<<BN_BITS2)+wnump[-2])))
225 break; 261 break;
226 q--; 262 q--;
263 rem += d0;
264 if (rem < d0) break; /* don't let rem overflow */
265 t2 -= d1;
227 } 266 }
228#else 267#else
229 BN_ULONG t1l,t1h,t2l,t2h,t3l,t3h,ql,qh,t3t; 268 BN_ULONG t2l,t2h,ql,qh;
230 t1h=n0; 269
231 t1l=n1; 270#ifndef REMINDER_IS_ALREADY_CALCULATED
271 /*
272 * It's more than enough with the only multiplication.
273 * See the comment above in BN_LLONG section...
274 */
275 rem=(n1-q*d0)&BN_MASK2;
276#endif
277 t2l=LBITS(d1); t2h=HBITS(d1);
278 ql =LBITS(q); qh =HBITS(q);
279 mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
280
232 for (;;) 281 for (;;)
233 { 282 {
234 t2l=LBITS(d1); t2h=HBITS(d1); 283 if ((t2h < rem) ||
235 ql =LBITS(q); qh =HBITS(q); 284 ((t2h == rem) && (t2l <= wnump[-2])))
236 mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ 285 break;
237
238 t3t=LBITS(d0); t3h=HBITS(d0);
239 mul64(t3t,t3h,ql,qh); /* t3=t1-(BN_ULLONG)q*d0; */
240 t3l=(t1l-t3t)&BN_MASK2;
241 if (t3l > t1l) t3h++;
242 t3h=(t1h-t3h)&BN_MASK2;
243
244 /*if ((t3>>BN_BITS2) ||
245 (t2 <= ((t3<<BN_BITS2)+wnump[-2])))
246 break; */
247 if (t3h) break;
248 if (t2h < t3l) break;
249 if ((t2h == t3l) && (t2l <= wnump[-2])) break;
250
251 q--; 286 q--;
287 rem += d0;
288 if (rem < d0) break; /* don't let rem overflow */
289 if (t2l < d1) t2h--; t2l -= d1;
252 } 290 }
253#endif 291#endif
254 } 292 }
293#endif /* !BN_DIV3W */
294 wnum.d--; wnum.top++;
255 l0=bn_mul_words(tmp->d,sdiv->d,div_n,q); 295 l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
256 tmp->d[div_n]=l0; 296 tmp->d[div_n]=l0;
257 for (j=div_n+1; j>0; j--) 297 for (j=div_n+1; j>0; j--)
@@ -284,3 +324,35 @@ err:
284 } 324 }
285 325
286#endif 326#endif
327
328/* rem != m */
329int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
330 {
331#if 0 /* The old slow way */
332 int i,nm,nd;
333 BIGNUM *dv;
334
335 if (BN_ucmp(m,d) < 0)
336 return((BN_copy(rem,m) == NULL)?0:1);
337
338 dv= &(ctx->bn[ctx->tos]);
339
340 if (!BN_copy(rem,m)) return(0);
341
342 nm=BN_num_bits(rem);
343 nd=BN_num_bits(d);
344 if (!BN_lshift(dv,d,nm-nd)) return(0);
345 for (i=nm-nd; i>=0; i--)
346 {
347 if (BN_cmp(rem,dv) >= 0)
348 {
349 if (!BN_sub(rem,rem,dv)) return(0);
350 }
351 if (!BN_rshift1(dv,dv)) return(0);
352 }
353 return(1);
354#else
355 return(BN_div(NULL,rem,m,d,ctx));
356#endif
357 }
358
diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c
index 029ae810d5..73e80774e5 100644
--- a/src/lib/libcrypto/bn/bn_err.c
+++ b/src/lib/libcrypto/bn/bn_err.c
@@ -1,63 +1,65 @@
1/* lib/bn/bn_err.c */ 1/* crypto/bn/bn_err.c */
2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) 2/* ====================================================================
3 * All rights reserved. 3 * Copyright (c) 1999 The OpenSSL Project. All rights reserved.
4 * 4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
25 * are met: 7 * are met:
26 * 1. Redistributions of source code must retain the copyright 8 *
27 * notice, this list of conditions and the following disclaimer. 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
28 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in
30 * documentation and/or other materials provided with the distribution. 14 * the documentation and/or other materials provided with the
31 * 3. All advertising materials mentioning features or use of this software 15 * distribution.
32 * must display the following acknowledgement: 16 *
33 * "This product includes cryptographic software written by 17 * 3. All advertising materials mentioning features or use of this
34 * Eric Young (eay@cryptsoft.com)" 18 * software must display the following acknowledgment:
35 * The word 'cryptographic' can be left out if the rouines from the library 19 * "This product includes software developed by the OpenSSL Project
36 * being used are not cryptographic related :-). 20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
37 * 4. If you include any Windows specific code (or a derivative thereof) from 21 *
38 * the apps directory (application code) you must include an acknowledgement: 22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 23 * endorse or promote products derived from this software without
40 * 24 * prior written permission. For written permission, please contact
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 25 * openssl-core@OpenSSL.org.
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 *
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * 5. Products derived from this software may not be called "OpenSSL"
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * nor may "OpenSSL" appear in their names without prior written
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * permission of the OpenSSL Project.
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 *
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * 6. Redistributions of any form whatsoever must retain the following
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * acknowledgment:
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * "This product includes software developed by the OpenSSL Project
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
51 * SUCH DAMAGE. 35 *
52 * 36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
53 * The licence and distribution terms for any publically available version or 37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * derivative of this code cannot be changed. i.e. this code cannot simply be 38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * copied and put under another distribution licence 39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
56 * [including the GNU Public Licence.] 40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
57 */ 54 */
55
56/* NOTE: this file was auto generated by the mkerr.pl script: any changes
57 * made to it will be overwritten when the script next updates this file.
58 */
59
58#include <stdio.h> 60#include <stdio.h>
59#include "err.h" 61#include <openssl/err.h>
60#include "bn.h" 62#include <openssl/bn.h>
61 63
62/* BEGIN ERROR CODES */ 64/* BEGIN ERROR CODES */
63#ifndef NO_ERR 65#ifndef NO_ERR
@@ -78,29 +80,32 @@ static ERR_STRING_DATA BN_str_functs[]=
78{ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"}, 80{ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"},
79{ERR_PACK(0,BN_F_BN_NEW,0), "BN_new"}, 81{ERR_PACK(0,BN_F_BN_NEW,0), "BN_new"},
80{ERR_PACK(0,BN_F_BN_RAND,0), "BN_rand"}, 82{ERR_PACK(0,BN_F_BN_RAND,0), "BN_rand"},
81{0,NULL}, 83{ERR_PACK(0,BN_F_BN_USUB,0), "BN_usub"},
84{0,NULL}
82 }; 85 };
83 86
84static ERR_STRING_DATA BN_str_reasons[]= 87static ERR_STRING_DATA BN_str_reasons[]=
85 { 88 {
89{BN_R_ARG2_LT_ARG3 ,"arg2 lt arg3"},
86{BN_R_BAD_RECIPROCAL ,"bad reciprocal"}, 90{BN_R_BAD_RECIPROCAL ,"bad reciprocal"},
87{BN_R_CALLED_WITH_EVEN_MODULUS ,"called with even modulus"}, 91{BN_R_CALLED_WITH_EVEN_MODULUS ,"called with even modulus"},
88{BN_R_DIV_BY_ZERO ,"div by zero"}, 92{BN_R_DIV_BY_ZERO ,"div by zero"},
89{BN_R_ENCODING_ERROR ,"encoding error"}, 93{BN_R_ENCODING_ERROR ,"encoding error"},
94{BN_R_EXPAND_ON_STATIC_BIGNUM_DATA ,"expand on static bignum data"},
90{BN_R_INVALID_LENGTH ,"invalid length"}, 95{BN_R_INVALID_LENGTH ,"invalid length"},
91{BN_R_NOT_INITALISED ,"not initalised"}, 96{BN_R_NOT_INITIALIZED ,"not initialized"},
92{BN_R_NO_INVERSE ,"no inverse"}, 97{BN_R_NO_INVERSE ,"no inverse"},
93{0,NULL}, 98{0,NULL}
94 }; 99 };
95 100
96#endif 101#endif
97 102
98void ERR_load_BN_strings() 103void ERR_load_BN_strings(void)
99 { 104 {
100 static int init=1; 105 static int init=1;
101 106
102 if (init); 107 if (init)
103 {; 108 {
104 init=0; 109 init=0;
105#ifndef NO_ERR 110#ifndef NO_ERR
106 ERR_load_strings(ERR_LIB_BN,BN_str_functs); 111 ERR_load_strings(ERR_LIB_BN,BN_str_functs);
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
index c056a5083f..2df1614ada 100644
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ b/src/lib/libcrypto/bn/bn_exp.c
@@ -60,22 +60,23 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63#define TABLE_SIZE 16
64
63/* slow but works */ 65/* slow but works */
64int BN_mod_mul(ret, a, b, m, ctx) 66int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
65BIGNUM *ret;
66BIGNUM *a;
67BIGNUM *b;
68BIGNUM *m;
69BN_CTX *ctx;
70 { 67 {
71 BIGNUM *t; 68 BIGNUM *t;
72 int r=0; 69 int r=0;
73 70
74 t=ctx->bn[ctx->tos++]; 71 bn_check_top(a);
72 bn_check_top(b);
73 bn_check_top(m);
74
75 t= &(ctx->bn[ctx->tos++]);
75 if (a == b) 76 if (a == b)
76 { if (!BN_sqr(t,a,ctx)) goto err; } 77 { if (!BN_sqr(t,a,ctx)) goto err; }
77 else 78 else
78 { if (!BN_mul(t,a,b)) goto err; } 79 { if (!BN_mul(t,a,b,ctx)) goto err; }
79 if (!BN_mod(ret,t,m,ctx)) goto err; 80 if (!BN_mod(ret,t,m,ctx)) goto err;
80 r=1; 81 r=1;
81err: 82err:
@@ -85,22 +86,20 @@ err:
85 86
86#if 0 87#if 0
87/* this one works - simple but works */ 88/* this one works - simple but works */
88int BN_mod_exp(r,a,p,m,ctx) 89int BN_mod_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, BN_CTX *ctx)
89BIGNUM *r,*a,*p,*m;
90BN_CTX *ctx;
91 { 90 {
92 int i,bits,ret=0; 91 int i,bits,ret=0;
93 BIGNUM *v,*tmp; 92 BIGNUM *v,*tmp;
94 93
95 v=ctx->bn[ctx->tos++]; 94 v= &(ctx->bn[ctx->tos++]);
96 tmp=ctx->bn[ctx->tos++]; 95 tmp= &(ctx->bn[ctx->tos++]);
97 96
98 if (BN_copy(v,a) == NULL) goto err; 97 if (BN_copy(v,a) == NULL) goto err;
99 bits=BN_num_bits(p); 98 bits=BN_num_bits(p);
100 99
101 if (BN_is_odd(p)) 100 if (BN_is_odd(p))
102 { if (BN_copy(r,a) == NULL) goto err; } 101 { if (BN_copy(r,a) == NULL) goto err; }
103 else { if (BN_one(r)) goto err; } 102 else { if (!BN_one(r)) goto err; }
104 103
105 for (i=1; i<bits; i++) 104 for (i=1; i<bits; i++)
106 { 105 {
@@ -108,7 +107,7 @@ BN_CTX *ctx;
108 if (!BN_mod(v,tmp,m,ctx)) goto err; 107 if (!BN_mod(v,tmp,m,ctx)) goto err;
109 if (BN_is_bit_set(p,i)) 108 if (BN_is_bit_set(p,i))
110 { 109 {
111 if (!BN_mul(tmp,r,v)) goto err; 110 if (!BN_mul(tmp,r,v,ctx)) goto err;
112 if (!BN_mod(r,tmp,m,ctx)) goto err; 111 if (!BN_mod(r,tmp,m,ctx)) goto err;
113 } 112 }
114 } 113 }
@@ -121,46 +120,49 @@ err:
121#endif 120#endif
122 121
123/* this one works - simple but works */ 122/* this one works - simple but works */
124int BN_exp(r,a,p,ctx) 123int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BN_CTX *ctx)
125BIGNUM *r,*a,*p;
126BN_CTX *ctx;
127 { 124 {
128 int i,bits,ret=0; 125 int i,bits,ret=0,tos;
129 BIGNUM *v,*tmp; 126 BIGNUM *v,*rr;
130 127
131 v=ctx->bn[ctx->tos++]; 128 tos=ctx->tos;
132 tmp=ctx->bn[ctx->tos++]; 129 v= &(ctx->bn[ctx->tos++]);
130 if ((r == a) || (r == p))
131 rr= &(ctx->bn[ctx->tos++]);
132 else
133 rr=r;
133 134
134 if (BN_copy(v,a) == NULL) goto err; 135 if (BN_copy(v,a) == NULL) goto err;
135 bits=BN_num_bits(p); 136 bits=BN_num_bits(p);
136 137
137 if (BN_is_odd(p)) 138 if (BN_is_odd(p))
138 { if (BN_copy(r,a) == NULL) goto err; } 139 { if (BN_copy(rr,a) == NULL) goto err; }
139 else { if (BN_one(r)) goto err; } 140 else { if (!BN_one(rr)) goto err; }
140 141
141 for (i=1; i<bits; i++) 142 for (i=1; i<bits; i++)
142 { 143 {
143 if (!BN_sqr(tmp,v,ctx)) goto err; 144 if (!BN_sqr(v,v,ctx)) goto err;
144 if (BN_is_bit_set(p,i)) 145 if (BN_is_bit_set(p,i))
145 { 146 {
146 if (!BN_mul(tmp,r,v)) goto err; 147 if (!BN_mul(rr,rr,v,ctx)) goto err;
147 } 148 }
148 } 149 }
149 ret=1; 150 ret=1;
150err: 151err:
151 ctx->tos-=2; 152 ctx->tos=tos;
153 if (r != rr) BN_copy(r,rr);
152 return(ret); 154 return(ret);
153 } 155 }
154 156
155int BN_mod_exp(r,a,p,m,ctx) 157int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
156BIGNUM *r; 158 BN_CTX *ctx)
157BIGNUM *a;
158BIGNUM *p;
159BIGNUM *m;
160BN_CTX *ctx;
161 { 159 {
162 int ret; 160 int ret;
163 161
162 bn_check_top(a);
163 bn_check_top(p);
164 bn_check_top(m);
165
164#ifdef MONT_MUL_MOD 166#ifdef MONT_MUL_MOD
165 /* I have finally been able to take out this pre-condition of 167 /* I have finally been able to take out this pre-condition of
166 * the top bit being set. It was caused by an error in BN_div 168 * the top bit being set. It was caused by an error in BN_div
@@ -182,20 +184,16 @@ BN_CTX *ctx;
182 } 184 }
183 185
184/* #ifdef RECP_MUL_MOD */ 186/* #ifdef RECP_MUL_MOD */
185int BN_mod_exp_recp(r,a,p,m,ctx) 187int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
186BIGNUM *r; 188 const BIGNUM *m, BN_CTX *ctx)
187BIGNUM *a;
188BIGNUM *p;
189BIGNUM *m;
190BN_CTX *ctx;
191 { 189 {
192 int nb,i,j,bits,ret=0,wstart,wend,window,wvalue; 190 int i,j,bits,ret=0,wstart,wend,window,wvalue;
193 int start=1; 191 int start=1,ts=0;
194 BIGNUM *d,*aa; 192 BIGNUM *aa;
195 BIGNUM *val[16]; 193 BIGNUM val[TABLE_SIZE];
194 BN_RECP_CTX recp;
196 195
197 d=ctx->bn[ctx->tos++]; 196 aa= &(ctx->bn[ctx->tos++]);
198 aa=ctx->bn[ctx->tos++];
199 bits=BN_num_bits(p); 197 bits=BN_num_bits(p);
200 198
201 if (bits == 0) 199 if (bits == 0)
@@ -203,12 +201,14 @@ BN_CTX *ctx;
203 BN_one(r); 201 BN_one(r);
204 return(1); 202 return(1);
205 } 203 }
206 nb=BN_reciprocal(d,m,ctx); 204 BN_RECP_CTX_init(&recp);
207 if (nb == -1) goto err; 205 if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
208 206
209 val[0]=BN_new(); 207 BN_init(&(val[0]));
210 if (!BN_mod(val[0],a,m,ctx)) goto err; /* 1 */ 208 ts=1;
211 if (!BN_mod_mul_reciprocal(aa,val[0],val[0],m,d,nb,ctx)) 209
210 if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */
211 if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx))
212 goto err; /* 2 */ 212 goto err; /* 2 */
213 213
214 if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */ 214 if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */
@@ -223,12 +223,11 @@ BN_CTX *ctx;
223 j=1<<(window-1); 223 j=1<<(window-1);
224 for (i=1; i<j; i++) 224 for (i=1; i<j; i++)
225 { 225 {
226 val[i]=BN_new(); 226 BN_init(&val[i]);
227 if (!BN_mod_mul_reciprocal(val[i],val[i-1],aa,m,d,nb,ctx)) 227 if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx))
228 goto err; 228 goto err;
229 } 229 }
230 for (; i<16; i++) 230 ts=i;
231 val[i]=NULL;
232 231
233 start=1; /* This is used to avoid multiplication etc 232 start=1; /* This is used to avoid multiplication etc
234 * when there is only the value '1' in the 233 * when there is only the value '1' in the
@@ -244,7 +243,7 @@ BN_CTX *ctx;
244 if (BN_is_bit_set(p,wstart) == 0) 243 if (BN_is_bit_set(p,wstart) == 0)
245 { 244 {
246 if (!start) 245 if (!start)
247 if (!BN_mod_mul_reciprocal(r,r,r,m,d,nb,ctx)) 246 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
248 goto err; 247 goto err;
249 if (wstart == 0) break; 248 if (wstart == 0) break;
250 wstart--; 249 wstart--;
@@ -274,12 +273,12 @@ BN_CTX *ctx;
274 if (!start) 273 if (!start)
275 for (i=0; i<j; i++) 274 for (i=0; i<j; i++)
276 { 275 {
277 if (!BN_mod_mul_reciprocal(r,r,r,m,d,nb,ctx)) 276 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
278 goto err; 277 goto err;
279 } 278 }
280 279
281 /* wvalue will be an odd number < 2^window */ 280 /* wvalue will be an odd number < 2^window */
282 if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],m,d,nb,ctx)) 281 if (!BN_mod_mul_reciprocal(r,r,&(val[wvalue>>1]),&recp,ctx))
283 goto err; 282 goto err;
284 283
285 /* move the 'window' down further */ 284 /* move the 'window' down further */
@@ -290,35 +289,36 @@ BN_CTX *ctx;
290 } 289 }
291 ret=1; 290 ret=1;
292err: 291err:
293 ctx->tos-=2; 292 ctx->tos--;
294 for (i=0; i<16; i++) 293 for (i=0; i<ts; i++)
295 if (val[i] != NULL) BN_clear_free(val[i]); 294 BN_clear_free(&(val[i]));
295 BN_RECP_CTX_free(&recp);
296 return(ret); 296 return(ret);
297 } 297 }
298/* #endif */ 298/* #endif */
299 299
300/* #ifdef MONT_MUL_MOD */ 300/* #ifdef MONT_MUL_MOD */
301int BN_mod_exp_mont(r,a,p,m,ctx,in_mont) 301int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
302BIGNUM *r; 302 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
303BIGNUM *a;
304BIGNUM *p;
305BIGNUM *m;
306BN_CTX *ctx;
307BN_MONT_CTX *in_mont;
308 { 303 {
309#define TABLE_SIZE 16
310 int i,j,bits,ret=0,wstart,wend,window,wvalue; 304 int i,j,bits,ret=0,wstart,wend,window,wvalue;
311 int start=1; 305 int start=1,ts=0;
312 BIGNUM *d,*aa; 306 BIGNUM *d,*r;
313 BIGNUM *val[TABLE_SIZE]; 307 BIGNUM *aa;
308 BIGNUM val[TABLE_SIZE];
314 BN_MONT_CTX *mont=NULL; 309 BN_MONT_CTX *mont=NULL;
315 310
311 bn_check_top(a);
312 bn_check_top(p);
313 bn_check_top(m);
314
316 if (!(m->d[0] & 1)) 315 if (!(m->d[0] & 1))
317 { 316 {
318 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); 317 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
319 return(0); 318 return(0);
320 } 319 }
321 d=ctx->bn[ctx->tos++]; 320 d= &(ctx->bn[ctx->tos++]);
321 r= &(ctx->bn[ctx->tos++]);
322 bits=BN_num_bits(p); 322 bits=BN_num_bits(p);
323 if (bits == 0) 323 if (bits == 0)
324 { 324 {
@@ -339,22 +339,23 @@ BN_MONT_CTX *in_mont;
339 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; 339 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
340 } 340 }
341 341
342 val[0]=BN_new(); 342 BN_init(&val[0]);
343 ts=1;
343 if (BN_ucmp(a,m) >= 0) 344 if (BN_ucmp(a,m) >= 0)
344 { 345 {
345 BN_mod(val[0],a,m,ctx); 346 BN_mod(&(val[0]),a,m,ctx);
346 aa=val[0]; 347 aa= &(val[0]);
347 } 348 }
348 else 349 else
349 aa=a; 350 aa=a;
350 if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */ 351 if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */
351 if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */ 352 if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */
352 353
353 if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */ 354 if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */
354 window=1; 355 window=1;
355 else if (bits > 250) 356 else if (bits >= 256)
356 window=5; /* max size of window */ 357 window=5; /* max size of window */
357 else if (bits >= 120) 358 else if (bits >= 128)
358 window=4; 359 window=4;
359 else 360 else
360 window=3; 361 window=3;
@@ -362,12 +363,11 @@ BN_MONT_CTX *in_mont;
362 j=1<<(window-1); 363 j=1<<(window-1);
363 for (i=1; i<j; i++) 364 for (i=1; i<j; i++)
364 { 365 {
365 val[i]=BN_new(); 366 BN_init(&(val[i]));
366 if (!BN_mod_mul_montgomery(val[i],val[i-1],d,mont,ctx)) 367 if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx))
367 goto err; 368 goto err;
368 } 369 }
369 for (; i<TABLE_SIZE; i++) 370 ts=i;
370 val[i]=NULL;
371 371
372 start=1; /* This is used to avoid multiplication etc 372 start=1; /* This is used to avoid multiplication etc
373 * when there is only the value '1' in the 373 * when there is only the value '1' in the
@@ -419,7 +419,7 @@ BN_MONT_CTX *in_mont;
419 } 419 }
420 420
421 /* wvalue will be an odd number < 2^window */ 421 /* wvalue will be an odd number < 2^window */
422 if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx)) 422 if (!BN_mod_mul_montgomery(r,r,&(val[wvalue>>1]),mont,ctx))
423 goto err; 423 goto err;
424 424
425 /* move the 'window' down further */ 425 /* move the 'window' down further */
@@ -428,31 +428,27 @@ BN_MONT_CTX *in_mont;
428 start=0; 428 start=0;
429 if (wstart < 0) break; 429 if (wstart < 0) break;
430 } 430 }
431 BN_from_montgomery(r,r,mont,ctx); 431 BN_from_montgomery(rr,r,mont,ctx);
432 ret=1; 432 ret=1;
433err: 433err:
434 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 434 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
435 ctx->tos--; 435 ctx->tos-=2;
436 for (i=0; i<TABLE_SIZE; i++) 436 for (i=0; i<ts; i++)
437 if (val[i] != NULL) BN_clear_free(val[i]); 437 BN_clear_free(&(val[i]));
438 return(ret); 438 return(ret);
439 } 439 }
440/* #endif */ 440/* #endif */
441 441
442/* The old fallback, simple version :-) */ 442/* The old fallback, simple version :-) */
443int BN_mod_exp_simple(r,a,p,m,ctx) 443int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,
444BIGNUM *r; 444 BN_CTX *ctx)
445BIGNUM *a;
446BIGNUM *p;
447BIGNUM *m;
448BN_CTX *ctx;
449 { 445 {
450 int i,j,bits,ret=0,wstart,wend,window,wvalue; 446 int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0;
451 int start=1; 447 int start=1;
452 BIGNUM *d; 448 BIGNUM *d;
453 BIGNUM *val[16]; 449 BIGNUM val[TABLE_SIZE];
454 450
455 d=ctx->bn[ctx->tos++]; 451 d= &(ctx->bn[ctx->tos++]);
456 bits=BN_num_bits(p); 452 bits=BN_num_bits(p);
457 453
458 if (bits == 0) 454 if (bits == 0)
@@ -461,9 +457,10 @@ BN_CTX *ctx;
461 return(1); 457 return(1);
462 } 458 }
463 459
464 val[0]=BN_new(); 460 BN_init(&(val[0]));
465 if (!BN_mod(val[0],a,m,ctx)) goto err; /* 1 */ 461 ts=1;
466 if (!BN_mod_mul(d,val[0],val[0],m,ctx)) 462 if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */
463 if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx))
467 goto err; /* 2 */ 464 goto err; /* 2 */
468 465
469 if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */ 466 if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */
@@ -478,12 +475,11 @@ BN_CTX *ctx;
478 j=1<<(window-1); 475 j=1<<(window-1);
479 for (i=1; i<j; i++) 476 for (i=1; i<j; i++)
480 { 477 {
481 val[i]=BN_new(); 478 BN_init(&(val[i]));
482 if (!BN_mod_mul(val[i],val[i-1],d,m,ctx)) 479 if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx))
483 goto err; 480 goto err;
484 } 481 }
485 for (; i<16; i++) 482 ts=i;
486 val[i]=NULL;
487 483
488 start=1; /* This is used to avoid multiplication etc 484 start=1; /* This is used to avoid multiplication etc
489 * when there is only the value '1' in the 485 * when there is only the value '1' in the
@@ -534,7 +530,7 @@ BN_CTX *ctx;
534 } 530 }
535 531
536 /* wvalue will be an odd number < 2^window */ 532 /* wvalue will be an odd number < 2^window */
537 if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx)) 533 if (!BN_mod_mul(r,r,&(val[wvalue>>1]),m,ctx))
538 goto err; 534 goto err;
539 535
540 /* move the 'window' down further */ 536 /* move the 'window' down further */
@@ -546,8 +542,8 @@ BN_CTX *ctx;
546 ret=1; 542 ret=1;
547err: 543err:
548 ctx->tos--; 544 ctx->tos--;
549 for (i=0; i<16; i++) 545 for (i=0; i<ts; i++)
550 if (val[i] != NULL) BN_clear_free(val[i]); 546 BN_clear_free(&(val[i]));
551 return(ret); 547 return(ret);
552 } 548 }
553 549
diff --git a/src/lib/libcrypto/bn/bn_exp2.c b/src/lib/libcrypto/bn/bn_exp2.c
new file mode 100644
index 0000000000..1132d53365
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_exp2.c
@@ -0,0 +1,195 @@
1#include <stdio.h>
2#include "cryptlib.h"
3#include "bn_lcl.h"
4
5/* I've done some timing with different table sizes.
6 * The main hassle is that even with bits set at 3, this requires
7 * 63 BIGNUMs to store the pre-calculated values.
8 * 512 1024
9 * bits=1 75.4% 79.4%
10 * bits=2 61.2% 62.4%
11 * bits=3 61.3% 59.3%
12 * The lack of speed improvement is also a function of the pre-calculation
13 * which could be removed.
14 */
15#define EXP2_TABLE_BITS 2 /* 1 2 3 4 5 */
16#define EXP2_TABLE_SIZE 4 /* 2 4 8 16 32 */
17
18int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
19 BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
20 {
21 int i,j,k,bits,bits1,bits2,ret=0,wstart,wend,window,xvalue,yvalue;
22 int start=1,ts=0,x,y;
23 BIGNUM *d,*aa1,*aa2,*r;
24 BIGNUM val[EXP2_TABLE_SIZE][EXP2_TABLE_SIZE];
25 BN_MONT_CTX *mont=NULL;
26
27 bn_check_top(a1);
28 bn_check_top(p1);
29 bn_check_top(a2);
30 bn_check_top(p2);
31 bn_check_top(m);
32
33 if (!(m->d[0] & 1))
34 {
35 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
36 return(0);
37 }
38 d= &(ctx->bn[ctx->tos++]);
39 r= &(ctx->bn[ctx->tos++]);
40 bits1=BN_num_bits(p1);
41 bits2=BN_num_bits(p2);
42 if ((bits1 == 0) && (bits2 == 0))
43 {
44 BN_one(r);
45 return(1);
46 }
47 bits=(bits1 > bits2)?bits1:bits2;
48
49 /* If this is not done, things will break in the montgomery
50 * part */
51
52 if (in_mont != NULL)
53 mont=in_mont;
54 else
55 {
56 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
57 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
58 }
59
60 BN_init(&(val[0][0]));
61 BN_init(&(val[1][1]));
62 BN_init(&(val[0][1]));
63 BN_init(&(val[1][0]));
64 ts=1;
65 if (BN_ucmp(a1,m) >= 0)
66 {
67 BN_mod(&(val[1][0]),a1,m,ctx);
68 aa1= &(val[1][0]);
69 }
70 else
71 aa1=a1;
72 if (BN_ucmp(a2,m) >= 0)
73 {
74 BN_mod(&(val[0][1]),a2,m,ctx);
75 aa2= &(val[0][1]);
76 }
77 else
78 aa2=a2;
79 if (!BN_to_montgomery(&(val[1][0]),aa1,mont,ctx)) goto err;
80 if (!BN_to_montgomery(&(val[0][1]),aa2,mont,ctx)) goto err;
81 if (!BN_mod_mul_montgomery(&(val[1][1]),
82 &(val[1][0]),&(val[0][1]),mont,ctx))
83 goto err;
84
85#if 0
86 if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */
87 window=1;
88 else if (bits > 250)
89 window=5; /* max size of window */
90 else if (bits >= 120)
91 window=4;
92 else
93 window=3;
94#else
95 window=EXP2_TABLE_BITS;
96#endif
97
98 k=1<<window;
99 for (x=0; x<k; x++)
100 {
101 if (x >= 2)
102 {
103 BN_init(&(val[x][0]));
104 BN_init(&(val[x][1]));
105 if (!BN_mod_mul_montgomery(&(val[x][0]),
106 &(val[1][0]),&(val[x-1][0]),mont,ctx)) goto err;
107 if (!BN_mod_mul_montgomery(&(val[x][1]),
108 &(val[1][0]),&(val[x-1][1]),mont,ctx)) goto err;
109 }
110 for (y=2; y<k; y++)
111 {
112 BN_init(&(val[x][y]));
113 if (!BN_mod_mul_montgomery(&(val[x][y]),
114 &(val[x][y-1]),&(val[0][1]),mont,ctx))
115 goto err;
116 }
117 }
118 ts=k;
119
120 start=1; /* This is used to avoid multiplication etc
121 * when there is only the value '1' in the
122 * buffer. */
123 xvalue=0; /* The 'x value' of the window */
124 yvalue=0; /* The 'y value' of the window */
125 wstart=bits-1; /* The top bit of the window */
126 wend=0; /* The bottom bit of the window */
127
128 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
129 for (;;)
130 {
131 xvalue=BN_is_bit_set(p1,wstart);
132 yvalue=BN_is_bit_set(p2,wstart);
133 if (!(xvalue || yvalue))
134 {
135 if (!start)
136 {
137 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
138 goto err;
139 }
140 wstart--;
141 if (wstart < 0) break;
142 continue;
143 }
144 /* We now have wstart on a 'set' bit, we now need to work out
145 * how bit a window to do. To do this we need to scan
146 * forward until the last set bit before the end of the
147 * window */
148 j=wstart;
149 /* xvalue=BN_is_bit_set(p1,wstart); already set */
150 /* yvalue=BN_is_bit_set(p1,wstart); already set */
151 wend=0;
152 for (i=1; i<window; i++)
153 {
154 if (wstart-i < 0) break;
155 xvalue+=xvalue;
156 xvalue|=BN_is_bit_set(p1,wstart-i);
157 yvalue+=yvalue;
158 yvalue|=BN_is_bit_set(p2,wstart-i);
159 }
160
161 /* i is the size of the current window */
162 /* add the 'bytes above' */
163 if (!start)
164 for (j=0; j<i; j++)
165 {
166 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
167 goto err;
168 }
169
170 /* wvalue will be an odd number < 2^window */
171 if (xvalue || yvalue)
172 {
173 if (!BN_mod_mul_montgomery(r,r,&(val[xvalue][yvalue]),
174 mont,ctx)) goto err;
175 }
176
177 /* move the 'window' down further */
178 wstart-=i;
179 start=0;
180 if (wstart < 0) break;
181 }
182 BN_from_montgomery(rr,r,mont,ctx);
183 ret=1;
184err:
185 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
186 ctx->tos-=2;
187 for (i=0; i<ts; i++)
188 {
189 for (j=0; j<ts; j++)
190 {
191 BN_clear_free(&(val[i][j]));
192 }
193 }
194 return(ret);
195 }
diff --git a/src/lib/libcrypto/bn/bn_gcd.c b/src/lib/libcrypto/bn/bn_gcd.c
index 071bba3b4b..64a76f4498 100644
--- a/src/lib/libcrypto/bn/bn_gcd.c
+++ b/src/lib/libcrypto/bn/bn_gcd.c
@@ -60,21 +60,17 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63#ifndef NOPROTO
64static BIGNUM *euclid(BIGNUM *a, BIGNUM *b); 63static BIGNUM *euclid(BIGNUM *a, BIGNUM *b);
65#else 64int BN_gcd(BIGNUM *r, BIGNUM *in_a, BIGNUM *in_b, BN_CTX *ctx)
66static BIGNUM *euclid();
67#endif
68
69int BN_gcd(r,in_a,in_b,ctx)
70BIGNUM *r,*in_a,*in_b;
71BN_CTX *ctx;
72 { 65 {
73 BIGNUM *a,*b,*t; 66 BIGNUM *a,*b,*t;
74 int ret=0; 67 int ret=0;
75 68
76 a=ctx->bn[ctx->tos]; 69 bn_check_top(in_a);
77 b=ctx->bn[ctx->tos+1]; 70 bn_check_top(in_b);
71
72 a= &(ctx->bn[ctx->tos]);
73 b= &(ctx->bn[ctx->tos+1]);
78 74
79 if (BN_copy(a,in_a) == NULL) goto err; 75 if (BN_copy(a,in_a) == NULL) goto err;
80 if (BN_copy(b,in_b) == NULL) goto err; 76 if (BN_copy(b,in_b) == NULL) goto err;
@@ -89,12 +85,14 @@ err:
89 return(ret); 85 return(ret);
90 } 86 }
91 87
92static BIGNUM *euclid(a,b) 88static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
93BIGNUM *a,*b;
94 { 89 {
95 BIGNUM *t; 90 BIGNUM *t;
96 int shifts=0; 91 int shifts=0;
97 92
93 bn_check_top(a);
94 bn_check_top(b);
95
98 for (;;) 96 for (;;)
99 { 97 {
100 if (BN_is_zero(b)) 98 if (BN_is_zero(b))
@@ -142,23 +140,26 @@ err:
142 } 140 }
143 141
144/* solves ax == 1 (mod n) */ 142/* solves ax == 1 (mod n) */
145BIGNUM *BN_mod_inverse(a, n, ctx) 143BIGNUM *BN_mod_inverse(BIGNUM *in, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
146BIGNUM *a;
147BIGNUM *n;
148BN_CTX *ctx;
149 { 144 {
150 BIGNUM *A,*B,*X,*Y,*M,*D,*R; 145 BIGNUM *A,*B,*X,*Y,*M,*D,*R;
151 BIGNUM *ret=NULL,*T; 146 BIGNUM *T,*ret=NULL;
152 int sign; 147 int sign;
153 148
154 A=ctx->bn[ctx->tos]; 149 bn_check_top(a);
155 B=ctx->bn[ctx->tos+1]; 150 bn_check_top(n);
156 X=ctx->bn[ctx->tos+2]; 151
157 D=ctx->bn[ctx->tos+3]; 152 A= &(ctx->bn[ctx->tos]);
158 M=ctx->bn[ctx->tos+4]; 153 B= &(ctx->bn[ctx->tos+1]);
159 Y=ctx->bn[ctx->tos+5]; 154 X= &(ctx->bn[ctx->tos+2]);
155 D= &(ctx->bn[ctx->tos+3]);
156 M= &(ctx->bn[ctx->tos+4]);
157 Y= &(ctx->bn[ctx->tos+5]);
160 ctx->tos+=6; 158 ctx->tos+=6;
161 R=BN_new(); 159 if (in == NULL)
160 R=BN_new();
161 else
162 R=in;
162 if (R == NULL) goto err; 163 if (R == NULL) goto err;
163 164
164 BN_zero(X); 165 BN_zero(X);
@@ -175,7 +176,7 @@ BN_CTX *ctx;
175 B=M; 176 B=M;
176 /* T has a struct, M does not */ 177 /* T has a struct, M does not */
177 178
178 if (!BN_mul(T,D,X)) goto err; 179 if (!BN_mul(T,D,X,ctx)) goto err;
179 if (!BN_add(T,T,Y)) goto err; 180 if (!BN_add(T,T,Y)) goto err;
180 M=Y; 181 M=Y;
181 Y=X; 182 Y=X;
@@ -196,7 +197,7 @@ BN_CTX *ctx;
196 } 197 }
197 ret=R; 198 ret=R;
198err: 199err:
199 if ((ret == NULL) && (R != NULL)) BN_free(R); 200 if ((ret == NULL) && (in == NULL)) BN_free(R);
200 ctx->tos-=6; 201 ctx->tos-=6;
201 return(ret); 202 return(ret);
202 } 203 }
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h
index edfd788338..85a372695b 100644
--- a/src/lib/libcrypto/bn/bn_lcl.h
+++ b/src/lib/libcrypto/bn/bn_lcl.h
@@ -59,23 +59,79 @@
59#ifndef HEADER_BN_LCL_H 59#ifndef HEADER_BN_LCL_H
60#define HEADER_BN_LCL_H 60#define HEADER_BN_LCL_H
61 61
62#include "bn.h" 62#include <openssl/bn.h>
63 63
64#ifdef __cplusplus 64#ifdef __cplusplus
65extern "C" { 65extern "C" {
66#endif 66#endif
67 67
68/* Pentium pro 16,16,16,32,64 */
69/* Alpha 16,16,16,16.64 */
70#define BN_MULL_SIZE_NORMAL (16) /* 32 */
71#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */
72#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */
73#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */
74#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */
75
76#if 0
77#ifndef BN_MUL_COMBA
78/* #define bn_mul_comba8(r,a,b) bn_mul_normal(r,a,8,b,8) */
79/* #define bn_mul_comba4(r,a,b) bn_mul_normal(r,a,4,b,4) */
80#endif
81
82#ifndef BN_SQR_COMBA
83/* This is probably faster than using the C code - I need to check */
84#define bn_sqr_comba8(r,a) bn_mul_normal(r,a,8,a,8)
85#define bn_sqr_comba4(r,a) bn_mul_normal(r,a,4,a,4)
86#endif
87#endif
88
68/************************************************************* 89/*************************************************************
69 * Using the long long type 90 * Using the long long type
70 */ 91 */
71#define Lw(t) (((BN_ULONG)(t))&BN_MASK2) 92#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
72#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) 93#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
73 94
74#define bn_fix_top(a) \ 95/* These are used for internal error checking and are not normally used */
75 { \ 96#ifdef BN_DEBUG
76 BN_ULONG *fix_top_l; \ 97#define bn_check_top(a) \
77 for (fix_top_l= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ 98 { if (((a)->top < 0) || ((a)->top > (a)->max)) \
78 if (*(fix_top_l--)) break; \ 99 { char *nullp=NULL; *nullp='z'; } }
100#define bn_check_num(a) if ((a) < 0) { char *nullp=NULL; *nullp='z'; }
101#else
102#define bn_check_top(a)
103#define bn_check_num(a)
104#endif
105
106/* This macro is to add extra stuff for development checking */
107#ifdef BN_DEBUG
108#define bn_set_max(r) ((r)->max=(r)->top,BN_set_flags((r),BN_FLG_STATIC_DATA))
109#else
110#define bn_set_max(r)
111#endif
112
113/* These macros are used to 'take' a section of a bignum for read only use */
114#define bn_set_low(r,a,n) \
115 { \
116 (r)->top=((a)->top > (n))?(n):(a)->top; \
117 (r)->d=(a)->d; \
118 (r)->neg=(a)->neg; \
119 (r)->flags|=BN_FLG_STATIC_DATA; \
120 bn_set_max(r); \
121 }
122
123#define bn_set_high(r,a,n) \
124 { \
125 if ((a)->top > (n)) \
126 { \
127 (r)->top=(a)->top-n; \
128 (r)->d= &((a)->d[n]); \
129 } \
130 else \
131 (r)->top=0; \
132 (r)->neg=(a)->neg; \
133 (r)->flags|=BN_FLG_STATIC_DATA; \
134 bn_set_max(r); \
79 } 135 }
80 136
81/* #define bn_expand(n,b) ((((b)/BN_BITS2) <= (n)->max)?(n):bn_expand2((n),(b))) */ 137/* #define bn_expand(n,b) ((((b)/BN_BITS2) <= (n)->max)?(n):bn_expand2((n),(b))) */
@@ -175,22 +231,35 @@ extern "C" {
175 231
176#endif 232#endif
177 233
178#ifndef NOPROTO 234OPENSSL_EXTERN int bn_limit_bits;
235OPENSSL_EXTERN int bn_limit_num; /* (1<<bn_limit_bits) */
236/* Recursive 'low' limit */
237OPENSSL_EXTERN int bn_limit_bits_low;
238OPENSSL_EXTERN int bn_limit_num_low; /* (1<<bn_limit_bits_low) */
239/* Do modified 'high' part calculation' */
240OPENSSL_EXTERN int bn_limit_bits_high;
241OPENSSL_EXTERN int bn_limit_num_high; /* (1<<bn_limit_bits_high) */
242OPENSSL_EXTERN int bn_limit_bits_mont;
243OPENSSL_EXTERN int bn_limit_num_mont; /* (1<<bn_limit_bits_mont) */
179 244
180BIGNUM *bn_expand2(BIGNUM *b, int bits); 245BIGNUM *bn_expand2(BIGNUM *b, int bits);
181 246
182#ifdef X86_ASM 247void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
183void bn_add_words(BN_ULONG *r,BN_ULONG *a,int num); 248void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
184#endif 249void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
185 250void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp);
186#else 251void bn_sqr_comba8(BN_ULONG *r,BN_ULONG *a);
187 252void bn_sqr_comba4(BN_ULONG *r,BN_ULONG *a);
188BIGNUM *bn_expand2(); 253int bn_cmp_words(BN_ULONG *a,BN_ULONG *b,int n);
189#ifdef X86_ASM 254void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,BN_ULONG *t);
190BN_ULONG bn_add_words(); 255void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,
191#endif 256 int tn, int n,BN_ULONG *t);
192 257void bn_sqr_recursive(BN_ULONG *r,BN_ULONG *a, int n2, BN_ULONG *t);
193#endif 258void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
259void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
260 BN_ULONG *t);
261void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2,
262 BN_ULONG *t);
194 263
195#ifdef __cplusplus 264#ifdef __cplusplus
196} 265}
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
index bfe7628ad4..5d62d88e8b 100644
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ b/src/lib/libcrypto/bn/bn_lib.c
@@ -60,9 +60,68 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63char *BN_version="Big Number part of SSLeay 0.9.0b 29-Jun-1998"; 63const char *BN_version="Big Number" OPENSSL_VERSION_PTEXT;
64
65/* For a 32 bit machine
66 * 2 - 4 == 128
67 * 3 - 8 == 256
68 * 4 - 16 == 512
69 * 5 - 32 == 1024
70 * 6 - 64 == 2048
71 * 7 - 128 == 4096
72 * 8 - 256 == 8192
73 */
74OPENSSL_GLOBAL int bn_limit_bits=0;
75OPENSSL_GLOBAL int bn_limit_num=8; /* (1<<bn_limit_bits) */
76OPENSSL_GLOBAL int bn_limit_bits_low=0;
77OPENSSL_GLOBAL int bn_limit_num_low=8; /* (1<<bn_limit_bits_low) */
78OPENSSL_GLOBAL int bn_limit_bits_high=0;
79OPENSSL_GLOBAL int bn_limit_num_high=8; /* (1<<bn_limit_bits_high) */
80OPENSSL_GLOBAL int bn_limit_bits_mont=0;
81OPENSSL_GLOBAL int bn_limit_num_mont=8; /* (1<<bn_limit_bits_mont) */
82
83void BN_set_params(int mult, int high, int low, int mont)
84 {
85 if (mult >= 0)
86 {
87 if (mult > (sizeof(int)*8)-1)
88 mult=sizeof(int)*8-1;
89 bn_limit_bits=mult;
90 bn_limit_num=1<<mult;
91 }
92 if (high >= 0)
93 {
94 if (high > (sizeof(int)*8)-1)
95 high=sizeof(int)*8-1;
96 bn_limit_bits_high=high;
97 bn_limit_num_high=1<<high;
98 }
99 if (low >= 0)
100 {
101 if (low > (sizeof(int)*8)-1)
102 low=sizeof(int)*8-1;
103 bn_limit_bits_low=low;
104 bn_limit_num_low=1<<low;
105 }
106 if (mont >= 0)
107 {
108 if (mont > (sizeof(int)*8)-1)
109 mont=sizeof(int)*8-1;
110 bn_limit_bits_mont=mont;
111 bn_limit_num_mont=1<<mont;
112 }
113 }
64 114
65BIGNUM *BN_value_one() 115int BN_get_params(int which)
116 {
117 if (which == 0) return(bn_limit_bits);
118 else if (which == 1) return(bn_limit_bits_high);
119 else if (which == 2) return(bn_limit_bits_low);
120 else if (which == 3) return(bn_limit_bits_mont);
121 else return(0);
122 }
123
124BIGNUM *BN_value_one(void)
66 { 125 {
67 static BN_ULONG data_one=1L; 126 static BN_ULONG data_one=1L;
68 static BIGNUM const_one={&data_one,1,1,0}; 127 static BIGNUM const_one={&data_one,1,1,0};
@@ -70,7 +129,7 @@ BIGNUM *BN_value_one()
70 return(&const_one); 129 return(&const_one);
71 } 130 }
72 131
73char *BN_options() 132char *BN_options(void)
74 { 133 {
75 static int init=0; 134 static int init=0;
76 static char data[16]; 135 static char data[16];
@@ -89,10 +148,9 @@ char *BN_options()
89 return(data); 148 return(data);
90 } 149 }
91 150
92int BN_num_bits_word(l) 151int BN_num_bits_word(BN_ULONG l)
93BN_ULONG l;
94 { 152 {
95 static char bits[256]={ 153 static const char bits[256]={
96 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4, 154 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
97 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 155 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
98 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 156 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
@@ -111,24 +169,24 @@ BN_ULONG l;
111 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 169 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
112 }; 170 };
113 171
114#ifdef SIXTY_FOUR_BIT_LONG 172#if defined(SIXTY_FOUR_BIT_LONG)
115 if (l & 0xffffffff00000000L) 173 if (l & 0xffffffff00000000L)
116 { 174 {
117 if (l & 0xffff000000000000L) 175 if (l & 0xffff000000000000L)
118 { 176 {
119 if (l & 0xff00000000000000L) 177 if (l & 0xff00000000000000L)
120 { 178 {
121 return(bits[l>>56]+56); 179 return(bits[(int)(l>>56)]+56);
122 } 180 }
123 else return(bits[l>>48]+48); 181 else return(bits[(int)(l>>48)]+48);
124 } 182 }
125 else 183 else
126 { 184 {
127 if (l & 0x0000ff0000000000L) 185 if (l & 0x0000ff0000000000L)
128 { 186 {
129 return(bits[l>>40]+40); 187 return(bits[(int)(l>>40)]+40);
130 } 188 }
131 else return(bits[l>>32]+32); 189 else return(bits[(int)(l>>32)]+32);
132 } 190 }
133 } 191 }
134 else 192 else
@@ -140,17 +198,17 @@ BN_ULONG l;
140 { 198 {
141 if (l & 0xff00000000000000LL) 199 if (l & 0xff00000000000000LL)
142 { 200 {
143 return(bits[l>>56]+56); 201 return(bits[(int)(l>>56)]+56);
144 } 202 }
145 else return(bits[l>>48]+48); 203 else return(bits[(int)(l>>48)]+48);
146 } 204 }
147 else 205 else
148 { 206 {
149 if (l & 0x0000ff0000000000LL) 207 if (l & 0x0000ff0000000000LL)
150 { 208 {
151 return(bits[l>>40]+40); 209 return(bits[(int)(l>>40)]+40);
152 } 210 }
153 else return(bits[l>>32]+32); 211 else return(bits[(int)(l>>32)]+32);
154 } 212 }
155 } 213 }
156 else 214 else
@@ -161,28 +219,29 @@ BN_ULONG l;
161 if (l & 0xffff0000L) 219 if (l & 0xffff0000L)
162 { 220 {
163 if (l & 0xff000000L) 221 if (l & 0xff000000L)
164 return(bits[l>>24L]+24); 222 return(bits[(int)(l>>24L)]+24);
165 else return(bits[l>>16L]+16); 223 else return(bits[(int)(l>>16L)]+16);
166 } 224 }
167 else 225 else
168#endif 226#endif
169 { 227 {
170#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) 228#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
171 if (l & 0xff00L) 229 if (l & 0xff00L)
172 return(bits[l>>8]+8); 230 return(bits[(int)(l>>8)]+8);
173 else 231 else
174#endif 232#endif
175 return(bits[l ] ); 233 return(bits[(int)(l )] );
176 } 234 }
177 } 235 }
178 } 236 }
179 237
180int BN_num_bits(a) 238int BN_num_bits(const BIGNUM *a)
181BIGNUM *a;
182 { 239 {
183 BN_ULONG l; 240 BN_ULONG l;
184 int i; 241 int i;
185 242
243 bn_check_top(a);
244
186 if (a->top == 0) return(0); 245 if (a->top == 0) return(0);
187 l=a->d[a->top-1]; 246 l=a->d[a->top-1];
188 i=(a->top-1)*BN_BITS2; 247 i=(a->top-1)*BN_BITS2;
@@ -196,126 +255,256 @@ BIGNUM *a;
196 return(i+BN_num_bits_word(l)); 255 return(i+BN_num_bits_word(l));
197 } 256 }
198 257
199void BN_clear_free(a) 258void BN_clear_free(BIGNUM *a)
200BIGNUM *a;
201 { 259 {
260 int i;
261
202 if (a == NULL) return; 262 if (a == NULL) return;
203 if (a->d != NULL) 263 if (a->d != NULL)
204 { 264 {
205 memset(a->d,0,a->max*sizeof(a->d[0])); 265 memset(a->d,0,a->max*sizeof(a->d[0]));
206 Free(a->d); 266 if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
267 Free(a->d);
207 } 268 }
269 i=BN_get_flags(a,BN_FLG_MALLOCED);
208 memset(a,0,sizeof(BIGNUM)); 270 memset(a,0,sizeof(BIGNUM));
209 Free(a); 271 if (i)
272 Free(a);
210 } 273 }
211 274
212void BN_free(a) 275void BN_free(BIGNUM *a)
213BIGNUM *a;
214 { 276 {
215 if (a == NULL) return; 277 if (a == NULL) return;
216 if (a->d != NULL) Free(a->d); 278 if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
217 Free(a); 279 Free(a->d);
280 a->flags|=BN_FLG_FREE; /* REMOVE? */
281 if (a->flags & BN_FLG_MALLOCED)
282 Free(a);
283 }
284
285void BN_init(BIGNUM *a)
286 {
287 memset(a,0,sizeof(BIGNUM));
218 } 288 }
219 289
220BIGNUM *BN_new() 290BIGNUM *BN_new(void)
221 { 291 {
222 BIGNUM *ret; 292 BIGNUM *ret;
223 BN_ULONG *p;
224 293
225 ret=(BIGNUM *)Malloc(sizeof(BIGNUM)); 294 if ((ret=(BIGNUM *)Malloc(sizeof(BIGNUM))) == NULL)
226 if (ret == NULL) goto err; 295 {
296 BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
297 return(NULL);
298 }
299 ret->flags=BN_FLG_MALLOCED;
227 ret->top=0; 300 ret->top=0;
228 ret->neg=0; 301 ret->neg=0;
229 ret->max=(BN_DEFAULT_BITS/BN_BITS2); 302 ret->max=0;
230 p=(BN_ULONG *)Malloc(sizeof(BN_ULONG)*(ret->max+1)); 303 ret->d=NULL;
231 if (p == NULL) goto err;
232 ret->d=p;
233
234 memset(p,0,(ret->max+1)*sizeof(p[0]));
235 return(ret); 304 return(ret);
236err:
237 BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
238 return(NULL);
239 } 305 }
240 306
241BN_CTX *BN_CTX_new() 307
308BN_CTX *BN_CTX_new(void)
242 { 309 {
243 BN_CTX *ret; 310 BN_CTX *ret;
244 BIGNUM *n;
245 int i,j;
246 311
247 ret=(BN_CTX *)Malloc(sizeof(BN_CTX)); 312 ret=(BN_CTX *)Malloc(sizeof(BN_CTX));
248 if (ret == NULL) goto err2; 313 if (ret == NULL)
249
250 for (i=0; i<BN_CTX_NUM; i++)
251 { 314 {
252 n=BN_new(); 315 BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
253 if (n == NULL) goto err; 316 return(NULL);
254 ret->bn[i]=n;
255 } 317 }
256 318
257 /* There is actually an extra one, this is for debugging my 319 BN_CTX_init(ret);
258 * stuff */ 320 ret->flags=BN_FLG_MALLOCED;
259 ret->bn[BN_CTX_NUM]=NULL;
260
261 ret->tos=0;
262 return(ret); 321 return(ret);
263err:
264 for (j=0; j<i; j++)
265 BN_free(ret->bn[j]);
266 Free(ret);
267err2:
268 BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
269 return(NULL);
270 } 322 }
271 323
272void BN_CTX_free(c) 324void BN_CTX_init(BN_CTX *ctx)
273BN_CTX *c; 325 {
326 memset(ctx,0,sizeof(BN_CTX));
327 ctx->tos=0;
328 ctx->flags=0;
329 }
330
331void BN_CTX_free(BN_CTX *c)
274 { 332 {
275 int i; 333 int i;
276 334
335 if(c == NULL)
336 return;
337
277 for (i=0; i<BN_CTX_NUM; i++) 338 for (i=0; i<BN_CTX_NUM; i++)
278 BN_clear_free(c->bn[i]); 339 BN_clear_free(&(c->bn[i]));
279 Free(c); 340 if (c->flags & BN_FLG_MALLOCED)
341 Free(c);
280 } 342 }
281 343
282BIGNUM *bn_expand2(b, words) 344BIGNUM *bn_expand2(BIGNUM *b, int words)
283BIGNUM *b;
284int words;
285 { 345 {
286 BN_ULONG *p; 346 BN_ULONG *A,*a;
347 const BN_ULONG *B;
348 int i;
349
350 bn_check_top(b);
287 351
288 if (words > b->max) 352 if (words > b->max)
289 { 353 {
290 p=(BN_ULONG *)Realloc(b->d,sizeof(BN_ULONG)*(words+1)); 354 bn_check_top(b);
291 if (p == NULL) 355 if (BN_get_flags(b,BN_FLG_STATIC_DATA))
356 {
357 BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
358 return(NULL);
359 }
360 a=A=(BN_ULONG *)Malloc(sizeof(BN_ULONG)*(words+1));
361 if (A == NULL)
292 { 362 {
293 BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE); 363 BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE);
294 return(NULL); 364 return(NULL);
295 } 365 }
296 b->d=p; 366#if 1
297 memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG)); 367 B=b->d;
368 /* Check if the previous number needs to be copied */
369 if (B != NULL)
370 {
371#if 0
372 /* This lot is an unrolled loop to copy b->top
373 * BN_ULONGs from B to A
374 */
375/*
376 * I have nothing against unrolling but it's usually done for
377 * several reasons, namely:
378 * - minimize percentage of decision making code, i.e. branches;
379 * - avoid cache trashing;
380 * - make it possible to schedule loads earlier;
381 * Now let's examine the code below. The cornerstone of C is
382 * "programmer is always right" and that's what we love it for:-)
383 * For this very reason C compilers have to be paranoid when it
384 * comes to data aliasing and assume the worst. Yeah, but what
385 * does it mean in real life? This means that loop body below will
386 * be compiled to sequence of loads immediately followed by stores
387 * as compiler assumes the worst, something in A==B+1 style. As a
388 * result CPU pipeline is going to starve for incoming data. Secondly
389 * if A and B happen to share same cache line such code is going to
390 * cause severe cache trashing. Both factors have severe impact on
391 * performance of modern CPUs and this is the reason why this
392 * particular piece of code is #ifdefed away and replaced by more
393 * "friendly" version found in #else section below. This comment
394 * also applies to BN_copy function.
395 *
396 * <appro@fy.chalmers.se>
397 */
398 for (i=b->top&(~7); i>0; i-=8)
399 {
400 A[0]=B[0]; A[1]=B[1]; A[2]=B[2]; A[3]=B[3];
401 A[4]=B[4]; A[5]=B[5]; A[6]=B[6]; A[7]=B[7];
402 A+=8;
403 B+=8;
404 }
405 switch (b->top&7)
406 {
407 case 7:
408 A[6]=B[6];
409 case 6:
410 A[5]=B[5];
411 case 5:
412 A[4]=B[4];
413 case 4:
414 A[3]=B[3];
415 case 3:
416 A[2]=B[2];
417 case 2:
418 A[1]=B[1];
419 case 1:
420 A[0]=B[0];
421 case 0:
422 /* I need the 'case 0' entry for utrix cc.
423 * If the optimiser is turned on, it does the
424 * switch table by doing
425 * a=top&7
426 * a--;
427 * goto jump_table[a];
428 * If top is 0, this makes us jump to 0xffffffc
429 * which is rather bad :-(.
430 * eric 23-Apr-1998
431 */
432 ;
433 }
434#else
435 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
436 {
437 /*
438 * The fact that the loop is unrolled
439 * 4-wise is a tribute to Intel. It's
440 * the one that doesn't have enough
441 * registers to accommodate more data.
442 * I'd unroll it 8-wise otherwise:-)
443 *
444 * <appro@fy.chalmers.se>
445 */
446 BN_ULONG a0,a1,a2,a3;
447 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
448 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
449 }
450 switch (b->top&3)
451 {
452 case 3: A[2]=B[2];
453 case 2: A[1]=B[1];
454 case 1: A[0]=B[0];
455 case 0: ; /* ultrix cc workaround, see above */
456 }
457#endif
458 Free(b->d);
459 }
460
461 b->d=a;
298 b->max=words; 462 b->max=words;
463
464 /* Now need to zero any data between b->top and b->max */
465
466 A= &(b->d[b->top]);
467 for (i=(b->max - b->top)>>3; i>0; i--,A+=8)
468 {
469 A[0]=0; A[1]=0; A[2]=0; A[3]=0;
470 A[4]=0; A[5]=0; A[6]=0; A[7]=0;
471 }
472 for (i=(b->max - b->top)&7; i>0; i--,A++)
473 A[0]=0;
474#else
475 memset(A,0,sizeof(BN_ULONG)*(words+1));
476 memcpy(A,b->d,sizeof(b->d[0])*b->top);
477 b->d=a;
478 b->max=words;
479#endif
480
481/* memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG)); */
482/* { int i; for (i=b->max; i<words+1; i++) p[i]=i;} */
483
299 } 484 }
300 return(b); 485 return(b);
301 } 486 }
302 487
303BIGNUM *BN_dup(a) 488BIGNUM *BN_dup(const BIGNUM *a)
304BIGNUM *a;
305 { 489 {
306 BIGNUM *r; 490 BIGNUM *r;
307 491
492 if (a == NULL) return NULL;
493
494 bn_check_top(a);
495
308 r=BN_new(); 496 r=BN_new();
309 if (r == NULL) return(NULL); 497 if (r == NULL) return(NULL);
310 return((BIGNUM *)BN_copy(r,a)); 498 return((BIGNUM *)BN_copy(r,a));
311 } 499 }
312 500
313BIGNUM *BN_copy(a, b) 501BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
314BIGNUM *a;
315BIGNUM *b;
316 { 502 {
317 int i; 503 int i;
318 BN_ULONG *A,*B; 504 BN_ULONG *A;
505 const BN_ULONG *B;
506
507 bn_check_top(b);
319 508
320 if (a == b) return(a); 509 if (a == b) return(a);
321 if (bn_wexpand(a,b->top) == NULL) return(NULL); 510 if (bn_wexpand(a,b->top) == NULL) return(NULL);
@@ -323,35 +512,18 @@ BIGNUM *b;
323#if 1 512#if 1
324 A=a->d; 513 A=a->d;
325 B=b->d; 514 B=b->d;
326 for (i=b->top&(~7); i>0; i-=8) 515 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
327 { 516 {
328 A[0]=B[0]; 517 BN_ULONG a0,a1,a2,a3;
329 A[1]=B[1]; 518 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
330 A[2]=B[2]; 519 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
331 A[3]=B[3];
332 A[4]=B[4];
333 A[5]=B[5];
334 A[6]=B[6];
335 A[7]=B[7];
336 A+=8;
337 B+=8;
338 } 520 }
339 switch (b->top&7) 521 switch (b->top&3)
340 { 522 {
341 case 7: 523 case 3: A[2]=B[2];
342 A[6]=B[6]; 524 case 2: A[1]=B[1];
343 case 6: 525 case 1: A[0]=B[0];
344 A[5]=B[5]; 526 case 0: ; /* ultrix cc workaround, see comments in bn_expand2 */
345 case 5:
346 A[4]=B[4];
347 case 4:
348 A[3]=B[3];
349 case 3:
350 A[2]=B[2];
351 case 2:
352 A[1]=B[1];
353 case 1:
354 A[0]=B[0];
355 } 527 }
356#else 528#else
357 memcpy(a->d,b->d,sizeof(b->d[0])*b->top); 529 memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
@@ -359,52 +531,47 @@ BIGNUM *b;
359 531
360/* memset(&(a->d[b->top]),0,sizeof(a->d[0])*(a->max-b->top));*/ 532/* memset(&(a->d[b->top]),0,sizeof(a->d[0])*(a->max-b->top));*/
361 a->top=b->top; 533 a->top=b->top;
362 if (a->top == 0) 534 if ((a->top == 0) && (a->d != NULL))
363 a->d[0]=0; 535 a->d[0]=0;
364 a->neg=b->neg; 536 a->neg=b->neg;
365 return(a); 537 return(a);
366 } 538 }
367 539
368void BN_clear(a) 540void BN_clear(BIGNUM *a)
369BIGNUM *a;
370 { 541 {
371 memset(a->d,0,a->max*sizeof(a->d[0])); 542 if (a->d != NULL)
543 memset(a->d,0,a->max*sizeof(a->d[0]));
372 a->top=0; 544 a->top=0;
373 a->neg=0; 545 a->neg=0;
374 } 546 }
375 547
376unsigned long BN_get_word(a) 548BN_ULONG BN_get_word(BIGNUM *a)
377BIGNUM *a;
378 { 549 {
379 int i,n; 550 int i,n;
380 unsigned long ret=0; 551 BN_ULONG ret=0;
381 552
382 n=BN_num_bytes(a); 553 n=BN_num_bytes(a);
383 if (n > sizeof(unsigned long)) 554 if (n > sizeof(BN_ULONG))
384#ifdef SIXTY_FOUR_BIT_LONG
385 return(BN_MASK2); 555 return(BN_MASK2);
386#else
387 return(0xFFFFFFFFL);
388#endif
389 for (i=a->top-1; i>=0; i--) 556 for (i=a->top-1; i>=0; i--)
390 { 557 {
391#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */ 558#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
392 ret<<=BN_BITS4; /* stops the compiler complaining */ 559 ret<<=BN_BITS4; /* stops the compiler complaining */
393 ret<<=BN_BITS4; 560 ret<<=BN_BITS4;
561#else
562 ret=0;
394#endif 563#endif
395 ret|=a->d[i]; 564 ret|=a->d[i];
396 } 565 }
397 return(ret); 566 return(ret);
398 } 567 }
399 568
400int BN_set_word(a,w) 569int BN_set_word(BIGNUM *a, BN_ULONG w)
401BIGNUM *a;
402unsigned long w;
403 { 570 {
404 int i,n; 571 int i,n;
405 if (bn_expand(a,sizeof(unsigned long)*8) == NULL) return(0); 572 if (bn_expand(a,sizeof(BN_ULONG)*8) == NULL) return(0);
406 573
407 n=sizeof(unsigned long)/BN_BYTES; 574 n=sizeof(BN_ULONG)/BN_BYTES;
408 a->neg=0; 575 a->neg=0;
409 a->top=0; 576 a->top=0;
410 a->d[0]=(BN_ULONG)w&BN_MASK2; 577 a->d[0]=(BN_ULONG)w&BN_MASK2;
@@ -417,6 +584,8 @@ unsigned long w;
417#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */ 584#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
418 w>>=BN_BITS4; 585 w>>=BN_BITS4;
419 w>>=BN_BITS4; 586 w>>=BN_BITS4;
587#else
588 w=0;
420#endif 589#endif
421 a->d[i]=(BN_ULONG)w&BN_MASK2; 590 a->d[i]=(BN_ULONG)w&BN_MASK2;
422 if (a->d[i] != 0) a->top=i+1; 591 if (a->d[i] != 0) a->top=i+1;
@@ -425,10 +594,7 @@ unsigned long w;
425 } 594 }
426 595
427/* ignore negative */ 596/* ignore negative */
428BIGNUM *BN_bin2bn(s, len, ret) 597BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
429unsigned char *s;
430int len;
431BIGNUM *ret;
432 { 598 {
433 unsigned int i,m; 599 unsigned int i,m;
434 unsigned int n; 600 unsigned int n;
@@ -465,9 +631,7 @@ BIGNUM *ret;
465 } 631 }
466 632
467/* ignore negative */ 633/* ignore negative */
468int BN_bn2bin(a, to) 634int BN_bn2bin(const BIGNUM *a, unsigned char *to)
469BIGNUM *a;
470unsigned char *to;
471 { 635 {
472 int n,i; 636 int n,i;
473 BN_ULONG l; 637 BN_ULONG l;
@@ -481,13 +645,14 @@ unsigned char *to;
481 return(n); 645 return(n);
482 } 646 }
483 647
484int BN_ucmp(a, b) 648int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
485BIGNUM *a;
486BIGNUM *b;
487 { 649 {
488 int i; 650 int i;
489 BN_ULONG t1,t2,*ap,*bp; 651 BN_ULONG t1,t2,*ap,*bp;
490 652
653 bn_check_top(a);
654 bn_check_top(b);
655
491 i=a->top-b->top; 656 i=a->top-b->top;
492 if (i != 0) return(i); 657 if (i != 0) return(i);
493 ap=a->d; 658 ap=a->d;
@@ -502,9 +667,7 @@ BIGNUM *b;
502 return(0); 667 return(0);
503 } 668 }
504 669
505int BN_cmp(a, b) 670int BN_cmp(const BIGNUM *a, const BIGNUM *b)
506BIGNUM *a;
507BIGNUM *b;
508 { 671 {
509 int i; 672 int i;
510 int gt,lt; 673 int gt,lt;
@@ -519,6 +682,10 @@ BIGNUM *b;
519 else 682 else
520 return(0); 683 return(0);
521 } 684 }
685
686 bn_check_top(a);
687 bn_check_top(b);
688
522 if (a->neg != b->neg) 689 if (a->neg != b->neg)
523 { 690 {
524 if (a->neg) 691 if (a->neg)
@@ -541,27 +708,25 @@ BIGNUM *b;
541 return(0); 708 return(0);
542 } 709 }
543 710
544int BN_set_bit(a, n) 711int BN_set_bit(BIGNUM *a, int n)
545BIGNUM *a;
546int n;
547 { 712 {
548 int i,j; 713 int i,j,k;
549 714
550 i=n/BN_BITS2; 715 i=n/BN_BITS2;
551 j=n%BN_BITS2; 716 j=n%BN_BITS2;
552 if (a->top <= i) 717 if (a->top <= i)
553 { 718 {
554 if (bn_expand(a,n) == NULL) return(0); 719 if (bn_wexpand(a,i+1) == NULL) return(0);
720 for(k=a->top; k<i+1; k++)
721 a->d[k]=0;
555 a->top=i+1; 722 a->top=i+1;
556 } 723 }
557 724
558 a->d[i]|=(1L<<j); 725 a->d[i]|=(((BN_ULONG)1)<<j);
559 return(1); 726 return(1);
560 } 727 }
561 728
562int BN_clear_bit(a, n) 729int BN_clear_bit(BIGNUM *a, int n)
563BIGNUM *a;
564int n;
565 { 730 {
566 int i,j; 731 int i,j;
567 732
@@ -569,13 +734,12 @@ int n;
569 j=n%BN_BITS2; 734 j=n%BN_BITS2;
570 if (a->top <= i) return(0); 735 if (a->top <= i) return(0);
571 736
572 a->d[i]&=(~(1L<<j)); 737 a->d[i]&=(~(((BN_ULONG)1)<<j));
738 bn_fix_top(a);
573 return(1); 739 return(1);
574 } 740 }
575 741
576int BN_is_bit_set(a, n) 742int BN_is_bit_set(const BIGNUM *a, int n)
577BIGNUM *a;
578int n;
579 { 743 {
580 int i,j; 744 int i,j;
581 745
@@ -586,9 +750,7 @@ int n;
586 return((a->d[i]&(((BN_ULONG)1)<<j))?1:0); 750 return((a->d[i]&(((BN_ULONG)1)<<j))?1:0);
587 } 751 }
588 752
589int BN_mask_bits(a,n) 753int BN_mask_bits(BIGNUM *a, int n)
590BIGNUM *a;
591int n;
592 { 754 {
593 int b,w; 755 int b,w;
594 756
@@ -601,11 +763,25 @@ int n;
601 { 763 {
602 a->top=w+1; 764 a->top=w+1;
603 a->d[w]&= ~(BN_MASK2<<b); 765 a->d[w]&= ~(BN_MASK2<<b);
604 while ((w >= 0) && (a->d[w] == 0))
605 {
606 a->top--;
607 w--;
608 }
609 } 766 }
767 bn_fix_top(a);
610 return(1); 768 return(1);
611 } 769 }
770
771int bn_cmp_words(BN_ULONG *a, BN_ULONG *b, int n)
772 {
773 int i;
774 BN_ULONG aa,bb;
775
776 aa=a[n-1];
777 bb=b[n-1];
778 if (aa != bb) return((aa > bb)?1:-1);
779 for (i=n-2; i>=0; i--)
780 {
781 aa=a[i];
782 bb=b[i];
783 if (aa != bb) return((aa > bb)?1:-1);
784 }
785 return(0);
786 }
787
diff --git a/src/lib/libcrypto/bn/bn_m.c b/src/lib/libcrypto/bn/bn_m.c
deleted file mode 100644
index 5166daaeec..0000000000
--- a/src/lib/libcrypto/bn/bn_m.c
+++ /dev/null
@@ -1,169 +0,0 @@
1/* crypto/bn/bn_m.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62#include "stack.h"
63
64int limit=16;
65
66typedef struct bn_pool_st
67 {
68 int used;
69 int tos;
70 STACK *sk;
71 } BN_POOL;
72
73BIGNUM *BN_POOL_push(bp)
74BN_POOL *bp;
75 {
76 BIGNUM *ret;
77
78 if (bp->used >= bp->tos)
79 {
80 ret=BN_new();
81 sk_push(bp->sk,(char *)ret);
82 bp->tos++;
83 bp->used++;
84 }
85 else
86 {
87 ret=(BIGNUM *)sk_value(bp->sk,bp->used);
88 bp->used++;
89 }
90 return(ret);
91 }
92
93void BN_POOL_pop(bp,num)
94BN_POOL *bp;
95int num;
96 {
97 bp->used-=num;
98 }
99
100int BN_m(r,a,b)
101BIGNUM *r,*a,*b;
102 {
103 static BN_POOL bp;
104 static init=1;
105
106 if (init)
107 {
108 bp.used=0;
109 bp.tos=0;
110 bp.sk=sk_new_null();
111 init=0;
112 }
113 return(BN_mm(r,a,b,&bp));
114 }
115
116/* r must be different to a and b */
117int BN_mm(m, A, B, bp)
118BIGNUM *m,*A,*B;
119BN_POOL *bp;
120 {
121 int i,num;
122 int an,bn;
123 BIGNUM *a,*b,*c,*d,*ac,*bd;
124
125 an=A->top;
126 bn=B->top;
127 if ((an <= limit) || (bn <= limit))
128 {
129 return(BN_mul(m,A,B));
130 }
131
132 a=BN_POOL_push(bp);
133 b=BN_POOL_push(bp);
134 c=BN_POOL_push(bp);
135 d=BN_POOL_push(bp);
136 ac=BN_POOL_push(bp);
137 bd=BN_POOL_push(bp);
138
139 num=(an <= bn)?an:bn;
140 num=1<<(BN_num_bits_word(num-1)-1);
141
142 /* Are going to now chop things into 'num' word chunks. */
143 num*=BN_BITS2;
144
145 BN_copy(a,A);
146 BN_mask_bits(a,num);
147 BN_rshift(b,A,num);
148
149 BN_copy(c,B);
150 BN_mask_bits(c,num);
151 BN_rshift(d,B,num);
152
153 BN_sub(ac ,b,a);
154 BN_sub(bd,c,d);
155 BN_mm(m,ac,bd,bp);
156 BN_mm(ac,a,c,bp);
157 BN_mm(bd,b,d,bp);
158
159 BN_add(m,m,ac);
160 BN_add(m,m,bd);
161 BN_lshift(m,m,num);
162 BN_lshift(bd,bd,num*2);
163
164 BN_add(m,m,ac);
165 BN_add(m,m,bd);
166 BN_POOL_pop(bp,6);
167 return(1);
168 }
169
diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c
deleted file mode 100644
index c351aac14f..0000000000
--- a/src/lib/libcrypto/bn/bn_mod.c
+++ /dev/null
@@ -1,97 +0,0 @@
1/* crypto/bn/bn_mod.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* rem != m */
64int BN_mod(rem, m, d,ctx)
65BIGNUM *rem;
66BIGNUM *m;
67BIGNUM *d;
68BN_CTX *ctx;
69 {
70#if 0 /* The old slow way */
71 int i,nm,nd;
72 BIGNUM *dv;
73
74 if (BN_ucmp(m,d) < 0)
75 return((BN_copy(rem,m) == NULL)?0:1);
76
77 dv=ctx->bn[ctx->tos];
78
79 if (!BN_copy(rem,m)) return(0);
80
81 nm=BN_num_bits(rem);
82 nd=BN_num_bits(d);
83 if (!BN_lshift(dv,d,nm-nd)) return(0);
84 for (i=nm-nd; i>=0; i--)
85 {
86 if (BN_cmp(rem,dv) >= 0)
87 {
88 if (!BN_sub(rem,rem,dv)) return(0);
89 }
90 if (!BN_rshift1(dv,dv)) return(0);
91 }
92 return(1);
93#else
94 return(BN_div(NULL,rem,m,d,ctx));
95#endif
96 }
97
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
index e435df61f8..ee0f410c22 100644
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ b/src/lib/libcrypto/bn/bn_mont.c
@@ -56,251 +56,352 @@
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58 58
59/*
60 * Details about Montgomery multiplication algorithms can be found at:
61 * http://www.ece.orst.edu/ISL/Publications.html
62 * http://www.ece.orst.edu/ISL/Koc/papers/j37acmon.pdf
63 */
64
59#include <stdio.h> 65#include <stdio.h>
60#include "cryptlib.h" 66#include "cryptlib.h"
61#include "bn_lcl.h" 67#include "bn_lcl.h"
62 68
63int BN_mod_mul_montgomery(r,a,b,mont,ctx) 69#define MONT_WORD
64BIGNUM *r,*a,*b; 70
65BN_MONT_CTX *mont; 71int BN_mod_mul_montgomery(BIGNUM *r, BIGNUM *a, BIGNUM *b,
66BN_CTX *ctx; 72 BN_MONT_CTX *mont, BN_CTX *ctx)
67 { 73 {
68 BIGNUM *tmp; 74 BIGNUM *tmp,*tmp2;
75
76 tmp= &(ctx->bn[ctx->tos]);
77 tmp2= &(ctx->bn[ctx->tos]);
78 ctx->tos+=2;
69 79
70 tmp=ctx->bn[ctx->tos++]; 80 bn_check_top(tmp);
81 bn_check_top(tmp2);
71 82
72 if (a == b) 83 if (a == b)
73 { 84 {
85#if 0
86 bn_wexpand(tmp,a->top*2);
87 bn_wexpand(tmp2,a->top*4);
88 bn_sqr_recursive(tmp->d,a->d,a->top,tmp2->d);
89 tmp->top=a->top*2;
90 if (tmp->d[tmp->top-1] == 0)
91 tmp->top--;
92#else
74 if (!BN_sqr(tmp,a,ctx)) goto err; 93 if (!BN_sqr(tmp,a,ctx)) goto err;
94#endif
75 } 95 }
76 else 96 else
77 { 97 {
78 if (!BN_mul(tmp,a,b)) goto err; 98 if (!BN_mul(tmp,a,b,ctx)) goto err;
79 } 99 }
80 /* reduce from aRR to aR */ 100 /* reduce from aRR to aR */
81 if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; 101 if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
82 ctx->tos--; 102 ctx->tos-=2;
83 return(1); 103 return(1);
84err: 104err:
85 return(0); 105 return(0);
86 } 106 }
87 107
88#define MONT_WORD 108int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont,
89 109 BN_CTX *ctx)
90#ifdef MONT_WORD
91int BN_from_montgomery(ret,a,mont,ctx)
92BIGNUM *ret;
93BIGNUM *a;
94BN_MONT_CTX *mont;
95BN_CTX *ctx;
96 { 110 {
97 BIGNUM *n,*t1,*r; 111#ifdef BN_RECURSION_MONT
98 BN_ULONG *ap,*np,*rp,n0,v; 112 if (mont->use_word)
99 int al,nl,max,i,x,ri; 113#endif
100 int retn=0; 114 {
115 BIGNUM *n,*r;
116 BN_ULONG *ap,*np,*rp,n0,v,*nrp;
117 int al,nl,max,i,x,ri;
118 int retn=0;
101 119
102 t1=ctx->bn[ctx->tos]; 120 r= &(ctx->bn[ctx->tos]);
103 r=ctx->bn[ctx->tos+1];
104 121
105 if (!BN_copy(r,a)) goto err; 122 if (!BN_copy(r,a)) goto err1;
106 n=mont->N; 123 n= &(mont->N);
107 124
108 ap=a->d; 125 ap=a->d;
109 /* mont->ri is the size of mont->N in bits/words */ 126 /* mont->ri is the size of mont->N in bits/words */
110 al=ri=mont->ri/BN_BITS2; 127 al=ri=mont->ri/BN_BITS2;
111 128
112 nl=n->top; 129 nl=n->top;
113 if ((al == 0) || (nl == 0)) { r->top=0; return(1); } 130 if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
114 131
115 max=(nl+al+1); /* allow for overflow (no?) XXX */ 132 max=(nl+al+1); /* allow for overflow (no?) XXX */
116 if (bn_wexpand(r,max) == NULL) goto err; 133 if (bn_wexpand(r,max) == NULL) goto err1;
117 if (bn_wexpand(ret,max) == NULL) goto err; 134 if (bn_wexpand(ret,max) == NULL) goto err1;
118 135
119 r->neg=a->neg^n->neg; 136 r->neg=a->neg^n->neg;
120 np=n->d; 137 np=n->d;
121 rp=r->d; 138 rp=r->d;
139 nrp= &(r->d[nl]);
122 140
123 /* clear the top words of T */ 141 /* clear the top words of T */
124#if 1 142#if 1
125 for (i=r->top; i<max; i++) /* memset? XXX */ 143 for (i=r->top; i<max; i++) /* memset? XXX */
126 r->d[i]=0; 144 r->d[i]=0;
127#else 145#else
128 memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); 146 memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
129#endif 147#endif
130 148
131 r->top=max; 149 r->top=max;
132 n0=mont->n0; 150 n0=mont->n0;
133
134 for (i=0; i<nl; i++)
135 {
136#if 0
137 int x1,x2;
138 151
139 if (i+4 > nl) 152#ifdef BN_COUNT
153printf("word BN_from_montgomery %d * %d\n",nl,nl);
154#endif
155 for (i=0; i<nl; i++)
140 { 156 {
141 x2=nl; 157 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
142 x1=0; 158 nrp++;
159 rp++;
160 if (((nrp[-1]+=v)&BN_MASK2) >= v)
161 continue;
162 else
163 {
164 if (((++nrp[0])&BN_MASK2) != 0) continue;
165 if (((++nrp[1])&BN_MASK2) != 0) continue;
166 for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
167 }
143 } 168 }
169 bn_fix_top(r);
170
171 /* mont->ri will be a multiple of the word size */
172#if 0
173 BN_rshift(ret,r,mont->ri);
174#else
175 x=ri;
176 rp=ret->d;
177 ap= &(r->d[x]);
178 if (r->top < x)
179 al=0;
144 else 180 else
181 al=r->top-x;
182 ret->top=al;
183 al-=4;
184 for (i=0; i<al; i+=4)
145 { 185 {
146 x2=i+4; 186 BN_ULONG t1,t2,t3,t4;
147 x1=nl-x2; 187
188 t1=ap[i+0];
189 t2=ap[i+1];
190 t3=ap[i+2];
191 t4=ap[i+3];
192 rp[i+0]=t1;
193 rp[i+1]=t2;
194 rp[i+2]=t3;
195 rp[i+3]=t4;
148 } 196 }
149 v=bn_mul_add_words(&(rp[x1]),&(np[x1]),x2,(rp[x1]*n0)&BN_MASK2); 197 al+=4;
150#else 198 for (; i<al; i++)
151 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2); 199 rp[i]=ap[i];
152#endif 200#endif
153 201
154 if (((rp[nl]+=v)&BN_MASK2) < v) 202 if (BN_ucmp(ret, &(mont->N)) >= 0)
155 { 203 {
156 for (x=(nl+1); (((++rp[x])&BN_MASK2) == 0); x++) 204 BN_usub(ret,ret,&(mont->N)); /* XXX */
157 ;
158 } 205 }
159 rp++; 206 retn=1;
207err1:
208 return(retn);
160 } 209 }
161 while (r->d[r->top-1] == 0) 210#ifdef BN_RECURSION_MONT
162 r->top--; 211 else /* bignum version */
163
164 /* mont->ri will be a multiple of the word size */
165#if 0
166 BN_rshift(ret,r,mont->ri);
167#else
168 ap=r->d;
169 rp=ret->d;
170 x=ri;
171 al=r->top-x;
172 for (i=0; i<al; i++)
173 { 212 {
174 rp[i]=ap[i+x]; 213 BIGNUM *t1,*t2,*t3;
175 } 214 int j,i;
176 ret->top=al; 215
216#ifdef BN_COUNT
217printf("number BN_from_montgomery\n");
177#endif 218#endif
178 219
179 if (BN_ucmp(ret,mont->N) >= 0) 220 t1= &(ctx->bn[ctx->tos]);
180 { 221 t2= &(ctx->bn[ctx->tos+1]);
181 bn_qsub(ret,ret,mont->N); /* XXX */ 222 t3= &(ctx->bn[ctx->tos+2]);
182 }
183 retn=1;
184err:
185 return(retn);
186 }
187#else
188int BN_from_montgomery(r,a,mont,ctx)
189BIGNUM *r;
190BIGNUM *a;
191BN_MONT_CTX *mont;
192BN_CTX *ctx;
193 {
194 BIGNUM *t1,*t2;
195 223
196 t1=ctx->bn[ctx->tos]; 224 i=mont->Ni.top;
197 t2=ctx->bn[ctx->tos+1]; 225 bn_wexpand(ret,i); /* perhaps only i*2 */
226 bn_wexpand(t1,i*4); /* perhaps only i*2 */
227 bn_wexpand(t2,i*2); /* perhaps only i */
198 228
199 if (!BN_copy(t1,a)) goto err; 229 bn_mul_low_recursive(t2->d,a->d,mont->Ni.d,i,t1->d);
200 /* can cheat */
201 BN_mask_bits(t1,mont->ri);
202 230
203 if (!BN_mul(t2,t1,mont->Ni)) goto err; 231 BN_zero(t3);
204 BN_mask_bits(t2,mont->ri); 232 BN_set_bit(t3,mont->N.top*BN_BITS2);
233 bn_sub_words(t3->d,t3->d,a->d,i);
234 bn_mul_high(ret->d,t2->d,mont->N.d,t3->d,i,t1->d);
205 235
206 if (!BN_mul(t1,t2,mont->N)) goto err; 236 /* hmm... if a is between i and 2*i, things are bad */
207 if (!BN_add(t2,a,t1)) goto err; 237 if (a->top > i)
208 BN_rshift(r,t2,mont->ri); 238 {
239 j=(int)(bn_add_words(ret->d,ret->d,&(a->d[i]),i));
240 if (j) /* overflow */
241 bn_sub_words(ret->d,ret->d,mont->N.d,i);
242 }
243 ret->top=i;
244 bn_fix_top(ret);
245 if (a->d[0])
246 BN_add_word(ret,1); /* Always? */
247 else /* Very very rare */
248 {
249 for (i=1; i<mont->N.top-1; i++)
250 {
251 if (a->d[i])
252 {
253 BN_add_word(ret,1); /* Always? */
254 break;
255 }
256 }
257 }
209 258
210 if (BN_ucmp(r,mont->N) >= 0) 259 if (BN_ucmp(ret,&(mont->N)) >= 0)
211 bn_qsub(r,r,mont->N); 260 BN_usub(ret,ret,&(mont->N));
212 261
213 return(1); 262 return(1);
214err: 263 }
215 return(0);
216 }
217#endif 264#endif
265 }
218 266
219BN_MONT_CTX *BN_MONT_CTX_new() 267BN_MONT_CTX *BN_MONT_CTX_new(void)
220 { 268 {
221 BN_MONT_CTX *ret; 269 BN_MONT_CTX *ret;
222 270
223 if ((ret=(BN_MONT_CTX *)Malloc(sizeof(BN_MONT_CTX))) == NULL) 271 if ((ret=(BN_MONT_CTX *)Malloc(sizeof(BN_MONT_CTX))) == NULL)
224 return(NULL); 272 return(NULL);
225 ret->ri=0; 273
226 ret->RR=BN_new(); 274 BN_MONT_CTX_init(ret);
227 ret->N=BN_new(); 275 ret->flags=BN_FLG_MALLOCED;
228 ret->Ni=NULL;
229 if ((ret->RR == NULL) || (ret->N == NULL))
230 {
231 BN_MONT_CTX_free(ret);
232 return(NULL);
233 }
234 return(ret); 276 return(ret);
235 } 277 }
236 278
237void BN_MONT_CTX_free(mont) 279void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
238BN_MONT_CTX *mont; 280 {
281 ctx->use_word=0;
282 ctx->ri=0;
283 BN_init(&(ctx->RR));
284 BN_init(&(ctx->N));
285 BN_init(&(ctx->Ni));
286 ctx->flags=0;
287 }
288
289void BN_MONT_CTX_free(BN_MONT_CTX *mont)
239 { 290 {
240 if (mont->RR != NULL) BN_free(mont->RR); 291 if(mont == NULL)
241 if (mont->N != NULL) BN_free(mont->N); 292 return;
242 if (mont->Ni != NULL) BN_free(mont->Ni); 293
243 Free(mont); 294 BN_free(&(mont->RR));
295 BN_free(&(mont->N));
296 BN_free(&(mont->Ni));
297 if (mont->flags & BN_FLG_MALLOCED)
298 Free(mont);
244 } 299 }
245 300
246int BN_MONT_CTX_set(mont,mod,ctx) 301int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
247BN_MONT_CTX *mont;
248BIGNUM *mod;
249BN_CTX *ctx;
250 { 302 {
251 BIGNUM *Ri=NULL,*R=NULL; 303 BIGNUM Ri,*R;
252 304
253 if (mont->RR == NULL) mont->RR=BN_new(); 305 BN_init(&Ri);
254 if (mont->N == NULL) mont->N=BN_new(); 306 R= &(mont->RR); /* grab RR as a temp */
255 307 BN_copy(&(mont->N),mod); /* Set N */
256 R=mont->RR; /* grab RR as a temp */ 308
257 BN_copy(mont->N,mod); /* Set N */ 309#ifdef BN_RECURSION_MONT
258 310 if (mont->N.top < BN_MONT_CTX_SET_SIZE_WORD)
259#ifdef MONT_WORD 311#endif
260{ 312 {
261 BIGNUM tmod; 313 BIGNUM tmod;
262 BN_ULONG buf[2]; 314 BN_ULONG buf[2];
263 /* int z; */ 315
264 316 mont->use_word=1;
265 mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; 317
266 BN_lshift(R,BN_value_one(),BN_BITS2); /* R */ 318 mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
267 /* I was bad, this modification of a passed variable was 319 BN_zero(R);
268 * breaking the multithreaded stuff :-( 320 BN_set_bit(R,BN_BITS2);
269 * z=mod->top; 321 /* I was bad, this modification of a passed variable was
270 * mod->top=1; */ 322 * breaking the multithreaded stuff :-(
271 323 * z=mod->top;
272 buf[0]=mod->d[0]; 324 * mod->top=1; */
273 buf[1]=0; 325
274 tmod.d=buf; 326 buf[0]=mod->d[0];
275 tmod.top=1; 327 buf[1]=0;
276 tmod.max=mod->max; 328 tmod.d=buf;
277 tmod.neg=mod->neg; 329 tmod.top=1;
278 330 tmod.max=mod->max;
279 if ((Ri=BN_mod_inverse(R,&tmod,ctx)) == NULL) goto err; /* Ri */ 331 tmod.neg=mod->neg;
280 BN_lshift(Ri,Ri,BN_BITS2); /* R*Ri */ 332
281 bn_qsub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */ 333 if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL)
282 BN_div(Ri,NULL,Ri,&tmod,ctx); 334 goto err;
283 mont->n0=Ri->d[0]; 335 BN_lshift(&Ri,&Ri,BN_BITS2); /* R*Ri */
284 BN_free(Ri); 336 if (!BN_is_zero(&Ri))
285 /* mod->top=z; */ 337 {
286} 338#if 1
339 BN_sub_word(&Ri,1);
287#else 340#else
288 mont->ri=BN_num_bits(mod); 341 BN_usub(&Ri,&Ri,BN_value_one()); /* R*Ri - 1 */
289 BN_lshift(R,BN_value_one(),mont->ri); /* R */ 342#endif
290 if ((Ri=BN_mod_inverse(R,mod,ctx)) == NULL) goto err; /* Ri */ 343 }
291 BN_lshift(Ri,Ri,mont->ri); /* R*Ri */ 344 else
292 bn_qsub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */ 345 {
293 BN_div(Ri,NULL,Ri,mod,ctx); 346 /* This is not common..., 1 in BN_MASK2,
294 if (mont->Ni != NULL) BN_free(mont->Ni); 347 * It happens when buf[0] was == 1. So for 8 bit,
295 mont->Ni=Ri; /* Ni=(R*Ri-1)/N */ 348 * this is 1/256, 16bit, 1 in 2^16 etc.
349 */
350 BN_set_word(&Ri,BN_MASK2);
351 }
352 BN_div(&Ri,NULL,&Ri,&tmod,ctx);
353 mont->n0=Ri.d[0];
354 BN_free(&Ri);
355 /* mod->top=z; */
356 }
357#ifdef BN_RECURSION_MONT
358 else
359 {
360 mont->use_word=0;
361 mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
362#if 1
363 BN_zero(R);
364 BN_set_bit(R,mont->ri);
365#else
366 BN_lshift(R,BN_value_one(),mont->ri); /* R */
367#endif
368 if ((BN_mod_inverse(&Ri,R,mod,ctx)) == NULL)
369 goto err;
370 BN_lshift(&Ri,&Ri,mont->ri); /* R*Ri */
371#if 1
372 BN_sub_word(&Ri,1);
373#else
374 BN_usub(&Ri,&Ri,BN_value_one()); /* R*Ri - 1 */
375#endif
376 BN_div(&(mont->Ni),NULL,&Ri,mod,ctx);
377 BN_free(&Ri);
378 }
296#endif 379#endif
297 380
298 /* setup RR for conversions */ 381 /* setup RR for conversions */
382#if 1
383 BN_zero(&(mont->RR));
384 BN_set_bit(&(mont->RR),mont->ri*2);
385#else
299 BN_lshift(mont->RR,BN_value_one(),mont->ri*2); 386 BN_lshift(mont->RR,BN_value_one(),mont->ri*2);
300 BN_mod(mont->RR,mont->RR,mont->N,ctx); 387#endif
388 BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx);
301 389
302 return(1); 390 return(1);
303err: 391err:
304 return(0); 392 return(0);
305 } 393 }
306 394
395BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
396 {
397 if (to == from) return(to);
398
399 BN_copy(&(to->RR),&(from->RR));
400 BN_copy(&(to->N),&(from->N));
401 BN_copy(&(to->Ni),&(from->Ni));
402 to->use_word=from->use_word;
403 to->ri=from->ri;
404 to->n0=from->n0;
405 return(to);
406 }
407
diff --git a/src/lib/libcrypto/bn/bn_mpi.c b/src/lib/libcrypto/bn/bn_mpi.c
index 53945c1057..80e1dca6b7 100644
--- a/src/lib/libcrypto/bn/bn_mpi.c
+++ b/src/lib/libcrypto/bn/bn_mpi.c
@@ -60,9 +60,7 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63int BN_bn2mpi(a,d) 63int BN_bn2mpi(const BIGNUM *a, unsigned char *d)
64BIGNUM *a;
65unsigned char *d;
66 { 64 {
67 int bits; 65 int bits;
68 int num=0; 66 int num=0;
@@ -90,10 +88,7 @@ unsigned char *d;
90 return(num+4+ext); 88 return(num+4+ext);
91 } 89 }
92 90
93BIGNUM *BN_mpi2bn(d,n,a) 91BIGNUM *BN_mpi2bn(unsigned char *d, int n, BIGNUM *a)
94unsigned char *d;
95int n;
96BIGNUM *a;
97 { 92 {
98 long len; 93 long len;
99 int neg=0; 94 int neg=0;
@@ -103,7 +98,7 @@ BIGNUM *a;
103 BNerr(BN_F_BN_MPI2BN,BN_R_INVALID_LENGTH); 98 BNerr(BN_F_BN_MPI2BN,BN_R_INVALID_LENGTH);
104 return(NULL); 99 return(NULL);
105 } 100 }
106 len=(d[0]<<24)|(d[1]<<16)|(d[2]<<8)|d[3]; 101 len=((long)d[0]<<24)|((long)d[1]<<16)|((int)d[2]<<8)|(int)d[3];
107 if ((len+4) != n) 102 if ((len+4) != n)
108 { 103 {
109 BNerr(BN_F_BN_MPI2BN,BN_R_ENCODING_ERROR); 104 BNerr(BN_F_BN_MPI2BN,BN_R_ENCODING_ERROR);
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
index d0c04e1d4b..38c47f3d1f 100644
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ b/src/lib/libcrypto/bn/bn_mul.c
@@ -60,150 +60,697 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63/* r must be different to a and b */ 63#ifdef BN_RECURSION
64/* int BN_mmul(r, a, b) */ 64/* r is 2*n2 words in size,
65int BN_mul(r, a, b) 65 * a and b are both n2 words in size.
66BIGNUM *r; 66 * n2 must be a power of 2.
67BIGNUM *a; 67 * We multiply and return the result.
68BIGNUM *b; 68 * t must be 2*n2 words in size
69 * We calulate
70 * a[0]*b[0]
71 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
72 * a[1]*b[1]
73 */
74void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
75 BN_ULONG *t)
69 { 76 {
70 int i; 77 int n=n2/2,c1,c2;
71 int max,al,bl; 78 unsigned int neg,zero;
72 BN_ULONG *ap,*bp,*rp; 79 BN_ULONG ln,lo,*p;
73 80
74 al=a->top; 81#ifdef BN_COUNT
75 bl=b->top; 82printf(" bn_mul_recursive %d * %d\n",n2,n2);
76 if ((al == 0) || (bl == 0)) 83#endif
84#ifdef BN_MUL_COMBA
85/* if (n2 == 4)
77 { 86 {
78 r->top=0; 87 bn_mul_comba4(r,a,b);
79 return(1); 88 return;
89 }
90 else */ if (n2 == 8)
91 {
92 bn_mul_comba8(r,a,b);
93 return;
94 }
95#endif
96 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
97 {
98 /* This should not happen */
99 bn_mul_normal(r,a,n2,b,n2);
100 return;
101 }
102 /* r=(a[0]-a[1])*(b[1]-b[0]) */
103 c1=bn_cmp_words(a,&(a[n]),n);
104 c2=bn_cmp_words(&(b[n]),b,n);
105 zero=neg=0;
106 switch (c1*3+c2)
107 {
108 case -4:
109 bn_sub_words(t, &(a[n]),a, n); /* - */
110 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
111 break;
112 case -3:
113 zero=1;
114 break;
115 case -2:
116 bn_sub_words(t, &(a[n]),a, n); /* - */
117 bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */
118 neg=1;
119 break;
120 case -1:
121 case 0:
122 case 1:
123 zero=1;
124 break;
125 case 2:
126 bn_sub_words(t, a, &(a[n]),n); /* + */
127 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
128 neg=1;
129 break;
130 case 3:
131 zero=1;
132 break;
133 case 4:
134 bn_sub_words(t, a, &(a[n]),n);
135 bn_sub_words(&(t[n]),&(b[n]),b, n);
136 break;
80 } 137 }
81 138
82 max=(al+bl); 139#ifdef BN_MUL_COMBA
83 if (bn_wexpand(r,max) == NULL) return(0); 140 if (n == 4)
84 r->top=max; 141 {
85 r->neg=a->neg^b->neg; 142 if (!zero)
86 ap=a->d; 143 bn_mul_comba4(&(t[n2]),t,&(t[n]));
87 bp=b->d; 144 else
88 rp=r->d; 145 memset(&(t[n2]),0,8*sizeof(BN_ULONG));
146
147 bn_mul_comba4(r,a,b);
148 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
149 }
150 else if (n == 8)
151 {
152 if (!zero)
153 bn_mul_comba8(&(t[n2]),t,&(t[n]));
154 else
155 memset(&(t[n2]),0,16*sizeof(BN_ULONG));
156
157 bn_mul_comba8(r,a,b);
158 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
159 }
160 else
161#endif
162 {
163 p= &(t[n2*2]);
164 if (!zero)
165 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
166 else
167 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
168 bn_mul_recursive(r,a,b,n,p);
169 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p);
170 }
89 171
90 rp[al]=bn_mul_words(rp,ap,al,*(bp++)); 172 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
91 rp++; 173 * r[10] holds (a[0]*b[0])
92 for (i=1; i<bl; i++) 174 * r[32] holds (b[1]*b[1])
175 */
176
177 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
178
179 if (neg) /* if t[32] is negative */
93 { 180 {
94 rp[al]=bn_mul_add_words(rp,ap,al,*(bp++)); 181 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
95 rp++; 182 }
183 else
184 {
185 /* Might have a carry */
186 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
96 } 187 }
97 if (r->d[max-1] == 0) r->top--;
98 return(1);
99 }
100 188
101#if 0 189 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
102#include "stack.h" 190 * r[10] holds (a[0]*b[0])
191 * r[32] holds (b[1]*b[1])
192 * c1 holds the carry bits
193 */
194 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
195 if (c1)
196 {
197 p= &(r[n+n2]);
198 lo= *p;
199 ln=(lo+c1)&BN_MASK2;
200 *p=ln;
103 201
104int limit=16; 202 /* The overflow will stop before we over write
203 * words we should not overwrite */
204 if (ln < (BN_ULONG)c1)
205 {
206 do {
207 p++;
208 lo= *p;
209 ln=(lo+1)&BN_MASK2;
210 *p=ln;
211 } while (ln == 0);
212 }
213 }
214 }
105 215
106typedef struct bn_pool_st 216/* n+tn is the word length
217 * t needs to be n*4 is size, as does r */
218void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
219 int n, BN_ULONG *t)
107 { 220 {
108 int used; 221 int i,j,n2=n*2;
109 int tos; 222 unsigned int c1;
110 STACK *sk; 223 BN_ULONG ln,lo,*p;
111 } BN_POOL;
112 224
113BIGNUM *BN_POOL_push(bp) 225#ifdef BN_COUNT
114BN_POOL *bp; 226printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n);
115 { 227#endif
116 BIGNUM *ret; 228 if (n < 8)
229 {
230 i=tn+n;
231 bn_mul_normal(r,a,i,b,i);
232 return;
233 }
234
235 /* r=(a[0]-a[1])*(b[1]-b[0]) */
236 bn_sub_words(t, a, &(a[n]),n); /* + */
237 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
117 238
118 if (bp->used >= bp->tos) 239/* if (n == 4)
240 {
241 bn_mul_comba4(&(t[n2]),t,&(t[n]));
242 bn_mul_comba4(r,a,b);
243 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
244 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
245 }
246 else */ if (n == 8)
119 { 247 {
120 ret=BN_new(); 248 bn_mul_comba8(&(t[n2]),t,&(t[n]));
121 sk_push(bp->sk,(char *)ret); 249 bn_mul_comba8(r,a,b);
122 bp->tos++; 250 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
123 bp->used++; 251 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
124 } 252 }
125 else 253 else
126 { 254 {
127 ret=(BIGNUM *)sk_value(bp->sk,bp->used); 255 p= &(t[n2*2]);
128 bp->used++; 256 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
257 bn_mul_recursive(r,a,b,n,p);
258 i=n/2;
259 /* If there is only a bottom half to the number,
260 * just do it */
261 j=tn-i;
262 if (j == 0)
263 {
264 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p);
265 memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
266 }
267 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
268 {
269 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
270 j,i,p);
271 memset(&(r[n2+tn*2]),0,
272 sizeof(BN_ULONG)*(n2-tn*2));
273 }
274 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
275 {
276 memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
277 if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL)
278 {
279 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
280 }
281 else
282 {
283 for (;;)
284 {
285 i/=2;
286 if (i < tn)
287 {
288 bn_mul_part_recursive(&(r[n2]),
289 &(a[n]),&(b[n]),
290 tn-i,i,p);
291 break;
292 }
293 else if (i == tn)
294 {
295 bn_mul_recursive(&(r[n2]),
296 &(a[n]),&(b[n]),
297 i,p);
298 break;
299 }
300 }
301 }
302 }
303 }
304
305 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
306 * r[10] holds (a[0]*b[0])
307 * r[32] holds (b[1]*b[1])
308 */
309
310 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
311 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
312
313 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
314 * r[10] holds (a[0]*b[0])
315 * r[32] holds (b[1]*b[1])
316 * c1 holds the carry bits
317 */
318 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
319 if (c1)
320 {
321 p= &(r[n+n2]);
322 lo= *p;
323 ln=(lo+c1)&BN_MASK2;
324 *p=ln;
325
326 /* The overflow will stop before we over write
327 * words we should not overwrite */
328 if (ln < c1)
329 {
330 do {
331 p++;
332 lo= *p;
333 ln=(lo+1)&BN_MASK2;
334 *p=ln;
335 } while (ln == 0);
336 }
129 } 337 }
130 return(ret);
131 } 338 }
132 339
133void BN_POOL_pop(bp,num) 340/* a and b must be the same size, which is n2.
134BN_POOL *bp; 341 * r needs to be n2 words and t needs to be n2*2
135int num; 342 */
343void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
344 BN_ULONG *t)
136 { 345 {
137 bp->used-=num; 346 int n=n2/2;
347
348#ifdef BN_COUNT
349printf(" bn_mul_low_recursive %d * %d\n",n2,n2);
350#endif
351
352 bn_mul_recursive(r,a,b,n,&(t[0]));
353 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
354 {
355 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
356 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
357 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
358 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
359 }
360 else
361 {
362 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
363 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
364 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
365 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
366 }
138 } 367 }
139 368
140int BN_mul(r,a,b) 369/* a and b must be the same size, which is n2.
141BIGNUM *r,*a,*b; 370 * r needs to be n2 words and t needs to be n2*2
371 * l is the low words of the output.
372 * t needs to be n2*3
373 */
374void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
375 BN_ULONG *t)
142 { 376 {
143 static BN_POOL bp; 377 int i,n;
144 static init=1; 378 int c1,c2;
379 int neg,oneg,zero;
380 BN_ULONG ll,lc,*lp,*mp;
381
382#ifdef BN_COUNT
383printf(" bn_mul_high %d * %d\n",n2,n2);
384#endif
385 n=n2/2;
386
387 /* Calculate (al-ah)*(bh-bl) */
388 neg=zero=0;
389 c1=bn_cmp_words(&(a[0]),&(a[n]),n);
390 c2=bn_cmp_words(&(b[n]),&(b[0]),n);
391 switch (c1*3+c2)
392 {
393 case -4:
394 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
395 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
396 break;
397 case -3:
398 zero=1;
399 break;
400 case -2:
401 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
402 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
403 neg=1;
404 break;
405 case -1:
406 case 0:
407 case 1:
408 zero=1;
409 break;
410 case 2:
411 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
412 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
413 neg=1;
414 break;
415 case 3:
416 zero=1;
417 break;
418 case 4:
419 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
420 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
421 break;
422 }
423
424 oneg=neg;
425 /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
426 /* r[10] = (a[1]*b[1]) */
427#ifdef BN_MUL_COMBA
428 if (n == 8)
429 {
430 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
431 bn_mul_comba8(r,&(a[n]),&(b[n]));
432 }
433 else
434#endif
435 {
436 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2]));
437 bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2]));
438 }
439
440 /* s0 == low(al*bl)
441 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
442 * We know s0 and s1 so the only unknown is high(al*bl)
443 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
444 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
445 */
446 if (l != NULL)
447 {
448 lp= &(t[n2+n]);
449 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
450 }
451 else
452 {
453 c1=0;
454 lp= &(r[0]);
455 }
456
457 if (neg)
458 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
459 else
460 {
461 bn_add_words(&(t[n2]),lp,&(t[0]),n);
462 neg=0;
463 }
464
465 if (l != NULL)
466 {
467 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
468 }
469 else
470 {
471 lp= &(t[n2+n]);
472 mp= &(t[n2]);
473 for (i=0; i<n; i++)
474 lp[i]=((~mp[i])+1)&BN_MASK2;
475 }
476
477 /* s[0] = low(al*bl)
478 * t[3] = high(al*bl)
479 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
480 * r[10] = (a[1]*b[1])
481 */
482 /* R[10] = al*bl
483 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
484 * R[32] = ah*bh
485 */
486 /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
487 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
488 * R[3]=r[1]+(carry/borrow)
489 */
490 if (l != NULL)
491 {
492 lp= &(t[n2]);
493 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
494 }
495 else
496 {
497 lp= &(t[n2+n]);
498 c1=0;
499 }
500 c1+=(int)(bn_add_words(&(t[n2]),lp, &(r[0]),n));
501 if (oneg)
502 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
503 else
504 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
145 505
146 if (init) 506 c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
507 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
508 if (oneg)
509 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
510 else
511 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
512
513 if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
147 { 514 {
148 bp.used=0; 515 i=0;
149 bp.tos=0; 516 if (c1 > 0)
150 bp.sk=sk_new_null(); 517 {
151 init=0; 518 lc=c1;
519 do {
520 ll=(r[i]+lc)&BN_MASK2;
521 r[i++]=ll;
522 lc=(lc > ll);
523 } while (lc);
524 }
525 else
526 {
527 lc= -c1;
528 do {
529 ll=r[i];
530 r[i++]=(ll-lc)&BN_MASK2;
531 lc=(lc > ll);
532 } while (lc);
533 }
534 }
535 if (c2 != 0) /* Add starting at r[1] */
536 {
537 i=n;
538 if (c2 > 0)
539 {
540 lc=c2;
541 do {
542 ll=(r[i]+lc)&BN_MASK2;
543 r[i++]=ll;
544 lc=(lc > ll);
545 } while (lc);
546 }
547 else
548 {
549 lc= -c2;
550 do {
551 ll=r[i];
552 r[i++]=(ll-lc)&BN_MASK2;
553 lc=(lc > ll);
554 } while (lc);
555 }
152 } 556 }
153 return(BN_mm(r,a,b,&bp));
154 } 557 }
558#endif
155 559
156/* r must be different to a and b */ 560int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
157int BN_mm(m, A, B, bp)
158BIGNUM *m,*A,*B;
159BN_POOL *bp;
160 { 561 {
161 int i,num; 562 int top,al,bl;
162 int an,bn; 563 BIGNUM *rr;
163 BIGNUM *a,*b,*c,*d,*ac,*bd; 564#ifdef BN_RECURSION
565 BIGNUM *t;
566 int i,j,k;
567#endif
568
569#ifdef BN_COUNT
570printf("BN_mul %d * %d\n",a->top,b->top);
571#endif
572
573 bn_check_top(a);
574 bn_check_top(b);
575 bn_check_top(r);
576
577 al=a->top;
578 bl=b->top;
579 r->neg=a->neg^b->neg;
580
581 if ((al == 0) || (bl == 0))
582 {
583 BN_zero(r);
584 return(1);
585 }
586 top=al+bl;
164 587
165 an=A->top; 588 if ((r == a) || (r == b))
166 bn=B->top; 589 rr= &(ctx->bn[ctx->tos+1]);
167 if ((an <= limit) || (bn <= limit)) 590 else
591 rr=r;
592
593#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
594 if (al == bl)
168 { 595 {
169 return(BN_mmul(m,A,B)); 596# ifdef BN_MUL_COMBA
597/* if (al == 4)
598 {
599 if (bn_wexpand(rr,8) == NULL) return(0);
600 rr->top=8;
601 bn_mul_comba4(rr->d,a->d,b->d);
602 goto end;
603 }
604 else */ if (al == 8)
605 {
606 if (bn_wexpand(rr,16) == NULL) return(0);
607 rr->top=16;
608 bn_mul_comba8(rr->d,a->d,b->d);
609 goto end;
610 }
611 else
612# endif
613#ifdef BN_RECURSION
614 if (al < BN_MULL_SIZE_NORMAL)
615#endif
616 {
617 if (bn_wexpand(rr,top) == NULL) return(0);
618 rr->top=top;
619 bn_mul_normal(rr->d,a->d,al,b->d,bl);
620 goto end;
621 }
622# ifdef BN_RECURSION
623 goto symetric;
624# endif
170 } 625 }
626#endif
627#ifdef BN_RECURSION
628 else if ((al < BN_MULL_SIZE_NORMAL) || (bl < BN_MULL_SIZE_NORMAL))
629 {
630 if (bn_wexpand(rr,top) == NULL) return(0);
631 rr->top=top;
632 bn_mul_normal(rr->d,a->d,al,b->d,bl);
633 goto end;
634 }
635 else
636 {
637 i=(al-bl);
638 if ((i == 1) && !BN_get_flags(b,BN_FLG_STATIC_DATA))
639 {
640 bn_wexpand(b,al);
641 b->d[bl]=0;
642 bl++;
643 goto symetric;
644 }
645 else if ((i == -1) && !BN_get_flags(a,BN_FLG_STATIC_DATA))
646 {
647 bn_wexpand(a,bl);
648 a->d[al]=0;
649 al++;
650 goto symetric;
651 }
652 }
653#endif
171 654
172 a=BN_POOL_push(bp); 655 /* asymetric and >= 4 */
173 b=BN_POOL_push(bp); 656 if (bn_wexpand(rr,top) == NULL) return(0);
174 c=BN_POOL_push(bp); 657 rr->top=top;
175 d=BN_POOL_push(bp); 658 bn_mul_normal(rr->d,a->d,al,b->d,bl);
176 ac=BN_POOL_push(bp);
177 bd=BN_POOL_push(bp);
178 659
179 num=(an <= bn)?an:bn; 660#ifdef BN_RECURSION
180 num=1<<(BN_num_bits_word(num-1)-1); 661 if (0)
662 {
663symetric:
664 /* symetric and > 4 */
665 /* 16 or larger */
666 j=BN_num_bits_word((BN_ULONG)al);
667 j=1<<(j-1);
668 k=j+j;
669 t= &(ctx->bn[ctx->tos]);
670 if (al == j) /* exact multiple */
671 {
672 bn_wexpand(t,k*2);
673 bn_wexpand(rr,k*2);
674 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
675 }
676 else
677 {
678 bn_wexpand(a,k);
679 bn_wexpand(b,k);
680 bn_wexpand(t,k*4);
681 bn_wexpand(rr,k*4);
682 for (i=a->top; i<k; i++)
683 a->d[i]=0;
684 for (i=b->top; i<k; i++)
685 b->d[i]=0;
686 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
687 }
688 rr->top=top;
689 }
690#endif
691#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
692end:
693#endif
694 bn_fix_top(rr);
695 if (r != rr) BN_copy(r,rr);
696 return(1);
697 }
181 698
182 /* Are going to now chop things into 'num' word chunks. */ 699void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
183 num*=BN_BITS2; 700 {
701 BN_ULONG *rr;
184 702
185 BN_copy(a,A); 703#ifdef BN_COUNT
186 BN_mask_bits(a,num); 704printf(" bn_mul_normal %d * %d\n",na,nb);
187 BN_rshift(b,A,num); 705#endif
188 706
189 BN_copy(c,B); 707 if (na < nb)
190 BN_mask_bits(c,num); 708 {
191 BN_rshift(d,B,num); 709 int itmp;
710 BN_ULONG *ltmp;
192 711
193 BN_sub(ac ,b,a); 712 itmp=na; na=nb; nb=itmp;
194 BN_sub(bd,c,d); 713 ltmp=a; a=b; b=ltmp;
195 BN_mm(m,ac,bd,bp);
196 BN_mm(ac,a,c,bp);
197 BN_mm(bd,b,d,bp);
198 714
199 BN_add(m,m,ac); 715 }
200 BN_add(m,m,bd); 716 rr= &(r[na]);
201 BN_lshift(m,m,num); 717 rr[0]=bn_mul_words(r,a,na,b[0]);
202 BN_lshift(bd,bd,num*2);
203 718
204 BN_add(m,m,ac); 719 for (;;)
205 BN_add(m,m,bd); 720 {
206 BN_POOL_pop(bp,6); 721 if (--nb <= 0) return;
207 return(1); 722 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
723 if (--nb <= 0) return;
724 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
725 if (--nb <= 0) return;
726 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
727 if (--nb <= 0) return;
728 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
729 rr+=4;
730 r+=4;
731 b+=4;
732 }
208 } 733 }
734
/* Schoolbook multiply that only fills r[0..n-1].
 * The value returned by each bn_mul_words()/bn_mul_add_words() call is
 * ignored, so nothing is stored at or above r[n].  Row k (word b[k]) is
 * accumulated starting at r[k] and, because n is decremented before every
 * row, covers only the n-k words that still fit.
 * NOTE(review): n <= 0 still reaches the first bn_mul_words() call with
 * that count -- confirm callers never pass it.
 */
735void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
736 {
737#ifdef BN_COUNT
738printf(" bn_mul_low_normal %d * %d\n",n,n);
209#endif 739#endif
 /* row 0: r[0..n-1] receives a[0..n-1] times b[0] */
740 bn_mul_words(r,a,n,b[0]);
741
 /* remaining rows, unrolled four at a time; each row is one word
  * shorter than the previous one */
742 for (;;)
743 {
744 if (--n <= 0) return;
745 bn_mul_add_words(&(r[1]),a,n,b[1]);
746 if (--n <= 0) return;
747 bn_mul_add_words(&(r[2]),a,n,b[2]);
748 if (--n <= 0) return;
749 bn_mul_add_words(&(r[3]),a,n,b[3]);
750 if (--n <= 0) return;
751 bn_mul_add_words(&(r[4]),a,n,b[4]);
 /* step both windows past the four rows just handled */
752 r+=4;
753 b+=4;
754 }
755 }
756
diff --git a/src/lib/libcrypto/bn/bn_mulw.c b/src/lib/libcrypto/bn/bn_mulw.c
deleted file mode 100644
index abfc7e4d6c..0000000000
--- a/src/lib/libcrypto/bn/bn_mulw.c
+++ /dev/null
@@ -1,366 +0,0 @@
1/* crypto/bn/bn_mulw.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63#ifdef BN_LLONG
64
65BN_ULONG bn_mul_add_words(rp,ap,num,w)
66BN_ULONG *rp,*ap;
67int num;
68BN_ULONG w;
69 {
70 BN_ULONG c1=0;
71
72 for (;;)
73 {
74 mul_add(rp[0],ap[0],w,c1);
75 if (--num == 0) break;
76 mul_add(rp[1],ap[1],w,c1);
77 if (--num == 0) break;
78 mul_add(rp[2],ap[2],w,c1);
79 if (--num == 0) break;
80 mul_add(rp[3],ap[3],w,c1);
81 if (--num == 0) break;
82 ap+=4;
83 rp+=4;
84 }
85
86 return(c1);
87 }
88
89BN_ULONG bn_mul_words(rp,ap,num,w)
90BN_ULONG *rp,*ap;
91int num;
92BN_ULONG w;
93 {
94 BN_ULONG c1=0;
95
96 for (;;)
97 {
98 mul(rp[0],ap[0],w,c1);
99 if (--num == 0) break;
100 mul(rp[1],ap[1],w,c1);
101 if (--num == 0) break;
102 mul(rp[2],ap[2],w,c1);
103 if (--num == 0) break;
104 mul(rp[3],ap[3],w,c1);
105 if (--num == 0) break;
106 ap+=4;
107 rp+=4;
108 }
109 return(c1);
110 }
111
112void bn_sqr_words(r,a,n)
113BN_ULONG *r,*a;
114int n;
115 {
116 for (;;)
117 {
118 BN_ULLONG t;
119
120 t=(BN_ULLONG)(a[0])*(a[0]);
121 r[0]=Lw(t); r[1]=Hw(t);
122 if (--n == 0) break;
123
124 t=(BN_ULLONG)(a[1])*(a[1]);
125 r[2]=Lw(t); r[3]=Hw(t);
126 if (--n == 0) break;
127
128 t=(BN_ULLONG)(a[2])*(a[2]);
129 r[4]=Lw(t); r[5]=Hw(t);
130 if (--n == 0) break;
131
132 t=(BN_ULLONG)(a[3])*(a[3]);
133 r[6]=Lw(t); r[7]=Hw(t);
134 if (--n == 0) break;
135
136 a+=4;
137 r+=8;
138 }
139 }
140
141BN_ULONG bn_add_words(r,a,b,n)
142BN_ULONG *r,*a,*b;
143int n;
144 {
145 BN_ULLONG ll;
146
147 ll=0;
148 for (;;)
149 {
150 ll+= (BN_ULLONG)a[0]+b[0];
151 r[0]=(BN_ULONG)ll&BN_MASK2;
152 ll>>=BN_BITS2;
153 if (--n <= 0) break;
154
155 ll+= (BN_ULLONG)a[1]+b[1];
156 r[1]=(BN_ULONG)ll&BN_MASK2;
157 ll>>=BN_BITS2;
158 if (--n <= 0) break;
159
160 ll+= (BN_ULLONG)a[2]+b[2];
161 r[2]=(BN_ULONG)ll&BN_MASK2;
162 ll>>=BN_BITS2;
163 if (--n <= 0) break;
164
165 ll+= (BN_ULLONG)a[3]+b[3];
166 r[3]=(BN_ULONG)ll&BN_MASK2;
167 ll>>=BN_BITS2;
168 if (--n <= 0) break;
169
170 a+=4;
171 b+=4;
172 r+=4;
173 }
174 return(ll&BN_MASK2);
175 }
176
177#else
178
179BN_ULONG bn_mul_add_words(rp,ap,num,w)
180BN_ULONG *rp,*ap;
181int num;
182BN_ULONG w;
183 {
184 BN_ULONG c=0;
185 BN_ULONG bl,bh;
186
187 bl=LBITS(w);
188 bh=HBITS(w);
189
190 for (;;)
191 {
192 mul_add(rp[0],ap[0],bl,bh,c);
193 if (--num == 0) break;
194 mul_add(rp[1],ap[1],bl,bh,c);
195 if (--num == 0) break;
196 mul_add(rp[2],ap[2],bl,bh,c);
197 if (--num == 0) break;
198 mul_add(rp[3],ap[3],bl,bh,c);
199 if (--num == 0) break;
200 ap+=4;
201 rp+=4;
202 }
203 return(c);
204 }
205
206BN_ULONG bn_mul_words(rp,ap,num,w)
207BN_ULONG *rp,*ap;
208int num;
209BN_ULONG w;
210 {
211 BN_ULONG carry=0;
212 BN_ULONG bl,bh;
213
214 bl=LBITS(w);
215 bh=HBITS(w);
216
217 for (;;)
218 {
219 mul(rp[0],ap[0],bl,bh,carry);
220 if (--num == 0) break;
221 mul(rp[1],ap[1],bl,bh,carry);
222 if (--num == 0) break;
223 mul(rp[2],ap[2],bl,bh,carry);
224 if (--num == 0) break;
225 mul(rp[3],ap[3],bl,bh,carry);
226 if (--num == 0) break;
227 ap+=4;
228 rp+=4;
229 }
230 return(carry);
231 }
232
233void bn_sqr_words(r,a,n)
234BN_ULONG *r,*a;
235int n;
236 {
237 for (;;)
238 {
239 sqr64(r[0],r[1],a[0]);
240 if (--n == 0) break;
241
242 sqr64(r[2],r[3],a[1]);
243 if (--n == 0) break;
244
245 sqr64(r[4],r[5],a[2]);
246 if (--n == 0) break;
247
248 sqr64(r[6],r[7],a[3]);
249 if (--n == 0) break;
250
251 a+=4;
252 r+=8;
253 }
254 }
255
256BN_ULONG bn_add_words(r,a,b,n)
257BN_ULONG *r,*a,*b;
258int n;
259 {
260 BN_ULONG t1,t2;
261 int carry,i;
262
263 carry=0;
264 for (i=0; i<n; i++)
265 {
266 t1= *(a++);
267 t2= *(b++);
268 if (carry)
269 {
270 carry=(t2 >= ((~t1)&BN_MASK2));
271 t2=(t1+t2+1)&BN_MASK2;
272 }
273 else
274 {
275 t2=(t1+t2)&BN_MASK2;
276 carry=(t2<t1);
277 }
278 *(r++)=t2;
279 }
280 return(carry);
281 }
282
283#endif
284
285#if defined(BN_LLONG) && defined(BN_DIV2W)
286
287BN_ULONG bn_div64(h,l,d)
288BN_ULONG h,l,d;
289 {
290 return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
291 }
292
293#else
294
295/* Divide h-l by d and return the result. */
296/* I need to test this some more :-( */
/* Portable version: the quotient is produced in two passes of the main
 * loop (count starts at 2), each pass contributing one q that is
 * combined into ret.  h is the high word and l the low word of the
 * dividend, d the divisor.
 */
297BN_ULONG bn_div64(h,l,d)
298BN_ULONG h,l,d;
299 {
300 BN_ULONG dh,dl,q,ret=0,th,tl,t;
301 int i,count=2;
302
 /* division by zero is answered with an all-ones word */
303 if (d == 0) return(BN_MASK2);
304
 /* i is what BN_num_bits_word() reports for d (presumably its number
  * of significant bits); a too-large h is treated as fatal */
305 i=BN_num_bits_word(d);
306 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
307 {
308#if !defined(NO_STDIO) && !defined(WIN16)
309 fprintf(stderr,"Division would overflow (%d)\n",i);
310#endif
311 abort();
312 }
 /* from here on i is the left-shift applied to d and to h:l */
313 i=BN_BITS2-i;
314 if (h >= d) h-=d;
315
316 if (i)
317 {
318 d<<=i;
 /* the bits shifted out of l are carried into h */
319 h=(h<<i)|(l>>(BN_BITS2-i));
320 l<<=i;
321 }
 /* split the shifted divisor into the parts selected by BN_MASK2h
  * (moved down by BN_BITS4) and BN_MASK2l */
322 dh=(d&BN_MASK2h)>>BN_BITS4;
323 dl=(d&BN_MASK2l);
324 for (;;)
325 {
 /* first estimate of q from h and dh, capped at BN_MASK2l */
326 if ((h>>BN_BITS4) == dh)
327 q=BN_MASK2l;
328 else
329 q=h/dh;
330
 /* lower q until dl*q no longer exceeds what is left over */
331 for (;;)
332 {
333 t=(h-q*dh);
334 if ((t&BN_MASK2h) ||
335 ((dl*q) <= (
336 (t<<BN_BITS4)+
337 ((l&BN_MASK2h)>>BN_BITS4))))
338 break;
339 q--;
340 }
 /* form q*d as the pair th:tl and subtract it from h:l */
341 th=q*dh;
342 tl=q*dl;
343 t=(tl>>BN_BITS4);
344 tl=(tl<<BN_BITS4)&BN_MASK2h;
345 th+=t;
346
 /* borrow from the high part when the low subtraction wraps */
347 if (l < tl) th++;
348 l-=tl;
 /* q was still one too large: add d back and drop q by one */
349 if (h < th)
350 {
351 h+=d;
352 q--;
353 }
354 h-=th;
355
356 if (--count == 0) break;
357
 /* first pass only: store q in the upper part of ret and move
  * the remainder up by BN_BITS4 for the second pass */
358 ret=q<<BN_BITS4;
359 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
360 l=(l&BN_MASK2l)<<BN_BITS4;
361 }
 /* the q from the second pass fills the lower part of ret */
362 ret|=q;
363 return(ret);
364 }
365#endif
366
diff --git a/src/lib/libcrypto/bn/bn_opts.c b/src/lib/libcrypto/bn/bn_opts.c
new file mode 100644
index 0000000000..381be529b2
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_opts.c
@@ -0,0 +1,324 @@
1/* crypto/bn/bn_opts.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59/* most of this code has been pilfered from my libdes speed.c program */
60
61#include <stdio.h>
62#include <stdlib.h>
63#include <signal.h>
64#include <string.h>
65#include <openssl/crypto.h>
66#include <openssl/tmdiff.h>
67#include <openssl/bn.h>
68#include <openssl/err.h>
69
70#define DEFAULT_SIZE 512
71#define DEFAULT_TIME 3
72
73int verbose=1;
74
75typedef struct parms_st
76 {
77 char *name;
78 void (*func)();
79 BIGNUM r;
80 BIGNUM a;
81 BIGNUM b;
82 BIGNUM c;
83 BIGNUM low;
84 BN_CTX *ctx;
85 BN_MONT_CTX *mont;
86 int w;
87 } PARMS;
88
89void do_mul_exp(int num,PARMS *p);
90void do_mul(int num,PARMS *p);
91void do_sqr(int num,PARMS *p);
92void do_mul_low(int num,PARMS *p);
93void do_mul_high(int num,PARMS *p);
94void do_from_montgomery(int num,PARMS *p);
95int time_it(int sec, PARMS *p);
96void do_it(int sec, PARMS *p);
97
98#define P_EXP 1
99#define P_MUL 2
100#define P_SQR 3
101#define P_MULL 4
102#define P_MULH 5
103#define P_MRED 6
104
105int main(int argc, char **argv)
106 {
107 PARMS p;
108 BN_MONT_CTX *mont;
109 int size=0,num;
110 char *name;
111 int type=P_EXP;
112
113 mont=BN_MONT_CTX_new();
114 p.mont=NULL;
115 p.ctx=BN_CTX_new();
116 BN_init(&p.r);
117 BN_init(&p.a);
118 BN_init(&p.b);
119 BN_init(&p.c);
120 BN_init(&p.low);
121 p.w=0;
122
123 for (;;)
124 {
125 if (argc > 1)
126 {
127 if (argv[1][0] == '-')
128 {
129 switch(argv[1][1])
130 {
131 case 'e': type=P_EXP; break;
132 case 'm': type=P_MUL; break;
133 case 's': type=P_SQR; break;
134 case 'l': type=P_MULL; break;
135 case 'h': type=P_MULH; break;
136 case 'r': type=P_MRED; break;
137 default:
138 fprintf(stderr,"options: -[emslhr]\n");
139 exit(1);
140 }
141 }
142 else
143 {
144 size=atoi(argv[1]);
145 }
146 argc--;
147 argv++;
148 }
149 else
150 break;
151 }
152 if (size == 0)
153 size=DEFAULT_SIZE;
154
155 printf("bit size:%5d\n",size);
156
157 BN_rand(&p.a,size,1,0);
158 BN_rand(&p.b,size,1,0);
159 BN_rand(&p.c,size,1,1);
160 BN_mod(&p.a,&p.a,&p.c,p.ctx);
161 BN_mod(&p.b,&p.b,&p.c,p.ctx);
162 p.w=(p.a.top+1)/2;
163
164 BN_mul(&p.low,&p.a,&p.b,p.ctx);
165 p.low.top=p.a.top;
166
167 switch(type)
168 {
169 case P_EXP:
170 p.name="r=a^b%c";
171 p.func=do_mul_exp;
172 p.mont=mont;
173 break;
174 case P_MUL:
175 p.name="r=a*b";
176 p.func=do_mul;
177 break;
178 case P_SQR:
179 p.name="r=a*a";
180 p.func=do_sqr;
181 break;
182 case P_MULL:
183 p.name="r=low(a*b)";
184 p.func=do_mul_low;
185 break;
186 case P_MULH:
187 p.name="r=high(a*b)";
188 p.func=do_mul_high;
189 break;
190 case P_MRED:
191 p.name="r=montgomery_reduction(a)";
192 p.func=do_from_montgomery;
193 p.mont=mont;
194 break;
195 default:
196 fprintf(stderr,"options: -[emslhr]\n");
197 exit(1);
198 }
199
200 num=time_it(DEFAULT_TIME,&p);
201 do_it(num,&p);
202 }
203
204void do_it(int num, PARMS *p)
205 {
206 char *start,*end;
207 int i,j,number;
208 double d;
209
210 start=ms_time_new();
211 end=ms_time_new();
212
213 number=BN_num_bits_word((BN_ULONG)BN_num_bits(&(p->c)))-
214 BN_num_bits_word(BN_BITS2)+2;
215 for (i=number-1; i >=0; i--)
216 {
217 if (i == 1) continue;
218 BN_set_params(i,i,i,1);
219 if (p->mont != NULL)
220 BN_MONT_CTX_set(p->mont,&(p->c),p->ctx);
221
222 printf("Timing %5d (%2d bit) %2d %2d %2d %2d :",
223 (1<<i)*BN_BITS2,i,
224 BN_get_params(0),
225 BN_get_params(1),
226 BN_get_params(2),
227 BN_get_params(3));
228 fflush(stdout);
229
230 ms_time_get(start);
231 p->func(num,p);
232 ms_time_get(end);
233 d=ms_time_diff(start,end);
234 printf("%6.6f sec, or %d in %.4f seconds\n",
235 (double)d/num,num,d);
236 }
237 }
238
239int time_it(int sec, PARMS *p)
240 {
241 char *start,*end;
242 int i,j;
243 double d;
244
245 if (p->mont != NULL)
246 BN_MONT_CTX_set(p->mont,&(p->c),p->ctx);
247
248 start=ms_time_new();
249 end=ms_time_new();
250
251 i=1;
252 for (;;)
253 {
254 if (verbose)
255 printf("timing %s for %d interations\n",p->name,i);
256
257 ms_time_get(start);
258 p->func(i,p);
259 ms_time_get(end);
260 d=ms_time_diff(start,end);
261
262 if (d < 0.01) i*=100;
263 else if (d < 0.1 ) i*=10;
264 else if (d > (double)sec) break;
265 else
266 {
267 i=(int)(1.0*i*sec/d);
268 break;
269 }
270 }
271 if (verbose)
272 printf("using %d interations\n",i);
273 return(i);
274 }
275
276void do_mul_exp(int num, PARMS *p)
277 {
278 int i;
279
280 for (i=0; i<num; i++)
281 BN_mod_exp_mont(&(p->r),&(p->a),&(p->b),&(p->c),
282 p->ctx,p->mont);
283 }
284
285void do_mul(int num, PARMS *p)
286 {
287 int i;
288
289 for (i=0; i<num; i++)
290 BN_mul(&(p->r),&(p->a),&(p->b),p->ctx);
291 }
292
293void do_sqr(int num, PARMS *p)
294 {
295 int i;
296
297 for (i=0; i<num; i++)
298 BN_sqr(&(p->r),&(p->a),p->ctx);
299 }
300
301void do_mul_low(int num, PARMS *p)
302 {
303 int i;
304
305 for (i=0; i<num; i++)
306 BN_mul_low(&(p->r),&(p->a),&(p->b),p->w,p->ctx);
307 }
308
309void do_mul_high(int num, PARMS *p)
310 {
311 int i;
312
313 for (i=0; i<num; i++)
314 BN_mul_low(&(p->r),&(p->a),&(p->b),&(p->low),p->w,p->ctx);
315 }
316
317void do_from_montgomery(int num, PARMS *p)
318 {
319 int i;
320
321 for (i=0; i<num; i++)
322 BN_from_montgomery(&(p->r),&(p->a),p->mont,p->ctx);
323 }
324
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c
index 0c85f70b59..6fa0f9be1e 100644
--- a/src/lib/libcrypto/bn/bn_prime.c
+++ b/src/lib/libcrypto/bn/bn_prime.c
@@ -60,7 +60,7 @@
60#include <time.h> 60#include <time.h>
61#include "cryptlib.h" 61#include "cryptlib.h"
62#include "bn_lcl.h" 62#include "bn_lcl.h"
63#include "rand.h" 63#include <openssl/rand.h>
64 64
65/* The quick seive algorithm approach to weeding out primes is 65/* The quick seive algorithm approach to weeding out primes is
66 * Philip Zimmermann's, as implemented in PGP. I have had a read of 66 * Philip Zimmermann's, as implemented in PGP. I have had a read of
@@ -68,7 +68,6 @@
68 */ 68 */
69#include "bn_prime.h" 69#include "bn_prime.h"
70 70
71#ifndef NOPROTO
72static int witness(BIGNUM *a, BIGNUM *n, BN_CTX *ctx,BN_CTX *ctx2, 71static int witness(BIGNUM *a, BIGNUM *n, BN_CTX *ctx,BN_CTX *ctx2,
73 BN_MONT_CTX *mont); 72 BN_MONT_CTX *mont);
74static int probable_prime(BIGNUM *rnd, int bits); 73static int probable_prime(BIGNUM *rnd, int bits);
@@ -76,32 +75,23 @@ static int probable_prime_dh(BIGNUM *rnd, int bits,
76 BIGNUM *add, BIGNUM *rem, BN_CTX *ctx); 75 BIGNUM *add, BIGNUM *rem, BN_CTX *ctx);
77static int probable_prime_dh_strong(BIGNUM *rnd, int bits, 76static int probable_prime_dh_strong(BIGNUM *rnd, int bits,
78 BIGNUM *add, BIGNUM *rem, BN_CTX *ctx); 77 BIGNUM *add, BIGNUM *rem, BN_CTX *ctx);
79#else 78BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int strong, BIGNUM *add,
80static int witness(); 79 BIGNUM *rem, void (*callback)(int,int,void *), void *cb_arg)
81static int probable_prime();
82static int probable_prime_dh();
83static int probable_prime_dh_strong();
84#endif
85
86BIGNUM *BN_generate_prime(bits,strong,add,rem,callback,cb_arg)
87int bits;
88int strong;
89BIGNUM *add;
90BIGNUM *rem;
91void (*callback)(P_I_I_P);
92char *cb_arg;
93 { 80 {
94 BIGNUM *rnd=NULL; 81 BIGNUM *rnd=NULL;
95 BIGNUM *ret=NULL; 82 BIGNUM t;
96 BIGNUM *t=NULL;
97 int i,j,c1=0; 83 int i,j,c1=0;
98 BN_CTX *ctx; 84 BN_CTX *ctx;
99 85
100 ctx=BN_CTX_new(); 86 ctx=BN_CTX_new();
101 if (ctx == NULL) goto err; 87 if (ctx == NULL) goto err;
102 if ((rnd=BN_new()) == NULL) goto err; 88 if (ret == NULL)
103 if (strong) 89 {
104 if ((t=BN_new()) == NULL) goto err; 90 if ((rnd=BN_new()) == NULL) goto err;
91 }
92 else
93 rnd=ret;
94 BN_init(&t);
105loop: 95loop:
106 /* make a random number and set the top and bottom bits */ 96 /* make a random number and set the top and bottom bits */
107 if (add == NULL) 97 if (add == NULL)
@@ -136,7 +126,7 @@ loop:
136 * check that (p-1)/2 is prime. 126 * check that (p-1)/2 is prime.
137 * Since a prime is odd, We just 127 * Since a prime is odd, We just
138 * need to divide by 2 */ 128 * need to divide by 2 */
139 if (!BN_rshift1(t,rnd)) goto err; 129 if (!BN_rshift1(&t,rnd)) goto err;
140 130
141 for (i=0; i<BN_prime_checks; i++) 131 for (i=0; i<BN_prime_checks; i++)
142 { 132 {
@@ -144,7 +134,7 @@ loop:
144 if (j == -1) goto err; 134 if (j == -1) goto err;
145 if (j == 0) goto loop; 135 if (j == 0) goto loop;
146 136
147 j=BN_is_prime(t,1,callback,ctx,cb_arg); 137 j=BN_is_prime(&t,1,callback,ctx,cb_arg);
148 if (j == -1) goto err; 138 if (j == -1) goto err;
149 if (j == 0) goto loop; 139 if (j == 0) goto loop;
150 140
@@ -156,17 +146,13 @@ loop:
156 ret=rnd; 146 ret=rnd;
157err: 147err:
158 if ((ret == NULL) && (rnd != NULL)) BN_free(rnd); 148 if ((ret == NULL) && (rnd != NULL)) BN_free(rnd);
159 if (t != NULL) BN_free(t); 149 BN_free(&t);
160 if (ctx != NULL) BN_CTX_free(ctx); 150 if (ctx != NULL) BN_CTX_free(ctx);
161 return(ret); 151 return(ret);
162 } 152 }
163 153
164int BN_is_prime(a,checks,callback,ctx_passed,cb_arg) 154int BN_is_prime(BIGNUM *a, int checks, void (*callback)(int,int,void *),
165BIGNUM *a; 155 BN_CTX *ctx_passed, void *cb_arg)
166int checks;
167void (*callback)(P_I_I_P);
168BN_CTX *ctx_passed;
169char *cb_arg;
170 { 156 {
171 int i,j,c2=0,ret= -1; 157 int i,j,c2=0,ret= -1;
172 BIGNUM *check; 158 BIGNUM *check;
@@ -183,7 +169,7 @@ char *cb_arg;
183 if ((ctx2=BN_CTX_new()) == NULL) goto err; 169 if ((ctx2=BN_CTX_new()) == NULL) goto err;
184 if ((mont=BN_MONT_CTX_new()) == NULL) goto err; 170 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
185 171
186 check=ctx->bn[ctx->tos++]; 172 check= &(ctx->bn[ctx->tos++]);
187 173
188 /* Setup the montgomery structure */ 174 /* Setup the montgomery structure */
189 if (!BN_MONT_CTX_set(mont,a,ctx2)) goto err; 175 if (!BN_MONT_CTX_set(mont,a,ctx2)) goto err;
@@ -214,24 +200,21 @@ err:
214 200
215#define RECP_MUL_MOD 201#define RECP_MUL_MOD
216 202
217static int witness(a,n,ctx,ctx2,mont) 203static int witness(BIGNUM *a, BIGNUM *n, BN_CTX *ctx, BN_CTX *ctx2,
218BIGNUM *a; 204 BN_MONT_CTX *mont)
219BIGNUM *n;
220BN_CTX *ctx,*ctx2;
221BN_MONT_CTX *mont;
222 { 205 {
223 int k,i,ret= -1,good; 206 int k,i,ret= -1,good;
224 BIGNUM *d,*dd,*tmp,*d1,*d2,*n1; 207 BIGNUM *d,*dd,*tmp,*d1,*d2,*n1;
225 BIGNUM *mont_one,*mont_n1,*mont_a; 208 BIGNUM *mont_one,*mont_n1,*mont_a;
226 209
227 d1=ctx->bn[ctx->tos]; 210 d1= &(ctx->bn[ctx->tos]);
228 d2=ctx->bn[ctx->tos+1]; 211 d2= &(ctx->bn[ctx->tos+1]);
229 n1=ctx->bn[ctx->tos+2]; 212 n1= &(ctx->bn[ctx->tos+2]);
230 ctx->tos+=3; 213 ctx->tos+=3;
231 214
232 mont_one=ctx2->bn[ctx2->tos]; 215 mont_one= &(ctx2->bn[ctx2->tos]);
233 mont_n1=ctx2->bn[ctx2->tos+1]; 216 mont_n1= &(ctx2->bn[ctx2->tos+1]);
234 mont_a=ctx2->bn[ctx2->tos+2]; 217 mont_a= &(ctx2->bn[ctx2->tos+2]);
235 ctx2->tos+=3; 218 ctx2->tos+=3;
236 219
237 d=d1; 220 d=d1;
@@ -254,7 +237,7 @@ BN_MONT_CTX *mont;
254 good=0; 237 good=0;
255 238
256 BN_mod_mul_montgomery(dd,d,d,mont,ctx2); 239 BN_mod_mul_montgomery(dd,d,d,mont,ctx2);
257 240
258 if (good && (BN_cmp(dd,mont_one) == 0)) 241 if (good && (BN_cmp(dd,mont_one) == 0))
259 { 242 {
260 ret=1; 243 ret=1;
@@ -281,14 +264,13 @@ err:
281 return(ret); 264 return(ret);
282 } 265 }
283 266
284static int probable_prime(rnd, bits) 267static int probable_prime(BIGNUM *rnd, int bits)
285BIGNUM *rnd;
286int bits;
287 { 268 {
288 int i; 269 int i;
289 MS_STATIC BN_ULONG mods[NUMPRIMES]; 270 MS_STATIC BN_ULONG mods[NUMPRIMES];
290 BN_ULONG delta; 271 BN_ULONG delta,d;
291 272
273again:
292 if (!BN_rand(rnd,bits,1,1)) return(0); 274 if (!BN_rand(rnd,bits,1,1)) return(0);
293 /* we now have a random number 'rand' to test. */ 275 /* we now have a random number 'rand' to test. */
294 for (i=1; i<NUMPRIMES; i++) 276 for (i=1; i<NUMPRIMES; i++)
@@ -300,9 +282,12 @@ int bits;
300 * that gcd(rnd-1,primes) == 1 (except for 2) */ 282 * that gcd(rnd-1,primes) == 1 (except for 2) */
301 if (((mods[i]+delta)%primes[i]) <= 1) 283 if (((mods[i]+delta)%primes[i]) <= 1)
302 { 284 {
285 d=delta;
303 delta+=2; 286 delta+=2;
304 /* perhaps need to check for overflow of 287 /* perhaps need to check for overflow of
305 * delta (but delta can be upto 2^32) */ 288 * delta (but delta can be upto 2^32)
289 * 21-May-98 eay - added overflow check */
290 if (delta < d) goto again;
306 goto loop; 291 goto loop;
307 } 292 }
308 } 293 }
@@ -310,17 +295,13 @@ int bits;
310 return(1); 295 return(1);
311 } 296 }
312 297
313static int probable_prime_dh(rnd, bits, add, rem,ctx) 298static int probable_prime_dh(BIGNUM *rnd, int bits, BIGNUM *add, BIGNUM *rem,
314BIGNUM *rnd; 299 BN_CTX *ctx)
315int bits;
316BIGNUM *add;
317BIGNUM *rem;
318BN_CTX *ctx;
319 { 300 {
320 int i,ret=0; 301 int i,ret=0;
321 BIGNUM *t1; 302 BIGNUM *t1;
322 303
323 t1=ctx->bn[ctx->tos++]; 304 t1= &(ctx->bn[ctx->tos++]);
324 305
325 if (!BN_rand(rnd,bits,0,1)) goto err; 306 if (!BN_rand(rnd,bits,0,1)) goto err;
326 307
@@ -338,7 +319,7 @@ BN_CTX *ctx;
338 loop: for (i=1; i<NUMPRIMES; i++) 319 loop: for (i=1; i<NUMPRIMES; i++)
339 { 320 {
340 /* check that rnd is a prime */ 321 /* check that rnd is a prime */
341 if (BN_mod_word(rnd,(BN_LONG)primes[i]) <= 1) 322 if (BN_mod_word(rnd,(BN_ULONG)primes[i]) <= 1)
342 { 323 {
343 if (!BN_add(rnd,rnd,add)) goto err; 324 if (!BN_add(rnd,rnd,add)) goto err;
344 goto loop; 325 goto loop;
@@ -350,20 +331,16 @@ err:
350 return(ret); 331 return(ret);
351 } 332 }
352 333
353static int probable_prime_dh_strong(p, bits, padd, rem,ctx) 334static int probable_prime_dh_strong(BIGNUM *p, int bits, BIGNUM *padd,
354BIGNUM *p; 335 BIGNUM *rem, BN_CTX *ctx)
355int bits;
356BIGNUM *padd;
357BIGNUM *rem;
358BN_CTX *ctx;
359 { 336 {
360 int i,ret=0; 337 int i,ret=0;
361 BIGNUM *t1,*qadd=NULL,*q=NULL; 338 BIGNUM *t1,*qadd=NULL,*q=NULL;
362 339
363 bits--; 340 bits--;
364 t1=ctx->bn[ctx->tos++]; 341 t1= &(ctx->bn[ctx->tos++]);
365 q=ctx->bn[ctx->tos++]; 342 q= &(ctx->bn[ctx->tos++]);
366 qadd=ctx->bn[ctx->tos++]; 343 qadd= &(ctx->bn[ctx->tos++]);
367 344
368 if (!BN_rshift1(qadd,padd)) goto err; 345 if (!BN_rshift1(qadd,padd)) goto err;
369 346
@@ -389,8 +366,8 @@ BN_CTX *ctx;
389 /* check that p and q are prime */ 366 /* check that p and q are prime */
390 /* check that for p and q 367 /* check that for p and q
391 * gcd(p-1,primes) == 1 (except for 2) */ 368 * gcd(p-1,primes) == 1 (except for 2) */
392 if ( (BN_mod_word(p,(BN_LONG)primes[i]) == 0) || 369 if ( (BN_mod_word(p,(BN_ULONG)primes[i]) == 0) ||
393 (BN_mod_word(q,(BN_LONG)primes[i]) == 0)) 370 (BN_mod_word(q,(BN_ULONG)primes[i]) == 0))
394 { 371 {
395 if (!BN_add(p,p,padd)) goto err; 372 if (!BN_add(p,p,padd)) goto err;
396 if (!BN_add(q,q,qadd)) goto err; 373 if (!BN_add(q,q,qadd)) goto err;
@@ -404,20 +381,17 @@ err:
404 } 381 }
405 382
406#if 0 383#if 0
407static int witness(a, n,ctx) 384static int witness(BIGNUM *a, BIGNUM *n, BN_CTX *ctx)
408BIGNUM *a;
409BIGNUM *n;
410BN_CTX *ctx;
411 { 385 {
412 int k,i,nb,ret= -1; 386 int k,i,nb,ret= -1;
413 BIGNUM *d,*dd,*tmp; 387 BIGNUM *d,*dd,*tmp;
414 BIGNUM *d1,*d2,*x,*n1,*inv; 388 BIGNUM *d1,*d2,*x,*n1,*inv;
415 389
416 d1=ctx->bn[ctx->tos]; 390 d1= &(ctx->bn[ctx->tos]);
417 d2=ctx->bn[ctx->tos+1]; 391 d2= &(ctx->bn[ctx->tos+1]);
418 x=ctx->bn[ctx->tos+2]; 392 x= &(ctx->bn[ctx->tos+2]);
419 n1=ctx->bn[ctx->tos+3]; 393 n1= &(ctx->bn[ctx->tos+3]);
420 inv=ctx->bn[ctx->tos+4]; 394 inv=&(ctx->bn[ctx->tos+4]);
421 ctx->tos+=5; 395 ctx->tos+=5;
422 396
423 d=d1; 397 d=d1;
diff --git a/src/lib/libcrypto/bn/bn_prime.pl b/src/lib/libcrypto/bn/bn_prime.pl
index 1b00c21a77..979385a334 100644
--- a/src/lib/libcrypto/bn/bn_prime.pl
+++ b/src/lib/libcrypto/bn/bn_prime.pl
@@ -1,4 +1,4 @@
1#!/usr/bin/perl 1#!/usr/local/bin/perl
2# bn_prime.pl 2# bn_prime.pl
3 3
4$num=2048; 4$num=2048;
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
index 2bcc11c852..2f5ab2617b 100644
--- a/src/lib/libcrypto/bn/bn_print.c
+++ b/src/lib/libcrypto/bn/bn_print.c
@@ -59,14 +59,13 @@
59#include <stdio.h> 59#include <stdio.h>
60#include <ctype.h> 60#include <ctype.h>
61#include "cryptlib.h" 61#include "cryptlib.h"
62#include "buffer.h" 62#include <openssl/buffer.h>
63#include "bn_lcl.h" 63#include "bn_lcl.h"
64 64
65static char *Hex="0123456789ABCDEF"; 65static const char *Hex="0123456789ABCDEF";
66 66
67/* Must 'Free' the returned data */ 67/* Must 'Free' the returned data */
68char *BN_bn2hex(a) 68char *BN_bn2hex(const BIGNUM *a)
69BIGNUM *a;
70 { 69 {
71 int i,j,v,z=0; 70 int i,j,v,z=0;
72 char *buf; 71 char *buf;
@@ -101,8 +100,7 @@ err:
101 } 100 }
102 101
103/* Must 'Free' the returned data */ 102/* Must 'Free' the returned data */
104char *BN_bn2dec(a) 103char *BN_bn2dec(const BIGNUM *a)
105BIGNUM *a;
106 { 104 {
107 int i=0,num; 105 int i=0,num;
108 char *buf=NULL; 106 char *buf=NULL;
@@ -156,9 +154,7 @@ err:
156 return(buf); 154 return(buf);
157 } 155 }
158 156
159int BN_hex2bn(bn,a) 157int BN_hex2bn(BIGNUM **bn, const char *a)
160BIGNUM **bn;
161char *a;
162 { 158 {
163 BIGNUM *ret=NULL; 159 BIGNUM *ret=NULL;
164 BN_ULONG l=0; 160 BN_ULONG l=0;
@@ -169,7 +165,7 @@ char *a;
169 165
170 if (*a == '-') { neg=1; a++; } 166 if (*a == '-') { neg=1; a++; }
171 167
172 for (i=0; isxdigit(a[i]); i++) 168 for (i=0; isxdigit((unsigned char) a[i]); i++)
173 ; 169 ;
174 170
175 num=i+neg; 171 num=i+neg;
@@ -224,9 +220,7 @@ err:
224 return(0); 220 return(0);
225 } 221 }
226 222
227int BN_dec2bn(bn,a) 223int BN_dec2bn(BIGNUM **bn, const char *a)
228BIGNUM **bn;
229char *a;
230 { 224 {
231 BIGNUM *ret=NULL; 225 BIGNUM *ret=NULL;
232 BN_ULONG l=0; 226 BN_ULONG l=0;
@@ -236,7 +230,7 @@ char *a;
236 if ((a == NULL) || (*a == '\0')) return(0); 230 if ((a == NULL) || (*a == '\0')) return(0);
237 if (*a == '-') { neg=1; a++; } 231 if (*a == '-') { neg=1; a++; }
238 232
239 for (i=0; isdigit(a[i]); i++) 233 for (i=0; isdigit((unsigned char) a[i]); i++)
240 ; 234 ;
241 235
242 num=i+neg; 236 num=i+neg;
@@ -286,9 +280,7 @@ err:
286#ifndef NO_BIO 280#ifndef NO_BIO
287 281
288#ifndef NO_FP_API 282#ifndef NO_FP_API
289int BN_print_fp(fp, a) 283int BN_print_fp(FILE *fp, BIGNUM *a)
290FILE *fp;
291BIGNUM *a;
292 { 284 {
293 BIO *b; 285 BIO *b;
294 int ret; 286 int ret;
@@ -302,9 +294,7 @@ BIGNUM *a;
302 } 294 }
303#endif 295#endif
304 296
305int BN_print(bp, a) 297int BN_print(BIO *bp, const BIGNUM *a)
306BIO *bp;
307BIGNUM *a;
308 { 298 {
309 int i,j,v,z=0; 299 int i,j,v,z=0;
310 int ret=0; 300 int ret=0;
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c
index 75b6b0493b..91b8e34ae6 100644
--- a/src/lib/libcrypto/bn/bn_rand.c
+++ b/src/lib/libcrypto/bn/bn_rand.c
@@ -60,13 +60,9 @@
60#include <time.h> 60#include <time.h>
61#include "cryptlib.h" 61#include "cryptlib.h"
62#include "bn_lcl.h" 62#include "bn_lcl.h"
63#include "rand.h" 63#include <openssl/rand.h>
64 64
65int BN_rand(rnd, bits, top, bottom) 65int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
66BIGNUM *rnd;
67int bits;
68int top;
69int bottom;
70 { 66 {
71 unsigned char *buf=NULL; 67 unsigned char *buf=NULL;
72 int ret=0,bit,bytes,mask; 68 int ret=0,bit,bytes,mask;
@@ -85,7 +81,7 @@ int bottom;
85 81
86 /* make a random number and set the top and bottom bits */ 82 /* make a random number and set the top and bottom bits */
87 time(&tim); 83 time(&tim);
88 RAND_seed((unsigned char *)&tim,sizeof(tim)); 84 RAND_seed(&tim,sizeof(tim));
89 85
90 RAND_bytes(buf,(int)bytes); 86 RAND_bytes(buf,(int)bytes);
91 if (top) 87 if (top)
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c
index 72cd69d3fc..c1b0e230ea 100644
--- a/src/lib/libcrypto/bn/bn_recp.c
+++ b/src/lib/libcrypto/bn/bn_recp.c
@@ -60,66 +60,168 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63int BN_mod_mul_reciprocal(r, x, y, m, i, nb, ctx) 63void BN_RECP_CTX_init(BN_RECP_CTX *recp)
64BIGNUM *r;
65BIGNUM *x;
66BIGNUM *y;
67BIGNUM *m;
68BIGNUM *i;
69int nb;
70BN_CTX *ctx;
71 { 64 {
72 int ret=0,j; 65 BN_init(&(recp->N));
73 BIGNUM *a,*b,*c,*d; 66 BN_init(&(recp->Nr));
67 recp->num_bits=0;
68 recp->flags=0;
69 }
70
71BN_RECP_CTX *BN_RECP_CTX_new(void)
72 {
73 BN_RECP_CTX *ret;
74
75 if ((ret=(BN_RECP_CTX *)Malloc(sizeof(BN_RECP_CTX))) == NULL)
76 return(NULL);
77
78 BN_RECP_CTX_init(ret);
79 ret->flags=BN_FLG_MALLOCED;
80 return(ret);
81 }
82
83void BN_RECP_CTX_free(BN_RECP_CTX *recp)
84 {
85 if(recp == NULL)
86 return;
74 87
75 a=ctx->bn[ctx->tos++]; 88 BN_free(&(recp->N));
76 b=ctx->bn[ctx->tos++]; 89 BN_free(&(recp->Nr));
77 c=ctx->bn[ctx->tos++]; 90 if (recp->flags & BN_FLG_MALLOCED)
78 d=ctx->bn[ctx->tos++]; 91 Free(recp);
92 }
79 93
80 if (x == y) 94int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
81 { if (!BN_sqr(a,x,ctx)) goto err; } 95 {
96 BN_copy(&(recp->N),d);
97 BN_zero(&(recp->Nr));
98 recp->num_bits=BN_num_bits(d);
99 recp->shift=0;
100 return(1);
101 }
102
103int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y, BN_RECP_CTX *recp,
104 BN_CTX *ctx)
105 {
106 int ret=0;
107 BIGNUM *a;
108
109 a= &(ctx->bn[ctx->tos++]);
110 if (y != NULL)
111 {
112 if (x == y)
113 { if (!BN_sqr(a,x,ctx)) goto err; }
114 else
115 { if (!BN_mul(a,x,y,ctx)) goto err; }
116 }
82 else 117 else
83 { if (!BN_mul(a,x,y)) goto err; } 118 a=x; /* Just do the mod */
84 if (!BN_rshift(d,a,nb)) goto err; 119
85 if (!BN_mul(b,d,i)) goto err; 120 BN_div_recp(NULL,r,a,recp,ctx);
86 if (!BN_rshift(c,b,nb)) goto err; 121 ret=1;
87 if (!BN_mul(b,m,c)) goto err; 122err:
88 if (!BN_sub(r,a,b)) goto err; 123 ctx->tos--;
124 return(ret);
125 }
126
127int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BN_RECP_CTX *recp,
128 BN_CTX *ctx)
129 {
130 int i,j,tos,ret=0,ex;
131 BIGNUM *a,*b,*d,*r;
132
133 tos=ctx->tos;
134 a= &(ctx->bn[ctx->tos++]);
135 b= &(ctx->bn[ctx->tos++]);
136 if (dv != NULL)
137 d=dv;
138 else
139 d= &(ctx->bn[ctx->tos++]);
140 if (rem != NULL)
141 r=rem;
142 else
143 r= &(ctx->bn[ctx->tos++]);
144
145 if (BN_ucmp(m,&(recp->N)) < 0)
146 {
147 BN_zero(d);
148 BN_copy(r,m);
149 ctx->tos=tos;
150 return(1);
151 }
152
153 /* We want the remainder
154 * Given input of ABCDEF / ab
155 * we need multiply ABCDEF by 3 digests of the reciprocal of ab
156 *
157 */
158 i=BN_num_bits(m);
159
160 j=recp->num_bits*2;
161 if (j > i)
162 {
163 i=j;
164 ex=0;
165 }
166 else
167 {
168 ex=(i-j)/2;
169 }
170
171 j=i/2;
172
173 if (i != recp->shift)
174 recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N),
175 i,ctx);
176
177 if (!BN_rshift(a,m,j-ex)) goto err;
178 if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err;
179 if (!BN_rshift(d,b,j+ex)) goto err;
180 d->neg=0;
181 if (!BN_mul(b,&(recp->N),d,ctx)) goto err;
182 if (!BN_usub(r,m,b)) goto err;
183 r->neg=0;
184
89 j=0; 185 j=0;
90 while (BN_cmp(r,m) >= 0) 186#if 1
187 while (BN_ucmp(r,&(recp->N)) >= 0)
91 { 188 {
92 if (j++ > 2) 189 if (j++ > 2)
93 { 190 {
94 BNerr(BN_F_BN_MOD_MUL_RECIPROCAL,BN_R_BAD_RECIPROCAL); 191 BNerr(BN_F_BN_MOD_MUL_RECIPROCAL,BN_R_BAD_RECIPROCAL);
95 goto err; 192 goto err;
96 } 193 }
97 if (!BN_sub(r,r,m)) goto err; 194 if (!BN_usub(r,r,&(recp->N))) goto err;
195 if (!BN_add_word(d,1)) goto err;
98 } 196 }
197#endif
99 198
199 r->neg=BN_is_zero(r)?0:m->neg;
200 d->neg=m->neg^recp->N.neg;
100 ret=1; 201 ret=1;
101err: 202err:
102 ctx->tos-=4; 203 ctx->tos=tos;
103 return(ret); 204 return(ret);
104 } 205 }
105 206
106int BN_reciprocal(r, m,ctx) 207/* len is the expected size of the result
107BIGNUM *r; 208 * We actually calculate with an extra word of precision, so
108BIGNUM *m; 209 * we can do faster division if the remainder is not required.
109BN_CTX *ctx; 210 */
211int BN_reciprocal(BIGNUM *r, BIGNUM *m, int len, BN_CTX *ctx)
110 { 212 {
111 int nm,ret= -1; 213 int ret= -1;
112 BIGNUM *t; 214 BIGNUM t;
113 215
114 t=ctx->bn[ctx->tos++]; 216 BN_init(&t);
115 217
116 nm=BN_num_bits(m); 218 BN_zero(&t);
117 if (!BN_lshift(t,BN_value_one(),nm*2)) goto err; 219 if (!BN_set_bit(&t,len)) goto err;
118 220
119 if (!BN_div(r,NULL,t,m,ctx)) goto err; 221 if (!BN_div(r,NULL,&t,m,ctx)) goto err;
120 ret=nm; 222 ret=len;
121err: 223err:
122 ctx->tos--; 224 BN_free(&t);
123 return(ret); 225 return(ret);
124 } 226 }
125 227
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
index 944bf1794b..61aae65a6b 100644
--- a/src/lib/libcrypto/bn/bn_shift.c
+++ b/src/lib/libcrypto/bn/bn_shift.c
@@ -60,9 +60,7 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63int BN_lshift1(r, a) 63int BN_lshift1(BIGNUM *r, BIGNUM *a)
64BIGNUM *r;
65BIGNUM *a;
66 { 64 {
67 register BN_ULONG *ap,*rp,t,c; 65 register BN_ULONG *ap,*rp,t,c;
68 int i; 66 int i;
@@ -94,9 +92,7 @@ BIGNUM *a;
94 return(1); 92 return(1);
95 } 93 }
96 94
97int BN_rshift1(r, a) 95int BN_rshift1(BIGNUM *r, BIGNUM *a)
98BIGNUM *r;
99BIGNUM *a;
100 { 96 {
101 BN_ULONG *ap,*rp,t,c; 97 BN_ULONG *ap,*rp,t,c;
102 int i; 98 int i;
@@ -125,10 +121,7 @@ BIGNUM *a;
125 return(1); 121 return(1);
126 } 122 }
127 123
128int BN_lshift(r, a, n) 124int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
129BIGNUM *r;
130BIGNUM *a;
131int n;
132 { 125 {
133 int i,nw,lb,rb; 126 int i,nw,lb,rb;
134 BN_ULONG *t,*f; 127 BN_ULONG *t,*f;
@@ -160,10 +153,7 @@ int n;
160 return(1); 153 return(1);
161 } 154 }
162 155
163int BN_rshift(r, a, n) 156int BN_rshift(BIGNUM *r, BIGNUM *a, int n)
164BIGNUM *r;
165BIGNUM *a;
166int n;
167 { 157 {
168 int i,j,nw,lb,rb; 158 int i,j,nw,lb,rb;
169 BN_ULONG *t,*f; 159 BN_ULONG *t,*f;
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
index a8464610e5..12cce4d7ce 100644
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ b/src/lib/libcrypto/bn/bn_sqr.c
@@ -62,35 +62,98 @@
62 62
63/* r must not be a */ 63/* r must not be a */
64/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */ 64/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
65int BN_sqr(r, a, ctx) 65int BN_sqr(BIGNUM *r, BIGNUM *a, BN_CTX *ctx)
66BIGNUM *r;
67BIGNUM *a;
68BN_CTX *ctx;
69 { 66 {
70 int i,j,max,al; 67 int max,al;
71 BIGNUM *tmp; 68 BIGNUM *tmp,*rr;
72 BN_ULONG *ap,*rp;
73 69
74 tmp=ctx->bn[ctx->tos]; 70#ifdef BN_COUNT
71printf("BN_sqr %d * %d\n",a->top,a->top);
72#endif
73 bn_check_top(a);
74 tmp= &(ctx->bn[ctx->tos]);
75 rr=(a != r)?r: (&ctx->bn[ctx->tos+1]);
75 76
76 al=a->top; 77 al=a->top;
77 if (al == 0) 78 if (al <= 0)
78 { 79 {
79 r->top=0; 80 r->top=0;
80 return(1); 81 return(1);
81 } 82 }
82 83
83 max=(al*2); 84 max=(al+al);
84 if (bn_wexpand(r,1+max) == NULL) return(0); 85 if (bn_wexpand(rr,max+1) == NULL) return(0);
85 if (bn_wexpand(tmp,1+max) == NULL) return(0);
86 86
87 r->neg=0; 87 r->neg=0;
88 if (al == 4)
89 {
90#ifndef BN_SQR_COMBA
91 BN_ULONG t[8];
92 bn_sqr_normal(rr->d,a->d,4,t);
93#else
94 bn_sqr_comba4(rr->d,a->d);
95#endif
96 }
97 else if (al == 8)
98 {
99#ifndef BN_SQR_COMBA
100 BN_ULONG t[16];
101 bn_sqr_normal(rr->d,a->d,8,t);
102#else
103 bn_sqr_comba8(rr->d,a->d);
104#endif
105 }
106 else
107 {
108#if defined(BN_RECURSION)
109 if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
110 {
111 BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
112 bn_sqr_normal(rr->d,a->d,al,t);
113 }
114 else
115 {
116 int j,k;
117
118 j=BN_num_bits_word((BN_ULONG)al);
119 j=1<<(j-1);
120 k=j+j;
121 if (al == j)
122 {
123 if (bn_wexpand(a,k*2) == NULL) return(0);
124 if (bn_wexpand(tmp,k*2) == NULL) return(0);
125 bn_sqr_recursive(rr->d,a->d,al,tmp->d);
126 }
127 else
128 {
129 if (bn_wexpand(tmp,max) == NULL) return(0);
130 bn_sqr_normal(rr->d,a->d,al,tmp->d);
131 }
132 }
133#else
134 if (bn_wexpand(tmp,max) == NULL) return(0);
135 bn_sqr_normal(rr->d,a->d,al,tmp->d);
136#endif
137 }
138
139 rr->top=max;
140 if ((max > 0) && (rr->d[max-1] == 0)) rr->top--;
141 if (rr != r) BN_copy(r,rr);
142 return(1);
143 }
144
145/* tmp must have 2*n words */
146void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp)
147 {
148 int i,j,max;
149 BN_ULONG *ap,*rp;
88 150
89 ap=a->d; 151 max=n*2;
90 rp=r->d; 152 ap=a;
153 rp=r;
91 rp[0]=rp[max-1]=0; 154 rp[0]=rp[max-1]=0;
92 rp++; 155 rp++;
93 j=al; 156 j=n;
94 157
95 if (--j > 0) 158 if (--j > 0)
96 { 159 {
@@ -99,7 +162,7 @@ BN_CTX *ctx;
99 rp+=2; 162 rp+=2;
100 } 163 }
101 164
102 for (i=2; i<al; i++) 165 for (i=n-2; i>0; i--)
103 { 166 {
104 j--; 167 j--;
105 ap++; 168 ap++;
@@ -107,16 +170,112 @@ BN_CTX *ctx;
107 rp+=2; 170 rp+=2;
108 } 171 }
109 172
110 bn_add_words(r->d,r->d,r->d,max); 173 bn_add_words(r,r,r,max);
111 174
112 /* There will not be a carry */ 175 /* There will not be a carry */
113 176
114 bn_sqr_words(tmp->d,a->d,al); 177 bn_sqr_words(tmp,a,n);
115 178
116 bn_add_words(r->d,r->d,tmp->d,max); 179 bn_add_words(r,r,tmp,max);
117
118 r->top=max;
119 if (r->d[max-1] == 0) r->top--;
120 return(1);
121 } 180 }
122 181
182#ifdef BN_RECURSION
183/* r is 2*n words in size,
184 * a and b are both n words in size.
185 * n must be a power of 2.
186 * We multiply and return the result.
187 * t must be 2*n words in size
188 * We calulate
189 * a[0]*b[0]
190 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
191 * a[1]*b[1]
192 */
193void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *t)
194 {
195 int n=n2/2;
196 int zero,c1;
197 BN_ULONG ln,lo,*p;
198
199#ifdef BN_COUNT
200printf(" bn_sqr_recursive %d * %d\n",n2,n2);
201#endif
202 if (n2 == 4)
203 {
204#ifndef BN_SQR_COMBA
205 bn_sqr_normal(r,a,4,t);
206#else
207 bn_sqr_comba4(r,a);
208#endif
209 return;
210 }
211 else if (n2 == 8)
212 {
213#ifndef BN_SQR_COMBA
214 bn_sqr_normal(r,a,8,t);
215#else
216 bn_sqr_comba8(r,a);
217#endif
218 return;
219 }
220 if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
221 {
222 bn_sqr_normal(r,a,n2,t);
223 return;
224 }
225 /* r=(a[0]-a[1])*(a[1]-a[0]) */
226 c1=bn_cmp_words(a,&(a[n]),n);
227 zero=0;
228 if (c1 > 0)
229 bn_sub_words(t,a,&(a[n]),n);
230 else if (c1 < 0)
231 bn_sub_words(t,&(a[n]),a,n);
232 else
233 zero=1;
234
235 /* The result will always be negative unless it is zero */
236 p= &(t[n2*2]);
237
238 if (!zero)
239 bn_sqr_recursive(&(t[n2]),t,n,p);
240 else
241 memset(&(t[n2]),0,n*sizeof(BN_ULONG));
242 bn_sqr_recursive(r,a,n,p);
243 bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
244
245 /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
246 * r[10] holds (a[0]*b[0])
247 * r[32] holds (b[1]*b[1])
248 */
249
250 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
251
252 /* t[32] is negative */
253 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
254
255 /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
256 * r[10] holds (a[0]*a[0])
257 * r[32] holds (a[1]*a[1])
258 * c1 holds the carry bits
259 */
260 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
261 if (c1)
262 {
263 p= &(r[n+n2]);
264 lo= *p;
265 ln=(lo+c1)&BN_MASK2;
266 *p=ln;
267
268 /* The overflow will stop before we over write
269 * words we should not overwrite */
270 if (ln < (BN_ULONG)c1)
271 {
272 do {
273 p++;
274 lo= *p;
275 ln=(lo+1)&BN_MASK2;
276 *p=ln;
277 } while (ln == 0);
278 }
279 }
280 }
281#endif
diff --git a/src/lib/libcrypto/bn/bn_sub.c b/src/lib/libcrypto/bn/bn_sub.c
deleted file mode 100644
index bba80f8afb..0000000000
--- a/src/lib/libcrypto/bn/bn_sub.c
+++ /dev/null
@@ -1,180 +0,0 @@
1/* crypto/bn/bn_sub.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* unsigned subtraction of b from a, a must be larger than b. */
64void bn_qsub(r, a, b)
65BIGNUM *r;
66BIGNUM *a;
67BIGNUM *b;
68 {
69 int max,min;
70 register BN_ULONG t1,t2,*ap,*bp,*rp;
71 int i,carry;
72#if defined(IRIX_CC_BUG) && !defined(LINT)
73 int dummy;
74#endif
75
76 max=a->top;
77 min=b->top;
78 ap=a->d;
79 bp=b->d;
80 rp=r->d;
81
82 carry=0;
83 for (i=0; i<min; i++)
84 {
85 t1= *(ap++);
86 t2= *(bp++);
87 if (carry)
88 {
89 carry=(t1 <= t2);
90 t1=(t1-t2-1)&BN_MASK2;
91 }
92 else
93 {
94 carry=(t1 < t2);
95 t1=(t1-t2)&BN_MASK2;
96 }
97#if defined(IRIX_CC_BUG) && !defined(LINT)
98 dummy=t1;
99#endif
100 *(rp++)=t1&BN_MASK2;
101 }
102 if (carry) /* subtracted */
103 {
104 while (i < max)
105 {
106 i++;
107 t1= *(ap++);
108 t2=(t1-1)&BN_MASK2;
109 *(rp++)=t2;
110 if (t1 > t2) break;
111 }
112 }
113#if 0
114 memcpy(rp,ap,sizeof(*rp)*(max-i));
115#else
116 for (; i<max; i++)
117 *(rp++)= *(ap++);
118#endif
119
120 r->top=max;
121 bn_fix_top(r);
122 }
123
124int BN_sub(r, a, b)
125BIGNUM *r;
126BIGNUM *a;
127BIGNUM *b;
128 {
129 int max,i;
130 int add=0,neg=0;
131 BIGNUM *tmp;
132
133 /* a - b a-b
134 * a - -b a+b
135 * -a - b -(a+b)
136 * -a - -b b-a
137 */
138 if (a->neg)
139 {
140 if (b->neg)
141 { tmp=a; a=b; b=tmp; }
142 else
143 { add=1; neg=1; }
144 }
145 else
146 {
147 if (b->neg) { add=1; neg=0; }
148 }
149
150 if (add)
151 {
152 /* As a fast max size, do a a->top | b->top */
153 i=(a->top | b->top)+1;
154 if (bn_wexpand(r,i) == NULL)
155 return(0);
156 if (i)
157 bn_qadd(r,a,b);
158 else
159 bn_qadd(r,b,a);
160 r->neg=neg;
161 return(1);
162 }
163
164 /* We are actually doing a - b :-) */
165
166 max=(a->top > b->top)?a->top:b->top;
167 if (bn_wexpand(r,max) == NULL) return(0);
168 if (BN_ucmp(a,b) < 0)
169 {
170 bn_qsub(r,b,a);
171 r->neg=1;
172 }
173 else
174 {
175 bn_qsub(r,a,b);
176 r->neg=0;
177 }
178 return(1);
179 }
180
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c
index 4b3d0f011d..c0cfbc6797 100644
--- a/src/lib/libcrypto/bn/bn_word.c
+++ b/src/lib/libcrypto/bn/bn_word.c
@@ -60,9 +60,7 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63BN_ULONG BN_mod_word(a, w) 63BN_ULONG BN_mod_word(BIGNUM *a, BN_ULONG w)
64BIGNUM *a;
65unsigned long w;
66 { 64 {
67#ifndef BN_LLONG 65#ifndef BN_LLONG
68 BN_ULONG ret=0; 66 BN_ULONG ret=0;
@@ -75,8 +73,8 @@ unsigned long w;
75 for (i=a->top-1; i>=0; i--) 73 for (i=a->top-1; i>=0; i--)
76 { 74 {
77#ifndef BN_LLONG 75#ifndef BN_LLONG
78 ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%(unsigned long)w; 76 ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%w;
79 ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%(unsigned long)w; 77 ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%w;
80#else 78#else
81 ret=(BN_ULLONG)(((ret<<(BN_ULLONG)BN_BITS2)|a->d[i])% 79 ret=(BN_ULLONG)(((ret<<(BN_ULLONG)BN_BITS2)|a->d[i])%
82 (BN_ULLONG)w); 80 (BN_ULLONG)w);
@@ -85,9 +83,7 @@ unsigned long w;
85 return((BN_ULONG)ret); 83 return((BN_ULONG)ret);
86 } 84 }
87 85
88BN_ULONG BN_div_word(a, w) 86BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
89BIGNUM *a;
90unsigned long w;
91 { 87 {
92 BN_ULONG ret; 88 BN_ULONG ret;
93 int i; 89 int i;
@@ -100,18 +96,16 @@ unsigned long w;
100 BN_ULONG l,d; 96 BN_ULONG l,d;
101 97
102 l=a->d[i]; 98 l=a->d[i];
103 d=bn_div64(ret,l,w); 99 d=bn_div_words(ret,l,w);
104 ret=(l-((d*w)&BN_MASK2))&BN_MASK2; 100 ret=(l-((d*w)&BN_MASK2))&BN_MASK2;
105 a->d[i]=d; 101 a->d[i]=d;
106 } 102 }
107 if (a->d[a->top-1] == 0) 103 if ((a->top > 0) && (a->d[a->top-1] == 0))
108 a->top--; 104 a->top--;
109 return(ret); 105 return(ret);
110 } 106 }
111 107
112int BN_add_word(a, w) 108int BN_add_word(BIGNUM *a, BN_ULONG w)
113BIGNUM *a;
114unsigned long w;
115 { 109 {
116 BN_ULONG l; 110 BN_ULONG l;
117 int i; 111 int i;
@@ -142,9 +136,7 @@ unsigned long w;
142 return(1); 136 return(1);
143 } 137 }
144 138
145int BN_sub_word(a, w) 139int BN_sub_word(BIGNUM *a, BN_ULONG w)
146BIGNUM *a;
147unsigned long w;
148 { 140 {
149 int i; 141 int i;
150 142
@@ -183,9 +175,7 @@ unsigned long w;
183 return(1); 175 return(1);
184 } 176 }
185 177
186int BN_mul_word(a,w) 178int BN_mul_word(BIGNUM *a, BN_ULONG w)
187BIGNUM *a;
188unsigned long w;
189 { 179 {
190 BN_ULONG ll; 180 BN_ULONG ll;
191 181
@@ -199,6 +189,6 @@ unsigned long w;
199 a->d[a->top++]=ll; 189 a->d[a->top++]=ll;
200 } 190 }
201 } 191 }
202 return(0); 192 return(1);
203 } 193 }
204 194
diff --git a/src/lib/libcrypto/bn/bnspeed.c b/src/lib/libcrypto/bn/bnspeed.c
index f7c2790fff..0922aa3e16 100644
--- a/src/lib/libcrypto/bn/bnspeed.c
+++ b/src/lib/libcrypto/bn/bnspeed.c
@@ -66,14 +66,13 @@
66#include <stdlib.h> 66#include <stdlib.h>
67#include <signal.h> 67#include <signal.h>
68#include <string.h> 68#include <string.h>
69#include "crypto.h" 69#include <openssl/crypto.h>
70#include "err.h" 70#include <openssl/err.h>
71 71
72#ifndef MSDOS 72#if !defined(MSDOS) && (!defined(VMS) || defined(__DECC))
73#define TIMES 73#define TIMES
74#endif 74#endif
75 75
76#ifndef VMS
77#ifndef _IRIX 76#ifndef _IRIX
78#include <time.h> 77#include <time.h>
79#endif 78#endif
@@ -81,36 +80,33 @@
81#include <sys/types.h> 80#include <sys/types.h>
82#include <sys/times.h> 81#include <sys/times.h>
83#endif 82#endif
84#else /* VMS */ 83
85#include <types.h> 84/* Depending on the VMS version, the tms structure is perhaps defined.
86struct tms { 85 The __TMS macro will show if it was. If it wasn't defined, we should
87 time_t tms_utime; 86 undefine TIMES, since that tells the rest of the program how things
88 time_t tms_stime; 87 should be handled. -- Richard Levitte */
89 time_t tms_uchild; /* I dunno... */ 88#if defined(VMS) && defined(__DECC) && !defined(__TMS)
90 time_t tms_uchildsys; /* so these names are a guess :-) */ 89#undef TIMES
91 }
92#endif 90#endif
91
93#ifndef TIMES 92#ifndef TIMES
94#include <sys/timeb.h> 93#include <sys/timeb.h>
95#endif 94#endif
96 95
97#ifdef sun 96#if defined(sun) || defined(__ultrix)
97#define _POSIX_SOURCE
98#include <limits.h> 98#include <limits.h>
99#include <sys/param.h> 99#include <sys/param.h>
100#endif 100#endif
101 101
102#include "bn.h" 102#include <openssl/bn.h>
103#include "x509.h" 103#include <openssl/x509.h>
104 104
105/* The following if from times(3) man page. It may need to be changed */ 105/* The following if from times(3) man page. It may need to be changed */
106#ifndef HZ 106#ifndef HZ
107# ifndef CLK_TCK 107# ifndef CLK_TCK
108# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */ 108# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
109# ifndef VMS 109# define HZ 100.0
110# define HZ 100.0
111# else /* VMS */
112# define HZ 100.0
113# endif
114# else /* _BSD_CLK_TCK_ */ 110# else /* _BSD_CLK_TCK_ */
115# define HZ ((double)_BSD_CLK_TCK_) 111# define HZ ((double)_BSD_CLK_TCK_)
116# endif 112# endif
@@ -123,17 +119,11 @@ struct tms {
123#define BUFSIZE ((long)1024*8) 119#define BUFSIZE ((long)1024*8)
124int run=0; 120int run=0;
125 121
126#ifndef NOPROTO
127static double Time_F(int s); 122static double Time_F(int s);
128#else
129static double Time_F();
130#endif
131
132#define START 0 123#define START 0
133#define STOP 1 124#define STOP 1
134 125
135static double Time_F(s) 126static double Time_F(int s)
136int s;
137 { 127 {
138 double ret; 128 double ret;
139#ifdef TIMES 129#ifdef TIMES
@@ -175,27 +165,20 @@ static int sizes[NUM_SIZES]={128,256,512,1024,2048};
175 165
176void do_mul(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_CTX *ctx); 166void do_mul(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_CTX *ctx);
177 167
178int main(argc,argv) 168int main(int argc, char **argv)
179int argc;
180char **argv;
181 { 169 {
182 BN_CTX *ctx; 170 BN_CTX *ctx;
183 BIGNUM *a,*b,*c,*r; 171 BIGNUM a,b,c;
184 172
185 ctx=BN_CTX_new(); 173 ctx=BN_CTX_new();
186 a=BN_new(); 174 BN_init(&a);
187 b=BN_new(); 175 BN_init(&b);
188 c=BN_new(); 176 BN_init(&c);
189 r=BN_new();
190 177
191 do_mul(a,b,c,ctx); 178 do_mul(&a,&b,&c,ctx);
192 } 179 }
193 180
194void do_mul(r,a,b,ctx) 181void do_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
195BIGNUM *r;
196BIGNUM *a;
197BIGNUM *b;
198BN_CTX *ctx;
199 { 182 {
200 int i,j,k; 183 int i,j,k;
201 double tm; 184 double tm;
@@ -211,7 +194,7 @@ BN_CTX *ctx;
211 BN_rand(b,sizes[j],1,0); 194 BN_rand(b,sizes[j],1,0);
212 Time_F(START); 195 Time_F(START);
213 for (k=0; k<num; k++) 196 for (k=0; k<num; k++)
214 BN_mul(r,b,a); 197 BN_mul(r,b,a,ctx);
215 tm=Time_F(STOP); 198 tm=Time_F(STOP);
216 printf("mul %4d x %4d -> %8.3fms\n",sizes[i],sizes[j],tm*1000.0/num); 199 printf("mul %4d x %4d -> %8.3fms\n",sizes[i],sizes[j],tm*1000.0/num);
217 } 200 }
diff --git a/src/lib/libcrypto/bn/bntest.c b/src/lib/libcrypto/bn/bntest.c
index 9ebd68b429..df4b81f5b2 100644
--- a/src/lib/libcrypto/bn/bntest.c
+++ b/src/lib/libcrypto/bn/bntest.c
@@ -59,49 +59,35 @@
59#include <stdio.h> 59#include <stdio.h>
60#include <stdlib.h> 60#include <stdlib.h>
61#include <string.h> 61#include <string.h>
62#include "e_os.h" 62
63#include "bio.h" 63#include "openssl/e_os.h"
64#include "bn.h" 64
65#include "rand.h" 65#include <openssl/bio.h>
66#include "x509.h" 66#include <openssl/bn.h>
67#include "err.h" 67#include <openssl/rand.h>
68#include <openssl/x509.h>
69#include <openssl/err.h>
68 70
69#ifdef WINDOWS 71#ifdef WINDOWS
70#include "../bio/bss_file.c" 72#include "../bio/bss_file.c"
71#endif 73#endif
72 74
73#ifndef NOPROTO 75int test_add(BIO *bp);
74int test_add (BIO *bp); 76int test_sub(BIO *bp);
75int test_sub (BIO *bp); 77int test_lshift1(BIO *bp);
76int test_lshift1 (BIO *bp); 78int test_lshift(BIO *bp,BN_CTX *ctx,BIGNUM *a_);
77int test_lshift (BIO *bp); 79int test_rshift1(BIO *bp);
78int test_rshift1 (BIO *bp); 80int test_rshift(BIO *bp,BN_CTX *ctx);
79int test_rshift (BIO *bp); 81int test_div(BIO *bp,BN_CTX *ctx);
80int test_div (BIO *bp,BN_CTX *ctx); 82int test_div_recp(BIO *bp,BN_CTX *ctx);
81int test_mul (BIO *bp); 83int test_mul(BIO *bp);
82int test_sqr (BIO *bp,BN_CTX *ctx); 84int test_sqr(BIO *bp,BN_CTX *ctx);
83int test_mont (BIO *bp,BN_CTX *ctx); 85int test_mont(BIO *bp,BN_CTX *ctx);
84int test_mod (BIO *bp,BN_CTX *ctx); 86int test_mod(BIO *bp,BN_CTX *ctx);
85int test_mod_mul (BIO *bp,BN_CTX *ctx); 87int test_mod_mul(BIO *bp,BN_CTX *ctx);
86int test_mod_exp (BIO *bp,BN_CTX *ctx); 88int test_mod_exp(BIO *bp,BN_CTX *ctx);
89int test_exp(BIO *bp,BN_CTX *ctx);
87int rand_neg(void); 90int rand_neg(void);
88#else
89int test_add ();
90int test_sub ();
91int test_lshift1 ();
92int test_lshift ();
93int test_rshift1 ();
94int test_rshift ();
95int test_div ();
96int test_mul ();
97int test_sqr ();
98int test_mont ();
99int test_mod ();
100int test_mod_mul ();
101int test_mod_exp ();
102int rand_neg();
103#endif
104
105static int results=0; 91static int results=0;
106 92
107#ifdef NO_STDIO 93#ifdef NO_STDIO
@@ -109,16 +95,15 @@ static int results=0;
109#include "bss_file.c" 95#include "bss_file.c"
110#endif 96#endif
111 97
112int main(argc,argv) 98static unsigned char lst1[]="\xC6\x4F\x43\x04\x2A\xEA\xCA\x6E\x58\x36\x80\x5B\xE8\xC9"
113int argc; 99"\x9B\x04\x5D\x48\x36\xC2\xFD\x16\xC9\x64\xF0";
114char *argv[]; 100
101int main(int argc, char *argv[])
115 { 102 {
116 BN_CTX *ctx; 103 BN_CTX *ctx;
117 BIO *out; 104 BIO *out;
118 char *outfile=NULL; 105 char *outfile=NULL;
119 106
120 srand((unsigned int)time(NULL));
121
122 argc--; 107 argc--;
123 argv++; 108 argv++;
124 while (argc >= 1) 109 while (argc >= 1)
@@ -168,8 +153,13 @@ char *argv[];
168 if (!test_lshift1(out)) goto err; 153 if (!test_lshift1(out)) goto err;
169 fflush(stdout); 154 fflush(stdout);
170 155
156 fprintf(stderr,"test BN_lshift (fixed)\n");
157 if (!test_lshift(out,ctx,BN_bin2bn(lst1,sizeof(lst1)-1,NULL)))
158 goto err;
159 fflush(stdout);
160
171 fprintf(stderr,"test BN_lshift\n"); 161 fprintf(stderr,"test BN_lshift\n");
172 if (!test_lshift(out)) goto err; 162 if (!test_lshift(out,ctx,NULL)) goto err;
173 fflush(stdout); 163 fflush(stdout);
174 164
175 fprintf(stderr,"test BN_rshift1\n"); 165 fprintf(stderr,"test BN_rshift1\n");
@@ -177,7 +167,7 @@ char *argv[];
177 fflush(stdout); 167 fflush(stdout);
178 168
179 fprintf(stderr,"test BN_rshift\n"); 169 fprintf(stderr,"test BN_rshift\n");
180 if (!test_rshift(out)) goto err; 170 if (!test_rshift(out,ctx)) goto err;
181 fflush(stdout); 171 fflush(stdout);
182 172
183 fprintf(stderr,"test BN_sqr\n"); 173 fprintf(stderr,"test BN_sqr\n");
@@ -192,6 +182,10 @@ char *argv[];
192 if (!test_div(out,ctx)) goto err; 182 if (!test_div(out,ctx)) goto err;
193 fflush(stdout); 183 fflush(stdout);
194 184
185 fprintf(stderr,"test BN_div_recp\n");
186 if (!test_div_recp(out,ctx)) goto err;
187 fflush(stdout);
188
195 fprintf(stderr,"test BN_mod\n"); 189 fprintf(stderr,"test BN_mod\n");
196 if (!test_mod(out,ctx)) goto err; 190 if (!test_mod(out,ctx)) goto err;
197 fflush(stdout); 191 fflush(stdout);
@@ -209,298 +203,423 @@ char *argv[];
209 if (!test_mod_exp(out,ctx)) goto err; 203 if (!test_mod_exp(out,ctx)) goto err;
210 fflush(stdout); 204 fflush(stdout);
211 205
206 fprintf(stderr,"test BN_exp\n");
207 if (!test_exp(out,ctx)) goto err;
208 fflush(stdout);
209
212/**/ 210/**/
213 exit(0); 211 exit(0);
214err: 212err:
213 BIO_puts(out,"1\n"); /* make sure bc fails if we are piping to it */
215 ERR_load_crypto_strings(); 214 ERR_load_crypto_strings();
216 ERR_print_errors(out); 215 ERR_print_errors(out);
217 exit(1); 216 exit(1);
218 return(1); 217 return(1);
219 } 218 }
220 219
221int test_add(bp) 220int test_add(BIO *bp)
222BIO *bp;
223 { 221 {
224 BIGNUM *a,*b,*c; 222 BIGNUM a,b,c;
225 int i; 223 int i;
226 int j; 224 int j;
227 225
228 a=BN_new(); 226 BN_init(&a);
229 b=BN_new(); 227 BN_init(&b);
230 c=BN_new(); 228 BN_init(&c);
231 229
232 BN_rand(a,512,0,0); 230 BN_rand(&a,512,0,0);
233 for (i=0; i<100; i++) 231 for (i=0; i<100; i++)
234 { 232 {
235 BN_rand(b,450+i,0,0); 233 BN_rand(&b,450+i,0,0);
236 a->neg=rand_neg(); 234 a.neg=rand_neg();
237 b->neg=rand_neg(); 235 b.neg=rand_neg();
238 if (bp == NULL) 236 if (bp == NULL)
239 for (j=0; j<10000; j++) 237 for (j=0; j<10000; j++)
240 BN_add(c,a,b); 238 BN_add(&c,&a,&b);
241 BN_add(c,a,b); 239 BN_add(&c,&a,&b);
242 if (bp != NULL) 240 if (bp != NULL)
243 { 241 {
244 if (!results) 242 if (!results)
245 { 243 {
246 BN_print(bp,a); 244 BN_print(bp,&a);
247 BIO_puts(bp," + "); 245 BIO_puts(bp," + ");
248 BN_print(bp,b); 246 BN_print(bp,&b);
249 BIO_puts(bp," - "); 247 BIO_puts(bp," - ");
250 } 248 }
251 BN_print(bp,c); 249 BN_print(bp,&c);
252 BIO_puts(bp,"\n"); 250 BIO_puts(bp,"\n");
253 } 251 }
252 a.neg=!a.neg;
253 b.neg=!b.neg;
254 BN_add(&c,&c,&b);
255 BN_add(&c,&c,&a);
256 if(!BN_is_zero(&c))
257 {
258 BIO_puts(bp,"Add test failed!\n");
259 return 0;
260 }
254 } 261 }
255 BN_free(a); 262 BN_free(&a);
256 BN_free(b); 263 BN_free(&b);
257 BN_free(c); 264 BN_free(&c);
258 return(1); 265 return(1);
259 } 266 }
260 267
261int test_sub(bp) 268int test_sub(BIO *bp)
262BIO *bp;
263 { 269 {
264 BIGNUM *a,*b,*c; 270 BIGNUM a,b,c;
265 int i; 271 int i;
266 int j; 272 int j;
267 273
268 a=BN_new(); 274 BN_init(&a);
269 b=BN_new(); 275 BN_init(&b);
270 c=BN_new(); 276 BN_init(&c);
271 277
272 BN_rand(a,512,0,0); 278 BN_rand(&a,512,0,0);
273 for (i=0; i<100; i++) 279 for (i=0; i<100; i++)
274 { 280 {
275 BN_rand(b,400+i,0,0); 281 BN_rand(&b,400+i,0,0);
276 a->neg=rand_neg(); 282 a.neg=rand_neg();
277 b->neg=rand_neg(); 283 b.neg=rand_neg();
278 if (bp == NULL) 284 if (bp == NULL)
279 for (j=0; j<10000; j++) 285 for (j=0; j<10000; j++)
280 BN_sub(c,a,b); 286 BN_sub(&c,&a,&b);
281 BN_sub(c,a,b); 287 BN_sub(&c,&a,&b);
282 if (bp != NULL) 288 if (bp != NULL)
283 { 289 {
284 if (!results) 290 if (!results)
285 { 291 {
286 BN_print(bp,a); 292 BN_print(bp,&a);
287 BIO_puts(bp," - "); 293 BIO_puts(bp," - ");
288 BN_print(bp,b); 294 BN_print(bp,&b);
289 BIO_puts(bp," - "); 295 BIO_puts(bp," - ");
290 } 296 }
291 BN_print(bp,c); 297 BN_print(bp,&c);
292 BIO_puts(bp,"\n"); 298 BIO_puts(bp,"\n");
293 } 299 }
300 BN_add(&c,&c,&b);
301 BN_sub(&c,&c,&a);
302 if(!BN_is_zero(&c))
303 {
304 BIO_puts(bp,"Subtract test failed!\n");
305 return 0;
306 }
294 } 307 }
295 BN_free(a); 308 BN_free(&a);
296 BN_free(b); 309 BN_free(&b);
297 BN_free(c); 310 BN_free(&c);
298 return(1); 311 return(1);
299 } 312 }
300 313
301int test_div(bp,ctx) 314int test_div(BIO *bp, BN_CTX *ctx)
302BIO *bp;
303BN_CTX *ctx;
304 { 315 {
305 BIGNUM *a,*b,*c,*d; 316 BIGNUM a,b,c,d,e;
306 int i; 317 int i;
307 int j; 318 int j;
308 319
309 a=BN_new(); 320 BN_init(&a);
310 b=BN_new(); 321 BN_init(&b);
311 c=BN_new(); 322 BN_init(&c);
312 d=BN_new(); 323 BN_init(&d);
324 BN_init(&e);
313 325
314 BN_rand(a,400,0,0); 326 BN_rand(&a,400,0,0);
315 for (i=0; i<100; i++) 327 for (i=0; i<100; i++)
316 { 328 {
317 BN_rand(b,50+i,0,0); 329 BN_rand(&b,50+i,0,0);
318 a->neg=rand_neg(); 330 a.neg=rand_neg();
319 b->neg=rand_neg(); 331 b.neg=rand_neg();
320 if (bp == NULL) 332 if (bp == NULL)
321 for (j=0; j<100; j++) 333 for (j=0; j<100; j++)
322 BN_div(d,c,a,b,ctx); 334 BN_div(&d,&c,&a,&b,ctx);
323 BN_div(d,c,a,b,ctx); 335 BN_div(&d,&c,&a,&b,ctx);
324 if (bp != NULL) 336 if (bp != NULL)
325 { 337 {
326 if (!results) 338 if (!results)
327 { 339 {
328 BN_print(bp,a); 340 BN_print(bp,&a);
329 BIO_puts(bp," / "); 341 BIO_puts(bp," / ");
330 BN_print(bp,b); 342 BN_print(bp,&b);
331 BIO_puts(bp," - "); 343 BIO_puts(bp," - ");
332 } 344 }
333 BN_print(bp,d); 345 BN_print(bp,&d);
334 BIO_puts(bp,"\n"); 346 BIO_puts(bp,"\n");
335 347
336 if (!results) 348 if (!results)
337 { 349 {
338 BN_print(bp,a); 350 BN_print(bp,&a);
339 BIO_puts(bp," % "); 351 BIO_puts(bp," % ");
340 BN_print(bp,b); 352 BN_print(bp,&b);
341 BIO_puts(bp," - "); 353 BIO_puts(bp," - ");
342 } 354 }
343 BN_print(bp,c); 355 BN_print(bp,&c);
344 BIO_puts(bp,"\n"); 356 BIO_puts(bp,"\n");
345 } 357 }
358 BN_mul(&e,&d,&b,ctx);
359 BN_add(&d,&e,&c);
360 BN_sub(&d,&d,&a);
361 if(!BN_is_zero(&d))
362 {
363 BIO_puts(bp,"Division test failed!\n");
364 return 0;
365 }
346 } 366 }
347 BN_free(a); 367 BN_free(&a);
348 BN_free(b); 368 BN_free(&b);
349 BN_free(c); 369 BN_free(&c);
350 BN_free(d); 370 BN_free(&d);
371 BN_free(&e);
351 return(1); 372 return(1);
352 } 373 }
353 374
354int test_mul(bp) 375int test_div_recp(BIO *bp, BN_CTX *ctx)
355BIO *bp;
356 { 376 {
357 BIGNUM *a,*b,*c; 377 BIGNUM a,b,c,d,e;
378 BN_RECP_CTX recp;
358 int i; 379 int i;
359 int j; 380 int j;
360 381
361 a=BN_new(); 382 BN_RECP_CTX_init(&recp);
362 b=BN_new(); 383 BN_init(&a);
363 c=BN_new(); 384 BN_init(&b);
385 BN_init(&c);
386 BN_init(&d);
387 BN_init(&e);
364 388
365 BN_rand(a,200,0,0); 389 BN_rand(&a,400,0,0);
366 for (i=0; i<100; i++) 390 for (i=0; i<100; i++)
367 { 391 {
368 BN_rand(b,250+i,0,0); 392 BN_rand(&b,50+i,0,0);
369 a->neg=rand_neg(); 393 a.neg=rand_neg();
370 b->neg=rand_neg(); 394 b.neg=rand_neg();
395 BN_RECP_CTX_set(&recp,&b,ctx);
371 if (bp == NULL) 396 if (bp == NULL)
372 for (j=0; j<100; j++) 397 for (j=0; j<100; j++)
373 BN_mul(c,a,b); 398 BN_div_recp(&d,&c,&a,&recp,ctx);
374 BN_mul(c,a,b); 399 BN_div_recp(&d,&c,&a,&recp,ctx);
375 if (bp != NULL) 400 if (bp != NULL)
376 { 401 {
377 if (!results) 402 if (!results)
378 { 403 {
379 BN_print(bp,a); 404 BN_print(bp,&a);
405 BIO_puts(bp," / ");
406 BN_print(bp,&b);
407 BIO_puts(bp," - ");
408 }
409 BN_print(bp,&d);
410 BIO_puts(bp,"\n");
411
412 if (!results)
413 {
414 BN_print(bp,&a);
415 BIO_puts(bp," % ");
416 BN_print(bp,&b);
417 BIO_puts(bp," - ");
418 }
419 BN_print(bp,&c);
420 BIO_puts(bp,"\n");
421 }
422 BN_mul(&e,&d,&b,ctx);
423 BN_add(&d,&e,&c);
424 BN_sub(&d,&d,&a);
425 if(!BN_is_zero(&d))
426 {
427 BIO_puts(bp,"Reciprocal division test failed!\n");
428 return 0;
429 }
430 }
431 BN_free(&a);
432 BN_free(&b);
433 BN_free(&c);
434 BN_free(&d);
435 BN_free(&e);
436 BN_RECP_CTX_free(&recp);
437 return(1);
438 }
439
440int test_mul(BIO *bp)
441 {
442 BIGNUM a,b,c,d,e;
443 int i;
444 int j;
445 BN_CTX ctx;
446
447 BN_CTX_init(&ctx);
448 BN_init(&a);
449 BN_init(&b);
450 BN_init(&c);
451 BN_init(&d);
452 BN_init(&e);
453
454 BN_rand(&a,200,0,0);
455 for (i=0; i<100; i++)
456 {
457 BN_rand(&b,250+i,0,0);
458 BN_rand(&b,200,0,0);
459 a.neg=rand_neg();
460 b.neg=rand_neg();
461 if (bp == NULL)
462 for (j=0; j<100; j++)
463 BN_mul(&c,&a,&b,&ctx);
464 BN_mul(&c,&a,&b,&ctx);
465 if (bp != NULL)
466 {
467 if (!results)
468 {
469 BN_print(bp,&a);
380 BIO_puts(bp," * "); 470 BIO_puts(bp," * ");
381 BN_print(bp,b); 471 BN_print(bp,&b);
382 BIO_puts(bp," - "); 472 BIO_puts(bp," - ");
383 } 473 }
384 BN_print(bp,c); 474 BN_print(bp,&c);
385 BIO_puts(bp,"\n"); 475 BIO_puts(bp,"\n");
386 } 476 }
477 BN_div(&d,&e,&c,&a,&ctx);
478 BN_sub(&d,&d,&b);
479 if(!BN_is_zero(&d) || !BN_is_zero(&e))
480 {
481 BIO_puts(bp,"Multiplication test failed!\n");
482 return 0;
483 }
387 } 484 }
388 BN_free(a); 485 BN_free(&a);
389 BN_free(b); 486 BN_free(&b);
390 BN_free(c); 487 BN_free(&c);
488 BN_free(&d);
489 BN_free(&e);
490 BN_CTX_free(&ctx);
391 return(1); 491 return(1);
392 } 492 }
393 493
394int test_sqr(bp,ctx) 494int test_sqr(BIO *bp, BN_CTX *ctx)
395BIO *bp;
396BN_CTX *ctx;
397 { 495 {
398 BIGNUM *a,*c; 496 BIGNUM a,c,d,e;
399 int i; 497 int i;
400 int j; 498 int j;
401 499
402 a=BN_new(); 500 BN_init(&a);
403 c=BN_new(); 501 BN_init(&c);
502 BN_init(&d);
503 BN_init(&e);
404 504
405 for (i=0; i<40; i++) 505 for (i=0; i<40; i++)
406 { 506 {
407 BN_rand(a,40+i*10,0,0); 507 BN_rand(&a,40+i*10,0,0);
408 a->neg=rand_neg(); 508 a.neg=rand_neg();
409 if (bp == NULL) 509 if (bp == NULL)
410 for (j=0; j<100; j++) 510 for (j=0; j<100; j++)
411 BN_sqr(c,a,ctx); 511 BN_sqr(&c,&a,ctx);
412 BN_sqr(c,a,ctx); 512 BN_sqr(&c,&a,ctx);
413 if (bp != NULL) 513 if (bp != NULL)
414 { 514 {
415 if (!results) 515 if (!results)
416 { 516 {
417 BN_print(bp,a); 517 BN_print(bp,&a);
418 BIO_puts(bp," * "); 518 BIO_puts(bp," * ");
419 BN_print(bp,a); 519 BN_print(bp,&a);
420 BIO_puts(bp," - "); 520 BIO_puts(bp," - ");
421 } 521 }
422 BN_print(bp,c); 522 BN_print(bp,&c);
423 BIO_puts(bp,"\n"); 523 BIO_puts(bp,"\n");
424 } 524 }
525 BN_div(&d,&e,&c,&a,ctx);
526 BN_sub(&d,&d,&a);
527 if(!BN_is_zero(&d) || !BN_is_zero(&e))
528 {
529 BIO_puts(bp,"Square test failed!\n");
530 return 0;
531 }
425 } 532 }
426 BN_free(a); 533 BN_free(&a);
427 BN_free(c); 534 BN_free(&c);
535 BN_free(&d);
536 BN_free(&e);
428 return(1); 537 return(1);
429 } 538 }
430 539
431int test_mont(bp,ctx) 540int test_mont(BIO *bp, BN_CTX *ctx)
432BIO *bp;
433BN_CTX *ctx;
434 { 541 {
435 BIGNUM *a,*b,*c,*A,*B; 542 BIGNUM a,b,c,d,A,B;
436 BIGNUM *n; 543 BIGNUM n;
437 int i; 544 int i;
438 int j; 545 int j;
439 BN_MONT_CTX *mont; 546 BN_MONT_CTX *mont;
440 547
441 a=BN_new(); 548 BN_init(&a);
442 b=BN_new(); 549 BN_init(&b);
443 c=BN_new(); 550 BN_init(&c);
444 A=BN_new(); 551 BN_init(&d);
445 B=BN_new(); 552 BN_init(&A);
446 n=BN_new(); 553 BN_init(&B);
554 BN_init(&n);
447 555
448 mont=BN_MONT_CTX_new(); 556 mont=BN_MONT_CTX_new();
449 557
450 BN_rand(a,100,0,0); /**/ 558 BN_rand(&a,100,0,0); /**/
451 BN_rand(b,100,0,0); /**/ 559 BN_rand(&b,100,0,0); /**/
452 for (i=0; i<10; i++) 560 for (i=0; i<10; i++)
453 { 561 {
454 BN_rand(n,(100%BN_BITS2+1)*BN_BITS2*i*BN_BITS2,0,1); /**/ 562 BN_rand(&n,(100%BN_BITS2+1)*BN_BITS2*i*BN_BITS2,0,1); /**/
455 BN_MONT_CTX_set(mont,n,ctx); 563 BN_MONT_CTX_set(mont,&n,ctx);
456 564
457 BN_to_montgomery(A,a,mont,ctx); 565 BN_to_montgomery(&A,&a,mont,ctx);
458 BN_to_montgomery(B,b,mont,ctx); 566 BN_to_montgomery(&B,&b,mont,ctx);
459 567
460 if (bp == NULL) 568 if (bp == NULL)
461 for (j=0; j<100; j++) 569 for (j=0; j<100; j++)
462 BN_mod_mul_montgomery(c,A,B,mont,ctx);/**/ 570 BN_mod_mul_montgomery(&c,&A,&B,mont,ctx);/**/
463 BN_mod_mul_montgomery(c,A,B,mont,ctx);/**/ 571 BN_mod_mul_montgomery(&c,&A,&B,mont,ctx);/**/
464 BN_from_montgomery(A,c,mont,ctx);/**/ 572 BN_from_montgomery(&A,&c,mont,ctx);/**/
465 if (bp != NULL) 573 if (bp != NULL)
466 { 574 {
467 if (!results) 575 if (!results)
468 { 576 {
469#ifdef undef 577#ifdef undef
470fprintf(stderr,"%d * %d %% %d\n", 578fprintf(stderr,"%d * %d %% %d\n",
471BN_num_bits(a), 579BN_num_bits(&a),
472BN_num_bits(b), 580BN_num_bits(&b),
473BN_num_bits(mont->N)); 581BN_num_bits(mont->N));
474#endif 582#endif
475 BN_print(bp,a); 583 BN_print(bp,&a);
476 BIO_puts(bp," * "); 584 BIO_puts(bp," * ");
477 BN_print(bp,b); 585 BN_print(bp,&b);
478 BIO_puts(bp," % "); 586 BIO_puts(bp," % ");
479 BN_print(bp,mont->N); 587 BN_print(bp,&(mont->N));
480 BIO_puts(bp," - "); 588 BIO_puts(bp," - ");
481 } 589 }
482 BN_print(bp,A); 590 BN_print(bp,&A);
483 BIO_puts(bp,"\n"); 591 BIO_puts(bp,"\n");
484 } 592 }
593 BN_mod_mul(&d,&a,&b,&n,ctx);
594 BN_sub(&d,&d,&A);
595 if(!BN_is_zero(&d))
596 {
597 BIO_puts(bp,"Montgomery multiplication test failed!\n");
598 return 0;
599 }
485 } 600 }
486 BN_MONT_CTX_free(mont); 601 BN_MONT_CTX_free(mont);
487 BN_free(a); 602 BN_free(&a);
488 BN_free(b); 603 BN_free(&b);
489 BN_free(c); 604 BN_free(&c);
605 BN_free(&d);
606 BN_free(&A);
607 BN_free(&B);
608 BN_free(&n);
490 return(1); 609 return(1);
491 } 610 }
492 611
493int test_mod(bp,ctx) 612int test_mod(BIO *bp, BN_CTX *ctx)
494BIO *bp;
495BN_CTX *ctx;
496 { 613 {
497 BIGNUM *a,*b,*c; 614 BIGNUM *a,*b,*c,*d,*e;
498 int i; 615 int i;
499 int j; 616 int j;
500 617
501 a=BN_new(); 618 a=BN_new();
502 b=BN_new(); 619 b=BN_new();
503 c=BN_new(); 620 c=BN_new();
621 d=BN_new();
622 e=BN_new();
504 623
505 BN_rand(a,1024,0,0); /**/ 624 BN_rand(a,1024,0,0); /**/
506 for (i=0; i<20; i++) 625 for (i=0; i<20; i++)
@@ -524,16 +643,23 @@ BN_CTX *ctx;
524 BN_print(bp,c); 643 BN_print(bp,c);
525 BIO_puts(bp,"\n"); 644 BIO_puts(bp,"\n");
526 } 645 }
646 BN_div(d,e,a,b,ctx);
647 BN_sub(e,e,c);
648 if(!BN_is_zero(e))
649 {
650 BIO_puts(bp,"Modulo test failed!\n");
651 return 0;
652 }
527 } 653 }
528 BN_free(a); 654 BN_free(a);
529 BN_free(b); 655 BN_free(b);
530 BN_free(c); 656 BN_free(c);
657 BN_free(d);
658 BN_free(e);
531 return(1); 659 return(1);
532 } 660 }
533 661
534int test_mod_mul(bp,ctx) 662int test_mod_mul(BIO *bp, BN_CTX *ctx)
535BIO *bp;
536BN_CTX *ctx;
537 { 663 {
538 BIGNUM *a,*b,*c,*d,*e; 664 BIGNUM *a,*b,*c,*d,*e;
539 int i; 665 int i;
@@ -578,6 +704,14 @@ BN_CTX *ctx;
578 BN_print(bp,e); 704 BN_print(bp,e);
579 BIO_puts(bp,"\n"); 705 BIO_puts(bp,"\n");
580 } 706 }
707 BN_mul(d,a,b,ctx);
708 BN_sub(d,d,e);
709 BN_div(a,b,d,c,ctx);
710 if(!BN_is_zero(b))
711 {
712 BIO_puts(bp,"Modulo multiply test failed!\n");
713 return 0;
714 }
581 } 715 }
582 BN_free(a); 716 BN_free(a);
583 BN_free(b); 717 BN_free(b);
@@ -587,9 +721,7 @@ BN_CTX *ctx;
587 return(1); 721 return(1);
588 } 722 }
589 723
590int test_mod_exp(bp,ctx) 724int test_mod_exp(BIO *bp, BN_CTX *ctx)
591BIO *bp;
592BN_CTX *ctx;
593 { 725 {
594 BIGNUM *a,*b,*c,*d,*e; 726 BIGNUM *a,*b,*c,*d,*e;
595 int i; 727 int i;
@@ -623,6 +755,14 @@ BN_CTX *ctx;
623 BN_print(bp,d); 755 BN_print(bp,d);
624 BIO_puts(bp,"\n"); 756 BIO_puts(bp,"\n");
625 } 757 }
758 BN_exp(e,a,b,ctx);
759 BN_sub(e,e,d);
760 BN_div(a,b,e,c,ctx);
761 if(!BN_is_zero(b))
762 {
763 BIO_puts(bp,"Modulo exponentiation test failed!\n");
764 return 0;
765 }
626 } 766 }
627 BN_free(a); 767 BN_free(a);
628 BN_free(b); 768 BN_free(b);
@@ -632,19 +772,74 @@ BN_CTX *ctx;
632 return(1); 772 return(1);
633 } 773 }
634 774
635int test_lshift(bp) 775int test_exp(BIO *bp, BN_CTX *ctx)
636BIO *bp;
637 { 776 {
638 BIGNUM *a,*b,*c; 777 BIGNUM *a,*b,*d,*e,*one;
639 int i; 778 int i;
640 779
641 a=BN_new(); 780 a=BN_new();
642 b=BN_new(); 781 b=BN_new();
782 d=BN_new();
783 e=BN_new();
784 one=BN_new();
785 BN_one(one);
786
787 for (i=0; i<6; i++)
788 {
789 BN_rand(a,20+i*5,0,0); /**/
790 BN_rand(b,2+i,0,0); /**/
791
792 if (!BN_exp(d,a,b,ctx))
793 return(00);
794
795 if (bp != NULL)
796 {
797 if (!results)
798 {
799 BN_print(bp,a);
800 BIO_puts(bp," ^ ");
801 BN_print(bp,b);
802 BIO_puts(bp," - ");
803 }
804 BN_print(bp,d);
805 BIO_puts(bp,"\n");
806 }
807 BN_one(e);
808 for( ; !BN_is_zero(b) ; BN_sub(b,b,one))
809 BN_mul(e,e,a,ctx);
810 BN_sub(e,e,d);
811 if(!BN_is_zero(e))
812 {
813 BIO_puts(bp,"Exponentiation test failed!\n");
814 return 0;
815 }
816 }
817 BN_free(a);
818 BN_free(b);
819 BN_free(d);
820 BN_free(e);
821 BN_free(one);
822 return(1);
823 }
824
825int test_lshift(BIO *bp,BN_CTX *ctx,BIGNUM *a_)
826 {
827 BIGNUM *a,*b,*c,*d;
828 int i;
829
830 b=BN_new();
643 c=BN_new(); 831 c=BN_new();
832 d=BN_new();
644 BN_one(c); 833 BN_one(c);
645 834
646 BN_rand(a,200,0,0); /**/ 835 if(a_)
647 a->neg=rand_neg(); 836 a=a_;
837 else
838 {
839 a=BN_new();
840 BN_rand(a,200,0,0); /**/
841 a->neg=rand_neg();
842 }
648 for (i=0; i<70; i++) 843 for (i=0; i<70; i++)
649 { 844 {
650 BN_lshift(b,a,i+1); 845 BN_lshift(b,a,i+1);
@@ -661,21 +856,38 @@ BIO *bp;
661 BN_print(bp,b); 856 BN_print(bp,b);
662 BIO_puts(bp,"\n"); 857 BIO_puts(bp,"\n");
663 } 858 }
859 BN_mul(d,a,c,ctx);
860 BN_sub(d,d,b);
861 if(!BN_is_zero(d))
862 {
863 BIO_puts(bp,"Left shift test failed!\n");
864 BIO_puts(bp,"a=");
865 BN_print(bp,a);
866 BIO_puts(bp,"\nb=");
867 BN_print(bp,b);
868 BIO_puts(bp,"\nc=");
869 BN_print(bp,c);
870 BIO_puts(bp,"\nd=");
871 BN_print(bp,d);
872 BIO_puts(bp,"\n");
873 return 0;
874 }
664 } 875 }
665 BN_free(a); 876 BN_free(a);
666 BN_free(b); 877 BN_free(b);
667 BN_free(c); 878 BN_free(c);
879 BN_free(d);
668 return(1); 880 return(1);
669 } 881 }
670 882
671int test_lshift1(bp) 883int test_lshift1(BIO *bp)
672BIO *bp;
673 { 884 {
674 BIGNUM *a,*b; 885 BIGNUM *a,*b,*c;
675 int i; 886 int i;
676 887
677 a=BN_new(); 888 a=BN_new();
678 b=BN_new(); 889 b=BN_new();
890 c=BN_new();
679 891
680 BN_rand(a,200,0,0); /**/ 892 BN_rand(a,200,0,0); /**/
681 a->neg=rand_neg(); 893 a->neg=rand_neg();
@@ -693,22 +905,32 @@ BIO *bp;
693 BN_print(bp,b); 905 BN_print(bp,b);
694 BIO_puts(bp,"\n"); 906 BIO_puts(bp,"\n");
695 } 907 }
908 BN_add(c,a,a);
909 BN_sub(a,b,c);
910 if(!BN_is_zero(a))
911 {
912 BIO_puts(bp,"Left shift one test failed!\n");
913 return 0;
914 }
915
696 BN_copy(a,b); 916 BN_copy(a,b);
697 } 917 }
698 BN_free(a); 918 BN_free(a);
699 BN_free(b); 919 BN_free(b);
920 BN_free(c);
700 return(1); 921 return(1);
701 } 922 }
702 923
703int test_rshift(bp) 924int test_rshift(BIO *bp,BN_CTX *ctx)
704BIO *bp;
705 { 925 {
706 BIGNUM *a,*b,*c; 926 BIGNUM *a,*b,*c,*d,*e;
707 int i; 927 int i;
708 928
709 a=BN_new(); 929 a=BN_new();
710 b=BN_new(); 930 b=BN_new();
711 c=BN_new(); 931 c=BN_new();
932 d=BN_new();
933 e=BN_new();
712 BN_one(c); 934 BN_one(c);
713 935
714 BN_rand(a,200,0,0); /**/ 936 BN_rand(a,200,0,0); /**/
@@ -729,21 +951,30 @@ BIO *bp;
729 BN_print(bp,b); 951 BN_print(bp,b);
730 BIO_puts(bp,"\n"); 952 BIO_puts(bp,"\n");
731 } 953 }
954 BN_div(d,e,a,c,ctx);
955 BN_sub(d,d,b);
956 if(!BN_is_zero(d))
957 {
958 BIO_puts(bp,"Right shift test failed!\n");
959 return 0;
960 }
732 } 961 }
733 BN_free(a); 962 BN_free(a);
734 BN_free(b); 963 BN_free(b);
735 BN_free(c); 964 BN_free(c);
965 BN_free(d);
966 BN_free(e);
736 return(1); 967 return(1);
737 } 968 }
738 969
739int test_rshift1(bp) 970int test_rshift1(BIO *bp)
740BIO *bp;
741 { 971 {
742 BIGNUM *a,*b; 972 BIGNUM *a,*b,*c;
743 int i; 973 int i;
744 974
745 a=BN_new(); 975 a=BN_new();
746 b=BN_new(); 976 b=BN_new();
977 c=BN_new();
747 978
748 BN_rand(a,200,0,0); /**/ 979 BN_rand(a,200,0,0); /**/
749 a->neg=rand_neg(); 980 a->neg=rand_neg();
@@ -761,14 +992,22 @@ BIO *bp;
761 BN_print(bp,b); 992 BN_print(bp,b);
762 BIO_puts(bp,"\n"); 993 BIO_puts(bp,"\n");
763 } 994 }
995 BN_sub(c,a,b);
996 BN_sub(c,c,b);
997 if(!BN_is_zero(c) && !BN_is_one(c))
998 {
999 BIO_puts(bp,"Right shift one test failed!\n");
1000 return 0;
1001 }
764 BN_copy(a,b); 1002 BN_copy(a,b);
765 } 1003 }
766 BN_free(a); 1004 BN_free(a);
767 BN_free(b); 1005 BN_free(b);
1006 BN_free(c);
768 return(1); 1007 return(1);
769 } 1008 }
770 1009
771int rand_neg() 1010int rand_neg(void)
772 { 1011 {
773 static unsigned int neg=0; 1012 static unsigned int neg=0;
774 static int sign[8]={0,0,0,1,1,0,1,1}; 1013 static int sign[8]={0,0,0,1,1,0,1,1};
diff --git a/src/lib/libcrypto/bn/comba.pl b/src/lib/libcrypto/bn/comba.pl
new file mode 100644
index 0000000000..211a8b45c7
--- /dev/null
+++ b/src/lib/libcrypto/bn/comba.pl
@@ -0,0 +1,285 @@
1#!/usr/local/bin/perl
2
3$num=8;
4$num2=8/2;
5
6print <<"EOF";
7/* crypto/bn/bn_comba.c */
8#include <stdio.h>
9#include "bn_lcl.h"
10/* Auto generated from crypto/bn/comba.pl
11 */
12
13#undef bn_mul_comba8
14#undef bn_mul_comba4
15#undef bn_sqr_comba8
16#undef bn_sqr_comba4
17
18#ifdef BN_LLONG
19#define mul_add_c(a,b,c0,c1,c2) \\
20 t=(BN_ULLONG)a*b; \\
21 t1=(BN_ULONG)Lw(t); \\
22 t2=(BN_ULONG)Hw(t); \\
23 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \\
24 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
25
26#define mul_add_c2(a,b,c0,c1,c2) \\
27 t=(BN_ULLONG)a*b; \\
28 tt=(t+t)&BN_MASK; \\
29 if (tt < t) c2++; \\
30 t1=(BN_ULONG)Lw(tt); \\
31 t2=(BN_ULONG)Hw(tt); \\
32 c0=(c0+t1)&BN_MASK2; \\
33 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \\
34 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
35
36#define sqr_add_c(a,i,c0,c1,c2) \\
37 t=(BN_ULLONG)a[i]*a[i]; \\
38 t1=(BN_ULONG)Lw(t); \\
39 t2=(BN_ULONG)Hw(t); \\
40 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \\
41 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
42
43#define sqr_add_c2(a,i,j,c0,c1,c2) \\
44 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
45#else
46#define mul_add_c(a,b,c0,c1,c2) \\
47 t1=LBITS(a); t2=HBITS(a); \\
48 bl=LBITS(b); bh=HBITS(b); \\
49 mul64(t1,t2,bl,bh); \\
50 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \\
51 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
52
53#define mul_add_c2(a,b,c0,c1,c2) \\
54 t1=LBITS(a); t2=HBITS(a); \\
55 bl=LBITS(b); bh=HBITS(b); \\
56 mul64(t1,t2,bl,bh); \\
57 if (t2 & BN_TBIT) c2++; \\
58 t2=(t2+t2)&BN_MASK2; \\
59 if (t1 & BN_TBIT) t2++; \\
60 t1=(t1+t1)&BN_MASK2; \\
61 c0=(c0+t1)&BN_MASK2; \\
62 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \\
63 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
64
65#define sqr_add_c(a,i,c0,c1,c2) \\
66 sqr64(t1,t2,(a)[i]); \\
67 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \\
68 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
69
70#define sqr_add_c2(a,i,j,c0,c1,c2) \\
71 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
72#endif
73
74void bn_mul_comba${num}(r,a,b)
75BN_ULONG *r,*a,*b;
76 {
77#ifdef BN_LLONG
78 BN_ULLONG t;
79#else
80 BN_ULONG bl,bh;
81#endif
82 BN_ULONG t1,t2;
83 BN_ULONG c1,c2,c3;
84
85EOF
86$ret=&combas_mul("r","a","b",$num,"c1","c2","c3");
87printf <<"EOF";
88 }
89
90void bn_mul_comba${num2}(r,a,b)
91BN_ULONG *r,*a,*b;
92 {
93#ifdef BN_LLONG
94 BN_ULLONG t;
95#else
96 BN_ULONG bl,bh;
97#endif
98 BN_ULONG t1,t2;
99 BN_ULONG c1,c2,c3;
100
101EOF
102$ret=&combas_mul("r","a","b",$num2,"c1","c2","c3");
103printf <<"EOF";
104 }
105
106void bn_sqr_comba${num}(r,a)
107BN_ULONG *r,*a;
108 {
109#ifdef BN_LLONG
110 BN_ULLONG t,tt;
111#else
112 BN_ULONG bl,bh;
113#endif
114 BN_ULONG t1,t2;
115 BN_ULONG c1,c2,c3;
116
117EOF
118$ret=&combas_sqr("r","a",$num,"c1","c2","c3");
119printf <<"EOF";
120 }
121
122void bn_sqr_comba${num2}(r,a)
123BN_ULONG *r,*a;
124 {
125#ifdef BN_LLONG
126 BN_ULLONG t,tt;
127#else
128 BN_ULONG bl,bh;
129#endif
130 BN_ULONG t1,t2;
131 BN_ULONG c1,c2,c3;
132
133EOF
134$ret=&combas_sqr("r","a",$num2,"c1","c2","c3");
135printf <<"EOF";
136 }
137EOF
138
139sub bn_str
140 {
141 local($var,$val)=@_;
142 print "\t$var=$val;\n";
143 }
144
145sub bn_ary
146 {
147 local($var,$idx)=@_;
148 return("${var}[$idx]");
149 }
150
151sub bn_clr
152 {
153 local($var)=@_;
154
155 print "\t$var=0;\n";
156 }
157
158sub bn_mad
159 {
160 local($a,$b,$c0,$c1,$c2,$num)=@_;
161
162 if ($num == 2)
163 { printf("\tmul_add_c2($a,$b,$c0,$c1,$c2);\n"); }
164 else
165 { printf("\tmul_add_c($a,$b,$c0,$c1,$c2);\n"); }
166 }
167
168sub bn_sad
169 {
170 local($a,$i,$j,$c0,$c1,$c2,$num)=@_;
171
172 if ($num == 2)
173 { printf("\tsqr_add_c2($a,$i,$j,$c0,$c1,$c2);\n"); }
174 else
175 { printf("\tsqr_add_c($a,$i,$c0,$c1,$c2);\n"); }
176 }
177
178sub combas_mul
179 {
180 local($r,$a,$b,$num,$c0,$c1,$c2)=@_;
181 local($i,$as,$ae,$bs,$be,$ai,$bi);
182 local($tot,$end);
183
184 $as=0;
185 $ae=0;
186 $bs=0;
187 $be=0;
188 $tot=$num+$num-1;
189 &bn_clr($c0);
190 &bn_clr($c1);
191 for ($i=0; $i<$tot; $i++)
192 {
193 $ai=$as;
194 $bi=$bs;
195 $end=$be+1;
196 @numa=@numb=();
197
198#print "($as $ae) ($bs $be) $bs -> $end [$i $num]\n";
199 for ($j=$bs; $j<$end; $j++)
200 {
201 push(@numa,$ai);
202 push(@numb,$bi);
203 $ai--;
204 $bi++;
205 }
206
207 if ($i & 1)
208 {
209 @numa=reverse(@numa);
210 @numb=reverse(@numb);
211 }
212
213 &bn_clr($c2);
214 for ($j=0; $j<=$#numa; $j++)
215 {
216 &bn_mad(&bn_ary($a,$numa[$j]),
217 &bn_ary($b,$numb[$j]),$c0,$c1,$c2,1);
218 }
219 &bn_str(&bn_ary($r,$i),$c0);
220 ($c0,$c1,$c2)=($c1,$c2,$c0);
221
222 $as++ if ($i < ($num-1));
223 $ae++ if ($i >= ($num-1));
224
225 $bs++ if ($i >= ($num-1));
226 $be++ if ($i < ($num-1));
227 }
228 &bn_str(&bn_ary($r,$i),$c0);
229 }
230
231sub combas_sqr
232 {
233 local($r,$a,$num,$c0,$c1,$c2)=@_;
234 local($i,$as,$ae,$bs,$be,$ai,$bi);
235 local($b,$tot,$end,$half);
236
237 $b=$a;
238 $as=0;
239 $ae=0;
240 $bs=0;
241 $be=0;
242 $tot=$num+$num-1;
243 &bn_clr($c0);
244 &bn_clr($c1);
245 for ($i=0; $i<$tot; $i++)
246 {
247 $ai=$as;
248 $bi=$bs;
249 $end=$be+1;
250 @numa=@numb=();
251
252#print "($as $ae) ($bs $be) $bs -> $end [$i $num]\n";
253 for ($j=$bs; $j<$end; $j++)
254 {
255 push(@numa,$ai);
256 push(@numb,$bi);
257 $ai--;
258 $bi++;
259 last if ($ai < $bi);
260 }
261 if (!($i & 1))
262 {
263 @numa=reverse(@numa);
264 @numb=reverse(@numb);
265 }
266
267 &bn_clr($c2);
268 for ($j=0; $j <= $#numa; $j++)
269 {
270 if ($numa[$j] == $numb[$j])
271 {&bn_sad($a,$numa[$j],$numb[$j],$c0,$c1,$c2,1);}
272 else
273 {&bn_sad($a,$numa[$j],$numb[$j],$c0,$c1,$c2,2);}
274 }
275 &bn_str(&bn_ary($r,$i),$c0);
276 ($c0,$c1,$c2)=($c1,$c2,$c0);
277
278 $as++ if ($i < ($num-1));
279 $ae++ if ($i >= ($num-1));
280
281 $bs++ if ($i >= ($num-1));
282 $be++ if ($i < ($num-1));
283 }
284 &bn_str(&bn_ary($r,$i),$c0);
285 }
diff --git a/src/lib/libcrypto/bn/d.c b/src/lib/libcrypto/bn/d.c
new file mode 100644
index 0000000000..ced2291b25
--- /dev/null
+++ b/src/lib/libcrypto/bn/d.c
@@ -0,0 +1,72 @@
1#include <stdio.h>
2#include <openssl/bio.h>
3#include "bn_lcl.h"
4
5#define SIZE_A (100*4+4)
6#define SIZE_B (13*4)
7
8main(argc,argv)
9int argc;
10char *argv[];
11 {
12 BN_CTX ctx;
13 BN_RECP_CTX recp;
14 BIGNUM a,b,dd,d,r,rr,t,l;
15 int i;
16
17 MemCheck_start();
18 MemCheck_on();
19 BN_CTX_init(&ctx);
20 BN_RECP_CTX_init(&recp);
21
22 BN_init(&r);
23 BN_init(&rr);
24 BN_init(&d);
25 BN_init(&dd);
26 BN_init(&a);
27 BN_init(&b);
28
29 {
30 BN_rand(&a,SIZE_A,0,0);
31 BN_rand(&b,SIZE_B,0,0);
32
33 a.neg=1;
34 BN_RECP_CTX_set(&recp,&b,&ctx);
35
36 BN_print_fp(stdout,&a); printf(" a\n");
37 BN_print_fp(stdout,&b); printf(" b\n");
38
39 BN_print_fp(stdout,&recp.N); printf(" N\n");
40 BN_print_fp(stdout,&recp.Nr); printf(" Nr num_bits=%d\n",recp.num_bits);
41
42 BN_div_recp(&r,&d,&a,&recp,&ctx);
43
44for (i=0; i<300; i++)
45 BN_div(&rr,&dd,&a,&b,&ctx);
46
47 BN_print_fp(stdout,&r); printf(" div recp\n");
48 BN_print_fp(stdout,&rr); printf(" div\n");
49 BN_print_fp(stdout,&d); printf(" rem recp\n");
50 BN_print_fp(stdout,&dd); printf(" rem\n");
51 }
52 BN_CTX_free(&ctx);
53 BN_RECP_CTX_free(&recp);
54
55 BN_free(&r);
56 BN_free(&rr);
57 BN_free(&d);
58 BN_free(&dd);
59 BN_free(&a);
60 BN_free(&b);
61
62 {
63 BIO *out;
64
65 if ((out=BIO_new(BIO_s_file())) != NULL)
66 BIO_set_fp(out,stderr,BIO_NOCLOSE|BIO_FP_TEXT);
67
68 CRYPTO_mem_leaks(out);
69 BIO_free(out);
70 }
71
72 }
diff --git a/src/lib/libcrypto/bn/exp.c b/src/lib/libcrypto/bn/exp.c
new file mode 100644
index 0000000000..ec443459d8
--- /dev/null
+++ b/src/lib/libcrypto/bn/exp.c
@@ -0,0 +1,60 @@
1#include <stdio.h>
2#include <openssl/tmdiff.h>
3#include "bn_lcl.h"
4
5#define SIZE 256
6#define NUM (8*8*8)
7#define MOD (8*8*8*8*8)
8
/* Timing loop for Montgomery modular exponentiation.  Starting at SIZE and
 * doubling while size <= 1024*8, it generates random a, b, c, reduces a
 * modulo c, times 10 calls to BN_MONT_CTX_set() and num calls to
 * BN_mod_exp_mont(), and prints one result line per size.
 * argc/argv are not used.
 * NOTE(review): K&R-style definition with no declared return type and no
 * return statement; none of the BIGNUMs, the contexts or the two timer
 * handles are released before the function ends. */
9main(argc,argv)
10int argc;
11char *argv[];
12 {
13 BN_CTX ctx;
 /* NOTE(review): rr, t, l (and j below) are declared but never used. */
14 BIGNUM a,b,c,r,rr,t,l;
15 int j,i,size=SIZE,num=NUM,mod=MOD;
16 char *start,*end;
17 BN_MONT_CTX mont;
18 double d,md;
19
20 BN_MONT_CTX_init(&mont);
21 BN_CTX_init(&ctx);
22 BN_init(&a);
23 BN_init(&b);
24 BN_init(&c);
25 BN_init(&r);
26
 /* Two opaque timer handles; filled by ms_time_get(), compared by
  * ms_time_diff(). */
27 start=ms_time_new();
28 end=ms_time_new();
29 while (size <= 1024*8)
30 {
 /* The 3rd/4th BN_rand() arguments differ per operand; their meaning is
  * not visible here -- presumably top/bottom bit controls, with c's
  * final 1 forcing an odd modulus.  TODO confirm against BN_rand(). */
31 BN_rand(&a,size,0,0);
32 BN_rand(&b,size,1,0);
33 BN_rand(&c,size,0,1);
34
 /* Reduce the base below the modulus before exponentiating. */
35 BN_mod(&a,&a,&c,&ctx);
36
 /* md = elapsed time for 10 Montgomery context set-ups (reported as
  * md/10.0 per call in the printf below). */
37 ms_time_get(start);
38 for (i=0; i<10; i++)
39 BN_MONT_CTX_set(&mont,&c,&ctx);
40 ms_time_get(end);
41 md=ms_time_diff(start,end);
42
 /* d = elapsed time for num exponentiations r = a^b mod c. */
43 ms_time_get(start);
44 for (i=0; i<num; i++)
45 {
46 /* bn_mull(&r,&a,&b,&ctx); */
47 /* BN_sqr(&r,&a,&ctx); */
48 BN_mod_exp_mont(&r,&a,&b,&c,&ctx,&mont);
49 }
50 ms_time_get(end);
51 d=ms_time_diff(start,end)/* *50/33 */;
 /* Columns: bit size, total time, iterations, time per iteration,
  * per-iteration time scaled by mod, per-call context set-up time.
  * The time unit is whatever ms_time_diff() returns (not shown here). */
52 printf("%5d bit:%6.2f %6d %6.4f %4d m_set(%5.4f)\n",size,
53 d,num,d/num,(int)((d/num)*mod),md/10.0);
 /* Both the iteration count and the scale factor shrink 8x per size
  * step; only num is clamped to at least 1. */
54 num/=8;
55 mod/=8;
56 if (num <= 0) num=1;
57 size*=2;
58 }
59
60 }
diff --git a/src/lib/libcrypto/bn/expspeed.c b/src/lib/libcrypto/bn/expspeed.c
index 344f883d35..3656d5bb4c 100644
--- a/src/lib/libcrypto/bn/expspeed.c
+++ b/src/lib/libcrypto/bn/expspeed.c
@@ -66,14 +66,13 @@
66#include <stdlib.h> 66#include <stdlib.h>
67#include <signal.h> 67#include <signal.h>
68#include <string.h> 68#include <string.h>
69#include "crypto.h" 69#include <openssl/crypto.h>
70#include "err.h" 70#include <openssl/err.h>
71 71
72#ifndef MSDOS 72#if !defined(MSDOS) && (!defined(VMS) || defined(__DECC))
73#define TIMES 73#define TIMES
74#endif 74#endif
75 75
76#ifndef VMS
77#ifndef _IRIX 76#ifndef _IRIX
78#include <time.h> 77#include <time.h>
79#endif 78#endif
@@ -81,36 +80,33 @@
81#include <sys/types.h> 80#include <sys/types.h>
82#include <sys/times.h> 81#include <sys/times.h>
83#endif 82#endif
84#else /* VMS */ 83
85#include <types.h> 84/* Depending on the VMS version, the tms structure is perhaps defined.
86struct tms { 85 The __TMS macro will show if it was. If it wasn't defined, we should
87 time_t tms_utime; 86 undefine TIMES, since that tells the rest of the program how things
88 time_t tms_stime; 87 should be handled. -- Richard Levitte */
89 time_t tms_uchild; /* I dunno... */ 88#if defined(VMS) && defined(__DECC) && !defined(__TMS)
90 time_t tms_uchildsys; /* so these names are a guess :-) */ 89#undef TIMES
91 }
92#endif 90#endif
91
93#ifndef TIMES 92#ifndef TIMES
94#include <sys/timeb.h> 93#include <sys/timeb.h>
95#endif 94#endif
96 95
97#ifdef sun 96#if defined(sun) || defined(__ultrix)
97#define _POSIX_SOURCE
98#include <limits.h> 98#include <limits.h>
99#include <sys/param.h> 99#include <sys/param.h>
100#endif 100#endif
101 101
102#include "bn.h" 102#include <openssl/bn.h>
103#include "x509.h" 103#include <openssl/x509.h>
104 104
105/* The following is from times(3) man page. It may need to be changed */ 105/* The following is from times(3) man page. It may need to be changed */
106#ifndef HZ 106#ifndef HZ
107# ifndef CLK_TCK 107# ifndef CLK_TCK
108# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */ 108# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
109# ifndef VMS 109# define HZ 100.0
110# define HZ 100.0
111# else /* VMS */
112# define HZ 100.0
113# endif
114# else /* _BSD_CLK_TCK_ */ 110# else /* _BSD_CLK_TCK_ */
115# define HZ ((double)_BSD_CLK_TCK_) 111# define HZ ((double)_BSD_CLK_TCK_)
116# endif 112# endif
@@ -123,17 +119,11 @@ struct tms {
123#define BUFSIZE ((long)1024*8) 119#define BUFSIZE ((long)1024*8)
124int run=0; 120int run=0;
125 121
126#ifndef NOPROTO
127static double Time_F(int s); 122static double Time_F(int s);
128#else
129static double Time_F();
130#endif
131
132#define START 0 123#define START 0
133#define STOP 1 124#define STOP 1
134 125
135static double Time_F(s) 126static double Time_F(int s)
136int s;
137 { 127 {
138 double ret; 128 double ret;
139#ifdef TIMES 129#ifdef TIMES
@@ -176,9 +166,7 @@ static int mul_c[NUM_SIZES]={8*8*8*8*8,8*8*8*8,8*8*8,8*8,8,1};
176 166
177void do_mul_exp(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *c,BN_CTX *ctx); 167void do_mul_exp(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *c,BN_CTX *ctx);
178 168
179int main(argc,argv) 169int main(int argc, char **argv)
180int argc;
181char **argv;
182 { 170 {
183 BN_CTX *ctx; 171 BN_CTX *ctx;
184 BIGNUM *a,*b,*c,*r; 172 BIGNUM *a,*b,*c,*r;
@@ -192,12 +180,7 @@ char **argv;
192 do_mul_exp(r,a,b,c,ctx); 180 do_mul_exp(r,a,b,c,ctx);
193 } 181 }
194 182
195void do_mul_exp(r,a,b,c,ctx) 183void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
196BIGNUM *r;
197BIGNUM *a;
198BIGNUM *b;
199BIGNUM *c;
200BN_CTX *ctx;
201 { 184 {
202 int i,k; 185 int i,k;
203 double tm; 186 double tm;
diff --git a/src/lib/libcrypto/bn/exptest.c b/src/lib/libcrypto/bn/exptest.c
index 67dc95d726..9e4ae91d20 100644
--- a/src/lib/libcrypto/bn/exptest.c
+++ b/src/lib/libcrypto/bn/exptest.c
@@ -59,30 +59,31 @@
59#include <stdio.h> 59#include <stdio.h>
60#include <stdlib.h> 60#include <stdlib.h>
61#include <string.h> 61#include <string.h>
62#include "bio.h" 62#include <openssl/bio.h>
63#include "bn.h" 63#include <openssl/bn.h>
64#include "rand.h" 64#include <openssl/rand.h>
65#include "err.h" 65#include <openssl/err.h>
66#ifdef WINDOWS 66#ifdef WINDOWS
67#include "../bio/bss_file.c" 67#include "../bio/bss_file.c"
68#endif 68#endif
69 69
70#define NUM_BITS (BN_BITS*2) 70#define NUM_BITS (BN_BITS*2)
71 71
72int main(argc,argv) 72int main(int argc, char *argv[])
73int argc;
74char *argv[];
75 { 73 {
76 BN_CTX *ctx; 74 BN_CTX *ctx;
77 BIO *out=NULL; 75 BIO *out=NULL;
78 int i,ret; 76 int i,ret;
79 unsigned char c; 77 unsigned char c;
80 BIGNUM *r_mont,*r_recp,*a,*b,*m; 78 BIGNUM *r_mont,*r_recp,*r_simple,*a,*b,*m;
79
80 ERR_load_BN_strings();
81 81
82 ctx=BN_CTX_new(); 82 ctx=BN_CTX_new();
83 if (ctx == NULL) exit(1); 83 if (ctx == NULL) exit(1);
84 r_mont=BN_new(); 84 r_mont=BN_new();
85 r_recp=BN_new(); 85 r_recp=BN_new();
86 r_simple=BN_new();
86 a=BN_new(); 87 a=BN_new();
87 b=BN_new(); 88 b=BN_new();
88 m=BN_new(); 89 m=BN_new();
@@ -114,29 +115,52 @@ char *argv[];
114 115
115 ret=BN_mod_exp_mont(r_mont,a,b,m,ctx,NULL); 116 ret=BN_mod_exp_mont(r_mont,a,b,m,ctx,NULL);
116 if (ret <= 0) 117 if (ret <= 0)
117 { printf("BN_mod_exp_mont() problems\n"); exit(1); } 118 {
119 printf("BN_mod_exp_mont() problems\n");
120 ERR_print_errors(out);
121 exit(1);
122 }
118 123
119 ret=BN_mod_exp_recp(r_recp,a,b,m,ctx); 124 ret=BN_mod_exp_recp(r_recp,a,b,m,ctx);
120 if (ret <= 0) 125 if (ret <= 0)
121 { printf("BN_mod_exp_recp() problems\n"); exit(1); }
122
123 if (BN_cmp(r_mont,r_recp) != 0)
124 { 126 {
125 printf("\nmont and recp results differ\n"); 127 printf("BN_mod_exp_recp() problems\n");
128 ERR_print_errors(out);
129 exit(1);
130 }
131
132 ret=BN_mod_exp_simple(r_simple,a,b,m,ctx);
133 if (ret <= 0)
134 {
135 printf("BN_mod_exp_simple() problems\n");
136 ERR_print_errors(out);
137 exit(1);
138 }
139
140 if (BN_cmp(r_simple, r_mont) == 0
141 && BN_cmp(r_simple,r_recp) == 0)
142 {
143 printf(".");
144 fflush(stdout);
145 }
146 else
147 {
148 if (BN_cmp(r_simple,r_mont) != 0)
149 printf("\nsimple and mont results differ\n");
150 if (BN_cmp(r_simple,r_recp) != 0)
151 printf("\nsimple and recp results differ\n");
152
126 printf("a (%3d) = ",BN_num_bits(a)); BN_print(out,a); 153 printf("a (%3d) = ",BN_num_bits(a)); BN_print(out,a);
127 printf("\nb (%3d) = ",BN_num_bits(b)); BN_print(out,b); 154 printf("\nb (%3d) = ",BN_num_bits(b)); BN_print(out,b);
128 printf("\nm (%3d) = ",BN_num_bits(m)); BN_print(out,m); 155 printf("\nm (%3d) = ",BN_num_bits(m)); BN_print(out,m);
156 printf("\nsimple ="); BN_print(out,r_simple);
129 printf("\nrecp ="); BN_print(out,r_recp); 157 printf("\nrecp ="); BN_print(out,r_recp);
130 printf("\nmont ="); BN_print(out,r_mont); 158 printf("\nmont ="); BN_print(out,r_mont);
131 printf("\n"); 159 printf("\n");
132 exit(1); 160 exit(1);
133 } 161 }
134 else
135 {
136 printf(".");
137 fflush(stdout);
138 }
139 } 162 }
163 CRYPTO_mem_leaks(out);
140 printf(" done\n"); 164 printf(" done\n");
141 exit(0); 165 exit(0);
142err: 166err:
diff --git a/src/lib/libcrypto/bn/new b/src/lib/libcrypto/bn/new
new file mode 100644
index 0000000000..285d506f19
--- /dev/null
+++ b/src/lib/libcrypto/bn/new
@@ -0,0 +1,23 @@
1void BN_RECP_CTX_init(BN_RECP_CTX *recp);
2BN_RECP_CTX *BN_RECP_CTX_new();
3void BN_RECP_CTX_free(BN_RECP_CTX *recp);
4int BN_RECP_CTX_set(BN_RECP_CTX *recp,BIGNUM *div,BN_CTX *ctx);
5
6int BN_mod_exp_recp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,
7 BN_RECP_CTX *recp,BN_CTX *ctx);
8
9int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BIGNUM *d,
10 BN_RECP_CTX *recp, BN_CTX *ctx);
11int BN_mod_recp(BIGNUM *rem, BIGNUM *m, BIGNUM *d,
12 BN_RECP_CTX *recp, BN_CTX *ctx);
13int BN_mod_mul_recp(BIGNUM *ret,BIGNUM *a,BIGNUM *b,BIGNUM *m,BN_RECP_CTX *recp,BN_CTX *ctx);
14
15int BN_mod_exp_montgomery(BIGNUM *r, BIGNUM *a, BIGNUM *p,
16 BN_MONT_CTX *m_ctx,BN_CTX *ctx);
17int BN_mod_exp2_montgomery(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2,
18 BIGNUM *p2,BN_MONT_CTX *m_ctx,BN_CTX *ctx);
19
20
21bn_div64 -> bn_div_words
22
23
diff --git a/src/lib/libcrypto/bn/test.c b/src/lib/libcrypto/bn/test.c
new file mode 100644
index 0000000000..a048b9f878
--- /dev/null
+++ b/src/lib/libcrypto/bn/test.c
@@ -0,0 +1,241 @@
1#include <stdio.h>
2#include "cryptlib.h"
3#include "bn_lcl.h"
4
5#define SIZE 32
6
7#define BN_MONT_CTX_set bn_mcs
8#define BN_from_montgomery bn_fm
9#define BN_mod_mul_montgomery bn_mmm
10#undef BN_to_montgomery
11#define BN_to_montgomery(r,a,mont,ctx) bn_mmm(\
12 r,a,(mont)->RR,(mont),ctx)
13
/* Scratch experiment: generates a SIZE-bit prime and two random values A
 * and B reduced modulo it, asks BN_mul_high() for the high part of A*B
 * given the masked low part, then computes the full product shifted right
 * the ordinary way and prints the difference between the two.
 * NOTE(review): no declared return type and no return statement; ctx and
 * the BIGNUMs are never released. */
14main()
15 {
16 BIGNUM prime,a,b,r,A,B,R;
 /* NOTE(review): mont (and the BIGNUMs a and b) are never used for any
  * computation in this function. */
17 BN_MONT_CTX *mont;
18 BN_CTX *ctx;
19 int i;
20
21 ctx=BN_CTX_new();
22 BN_init(&prime);
23 BN_init(&a); BN_init(&b); BN_init(&r);
24 BN_init(&A); BN_init(&B); BN_init(&R);
25
26 BN_generate_prime(&prime,SIZE,0,NULL,NULL,NULL,NULL);
27 BN_rand(&A,SIZE,1,0);
28 BN_rand(&B,SIZE,1,0);
29 BN_mod(&A,&A,&prime,ctx);
30 BN_mod(&B,&B,&prime,ctx);
31
 /* i = number of words in A; R is cut down to the low i words of A*B. */
32 i=A.top;
33 BN_mul(&R,&A,&B,ctx);
34 BN_mask_bits(&R,i*BN_BITS2);
35
36
37 BN_print_fp(stdout,&A); printf(" <- a\n");
38 BN_print_fp(stdout,&B); printf(" <- b\n");
39 BN_mul_high(&r,&A,&B,&R,i);
40 BN_print_fp(stdout,&r); printf(" <- high(BA*DC)\n");
41
 /* NOTE(review): the mask/shift widths from here on use a literal 32
  * where the earlier mask used BN_BITS2; they agree only when BN_BITS2
  * is 32. */
42 BN_mask_bits(&A,i*32);
43 BN_mask_bits(&B,i*32);
44
 /* NOTE(review): this BN_mul() call passes three arguments, while the
  * earlier call in this function passes ctx as a fourth. */
45 BN_mul(&R,&A,&B);
46 BN_rshift(&R,&R,i*32);
47 BN_print_fp(stdout,&R); printf(" <- norm BA*DC\n");
48 BN_sub(&R,&R,&r);
49 BN_print_fp(stdout,&R); printf(" <- diff\n");
50 }
51
52#if 0
/* (Compiled out: this definition sits inside the surrounding #if 0.)
 * Experimental routine that, given a, b and `low` (already-known low part
 * of their product), tries to recover part of a*b without a full
 * multiply.  It splits a, b and low into `words`-word low/high pieces
 * that alias the callers' digit arrays, forms (al-ah)*(bh-bl) and ah*bh,
 * and derives high(al*bl) from s1 as described in the comment below.
 * The final value copied into r is al*bl masked to 2*words*32 bits; the
 * combination with m and h is left unfinished (see the truncated
 * commented-out BN_lshift near the end).
 * NOTE(review): declared int but contains no return statement.
 * NOTE(review): BN_mul() is called here with three arguments and shift
 * widths use a literal 32 rather than BN_BITS2. */
53int bn_mul_high(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *low, int words)
54 {
55 int i;
56 BIGNUM t1,t2,t3,h,ah,al,bh,bl,m,s0,s1;
57
58 BN_init(&al); BN_init(&ah);
59 BN_init(&bl); BN_init(&bh);
60 BN_init(&t1); BN_init(&t2); BN_init(&t3);
61 BN_init(&s0); BN_init(&s1);
62 BN_init(&h); BN_init(&m);
63
 /* Split a: al = low piece, ah = high piece (no copy; d pointers alias
  * a->d).  When a has fewer than `words` words, ah is left as BN_init()
  * set it.
  * NOTE(review): ah.d starts at index ah.top (= a->top-words), which
  * equals index `words` only when a->top == 2*words; the same pattern is
  * used for bh and s1 below.  Confirm this is intended. */
64 i=a->top;
65 if (i >= words)
66 {
67 al.top=words;
68 ah.top=a->top-words;
69 ah.d= &(a->d[ah.top]);
70 }
71 else
72 al.top=i;
73 al.d=a->d;
74
 /* Split b the same way into bl / bh. */
75 i=b->top;
76 if (i >= words)
77 {
78 bl.top=words;
79 bh.top=i-words;
80 bh.d= &(b->d[bh.top]);
81 }
82 else
83 bl.top=i;
84 bl.d=b->d;
85
 /* Split the supplied low product into s0 / s1. */
86 i=low->top;
87 if (i >= words)
88 {
89 s0.top=words;
90 s1.top=i-words;
91 s1.d= &(low->d[s1.top]);
92 }
93 else
94 s0.top=i;
95 s0.d=low->d;
96
 /* Make each aliased view's capacity equal to its length.
  * NOTE(review): s0.max and s1.max are taken from bl.top / bh.top rather
  * than from s0.top / s1.top. */
97al.max=al.top; ah.max=ah.top;
98bl.max=bl.top; bh.max=bh.top;
99s0.max=bl.top; s1.max=bh.top;
100
101 /* Calculate (al-ah)*(bh-bl) */
102 BN_sub(&t1,&al,&ah);
103 BN_sub(&t2,&bh,&bl);
104 BN_mul(&m,&t1,&t2);
105
106 /* Calculate ah*bh */
107 BN_mul(&h,&ah,&bh);
108
109 /* s0 == low(al*bl)
110 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
111 * We know s0 and s1 so the only unknown is high(al*bl)
112 * high(al*bl) == s1 - low(ah*bh+(al-ah)*(bh-bl)+s0)
113 */
114 BN_add(&m,&m,&h);
115 BN_add(&t2,&m,&s0);
116 /* Quick and dirty mask off of high words */
 /* t3 is a view onto t2's digits limited to at most `words` words; it
  * owns no storage of its own. */
117 t3.d=t2.d;
118 t3.top=(t2.top > words)?words:t2.top;
119 t3.neg=t2.neg;
120t3.max=t3.top;
121/* BN_print_fp(stdout,&s1); printf(" s1\n"); */
122/* BN_print_fp(stdout,&t2); printf(" middle value\n"); */
123/* BN_print_fp(stdout,&t3); printf(" low middle value\n"); */
124 BN_sub(&t1,&s1,&t3);
125
 /* A negative difference is wrapped back into range by adding
  * 1<<(words*32) and masking to words*32 bits. */
126 if (t1.neg)
127 {
128/*printf("neg fixup\n"); BN_print_fp(stdout,&t1); printf(" before\n"); */
129 BN_lshift(&t2,BN_value_one(),words*32);
130 BN_add(&t1,&t2,&t1);
131 BN_mask_bits(&t1,words*32);
132/* BN_print_fp(stdout,&t1); printf(" after\n"); */
133 }
134 /* al*bl == high(al*bl)<<words+s0 */
135 BN_lshift(&t1,&t1,words*32);
136 BN_add(&t1,&t1,&s0);
137
138 /* We now have
139 * al*bl - t1
140 * (al-ah)*(bh-bl)+ah*bh - m
141 * ah*bh - h
142 */
143 BN_copy(r,&t1);
144 BN_mask_bits(r,words*32*2);
145
146 /*BN_lshift(&m,&m,words*/
147
 /* Only t1, t2, m and h are passed to BN_free(); the remaining locals
  * are views onto other BIGNUMs' digit arrays. */
148 BN_free(&t1); BN_free(&t2);
149 BN_free(&m); BN_free(&h);
150 }
151
/* (Compiled out: inside the surrounding #if 0.  The #define near the top
 * of this file renames BN_mod_mul_montgomery to bn_mmm.)
 * Multiplies a by b -- using BN_sqr() when both arguments are the same
 * object -- into a temporary taken from ctx, then passes the product
 * through BN_from_montgomery() to produce r.
 * Returns 1 on success, 0 if the multiply/square or the reduction fails.
 * NOTE(review): BN_mul() is called here with three arguments. */
152int BN_mod_mul_montgomery(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_MONT_CTX *mont,
153 BN_CTX *ctx)
154 {
155 BIGNUM *tmp;
156
 /* Claim the next scratch BIGNUM from the context's pool. */
157 tmp= &(ctx->bn[ctx->tos++]);
158
159 if (a == b)
160 {
161 if (!BN_sqr(tmp,a,ctx)) goto err;
162 }
163 else
164 {
165 if (!BN_mul(tmp,a,b)) goto err;
166 }
167 /* reduce from aRR to aR */
168 if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
 /* Give the scratch slot back on the success path. */
169 ctx->tos--;
170 return(1);
 /* NOTE(review): the error path returns without decrementing ctx->tos,
  * so the scratch slot claimed above is not handed back. */
171err:
172 return(0);
173 }
174
/* (Compiled out: inside the surrounding #if 0.  The #define near the top
 * of this file renames BN_from_montgomery to bn_fm.)
 * Experimental Montgomery reduction of a into r using mont->Ni, mont->N
 * and mont->ri:
 *   t1 = a masked to ri bits; t2 = t1*Ni masked to ri bits;
 *   t1 = t2*N; t2 = t1 + a; r = t2 >> ri; if r >= N then r -= N.
 * Returns 1 on success, 0 if BN_copy/BN_mul/BN_add fails.
 * NOTE(review): prints t2 to stdout unconditionally (debug output);
 * BN_mul() is called with three arguments; the results of
 * BN_mask_bits/BN_rshift/BN_usub are not checked. */
175int BN_from_montgomery(BIGNUM *r, BIGNUM *a, BN_MONT_CTX *mont, BN_CTX *ctx)
176 {
 /* NOTE(review): z1 is initialised but otherwise unused, and ap, bp, rp,
  * j, i, bl, al are never used. */
177 BIGNUM z1;
178 BIGNUM *t1,*t2;
179 BN_ULONG *ap,*bp,*rp;
180 int j,i,bl,al;
181
182 BN_init(&z1);
 /* Borrow two scratch BIGNUMs from ctx without advancing ctx->tos. */
183 t1= &(ctx->bn[ctx->tos]);
184 t2= &(ctx->bn[ctx->tos+1]);
185
186 if (!BN_copy(t1,a)) goto err;
187 /* can cheat */
188 BN_mask_bits(t1,mont->ri);
189 if (!BN_mul(t2,t1,mont->Ni)) goto err;
190 BN_mask_bits(t2,mont->ri);
191
192 if (!BN_mul(t1,t2,mont->N)) goto err;
193 if (!BN_add(t2,t1,a)) goto err;
194
195 /* At this point, t2 has the bottom ri bits set to zero.
196 * This means that the bottom ri bits == the 1^ri minus the bottom
197 * ri bits of a.
198 * This means that only the bits above 'ri' in a need to be added,
199 * and XXXXXXXXXXXXXXXXXXXXXXXX
200 */
201BN_print_fp(stdout,t2); printf("\n");
202 BN_rshift(r,t2,mont->ri);
203
 /* Single conditional subtraction using the unsigned compare/subtract. */
204 if (BN_ucmp(r,mont->N) >= 0)
205 BN_usub(r,r,mont->N);
206
207 return(1);
208err:
209 return(0);
210 }
211
/* (Compiled out: inside the surrounding #if 0.  The #define near the top
 * of this file renames BN_MONT_CTX_set to bn_mcs.)
 * Fills in a Montgomery context for modulus `mod`:
 *   N  = copy of mod
 *   ri = bit length of mod rounded up to a multiple of BN_BITS2
 *   Ni = (R*Ri - 1)/N, where R = 1<<ri and Ri = BN_mod_inverse(R, mod)
 *   RR = (1 << 2*ri) mod N
 * Returns 1 on success, 0 if BN_mod_inverse() fails.
 * NOTE(review): the results of BN_new, BN_copy, BN_lshift, BN_usub,
 * BN_div and BN_mod are not checked. */
212int BN_MONT_CTX_set(BN_MONT_CTX *mont, BIGNUM *mod, BN_CTX *ctx)
213 {
214 BIGNUM *Ri=NULL,*R=NULL;
215
 /* Allocate the RR and N members on first use. */
216 if (mont->RR == NULL) mont->RR=BN_new();
217 if (mont->N == NULL) mont->N=BN_new();
218
219 R=mont->RR; /* grab RR as a temp */
220 BN_copy(mont->N,mod); /* Set N */
221
222 mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
223 BN_lshift(R,BN_value_one(),mont->ri); /* R */
 /* Passing NULL as the result makes BN_mod_inverse() return the value to
  * use; it becomes mont->Ni below. */
224 if ((Ri=BN_mod_inverse(NULL,R,mod,ctx)) == NULL) goto err;/* Ri */
225 BN_lshift(Ri,Ri,mont->ri); /* R*Ri */
226 BN_usub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */
 /* Quotient goes back into Ri; NULL is passed for the remainder. */
227 BN_div(Ri,NULL,Ri,mod,ctx);
 /* Replace any previously stored Ni with the new value. */
228 if (mont->Ni != NULL) BN_free(mont->Ni);
229 mont->Ni=Ri; /* Ni=(R*Ri-1)/N */
230
231 /* setup RR for conversions */
 /* This overwrites the temporary R value held in mont->RR above. */
232 BN_lshift(mont->RR,BN_value_one(),mont->ri*2);
233 BN_mod(mont->RR,mont->RR,mont->N,ctx);
234
235 return(1);
236err:
237 return(0);
238 }
239
240
241#endif
diff --git a/src/lib/libcrypto/bn/todo b/src/lib/libcrypto/bn/todo
new file mode 100644
index 0000000000..e47e381aea
--- /dev/null
+++ b/src/lib/libcrypto/bn/todo
@@ -0,0 +1,3 @@
1Cache RECP_CTX values
2make the result argument independent of the inputs.
3split up the _exp_ functions
diff --git a/src/lib/libcrypto/bn/vms-helper.c b/src/lib/libcrypto/bn/vms-helper.c
new file mode 100644
index 0000000000..73af337069
--- /dev/null
+++ b/src/lib/libcrypto/bn/vms-helper.c
@@ -0,0 +1,66 @@
1/* vms-helper.c */
2/* ====================================================================
3 * Copyright (c) 1999 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@OpenSSL.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include <stdio.h>
57#include "cryptlib.h"
58#include "bn_lcl.h"
59
/* Reports a division overflow and terminates the process.
 * i is only echoed in the diagnostic.  The message is written to stderr
 * unless NO_STDIO or WIN16 is defined; abort() is then called in all
 * configurations, so control does not come back to the caller.
 * NOTE(review): no return type is declared (implicit int). */
60bn_div_words_abort(int i)
61{
62#if !defined(NO_STDIO) && !defined(WIN16)
63 fprintf(stderr,"Division would overflow (%d)\n",i);
64#endif
65 abort();
66}