summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r--src/lib/libcrypto/bn/Makefile333
-rw-r--r--src/lib/libcrypto/bn/asm/README27
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.s3199
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.s.works533
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/add.pl119
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/div.pl144
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul.pl116
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl120
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl213
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl98
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl177
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/sqr.pl113
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl109
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl132
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/sub.pl108
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/add.pl118
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/div.pl144
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul.pl104
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul_add.pl123
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul_c4.pl215
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl98
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul_c8.pl177
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/sqr.pl113
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl109
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl132
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/sub.pl108
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl593
-rw-r--r--src/lib/libcrypto/bn/asm/bn-alpha.pl571
-rw-r--r--src/lib/libcrypto/bn/asm/ca.pl33
-rw-r--r--src/lib/libcrypto/bn/asm/co-586.pl286
-rw-r--r--src/lib/libcrypto/bn/asm/co-alpha.pl116
-rw-r--r--src/lib/libcrypto/bn/asm/ia64.S1560
-rw-r--r--src/lib/libcrypto/bn/asm/mips1.s539
-rw-r--r--src/lib/libcrypto/bn/asm/mips3.s2201
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc.s710
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2.s1618
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2W.s1605
-rw-r--r--src/lib/libcrypto/bn/asm/ppc.pl2078
-rw-r--r--src/lib/libcrypto/bn/asm/r3000.s646
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8.S1458
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8plus.S1547
-rw-r--r--src/lib/libcrypto/bn/asm/vms.mar6440
-rw-r--r--src/lib/libcrypto/bn/asm/x86.pl28
-rw-r--r--src/lib/libcrypto/bn/asm/x86/add.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86/comba.pl277
-rw-r--r--src/lib/libcrypto/bn/asm/x86/div.pl15
-rw-r--r--src/lib/libcrypto/bn/asm/x86/f3
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul.pl77
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul_add.pl87
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sqr.pl60
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sub.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86_64-gcc.c593
-rw-r--r--src/lib/libcrypto/bn/bn.h583
-rw-r--r--src/lib/libcrypto/bn/bn.mul19
-rw-r--r--src/lib/libcrypto/bn/bn_add.c309
-rw-r--r--src/lib/libcrypto/bn/bn_asm.c832
-rw-r--r--src/lib/libcrypto/bn/bn_blind.c144
-rw-r--r--src/lib/libcrypto/bn/bn_ctx.c155
-rw-r--r--src/lib/libcrypto/bn/bn_div.c387
-rw-r--r--src/lib/libcrypto/bn/bn_err.c139
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c987
-rw-r--r--src/lib/libcrypto/bn/bn_exp2.c313
-rw-r--r--src/lib/libcrypto/bn/bn_gcd.c490
-rw-r--r--src/lib/libcrypto/bn/bn_kron.c182
-rw-r--r--src/lib/libcrypto/bn/bn_lcl.h492
-rw-r--r--src/lib/libcrypto/bn/bn_lib.c824
-rw-r--r--src/lib/libcrypto/bn/bn_mod.c296
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c421
-rw-r--r--src/lib/libcrypto/bn/bn_mpi.c129
-rw-r--r--src/lib/libcrypto/bn/bn_mul.c802
-rw-r--r--src/lib/libcrypto/bn/bn_prime.c468
-rw-r--r--src/lib/libcrypto/bn/bn_prime.h325
-rw-r--r--src/lib/libcrypto/bn/bn_prime.pl117
-rw-r--r--src/lib/libcrypto/bn/bn_print.c333
-rw-r--r--src/lib/libcrypto/bn/bn_rand.c291
-rw-r--r--src/lib/libcrypto/bn/bn_recp.c230
-rw-r--r--src/lib/libcrypto/bn/bn_shift.c205
-rw-r--r--src/lib/libcrypto/bn/bn_sqr.c288
-rw-r--r--src/lib/libcrypto/bn/bn_sqrt.c387
-rw-r--r--src/lib/libcrypto/bn/bn_word.c208
-rw-r--r--src/lib/libcrypto/bn/bn_x931p.c282
-rw-r--r--src/lib/libcrypto/bn/bnspeed.c233
-rw-r--r--src/lib/libcrypto/bn/bntest.c1290
-rw-r--r--src/lib/libcrypto/bn/divtest.c41
-rw-r--r--src/lib/libcrypto/bn/exp.c62
-rw-r--r--src/lib/libcrypto/bn/expspeed.c353
-rw-r--r--src/lib/libcrypto/bn/exptest.c201
-rw-r--r--src/lib/libcrypto/bn/todo3
-rw-r--r--src/lib/libcrypto/bn/vms-helper.c68
89 files changed, 43164 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/Makefile b/src/lib/libcrypto/bn/Makefile
new file mode 100644
index 0000000000..9969d242cc
--- /dev/null
+++ b/src/lib/libcrypto/bn/Makefile
@@ -0,0 +1,333 @@
1#
2# OpenSSL/crypto/bn/Makefile
3# Compiles the bn sources listed in LIBSRC and archives the objects into $(LIB).
4
5DIR= bn
6TOP= ../..
7CC= cc
8CPP= $(CC) -E
9INCLUDES= -I.. -I$(TOP) -I../../include
10CFLAG=-g
11INSTALL_PREFIX=
12OPENSSLDIR= /usr/local/ssl
13INSTALLTOP=/usr/local/ssl
14MAKEDEPPROG= makedepend
15MAKEDEPEND= $(TOP)/util/domd $(TOP) -MD $(MAKEDEPPROG)
16MAKEFILE= Makefile
17AR= ar r
18
19BN_ASM= bn_asm.o
20# or use an assembler object instead of bn_asm.o (the elf rule below generates asm/bn86-elf.s):
21#BN_ASM= bn86-elf.o
22
23CFLAGS= $(INCLUDES) $(CFLAG)
24ASFLAGS= $(INCLUDES) $(ASFLAG)
25
26GENERAL=Makefile
27TEST=bntest.c exptest.c
28APPS=
29
30LIB=$(TOP)/libcrypto.a
31LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
32 bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
33 bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \
34 bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_x931p.c
35
36LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \
37 bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
38 bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \
39 bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_x931p.o
40
41SRC= $(LIBSRC)
42
43EXHEADER= bn.h
44HEADER= bn_lcl.h bn_prime.h $(EXHEADER)
45
46ALL= $(GENERAL) $(SRC) $(HEADER)
47
48top: # first target in the file: runs sub_all in ../.. restricted to DIRS=crypto SDIRS=$(DIR)
49 (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
50
51all: lib # "all" here just means bringing the lib stamp up to date
52
53bn_prime.h: bn_prime.pl # generate into a scratch file first so a failed run cannot leave a truncated header behind
54 $(PERL) bn_prime.pl >bn_prime.h.new && mv -f bn_prime.h.new bn_prime.h || { rm -f bn_prime.h.new; exit 1; }
55
56divtest: divtest.c ../../libcrypto.a # stand-alone test program; built with the configured compiler
57 $(CC) -I../../include divtest.c -o divtest ../../libcrypto.a
58
59bnbug: bnbug.c ../../libcrypto.a top # debug program; built with the configured compiler, "top" re-runs the library build first
60 $(CC) -g -I../../include bnbug.c -o bnbug ../../libcrypto.a
61
62lib: $(LIBOBJ) # "lib" is a stamp file, touched once the objects have been archived
63 $(AR) $(LIB) $(LIBOBJ)
64 $(RANLIB) $(LIB) || echo Never mind.
65 @touch lib
66
67# elf: run the perlasm scripts inside asm/ with the "elf" argument and the C flags to produce .s files
68asm/bn86-elf.s: asm/bn-586.pl ../perlasm/x86asm.pl
69 (cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > bn86-elf.s)
70
71asm/co86-elf.s: asm/co-586.pl ../perlasm/x86asm.pl
72 (cd asm; $(PERL) co-586.pl elf $(CFLAGS) > co86-elf.s)
73
74# a.out: preprocess the generated cpp source with -DOUT and pipe the result straight into as
75asm/bn86-out.o: asm/bn86unix.cpp
76 $(CPP) -DOUT asm/bn86unix.cpp | as -o $@
77
78asm/co86-out.o: asm/co86unix.cpp
79 $(CPP) -DOUT asm/co86unix.cpp | as -o $@
80
81# bsdi: as for a.out but with -DBSDI, and with sed replacing " :" by ":" before assembling
82asm/bn86bsdi.o: asm/bn86unix.cpp
83 $(CPP) -DBSDI asm/bn86unix.cpp | sed 's/ :/:/' | as -o $@
84
85asm/co86bsdi.o: asm/co86unix.cpp
86 $(CPP) -DBSDI asm/co86unix.cpp | sed 's/ :/:/' | as -o $@
87
88asm/bn86unix.cpp: asm/bn-586.pl ../perlasm/x86asm.pl # cpp-flavoured output, input of the a.out and bsdi rules
89 (cd asm; $(PERL) bn-586.pl cpp >bn86unix.cpp )
90
91asm/co86unix.cpp: asm/co-586.pl ../perlasm/x86asm.pl # cpp-flavoured output, input of the a.out and bsdi rules
92 (cd asm; $(PERL) co-586.pl cpp >co86unix.cpp )
93
94asm/sparcv8.o: asm/sparcv8.S # dependency only; no recipe is given in this file
95
96asm/sparcv8plus.o: asm/sparcv8plus.S # dependency only; no recipe is given in this file
97
98# Old GNU assemblers do not understand V9 instructions, so
99# /usr/ccs/bin/as is hired to do the job. Note that the option is called
100# *-gcc27, but even users of gcc 2.8 and later may hit the same problem
101# if they did not bother to upgrade the GNU assembler. Such users should
102# not choose this option, but are advised to *remove* the GNU assembler
103# or upgrade it.
104asm/sparcv8plus-gcc27.o: asm/sparcv8plus.S
105 $(CC) $(ASFLAGS) -E asm/sparcv8plus.S | \
106 /usr/ccs/bin/as -xarch=v8plus - -o $@
107
108
109asm/ia64.o: asm/ia64.S # dependency only; no recipe is given in this file
110
111# Some compiler drivers (most notably HP-UX and Intel C++) don't understand
112# the .S extension, and piping cc -E is too driver/ABI dependent, so preprocess
113# into a scratch .s file and compile that. The scratch file lives in asm/ (not
114# /tmp) and the compile status is kept in rc so that a failure stops the build.
115asm/ia64-cpp.o: asm/ia64.S
116 $(CC) $(ASFLAGS) -E asm/ia64.S > asm/ia64-cpp.$$$$.s && \
117 $(CC) $(ASFLAGS) -c -o asm/ia64-cpp.o asm/ia64-cpp.$$$$.s; rc=$$?; \
118 rm -f asm/ia64-cpp.$$$$.s; exit $$rc
119
120asm/x86_64-gcc.o: asm/x86_64-gcc.c # a C source compiled with $(ASFLAGS) rather than $(CFLAGS)
121 $(CC) $(ASFLAGS) -c -o $@ $<
122
123asm/pa-risc2W.o: asm/pa-risc2W.s # assembled with /usr/ccs/bin/as directly instead of going through $(CC)
124 /usr/ccs/bin/as -o $@ asm/pa-risc2W.s
125# ppc: asm/ppc.pl writes the file named by its argument; the script is spelled out in every rule ($< is only portable in inference rules)
126asm/linux_ppc32.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
127asm/linux_ppc64.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
128asm/aix_ppc32.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
129asm/aix_ppc64.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
130asm/osx_ppc32.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
131
132files: # append this Makefile's entry, as produced by util/files.pl, to $(TOP)/MINFO
133 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
134
135links: # run util/mklink.pl for the exported header, the test sources and the (empty) APPS list
136 @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
137 @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
138 @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
139
140install: # copy every exported header into the install tree with mode 644; stop at the first copy or chmod failure
141 @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
142 do \
143 (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i && \
144 chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ) || exit 1; \
145 done;
146
147exptest: # always relinks; NOTE(review): hard-codes gcc and its debug flags instead of $(CC)/$(CFLAGS)
148 rm -f exptest
149 gcc -I../../include -g2 -ggdb -o exptest exptest.c ../../libcrypto.a
150
151div: # no -o is given, so the program is written to a.out; NOTE(review): hard-codes gcc, and div.c is not in SRC or TEST
152 rm -f a.out
153 gcc -I.. -g div.c ../../libcrypto.a
154
155tags: # ctags index over the library sources
156 ctags $(SRC)
157
158tests: # no recipe: this target does nothing in this directory
159
160lint: # lint output is collected in the file "fluff", which the clean target removes
161 lint -DLINT $(INCLUDES) $(SRC)>fluff
162
163depend: # runs util/domd over the library sources; NOTE(review): $(PROGS) and $(DEPFLAG) are not set in this file
164 $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
165
166dclean: # rewrite the Makefile keeping only the lines up to and including the DO NOT DELETE marker
167 $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
168 mv -f Makefile.new $(MAKEFILE)
169
170clean: # remove generated assembler sources, objects, the lib stamp, tags, lint output and backup files
171 rm -f asm/co86unix.cpp asm/bn86unix.cpp asm/*-elf.* *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s
172
173# DO NOT DELETE THIS LINE -- make depend depends on it.
174
175bn_add.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
176bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
177bn_add.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
178bn_add.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
179bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
180bn_add.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
181bn_add.o: ../cryptlib.h bn_add.c bn_lcl.h
182bn_asm.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
183bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
184bn_asm.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
185bn_asm.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
186bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
187bn_asm.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
188bn_asm.o: ../cryptlib.h bn_asm.c bn_lcl.h
189bn_blind.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
190bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
191bn_blind.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
192bn_blind.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
193bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
194bn_blind.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
195bn_blind.o: ../cryptlib.h bn_blind.c bn_lcl.h
196bn_ctx.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
197bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
198bn_ctx.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
199bn_ctx.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
200bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
201bn_ctx.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
202bn_ctx.o: ../cryptlib.h bn_ctx.c bn_lcl.h
203bn_div.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
204bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
205bn_div.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
206bn_div.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
207bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
208bn_div.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
209bn_div.o: ../cryptlib.h bn_div.c bn_lcl.h
210bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
211bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
212bn_err.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
213bn_err.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
214bn_err.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
215bn_err.o: ../../include/openssl/symhacks.h bn_err.c
216bn_exp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
217bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
218bn_exp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
219bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
220bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
221bn_exp.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
222bn_exp.o: ../cryptlib.h bn_exp.c bn_lcl.h
223bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
224bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
225bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
226bn_exp2.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
227bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
228bn_exp2.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
229bn_exp2.o: ../cryptlib.h bn_exp2.c bn_lcl.h
230bn_gcd.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
231bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
232bn_gcd.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
233bn_gcd.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
234bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
235bn_gcd.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
236bn_gcd.o: ../cryptlib.h bn_gcd.c bn_lcl.h
237bn_kron.o: ../../include/openssl/bn.h ../../include/openssl/e_os2.h
238bn_kron.o: ../../include/openssl/opensslconf.h bn_kron.c bn_lcl.h
239bn_lib.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
240bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
241bn_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
242bn_lib.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
243bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
244bn_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
245bn_lib.o: ../cryptlib.h bn_lcl.h bn_lib.c
246bn_mod.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
247bn_mod.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
248bn_mod.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
249bn_mod.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
250bn_mod.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
251bn_mod.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
252bn_mod.o: ../cryptlib.h bn_lcl.h bn_mod.c
253bn_mont.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
254bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
255bn_mont.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
256bn_mont.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
257bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
258bn_mont.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
259bn_mont.o: ../cryptlib.h bn_lcl.h bn_mont.c
260bn_mpi.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
261bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
262bn_mpi.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
263bn_mpi.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
264bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
265bn_mpi.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
266bn_mpi.o: ../cryptlib.h bn_lcl.h bn_mpi.c
267bn_mul.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
268bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
269bn_mul.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
270bn_mul.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
271bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
272bn_mul.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
273bn_mul.o: ../cryptlib.h bn_lcl.h bn_mul.c
274bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
275bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
276bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
277bn_prime.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
278bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
279bn_prime.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h
280bn_prime.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
281bn_prime.o: ../cryptlib.h bn_lcl.h bn_prime.c bn_prime.h
282bn_print.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
283bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
284bn_print.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
285bn_print.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
286bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
287bn_print.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
288bn_print.o: ../cryptlib.h bn_lcl.h bn_print.c
289bn_rand.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
290bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
291bn_rand.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
292bn_rand.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
293bn_rand.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
294bn_rand.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h
295bn_rand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
296bn_rand.o: ../cryptlib.h bn_lcl.h bn_rand.c
297bn_recp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
298bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
299bn_recp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
300bn_recp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
301bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
302bn_recp.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
303bn_recp.o: ../cryptlib.h bn_lcl.h bn_recp.c
304bn_shift.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
305bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
306bn_shift.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
307bn_shift.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
308bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
309bn_shift.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
310bn_shift.o: ../cryptlib.h bn_lcl.h bn_shift.c
311bn_sqr.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
312bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
313bn_sqr.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
314bn_sqr.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
315bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
316bn_sqr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
317bn_sqr.o: ../cryptlib.h bn_lcl.h bn_sqr.c
318bn_sqrt.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
319bn_sqrt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
320bn_sqrt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
321bn_sqrt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
322bn_sqrt.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
323bn_sqrt.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
324bn_sqrt.o: ../cryptlib.h bn_lcl.h bn_sqrt.c
325bn_word.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
326bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
327bn_word.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
328bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
329bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
330bn_word.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
331bn_word.o: ../cryptlib.h bn_lcl.h bn_word.c
332bn_x931p.o: ../../include/openssl/bn.h ../../include/openssl/e_os2.h
333bn_x931p.o: ../../include/openssl/opensslconf.h bn_x931p.c
diff --git a/src/lib/libcrypto/bn/asm/README b/src/lib/libcrypto/bn/asm/README
new file mode 100644
index 0000000000..b0f3a68a06
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/README
@@ -0,0 +1,27 @@
1<OBSOLETE>
2
3All the assembler files in this directory are just versions of the file
4crypto/bn/bn_asm.c.
5
6Quite a few of these files are just the assembler output from gcc, since on
7quite a few machines it is 2 times faster than the system compiler's output.
8
9For the x86, I have hand-written the assembler because of the bad job all
10compilers seem to do on it. This normally gives a 2 times speed-up in the RSA
11routines.
12
13For the DEC alpha, I also hand-wrote the assembler (except the division, which
14is just the output from the C compiler pasted on the end of the file).
15On the 2 alpha C compilers I had access to, it was not possible to do
1664b x 64b -> 128b calculations (both the long and the long long data types
17were 64 bits). So the hand-written assembler gives access to the 128-bit result and
18a 2 times speed-up :-).
19
20There are 3 versions of the assembler for the HP PA-RISC.
21
22pa-risc.s is the original one, which works fine and was generated using gcc :-)
23
24pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations
25by Chris Ruemmler from HP (with some help from the HP C compiler).
26
27</OBSOLETE>
diff --git a/src/lib/libcrypto/bn/asm/alpha.s b/src/lib/libcrypto/bn/asm/alpha.s
new file mode 100644
index 0000000000..555ff0b92d
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.s
@@ -0,0 +1,3199 @@
1 # DEC Alpha assembler
2 # The bn_div_words is actually gcc output but the other parts are hand done.
3 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
4 # bn_div_words.
5 # I've gone back and re-done most of the routines.
6 # The key thing to remember for the 164 CPU is that while a
7 # multiply operation takes 8 cycles, another one can only be issued
8 # after 4 cycles have elapsed. I've made modifications to help
9 # improve this. Also, normally, the result of a ld instruction will not
10 # be available for about 3 cycles.
11 .file 1 "bn_asm.c"
12 .set noat
13gcc2_compiled.:
14__gnu_compiled_c:
15 .text
16 .align 3
17 .globl bn_mul_add_words
18 .ent bn_mul_add_words
19bn_mul_add_words: # r[i] += a[i]*w for $18 words; $16=r, $17=a, $19=w, running carry (and result) in $0
20bn_mul_add_words..ng:
21 .frame $30,0,$26,0
22 .prologue 0
23 .align 5
24 subq $18,4,$18 # count -= 4: the main loop handles four words per pass
25 bis $31,$31,$0 # carry = 0
26 blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
27 ldq $20,0($17) # 1 1
28 ldq $1,0($16) # 1 1
29 .align 3
30$42: # main loop: four words per pass, the four multiplies are interleaved
31 mulq $20,$19,$5 # 1 2 1 ######
32 ldq $21,8($17) # 2 1
33 ldq $2,8($16) # 2 1
34 umulh $20,$19,$20 # 1 2 ######
35 ldq $27,16($17) # 3 1
36 ldq $3,16($16) # 3 1
37 mulq $21,$19,$6 # 2 2 1 ######
38 ldq $28,24($17) # 4 1
39 addq $1,$5,$1 # 1 2 2
40 ldq $4,24($16) # 4 1
41 umulh $21,$19,$21 # 2 2 ######
42 cmpult $1,$5,$22 # 1 2 3 1
43 addq $20,$22,$20 # 1 3 1
44 addq $1,$0,$1 # 1 2 3 1
45 mulq $27,$19,$7 # 3 2 1 ######
46 cmpult $1,$0,$0 # 1 2 3 2
47 addq $2,$6,$2 # 2 2 2
48 addq $20,$0,$0 # 1 3 2
49 cmpult $2,$6,$23 # 2 2 3 1
50 addq $21,$23,$21 # 2 3 1
51 umulh $27,$19,$27 # 3 2 ######
52 addq $2,$0,$2 # 2 2 3 1
53 cmpult $2,$0,$0 # 2 2 3 2
54 subq $18,4,$18 # count -= 4
55 mulq $28,$19,$8 # 4 2 1 ######
56 addq $21,$0,$0 # 2 3 2
57 addq $3,$7,$3 # 3 2 2
58 addq $16,32,$16 # r += 4 words; the stores below use negative offsets
59 cmpult $3,$7,$24 # 3 2 3 1
60 stq $1,-32($16) # 1 2 4
61 umulh $28,$19,$28 # 4 2 ######
62 addq $27,$24,$27 # 3 3 1
63 addq $3,$0,$3 # 3 2 3 1
64 stq $2,-24($16) # 2 2 4
65 cmpult $3,$0,$0 # 3 2 3 2
66 stq $3,-16($16) # 3 2 4
67 addq $4,$8,$4 # 4 2 2
68 addq $27,$0,$0 # 3 3 2
69 cmpult $4,$8,$25 # 4 2 3 1
70 addq $17,32,$17 # a += 4 words
71 addq $28,$25,$28 # 4 3 1
72 addq $4,$0,$4 # 4 2 3 1
73 cmpult $4,$0,$0 # 4 2 3 2
74 stq $4,-8($16) # 4 2 4
75 addq $28,$0,$0 # 4 3 2
76 blt $18,$43 # fewer than four words left: leave the main loop
77
78 ldq $20,0($17) # 1 1
79 ldq $1,0($16) # 1 1
80
81 br $42
82
83 .align 4
84$45: # tail loop: one word per pass
85 ldq $20,0($17) # 4 1
86 ldq $1,0($16) # 4 1
87 mulq $20,$19,$5 # 4 2 1
88 subq $18,1,$18 # count -= 1
89 addq $16,8,$16 # r += 1 word; the store below uses offset -8
90 addq $17,8,$17 # a += 1 word
91 umulh $20,$19,$20 # 4 2
92 addq $1,$5,$1 # 4 2 2
93 cmpult $1,$5,$22 # 4 2 3 1
94 addq $20,$22,$20 # 4 3 1
95 addq $1,$0,$1 # 4 2 3 1
96 cmpult $1,$0,$0 # 4 2 3 2
97 addq $20,$0,$0 # 4 3 2
98 stq $1,-8($16) # 4 2 4
99 bgt $18,$45
100 ret $31,($26),1 # else exit
101
102 .align 4
103$43: # undo the -4 bias on the count and see whether any words remain
104 addq $18,4,$18
105 bgt $18,$45 # goto tail code
106 ret $31,($26),1 # else exit
107
108 .end bn_mul_add_words
109 .align 3
110 .globl bn_mul_words
111 .ent bn_mul_words
112bn_mul_words: # r[i] = a[i]*w + carry for $18 words; $16=r, $17=a, $19=w, running carry (and result) in $0
113bn_mul_words..ng:
114 .frame $30,0,$26,0
115 .prologue 0
116 .align 5
117 subq $18,4,$18 # count -= 4: the main loop handles four words per pass
118 bis $31,$31,$0 # carry = 0
119 blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
120 ldq $20,0($17) # 1 1
121 .align 3
122$142: # main loop: four words per pass
123
124 mulq $20,$19,$5 # 1 2 1 #####
125 ldq $21,8($17) # 2 1
126 ldq $27,16($17) # 3 1
127 umulh $20,$19,$20 # 1 2 #####
128 ldq $28,24($17) # 4 1
129 mulq $21,$19,$6 # 2 2 1 #####
130 addq $5,$0,$5 # 1 2 3 1
131 subq $18,4,$18 # count -= 4
132 cmpult $5,$0,$0 # 1 2 3 2
133 umulh $21,$19,$21 # 2 2 #####
134 addq $20,$0,$0 # 1 3 2
135 addq $17,32,$17 # a += 4 words; loads for this pass were already issued
136 addq $6,$0,$6 # 2 2 3 1
137 mulq $27,$19,$7 # 3 2 1 #####
138 cmpult $6,$0,$0 # 2 2 3 2
139 addq $21,$0,$0 # 2 3 2
140 addq $16,32,$16 # r += 4 words; the stores below use negative offsets
141 umulh $27,$19,$27 # 3 2 #####
142 stq $5,-32($16) # 1 2 4
143 mulq $28,$19,$8 # 4 2 1 #####
144 addq $7,$0,$7 # 3 2 3 1
145 stq $6,-24($16) # 2 2 4
146 cmpult $7,$0,$0 # 3 2 3 2
147 umulh $28,$19,$28 # 4 2 #####
148 addq $27,$0,$0 # 3 3 2
149 stq $7,-16($16) # 3 2 4
150 addq $8,$0,$8 # 4 2 3 1
151 cmpult $8,$0,$0 # 4 2 3 2
152
153 addq $28,$0,$0 # 4 3 2
154
155 stq $8,-8($16) # 4 2 4
156
157 blt $18,$143 # fewer than four words left: leave the main loop
158
159 ldq $20,0($17) # 1 1
160
161 br $142
162
163 .align 4
164$145: # tail loop: one word per pass
165 ldq $20,0($17) # 4 1
166 mulq $20,$19,$5 # 4 2 1
167 subq $18,1,$18 # count -= 1
168 umulh $20,$19,$20 # 4 2
169 addq $5,$0,$5 # 4 2 3 1
170 addq $16,8,$16 # r += 1 word; the store below uses offset -8
171 cmpult $5,$0,$0 # 4 2 3 2
172 addq $17,8,$17 # a += 1 word
173 addq $20,$0,$0 # 4 3 2
174 stq $5,-8($16) # 4 2 4
175
176 bgt $18,$145
177 ret $31,($26),1 # else exit
178
179 .align 4
180$143: # undo the -4 bias on the count and see whether any words remain
181 addq $18,4,$18
182 bgt $18,$145 # goto tail code
183 ret $31,($26),1 # else exit
184
185 .end bn_mul_words
186 .align 3
187 .globl bn_sqr_words
188 .ent bn_sqr_words
189bn_sqr_words: # r[2i], r[2i+1] = low, high half of a[i]*a[i] for $18 words; $16=r, $17=a
190bn_sqr_words..ng:
191 .frame $30,0,$26,0
192 .prologue 0
193
194 subq $18,4,$18 # count -= 4: the main loop squares four words per pass
195 blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
196 ldq $20,0($17) # a[0]
197 .align 3
198$542:
199 mulq $20,$20,$5 ###### a[0]^2 low
200 ldq $21,8($17) # a[1]
201 subq $18,4,$18 # count -= 4 (destination spelled out, as in the other routines)
202 umulh $20,$20,$1 ###### a[0]^2 high
203 ldq $27,16($17) # a[2]
204 mulq $21,$21,$6 ###### a[1]^2 low
205 ldq $28,24($17) # a[3]
206 stq $5,0($16) # r[0]
207 umulh $21,$21,$2 ###### a[1]^2 high
208 stq $1,8($16) # r[1]
209 mulq $27,$27,$7 ###### a[2]^2 low
210 stq $6,16($16) # r[2]
211 umulh $27,$27,$3 ###### a[2]^2 high
212 stq $2,24($16) # r[3]
213 mulq $28,$28,$8 ###### a[3]^2 low
214 stq $7,32($16) # r[4]
215 umulh $28,$28,$4 ###### a[3]^2 high
216 stq $3,40($16) # r[5]
217
218 addq $16,64,$16 # r += 8 words; the last two stores use negative offsets
219 addq $17,32,$17 # a += 4 words
220 stq $8,-16($16) # r[6]
221 stq $4,-8($16) # r[7]
222
223 blt $18,$543 # fewer than four words left: leave the main loop
224 ldq $20,0($17) # a[0] of the next group
225 br $542
226
227$442: # tail loop: one input word, two output words per pass
228 ldq $20,0($17) # a[0]
229 mulq $20,$20,$5 # a[0]*a[0] low part
230 addq $16,16,$16 # r += 2 words; the stores below use negative offsets
231 addq $17,8,$17 # a += 1 word
232 subq $18,1,$18 # count -= 1
233 umulh $20,$20,$1 # a[0]*a[0] high part
234 stq $5,-16($16) # r[0]
235 stq $1,-8($16) # r[1]
236
237 bgt $18,$442
238 ret $31,($26),1 # else exit
239
240 .align 4
241$543: # undo the -4 bias on the count and see whether any words remain
242 addq $18,4,$18
243 bgt $18,$442 # goto tail code
244 ret $31,($26),1 # else exit
245 .end bn_sqr_words
246
247 .align 3
248 .globl bn_add_words
249 .ent bn_add_words
250bn_add_words: # r[i] = a[i] + b[i] + carry for $19 words; $16=r, $17=a, $18=b, running carry (and result) in $0
251bn_add_words..ng:
252 .frame $30,0,$26,0
253 .prologue 0
254
255 subq $19,4,$19 # count -= 4: the main loop adds four words per pass
256 bis $31,$31,$0 # carry = 0
257 blt $19,$900 # fewer than four words: go to the tail code
258 ldq $5,0($17) # a[0]
259 ldq $1,0($18) # b[0]
260 .align 3
261$901:
262 addq $1,$5,$1 # r=a+b;
263 ldq $6,8($17) # a[1]
264 cmpult $1,$5,$22 # did we overflow?
265 ldq $2,8($18) # b[1]
266 addq $1,$0,$1 # c+= overflow
267 ldq $7,16($17) # a[2]
268 cmpult $1,$0,$0 # overflow?
269 ldq $3,16($18) # b[2]
270 addq $0,$22,$0
271 ldq $8,24($17) # a[3]
272 addq $2,$6,$2 # r=a+b;
273 ldq $4,24($18) # b[3]
274 cmpult $2,$6,$23 # did we overflow?
275 addq $3,$7,$3 # r=a+b;
276 addq $2,$0,$2 # c+= overflow
277 cmpult $3,$7,$24 # did we overflow?
278 cmpult $2,$0,$0 # overflow?
279 addq $4,$8,$4 # r=a+b;
280 addq $0,$23,$0
281 cmpult $4,$8,$25 # did we overflow?
282 addq $3,$0,$3 # c+= overflow
283 stq $1,0($16) # r[0]=c
284 cmpult $3,$0,$0 # overflow?
285 stq $2,8($16) # r[1]=c
286 addq $0,$24,$0
287 stq $3,16($16) # r[2]=c
288 addq $4,$0,$4 # c+= overflow
289 subq $19,4,$19 # loop--
290 cmpult $4,$0,$0 # overflow?
291 addq $17,32,$17 # a++
292 addq $0,$25,$0
293 stq $4,24($16) # r[3]=c
294 addq $18,32,$18 # b++
295 addq $16,32,$16 # r++
296
297 blt $19,$900
298 ldq $5,0($17) # a[0]
299 ldq $1,0($18) # b[0]
300 br $901
301 .align 4
302$945:
303 ldq $5,0($17) # a[0]
304 ldq $1,0($18) # b[0]
305 addq $1,$5,$1 # r=a+b;
306 subq $19,1,$19 # loop--
307 cmpult $1,$5,$22 # did a+b overflow? must be tested before the carry is added, as in the main loop
308 addq $17,8,$17 # a++
309 addq $1,$0,$1 # c+= overflow
310 cmpult $1,$0,$0 # did adding the carry overflow?
311 addq $18,8,$18 # b++
312 stq $1,0($16) # r[0]=c
313 addq $0,$22,$0 # carry out = overflow of either addition
314 addq $16,8,$16 # r++
315
316 bgt $19,$945
317 ret $31,($26),1 # else exit
318
319$900: # undo the -4 bias on the count and see whether any words remain
320 addq $19,4,$19
321 bgt $19,$945 # goto tail code
322 ret $31,($26),1 # else exit
323 .end bn_add_words
324
325 #
326 # What follows was taken directly from the C compiler output, with a few
327 # hacks to redo the labels.
328 #
329.text
330 .align 3
331 .globl bn_div_words
332 .ent bn_div_words
333bn_div_words: # compiler-generated; the three arguments $16, $17, $18 are copied to $9, $10, $11 below
334 ldgp $29,0($27)
335bn_div_words..ng:
336 lda $30,-48($30) # reserve a 48-byte frame
337 .frame $30,48,$26,0
338 stq $26,0($30) # save the return address and $9-$13, which this routine overwrites
339 stq $9,8($30)
340 stq $10,16($30)
341 stq $11,24($30)
342 stq $12,32($30)
343 stq $13,40($30)
344 .mask 0x4003e00,-48
345 .prologue 1
346 bis $16,$16,$9
347 bis $17,$17,$10
348 bis $18,$18,$11
349 bis $31,$31,$13
350 bis $31,2,$12 # $12 = 2: counter decremented at $134, the loop ends when it reaches zero
351 bne $11,$119 # third argument non-zero: carry on
352 lda $0,-1 # third argument is zero: return -1
353 br $31,$136
354 .align 4
355$119:
356 bis $11,$11,$16
357 jsr $26,BN_num_bits_word # called with $16 = third argument; result in $0
358 ldgp $29,0($26)
359 subq $0,64,$1
360 beq $1,$120 # result was 64: skip the range check
361 bis $31,1,$1
362 sll $1,$0,$1
363 cmpule $9,$1,$1
364 bne $1,$120 # first argument <= 1 << result: fine
365 # lda $16,_IO_stderr_
366 # lda $17,$C32
367 # bis $0,$0,$18
368 # jsr $26,fprintf
369 # ldgp $29,0($26)
370 jsr $26,abort # range check failed (the fprintf that preceded it is commented out above)
371 ldgp $29,0($26)
372 .align 4
373$120:
374 bis $31,64,$3
375 cmpult $9,$11,$2
376 subq $3,$0,$1
377 addl $1,$31,$0
378 subq $9,$11,$1
379 cmoveq $2,$1,$9
380 beq $0,$122 # shift count ($0 = 64 - result) is zero: nothing to shift
381 zapnot $0,15,$2
382 subq $3,$0,$1
383 sll $11,$2,$11
384 sll $9,$2,$3
385 srl $10,$1,$1
386 sll $10,$2,$10
387 bis $3,$1,$9
388$122:
389 srl $11,32,$5 # $5 = upper 32 bits of $11
390 zapnot $11,15,$6 # $6 = lower 32 bits of $11
391 lda $7,-1
392 .align 5
393$123:
394 srl $9,32,$1
395 subq $1,$5,$1
396 bne $1,$126
397 zapnot $7,15,$27
398 br $31,$127
399 .align 4
400$126:
401 bis $9,$9,$24
402 bis $5,$5,$25
403 divqu $24,$25,$27
404$127:
405 srl $10,32,$4
406 .align 5
407$128:
408 mulq $27,$5,$1
409 subq $9,$1,$3
410 zapnot $3,240,$1
411 bne $1,$129
412 mulq $6,$27,$2
413 sll $3,32,$1
414 addq $1,$4,$1
415 cmpule $2,$1,$2
416 bne $2,$129
417 subq $27,1,$27 # decrement $27 and test again
418 br $31,$128
419 .align 4
420$129:
421 mulq $27,$6,$1
422 mulq $27,$5,$4
423 srl $1,32,$3
424 sll $1,32,$1
425 addq $4,$3,$4
426 cmpult $10,$1,$2
427 subq $10,$1,$10
428 addq $2,$4,$2
429 cmpult $9,$2,$1
430 bis $2,$2,$4
431 beq $1,$134
432 addq $9,$11,$9
433 subq $27,1,$27
434$134:
435 subl $12,1,$12
436 subq $9,$4,$9
437 beq $12,$124 # counter exhausted: combine the result and return
438 sll $27,32,$13 # keep $27 shifted left by 32 in $13 for the final OR at $124
439 sll $9,32,$2
440 srl $10,32,$1
441 sll $10,32,$10
442 bis $2,$1,$9
443 br $31,$123
444 .align 4
445$124:
446 bis $13,$27,$0 # return value = $13 | $27
447$136: # common exit: restore the saved registers and release the frame
448 ldq $26,0($30)
449 ldq $9,8($30)
450 ldq $10,16($30)
451 ldq $11,24($30)
452 ldq $12,32($30)
453 ldq $13,40($30)
454 addq $30,48,$30
455 ret $31,($26),1
456 .end bn_div_words
457
# bn_sub_words
#
# Word-by-word subtraction with borrow propagation.
#   $16 = pointer the results are stored through
#   $17 = pointer to the words subtracted from
#   $18 = pointer to the words being subtracted
#   $19 = number of 64-bit words to process
#   $0  = running borrow; starts at 0 and holds the final borrow at each ret
# The routine uses no stack frame (.frame size 0).  .set noat is needed
# because $28 is used as an ordinary scratch register in the main loop.
#
# For every word the same five-step pattern is used:
#   t = (a < b); d = a - b; u = (d < borrow); result = d - borrow;
#   borrow = t + u
458 .set noat
459 .text
460 .align 3
461 .globl bn_sub_words
462 .ent bn_sub_words
463bn_sub_words:
464bn_sub_words..ng:
465 .frame $30,0,$26,0
466 .prologue 0
467
# Bias the count by -4 so its sign says whether a full group of four words
# remains; clear the borrow; with fewer than four words go to $100.
468 subq $19, 4, $19
469 bis $31, $31, $0
470 blt $19, $100
471 ldq $1, 0($17)
472 ldq $2, 0($18)
473$101:
# Main loop: four words per iteration.  Loads for later words are
# interleaved with the arithmetic of earlier ones, so statement order matters.
# Word 0: operands $1/$2, result $23.
474 ldq $3, 8($17)
475 cmpult $1, $2, $4
476 ldq $5, 8($18)
477 subq $1, $2, $1
478 ldq $6, 16($17)
479 cmpult $1, $0, $2
480 ldq $7, 16($18)
481 subq $1, $0, $23
482 ldq $8, 24($17)
483 addq $2, $4, $0
# Word 1: operands $3/$5, result $25.
484 cmpult $3, $5, $24
485 subq $3, $5, $3
486 ldq $22, 24($18)
487 cmpult $3, $0, $5
488 subq $3, $0, $25
489 addq $5, $24, $0
# Word 2: operands $6/$7, result $28.
490 cmpult $6, $7, $27
491 subq $6, $7, $6
492 stq $23, 0($16)
493 cmpult $6, $0, $7
494 subq $6, $0, $28
495 addq $7, $27, $0
# Word 3: operands $8/$22, result $20.
496 cmpult $8, $22, $21
497 subq $8, $22, $8
498 stq $25, 8($16)
499 cmpult $8, $0, $22
500 subq $8, $0, $20
501 addq $22, $21, $0
# Store the remaining results, count down by four, advance all three
# pointers by 32 bytes, and either preload the next pair or go to $100.
502 stq $28, 16($16)
503 subq $19, 4, $19
504 stq $20, 24($16)
505 addq $17, 32, $17
506 addq $18, 32, $18
507 addq $16, 32, $16
508 blt $19, $100
509 ldq $1, 0($17)
510 ldq $2, 0($18)
511 br $101
512$102:
# Tail loop: one word per iteration, same borrow pattern as above.
513 ldq $1, 0($17)
514 ldq $2, 0($18)
515 cmpult $1, $2, $27
516 subq $1, $2, $1
517 cmpult $1, $0, $2
518 subq $1, $0, $1
519 stq $1, 0($16)
520 addq $2, $27, $0
521 addq $17, 8, $17
522 addq $18, 8, $18
523 addq $16, 8, $16
524 subq $19, 1, $19
525 bgt $19, $102
526 ret $31,($26),1
527$100:
# Undo the -4 bias; if any words are left (1..3) finish them in the tail loop.
528 addq $19, 4, $19
529 bgt $19, $102
# $103 is not branched to from inside this routine; it is reached only by
# falling through when no words remain.
530$103:
531 ret $31,($26),1
532 .end bn_sub_words
# bn_mul_comba4
#
# Fully unrolled 4x4-word multiply in column ("comba") order: the four
# quadwords at $17 are multiplied by the four quadwords at $18 and the
# eight-quadword product is stored at 0..56($16).
#
# Operand registers (all loaded up front):
#   a[0..3] = $0, $2, $4, $6      (from $17)
#   b[0..3] = $1, $3, $5, $7      (from $18)
# Three accumulators, $22, $23 and $8, rotate roles from column to column:
# the lowest one is stored as the finished result word and then cleared to
# become the new top accumulator.
#
# Every partial product follows the same pattern:
#   mulq/umulh -> lo, hi;  low acc += lo;  hi += carry;
#   mid acc += hi;  top acc += carry
#
# Scratch registers: $17, $18, $19-$21, $24, $25, $27 and $28 hold products
# and carries once the operands are loaded, and $0, $1, $2 are recycled after
# the last use of a[0], b[0], a[1].  Instruction order is therefore
# significant.  $28 is used as a plain register, which relies on the
# .set noat issued earlier in this file.
533 .text
534 .align 3
535 .globl bn_mul_comba4
536 .ent bn_mul_comba4
537bn_mul_comba4:
538bn_mul_comba4..ng:
539 .frame $30,0,$26,0
540 .prologue 0
541
542 ldq $0, 0($17)
543 ldq $1, 0($18)
544 ldq $2, 8($17)
545 ldq $3, 8($18)
546 ldq $4, 16($17)
547 ldq $5, 16($18)
548 ldq $6, 24($17)
549 ldq $7, 24($18)
550 bis $31, $31, $23
# Column 0: a[0]*b[0].  Low half is result word 0; high half seeds $22.
551 mulq $0, $1, $8
552 umulh $0, $1, $22
553 stq $8, 0($16)
554 bis $31, $31, $8
# Column 1 (accumulators low/mid/top = $22/$23/$8): a[0]*b[1]
555 mulq $0, $3, $24
556 umulh $0, $3, $25
557 addq $22, $24, $22
558 cmpult $22, $24, $27
559 addq $27, $25, $25
560 addq $23, $25, $23
561 cmpult $23, $25, $28
562 addq $8, $28, $8
# a[1]*b[0]  ($19 and $17 are reused as carry scratch from here on)
563 mulq $2, $1, $21
564 umulh $2, $1, $20
565 addq $22, $21, $22
566 cmpult $22, $21, $19
567 addq $19, $20, $20
568 addq $23, $20, $23
569 cmpult $23, $20, $17
570 addq $8, $17, $8
571 stq $22, 8($16)
572 bis $31, $31, $22
# Column 2 (accumulators $23/$8/$22): a[1]*b[1]  ($18 reused for the low half)
573 mulq $2, $3, $18
574 umulh $2, $3, $24
575 addq $23, $18, $23
576 cmpult $23, $18, $27
577 addq $27, $24, $24
578 addq $8, $24, $8
579 cmpult $8, $24, $25
580 addq $22, $25, $22
# a[0]*b[2]
581 mulq $0, $5, $28
582 umulh $0, $5, $21
583 addq $23, $28, $23
584 cmpult $23, $28, $19
585 addq $19, $21, $21
586 addq $8, $21, $8
587 cmpult $8, $21, $20
588 addq $22, $20, $22
# a[2]*b[0]
589 mulq $4, $1, $17
590 umulh $4, $1, $18
591 addq $23, $17, $23
592 cmpult $23, $17, $27
593 addq $27, $18, $18
594 addq $8, $18, $8
595 cmpult $8, $18, $24
596 addq $22, $24, $22
597 stq $23, 16($16)
598 bis $31, $31, $23
# Column 3 (accumulators $8/$22/$23): a[0]*b[3]
599 mulq $0, $7, $25
600 umulh $0, $7, $28
601 addq $8, $25, $8
602 cmpult $8, $25, $19
603 addq $19, $28, $28
604 addq $22, $28, $22
605 cmpult $22, $28, $21
606 addq $23, $21, $23
# a[1]*b[2]
607 mulq $2, $5, $20
608 umulh $2, $5, $17
609 addq $8, $20, $8
610 cmpult $8, $20, $27
611 addq $27, $17, $17
612 addq $22, $17, $22
613 cmpult $22, $17, $18
614 addq $23, $18, $23
# a[2]*b[1]
615 mulq $4, $3, $24
616 umulh $4, $3, $25
617 addq $8, $24, $8
618 cmpult $8, $24, $19
619 addq $19, $25, $25
620 addq $22, $25, $22
621 cmpult $22, $25, $28
622 addq $23, $28, $23
# a[3]*b[0]  (high half overwrites $0; a[0] is not needed after this point)
623 mulq $6, $1, $21
624 umulh $6, $1, $0
625 addq $8, $21, $8
626 cmpult $8, $21, $20
627 addq $20, $0, $0
628 addq $22, $0, $22
629 cmpult $22, $0, $27
630 addq $23, $27, $23
631 stq $8, 24($16)
632 bis $31, $31, $8
# Column 4 (accumulators $22/$23/$8): a[1]*b[3]
633 mulq $2, $7, $17
634 umulh $2, $7, $18
635 addq $22, $17, $22
636 cmpult $22, $17, $24
637 addq $24, $18, $18
638 addq $23, $18, $23
639 cmpult $23, $18, $19
640 addq $8, $19, $8
# a[2]*b[2]
641 mulq $4, $5, $25
642 umulh $4, $5, $28
643 addq $22, $25, $22
644 cmpult $22, $25, $21
645 addq $21, $28, $28
646 addq $23, $28, $23
647 cmpult $23, $28, $20
648 addq $8, $20, $8
# a[3]*b[1]  (carry goes into $1; b[0] is not needed after this point)
649 mulq $6, $3, $0
650 umulh $6, $3, $27
651 addq $22, $0, $22
652 cmpult $22, $0, $1
653 addq $1, $27, $27
654 addq $23, $27, $23
655 cmpult $23, $27, $17
656 addq $8, $17, $8
657 stq $22, 32($16)
658 bis $31, $31, $22
# Column 5 (accumulators $23/$8/$22): a[2]*b[3]
# (carry goes into $2; a[1] is not needed after this point)
659 mulq $4, $7, $24
660 umulh $4, $7, $18
661 addq $23, $24, $23
662 cmpult $23, $24, $19
663 addq $19, $18, $18
664 addq $8, $18, $8
665 cmpult $8, $18, $2
666 addq $22, $2, $22
# a[3]*b[2]
667 mulq $6, $5, $25
668 umulh $6, $5, $21
669 addq $23, $25, $23
670 cmpult $23, $25, $28
671 addq $28, $21, $21
672 addq $8, $21, $8
673 cmpult $8, $21, $20
674 addq $22, $20, $22
675 stq $23, 40($16)
676 bis $31, $31, $23
# Column 6 (accumulators $8/$22/$23): a[3]*b[3].  $8 becomes result word 6
# and $22 result word 7; the carry added into $23 is not stored.
677 mulq $6, $7, $0
678 umulh $6, $7, $1
679 addq $8, $0, $8
680 cmpult $8, $0, $27
681 addq $27, $1, $1
682 addq $22, $1, $22
683 cmpult $22, $1, $17
684 addq $23, $17, $23
685 stq $8, 48($16)
686 stq $22, 56($16)
687 ret $31,($26),1
688 .end bn_mul_comba4
689 .text
690 .align 3
691 .globl bn_mul_comba8
692 .ent bn_mul_comba8
693bn_mul_comba8:
694bn_mul_comba8..ng:
695 .frame $30,0,$26,0
696 .prologue 0
697 ldq $1, 0($17)
698 ldq $2, 0($18)
699 zapnot $1, 15, $7
700 srl $2, 32, $8
701 mulq $8, $7, $22
702 srl $1, 32, $6
703 zapnot $2, 15, $5
704 mulq $5, $6, $4
705 mulq $7, $5, $24
706 addq $22, $4, $22
707 cmpult $22, $4, $1
708 mulq $6, $8, $3
709 beq $1, $173
710 bis $31, 1, $1
711 sll $1, 32, $1
712 addq $3, $1, $3
713$173:
714 sll $22, 32, $4
715 addq $24, $4, $24
716 stq $24, 0($16)
717 ldq $2, 0($17)
718 ldq $1, 8($18)
719 zapnot $2, 15, $7
720 srl $1, 32, $8
721 mulq $8, $7, $25
722 zapnot $1, 15, $5
723 mulq $7, $5, $0
724 srl $2, 32, $6
725 mulq $5, $6, $23
726 mulq $6, $8, $6
727 srl $22, 32, $1
728 cmpult $24, $4, $2
729 addq $3, $1, $3
730 addq $2, $3, $22
731 addq $25, $23, $25
732 cmpult $25, $23, $1
733 bis $31, 1, $2
734 beq $1, $177
735 sll $2, 32, $1
736 addq $6, $1, $6
737$177:
738 sll $25, 32, $23
739 ldq $1, 0($18)
740 addq $0, $23, $0
741 bis $0, $0, $7
742 ldq $3, 8($17)
743 addq $22, $7, $22
744 srl $1, 32, $8
745 cmpult $22, $7, $4
746 zapnot $3, 15, $7
747 mulq $8, $7, $28
748 zapnot $1, 15, $5
749 mulq $7, $5, $21
750 srl $25, 32, $1
751 cmpult $0, $23, $2
752 addq $6, $1, $6
753 addq $2, $6, $6
754 addq $4, $6, $24
755 srl $3, 32, $6
756 mulq $5, $6, $2
757 mulq $6, $8, $6
758 addq $28, $2, $28
759 cmpult $28, $2, $1
760 bis $31, 1, $2
761 beq $1, $181
762 sll $2, 32, $1
763 addq $6, $1, $6
764$181:
765 sll $28, 32, $2
766 addq $21, $2, $21
767 bis $21, $21, $7
768 addq $22, $7, $22
769 stq $22, 8($16)
770 ldq $3, 16($17)
771 ldq $1, 0($18)
772 cmpult $22, $7, $4
773 zapnot $3, 15, $7
774 srl $1, 32, $8
775 mulq $8, $7, $22
776 zapnot $1, 15, $5
777 mulq $7, $5, $20
778 srl $28, 32, $1
779 cmpult $21, $2, $2
780 addq $6, $1, $6
781 addq $2, $6, $6
782 addq $4, $6, $6
783 addq $24, $6, $24
784 cmpult $24, $6, $23
785 srl $3, 32, $6
786 mulq $5, $6, $2
787 mulq $6, $8, $6
788 addq $22, $2, $22
789 cmpult $22, $2, $1
790 bis $31, 1, $2
791 beq $1, $185
792 sll $2, 32, $1
793 addq $6, $1, $6
794$185:
795 sll $22, 32, $2
796 ldq $1, 8($18)
797 addq $20, $2, $20
798 bis $20, $20, $7
799 ldq $4, 8($17)
800 addq $24, $7, $24
801 srl $1, 32, $8
802 cmpult $24, $7, $3
803 zapnot $4, 15, $7
804 mulq $8, $7, $25
805 zapnot $1, 15, $5
806 mulq $7, $5, $0
807 srl $22, 32, $1
808 cmpult $20, $2, $2
809 addq $6, $1, $6
810 addq $2, $6, $6
811 addq $3, $6, $6
812 addq $23, $6, $23
813 cmpult $23, $6, $22
814 srl $4, 32, $6
815 mulq $5, $6, $5
816 bis $31, 1, $21
817 addq $25, $5, $25
818 cmpult $25, $5, $1
819 mulq $6, $8, $6
820 beq $1, $189
821 sll $21, 32, $1
822 addq $6, $1, $6
823$189:
824 sll $25, 32, $5
825 ldq $2, 16($18)
826 addq $0, $5, $0
827 bis $0, $0, $7
828 ldq $4, 0($17)
829 addq $24, $7, $24
830 srl $2, 32, $8
831 cmpult $24, $7, $3
832 zapnot $4, 15, $7
833 mulq $8, $7, $28
834 srl $25, 32, $1
835 addq $6, $1, $6
836 cmpult $0, $5, $1
837 zapnot $2, 15, $5
838 addq $1, $6, $6
839 addq $3, $6, $6
840 addq $23, $6, $23
841 cmpult $23, $6, $1
842 srl $4, 32, $6
843 mulq $5, $6, $25
844 mulq $7, $5, $2
845 addq $1, $22, $22
846 addq $28, $25, $28
847 cmpult $28, $25, $1
848 mulq $6, $8, $6
849 beq $1, $193
850 sll $21, 32, $1
851 addq $6, $1, $6
852$193:
853 sll $28, 32, $25
854 addq $2, $25, $2
855 bis $2, $2, $7
856 addq $24, $7, $24
857 stq $24, 16($16)
858 ldq $4, 0($17)
859 ldq $5, 24($18)
860 cmpult $24, $7, $3
861 zapnot $4, 15, $7
862 srl $5, 32, $8
863 mulq $8, $7, $0
864 srl $28, 32, $1
865 cmpult $2, $25, $2
866 addq $6, $1, $6
867 addq $2, $6, $6
868 addq $3, $6, $6
869 addq $23, $6, $23
870 cmpult $23, $6, $1
871 srl $4, 32, $6
872 zapnot $5, 15, $5
873 mulq $5, $6, $24
874 mulq $7, $5, $2
875 addq $1, $22, $22
876 addq $0, $24, $0
877 cmpult $0, $24, $1
878 mulq $6, $8, $6
879 beq $1, $197
880 sll $21, 32, $1
881 addq $6, $1, $6
882$197:
883 sll $0, 32, $24
884 ldq $1, 16($18)
885 addq $2, $24, $2
886 bis $2, $2, $7
887 ldq $4, 8($17)
888 addq $23, $7, $23
889 srl $1, 32, $8
890 cmpult $23, $7, $3
891 zapnot $4, 15, $7
892 mulq $8, $7, $25
893 zapnot $1, 15, $5
894 mulq $7, $5, $21
895 srl $0, 32, $1
896 cmpult $2, $24, $2
897 addq $6, $1, $6
898 addq $2, $6, $6
899 addq $3, $6, $6
900 addq $22, $6, $22
901 cmpult $22, $6, $24
902 srl $4, 32, $6
903 mulq $5, $6, $5
904 bis $31, 1, $20
905 addq $25, $5, $25
906 cmpult $25, $5, $1
907 mulq $6, $8, $6
908 beq $1, $201
909 sll $20, 32, $1
910 addq $6, $1, $6
911$201:
912 sll $25, 32, $5
913 ldq $2, 8($18)
914 addq $21, $5, $21
915 bis $21, $21, $7
916 ldq $4, 16($17)
917 addq $23, $7, $23
918 srl $2, 32, $8
919 cmpult $23, $7, $3
920 zapnot $4, 15, $7
921 mulq $8, $7, $28
922 srl $25, 32, $1
923 addq $6, $1, $6
924 cmpult $21, $5, $1
925 zapnot $2, 15, $5
926 addq $1, $6, $6
927 addq $3, $6, $6
928 addq $22, $6, $22
929 cmpult $22, $6, $1
930 srl $4, 32, $6
931 mulq $5, $6, $25
932 mulq $7, $5, $5
933 addq $1, $24, $24
934 addq $28, $25, $28
935 cmpult $28, $25, $1
936 mulq $6, $8, $6
937 beq $1, $205
938 sll $20, 32, $1
939 addq $6, $1, $6
940$205:
941 sll $28, 32, $25
942 ldq $2, 0($18)
943 addq $5, $25, $5
944 bis $5, $5, $7
945 ldq $4, 24($17)
946 addq $23, $7, $23
947 srl $2, 32, $8
948 cmpult $23, $7, $3
949 zapnot $4, 15, $7
950 mulq $8, $7, $0
951 srl $28, 32, $1
952 addq $6, $1, $6
953 cmpult $5, $25, $1
954 zapnot $2, 15, $5
955 addq $1, $6, $6
956 addq $3, $6, $6
957 addq $22, $6, $22
958 cmpult $22, $6, $1
959 srl $4, 32, $6
960 mulq $5, $6, $25
961 mulq $7, $5, $2
962 addq $1, $24, $24
963 addq $0, $25, $0
964 cmpult $0, $25, $1
965 mulq $6, $8, $6
966 beq $1, $209
967 sll $20, 32, $1
968 addq $6, $1, $6
969$209:
970 sll $0, 32, $25
971 addq $2, $25, $2
972 bis $2, $2, $7
973 addq $23, $7, $23
974 stq $23, 24($16)
975 ldq $4, 32($17)
976 ldq $5, 0($18)
977 cmpult $23, $7, $3
978 zapnot $4, 15, $7
979 srl $5, 32, $8
980 mulq $8, $7, $28
981 srl $0, 32, $1
982 cmpult $2, $25, $2
983 addq $6, $1, $6
984 addq $2, $6, $6
985 addq $3, $6, $6
986 addq $22, $6, $22
987 cmpult $22, $6, $1
988 srl $4, 32, $6
989 zapnot $5, 15, $5
990 mulq $5, $6, $23
991 mulq $7, $5, $2
992 addq $1, $24, $24
993 addq $28, $23, $28
994 cmpult $28, $23, $1
995 mulq $6, $8, $6
996 beq $1, $213
997 sll $20, 32, $1
998 addq $6, $1, $6
999$213:
1000 sll $28, 32, $23
1001 ldq $1, 8($18)
1002 addq $2, $23, $2
1003 bis $2, $2, $7
1004 ldq $4, 24($17)
1005 addq $22, $7, $22
1006 srl $1, 32, $8
1007 cmpult $22, $7, $3
1008 zapnot $4, 15, $7
1009 mulq $8, $7, $25
1010 zapnot $1, 15, $5
1011 mulq $7, $5, $0
1012 srl $28, 32, $1
1013 cmpult $2, $23, $2
1014 addq $6, $1, $6
1015 addq $2, $6, $6
1016 addq $3, $6, $6
1017 addq $24, $6, $24
1018 cmpult $24, $6, $23
1019 srl $4, 32, $6
1020 mulq $5, $6, $5
1021 bis $31, 1, $21
1022 addq $25, $5, $25
1023 cmpult $25, $5, $1
1024 mulq $6, $8, $6
1025 beq $1, $217
1026 sll $21, 32, $1
1027 addq $6, $1, $6
1028$217:
1029 sll $25, 32, $5
1030 ldq $2, 16($18)
1031 addq $0, $5, $0
1032 bis $0, $0, $7
1033 ldq $4, 16($17)
1034 addq $22, $7, $22
1035 srl $2, 32, $8
1036 cmpult $22, $7, $3
1037 zapnot $4, 15, $7
1038 mulq $8, $7, $28
1039 srl $25, 32, $1
1040 addq $6, $1, $6
1041 cmpult $0, $5, $1
1042 zapnot $2, 15, $5
1043 addq $1, $6, $6
1044 addq $3, $6, $6
1045 addq $24, $6, $24
1046 cmpult $24, $6, $1
1047 srl $4, 32, $6
1048 mulq $5, $6, $25
1049 mulq $7, $5, $5
1050 addq $1, $23, $23
1051 addq $28, $25, $28
1052 cmpult $28, $25, $1
1053 mulq $6, $8, $6
1054 beq $1, $221
1055 sll $21, 32, $1
1056 addq $6, $1, $6
1057$221:
1058 sll $28, 32, $25
1059 ldq $2, 24($18)
1060 addq $5, $25, $5
1061 bis $5, $5, $7
1062 ldq $4, 8($17)
1063 addq $22, $7, $22
1064 srl $2, 32, $8
1065 cmpult $22, $7, $3
1066 zapnot $4, 15, $7
1067 mulq $8, $7, $0
1068 srl $28, 32, $1
1069 addq $6, $1, $6
1070 cmpult $5, $25, $1
1071 zapnot $2, 15, $5
1072 addq $1, $6, $6
1073 addq $3, $6, $6
1074 addq $24, $6, $24
1075 cmpult $24, $6, $1
1076 srl $4, 32, $6
1077 mulq $5, $6, $25
1078 mulq $7, $5, $5
1079 addq $1, $23, $23
1080 addq $0, $25, $0
1081 cmpult $0, $25, $1
1082 mulq $6, $8, $6
1083 beq $1, $225
1084 sll $21, 32, $1
1085 addq $6, $1, $6
1086$225:
1087 sll $0, 32, $25
1088 ldq $2, 32($18)
1089 addq $5, $25, $5
1090 bis $5, $5, $7
1091 ldq $4, 0($17)
1092 addq $22, $7, $22
1093 srl $2, 32, $8
1094 cmpult $22, $7, $3
1095 zapnot $4, 15, $7
1096 mulq $8, $7, $28
1097 srl $0, 32, $1
1098 addq $6, $1, $6
1099 cmpult $5, $25, $1
1100 zapnot $2, 15, $5
1101 addq $1, $6, $6
1102 addq $3, $6, $6
1103 addq $24, $6, $24
1104 cmpult $24, $6, $1
1105 srl $4, 32, $6
1106 mulq $5, $6, $25
1107 mulq $7, $5, $2
1108 addq $1, $23, $23
1109 addq $28, $25, $28
1110 cmpult $28, $25, $1
1111 mulq $6, $8, $6
1112 beq $1, $229
1113 sll $21, 32, $1
1114 addq $6, $1, $6
1115$229:
1116 sll $28, 32, $25
1117 addq $2, $25, $2
1118 bis $2, $2, $7
1119 addq $22, $7, $22
1120 stq $22, 32($16)
1121 ldq $4, 0($17)
1122 ldq $5, 40($18)
1123 cmpult $22, $7, $3
1124 zapnot $4, 15, $7
1125 srl $5, 32, $8
1126 mulq $8, $7, $0
1127 srl $28, 32, $1
1128 cmpult $2, $25, $2
1129 addq $6, $1, $6
1130 addq $2, $6, $6
1131 addq $3, $6, $6
1132 addq $24, $6, $24
1133 cmpult $24, $6, $1
1134 srl $4, 32, $6
1135 zapnot $5, 15, $5
1136 mulq $5, $6, $22
1137 mulq $7, $5, $2
1138 addq $1, $23, $23
1139 addq $0, $22, $0
1140 cmpult $0, $22, $1
1141 mulq $6, $8, $6
1142 beq $1, $233
1143 sll $21, 32, $1
1144 addq $6, $1, $6
1145$233:
1146 sll $0, 32, $22
1147 ldq $1, 32($18)
1148 addq $2, $22, $2
1149 bis $2, $2, $7
1150 ldq $4, 8($17)
1151 addq $24, $7, $24
1152 srl $1, 32, $8
1153 cmpult $24, $7, $3
1154 zapnot $4, 15, $7
1155 mulq $8, $7, $25
1156 zapnot $1, 15, $5
1157 mulq $7, $5, $21
1158 srl $0, 32, $1
1159 cmpult $2, $22, $2
1160 addq $6, $1, $6
1161 addq $2, $6, $6
1162 addq $3, $6, $6
1163 addq $23, $6, $23
1164 cmpult $23, $6, $22
1165 srl $4, 32, $6
1166 mulq $5, $6, $5
1167 bis $31, 1, $20
1168 addq $25, $5, $25
1169 cmpult $25, $5, $1
1170 mulq $6, $8, $6
1171 beq $1, $237
1172 sll $20, 32, $1
1173 addq $6, $1, $6
1174$237:
1175 sll $25, 32, $5
1176 ldq $2, 24($18)
1177 addq $21, $5, $21
1178 bis $21, $21, $7
1179 ldq $4, 16($17)
1180 addq $24, $7, $24
1181 srl $2, 32, $8
1182 cmpult $24, $7, $3
1183 zapnot $4, 15, $7
1184 mulq $8, $7, $28
1185 srl $25, 32, $1
1186 addq $6, $1, $6
1187 cmpult $21, $5, $1
1188 zapnot $2, 15, $5
1189 addq $1, $6, $6
1190 addq $3, $6, $6
1191 addq $23, $6, $23
1192 cmpult $23, $6, $1
1193 srl $4, 32, $6
1194 mulq $5, $6, $25
1195 mulq $7, $5, $5
1196 addq $1, $22, $22
1197 addq $28, $25, $28
1198 cmpult $28, $25, $1
1199 mulq $6, $8, $6
1200 beq $1, $241
1201 sll $20, 32, $1
1202 addq $6, $1, $6
1203$241:
1204 sll $28, 32, $25
1205 ldq $2, 16($18)
1206 addq $5, $25, $5
1207 bis $5, $5, $7
1208 ldq $4, 24($17)
1209 addq $24, $7, $24
1210 srl $2, 32, $8
1211 cmpult $24, $7, $3
1212 zapnot $4, 15, $7
1213 mulq $8, $7, $0
1214 srl $28, 32, $1
1215 addq $6, $1, $6
1216 cmpult $5, $25, $1
1217 zapnot $2, 15, $5
1218 addq $1, $6, $6
1219 addq $3, $6, $6
1220 addq $23, $6, $23
1221 cmpult $23, $6, $1
1222 srl $4, 32, $6
1223 mulq $5, $6, $25
1224 mulq $7, $5, $5
1225 addq $1, $22, $22
1226 addq $0, $25, $0
1227 cmpult $0, $25, $1
1228 mulq $6, $8, $6
1229 beq $1, $245
1230 sll $20, 32, $1
1231 addq $6, $1, $6
1232$245:
1233 sll $0, 32, $25
1234 ldq $2, 8($18)
1235 addq $5, $25, $5
1236 bis $5, $5, $7
1237 ldq $4, 32($17)
1238 addq $24, $7, $24
1239 srl $2, 32, $8
1240 cmpult $24, $7, $3
1241 zapnot $4, 15, $7
1242 mulq $8, $7, $28
1243 srl $0, 32, $1
1244 addq $6, $1, $6
1245 cmpult $5, $25, $1
1246 zapnot $2, 15, $5
1247 addq $1, $6, $6
1248 addq $3, $6, $6
1249 addq $23, $6, $23
1250 cmpult $23, $6, $1
1251 srl $4, 32, $6
1252 mulq $5, $6, $25
1253 mulq $7, $5, $5
1254 addq $1, $22, $22
1255 addq $28, $25, $28
1256 cmpult $28, $25, $1
1257 mulq $6, $8, $6
1258 beq $1, $249
1259 sll $20, 32, $1
1260 addq $6, $1, $6
1261$249:
1262 sll $28, 32, $25
1263 ldq $2, 0($18)
1264 addq $5, $25, $5
1265 bis $5, $5, $7
1266 ldq $4, 40($17)
1267 addq $24, $7, $24
1268 srl $2, 32, $8
1269 cmpult $24, $7, $3
1270 zapnot $4, 15, $7
1271 mulq $8, $7, $0
1272 srl $28, 32, $1
1273 addq $6, $1, $6
1274 cmpult $5, $25, $1
1275 zapnot $2, 15, $5
1276 addq $1, $6, $6
1277 addq $3, $6, $6
1278 addq $23, $6, $23
1279 cmpult $23, $6, $1
1280 srl $4, 32, $6
1281 mulq $5, $6, $25
1282 mulq $7, $5, $2
1283 addq $1, $22, $22
1284 addq $0, $25, $0
1285 cmpult $0, $25, $1
1286 mulq $6, $8, $6
1287 beq $1, $253
1288 sll $20, 32, $1
1289 addq $6, $1, $6
1290$253:
1291 sll $0, 32, $25
1292 addq $2, $25, $2
1293 bis $2, $2, $7
1294 addq $24, $7, $24
1295 stq $24, 40($16)
1296 ldq $4, 48($17)
1297 ldq $5, 0($18)
1298 cmpult $24, $7, $3
1299 zapnot $4, 15, $7
1300 srl $5, 32, $8
1301 mulq $8, $7, $28
1302 srl $0, 32, $1
1303 cmpult $2, $25, $2
1304 addq $6, $1, $6
1305 addq $2, $6, $6
1306 addq $3, $6, $6
1307 addq $23, $6, $23
1308 cmpult $23, $6, $1
1309 srl $4, 32, $6
1310 zapnot $5, 15, $5
1311 mulq $5, $6, $24
1312 mulq $7, $5, $2
1313 addq $1, $22, $22
1314 addq $28, $24, $28
1315 cmpult $28, $24, $1
1316 mulq $6, $8, $6
1317 beq $1, $257
1318 sll $20, 32, $1
1319 addq $6, $1, $6
1320$257:
1321 sll $28, 32, $24
1322 ldq $1, 8($18)
1323 addq $2, $24, $2
1324 bis $2, $2, $7
1325 ldq $4, 40($17)
1326 addq $23, $7, $23
1327 srl $1, 32, $8
1328 cmpult $23, $7, $3
1329 zapnot $4, 15, $7
1330 mulq $8, $7, $25
1331 zapnot $1, 15, $5
1332 mulq $7, $5, $0
1333 srl $28, 32, $1
1334 cmpult $2, $24, $2
1335 addq $6, $1, $6
1336 addq $2, $6, $6
1337 addq $3, $6, $6
1338 addq $22, $6, $22
1339 cmpult $22, $6, $24
1340 srl $4, 32, $6
1341 mulq $5, $6, $5
1342 bis $31, 1, $21
1343 addq $25, $5, $25
1344 cmpult $25, $5, $1
1345 mulq $6, $8, $6
1346 beq $1, $261
1347 sll $21, 32, $1
1348 addq $6, $1, $6
1349$261:
1350 sll $25, 32, $5
1351 ldq $2, 16($18)
1352 addq $0, $5, $0
1353 bis $0, $0, $7
1354 ldq $4, 32($17)
1355 addq $23, $7, $23
1356 srl $2, 32, $8
1357 cmpult $23, $7, $3
1358 zapnot $4, 15, $7
1359 mulq $8, $7, $28
1360 srl $25, 32, $1
1361 addq $6, $1, $6
1362 cmpult $0, $5, $1
1363 zapnot $2, 15, $5
1364 addq $1, $6, $6
1365 addq $3, $6, $6
1366 addq $22, $6, $22
1367 cmpult $22, $6, $1
1368 srl $4, 32, $6
1369 mulq $5, $6, $25
1370 mulq $7, $5, $5
1371 addq $1, $24, $24
1372 addq $28, $25, $28
1373 cmpult $28, $25, $1
1374 mulq $6, $8, $6
1375 beq $1, $265
1376 sll $21, 32, $1
1377 addq $6, $1, $6
1378$265:
1379 sll $28, 32, $25
1380 ldq $2, 24($18)
1381 addq $5, $25, $5
1382 bis $5, $5, $7
1383 ldq $4, 24($17)
1384 addq $23, $7, $23
1385 srl $2, 32, $8
1386 cmpult $23, $7, $3
1387 zapnot $4, 15, $7
1388 mulq $8, $7, $0
1389 srl $28, 32, $1
1390 addq $6, $1, $6
1391 cmpult $5, $25, $1
1392 zapnot $2, 15, $5
1393 addq $1, $6, $6
1394 addq $3, $6, $6
1395 addq $22, $6, $22
1396 cmpult $22, $6, $1
1397 srl $4, 32, $6
1398 mulq $5, $6, $25
1399 mulq $7, $5, $5
1400 addq $1, $24, $24
1401 addq $0, $25, $0
1402 cmpult $0, $25, $1
1403 mulq $6, $8, $6
1404 beq $1, $269
1405 sll $21, 32, $1
1406 addq $6, $1, $6
1407$269:
1408 sll $0, 32, $25
1409 ldq $2, 32($18)
1410 addq $5, $25, $5
1411 bis $5, $5, $7
1412 ldq $4, 16($17)
1413 addq $23, $7, $23
1414 srl $2, 32, $8
1415 cmpult $23, $7, $3
1416 zapnot $4, 15, $7
1417 mulq $8, $7, $28
1418 srl $0, 32, $1
1419 addq $6, $1, $6
1420 cmpult $5, $25, $1
1421 zapnot $2, 15, $5
1422 addq $1, $6, $6
1423 addq $3, $6, $6
1424 addq $22, $6, $22
1425 cmpult $22, $6, $1
1426 srl $4, 32, $6
1427 mulq $5, $6, $25
1428 mulq $7, $5, $5
1429 addq $1, $24, $24
1430 addq $28, $25, $28
1431 cmpult $28, $25, $1
1432 mulq $6, $8, $6
1433 beq $1, $273
1434 sll $21, 32, $1
1435 addq $6, $1, $6
1436$273:
1437 sll $28, 32, $25
1438 ldq $2, 40($18)
1439 addq $5, $25, $5
1440 bis $5, $5, $7
1441 ldq $4, 8($17)
1442 addq $23, $7, $23
1443 srl $2, 32, $8
1444 cmpult $23, $7, $3
1445 zapnot $4, 15, $7
1446 mulq $8, $7, $0
1447 srl $28, 32, $1
1448 addq $6, $1, $6
1449 cmpult $5, $25, $1
1450 zapnot $2, 15, $5
1451 addq $1, $6, $6
1452 addq $3, $6, $6
1453 addq $22, $6, $22
1454 cmpult $22, $6, $1
1455 srl $4, 32, $6
1456 mulq $5, $6, $25
1457 mulq $7, $5, $5
1458 addq $1, $24, $24
1459 addq $0, $25, $0
1460 cmpult $0, $25, $1
1461 mulq $6, $8, $6
1462 beq $1, $277
1463 sll $21, 32, $1
1464 addq $6, $1, $6
1465$277:
1466 sll $0, 32, $25
1467 ldq $2, 48($18)
1468 addq $5, $25, $5
1469 bis $5, $5, $7
1470 ldq $4, 0($17)
1471 addq $23, $7, $23
1472 srl $2, 32, $8
1473 cmpult $23, $7, $3
1474 zapnot $4, 15, $7
1475 mulq $8, $7, $28
1476 srl $0, 32, $1
1477 addq $6, $1, $6
1478 cmpult $5, $25, $1
1479 zapnot $2, 15, $5
1480 addq $1, $6, $6
1481 addq $3, $6, $6
1482 addq $22, $6, $22
1483 cmpult $22, $6, $1
1484 srl $4, 32, $6
1485 mulq $5, $6, $25
1486 mulq $7, $5, $2
1487 addq $1, $24, $24
1488 addq $28, $25, $28
1489 cmpult $28, $25, $1
1490 mulq $6, $8, $6
1491 beq $1, $281
1492 sll $21, 32, $1
1493 addq $6, $1, $6
1494$281:
1495 sll $28, 32, $25
1496 addq $2, $25, $2
1497 bis $2, $2, $7
1498 addq $23, $7, $23
1499 stq $23, 48($16)
1500 ldq $4, 0($17)
1501 ldq $5, 56($18)
1502 cmpult $23, $7, $3
1503 zapnot $4, 15, $7
1504 srl $5, 32, $8
1505 mulq $8, $7, $0
1506 srl $28, 32, $1
1507 cmpult $2, $25, $2
1508 addq $6, $1, $6
1509 addq $2, $6, $6
1510 addq $3, $6, $6
1511 addq $22, $6, $22
1512 cmpult $22, $6, $1
1513 srl $4, 32, $6
1514 zapnot $5, 15, $5
1515 mulq $5, $6, $23
1516 mulq $7, $5, $2
1517 addq $1, $24, $24
1518 addq $0, $23, $0
1519 cmpult $0, $23, $1
1520 mulq $6, $8, $6
1521 beq $1, $285
1522 sll $21, 32, $1
1523 addq $6, $1, $6
1524$285:
1525 sll $0, 32, $23
1526 ldq $1, 48($18)
1527 addq $2, $23, $2
1528 bis $2, $2, $7
1529 ldq $4, 8($17)
1530 addq $22, $7, $22
1531 srl $1, 32, $8
1532 cmpult $22, $7, $3
1533 zapnot $4, 15, $7
1534 mulq $8, $7, $25
1535 zapnot $1, 15, $5
1536 mulq $7, $5, $21
1537 srl $0, 32, $1
1538 cmpult $2, $23, $2
1539 addq $6, $1, $6
1540 addq $2, $6, $6
1541 addq $3, $6, $6
1542 addq $24, $6, $24
1543 cmpult $24, $6, $23
1544 srl $4, 32, $6
1545 mulq $5, $6, $5
1546 bis $31, 1, $20
1547 addq $25, $5, $25
1548 cmpult $25, $5, $1
1549 mulq $6, $8, $6
1550 beq $1, $289
1551 sll $20, 32, $1
1552 addq $6, $1, $6
1553$289:
1554 sll $25, 32, $5
1555 ldq $2, 40($18)
1556 addq $21, $5, $21
1557 bis $21, $21, $7
1558 ldq $4, 16($17)
1559 addq $22, $7, $22
1560 srl $2, 32, $8
1561 cmpult $22, $7, $3
1562 zapnot $4, 15, $7
1563 mulq $8, $7, $28
1564 srl $25, 32, $1
1565 addq $6, $1, $6
1566 cmpult $21, $5, $1
1567 zapnot $2, 15, $5
1568 addq $1, $6, $6
1569 addq $3, $6, $6
1570 addq $24, $6, $24
1571 cmpult $24, $6, $1
1572 srl $4, 32, $6
1573 mulq $5, $6, $25
1574 mulq $7, $5, $5
1575 addq $1, $23, $23
1576 addq $28, $25, $28
1577 cmpult $28, $25, $1
1578 mulq $6, $8, $6
1579 beq $1, $293
1580 sll $20, 32, $1
1581 addq $6, $1, $6
1582$293:
1583 sll $28, 32, $25
1584 ldq $2, 32($18)
1585 addq $5, $25, $5
1586 bis $5, $5, $7
1587 ldq $4, 24($17)
1588 addq $22, $7, $22
1589 srl $2, 32, $8
1590 cmpult $22, $7, $3
1591 zapnot $4, 15, $7
1592 mulq $8, $7, $0
1593 srl $28, 32, $1
1594 addq $6, $1, $6
1595 cmpult $5, $25, $1
1596 zapnot $2, 15, $5
1597 addq $1, $6, $6
1598 addq $3, $6, $6
1599 addq $24, $6, $24
1600 cmpult $24, $6, $1
1601 srl $4, 32, $6
1602 mulq $5, $6, $25
1603 mulq $7, $5, $5
1604 addq $1, $23, $23
1605 addq $0, $25, $0
1606 cmpult $0, $25, $1
1607 mulq $6, $8, $6
1608 beq $1, $297
1609 sll $20, 32, $1
1610 addq $6, $1, $6
1611$297:
1612 sll $0, 32, $25
1613 ldq $2, 24($18)
1614 addq $5, $25, $5
1615 bis $5, $5, $7
1616 ldq $4, 32($17)
1617 addq $22, $7, $22
1618 srl $2, 32, $8
1619 cmpult $22, $7, $3
1620 zapnot $4, 15, $7
1621 mulq $8, $7, $28
1622 srl $0, 32, $1
1623 addq $6, $1, $6
1624 cmpult $5, $25, $1
1625 zapnot $2, 15, $5
1626 addq $1, $6, $6
1627 addq $3, $6, $6
1628 addq $24, $6, $24
1629 cmpult $24, $6, $1
1630 srl $4, 32, $6
1631 mulq $5, $6, $25
1632 mulq $7, $5, $5
1633 addq $1, $23, $23
1634 addq $28, $25, $28
1635 cmpult $28, $25, $1
1636 mulq $6, $8, $6
1637 beq $1, $301
1638 sll $20, 32, $1
1639 addq $6, $1, $6
1640$301:
1641 sll $28, 32, $25
1642 ldq $2, 16($18)
1643 addq $5, $25, $5
1644 bis $5, $5, $7
1645 ldq $4, 40($17)
1646 addq $22, $7, $22
1647 srl $2, 32, $8
1648 cmpult $22, $7, $3
1649 zapnot $4, 15, $7
1650 mulq $8, $7, $0
1651 srl $28, 32, $1
1652 addq $6, $1, $6
1653 cmpult $5, $25, $1
1654 zapnot $2, 15, $5
1655 addq $1, $6, $6
1656 addq $3, $6, $6
1657 addq $24, $6, $24
1658 cmpult $24, $6, $1
1659 srl $4, 32, $6
1660 mulq $5, $6, $25
1661 mulq $7, $5, $5
1662 addq $1, $23, $23
1663 addq $0, $25, $0
1664 cmpult $0, $25, $1
1665 mulq $6, $8, $6
1666 beq $1, $305
1667 sll $20, 32, $1
1668 addq $6, $1, $6
1669$305:
1670 sll $0, 32, $25
1671 ldq $2, 8($18)
1672 addq $5, $25, $5
1673 bis $5, $5, $7
1674 ldq $4, 48($17)
1675 addq $22, $7, $22
1676 srl $2, 32, $8
1677 cmpult $22, $7, $3
1678 zapnot $4, 15, $7
1679 mulq $8, $7, $28
1680 srl $0, 32, $1
1681 addq $6, $1, $6
1682 cmpult $5, $25, $1
1683 zapnot $2, 15, $5
1684 addq $1, $6, $6
1685 addq $3, $6, $6
1686 addq $24, $6, $24
1687 cmpult $24, $6, $1
1688 srl $4, 32, $6
1689 mulq $5, $6, $25
1690 mulq $7, $5, $5
1691 addq $1, $23, $23
1692 addq $28, $25, $28
1693 cmpult $28, $25, $1
1694 mulq $6, $8, $6
1695 beq $1, $309
1696 sll $20, 32, $1
1697 addq $6, $1, $6
1698$309:
1699 sll $28, 32, $25
1700 ldq $2, 0($18)
1701 addq $5, $25, $5
1702 bis $5, $5, $7
1703 ldq $4, 56($17)
1704 addq $22, $7, $22
1705 srl $2, 32, $8
1706 cmpult $22, $7, $3
1707 zapnot $4, 15, $7
1708 mulq $8, $7, $0
1709 srl $28, 32, $1
1710 addq $6, $1, $6
1711 cmpult $5, $25, $1
1712 zapnot $2, 15, $5
1713 addq $1, $6, $6
1714 addq $3, $6, $6
1715 addq $24, $6, $24
1716 cmpult $24, $6, $1
1717 srl $4, 32, $6
1718 mulq $5, $6, $25
1719 mulq $7, $5, $2
1720 addq $1, $23, $23
1721 addq $0, $25, $0
1722 cmpult $0, $25, $1
1723 mulq $6, $8, $6
1724 beq $1, $313
1725 sll $20, 32, $1
1726 addq $6, $1, $6
1727$313:
1728 sll $0, 32, $25
1729 addq $2, $25, $2
1730 bis $2, $2, $7
1731 addq $22, $7, $22
1732 stq $22, 56($16)
1733 ldq $4, 56($17)
1734 ldq $5, 8($18)
1735 cmpult $22, $7, $3
1736 zapnot $4, 15, $7
1737 srl $5, 32, $8
1738 mulq $8, $7, $28
1739 srl $0, 32, $1
1740 cmpult $2, $25, $2
1741 addq $6, $1, $6
1742 addq $2, $6, $6
1743 addq $3, $6, $6
1744 addq $24, $6, $24
1745 cmpult $24, $6, $1
1746 srl $4, 32, $6
1747 zapnot $5, 15, $5
1748 mulq $5, $6, $22
1749 mulq $7, $5, $2
1750 addq $1, $23, $23
1751 addq $28, $22, $28
1752 cmpult $28, $22, $1
1753 mulq $6, $8, $6
1754 beq $1, $317
1755 sll $20, 32, $1
1756 addq $6, $1, $6
1757$317:
1758 sll $28, 32, $22
1759 ldq $1, 16($18)
1760 addq $2, $22, $2
1761 bis $2, $2, $7
1762 ldq $4, 48($17)
1763 addq $24, $7, $24
1764 srl $1, 32, $8
1765 cmpult $24, $7, $3
1766 zapnot $4, 15, $7
1767 mulq $8, $7, $25
1768 zapnot $1, 15, $5
1769 mulq $7, $5, $0
1770 srl $28, 32, $1
1771 cmpult $2, $22, $2
1772 addq $6, $1, $6
1773 addq $2, $6, $6
1774 addq $3, $6, $6
1775 addq $23, $6, $23
1776 cmpult $23, $6, $22
1777 srl $4, 32, $6
1778 mulq $5, $6, $5
1779 bis $31, 1, $21
1780 addq $25, $5, $25
1781 cmpult $25, $5, $1
1782 mulq $6, $8, $6
1783 beq $1, $321
1784 sll $21, 32, $1
1785 addq $6, $1, $6
1786$321:
1787 sll $25, 32, $5
1788 ldq $2, 24($18)
1789 addq $0, $5, $0
1790 bis $0, $0, $7
1791 ldq $4, 40($17)
1792 addq $24, $7, $24
1793 srl $2, 32, $8
1794 cmpult $24, $7, $3
1795 zapnot $4, 15, $7
1796 mulq $8, $7, $28
1797 srl $25, 32, $1
1798 addq $6, $1, $6
1799 cmpult $0, $5, $1
1800 zapnot $2, 15, $5
1801 addq $1, $6, $6
1802 addq $3, $6, $6
1803 addq $23, $6, $23
1804 cmpult $23, $6, $1
1805 srl $4, 32, $6
1806 mulq $5, $6, $25
1807 mulq $7, $5, $5
1808 addq $1, $22, $22
1809 addq $28, $25, $28
1810 cmpult $28, $25, $1
1811 mulq $6, $8, $6
1812 beq $1, $325
1813 sll $21, 32, $1
1814 addq $6, $1, $6
1815$325:
1816 sll $28, 32, $25
1817 ldq $2, 32($18)
1818 addq $5, $25, $5
1819 bis $5, $5, $7
1820 ldq $4, 32($17)
1821 addq $24, $7, $24
1822 srl $2, 32, $8
1823 cmpult $24, $7, $3
1824 zapnot $4, 15, $7
1825 mulq $8, $7, $0
1826 srl $28, 32, $1
1827 addq $6, $1, $6
1828 cmpult $5, $25, $1
1829 zapnot $2, 15, $5
1830 addq $1, $6, $6
1831 addq $3, $6, $6
1832 addq $23, $6, $23
1833 cmpult $23, $6, $1
1834 srl $4, 32, $6
1835 mulq $5, $6, $25
1836 mulq $7, $5, $5
1837 addq $1, $22, $22
1838 addq $0, $25, $0
1839 cmpult $0, $25, $1
1840 mulq $6, $8, $6
1841 beq $1, $329
1842 sll $21, 32, $1
1843 addq $6, $1, $6
1844$329:
1845 sll $0, 32, $25
1846 ldq $2, 40($18)
1847 addq $5, $25, $5
1848 bis $5, $5, $7
1849 ldq $4, 24($17)
1850 addq $24, $7, $24
1851 srl $2, 32, $8
1852 cmpult $24, $7, $3
1853 zapnot $4, 15, $7
1854 mulq $8, $7, $28
1855 srl $0, 32, $1
1856 addq $6, $1, $6
1857 cmpult $5, $25, $1
1858 zapnot $2, 15, $5
1859 addq $1, $6, $6
1860 addq $3, $6, $6
1861 addq $23, $6, $23
1862 cmpult $23, $6, $1
1863 srl $4, 32, $6
1864 mulq $5, $6, $25
1865 mulq $7, $5, $5
1866 addq $1, $22, $22
1867 addq $28, $25, $28
1868 cmpult $28, $25, $1
1869 mulq $6, $8, $6
1870 beq $1, $333
1871 sll $21, 32, $1
1872 addq $6, $1, $6
1873$333:
1874 sll $28, 32, $25
1875 ldq $2, 48($18)
1876 addq $5, $25, $5
1877 bis $5, $5, $7
1878 ldq $4, 16($17)
1879 addq $24, $7, $24
1880 srl $2, 32, $8
1881 cmpult $24, $7, $3
1882 zapnot $4, 15, $7
1883 mulq $8, $7, $0
1884 srl $28, 32, $1
1885 addq $6, $1, $6
1886 cmpult $5, $25, $1
1887 zapnot $2, 15, $5
1888 addq $1, $6, $6
1889 addq $3, $6, $6
1890 addq $23, $6, $23
1891 cmpult $23, $6, $1
1892 srl $4, 32, $6
1893 mulq $5, $6, $25
1894 mulq $7, $5, $5
1895 addq $1, $22, $22
1896 addq $0, $25, $0
1897 cmpult $0, $25, $1
1898 mulq $6, $8, $6
1899 beq $1, $337
1900 sll $21, 32, $1
1901 addq $6, $1, $6
1902$337:
1903 sll $0, 32, $25
1904 ldq $2, 56($18)
1905 addq $5, $25, $5
1906 bis $5, $5, $7
1907 ldq $4, 8($17)
1908 addq $24, $7, $24
1909 srl $2, 32, $8
1910 cmpult $24, $7, $3
1911 zapnot $4, 15, $7
1912 mulq $8, $7, $28
1913 srl $0, 32, $1
1914 addq $6, $1, $6
1915 cmpult $5, $25, $1
1916 zapnot $2, 15, $5
1917 addq $1, $6, $6
1918 addq $3, $6, $6
1919 addq $23, $6, $23
1920 cmpult $23, $6, $1
1921 srl $4, 32, $6
1922 mulq $5, $6, $25
1923 mulq $7, $5, $2
1924 addq $1, $22, $22
1925 addq $28, $25, $28
1926 cmpult $28, $25, $1
1927 mulq $6, $8, $6
1928 beq $1, $341
1929 sll $21, 32, $1
1930 addq $6, $1, $6
1931$341:
1932 sll $28, 32, $25
1933 addq $2, $25, $2
1934 bis $2, $2, $7
1935 addq $24, $7, $24
1936 stq $24, 64($16)
1937 ldq $4, 16($17)
1938 ldq $5, 56($18)
1939 cmpult $24, $7, $3
1940 zapnot $4, 15, $7
1941 srl $5, 32, $8
1942 mulq $8, $7, $0
1943 srl $28, 32, $1
1944 cmpult $2, $25, $2
1945 addq $6, $1, $6
1946 addq $2, $6, $6
1947 addq $3, $6, $6
1948 addq $23, $6, $23
1949 cmpult $23, $6, $1
1950 srl $4, 32, $6
1951 zapnot $5, 15, $5
1952 mulq $5, $6, $24
1953 mulq $7, $5, $2
1954 addq $1, $22, $22
1955 addq $0, $24, $0
1956 cmpult $0, $24, $1
1957 mulq $6, $8, $6
1958 beq $1, $345
1959 sll $21, 32, $1
1960 addq $6, $1, $6
1961$345:
1962 sll $0, 32, $24
1963 ldq $1, 48($18)
1964 addq $2, $24, $2
1965 bis $2, $2, $7
1966 ldq $4, 24($17)
1967 addq $23, $7, $23
1968 srl $1, 32, $8
1969 cmpult $23, $7, $3
1970 zapnot $4, 15, $7
1971 mulq $8, $7, $25
1972 zapnot $1, 15, $5
1973 mulq $7, $5, $21
1974 srl $0, 32, $1
1975 cmpult $2, $24, $2
1976 addq $6, $1, $6
1977 addq $2, $6, $6
1978 addq $3, $6, $6
1979 addq $22, $6, $22
1980 cmpult $22, $6, $24
1981 srl $4, 32, $6
1982 mulq $5, $6, $5
1983 bis $31, 1, $20
1984 addq $25, $5, $25
1985 cmpult $25, $5, $1
1986 mulq $6, $8, $6
1987 beq $1, $349
1988 sll $20, 32, $1
1989 addq $6, $1, $6
1990$349:
1991 sll $25, 32, $5
1992 ldq $2, 40($18)
1993 addq $21, $5, $21
1994 bis $21, $21, $7
1995 ldq $4, 32($17)
1996 addq $23, $7, $23
1997 srl $2, 32, $8
1998 cmpult $23, $7, $3
1999 zapnot $4, 15, $7
2000 mulq $8, $7, $28
2001 srl $25, 32, $1
2002 addq $6, $1, $6
2003 cmpult $21, $5, $1
2004 zapnot $2, 15, $5
2005 addq $1, $6, $6
2006 addq $3, $6, $6
2007 addq $22, $6, $22
2008 cmpult $22, $6, $1
2009 srl $4, 32, $6
2010 mulq $5, $6, $25
2011 mulq $7, $5, $5
2012 addq $1, $24, $24
2013 addq $28, $25, $28
2014 cmpult $28, $25, $1
2015 mulq $6, $8, $6
2016 beq $1, $353
2017 sll $20, 32, $1
2018 addq $6, $1, $6
2019$353:
2020 sll $28, 32, $25
2021 ldq $2, 32($18)
2022 addq $5, $25, $5
2023 bis $5, $5, $7
2024 ldq $4, 40($17)
2025 addq $23, $7, $23
2026 srl $2, 32, $8
2027 cmpult $23, $7, $3
2028 zapnot $4, 15, $7
2029 mulq $8, $7, $0
2030 srl $28, 32, $1
2031 addq $6, $1, $6
2032 cmpult $5, $25, $1
2033 zapnot $2, 15, $5
2034 addq $1, $6, $6
2035 addq $3, $6, $6
2036 addq $22, $6, $22
2037 cmpult $22, $6, $1
2038 srl $4, 32, $6
2039 mulq $5, $6, $25
2040 mulq $7, $5, $5
2041 addq $1, $24, $24
2042 addq $0, $25, $0
2043 cmpult $0, $25, $1
2044 mulq $6, $8, $6
2045 beq $1, $357
2046 sll $20, 32, $1
2047 addq $6, $1, $6
2048$357:
2049 sll $0, 32, $25
2050 ldq $2, 24($18)
2051 addq $5, $25, $5
2052 bis $5, $5, $7
2053 ldq $4, 48($17)
2054 addq $23, $7, $23
2055 srl $2, 32, $8
2056 cmpult $23, $7, $3
2057 zapnot $4, 15, $7
2058 mulq $8, $7, $28
2059 srl $0, 32, $1
2060 addq $6, $1, $6
2061 cmpult $5, $25, $1
2062 zapnot $2, 15, $5
2063 addq $1, $6, $6
2064 addq $3, $6, $6
2065 addq $22, $6, $22
2066 cmpult $22, $6, $1
2067 srl $4, 32, $6
2068 mulq $5, $6, $25
2069 mulq $7, $5, $5
2070 addq $1, $24, $24
2071 addq $28, $25, $28
2072 cmpult $28, $25, $1
2073 mulq $6, $8, $6
2074 beq $1, $361
2075 sll $20, 32, $1
2076 addq $6, $1, $6
2077$361:
2078 sll $28, 32, $25
2079 ldq $2, 16($18)
2080 addq $5, $25, $5
2081 bis $5, $5, $7
2082 ldq $4, 56($17)
2083 addq $23, $7, $23
2084 srl $2, 32, $8
2085 cmpult $23, $7, $3
2086 zapnot $4, 15, $7
2087 mulq $8, $7, $0
2088 srl $28, 32, $1
2089 addq $6, $1, $6
2090 cmpult $5, $25, $1
2091 zapnot $2, 15, $5
2092 addq $1, $6, $6
2093 addq $3, $6, $6
2094 addq $22, $6, $22
2095 cmpult $22, $6, $1
2096 srl $4, 32, $6
2097 mulq $5, $6, $25
2098 mulq $7, $5, $2
2099 addq $1, $24, $24
2100 addq $0, $25, $0
2101 cmpult $0, $25, $1
2102 mulq $6, $8, $6
2103 beq $1, $365
2104 sll $20, 32, $1
2105 addq $6, $1, $6
2106$365:
2107 sll $0, 32, $25
2108 addq $2, $25, $2
2109 bis $2, $2, $7
2110 addq $23, $7, $23
2111 stq $23, 72($16)
2112 ldq $4, 56($17)
2113 ldq $5, 24($18)
2114 cmpult $23, $7, $3
2115 zapnot $4, 15, $7
2116 srl $5, 32, $8
2117 mulq $8, $7, $28
2118 srl $0, 32, $1
2119 cmpult $2, $25, $2
2120 addq $6, $1, $6
2121 addq $2, $6, $6
2122 addq $3, $6, $6
2123 addq $22, $6, $22
2124 cmpult $22, $6, $1
2125 srl $4, 32, $6
2126 zapnot $5, 15, $5
2127 mulq $5, $6, $23
2128 mulq $7, $5, $2
2129 addq $1, $24, $24
2130 addq $28, $23, $28
2131 cmpult $28, $23, $1
2132 mulq $6, $8, $6
2133 beq $1, $369
2134 sll $20, 32, $1
2135 addq $6, $1, $6
2136$369:
2137 sll $28, 32, $23
2138 ldq $1, 32($18)
2139 addq $2, $23, $2
2140 bis $2, $2, $7
2141 ldq $4, 48($17)
2142 addq $22, $7, $22
2143 srl $1, 32, $8
2144 cmpult $22, $7, $3
2145 zapnot $4, 15, $7
2146 mulq $8, $7, $25
2147 zapnot $1, 15, $5
2148 mulq $7, $5, $0
2149 srl $28, 32, $1
2150 cmpult $2, $23, $2
2151 addq $6, $1, $6
2152 addq $2, $6, $6
2153 addq $3, $6, $6
2154 addq $24, $6, $24
2155 cmpult $24, $6, $23
2156 srl $4, 32, $6
2157 mulq $5, $6, $5
2158 bis $31, 1, $21
2159 addq $25, $5, $25
2160 cmpult $25, $5, $1
2161 mulq $6, $8, $6
2162 beq $1, $373
2163 sll $21, 32, $1
2164 addq $6, $1, $6
2165$373:
2166 sll $25, 32, $5
2167 ldq $2, 40($18)
2168 addq $0, $5, $0
2169 bis $0, $0, $7
2170 ldq $4, 40($17)
2171 addq $22, $7, $22
2172 srl $2, 32, $8
2173 cmpult $22, $7, $3
2174 zapnot $4, 15, $7
2175 mulq $8, $7, $28
2176 srl $25, 32, $1
2177 addq $6, $1, $6
2178 cmpult $0, $5, $1
2179 zapnot $2, 15, $5
2180 addq $1, $6, $6
2181 addq $3, $6, $6
2182 addq $24, $6, $24
2183 cmpult $24, $6, $1
2184 srl $4, 32, $6
2185 mulq $5, $6, $25
2186 mulq $7, $5, $5
2187 addq $1, $23, $23
2188 addq $28, $25, $28
2189 cmpult $28, $25, $1
2190 mulq $6, $8, $6
2191 beq $1, $377
2192 sll $21, 32, $1
2193 addq $6, $1, $6
2194$377:
2195 sll $28, 32, $25
2196 ldq $2, 48($18)
2197 addq $5, $25, $5
2198 bis $5, $5, $7
2199 ldq $4, 32($17)
2200 addq $22, $7, $22
2201 srl $2, 32, $8
2202 cmpult $22, $7, $3
2203 zapnot $4, 15, $7
2204 mulq $8, $7, $0
2205 srl $28, 32, $1
2206 addq $6, $1, $6
2207 cmpult $5, $25, $1
2208 zapnot $2, 15, $5
2209 addq $1, $6, $6
2210 addq $3, $6, $6
2211 addq $24, $6, $24
2212 cmpult $24, $6, $1
2213 srl $4, 32, $6
2214 mulq $5, $6, $25
2215 mulq $7, $5, $5
2216 addq $1, $23, $23
2217 addq $0, $25, $0
2218 cmpult $0, $25, $1
2219 mulq $6, $8, $6
2220 beq $1, $381
2221 sll $21, 32, $1
2222 addq $6, $1, $6
2223$381:
2224 sll $0, 32, $25
2225 ldq $2, 56($18)
2226 addq $5, $25, $5
2227 bis $5, $5, $7
2228 ldq $4, 24($17)
2229 addq $22, $7, $22
2230 srl $2, 32, $8
2231 cmpult $22, $7, $3
2232 zapnot $4, 15, $7
2233 mulq $8, $7, $28
2234 srl $0, 32, $1
2235 addq $6, $1, $6
2236 cmpult $5, $25, $1
2237 zapnot $2, 15, $5
2238 addq $1, $6, $6
2239 addq $3, $6, $6
2240 addq $24, $6, $24
2241 cmpult $24, $6, $1
2242 srl $4, 32, $6
2243 mulq $5, $6, $25
2244 mulq $7, $5, $2
2245 addq $1, $23, $23
2246 addq $28, $25, $28
2247 cmpult $28, $25, $1
2248 mulq $6, $8, $6
2249 beq $1, $385
2250 sll $21, 32, $1
2251 addq $6, $1, $6
2252$385:
2253 sll $28, 32, $25
2254 addq $2, $25, $2
2255 bis $2, $2, $7
2256 addq $22, $7, $22
2257 stq $22, 80($16)
2258 ldq $4, 32($17)
2259 ldq $5, 56($18)
2260 cmpult $22, $7, $3
2261 zapnot $4, 15, $7
2262 srl $5, 32, $8
2263 mulq $8, $7, $0
2264 srl $28, 32, $1
2265 cmpult $2, $25, $2
2266 addq $6, $1, $6
2267 addq $2, $6, $6
2268 addq $3, $6, $6
2269 addq $24, $6, $24
2270 cmpult $24, $6, $1
2271 srl $4, 32, $6
2272 zapnot $5, 15, $5
2273 mulq $5, $6, $22
2274 mulq $7, $5, $2
2275 addq $1, $23, $23
2276 addq $0, $22, $0
2277 cmpult $0, $22, $1
2278 mulq $6, $8, $6
2279 beq $1, $389
2280 sll $21, 32, $1
2281 addq $6, $1, $6
2282$389:
2283 sll $0, 32, $22
2284 ldq $1, 48($18)
2285 addq $2, $22, $2
2286 bis $2, $2, $7
2287 ldq $4, 40($17)
2288 addq $24, $7, $24
2289 srl $1, 32, $8
2290 cmpult $24, $7, $3
2291 zapnot $4, 15, $7
2292 mulq $8, $7, $25
2293 zapnot $1, 15, $5
2294 mulq $7, $5, $21
2295 srl $0, 32, $1
2296 cmpult $2, $22, $2
2297 addq $6, $1, $6
2298 addq $2, $6, $6
2299 addq $3, $6, $6
2300 addq $23, $6, $23
2301 cmpult $23, $6, $22
2302 srl $4, 32, $6
2303 mulq $5, $6, $5
2304 bis $31, 1, $20
2305 addq $25, $5, $25
2306 cmpult $25, $5, $1
2307 mulq $6, $8, $6
2308 beq $1, $393
2309 sll $20, 32, $1
2310 addq $6, $1, $6
2311$393:
2312 sll $25, 32, $5
2313 ldq $2, 40($18)
2314 addq $21, $5, $21
2315 bis $21, $21, $7
2316 ldq $4, 48($17)
2317 addq $24, $7, $24
2318 srl $2, 32, $8
2319 cmpult $24, $7, $3
2320 zapnot $4, 15, $7
2321 mulq $8, $7, $28
2322 srl $25, 32, $1
2323 addq $6, $1, $6
2324 cmpult $21, $5, $1
2325 zapnot $2, 15, $5
2326 addq $1, $6, $6
2327 addq $3, $6, $6
2328 addq $23, $6, $23
2329 cmpult $23, $6, $1
2330 srl $4, 32, $6
2331 mulq $5, $6, $25
2332 mulq $7, $5, $5
2333 addq $1, $22, $22
2334 addq $28, $25, $28
2335 cmpult $28, $25, $1
2336 mulq $6, $8, $6
2337 beq $1, $397
2338 sll $20, 32, $1
2339 addq $6, $1, $6
2340$397:
2341 sll $28, 32, $25
2342 ldq $2, 32($18)
2343 addq $5, $25, $5
2344 bis $5, $5, $7
2345 ldq $4, 56($17)
2346 addq $24, $7, $24
2347 srl $2, 32, $8
2348 cmpult $24, $7, $3
2349 zapnot $4, 15, $7
2350 mulq $8, $7, $21
2351 srl $28, 32, $1
2352 addq $6, $1, $6
2353 cmpult $5, $25, $1
2354 zapnot $2, 15, $5
2355 addq $1, $6, $6
2356 addq $3, $6, $6
2357 addq $23, $6, $23
2358 cmpult $23, $6, $1
2359 srl $4, 32, $6
2360 mulq $5, $6, $25
2361 mulq $7, $5, $2
2362 addq $1, $22, $22
2363 addq $21, $25, $21
2364 cmpult $21, $25, $1
2365 mulq $6, $8, $6
2366 beq $1, $401
2367 sll $20, 32, $1
2368 addq $6, $1, $6
2369$401:
2370 sll $21, 32, $25
2371 addq $2, $25, $2
2372 bis $2, $2, $7
2373 addq $24, $7, $24
2374 stq $24, 88($16)
2375 ldq $4, 56($17)
2376 ldq $5, 40($18)
2377 cmpult $24, $7, $3
2378 zapnot $4, 15, $7
2379 srl $5, 32, $8
2380 mulq $8, $7, $0
2381 srl $21, 32, $1
2382 cmpult $2, $25, $2
2383 addq $6, $1, $6
2384 addq $2, $6, $6
2385 addq $3, $6, $6
2386 addq $23, $6, $23
2387 cmpult $23, $6, $1
2388 srl $4, 32, $6
2389 zapnot $5, 15, $5
2390 mulq $5, $6, $24
2391 mulq $7, $5, $5
2392 addq $1, $22, $22
2393 addq $0, $24, $0
2394 cmpult $0, $24, $1
2395 mulq $6, $8, $6
2396 beq $1, $405
2397 sll $20, 32, $1
2398 addq $6, $1, $6
2399$405:
2400 sll $0, 32, $24
2401 ldq $2, 48($18)
2402 addq $5, $24, $5
2403 bis $5, $5, $7
2404 ldq $4, 48($17)
2405 addq $23, $7, $23
2406 srl $2, 32, $8
2407 cmpult $23, $7, $3
2408 zapnot $4, 15, $7
2409 mulq $8, $7, $28
2410 srl $0, 32, $1
2411 addq $6, $1, $6
2412 cmpult $5, $24, $1
2413 zapnot $2, 15, $5
2414 addq $1, $6, $6
2415 addq $3, $6, $6
2416 addq $22, $6, $22
2417 cmpult $22, $6, $24
2418 srl $4, 32, $6
2419 mulq $5, $6, $25
2420 mulq $7, $5, $5
2421 addq $28, $25, $28
2422 cmpult $28, $25, $1
2423 mulq $6, $8, $6
2424 beq $1, $409
2425 sll $20, 32, $1
2426 addq $6, $1, $6
2427$409:
2428 sll $28, 32, $25
2429 ldq $2, 56($18)
2430 addq $5, $25, $5
2431 bis $5, $5, $7
2432 ldq $4, 40($17)
2433 addq $23, $7, $23
2434 srl $2, 32, $8
2435 cmpult $23, $7, $3
2436 zapnot $4, 15, $7
2437 mulq $8, $7, $0
2438 srl $28, 32, $1
2439 addq $6, $1, $6
2440 cmpult $5, $25, $1
2441 zapnot $2, 15, $5
2442 addq $1, $6, $6
2443 addq $3, $6, $6
2444 addq $22, $6, $22
2445 cmpult $22, $6, $1
2446 srl $4, 32, $6
2447 mulq $5, $6, $25
2448 mulq $7, $5, $2
2449 addq $1, $24, $24
2450 addq $0, $25, $0
2451 cmpult $0, $25, $1
2452 mulq $6, $8, $6
2453 beq $1, $413
2454 sll $20, 32, $1
2455 addq $6, $1, $6
2456$413:
2457 sll $0, 32, $25
2458 addq $2, $25, $2
2459 bis $2, $2, $7
2460 addq $23, $7, $23
2461 stq $23, 96($16)
2462 ldq $4, 48($17)
2463 ldq $5, 56($18)
2464 cmpult $23, $7, $3
2465 zapnot $4, 15, $7
2466 srl $5, 32, $8
2467 mulq $8, $7, $28
2468 srl $0, 32, $1
2469 cmpult $2, $25, $2
2470 addq $6, $1, $6
2471 addq $2, $6, $6
2472 addq $3, $6, $6
2473 addq $22, $6, $22
2474 cmpult $22, $6, $1
2475 srl $4, 32, $6
2476 zapnot $5, 15, $5
2477 mulq $5, $6, $23
2478 mulq $7, $5, $5
2479 addq $1, $24, $24
2480 addq $28, $23, $28
2481 cmpult $28, $23, $1
2482 mulq $6, $8, $6
2483 beq $1, $417
2484 sll $20, 32, $1
2485 addq $6, $1, $6
2486$417:
2487 sll $28, 32, $23
2488 ldq $2, 48($18)
2489 addq $5, $23, $5
2490 bis $5, $5, $7
2491 ldq $4, 56($17)
2492 addq $22, $7, $22
2493 srl $2, 32, $8
2494 cmpult $22, $7, $3
2495 zapnot $4, 15, $7
2496 mulq $8, $7, $0
2497 srl $28, 32, $1
2498 addq $6, $1, $6
2499 cmpult $5, $23, $1
2500 zapnot $2, 15, $5
2501 addq $1, $6, $6
2502 addq $3, $6, $6
2503 addq $24, $6, $24
2504 cmpult $24, $6, $23
2505 srl $4, 32, $6
2506 mulq $5, $6, $25
2507 mulq $7, $5, $2
2508 addq $0, $25, $0
2509 cmpult $0, $25, $1
2510 mulq $6, $8, $6
2511 beq $1, $421
2512 sll $20, 32, $1
2513 addq $6, $1, $6
2514$421:
2515 sll $0, 32, $25
2516 addq $2, $25, $2
2517 bis $2, $2, $7
2518 addq $22, $7, $22
2519 stq $22, 104($16)
2520 ldq $4, 56($17)
2521 ldq $5, 56($18)
2522 cmpult $22, $7, $3
2523 zapnot $4, 15, $7
2524 srl $5, 32, $8
2525 mulq $8, $7, $28
2526 srl $0, 32, $1
2527 cmpult $2, $25, $2
2528 addq $6, $1, $6
2529 addq $2, $6, $6
2530 addq $3, $6, $6
2531 addq $24, $6, $24
2532 cmpult $24, $6, $1
2533 srl $4, 32, $6
2534 zapnot $5, 15, $5
2535 mulq $5, $6, $22
2536 mulq $7, $5, $2
2537 addq $1, $23, $23
2538 addq $28, $22, $28
2539 cmpult $28, $22, $1
2540 mulq $6, $8, $3
2541 beq $1, $425
2542 sll $20, 32, $1
2543 addq $3, $1, $3
2544$425:
2545 sll $28, 32, $22
2546 srl $28, 32, $1
2547 addq $2, $22, $2
2548 addq $3, $1, $3
2549 bis $2, $2, $7
2550 addq $24, $7, $24
2551 cmpult $7, $22, $1
2552 cmpult $24, $7, $2
2553 addq $1, $3, $6
2554 addq $2, $6, $6
2555 stq $24, 112($16)
2556 addq $23, $6, $23
2557 stq $23, 120($16)
2558 ret $31, ($26), 1
2559 .end bn_mul_comba8
# bn_sqr_comba4: column-by-column squaring of a 4-quadword input.
#   $16 - destination pointer; eight quadwords are stored at 0..56($16)
#   $17 - source pointer; four quadwords a[0..3] are loaded from 0..24($17)
# For every output column the products a[i]*a[j] with i+j == column are
# summed into three rotating accumulator registers chosen from $4/$5/$6
# (low, middle, high).  Off-diagonal products are computed once and doubled.
# $17 and $18-$28 are overwritten as scratch after the inputs are loaded.
# NOTE(review): in each accumulate step the carry out of the low-word add is
# added to the middle accumulator after the middle accumulator's own overflow
# compare, and that second add is not checked; if it can wrap, a carry would
# be dropped - confirm.
2560 .text
2561 .align 3
2562 .globl bn_sqr_comba4
2563 .ent bn_sqr_comba4
2564bn_sqr_comba4:
2565bn_sqr_comba4..ng:
2566 .frame $30,0,$26,0
2567 .prologue 0
2568
# Load a[0..3] into $0..$3.
2569 ldq $0, 0($17)
2570 ldq $1, 8($17)
2571 ldq $2, 16($17)
2572 ldq $3, 24($17)
# Column 0: a[0]*a[0].  Low half is stored; the high half in $5 seeds column 1.
2573 bis $31, $31, $6
2574 mulq $0, $0, $4
2575 umulh $0, $0, $5
2576 stq $4, 0($16)
2577 bis $31, $31, $4
# Column 1 (accumulators $5,$6,$4): += 2*a[0]*a[1].
# cmplt against $31 (zero) captures the top bit of each half before doubling.
2578 mulq $0, $1, $7
2579 umulh $0, $1, $8
2580 cmplt $7, $31, $22
2581 cmplt $8, $31, $23
2582 addq $7, $7, $7
2583 addq $8, $8, $8
2584 addq $8, $22, $8
2585 addq $4, $23, $4
2586 addq $5, $7, $5
2587 addq $6, $8, $6
2588 cmpult $5, $7, $24
2589 cmpult $6, $8, $25
2590 addq $6, $24, $6
2591 addq $4, $25, $4
2592 stq $5, 8($16)
2593 bis $31, $31, $5
# Column 2 (accumulators $6,$4,$5): += a[1]*a[1].
2594 mulq $1, $1, $27
2595 umulh $1, $1, $28
2596 addq $6, $27, $6
2597 addq $4, $28, $4
2598 cmpult $6, $27, $21
2599 cmpult $4, $28, $20
2600 addq $4, $21, $4
2601 addq $5, $20, $5
# Column 2: += 2*a[2]*a[0].  $17 (source pointer) is reused as scratch from here.
2602 mulq $2, $0, $19
2603 umulh $2, $0, $18
2604 cmplt $19, $31, $17
2605 cmplt $18, $31, $22
2606 addq $19, $19, $19
2607 addq $18, $18, $18
2608 addq $18, $17, $18
2609 addq $5, $22, $5
2610 addq $6, $19, $6
2611 addq $4, $18, $4
2612 cmpult $6, $19, $23
2613 cmpult $4, $18, $7
2614 addq $4, $23, $4
2615 addq $5, $7, $5
2616 stq $6, 16($16)
2617 bis $31, $31, $6
# Column 3 (accumulators $4,$5,$6): += 2*a[3]*a[0].
2618 mulq $3, $0, $8
2619 umulh $3, $0, $24
2620 cmplt $8, $31, $25
2621 cmplt $24, $31, $27
2622 addq $8, $8, $8
2623 addq $24, $24, $24
2624 addq $24, $25, $24
2625 addq $6, $27, $6
2626 addq $4, $8, $4
2627 addq $5, $24, $5
2628 cmpult $4, $8, $28
2629 cmpult $5, $24, $21
2630 addq $5, $28, $5
2631 addq $6, $21, $6
# Column 3: += 2*a[2]*a[1].
2632 mulq $2, $1, $20
2633 umulh $2, $1, $17
2634 cmplt $20, $31, $22
2635 cmplt $17, $31, $19
2636 addq $20, $20, $20
2637 addq $17, $17, $17
2638 addq $17, $22, $17
2639 addq $6, $19, $6
2640 addq $4, $20, $4
2641 addq $5, $17, $5
2642 cmpult $4, $20, $18
2643 cmpult $5, $17, $23
2644 addq $5, $18, $5
2645 addq $6, $23, $6
2646 stq $4, 24($16)
2647 bis $31, $31, $4
# Column 4 (accumulators $5,$6,$4): += a[2]*a[2].
2648 mulq $2, $2, $7
2649 umulh $2, $2, $25
2650 addq $5, $7, $5
2651 addq $6, $25, $6
2652 cmpult $5, $7, $27
2653 cmpult $6, $25, $8
2654 addq $6, $27, $6
2655 addq $4, $8, $4
# Column 4: += 2*a[3]*a[1].
2656 mulq $3, $1, $24
2657 umulh $3, $1, $28
2658 cmplt $24, $31, $21
2659 cmplt $28, $31, $22
2660 addq $24, $24, $24
2661 addq $28, $28, $28
2662 addq $28, $21, $28
2663 addq $4, $22, $4
2664 addq $5, $24, $5
2665 addq $6, $28, $6
2666 cmpult $5, $24, $19
2667 cmpult $6, $28, $20
2668 addq $6, $19, $6
2669 addq $4, $20, $4
2670 stq $5, 32($16)
2671 bis $31, $31, $5
# Column 5 (accumulators $6,$4,$5): += 2*a[3]*a[2].
2672 mulq $3, $2, $17
2673 umulh $3, $2, $18
2674 cmplt $17, $31, $23
2675 cmplt $18, $31, $7
2676 addq $17, $17, $17
2677 addq $18, $18, $18
2678 addq $18, $23, $18
2679 addq $5, $7, $5
2680 addq $6, $17, $6
2681 addq $4, $18, $4
2682 cmpult $6, $17, $25
2683 cmpult $4, $18, $27
2684 addq $4, $25, $4
2685 addq $5, $27, $5
2686 stq $6, 40($16)
2687 bis $31, $31, $6
# Column 6 (accumulators $4,$5,$6): += a[3]*a[3].
2688 mulq $3, $3, $8
2689 umulh $3, $3, $21
2690 addq $4, $8, $4
2691 addq $5, $21, $5
2692 cmpult $4, $8, $22
2693 cmpult $5, $21, $24
2694 addq $5, $22, $5
# The carry accumulated into $6 here is never stored.
2695 addq $6, $24, $6
# Store the last two result words and return through $26.
2696 stq $4, 48($16)
2697 stq $5, 56($16)
2698 ret $31,($26),1
2699 .end bn_sqr_comba4
# bn_sqr_comba8: column-by-column squaring of an 8-quadword input.
#   $16 - destination pointer; sixteen quadwords are stored at 0..120($16)
#   $17 - source pointer; eight quadwords a[0..7] are loaded from 0..56($17)
# For every output column the products a[i]*a[j] with i+j == column are
# summed into three rotating accumulator registers chosen from $22/$23/$8
# (low, middle, high).  Off-diagonal products are computed once and doubled:
# cmplt against $31 (zero) captures the top bit of each half before the
# addq x,x,x doubling so it can be carried into the next word.
# $17-$21, $24, $25, $27 and $28 are overwritten as scratch.
# NOTE(review): in each accumulate step the carry out of the low-word add is
# added to the middle accumulator after the middle accumulator's own overflow
# compare, and that second add is not checked; if it can wrap, a carry would
# be dropped - confirm.
2700 .text
2701 .align 3
2702 .globl bn_sqr_comba8
2703 .ent bn_sqr_comba8
2704bn_sqr_comba8:
2705bn_sqr_comba8..ng:
2706 .frame $30,0,$26,0
2707 .prologue 0
2708
# Load a[0..7] into $0..$7.
2709 ldq $0, 0($17)
2710 ldq $1, 8($17)
2711 ldq $2, 16($17)
2712 ldq $3, 24($17)
2713 ldq $4, 32($17)
2714 ldq $5, 40($17)
2715 ldq $6, 48($17)
2716 ldq $7, 56($17)
# Column 0: a[0]*a[0].  Low half is stored; the high half in $22 seeds column 1.
2717 bis $31, $31, $23
2718 mulq $0, $0, $8
2719 umulh $0, $0, $22
2720 stq $8, 0($16)
2721 bis $31, $31, $8
# Column 1 (accumulators $22,$23,$8): += 2*a[1]*a[0].
2722 mulq $1, $0, $24
2723 umulh $1, $0, $25
2724 cmplt $24, $31, $27
2725 cmplt $25, $31, $28
2726 addq $24, $24, $24
2727 addq $25, $25, $25
2728 addq $25, $27, $25
2729 addq $8, $28, $8
2730 addq $22, $24, $22
2731 addq $23, $25, $23
2732 cmpult $22, $24, $21
2733 cmpult $23, $25, $20
2734 addq $23, $21, $23
2735 addq $8, $20, $8
2736 stq $22, 8($16)
2737 bis $31, $31, $22
# Column 2 (accumulators $23,$8,$22): += a[1]*a[1].
# $17 (source pointer) is reused as scratch from here on.
2738 mulq $1, $1, $19
2739 umulh $1, $1, $18
2740 addq $23, $19, $23
2741 addq $8, $18, $8
2742 cmpult $23, $19, $17
2743 cmpult $8, $18, $27
2744 addq $8, $17, $8
2745 addq $22, $27, $22
# Column 2: += 2*a[2]*a[0].
2746 mulq $2, $0, $28
2747 umulh $2, $0, $24
2748 cmplt $28, $31, $25
2749 cmplt $24, $31, $21
2750 addq $28, $28, $28
2751 addq $24, $24, $24
2752 addq $24, $25, $24
2753 addq $22, $21, $22
2754 addq $23, $28, $23
2755 addq $8, $24, $8
2756 cmpult $23, $28, $20
2757 cmpult $8, $24, $19
2758 addq $8, $20, $8
2759 addq $22, $19, $22
2760 stq $23, 16($16)
2761 bis $31, $31, $23
# Column 3 (accumulators $8,$22,$23): += 2*a[2]*a[1].
2762 mulq $2, $1, $18
2763 umulh $2, $1, $17
2764 cmplt $18, $31, $27
2765 cmplt $17, $31, $25
2766 addq $18, $18, $18
2767 addq $17, $17, $17
2768 addq $17, $27, $17
2769 addq $23, $25, $23
2770 addq $8, $18, $8
2771 addq $22, $17, $22
2772 cmpult $8, $18, $21
2773 cmpult $22, $17, $28
2774 addq $22, $21, $22
2775 addq $23, $28, $23
# Column 3: += 2*a[3]*a[0].
2776 mulq $3, $0, $24
2777 umulh $3, $0, $20
2778 cmplt $24, $31, $19
2779 cmplt $20, $31, $27
2780 addq $24, $24, $24
2781 addq $20, $20, $20
2782 addq $20, $19, $20
2783 addq $23, $27, $23
2784 addq $8, $24, $8
2785 addq $22, $20, $22
2786 cmpult $8, $24, $25
2787 cmpult $22, $20, $18
2788 addq $22, $25, $22
2789 addq $23, $18, $23
2790 stq $8, 24($16)
2791 bis $31, $31, $8
# Column 4 (accumulators $22,$23,$8): += a[2]*a[2].
2792 mulq $2, $2, $17
2793 umulh $2, $2, $21
2794 addq $22, $17, $22
2795 addq $23, $21, $23
2796 cmpult $22, $17, $28
2797 cmpult $23, $21, $19
2798 addq $23, $28, $23
2799 addq $8, $19, $8
# Column 4: += 2*a[3]*a[1].
2800 mulq $3, $1, $27
2801 umulh $3, $1, $24
2802 cmplt $27, $31, $20
2803 cmplt $24, $31, $25
2804 addq $27, $27, $27
2805 addq $24, $24, $24
2806 addq $24, $20, $24
2807 addq $8, $25, $8
2808 addq $22, $27, $22
2809 addq $23, $24, $23
2810 cmpult $22, $27, $18
2811 cmpult $23, $24, $17
2812 addq $23, $18, $23
2813 addq $8, $17, $8
# Column 4: += 2*a[4]*a[0].
2814 mulq $4, $0, $21
2815 umulh $4, $0, $28
2816 cmplt $21, $31, $19
2817 cmplt $28, $31, $20
2818 addq $21, $21, $21
2819 addq $28, $28, $28
2820 addq $28, $19, $28
2821 addq $8, $20, $8
2822 addq $22, $21, $22
2823 addq $23, $28, $23
2824 cmpult $22, $21, $25
2825 cmpult $23, $28, $27
2826 addq $23, $25, $23
2827 addq $8, $27, $8
2828 stq $22, 32($16)
2829 bis $31, $31, $22
# Column 5 (accumulators $23,$8,$22): += 2*a[3]*a[2].
2830 mulq $3, $2, $24
2831 umulh $3, $2, $18
2832 cmplt $24, $31, $17
2833 cmplt $18, $31, $19
2834 addq $24, $24, $24
2835 addq $18, $18, $18
2836 addq $18, $17, $18
2837 addq $22, $19, $22
2838 addq $23, $24, $23
2839 addq $8, $18, $8
2840 cmpult $23, $24, $20
2841 cmpult $8, $18, $21
2842 addq $8, $20, $8
2843 addq $22, $21, $22
# Column 5: += 2*a[4]*a[1].
2844 mulq $4, $1, $28
2845 umulh $4, $1, $25
2846 cmplt $28, $31, $27
2847 cmplt $25, $31, $17
2848 addq $28, $28, $28
2849 addq $25, $25, $25
2850 addq $25, $27, $25
2851 addq $22, $17, $22
2852 addq $23, $28, $23
2853 addq $8, $25, $8
2854 cmpult $23, $28, $19
2855 cmpult $8, $25, $24
2856 addq $8, $19, $8
2857 addq $22, $24, $22
# Column 5: += 2*a[5]*a[0].
2858 mulq $5, $0, $18
2859 umulh $5, $0, $20
2860 cmplt $18, $31, $21
2861 cmplt $20, $31, $27
2862 addq $18, $18, $18
2863 addq $20, $20, $20
2864 addq $20, $21, $20
2865 addq $22, $27, $22
2866 addq $23, $18, $23
2867 addq $8, $20, $8
2868 cmpult $23, $18, $17
2869 cmpult $8, $20, $28
2870 addq $8, $17, $8
2871 addq $22, $28, $22
2872 stq $23, 40($16)
2873 bis $31, $31, $23
# Column 6 (accumulators $8,$22,$23): += a[3]*a[3].
2874 mulq $3, $3, $25
2875 umulh $3, $3, $19
2876 addq $8, $25, $8
2877 addq $22, $19, $22
2878 cmpult $8, $25, $24
2879 cmpult $22, $19, $21
2880 addq $22, $24, $22
2881 addq $23, $21, $23
# Column 6: += 2*a[4]*a[2].
2882 mulq $4, $2, $27
2883 umulh $4, $2, $18
2884 cmplt $27, $31, $20
2885 cmplt $18, $31, $17
2886 addq $27, $27, $27
2887 addq $18, $18, $18
2888 addq $18, $20, $18
2889 addq $23, $17, $23
2890 addq $8, $27, $8
2891 addq $22, $18, $22
2892 cmpult $8, $27, $28
2893 cmpult $22, $18, $25
2894 addq $22, $28, $22
2895 addq $23, $25, $23
# Column 6: += 2*a[5]*a[1].
2896 mulq $5, $1, $19
2897 umulh $5, $1, $24
2898 cmplt $19, $31, $21
2899 cmplt $24, $31, $20
2900 addq $19, $19, $19
2901 addq $24, $24, $24
2902 addq $24, $21, $24
2903 addq $23, $20, $23
2904 addq $8, $19, $8
2905 addq $22, $24, $22
2906 cmpult $8, $19, $17
2907 cmpult $22, $24, $27
2908 addq $22, $17, $22
2909 addq $23, $27, $23
# Column 6: += 2*a[6]*a[0].
2910 mulq $6, $0, $18
2911 umulh $6, $0, $28
2912 cmplt $18, $31, $25
2913 cmplt $28, $31, $21
2914 addq $18, $18, $18
2915 addq $28, $28, $28
2916 addq $28, $25, $28
2917 addq $23, $21, $23
2918 addq $8, $18, $8
2919 addq $22, $28, $22
2920 cmpult $8, $18, $20
2921 cmpult $22, $28, $19
2922 addq $22, $20, $22
2923 addq $23, $19, $23
2924 stq $8, 48($16)
2925 bis $31, $31, $8
# Column 7 (accumulators $22,$23,$8): += 2*a[4]*a[3].
2926 mulq $4, $3, $24
2927 umulh $4, $3, $17
2928 cmplt $24, $31, $27
2929 cmplt $17, $31, $25
2930 addq $24, $24, $24
2931 addq $17, $17, $17
2932 addq $17, $27, $17
2933 addq $8, $25, $8
2934 addq $22, $24, $22
2935 addq $23, $17, $23
2936 cmpult $22, $24, $21
2937 cmpult $23, $17, $18
2938 addq $23, $21, $23
2939 addq $8, $18, $8
# Column 7: += 2*a[5]*a[2].
2940 mulq $5, $2, $28
2941 umulh $5, $2, $20
2942 cmplt $28, $31, $19
2943 cmplt $20, $31, $27
2944 addq $28, $28, $28
2945 addq $20, $20, $20
2946 addq $20, $19, $20
2947 addq $8, $27, $8
2948 addq $22, $28, $22
2949 addq $23, $20, $23
2950 cmpult $22, $28, $25
2951 cmpult $23, $20, $24
2952 addq $23, $25, $23
2953 addq $8, $24, $8
# Column 7: += 2*a[6]*a[1].
2954 mulq $6, $1, $17
2955 umulh $6, $1, $21
2956 cmplt $17, $31, $18
2957 cmplt $21, $31, $19
2958 addq $17, $17, $17
2959 addq $21, $21, $21
2960 addq $21, $18, $21
2961 addq $8, $19, $8
2962 addq $22, $17, $22
2963 addq $23, $21, $23
2964 cmpult $22, $17, $27
2965 cmpult $23, $21, $28
2966 addq $23, $27, $23
2967 addq $8, $28, $8
# Column 7: += 2*a[7]*a[0].
2968 mulq $7, $0, $20
2969 umulh $7, $0, $25
2970 cmplt $20, $31, $24
2971 cmplt $25, $31, $18
2972 addq $20, $20, $20
2973 addq $25, $25, $25
2974 addq $25, $24, $25
2975 addq $8, $18, $8
2976 addq $22, $20, $22
2977 addq $23, $25, $23
2978 cmpult $22, $20, $19
2979 cmpult $23, $25, $17
2980 addq $23, $19, $23
2981 addq $8, $17, $8
2982 stq $22, 56($16)
2983 bis $31, $31, $22
# Column 8 (accumulators $23,$8,$22): += a[4]*a[4].
2984 mulq $4, $4, $21
2985 umulh $4, $4, $27
2986 addq $23, $21, $23
2987 addq $8, $27, $8
2988 cmpult $23, $21, $28
2989 cmpult $8, $27, $24
2990 addq $8, $28, $8
2991 addq $22, $24, $22
# Column 8: += 2*a[5]*a[3].
2992 mulq $5, $3, $18
2993 umulh $5, $3, $20
2994 cmplt $18, $31, $25
2995 cmplt $20, $31, $19
2996 addq $18, $18, $18
2997 addq $20, $20, $20
2998 addq $20, $25, $20
2999 addq $22, $19, $22
3000 addq $23, $18, $23
3001 addq $8, $20, $8
3002 cmpult $23, $18, $17
3003 cmpult $8, $20, $21
3004 addq $8, $17, $8
3005 addq $22, $21, $22
# Column 8: += 2*a[6]*a[2].
3006 mulq $6, $2, $27
3007 umulh $6, $2, $28
3008 cmplt $27, $31, $24
3009 cmplt $28, $31, $25
3010 addq $27, $27, $27
3011 addq $28, $28, $28
3012 addq $28, $24, $28
3013 addq $22, $25, $22
3014 addq $23, $27, $23
3015 addq $8, $28, $8
3016 cmpult $23, $27, $19
3017 cmpult $8, $28, $18
3018 addq $8, $19, $8
3019 addq $22, $18, $22
# Column 8: += 2*a[7]*a[1].
3020 mulq $7, $1, $20
3021 umulh $7, $1, $17
3022 cmplt $20, $31, $21
3023 cmplt $17, $31, $24
3024 addq $20, $20, $20
3025 addq $17, $17, $17
3026 addq $17, $21, $17
3027 addq $22, $24, $22
3028 addq $23, $20, $23
3029 addq $8, $17, $8
3030 cmpult $23, $20, $25
3031 cmpult $8, $17, $27
3032 addq $8, $25, $8
3033 addq $22, $27, $22
3034 stq $23, 64($16)
3035 bis $31, $31, $23
# Column 9 (accumulators $8,$22,$23): += 2*a[5]*a[4].
3036 mulq $5, $4, $28
3037 umulh $5, $4, $19
3038 cmplt $28, $31, $18
3039 cmplt $19, $31, $21
3040 addq $28, $28, $28
3041 addq $19, $19, $19
3042 addq $19, $18, $19
3043 addq $23, $21, $23
3044 addq $8, $28, $8
3045 addq $22, $19, $22
3046 cmpult $8, $28, $24
3047 cmpult $22, $19, $20
3048 addq $22, $24, $22
3049 addq $23, $20, $23
# Column 9: += 2*a[6]*a[3].
3050 mulq $6, $3, $17
3051 umulh $6, $3, $25
3052 cmplt $17, $31, $27
3053 cmplt $25, $31, $18
3054 addq $17, $17, $17
3055 addq $25, $25, $25
3056 addq $25, $27, $25
3057 addq $23, $18, $23
3058 addq $8, $17, $8
3059 addq $22, $25, $22
3060 cmpult $8, $17, $21
3061 cmpult $22, $25, $28
3062 addq $22, $21, $22
3063 addq $23, $28, $23
# Column 9: += 2*a[7]*a[2].
3064 mulq $7, $2, $19
3065 umulh $7, $2, $24
3066 cmplt $19, $31, $20
3067 cmplt $24, $31, $27
3068 addq $19, $19, $19
3069 addq $24, $24, $24
3070 addq $24, $20, $24
3071 addq $23, $27, $23
3072 addq $8, $19, $8
3073 addq $22, $24, $22
3074 cmpult $8, $19, $18
3075 cmpult $22, $24, $17
3076 addq $22, $18, $22
3077 addq $23, $17, $23
3078 stq $8, 72($16)
3079 bis $31, $31, $8
# Column 10 (accumulators $22,$23,$8): += a[5]*a[5].
3080 mulq $5, $5, $25
3081 umulh $5, $5, $21
3082 addq $22, $25, $22
3083 addq $23, $21, $23
3084 cmpult $22, $25, $28
3085 cmpult $23, $21, $20
3086 addq $23, $28, $23
3087 addq $8, $20, $8
# Column 10: += 2*a[6]*a[4].
3088 mulq $6, $4, $27
3089 umulh $6, $4, $19
3090 cmplt $27, $31, $24
3091 cmplt $19, $31, $18
3092 addq $27, $27, $27
3093 addq $19, $19, $19
3094 addq $19, $24, $19
3095 addq $8, $18, $8
3096 addq $22, $27, $22
3097 addq $23, $19, $23
3098 cmpult $22, $27, $17
3099 cmpult $23, $19, $25
3100 addq $23, $17, $23
3101 addq $8, $25, $8
# Column 10: += 2*a[7]*a[3].
3102 mulq $7, $3, $21
3103 umulh $7, $3, $28
3104 cmplt $21, $31, $20
3105 cmplt $28, $31, $24
3106 addq $21, $21, $21
3107 addq $28, $28, $28
3108 addq $28, $20, $28
3109 addq $8, $24, $8
3110 addq $22, $21, $22
3111 addq $23, $28, $23
3112 cmpult $22, $21, $18
3113 cmpult $23, $28, $27
3114 addq $23, $18, $23
3115 addq $8, $27, $8
3116 stq $22, 80($16)
3117 bis $31, $31, $22
# Column 11 (accumulators $23,$8,$22): += 2*a[6]*a[5].
3118 mulq $6, $5, $19
3119 umulh $6, $5, $17
3120 cmplt $19, $31, $25
3121 cmplt $17, $31, $20
3122 addq $19, $19, $19
3123 addq $17, $17, $17
3124 addq $17, $25, $17
3125 addq $22, $20, $22
3126 addq $23, $19, $23
3127 addq $8, $17, $8
3128 cmpult $23, $19, $24
3129 cmpult $8, $17, $21
3130 addq $8, $24, $8
3131 addq $22, $21, $22
# Column 11: += 2*a[7]*a[4].
3132 mulq $7, $4, $28
3133 umulh $7, $4, $18
3134 cmplt $28, $31, $27
3135 cmplt $18, $31, $25
3136 addq $28, $28, $28
3137 addq $18, $18, $18
3138 addq $18, $27, $18
3139 addq $22, $25, $22
3140 addq $23, $28, $23
3141 addq $8, $18, $8
3142 cmpult $23, $28, $20
3143 cmpult $8, $18, $19
3144 addq $8, $20, $8
3145 addq $22, $19, $22
3146 stq $23, 88($16)
3147 bis $31, $31, $23
# Column 12 (accumulators $8,$22,$23): += a[6]*a[6].
3148 mulq $6, $6, $17
3149 umulh $6, $6, $24
3150 addq $8, $17, $8
3151 addq $22, $24, $22
3152 cmpult $8, $17, $21
3153 cmpult $22, $24, $27
3154 addq $22, $21, $22
3155 addq $23, $27, $23
# Column 12: += 2*a[7]*a[5].
3156 mulq $7, $5, $25
3157 umulh $7, $5, $28
3158 cmplt $25, $31, $18
3159 cmplt $28, $31, $20
3160 addq $25, $25, $25
3161 addq $28, $28, $28
3162 addq $28, $18, $28
3163 addq $23, $20, $23
3164 addq $8, $25, $8
3165 addq $22, $28, $22
3166 cmpult $8, $25, $19
3167 cmpult $22, $28, $17
3168 addq $22, $19, $22
3169 addq $23, $17, $23
3170 stq $8, 96($16)
3171 bis $31, $31, $8
# Column 13 (accumulators $22,$23,$8): += 2*a[7]*a[6].
3172 mulq $7, $6, $24
3173 umulh $7, $6, $21
3174 cmplt $24, $31, $27
3175 cmplt $21, $31, $18
3176 addq $24, $24, $24
3177 addq $21, $21, $21
3178 addq $21, $27, $21
3179 addq $8, $18, $8
3180 addq $22, $24, $22
3181 addq $23, $21, $23
3182 cmpult $22, $24, $20
3183 cmpult $23, $21, $25
3184 addq $23, $20, $23
3185 addq $8, $25, $8
3186 stq $22, 104($16)
3187 bis $31, $31, $22
# Column 14 (accumulators $23,$8,$22): += a[7]*a[7].
3188 mulq $7, $7, $28
3189 umulh $7, $7, $19
3190 addq $23, $28, $23
3191 addq $8, $19, $8
3192 cmpult $23, $28, $17
3193 cmpult $8, $19, $27
3194 addq $8, $17, $8
# The carry accumulated into $22 here is never stored.
3195 addq $22, $27, $22
# Store the last two result words and return through $26.
3196 stq $23, 112($16)
3197 stq $8, 120($16)
3198 ret $31,($26),1
3199 .end bn_sqr_comba8
diff --git a/src/lib/libcrypto/bn/asm/alpha.s.works b/src/lib/libcrypto/bn/asm/alpha.s.works
new file mode 100644
index 0000000000..ee6c587809
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.s.works
@@ -0,0 +1,533 @@
1
2 # DEC Alpha assembler
3 # The bn_div64 is actually gcc output but the other parts are hand done.
4 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
5 # bn_div64.
6 # I've gone back and re-done most of the routines.
7 # The key thing to remember for the 164 CPU is that while a
8 # multiply operation takes 8 cycles, another one can only be issued
9 # after 4 cycles have elapsed. I've made modifications to help
10 # improve this. Also, normally, the result of a ld instruction will not
11 # be available for about 3 cycles.
12 .file 1 "bn_asm.c"
13 .set noat
14gcc2_compiled.:
15__gnu_compiled_c:
16 .text
17 .align 3
18 .globl bn_mul_add_words
19 .ent bn_mul_add_words
20bn_mul_add_words: # r[i] += a[i]*w for num words; $16=r, $17=a, $18=num, $19=w, carry word kept in $0
21bn_mul_add_words..ng:
22 .frame $30,0,$26,0
23 .prologue 0
24 .align 5
25 subq $18,4,$18 # num -= 4: bias the count for the 4-way unrolled loop
26 bis $31,$31,$0 # carry = 0
27 blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
28 ldq $20,0($17) # 1 1
29 ldq $1,0($16) # 1 1
30 .align 3
31$42: # unrolled loop: four words per pass, word 1 already loaded in $20/$1
32 mulq $20,$19,$5 # 1 2 1 ######
33 ldq $21,8($17) # 2 1
34 ldq $2,8($16) # 2 1
35 umulh $20,$19,$20 # 1 2 ######
36 ldq $27,16($17) # 3 1
37 ldq $3,16($16) # 3 1
38 mulq $21,$19,$6 # 2 2 1 ######
39 ldq $28,24($17) # 4 1
40 addq $1,$5,$1 # 1 2 2
41 ldq $4,24($16) # 4 1
42 umulh $21,$19,$21 # 2 2 ######
43 cmpult $1,$5,$22 # 1 2 3 1
44 addq $20,$22,$20 # 1 3 1
45 addq $1,$0,$1 # 1 2 3 1
46 mulq $27,$19,$7 # 3 2 1 ######
47 cmpult $1,$0,$0 # 1 2 3 2
48 addq $2,$6,$2 # 2 2 2
49 addq $20,$0,$0 # 1 3 2
50 cmpult $2,$6,$23 # 2 2 3 1
51 addq $21,$23,$21 # 2 3 1
52 umulh $27,$19,$27 # 3 2 ######
53 addq $2,$0,$2 # 2 2 3 1
54 cmpult $2,$0,$0 # 2 2 3 2
55 subq $18,4,$18 # num -= 4
56 mulq $28,$19,$8 # 4 2 1 ######
57 addq $21,$0,$0 # 2 3 2
58 addq $3,$7,$3 # 3 2 2
59 addq $16,32,$16 # r += 4 words (the stores below use negative offsets)
60 cmpult $3,$7,$24 # 3 2 3 1
61 stq $1,-32($16) # 1 2 4
62 umulh $28,$19,$28 # 4 2 ######
63 addq $27,$24,$27 # 3 3 1
64 addq $3,$0,$3 # 3 2 3 1
65 stq $2,-24($16) # 2 2 4
66 cmpult $3,$0,$0 # 3 2 3 2
67 stq $3,-16($16) # 3 2 4
68 addq $4,$8,$4 # 4 2 2
69 addq $27,$0,$0 # 3 3 2
70 cmpult $4,$8,$25 # 4 2 3 1
71 addq $17,32,$17 # a += 4 words
72 addq $28,$25,$28 # 4 3 1
73 addq $4,$0,$4 # 4 2 3 1
74 cmpult $4,$0,$0 # 4 2 3 2
75 stq $4,-8($16) # 4 2 4
76 addq $28,$0,$0 # 4 3 2
77 blt $18,$43 # fewer than 4 words left: leave the unrolled loop
78 # at least 4 more words: preload word 1 and go round again
79 ldq $20,0($17) # 1 1
80 ldq $1,0($16) # 1 1
81
82 br $42
83
84 .align 4
85$45: # tail loop: one word per iteration
86 ldq $20,0($17) # 4 1
87 ldq $1,0($16) # 4 1
88 mulq $20,$19,$5 # 4 2 1
89 subq $18,1,$18 # num--
90 addq $16,8,$16 # r++
91 addq $17,8,$17 # a++
92 umulh $20,$19,$20 # 4 2
93 addq $1,$5,$1 # 4 2 2
94 cmpult $1,$5,$22 # 4 2 3 1
95 addq $20,$22,$20 # 4 3 1
96 addq $1,$0,$1 # 4 2 3 1
97 cmpult $1,$0,$0 # 4 2 3 2
98 addq $20,$0,$0 # 4 3 2
99 stq $1,-8($16) # 4 2 4
100 bgt $18,$45 # more words left?
101 ret $31,($26),1 # else exit
102
103 .align 4
104$43: # undo the -4 bias; $18 becomes the number of leftover words
105 addq $18,4,$18
106 bgt $18,$45 # goto tail code
107 ret $31,($26),1 # else exit
108
109 .end bn_mul_add_words
110 .align 3
111 .globl bn_mul_words
112 .ent bn_mul_words
113bn_mul_words: # r[i] = low(a[i]*w + carry) for num words; $16=r, $17=a, $18=num, $19=w, carry word kept in $0
114bn_mul_words..ng:
115 .frame $30,0,$26,0
116 .prologue 0
117 .align 5
118 subq $18,4,$18 # num -= 4: bias the count for the 4-way unrolled loop
119 bis $31,$31,$0 # carry = 0
120 blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
121 ldq $20,0($17) # 1 1
122 .align 3
123$142: # unrolled loop: four words per pass, word 1 already loaded in $20
124
125 mulq $20,$19,$5 # 1 2 1 #####
126 ldq $21,8($17) # 2 1
127 ldq $27,16($17) # 3 1
128 umulh $20,$19,$20 # 1 2 #####
129 ldq $28,24($17) # 4 1
130 mulq $21,$19,$6 # 2 2 1 #####
131 addq $5,$0,$5 # 1 2 3 1
132 subq $18,4,$18 # num -= 4
133 cmpult $5,$0,$0 # 1 2 3 2
134 umulh $21,$19,$21 # 2 2 #####
135 addq $20,$0,$0 # 1 3 2
136 addq $17,32,$17 # a += 4 words (all four loads are already issued)
137 addq $6,$0,$6 # 2 2 3 1
138 mulq $27,$19,$7 # 3 2 1 #####
139 cmpult $6,$0,$0 # 2 2 3 2
140 addq $21,$0,$0 # 2 3 2
141 addq $16,32,$16 # r += 4 words (the stores below use negative offsets)
142 umulh $27,$19,$27 # 3 2 #####
143 stq $5,-32($16) # 1 2 4
144 mulq $28,$19,$8 # 4 2 1 #####
145 addq $7,$0,$7 # 3 2 3 1
146 stq $6,-24($16) # 2 2 4
147 cmpult $7,$0,$0 # 3 2 3 2
148 umulh $28,$19,$28 # 4 2 #####
149 addq $27,$0,$0 # 3 3 2
150 stq $7,-16($16) # 3 2 4
151 addq $8,$0,$8 # 4 2 3 1
152 cmpult $8,$0,$0 # 4 2 3 2
153
154 addq $28,$0,$0 # 4 3 2
155
156 stq $8,-8($16) # 4 2 4
157
158 blt $18,$143 # fewer than 4 words left: leave the unrolled loop
159
160 ldq $20,0($17) # 1 1
161
162 br $142
163
164 .align 4
165$145: # tail loop: one word per iteration
166 ldq $20,0($17) # 4 1
167 mulq $20,$19,$5 # 4 2 1
168 subq $18,1,$18 # num--
169 umulh $20,$19,$20 # 4 2
170 addq $5,$0,$5 # 4 2 3 1
171 addq $16,8,$16 # r++
172 cmpult $5,$0,$0 # 4 2 3 2
173 addq $17,8,$17 # a++
174 addq $20,$0,$0 # 4 3 2
175 stq $5,-8($16) # 4 2 4
176
177 bgt $18,$145 # more words left?
178 ret $31,($26),1 # else exit
179
180 .align 4
181$143: # undo the -4 bias; $18 becomes the number of leftover words
182 addq $18,4,$18
183 bgt $18,$145 # goto tail code
184 ret $31,($26),1 # else exit
185
186 .end bn_mul_words
187 .align 3
188 .globl bn_sqr_words
189 .ent bn_sqr_words
190bn_sqr_words: # r[2i],r[2i+1] = low,high halves of a[i]*a[i]; $16=r, $17=a, $18=num
191bn_sqr_words..ng:
192 .frame $30,0,$26,0
193 .prologue 0
194
195 subq $18,4,$18 # num -= 4: bias the count for the 4-way unrolled loop
196 blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
197 ldq $20,0($17) # a[0]
198 .align 3
199$542: # unrolled loop: four input words (eight output words) per pass
200 mulq $20,$20,$5 # low half of a[0]^2
201 ldq $21,8($17) # a[1]
202 subq $18,4 # NOTE(review): two-operand form, presumably assembled as $18 = $18 - 4 -- confirm
203 umulh $20,$20,$1 # high half of a[0]^2
204 ldq $27,16($17) # a[2]
205 mulq $21,$21,$6 # low half of a[1]^2
206 ldq $28,24($17) # a[3]
207 stq $5,0($16) # r[0]
208 umulh $21,$21,$2 # high half of a[1]^2
209 stq $1,8($16) # r[1]
210 mulq $27,$27,$7 # low half of a[2]^2
211 stq $6,16($16) # r[2]
212 umulh $27,$27,$3 # high half of a[2]^2
213 stq $2,24($16) # r[3]
214 mulq $28,$28,$8 # low half of a[3]^2
215 stq $7,32($16) # r[4]
216 umulh $28,$28,$4 # high half of a[3]^2
217 stq $3,40($16) # r[5]
218
219 addq $16,64,$16 # r += 8 words
220 addq $17,32,$17 # a += 4 words
221 stq $8,-16($16) # r[6]
222 stq $4,-8($16) # r[7]
223
224 blt $18,$543 # fewer than 4 words left: leave the unrolled loop
225 ldq $20,0($17) # preload a[0] for the next pass
226 br $542
227
228$442: # tail loop: one input word per iteration
229 ldq $20,0($17) # a[0]
230 mulq $20,$20,$5 # a[0]*a[0] low part
231 addq $16,16,$16 # r += 2 words
232 addq $17,8,$17 # a++
233 subq $18,1,$18 # num--
234 umulh $20,$20,$1 # a[0]*a[0] high part
235 stq $5,-16($16) # r[0]
236 stq $1,-8($16) # r[1]
237
238 bgt $18,$442 # more words left?
239 ret $31,($26),1 # else exit
240
241 .align 4
242$543: # undo the -4 bias; $18 becomes the number of leftover words
243 addq $18,4,$18
244 bgt $18,$442 # goto tail code
245 ret $31,($26),1 # else exit
246 .end bn_sqr_words
247
248 .align 3
249 .globl bn_add_words
250 .ent bn_add_words
251bn_add_words: # r[i] = a[i] + b[i] + carry for num words; $16=r, $17=a, $18=b, $19=num, carry kept in $0
252bn_add_words..ng:
253 .frame $30,0,$26,0
254 .prologue 0
255
256 subq $19,4,$19 # num -= 4: bias the count for the 4-way unrolled loop
257 bis $31,$31,$0 # carry = 0
258 blt $19,$900 # fewer than 4 words: go to the tail code
259 ldq $5,0($17) # a[0]
260 ldq $1,0($18) # b[0]
261 .align 3
262$901: # unrolled loop: four words per pass, word 0 already loaded in $5/$1
263 addq $1,$5,$1 # r=a+b;
264 ldq $6,8($17) # a[1]
265 cmpult $1,$5,$22 # did a+b overflow?
266 ldq $2,8($18) # b[1]
267 addq $1,$0,$1 # r += carry-in
268 ldq $7,16($17) # a[2]
269 cmpult $1,$0,$0 # did adding the carry overflow?
270 ldq $3,16($18) # b[2]
271 addq $0,$22,$0 # carry-out = sum of the two overflow flags
272 ldq $8,24($17) # a[3]
273 addq $2,$6,$2 # r=a+b;
274 ldq $4,24($18) # b[3]
275 cmpult $2,$6,$23 # did a+b overflow?
276 addq $3,$7,$3 # r=a+b;
277 addq $2,$0,$2 # r += carry-in
278 cmpult $3,$7,$24 # did a+b overflow?
279 cmpult $2,$0,$0 # did adding the carry overflow?
280 addq $4,$8,$4 # r=a+b;
281 addq $0,$23,$0 # carry-out of word 1
282 cmpult $4,$8,$25 # did a+b overflow?
283 addq $3,$0,$3 # r += carry-in
284 stq $1,0($16) # r[0]=c
285 cmpult $3,$0,$0 # did adding the carry overflow?
286 stq $2,8($16) # r[1]=c
287 addq $0,$24,$0 # carry-out of word 2
288 stq $3,16($16) # r[2]=c
289 addq $4,$0,$4 # r += carry-in
290 subq $19,4,$19 # num -= 4
291 cmpult $4,$0,$0 # did adding the carry overflow?
292 addq $17,32,$17 # a += 4 words
293 addq $0,$25,$0 # carry-out of word 3
294 stq $4,24($16) # r[3]=c
295 addq $18,32,$18 # b += 4 words
296 addq $16,32,$16 # r += 4 words
297
298 blt $19,$900 # fewer than 4 words left: leave the unrolled loop
299 ldq $5,0($17) # a[0]
300 ldq $1,0($18) # b[0]
301 br $901
302 .align 4
303$945: # tail loop: one word per iteration
304 ldq $5,0($17) # a[0]
305 ldq $1,0($18) # b[0]
306 addq $1,$5,$1 # r=a+b;
307 subq $19,1,$19 # loop--
308 cmpult $1,$5,$22 # did a+b overflow? (must be tested BEFORE the carry-in is added to $1)
309 addq $17,8,$17 # a++
310 addq $1,$0,$1 # r += carry-in
311 cmpult $1,$0,$0 # did adding the carry overflow?
312 addq $18,8,$18 # b++
313 stq $1,0($16) # r[0]=c
314 addq $0,$22,$0 # carry-out = sum of the two overflow flags
315 addq $16,8,$16 # r++
316
317 bgt $19,$945 # more words left?
318 ret $31,($26),1 # else exit
319
320$900: # undo the -4 bias; $19 becomes the number of leftover words
321 addq $19,4,$19
322 bgt $19,$945 # goto tail code
323 ret $31,($26),1 # else exit
324 .end bn_add_words
325
326 #
327 # What follows was taken directly from the C compiler with a few
328 # hacks to redo the labels.
329 #
330.text
331 .align 3
332 .globl bn_div64
333 .ent bn_div64
334bn_div64: # divides using the three arguments in $16, $17 (dividend halves) and $18 (divisor); quotient returned in $0
335 ldgp $29,0($27)
336bn_div64..ng:
337 lda $30,-48($30) # 48-byte frame: return address plus saved $9-$13
338 .frame $30,48,$26,0
339 stq $26,0($30)
340 stq $9,8($30)
341 stq $10,16($30)
342 stq $11,24($30)
343 stq $12,32($30)
344 stq $13,40($30)
345 .mask 0x4003e00,-48
346 .prologue 1
347 bis $16,$16,$9 # $9 = first argument
348 bis $17,$17,$10 # $10 = second argument
349 bis $18,$18,$11 # $11 = third argument (the divisor)
350 bis $31,$31,$13 # $13 = 0: accumulates the upper half of the quotient
351 bis $31,2,$12 # $12 = 2: the main loop below runs twice
352 bne $11,$119
353 lda $0,-1 # divisor is zero: return -1
354 br $31,$136
355 .align 4
356$119:
357 bis $11,$11,$16
358 jsr $26,BN_num_bits_word # $0 = BN_num_bits_word(divisor)
359 ldgp $29,0($26)
360 subq $0,64,$1
361 beq $1,$120 # divisor already uses all 64 bits: skip the range check
362 bis $31,1,$1
363 sll $1,$0,$1
364 cmpule $9,$1,$1
365 bne $1,$120 # $9 <= (1 << bits): in range
366 # lda $16,_IO_stderr_
367 # lda $17,$C32
368 # bis $0,$0,$18
369 # jsr $26,fprintf
370 # ldgp $29,0($26)
371 jsr $26,abort # out of range: abort (the diagnostic fprintf above is commented out)
372 ldgp $29,0($26)
373 .align 4
374$120:
375 bis $31,64,$3
376 cmpult $9,$11,$2
377 subq $3,$0,$1
378 addl $1,$31,$0 # $0 = 64 - bits: the normalising shift count
379 subq $9,$11,$1
380 cmoveq $2,$1,$9 # if $9 >= divisor then $9 -= divisor
381 beq $0,$122 # shift count is zero: nothing to normalise
382 zapnot $0,15,$2
383 subq $3,$0,$1
384 sll $11,$2,$11 # shift the divisor left by the shift count
385 sll $9,$2,$3
386 srl $10,$1,$1
387 sll $10,$2,$10
388 bis $3,$1,$9 # shift the $9:$10 pair left by the same amount
389$122:
390 srl $11,32,$5 # $5 = upper 32 bits of the divisor
391 zapnot $11,15,$6 # $6 = lower 32 bits of the divisor
392 lda $7,-1
393 .align 5
394$123: # main loop: produces one 32-bit quotient digit in $27 per pass
395 srl $9,32,$1
396 subq $1,$5,$1
397 bne $1,$126
398 zapnot $7,15,$27 # upper half of $9 equals $5: start from the estimate 0xffffffff
399 br $31,$127
400 .align 4
401$126:
402 bis $9,$9,$24
403 bis $5,$5,$25
404 divqu $24,$25,$27 # otherwise estimate = $9 / $5
405$127:
406 srl $10,32,$4
407 .align 5
408$128: # decrement the estimate until the test below passes
409 mulq $27,$5,$1
410 subq $9,$1,$3
411 zapnot $3,240,$1
412 bne $1,$129
413 mulq $6,$27,$2
414 sll $3,32,$1
415 addq $1,$4,$1
416 cmpule $2,$1,$2
417 bne $2,$129
418 subq $27,1,$27
419 br $31,$128
420 .align 4
421$129: # subtract estimate * divisor from the $9:$10 pair
422 mulq $27,$6,$1
423 mulq $27,$5,$4
424 srl $1,32,$3
425 sll $1,32,$1
426 addq $4,$3,$4
427 cmpult $10,$1,$2
428 subq $10,$1,$10
429 addq $2,$4,$2
430 cmpult $9,$2,$1
431 bis $2,$2,$4
432 beq $1,$134
433 addq $9,$11,$9 # the subtraction would underflow: add the divisor back and lower the digit
434 subq $27,1,$27
435$134:
436 subl $12,1,$12
437 subq $9,$4,$9
438 beq $12,$124 # second pass finished
439 sll $27,32,$13 # first digit becomes the upper half of the quotient
440 sll $9,32,$2
441 srl $10,32,$1
442 sll $10,32,$10
443 bis $2,$1,$9 # shift the $9:$10 pair left by 32 bits for the second pass
444 br $31,$123
445 .align 4
446$124:
447 bis $13,$27,$0 # result = upper digit | lower digit
448$136: # common exit: restore saved registers and pop the frame
449 ldq $26,0($30)
450 ldq $9,8($30)
451 ldq $10,16($30)
452 ldq $11,24($30)
453 ldq $12,32($30)
454 ldq $13,40($30)
455 addq $30,48,$30
456 ret $31,($26),1
457 .end bn_div64
458
459 .set noat
460 .text
461 .align 3
462 .globl bn_sub_words
463 .ent bn_sub_words
464bn_sub_words: # r[i] = a[i] - b[i] - borrow for num words; $16=r, $17=a, $18=b, $19=num, borrow kept in $0
465bn_sub_words..ng:
466 .frame $30,0,$26,0
467 .prologue 0
468
469 subq $19, 4, $19 # num -= 4: bias the count for the 4-way unrolled loop
470 bis $31, $31, $0 # borrow = 0
471 blt $19, $100 # fewer than 4 words: go to the tail code
472 ldq $1, 0($17) # a[0]
473 ldq $2, 0($18) # b[0]
474$101: # unrolled loop: four words per pass, word 0 already loaded in $1/$2
475 ldq $3, 8($17) # a[1]
476 cmpult $1, $2, $4 # does a[0]-b[0] borrow?
477 ldq $5, 8($18) # b[1]
478 subq $1, $2, $1
479 ldq $6, 16($17) # a[2]
480 cmpult $1, $0, $2 # does subtracting the incoming borrow underflow?
481 ldq $7, 16($18) # b[2]
482 subq $1, $0, $23 # $23 = result word 0
483 ldq $8, 24($17) # a[3]
484 addq $2, $4, $0 # borrow-out of word 0
485 cmpult $3, $5, $24
486 subq $3, $5, $3
487 ldq $22, 24($18) # b[3]
488 cmpult $3, $0, $5
489 subq $3, $0, $25 # $25 = result word 1
490 addq $5, $24, $0 # borrow-out of word 1
491 cmpult $6, $7, $27
492 subq $6, $7, $6
493 stq $23, 0($16) # r[0]
494 cmpult $6, $0, $7
495 subq $6, $0, $28 # $28 = result word 2
496 addq $7, $27, $0 # borrow-out of word 2
497 cmpult $8, $22, $21
498 subq $8, $22, $8
499 stq $25, 8($16) # r[1]
500 cmpult $8, $0, $22
501 subq $8, $0, $20 # $20 = result word 3
502 addq $22, $21, $0 # borrow-out of word 3
503 stq $28, 16($16) # r[2]
504 subq $19, 4, $19 # num -= 4
505 stq $20, 24($16) # r[3]
506 addq $17, 32, $17 # a += 4 words
507 addq $18, 32, $18 # b += 4 words
508 addq $16, 32, $16 # r += 4 words
509 blt $19, $100 # fewer than 4 words left: leave the unrolled loop
510 ldq $1, 0($17) # preload a[0] for the next pass
511 ldq $2, 0($18) # preload b[0] for the next pass
512 br $101
513$102: # tail loop: one word per iteration
514 ldq $1, 0($17)
515 ldq $2, 0($18)
516 cmpult $1, $2, $27 # does a-b borrow?
517 subq $1, $2, $1
518 cmpult $1, $0, $2 # does subtracting the incoming borrow underflow?
519 subq $1, $0, $1
520 stq $1, 0($16)
521 addq $2, $27, $0 # borrow-out = sum of the two underflow flags
522 addq $17, 8, $17 # a++
523 addq $18, 8, $18 # b++
524 addq $16, 8, $16 # r++
525 subq $19, 1, $19 # num--
526 bgt $19, $102 # more words left?
527 ret $31,($26),1
528$100: # undo the -4 bias; $19 becomes the number of leftover words
529 addq $19, 4, $19
530 bgt $19, $102
531$103: # no branch in this routine targets this label
532 ret $31,($26),1
533 .end bn_sub_words
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/add.pl b/src/lib/libcrypto/bn/asm/alpha.works/add.pl
new file mode 100644
index 0000000000..4dc76e6b69
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/add.pl
@@ -0,0 +1,119 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
4sub bn_add_words
5 { # Emits the routine $name: r[i] = a[i] + b[i] + carry over `count` words, carry kept in $cc.
6 local($name)=@_;
7 local($cc,$a,$b,$r);
8
9 &init_pool(4);
10 ($cc)=GR("r0"); # running carry
11
12 $rp=&wparam(0); # result pointer
13 $ap=&wparam(1); # first operand pointer
14 $bp=&wparam(2); # second operand pointer
15 $count=&wparam(3); # number of words
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count); # bias the count for the 4-way unrolled loop
21 &mov("zero",$cc); # carry = 0
22 # fewer than 4 words: skip the unrolled loop and go fix up the count
23 &blt($count,&label("finish"));
24
25 ($a0,$b0)=&NR(2);
26 &ld($a0,&QWPw(0,$ap));
27 &ld($b0,&QWPw(0,$bp));
28
29##########################################################
30 &set_label("loop"); # four words per pass; word 0 is already in $a0/$b0
31
32 ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
33 ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
34 ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
35 ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
36 ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
37 ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
38
39 ($o0,$t0)=&NR(2);
40 &add($a0,$b0,$o0); # o = a + b
41 &cmpult($o0,$b0,$t0); # t = (a + b overflowed)
42 &add($o0,$cc,$o0); # o += carry-in
43 &cmpult($o0,$cc,$cc); # cc = (adding the carry overflowed)
44 &add($cc,$t0,$cc); &FR($t0); # carry-out = sum of the two flags
45
46 ($t1,$o1)=&NR(2);
47
48 &add($a1,$b1,$o1); &FR($a1);
49 &cmpult($o1,$b1,$t1); &FR($b1);
50 &add($o1,$cc,$o1);
51 &cmpult($o1,$cc,$cc);
52 &add($cc,$t1,$cc); &FR($t1);
53
54 ($t2,$o2)=&NR(2);
55
56 &add($a2,$b2,$o2); &FR($a2);
57 &cmpult($o2,$b2,$t2); &FR($b2);
58 &add($o2,$cc,$o2);
59 &cmpult($o2,$cc,$cc);
60 &add($cc,$t2,$cc); &FR($t2);
61
62 ($t3,$o3)=&NR(2);
63
64 &add($a3,$b3,$o3); &FR($a3);
65 &cmpult($o3,$b3,$t3); &FR($b3);
66 &add($o3,$cc,$o3);
67 &cmpult($o3,$cc,$cc);
68 &add($cc,$t3,$cc); &FR($t3);
69
70 &st($o0,&QWPw(0,$rp)); &FR($o0); # r[0]
71 &st($o1,&QWPw(1,$rp)); &FR($o1); # r[1]
72 &st($o2,&QWPw(2,$rp)); &FR($o2); # r[2]
73 &st($o3,&QWPw(3,$rp)); &FR($o3); # r[3]
74
75 &sub($count,4,$count); # count-=4
76 &add($ap,4*$QWS,$ap); # ap+=4 words
77 &add($bp,4*$QWS,$bp); # bp+=4 words
78 &add($rp,4*$QWS,$rp); # rp+=4 words
79
80 &blt($count,&label("finish")); # fewer than 4 words left
81 &ld($a0,&QWPw(0,$ap)); # preload word 0 of the next pass
82 &ld($b0,&QWPw(0,$bp));
83 &br(&label("loop"));
84##################################################
85 # Do the last 0..3 words
86
87 ($t0,$o0)=&NR(2);
88 &set_label("last_loop");
89
90 &ld($a0,&QWPw(0,$ap)); # get a
91 &ld($b0,&QWPw(0,$bp)); # get b
92
93 &add($a0,$b0,$o0);
94 &cmpult($o0,$b0,$t0); # did a+b carry?
95 &add($o0,$cc,$o0); # add the carry-in
96 &cmpult($o0,$cc,$cc); # did adding the carry-in carry?
97 &add($cc,$t0,$cc); # add the carries
98 &st($o0,&QWPw(0,$rp)); # save
99
100 &add($ap,$QWS,$ap);
101 &add($bp,$QWS,$bp);
102 &add($rp,$QWS,$rp);
103 &sub($count,1,$count);
104 &bgt($count,&label("last_loop"));
105 &function_end_A($name);
106
107######################################################
108 &set_label("finish"); # undo the -4 bias; $count becomes the number of leftover words
109 &add($count,4,$count);
110 &bgt($count,&label("last_loop"));
111
112 &FR($o0,$t0,$a0,$b0);
113 &set_label("end");
114 &function_end($name);
115
116 &fin_pool;
117 }
118
1191;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/div.pl b/src/lib/libcrypto/bn/asm/alpha.works/div.pl
new file mode 100644
index 0000000000..7ec144377f
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/div.pl
@@ -0,0 +1,144 @@
1#!/usr/local/bin/perl
2
3sub bn_div64 # Appends a fixed, pre-written bn_div64 assembly listing to the output via asm_add().
4 {
5 local($data)=<<'EOF'; # single-quoted heredoc: the listing below is passed through without interpolation
6 #
7 # What follows was taken directly from the C compiler with a few
8 # hacks to redo the lables.
9 #
10.text
11 .set noreorder
12 .set volatile
13 .align 3
14 .globl bn_div64
15 .ent bn_div64
16bn_div64:
17 ldgp $29,0($27)
18bn_div64..ng:
19 lda $30,-48($30)
20 .frame $30,48,$26,0
21 stq $26,0($30)
22 stq $9,8($30)
23 stq $10,16($30)
24 stq $11,24($30)
25 stq $12,32($30)
26 stq $13,40($30)
27 .mask 0x4003e00,-48
28 .prologue 1
29 bis $16,$16,$9
30 bis $17,$17,$10
31 bis $18,$18,$11
32 bis $31,$31,$13
33 bis $31,2,$12
34 bne $11,$9119
35 lda $0,-1
36 br $31,$9136
37 .align 4
38$9119:
39 bis $11,$11,$16
40 jsr $26,BN_num_bits_word
41 ldgp $29,0($26)
42 subq $0,64,$1
43 beq $1,$9120
44 bis $31,1,$1
45 sll $1,$0,$1
46 cmpule $9,$1,$1
47 bne $1,$9120
48 # lda $16,_IO_stderr_
49 # lda $17,$C32
50 # bis $0,$0,$18
51 # jsr $26,fprintf
52 # ldgp $29,0($26)
53 jsr $26,abort
54 ldgp $29,0($26)
55 .align 4
56$9120:
57 bis $31,64,$3
58 cmpult $9,$11,$2
59 subq $3,$0,$1
60 addl $1,$31,$0
61 subq $9,$11,$1
62 cmoveq $2,$1,$9
63 beq $0,$9122
64 zapnot $0,15,$2
65 subq $3,$0,$1
66 sll $11,$2,$11
67 sll $9,$2,$3
68 srl $10,$1,$1
69 sll $10,$2,$10
70 bis $3,$1,$9
71$9122:
72 srl $11,32,$5
73 zapnot $11,15,$6
74 lda $7,-1
75 .align 5
76$9123:
77 srl $9,32,$1
78 subq $1,$5,$1
79 bne $1,$9126
80 zapnot $7,15,$27
81 br $31,$9127
82 .align 4
83$9126:
84 bis $9,$9,$24
85 bis $5,$5,$25
86 divqu $24,$25,$27
87$9127:
88 srl $10,32,$4
89 .align 5
90$9128:
91 mulq $27,$5,$1
92 subq $9,$1,$3
93 zapnot $3,240,$1
94 bne $1,$9129
95 mulq $6,$27,$2
96 sll $3,32,$1
97 addq $1,$4,$1
98 cmpule $2,$1,$2
99 bne $2,$9129
100 subq $27,1,$27
101 br $31,$9128
102 .align 4
103$9129:
104 mulq $27,$6,$1
105 mulq $27,$5,$4
106 srl $1,32,$3
107 sll $1,32,$1
108 addq $4,$3,$4
109 cmpult $10,$1,$2
110 subq $10,$1,$10
111 addq $2,$4,$2
112 cmpult $9,$2,$1
113 bis $2,$2,$4
114 beq $1,$9134
115 addq $9,$11,$9
116 subq $27,1,$27
117$9134:
118 subl $12,1,$12
119 subq $9,$4,$9
120 beq $12,$9124
121 sll $27,32,$13
122 sll $9,32,$2
123 srl $10,32,$1
124 sll $10,32,$10
125 bis $2,$1,$9
126 br $31,$9123
127 .align 4
128$9124:
129 bis $13,$27,$0
130$9136:
131 ldq $26,0($30)
132 ldq $9,8($30)
133 ldq $10,16($30)
134 ldq $11,24($30)
135 ldq $12,32($30)
136 ldq $13,40($30)
137 addq $30,48,$30
138 ret $31,($26),1
139 .end bn_div64
140EOF
141 &asm_add($data); # hand the whole listing to the output accumulator
142 }
143
1441;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl
new file mode 100644
index 0000000000..b182bae452
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl
@@ -0,0 +1,116 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
4sub bn_mul_words
5 { # Emits the routine $name: r[i] = low(a[i]*word + carry) over `count` words, carry kept in $cc.
6 local($name)=@_;
7 local($cc,$a,$b,$r,$couny); # NOTE(review): $couny is never used; presumably a typo for $count -- confirm
8
9 &init_pool(4);
10 ($cc)=GR("r0"); # running carry word
11
12 $rp=&wparam(0); # result pointer
13 $ap=&wparam(1); # source pointer
14 $count=&wparam(2); # number of words
15 $word=&wparam(3); # multiplier
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc); # carry = 0
22 &br(&label("finish")); # unconditional: the emitted blt and the two loads below can never execute
23 &blt($count,&label("finish"));
24
25 ($a0,$r0)=&NR(2); # NOTE(review): these two registers are not released by any &FR in this sub -- confirm intent
26 &ld($a0,&QWPw(0,$ap));
27 &ld($r0,&QWPw(0,$rp));
28
29$a=<<'EOF'; # the unrolled loop below is only stored as text in $a; this sub never emits it
30##########################################################
31 &set_label("loop");
32
33 ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
34 ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
35 ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
36 ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
37 ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
38 ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
39
40 ($o0,$t0)=&NR(2);
41 &add($a0,$b0,$o0);
42 &cmpult($o0,$b0,$t0);
43 &add($o0,$cc,$o0);
44 &cmpult($o0,$cc,$cc);
45 &add($cc,$t0,$cc); &FR($t0);
46
47 ($t1,$o1)=&NR(2);
48
49 &add($a1,$b1,$o1); &FR($a1);
50 &cmpult($o1,$b1,$t1); &FR($b1);
51 &add($o1,$cc,$o1);
52 &cmpult($o1,$cc,$cc);
53 &add($cc,$t1,$cc); &FR($t1);
54
55 ($t2,$o2)=&NR(2);
56
57 &add($a2,$b2,$o2); &FR($a2);
58 &cmpult($o2,$b2,$t2); &FR($b2);
59 &add($o2,$cc,$o2);
60 &cmpult($o2,$cc,$cc);
61 &add($cc,$t2,$cc); &FR($t2);
62
63 ($t3,$o3)=&NR(2);
64
65 &add($a3,$b3,$o3); &FR($a3);
66 &cmpult($o3,$b3,$t3); &FR($b3);
67 &add($o3,$cc,$o3);
68 &cmpult($o3,$cc,$cc);
69 &add($cc,$t3,$cc); &FR($t3);
70
71 &st($o0,&QWPw(0,$rp)); &FR($o0);
72 &st($o1,&QWPw(0,$rp)); &FR($o1);
73 &st($o2,&QWPw(0,$rp)); &FR($o2);
74 &st($o3,&QWPw(0,$rp)); &FR($o3);
75
76 &sub($count,4,$count); # count-=4
77 &add($ap,4*$QWS,$ap); # count+=4
78 &add($bp,4*$QWS,$bp); # count+=4
79 &add($rp,4*$QWS,$rp); # count+=4
80
81 &blt($count,&label("finish"));
82 &ld($a0,&QWPw(0,$ap));
83 &ld($b0,&QWPw(0,$bp));
84 &br(&label("loop"));
85EOF
86##################################################
87 # One word per iteration; with the unconditional &br above, every word is handled here
88
89 &set_label("last_loop");
90
91 &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
92 &mul($a0,$word,($l0)=&NR(1)); # low half of a*word
93 &add($ap,$QWS,$ap); # ap++
94 &muh($a0,$word,($h0)=&NR(1)); &FR($a0); # high half of a*word
95 &add($l0,$cc,$l0); # low += carry-in
96 &add($rp,$QWS,$rp); # rp++ (the store below uses index -1)
97 &sub($count,1,$count);
98 &cmpult($l0,$cc,$cc); # did adding the carry-in overflow?
99 &st($l0,&QWPw(-1,$rp)); &FR($l0);
100 &add($h0,$cc,$cc); &FR($h0); # carry-out = high half + overflow flag
101
102 &bgt($count,&label("last_loop"));
103 &function_end_A($name);
104
105######################################################
106 &set_label("finish"); # undo the -4 bias, then run the word loop if any words remain
107 &add($count,4,$count);
108 &bgt($count,&label("last_loop"));
109
110 &set_label("end");
111 &function_end($name);
112
113 &fin_pool;
114 }
115
1161;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl
new file mode 100644
index 0000000000..e37f6315fb
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl
@@ -0,0 +1,120 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
4sub bn_mul_add_words
5 { # Emits the routine $name: r[i] += a[i]*word with carry over `count` words, carry kept in $cc.
6 local($name)=@_;
7 local($cc,$a,$b,$r,$couny); # NOTE(review): $couny is never used; presumably a typo for $count -- confirm
8
9 &init_pool(4);
10 ($cc)=GR("r0"); # running carry word
11
12 $rp=&wparam(0); # result/accumulator pointer
13 $ap=&wparam(1); # source pointer
14 $count=&wparam(2); # number of words
15 $word=&wparam(3); # multiplier
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc); # carry = 0
22 &br(&label("finish")); # unconditional: the emitted blt and the two loads below can never execute
23 &blt($count,&label("finish"));
24
25 ($a0,$r0)=&NR(2); # NOTE(review): these two registers are not released by any &FR in this sub -- confirm intent
26 &ld($a0,&QWPw(0,$ap));
27 &ld($r0,&QWPw(0,$rp));
28
29$a=<<'EOF'; # the unrolled loop below is only stored as text in $a; this sub never emits it
30##########################################################
31 &set_label("loop");
32
33 ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
34 ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
35 ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
36 ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
37 ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
38 ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
39
40 ($o0,$t0)=&NR(2);
41 &add($a0,$b0,$o0);
42 &cmpult($o0,$b0,$t0);
43 &add($o0,$cc,$o0);
44 &cmpult($o0,$cc,$cc);
45 &add($cc,$t0,$cc); &FR($t0);
46
47 ($t1,$o1)=&NR(2);
48
49 &add($a1,$b1,$o1); &FR($a1);
50 &cmpult($o1,$b1,$t1); &FR($b1);
51 &add($o1,$cc,$o1);
52 &cmpult($o1,$cc,$cc);
53 &add($cc,$t1,$cc); &FR($t1);
54
55 ($t2,$o2)=&NR(2);
56
57 &add($a2,$b2,$o2); &FR($a2);
58 &cmpult($o2,$b2,$t2); &FR($b2);
59 &add($o2,$cc,$o2);
60 &cmpult($o2,$cc,$cc);
61 &add($cc,$t2,$cc); &FR($t2);
62
63 ($t3,$o3)=&NR(2);
64
65 &add($a3,$b3,$o3); &FR($a3);
66 &cmpult($o3,$b3,$t3); &FR($b3);
67 &add($o3,$cc,$o3);
68 &cmpult($o3,$cc,$cc);
69 &add($cc,$t3,$cc); &FR($t3);
70
71 &st($o0,&QWPw(0,$rp)); &FR($o0);
72 &st($o1,&QWPw(0,$rp)); &FR($o1);
73 &st($o2,&QWPw(0,$rp)); &FR($o2);
74 &st($o3,&QWPw(0,$rp)); &FR($o3);
75
76 &sub($count,4,$count); # count-=4
77 &add($ap,4*$QWS,$ap); # count+=4
78 &add($bp,4*$QWS,$bp); # count+=4
79 &add($rp,4*$QWS,$rp); # count+=4
80
81 &blt($count,&label("finish"));
82 &ld($a0,&QWPw(0,$ap));
83 &ld($b0,&QWPw(0,$bp));
84 &br(&label("loop"));
85EOF
86##################################################
87 # One word per iteration; with the unconditional &br above, every word is handled here
88
89 &set_label("last_loop");
90
91 &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
92 &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get r
93 &mul($a0,$word,($l0)=&NR(1)); # low half of a*word
94 &sub($count,1,$count);
95 &add($ap,$QWS,$ap); # ap++
96 &muh($a0,$word,($h0)=&NR(1)); &FR($a0); # high half of a*word
97 &add($r0,$l0,$r0); # r += low half
98 &add($rp,$QWS,$rp); # rp++ (the store below uses index -1)
99 &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); # did that addition overflow?
100 &add($r0,$cc,$r0); # r += carry-in
101 &add($h0,$t0,$h0); &FR($t0); # fold the first overflow into the high half
102 &cmpult($r0,$cc,$cc); # did adding the carry-in overflow?
103 &st($r0,&QWPw(-1,$rp)); &FR($r0);
104 &add($h0,$cc,$cc); &FR($h0); # carry-out = high half + both overflow flags
105
106 &bgt($count,&label("last_loop"));
107 &function_end_A($name);
108
109######################################################
110 &set_label("finish"); # undo the -4 bias, then run the word loop if any words remain
111 &add($count,4,$count);
112 &bgt($count,&label("last_loop"));
113
114 &set_label("end");
115 &function_end($name);
116
117 &fin_pool;
118 }
119
1201;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl
new file mode 100644
index 0000000000..5efd201281
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl
@@ -0,0 +1,213 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
4sub mul_add_c
5 { # Emits code adding the double-word product $a*$b into the accumulator registers ($c0 low, $c1 next, $c2 top).
6 local($a,$b,$c0,$c1,$c2)=@_;
7 local($l1,$h1,$t1,$t2);
8
9 &mul($a,$b,($l1)=&NR(1)); # l1 = low half of a*b
10 &muh($a,$b,($h1)=&NR(1)); # h1 = high half of a*b
11 &add($c0,$l1,$c0); # c0 += l1
12 &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = (that addition overflowed)
13 &add($t1,$h1,$h1); &FR($t1); # h1 += t1
14 &add($c1,$h1,$c1); # c1 += h1
15 &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = (that addition overflowed)
16 &add($c2,$t2,$c2); &FR($t2); # c2 += t2
17 }
18
19sub bn_mul_comba4
20 { # Emits the routine $name: the 8-word product r[0..7] of the 4-word inputs a[0..3] and b[0..3].
21 local($name)=@_;
22 local(@a,@b,$r,$c0,$c1,$c2);
23
24 $cnt=1;
25 &init_pool(3);
26
27 $rp=&wparam(0); # result pointer
28 $ap=&wparam(1); # first operand pointer
29 $bp=&wparam(2); # second operand pointer
30
31 &function_begin($name,"");
32
33 &comment("");
34 # Loads are interleaved with the first multiplies; each column's products are issued one column ahead.
35 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
36 &ld(($b[0])=&NR(1),&QWPw(0,$bp));
37 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
38 &ld(($b[1])=&NR(1),&QWPw(1,$bp));
39 &mul($a[0],$b[0],($r00)=&NR(1));
40 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
41 &ld(($b[2])=&NR(1),&QWPw(2,$bp));
42 &muh($a[0],$b[0],($r01)=&NR(1));
43 &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); # $ap is released first so the last load may reuse its register
44 &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); # likewise for $bp
45 &mul($a[0],$b[1],($r02)=&NR(1));
46
47 ($R,$H1,$H2)=&NR(3); # $R = current column sum, $H1/$H2 = carry counters for the next column
48
49 &st($r00,&QWPw(0,$rp)); &FR($r00); # r[0] = low(a0*b0)
50
51 &mov("zero",$R);
52 &mul($a[1],$b[0],($r03)=&NR(1));
53 # column 1: high(a0*b0) + low(a0*b1) + low(a1*b0)
54 &mov("zero",$H1);
55 &mov("zero",$H2); # both carry counters must start at zero
56 &add($R,$r01,$R);
57 &muh($a[0],$b[1],($r04)=&NR(1));
58 &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
59 &add($R,$r02,$R);
60 &add($H1,$t01,$H1); &FR($t01);
61 &muh($a[1],$b[0],($r05)=&NR(1));
62 &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
63 &add($R,$r03,$R);
64 &add($H2,$t02,$H2); &FR($t02);
65 &mul($a[0],$b[2],($r06)=&NR(1));
66 &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
67 &add($H1,$t03,$H1); &FR($t03);
68 &st($R,&QWPw(1,$rp)); # r[1]
69 &add($H1,$H2,$R); # next column starts from the accumulated carries
70 # column 2: high(a0*b1) + high(a1*b0) + low(a0*b2) + low(a1*b1) + low(a2*b0)
71 &mov("zero",$H1);
72 &add($R,$r04,$R);
73 &mov("zero",$H2);
74 &mul($a[1],$b[1],($r07)=&NR(1));
75 &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
76 &add($R,$r05,$R);
77 &add($H1,$t04,$H1); &FR($t04);
78 &mul($a[2],$b[0],($r08)=&NR(1));
79 &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
80 &add($R,$r06,$R); # add low(a0*b2); the carry test on the next-but-one line checks the same term
81 &add($H2,$t05,$H2); &FR($t05);
82 &muh($a[0],$b[2],($r09)=&NR(1));
83 &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
84 &add($R,$r07,$R);
85 &add($H1,$t06,$H1); &FR($t06);
86 &muh($a[1],$b[1],($r10)=&NR(1));
87 &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
88 &add($R,$r08,$R);
89 &add($H2,$t07,$H2); &FR($t07);
90 &muh($a[2],$b[0],($r11)=&NR(1));
91 &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
92 &add($H1,$t08,$H1); &FR($t08);
93 &st($R,&QWPw(2,$rp)); # r[2]
94 &add($H1,$H2,$R);
95 # column 3: three high halves from column 2's products plus four low halves
96 &mov("zero",$H1);
97 &add($R,$r09,$R);
98 &mov("zero",$H2);
99 &mul($a[0],$b[3],($r12)=&NR(1));
100 &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
101 &add($R,$r10,$R);
102 &add($H1,$t09,$H1); &FR($t09);
103 &mul($a[1],$b[2],($r13)=&NR(1));
104 &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
105 &add($R,$r11,$R);
106 &add($H1,$t10,$H1); &FR($t10);
107 &mul($a[2],$b[1],($r14)=&NR(1));
108 &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
109 &add($R,$r12,$R);
110 &add($H1,$t11,$H1); &FR($t11);
111 &mul($a[3],$b[0],($r15)=&NR(1));
112 &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
113 &add($R,$r13,$R);
114 &add($H1,$t12,$H1); &FR($t12);
115 &muh($a[0],$b[3],($r16)=&NR(1));
116 &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
117 &add($R,$r14,$R);
118 &add($H1,$t13,$H1); &FR($t13);
119 &muh($a[1],$b[2],($r17)=&NR(1));
120 &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
121 &add($R,$r15,$R);
122 &add($H1,$t14,$H1); &FR($t14);
123 &muh($a[2],$b[1],($r18)=&NR(1));
124 &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
125 &add($H1,$t15,$H1); &FR($t15);
126 &st($R,&QWPw(3,$rp)); # r[3]
127 &add($H1,$H2,$R);
128 # column 4: four high halves from column 3's products plus three low halves
129 &mov("zero",$H1);
130 &add($R,$r16,$R);
131 &mov("zero",$H2);
132 &muh($a[3],$b[0],($r19)=&NR(1));
133 &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
134 &add($R,$r17,$R);
135 &add($H1,$t16,$H1); &FR($t16);
136 &mul($a[1],$b[3],($r20)=&NR(1));
137 &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
138 &add($R,$r18,$R);
139 &add($H1,$t17,$H1); &FR($t17);
140 &mul($a[2],$b[2],($r21)=&NR(1));
141 &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
142 &add($R,$r19,$R);
143 &add($H1,$t18,$H1); &FR($t18);
144 &mul($a[3],$b[1],($r22)=&NR(1));
145 &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
146 &add($R,$r20,$R);
147 &add($H1,$t19,$H1); &FR($t19);
148 &muh($a[1],$b[3],($r23)=&NR(1));
149 &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
150 &add($R,$r21,$R);
151 &add($H1,$t20,$H1); &FR($t20);
152 &muh($a[2],$b[2],($r24)=&NR(1));
153 &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
154 &add($R,$r22,$R);
155 &add($H1,$t21,$H1); &FR($t21);
156 &muh($a[3],$b[1],($r25)=&NR(1));
157 &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
158 &add($H1,$t22,$H1); &FR($t22);
159 &st($R,&QWPw(4,$rp)); # r[4]
160 &add($H1,$H2,$R);
161 # column 5: three high halves from column 4's products plus two low halves
162 &mov("zero",$H1);
163 &add($R,$r23,$R);
164 &mov("zero",$H2);
165 &mul($a[2],$b[3],($r26)=&NR(1));
166 &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
167 &add($R,$r24,$R);
168 &add($H1,$t23,$H1); &FR($t23);
169 &mul($a[3],$b[2],($r27)=&NR(1));
170 &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
171 &add($R,$r25,$R);
172 &add($H1,$t24,$H1); &FR($t24);
173 &muh($a[2],$b[3],($r28)=&NR(1));
174 &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
175 &add($R,$r26,$R);
176 &add($H1,$t25,$H1); &FR($t25);
177 &muh($a[3],$b[2],($r29)=&NR(1));
178 &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
179 &add($R,$r27,$R);
180 &add($H1,$t26,$H1); &FR($t26);
181 &mul($a[3],$b[3],($r30)=&NR(1));
182 &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
183 &add($H1,$t27,$H1); &FR($t27);
184 &st($R,&QWPw(5,$rp)); # r[5]
185 &add($H1,$H2,$R);
186 # column 6: high(a2*b3) + high(a3*b2) + low(a3*b3)
187 &mov("zero",$H1);
188 &add($R,$r28,$R);
189 &mov("zero",$H2);
190 &muh($a[3],$b[3],($r31)=&NR(1));
191 &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
192 &add($R,$r29,$R);
193 &add($H1,$t28,$H1); &FR($t28);
194 ############
195 &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
196 &add($R,$r30,$R);
197 &add($H1,$t29,$H1); &FR($t29);
198 ############
199 &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
200 &add($H1,$t30,$H1); &FR($t30);
201 &st($R,&QWPw(6,$rp)); # r[6]
202 &add($H1,$H2,$R);
203 # column 7: carries plus high(a3*b3)
204 &add($R,$r31,$R); &FR($r31);
205 &st($R,&QWPw(7,$rp)); # r[7]
206
207 &FR($R,$H1,$H2);
208 &function_end($name);
209
210 &fin_pool;
211 }
212
2131;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl
new file mode 100644
index 0000000000..79d86dd25c
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl
@@ -0,0 +1,98 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
4sub mul_add_c
5 { # Emits code adding the double-word product $a*$b into the accumulator registers ($c0 low, $c1 next, $c2 top).
6 local($a,$b,$c0,$c1,$c2)=@_;
7 local($l1,$h1,$t1,$t2);
8
9print STDERR "count=$cnt\n"; $cnt++; # progress trace on stderr, one line per call
10 &mul($a,$b,($l1)=&NR(1)); # l1 = low half of a*b
11 &muh($a,$b,($h1)=&NR(1)); # h1 = high half of a*b
12 &add($c0,$l1,$c0); # c0 += l1
13 &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = (that addition overflowed)
14 &add($t1,$h1,$h1); &FR($t1); # h1 += t1
15 &add($c1,$h1,$c1); # c1 += h1
16 &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = (that addition overflowed)
17 &add($c2,$t2,$c2); &FR($t2); # c2 += t2
18 }
19
20sub bn_mul_comba4
21 { # Emits the routine $name: the 8-word product r[0..7] of a[0..3] and b[0..3], one result column at a time via mul_add_c.
22 local($name)=@_;
23 local(@a,@b,$r,$c0,$c1,$c2);
24
25 $cnt=1; # call counter printed by mul_add_c
26 &init_pool(3);
27
28 $rp=&wparam(0); # result pointer
29 $ap=&wparam(1); # first operand pointer
30 $bp=&wparam(2); # second operand pointer
31
32 &function_begin($name,"");
33
34 &comment("");
35 # load all eight input words up front; the pointer registers are released after the last loads
36 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
37 &ld(($b[0])=&NR(1),&QWPw(0,$bp));
38 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
39 &ld(($b[1])=&NR(1),&QWPw(1,$bp));
40 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
41 &ld(($b[2])=&NR(1),&QWPw(2,$bp));
42 &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
43 &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);
44
45 ($c0,$c1,$c2)=&NR(3); # three-word accumulator, rotated after every column
46 &mov("zero",$c2);
47 &mul($a[0],$b[0],$c0);
48 &muh($a[0],$b[0],$c1);
49 &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); # r[0]; NOTE(review): NR is given the freed register here, not a count as elsewhere -- confirm
50 ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate: the old middle word becomes the new low word
51 &mov("zero",$c2);
52 # column 1
53 &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
54 &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
55 &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); # r[1]
56 ($c0,$c1,$c2)=($c1,$c2,$c0);
57 &mov("zero",$c2);
58 # column 2
59 &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
60 &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
61 &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
62 &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); # r[2]
63 ($c0,$c1,$c2)=($c1,$c2,$c0);
64 &mov("zero",$c2);
65 # column 3; input registers are released after their last use from here on
66 &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
67 &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
68 &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
69 &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
70 &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); # r[3]
71 ($c0,$c1,$c2)=($c1,$c2,$c0);
72 &mov("zero",$c2);
73 # column 4
74 &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
75 &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
76 &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
77 &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); # r[4]
78 ($c0,$c1,$c2)=($c1,$c2,$c0);
79 &mov("zero",$c2);
80 # column 5
81 &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
82 &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
83 &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); # r[5]
84 ($c0,$c1,$c2)=($c1,$c2,$c0);
85 &mov("zero",$c2);
86 # columns 6 and 7: the last product supplies both remaining words
87 &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
88 &st($c0,&QWPw(6,$rp)); # r[6]
89 &st($c1,&QWPw(7,$rp)); # r[7]
90
91 &FR($c0,$c1,$c2);
92
93 &function_end($name);
94
95 &fin_pool;
96 }
97
981;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl
new file mode 100644
index 0000000000..525ca7494b
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl
@@ -0,0 +1,177 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
# Emit the assembler for bn_mul_comba8: an 8x8 word "comba" multiply that
# writes the 16 result words through $rp from the 8-word operands at $ap
# and $bp.
#
#   $name - symbol name passed to function_begin/function_end.
#
# All sixteen input words are loaded into pool registers first.  The result
# is then produced one column at a time: column k accumulates every product
# a[i]*b[k-i] into the rotating accumulator ($c0,$c1,$c2) via mul_add_c(),
# after which $c0 is stored as result word k and the accumulator rotates.
#
# Fix over the previous revision: words 4..7 of both operands were loaded
# from offset 1 instead of offsets 4..7.
sub bn_mul_comba8
	{
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);
	local($widx,$col,$ai,$first,$last);

	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	&stack_push(2);
	for ($widx=0; $widx<8; $widx++)
		{
		# operand word $widx is read from offset $widx; the pointer
		# registers go back to the pool right after their last load
		&ld(($a[$widx])=&NR(1),&QWPw($widx,$ap));	&FR($ap) if ($widx == 7);
		&ld(($b[$widx])=&NR(1),&QWPw($widx,$bp));	&FR($bp) if ($widx == 7);
		if ($widx == 0)
			{
			# save s0/s1 in the two stack temporaries and release
			# them to the pool; they are reloaded before returning
			&st($reg_s0,&swtmp(0)); &FR($reg_s0);
			&st($reg_s1,&swtmp(1)); &FR($reg_s1);
			}
		}

	($c0,$c1,$c2)=&NR(3);
	&mov("zero",$c2);
	# column 0 is the single product a[0]*b[0]; write it straight into
	# the accumulator instead of adding
	&mul($a[0],$b[0],$c0);
	&muh($a[0],$b[0],$c1);

	for ($col=1; $col<=14; $col++)
		{
		# store the finished column, swap in a fresh register for it,
		# rotate the accumulator and clear its new top word
		&st($c0,&QWPw($col-1,$rp)); &FR($c0); ($c0)=&NR(1);
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);

		$first=($col > 7)?$col-7:0;
		$last=($col > 7)?7:$col;
		for ($ai=$first; $ai<=$last; $ai++)
			{
			&mul_add_c($a[$ai],$b[$col-$ai],$c0,$c1,$c2);
			# a[i] is used for the last time when it meets b[7],
			# b[j] when it meets a[7]
			&FR($a[$ai]) if ($col-$ai == 7);
			&FR($b[$col-$ai]) if ($ai == 7);
			}
		}

	# the last column leaves the two top result words in c0/c1
	&st($c0,&QWPw(14,$rp));
	&st($c1,&QWPw(15,$rp));

	&FR($c0,$c1,$c2);

	&ld($reg_s0,&swtmp(0));
	&ld($reg_s1,&swtmp(1));
	&stack_pop(2);

	&function_end($name);

	&fin_pool;
	}
176
1771;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl
new file mode 100644
index 0000000000..a55b696906
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl
@@ -0,0 +1,113 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
# Emit the assembler for bn_sqr_words: for each of the $count input words
# read through $ap, store the low and high halves of its square as two
# consecutive words through $rp.
#
#   $name - symbol name passed to function_begin/function_end.
#
# There is no unrolled loop in this routine; control always goes through
# "finish", which undoes the initial count bias and runs the
# one-word-at-a-time loop.  Compared with the previous revision this no
# longer emits the unreachable conditional branch and the two unreachable
# loads that followed the unconditional branch (they also tied up two pool
# registers for good), and it drops the copy of the add-words loop that was
# parked in an unused string.
sub bn_sqr_words
	{
	local($name)=@_;
	local($cc,$a,$b,$r,$count);

	&init_pool(3);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$count=&wparam(2);

	&function_begin($name,"");

	&comment("");
	&sub($count,4,$count);
	&mov("zero",$cc);
	&br(&label("finish"));

##################################################
	# Square one word per pass

	&set_label("last_loop");

	&ld(($a0)=&NR(1),&QWPw(0,$ap));	# get a
	&mul($a0,$a0,($l0)=&NR(1));
	&add($ap,$QWS,$ap);		# one input word consumed
	&add($rp,2*$QWS,$rp);		# two output words produced
	&sub($count,1,$count);
	&muh($a0,$a0,($h0)=&NR(1));	&FR($a0);
	# rp has already been advanced, hence the negative offsets
	&st($l0,&QWPw(-2,$rp));		&FR($l0);
	&st($h0,&QWPw(-1,$rp));		&FR($h0);

	&bgt($count,&label("last_loop"));
	&function_end_A($name);

######################################################
	&set_label("finish");
	&add($count,4,$count);		# undo the bias applied on entry
	&bgt($count,&label("last_loop"));

	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
112
1131;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl
new file mode 100644
index 0000000000..bf33f5b503
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl
@@ -0,0 +1,109 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
# Emit code adding the double-word square $a*$a into the three-word
# accumulator ($c2,$c1,$c0).
#
#   $a           - register holding the word to square
#   $c0,$c1,$c2  - accumulator registers, low to high
#
# The carry out of the low word is folded into the high product half
# before that half is added to $c1.  The high half of a square is at most
# 2^64-2, so this cannot wrap, and a single compare then catches every
# carry into $c2.  The previous sequence added that carry to $c1 after the
# compare, so a wrap of $c1 at that point was silently lost.  This is the
# same sequence mul_add_c() uses.
sub sqr_add_c
	{
	local($a,$c0,$c1,$c2)=@_;
	local($l1,$h1,$t1,$t2);

	&mul($a,$a,($l1)=&NR(1));
	&muh($a,$a,($h1)=&NR(1));
	&add($c0,$l1,$c0);
	&cmpult($c0,$l1,($t1)=&NR(1));	&FR($l1);	# carry out of c0
	&add($t1,$h1,$h1);		&FR($t1);	# fold it into the high half
	&add($c1,$h1,$c1);
	&cmpult($c1,$h1,($t2)=&NR(1));	&FR($h1);	# carry out of c1
	&add($c2,$t2,$c2);		&FR($t2);
	}
18
# Emit code adding twice the double-word product $a*$b into the
# three-word accumulator ($c2,$c1,$c0).
#
#   $a,$b        - registers holding the two factors
#   $c0,$c1,$c2  - accumulator registers, low to high
#
# The product is doubled in place: the top bit of each half is captured
# with a signed "less than zero" compare before the half is added to
# itself.  The low half's top bit moves into the high half and the high
# half's top bit goes straight into $c2.
#
# Fix over the previous revision: the carry produced when the low-word
# carry is added to $c1 is now propagated into $c2; it used to be dropped.
sub sqr_add_c2
	{
	local($a,$b,$c0,$c1,$c2)=@_;
	local($l1,$h1,$lc1,$hc1);

	&mul($a,$b,($l1)=&NR(1));
	&muh($a,$b,($h1)=&NR(1));
	&cmplt($l1,"zero",($lc1)=&NR(1));	# top bit of the low half
	&cmplt($h1,"zero",($hc1)=&NR(1));	# top bit of the high half
	&add($l1,$l1,$l1);
	&add($h1,$h1,$h1);
	&add($h1,$lc1,$h1);	&FR($lc1);
	&add($c2,$hc1,$c2);	&FR($hc1);

	&add($c0,$l1,$c0);
	&add($c1,$h1,$c1);
	&cmpult($c0,$l1,($lc1)=&NR(1));	&FR($l1);	# carry out of c0
	&cmpult($c1,$h1,($hc1)=&NR(1));	&FR($h1);	# carry out of c1

	&add($c1,$lc1,$c1);
	&add($c2,$hc1,$c2);	&FR($hc1);
	# c1 += lc1 can itself wrap; $lc1 is reused to hold that carry
	&cmpult($c1,$lc1,$lc1);
	&add($c2,$lc1,$c2);	&FR($lc1);
	}
41
42
# Emit the assembler for bn_sqr_comba4: a 4-word "comba" squaring that
# writes 8 result words through $rp from the 4-word operand at $ap.
#
#   $name - symbol name passed to function_begin/function_end.
#
# @plan lists, for result columns 1..6, the index pairs "xy" to feed to
# the accumulator, in emission order.  A pair with x == y is a plain
# square (sqr_add_c); any other pair is a doubled cross product
# (sqr_add_c2, called with a[x] first).
sub bn_sqr_comba4
	{
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);
	local(@plan,$widx,$col,$pair,$px,$py);

	$cnt=1;
	&init_pool(2);

	$rp=&wparam(0);
	$ap=&wparam(1);

	&function_begin($name,"");

	&comment("");

	for ($widx=0; $widx<4; $widx++)
		{
		($a[$widx])=&NR(1);
		&ld($a[$widx],&QWPw($widx,$ap));
		}
	&FR($ap);

	($c0,$c1,$c2)=&NR(3);

	# column 0: a[0]^2 goes straight into the accumulator
	&mov("zero",$c2);
	&mul($a[0],$a[0],$c0);
	&muh($a[0],$a[0],$c1);

	@plan=("","01","11 20","30 21","22 31","32","33");
	for ($col=1; $col<=6; $col++)
		{
		# flush the previous column and rotate the accumulator
		&st($c0,&QWPw($col-1,$rp));
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);

		foreach $pair (split(' ',$plan[$col]))
			{
			$px=substr($pair,0,1);
			$py=substr($pair,1,1);
			if ($px == $py)
				{ &sqr_add_c($a[$px],$c0,$c1,$c2); }
			else
				{ &sqr_add_c2($a[$px],$a[$py],$c0,$c1,$c2); }
			}
		}

	# the last column leaves the two top result words in c0/c1
	&st($c0,&QWPw(6,$rp));
	&st($c1,&QWPw(7,$rp));

	&function_end($name);

	&fin_pool;
	}
108
1091;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl
new file mode 100644
index 0000000000..b4afe085f1
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl
@@ -0,0 +1,132 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
# Emit the assembler for bn_sqr_comba8: an 8-word "comba" squaring that
# writes 16 result words through $rp from the 8-word operand at $ap.
#
#   $name - symbol name passed to function_begin/function_end.
#
# Result column k collects every a[i]*a[j] with i+j == k and i >= j,
# walking i upwards from ceil(k/2).  The diagonal term (i == j) goes
# through sqr_add_c, the others through sqr_add_c2 with the higher index
# first.
sub bn_sqr_comba8
	{
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);
	local($widx,$col,$hi_ix,$lo_ix);

	$cnt=1;
	&init_pool(2);

	$rp=&wparam(0);
	$ap=&wparam(1);

	&function_begin($name,"");

	&comment("");

	for ($widx=0; $widx<8; $widx++)
		{
		($a[$widx])=&NR(1);
		&ld($a[$widx],&QWPw($widx,$ap));
		}
	&FR($ap);

	($c0,$c1,$c2)=&NR(3);

	# column 0: a[0]^2 goes straight into the accumulator
	&mov("zero",$c2);
	&mul($a[0],$a[0],$c0);
	&muh($a[0],$a[0],$c1);

	for ($col=1; $col<15; $col++)
		{
		# flush the previous column and rotate the accumulator
		&st($c0,&QWPw($col-1,$rp));
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);

		$hi_ix=int(($col+1)/2);
		while ($hi_ix <= 7 && $hi_ix <= $col)
			{
			$lo_ix=$col-$hi_ix;
			if ($hi_ix == $lo_ix)
				{ &sqr_add_c($a[$hi_ix],$c0,$c1,$c2); }
			else
				{ &sqr_add_c2($a[$hi_ix],$a[$lo_ix],$c0,$c1,$c2); }
			$hi_ix++;
			}
		}

	# the last column leaves the two top result words in c0/c1
	&st($c0,&QWPw(14,$rp));
	&st($c1,&QWPw(15,$rp));

	&function_end($name);

	&fin_pool;
	}
131
1321;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sub.pl b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl
new file mode 100644
index 0000000000..d998da5c21
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl
@@ -0,0 +1,108 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
# Emit the assembler for bn_sub_words: word-wise subtraction of the array
# at $bp from the array at $ap into $rp over $count words, with the borrow
# carried from word to word in $cc (register r0).
#
#   $name - symbol name passed to function_begin/function_end.
#
# The main loop handles four words per pass with loads, compares and
# subtracts deliberately interleaved; "last_loop" handles the remaining
# words one at a time.
4sub bn_sub_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r);
8
9 &init_pool(4);
10 ($cc)=GR("r0"); # running borrow
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $bp=&wparam(2);
15 $count=&wparam(3);
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count); # bias the count: negative means fewer than 4 words
21 &mov("zero",$cc); # no borrow on entry
22 &blt($count,&label("finish"));
23
24 ($a0,$b0)=&NR(2); # kept allocated across the loop; reloaded at its bottom
25 &ld($a0,&QWPw(0,$ap));
26 &ld($b0,&QWPw(0,$bp));
27
28##########################################################
29 &set_label("loop");
30
31 ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);
32 &ld($a1,&QWPw(1,$ap));
33 &cmpult($a0,$b0,$tmp); # will a0-b0 borrow?
34 &ld($b1,&QWPw(1,$bp));
35 &sub($a0,$b0,$a0); # do the subtract
36 &ld($a2,&QWPw(2,$ap));
37 &cmpult($a0,$cc,$b0); # will taking off the incoming borrow borrow? ($b0 reused as scratch)
38 &ld($b2,&QWPw(2,$bp));
39 &sub($a0,$cc,$o0); # take off the incoming borrow
40 &ld($a3,&QWPw(3,$ap));
41 &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); # borrow out = sum of the two flags
42
43 &cmpult($a1,$b1,$t1); # will a1-b1 borrow?
44 &sub($a1,$b1,$a1); # do the subtract
45 &ld($b3,&QWPw(3,$bp));
46 &cmpult($a1,$cc,$b1); # will taking off the incoming borrow borrow?
47 &sub($a1,$cc,$o1); # take off the incoming borrow
48 &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1);
49
50 &cmpult($a2,$b2,$tmp); # will a2-b2 borrow?
51 &sub($a2,$b2,$a2); # do the subtract
52 &st($o0,&QWPw(0,$rp)); &FR($o0); # save
53 &cmpult($a2,$cc,$b2); # will taking off the incoming borrow borrow?
54 &sub($a2,$cc,$o2); # take off the incoming borrow
55 &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2);
56
57 &cmpult($a3,$b3,$t3); # will a3-b3 borrow?
58 &sub($a3,$b3,$a3); # do the subtract
59 &st($o1,&QWPw(1,$rp)); &FR($o1);
60 &cmpult($a3,$cc,$b3); # will taking off the incoming borrow borrow?
61 &sub($a3,$cc,$o3); # take off the incoming borrow
62 &add($b3,$t3,$cc); &FR($t3,$a3,$b3);
63
64 &st($o2,&QWPw(2,$rp)); &FR($o2);
65 &sub($count,4,$count); # count-=4
66 &st($o3,&QWPw(3,$rp)); &FR($o3);
67 &add($ap,4*$QWS,$ap); # advance ap by 4 words
68 &add($bp,4*$QWS,$bp); # advance bp by 4 words
69 &add($rp,4*$QWS,$rp); # advance rp by 4 words
70
71 &blt($count,&label("finish"));
72 &ld($a0,&QWPw(0,$ap)); # preload word 0 of the next pass into the same registers
73 &ld($b0,&QWPw(0,$bp));
74 &br(&label("loop"));
75##################################################
76 # Do the last 0..3 words
77
78 &set_label("last_loop");
79
80 &ld($a0,&QWPw(0,$ap)); # get a
81 &ld($b0,&QWPw(0,$bp)); # get b
82 &cmpult($a0,$b0,$tmp); # will a-b borrow? ($tmp still names the register released above)
83 &sub($a0,$b0,$a0); # do the subtract
84 &cmpult($a0,$cc,$b0); # will taking off the incoming borrow borrow?
85 &sub($a0,$cc,$a0); # take off the incoming borrow
86 &st($a0,&QWPw(0,$rp)); # save
87 &add($b0,$tmp,$cc); # add the borrows
88
89 &add($ap,$QWS,$ap);
90 &add($bp,$QWS,$bp);
91 &add($rp,$QWS,$rp);
92 &sub($count,1,$count);
93 &bgt($count,&label("last_loop"));
94 &function_end_A($name);
95
96######################################################
97 &set_label("finish");
98 &add($count,4,$count); # undo the bias: count is now the number of words left
99 &bgt($count,&label("last_loop"));
100
101 &FR($a0,$b0);
102 &set_label("end");
103 &function_end($name);
104
105 &fin_pool;
106 }
107
1081;
diff --git a/src/lib/libcrypto/bn/asm/alpha/add.pl b/src/lib/libcrypto/bn/asm/alpha/add.pl
new file mode 100644
index 0000000000..13bf516428
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/add.pl
@@ -0,0 +1,118 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
# Emit the assembler for bn_add_words: word-wise addition of the arrays at
# $ap and $bp into $rp over $count words, with the carry passed from word
# to word in $cc (register r0).
#
#   $name - symbol name passed to function_begin/function_end.
#
# The main loop handles four words per pass; "last_loop" handles the
# remaining words one at a time.
#
# Fix over the previous revision: the unrolled loop stored all four sums
# at offset 0 of $rp, so words 1..3 of every group were never written and
# word 0 was overwritten three times.  They are now stored at offsets
# 0, 1, 2 and 3.
sub bn_add_words
	{
	local($name)=@_;
	local($cc,$a,$b,$r);

	&init_pool(4);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);
	$count=&wparam(3);

	&function_begin($name,"");

	&comment("");
	&sub($count,4,$count);		# bias the count: negative means fewer than 4 words
	&mov("zero",$cc);		# no carry on entry
	&blt($count,&label("finish"));

	# NOTE(review): this pair is allocated again at the top of the loop
	# and the registers taken here are never released; kept as it was.
	($a0,$b0)=&NR(2);

##########################################################
	&set_label("loop");

	&ld(($a0)=&NR(1),&QWPw(0,$ap));
	&ld(($b0)=&NR(1),&QWPw(0,$bp));
	&ld(($a1)=&NR(1),&QWPw(1,$ap));
	&ld(($b1)=&NR(1),&QWPw(1,$bp));

	($o0,$t0)=&NR(2);
	&add($a0,$b0,$o0);
	&ld(($a2)=&NR(1),&QWPw(2,$ap));
	&cmpult($o0,$b0,$t0);		# carry out of a0+b0
	&add($o0,$cc,$o0);
	&cmpult($o0,$cc,$cc);		# carry out of adding the incoming carry
	&ld(($b2)=&NR(1),&QWPw(2,$bp));
	&add($cc,$t0,$cc);	&FR($t0);

	($t1,$o1)=&NR(2);

	&add($a1,$b1,$o1);	&FR($a1);
	&cmpult($o1,$b1,$t1);	&FR($b1);
	&add($o1,$cc,$o1);
	&cmpult($o1,$cc,$cc);
	&ld(($a3)=&NR(1),&QWPw(3,$ap));
	&add($cc,$t1,$cc);	&FR($t1);

	($t2,$o2)=&NR(2);

	&add($a2,$b2,$o2);	&FR($a2);
	&cmpult($o2,$b2,$t2);	&FR($b2);
	&add($o2,$cc,$o2);
	&cmpult($o2,$cc,$cc);
	&ld(($b3)=&NR(1),&QWPw(3,$bp));
	&st($o0,&QWPw(0,$rp));	&FR($o0);
	&add($cc,$t2,$cc);	&FR($t2);

	($t3,$o3)=&NR(2);

	&st($o1,&QWPw(1,$rp));	&FR($o1);
	&add($a3,$b3,$o3);	&FR($a3);
	&cmpult($o3,$b3,$t3);	&FR($b3);
	&add($o3,$cc,$o3);
	&st($o2,&QWPw(2,$rp));	&FR($o2);
	&cmpult($o3,$cc,$cc);
	&st($o3,&QWPw(3,$rp));	&FR($o3);
	&add($cc,$t3,$cc);	&FR($t3);


	&sub($count,4,$count);	# count-=4
	&add($ap,4*$QWS,$ap);	# advance ap by 4 words
	&add($bp,4*$QWS,$bp);	# advance bp by 4 words
	&add($rp,4*$QWS,$rp);	# advance rp by 4 words

	###
	&bge($count,&label("loop"));
	###
	&br(&label("finish"));
##################################################
	# Do the last 0..3 words

	($t0,$o0)=&NR(2);
	&set_label("last_loop");

	&ld($a0,&QWPw(0,$ap));	# get a
	&ld($b0,&QWPw(0,$bp));	# get b
	&add($ap,$QWS,$ap);
	&add($bp,$QWS,$bp);
	&add($a0,$b0,$o0);
	&sub($count,1,$count);
	&cmpult($o0,$b0,$t0);	# carry out of a+b
	&add($o0,$cc,$o0);	# add the incoming carry
	&cmpult($o0,$cc,$cc);	# carry out of that addition
	&add($rp,$QWS,$rp);
	&st($o0,&QWPw(-1,$rp));	# save (rp has already been advanced)
	&add($cc,$t0,$cc);	# add the carries

	###
	&bgt($count,&label("last_loop"));
	&function_end_A($name);

######################################################
	&set_label("finish");
	&add($count,4,$count);	# undo the bias: count is now the number of words left
	&bgt($count,&label("last_loop"));

	&FR($o0,$t0,$a0,$b0);
	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
117
1181;
diff --git a/src/lib/libcrypto/bn/asm/alpha/div.pl b/src/lib/libcrypto/bn/asm/alpha/div.pl
new file mode 100644
index 0000000000..e9e680897a
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/div.pl
@@ -0,0 +1,144 @@
1#!/usr/local/bin/perl
2
# Append the hand-edited, compiler-generated assembler for bn_div_words to
# the output via asm_add().  The text is passed through verbatim (the
# here-document is single-quoted, so the $NN register names are not
# interpolated).
#
# Fix over the previous revision: the entry label "bn_div_words" was
# missing its trailing colon, unlike every other label in the listing.
# NOTE(review): asm_add() is not visible here; this assumes it does not
# append the colon itself.
sub bn_div_words
	{
	local($data)=<<'EOF';
 #
 # What follows was taken directly from the C compiler with a few
 # hacks to redo the lables.
 #
.text
	.set noreorder
	.set volatile
	.align 3
	.globl bn_div_words
	.ent bn_div_words
bn_div_words:
	ldgp $29,0($27)
bn_div_words.ng:
	lda $30,-48($30)
	.frame $30,48,$26,0
	stq $26,0($30)
	stq $9,8($30)
	stq $10,16($30)
	stq $11,24($30)
	stq $12,32($30)
	stq $13,40($30)
	.mask 0x4003e00,-48
	.prologue 1
	bis $16,$16,$9
	bis $17,$17,$10
	bis $18,$18,$11
	bis $31,$31,$13
	bis $31,2,$12
	bne $11,$9119
	lda $0,-1
	br $31,$9136
	.align 4
$9119:
	bis $11,$11,$16
	jsr $26,BN_num_bits_word
	ldgp $29,0($26)
	subq $0,64,$1
	beq $1,$9120
	bis $31,1,$1
	sll $1,$0,$1
	cmpule $9,$1,$1
	bne $1,$9120
 #	lda $16,_IO_stderr_
 #	lda $17,$C32
 #	bis $0,$0,$18
 #	jsr $26,fprintf
 #	ldgp $29,0($26)
	jsr $26,abort
	ldgp $29,0($26)
	.align 4
$9120:
	bis $31,64,$3
	cmpult $9,$11,$2
	subq $3,$0,$1
	addl $1,$31,$0
	subq $9,$11,$1
	cmoveq $2,$1,$9
	beq $0,$9122
	zapnot $0,15,$2
	subq $3,$0,$1
	sll $11,$2,$11
	sll $9,$2,$3
	srl $10,$1,$1
	sll $10,$2,$10
	bis $3,$1,$9
$9122:
	srl $11,32,$5
	zapnot $11,15,$6
	lda $7,-1
	.align 5
$9123:
	srl $9,32,$1
	subq $1,$5,$1
	bne $1,$9126
	zapnot $7,15,$27
	br $31,$9127
	.align 4
$9126:
	bis $9,$9,$24
	bis $5,$5,$25
	divqu $24,$25,$27
$9127:
	srl $10,32,$4
	.align 5
$9128:
	mulq $27,$5,$1
	subq $9,$1,$3
	zapnot $3,240,$1
	bne $1,$9129
	mulq $6,$27,$2
	sll $3,32,$1
	addq $1,$4,$1
	cmpule $2,$1,$2
	bne $2,$9129
	subq $27,1,$27
	br $31,$9128
	.align 4
$9129:
	mulq $27,$6,$1
	mulq $27,$5,$4
	srl $1,32,$3
	sll $1,32,$1
	addq $4,$3,$4
	cmpult $10,$1,$2
	subq $10,$1,$10
	addq $2,$4,$2
	cmpult $9,$2,$1
	bis $2,$2,$4
	beq $1,$9134
	addq $9,$11,$9
	subq $27,1,$27
$9134:
	subl $12,1,$12
	subq $9,$4,$9
	beq $12,$9124
	sll $27,32,$13
	sll $9,32,$2
	srl $10,32,$1
	sll $10,32,$10
	bis $2,$1,$9
	br $31,$9123
	.align 4
$9124:
	bis $13,$27,$0
$9136:
	ldq $26,0($30)
	ldq $9,8($30)
	ldq $10,16($30)
	ldq $11,24($30)
	ldq $12,32($30)
	ldq $13,40($30)
	addq $30,48,$30
	ret $31,($26),1
	.end bn_div_words
EOF
	&asm_add($data);
	}
143
1441;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul.pl b/src/lib/libcrypto/bn/asm/alpha/mul.pl
new file mode 100644
index 0000000000..76c926566c
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul.pl
@@ -0,0 +1,104 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
# Emit the assembler for bn_mul_words: multiply each of the $count words
# read through $ap by $word, add the running carry $cc (register r0),
# store the low half through $rp and carry the high half forward.
#
#   $name - symbol name passed to function_begin/function_end.
#
# The main loop handles four words per pass; "last_loop" handles the
# remaining words one at a time.
#
# Fixes over the previous revision:
#  - each input register was released after its muh and again after its
#    mul, i.e. once while still needed and then a second time; inputs are
#    now released exactly once, after their last use;
#  - the loop advanced $bp, which is not a parameter of this routine;
#  - a missing ';' made "&FR($a0)" and the following "&add(...)" parse as
#    one bitwise-and expression;
#  - the bottom of the loop preloaded the next word but then branched to
#    "finish", so the unrolled loop could run at most once.  It now
#    branches back to "loop"; $a0 keeps one register for the whole loop so
#    the preload lands where the top of the loop expects it.
sub bn_mul_words
	{
	local($name)=@_;
	local($cc,$a,$b,$r,$count);

	&init_pool(4);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$count=&wparam(2);
	$word=&wparam(3);

	&function_begin($name,"");

	&comment("");
	&sub($count,4,$count);		# bias the count: negative means fewer than 4 words
	&mov("zero",$cc);		# no carry on entry
	###
	&blt($count,&label("finish"));

	($a0)=&NR(1); &ld($a0,&QWPw(0,$ap));

	&set_label("loop");

	($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
	($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));

	&muh($a0,$word,($h0)=&NR(1));
	($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
	### wait 8
	&mul($a0,$word,($l0)=&NR(1));		# $a0 stays allocated for the reload below
	### wait 8
	&muh($a1,$word,($h1)=&NR(1));
	&add($l0,$cc,$l0);			### wait 8
	&mul($a1,$word,($l1)=&NR(1));	&FR($a1);
	&cmpult($l0,$cc,$cc);			### wait 8
	&muh($a2,$word,($h2)=&NR(1));
	&add($h0,$cc,$cc);	&FR($h0);	### wait 8
	&mul($a2,$word,($l2)=&NR(1));	&FR($a2);
	&add($l1,$cc,$l1);			### wait 8
	&st($l0,&QWPw(0,$rp));	&FR($l0);
	&cmpult($l1,$cc,$cc);			### wait 8
	&muh($a3,$word,($h3)=&NR(1));
	&add($h1,$cc,$cc);	&FR($h1);
	&mul($a3,$word,($l3)=&NR(1));	&FR($a3);
	&add($l2,$cc,$l2);
	&st($l1,&QWPw(1,$rp));	&FR($l1);
	&cmpult($l2,$cc,$cc);
	&add($h2,$cc,$cc);	&FR($h2);
	&sub($count,4,$count);	# count-=4
	&st($l2,&QWPw(2,$rp));	&FR($l2);
	&add($l3,$cc,$l3);
	&cmpult($l3,$cc,$cc);
	&add($h3,$cc,$cc);	&FR($h3);
	&add($ap,4*$QWS,$ap);	# advance ap by 4 words
	&st($l3,&QWPw(3,$rp));	&FR($l3);
	&add($rp,4*$QWS,$rp);	# advance rp by 4 words
	###
	&blt($count,&label("finish"));
	&ld($a0,&QWPw(0,$ap));	# preload word 0 of the next pass
	&br(&label("loop"));
	&FR($a0);		# the loop is done with its register
##################################################

##################################################
	# Do the last 0..3 words

	&set_label("last_loop");

	&ld(($a0)=&NR(1),&QWPw(0,$ap));	# get a
	###
	###
	###
	&muh($a0,$word,($h0)=&NR(1));
	### Wait 8 for next mul issue
	&mul($a0,$word,($l0)=&NR(1));	&FR($a0);
	&add($ap,$QWS,$ap);
	### Loose 12 until result is available
	&add($rp,$QWS,$rp);
	&sub($count,1,$count);
	&add($l0,$cc,$l0);
	###
	&st($l0,&QWPw(-1,$rp));		# rp has already been advanced
	&cmpult($l0,$cc,$cc);	&FR($l0);
	&add($h0,$cc,$cc);	&FR($h0);
	&bgt($count,&label("last_loop"));
	&function_end_A($name);

######################################################
	&set_label("finish");
	&add($count,4,$count);	# undo the bias: count is now the number of words left
	&bgt($count,&label("last_loop"));

	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
103
1041;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl
new file mode 100644
index 0000000000..0d6df69bc4
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl
@@ -0,0 +1,123 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
# Emit the assembler for bn_mul_add_words: for each of the $count words,
# multiply the word read through $ap by $word, add it and the running
# carry $cc (register r0) to the word already at $rp, store the low half
# back and carry the high half forward.
#
#   $name - symbol name passed to function_begin/function_end.
#
# The four-way unrolled loop is parked in a string ($a) and is not
# emitted, so every word goes through the one-word "last_loop".
#
# Fixes over the previous revision:
#  - with the unrolled loop parked, a count of four or more fell straight
#    into "last_loop" with the count still biased by -4, so too few words
#    were processed.  Control now always goes through "finish", which
#    removes the bias first (the same arrangement bn_sqr_words uses).  The
#    preload that only served the parked loop is gone with it;
#  - "last_loop" released $a0 after the muh, while the mul still needed
#    it, and then a second time; it is now released once, after the mul.
sub bn_mul_add_words
	{
	local($name)=@_;
	local($cc,$a,$b,$r,$count);

	&init_pool(4);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$count=&wparam(2);
	$word=&wparam(3);

	&function_begin($name,"");

	&comment("");
	&sub($count,4,$count);		# bias kept so the parked loop can be re-enabled
	&mov("zero",$cc);		# no carry on entry
	###
	&br(&label("finish"));

$a=<<'EOF';
##########################################################
	&set_label("loop");

	&ld(($r0)=&NR(1),&QWPw(0,$rp));
	&ld(($a1)=&NR(1),&QWPw(1,$ap));
	&muh($a0,$word,($h0)=&NR(1));
	&ld(($r1)=&NR(1),&QWPw(1,$rp));
	&ld(($a2)=&NR(1),&QWPw(2,$ap));
	###
	&mul($a0,$word,($l0)=&NR(1)); &FR($a0);
	&ld(($r2)=&NR(1),&QWPw(2,$rp));
	&muh($a1,$word,($h1)=&NR(1));
	&ld(($a3)=&NR(1),&QWPw(3,$ap));
	&mul($a1,$word,($l1)=&NR(1)); &FR($a1);
	&ld(($r3)=&NR(1),&QWPw(3,$rp));
	&add($r0,$l0,$r0);
	&add($r1,$l1,$r1);
	&cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
	&cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1);
	&muh($a2,$word,($h2)=&NR(1));
	&add($r0,$cc,$r0);
	&add($h0,$t0,$h0); &FR($t0);
	&cmpult($r0,$cc,$cc);
	&add($h1,$t1,$h1); &FR($t1);
	&add($h0,$cc,$cc); &FR($h0);
	&mul($a2,$word,($l2)=&NR(1)); &FR($a2);
	&add($r1,$cc,$r1);
	&cmpult($r1,$cc,$cc);
	&add($r2,$l2,$r2);
	&add($h1,$cc,$cc); &FR($h1);
	&cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2);
	&muh($a3,$word,($h3)=&NR(1));
	&add($r2,$cc,$r2);
	&st($r0,&QWPw(0,$rp)); &FR($r0);
	&add($h2,$t2,$h2); &FR($t2);
	&st($r1,&QWPw(1,$rp)); &FR($r1);
	&cmpult($r2,$cc,$cc);
	&mul($a3,$word,($l3)=&NR(1)); &FR($a3);
	&add($h2,$cc,$cc); &FR($h2);
	&st($r2,&QWPw(2,$rp)); &FR($r2);
	&sub($count,4,$count); # count-=4
	&add($rp,4*$QWS,$rp); # count+=4
	&add($r3,$l3,$r3);
	&add($ap,4*$QWS,$ap); # count+=4
	&cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3);
	&add($r3,$cc,$r3);
	&add($h3,$t3,$h3); &FR($t3);
	&cmpult($r3,$cc,$cc);
	&st($r3,&QWPw(-1,$rp)); &FR($r3);
	&add($h3,$cc,$cc); &FR($h3);

	###
	&blt($count,&label("finish"));
	&ld(($a0)=&NR(1),&QWPw(0,$ap));
	&br(&label("loop"));
EOF
##################################################
	# Do the words one at a time

	&set_label("last_loop");

	&ld(($a0)=&NR(1),&QWPw(0,$ap));	# get a
	&ld(($r0)=&NR(1),&QWPw(0,$rp));	# get the word being added to
	###
	###
	&muh($a0,$word,($h0)=&NR(1));
	### wait 8
	&mul($a0,$word,($l0)=&NR(1));	&FR($a0);
	&add($rp,$QWS,$rp);
	&add($ap,$QWS,$ap);
	&sub($count,1,$count);
	### wait 3 until l0 is available
	&add($r0,$l0,$r0);
	###
	&cmpult($r0,$l0,($t0)=&NR(1));	&FR($l0);	# carry out of r+l
	&add($r0,$cc,$r0);
	&add($h0,$t0,$h0);	&FR($t0);
	&cmpult($r0,$cc,$cc);			# carry out of adding the incoming carry
	&add($h0,$cc,$cc);	&FR($h0);

	&st($r0,&QWPw(-1,$rp));	&FR($r0);	# rp has already been advanced
	&bgt($count,&label("last_loop"));
	&function_end_A($name);

######################################################
	&set_label("finish");
	&add($count,4,$count);	# undo the bias: count is now the number of words left
	&bgt($count,&label("last_loop"));

	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
122
1231;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl
new file mode 100644
index 0000000000..9cc876ded4
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl
@@ -0,0 +1,215 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
4# upto
5
sub mul_add_c
	{
	# Emit the instruction sequence that accumulates the double-word
	# product a*b into the three-register accumulator (c2,c1,c0).
	#   $a,$b        - registers holding the two factors
	#   $c0,$c1,$c2  - accumulator registers, least significant first
	# Scratch registers come from the pool via &NR and are handed back
	# with &FR as soon as the generated code no longer needs them.
	local($a,$b,$c0,$c1,$c2)=@_;
	local($prod_lo,$prod_hi,$cy_lo,$cy_hi);

	# Both halves of the product (mul = low word, muh = high word,
	# presumably mulq/umulh on Alpha).
	($prod_lo)=&NR(1);
	&mul($a,$b,$prod_lo);
	($prod_hi)=&NR(1);
	&muh($a,$b,$prod_hi);

	# c0 += low word; the sum being below the addend signals a carry.
	&add($c0,$prod_lo,$c0);
	($cy_lo)=&NR(1);
	&cmpult($c0,$prod_lo,$cy_lo);
	&FR($prod_lo);

	# Fold that carry into the high word before it is added to c1.
	&add($cy_lo,$prod_hi,$prod_hi);
	&FR($cy_lo);

	# c1 += high word, and propagate its carry into c2.
	&add($c1,$prod_hi,$c1);
	($cy_hi)=&NR(1);
	&cmpult($c1,$prod_hi,$cy_hi);
	&FR($prod_hi);
	&add($c2,$cy_hi,$c2);
	&FR($cy_hi);
	}
20
sub bn_mul_comba4
	{
	# Emit a hand-scheduled 4x4 word comba multiply: r[0..7] = a[0..3]*b[0..3].
	#   $name - label of the generated function
	# The generated function takes (rp, ap, bp) as its three word parameters.
	#
	# Each result column is summed in $R.  Every add into $R is followed by
	# a cmpult whose 0/1 outcome is counted in $H1 or $H2; their sum seeds
	# $R for the next column.  Low product words (mul) belong to the column
	# i+j, high product words (muh) to column i+j+1.
	#
	# Fixes relative to the previous revision:
	#  - the second carry counter is now cleared ($H2; the old code zeroed
	#    the undefined $H0 and left $H2 uninitialised),
	#  - column 2 adds $r06 (a[0]*b[2] low word) instead of the already
	#    consumed $r01,
	#  - every "&add(...) &FR(...)" pair is now two statements; without the
	#    semicolon Perl parsed them as a bitwise AND of the two calls.
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);

	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	# Load the operands, interleaved with the first multiplies.
	&ld(($a[0])=&NR(1),&QWPw(0,$ap));
	&ld(($b[0])=&NR(1),&QWPw(0,$bp));
	&ld(($a[1])=&NR(1),&QWPw(1,$ap));
	&ld(($b[1])=&NR(1),&QWPw(1,$bp));
	&mul($a[0],$b[0],($r00)=&NR(1));
	&ld(($a[2])=&NR(1),&QWPw(2,$ap));
	&ld(($b[2])=&NR(1),&QWPw(2,$bp));
	&muh($a[0],$b[0],($r01)=&NR(1));
	# The pointer registers are released just before their last use so
	# that the final loads may reuse them as destinations.
	&FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
	&FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
	&mul($a[0],$b[1],($r02)=&NR(1));

	($R,$H1,$H2)=&NR(3);

	# Column 0: just the low word of a[0]*b[0].
	&st($r00,&QWPw(0,$rp));	&FR($r00);

	&mov("zero",$R);
	&mul($a[1],$b[0],($r03)=&NR(1));

	# Column 1.
	&mov("zero",$H1);
	&mov("zero",$H2);
	&add($R,$r01,$R);
	&muh($a[0],$b[1],($r04)=&NR(1));
	&cmpult($R,$r01,($t01)=&NR(1));	&FR($r01);
	&add($R,$r02,$R);
	&add($H1,$t01,$H1);		&FR($t01);
	&muh($a[1],$b[0],($r05)=&NR(1));
	&cmpult($R,$r02,($t02)=&NR(1));	&FR($r02);
	&add($R,$r03,$R);
	&add($H2,$t02,$H2);		&FR($t02);
	&mul($a[0],$b[2],($r06)=&NR(1));
	&cmpult($R,$r03,($t03)=&NR(1));	&FR($r03);
	&add($H1,$t03,$H1);		&FR($t03);
	&st($R,&QWPw(1,$rp));
	&add($H1,$H2,$R);

	# Column 2.
	&mov("zero",$H1);
	&add($R,$r04,$R);
	&mov("zero",$H2);
	&mul($a[1],$b[1],($r07)=&NR(1));
	&cmpult($R,$r04,($t04)=&NR(1));	&FR($r04);
	&add($R,$r05,$R);
	&add($H1,$t04,$H1);		&FR($t04);
	&mul($a[2],$b[0],($r08)=&NR(1));
	&cmpult($R,$r05,($t05)=&NR(1));	&FR($r05);
	&add($R,$r06,$R);
	&add($H2,$t05,$H2);		&FR($t05);
	&muh($a[0],$b[2],($r09)=&NR(1));
	&cmpult($R,$r06,($t06)=&NR(1));	&FR($r06);
	&add($R,$r07,$R);
	&add($H1,$t06,$H1);		&FR($t06);
	&muh($a[1],$b[1],($r10)=&NR(1));
	&cmpult($R,$r07,($t07)=&NR(1));	&FR($r07);
	&add($R,$r08,$R);
	&add($H2,$t07,$H2);		&FR($t07);
	&muh($a[2],$b[0],($r11)=&NR(1));
	&cmpult($R,$r08,($t08)=&NR(1));	&FR($r08);
	&add($H1,$t08,$H1);		&FR($t08);
	&st($R,&QWPw(2,$rp));
	&add($H1,$H2,$R);

	# Column 3.
	&mov("zero",$H1);
	&add($R,$r09,$R);
	&mov("zero",$H2);
	&mul($a[0],$b[3],($r12)=&NR(1));
	&cmpult($R,$r09,($t09)=&NR(1));	&FR($r09);
	&add($R,$r10,$R);
	&add($H1,$t09,$H1);		&FR($t09);
	&mul($a[1],$b[2],($r13)=&NR(1));
	&cmpult($R,$r10,($t10)=&NR(1));	&FR($r10);
	&add($R,$r11,$R);
	&add($H1,$t10,$H1);		&FR($t10);
	&mul($a[2],$b[1],($r14)=&NR(1));
	&cmpult($R,$r11,($t11)=&NR(1));	&FR($r11);
	&add($R,$r12,$R);
	&add($H1,$t11,$H1);		&FR($t11);
	&mul($a[3],$b[0],($r15)=&NR(1));
	&cmpult($R,$r12,($t12)=&NR(1));	&FR($r12);
	&add($R,$r13,$R);
	&add($H1,$t12,$H1);		&FR($t12);
	&muh($a[0],$b[3],($r16)=&NR(1));
	&cmpult($R,$r13,($t13)=&NR(1));	&FR($r13);
	&add($R,$r14,$R);
	&add($H1,$t13,$H1);		&FR($t13);
	&muh($a[1],$b[2],($r17)=&NR(1));
	&cmpult($R,$r14,($t14)=&NR(1));	&FR($r14);
	&add($R,$r15,$R);
	&add($H1,$t14,$H1);		&FR($t14);
	&muh($a[2],$b[1],($r18)=&NR(1));
	&cmpult($R,$r15,($t15)=&NR(1));	&FR($r15);
	&add($H1,$t15,$H1);		&FR($t15);
	&st($R,&QWPw(3,$rp));
	&add($H1,$H2,$R);

	# Column 4.
	&mov("zero",$H1);
	&add($R,$r16,$R);
	&mov("zero",$H2);
	&muh($a[3],$b[0],($r19)=&NR(1));
	&cmpult($R,$r16,($t16)=&NR(1));	&FR($r16);
	&add($R,$r17,$R);
	&add($H1,$t16,$H1);		&FR($t16);
	&mul($a[1],$b[3],($r20)=&NR(1));
	&cmpult($R,$r17,($t17)=&NR(1));	&FR($r17);
	&add($R,$r18,$R);
	&add($H1,$t17,$H1);		&FR($t17);
	&mul($a[2],$b[2],($r21)=&NR(1));
	&cmpult($R,$r18,($t18)=&NR(1));	&FR($r18);
	&add($R,$r19,$R);
	&add($H1,$t18,$H1);		&FR($t18);
	&mul($a[3],$b[1],($r22)=&NR(1));
	&cmpult($R,$r19,($t19)=&NR(1));	&FR($r19);
	&add($R,$r20,$R);
	&add($H1,$t19,$H1);		&FR($t19);
	&muh($a[1],$b[3],($r23)=&NR(1));
	&cmpult($R,$r20,($t20)=&NR(1));	&FR($r20);
	&add($R,$r21,$R);
	&add($H1,$t20,$H1);		&FR($t20);
	&muh($a[2],$b[2],($r24)=&NR(1));
	&cmpult($R,$r21,($t21)=&NR(1));	&FR($r21);
	&add($R,$r22,$R);
	&add($H1,$t21,$H1);		&FR($t21);
	&muh($a[3],$b[1],($r25)=&NR(1));
	&cmpult($R,$r22,($t22)=&NR(1));	&FR($r22);
	&add($H1,$t22,$H1);		&FR($t22);
	&st($R,&QWPw(4,$rp));
	&add($H1,$H2,$R);

	# Column 5.
	&mov("zero",$H1);
	&add($R,$r23,$R);
	&mov("zero",$H2);
	&mul($a[2],$b[3],($r26)=&NR(1));
	&cmpult($R,$r23,($t23)=&NR(1));	&FR($r23);
	&add($R,$r24,$R);
	&add($H1,$t23,$H1);		&FR($t23);
	&mul($a[3],$b[2],($r27)=&NR(1));
	&cmpult($R,$r24,($t24)=&NR(1));	&FR($r24);
	&add($R,$r25,$R);
	&add($H1,$t24,$H1);		&FR($t24);
	&muh($a[2],$b[3],($r28)=&NR(1));
	&cmpult($R,$r25,($t25)=&NR(1));	&FR($r25);
	&add($R,$r26,$R);
	&add($H1,$t25,$H1);		&FR($t25);
	&muh($a[3],$b[2],($r29)=&NR(1));
	&cmpult($R,$r26,($t26)=&NR(1));	&FR($r26);
	&add($R,$r27,$R);
	&add($H1,$t26,$H1);		&FR($t26);
	&mul($a[3],$b[3],($r30)=&NR(1));
	&cmpult($R,$r27,($t27)=&NR(1));	&FR($r27);
	&add($H1,$t27,$H1);		&FR($t27);
	&st($R,&QWPw(5,$rp));
	&add($H1,$H2,$R);

	# Column 6.
	&mov("zero",$H1);
	&add($R,$r28,$R);
	&mov("zero",$H2);
	&muh($a[3],$b[3],($r31)=&NR(1));
	&cmpult($R,$r28,($t28)=&NR(1));	&FR($r28);
	&add($R,$r29,$R);
	&add($H1,$t28,$H1);		&FR($t28);
	############
	&cmpult($R,$r29,($t29)=&NR(1));	&FR($r29);
	&add($R,$r30,$R);
	&add($H1,$t29,$H1);		&FR($t29);
	############
	&cmpult($R,$r30,($t30)=&NR(1));	&FR($r30);
	&add($H1,$t30,$H1);		&FR($t30);
	&st($R,&QWPw(6,$rp));
	&add($H1,$H2,$R);

	# Column 7: carries plus the high word of a[3]*b[3].
	&add($R,$r31,$R);		&FR($r31);
	&st($R,&QWPw(7,$rp));

	&FR($R,$H1,$H2);
	&function_end($name);

	&fin_pool;
	}
214
2151;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl
new file mode 100644
index 0000000000..79d86dd25c
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl
@@ -0,0 +1,98 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
sub mul_add_c
	{
	# Emit the instruction sequence that accumulates the double-word
	# product a*b into the three-register accumulator (c2,c1,c0).
	#   $a,$b        - registers holding the two factors
	#   $c0,$c1,$c2  - accumulator registers, least significant first
	# Also reports a running call count ($cnt, reset by the caller) on
	# STDERR while the assembler source is being generated.
	local($a,$b,$c0,$c1,$c2)=@_;
	local($prod_lo,$prod_hi,$cy_lo,$cy_hi);

	print STDERR "count=$cnt\n";
	$cnt++;

	# Both halves of the product, each in a fresh scratch register.
	($prod_lo)=&NR(1);
	&mul($a,$b,$prod_lo);
	($prod_hi)=&NR(1);
	&muh($a,$b,$prod_hi);

	# c0 += low word; the sum being below the addend signals a carry.
	&add($c0,$prod_lo,$c0);
	($cy_lo)=&NR(1);
	&cmpult($c0,$prod_lo,$cy_lo);
	&FR($prod_lo);

	# Fold that carry into the high word before it is added to c1.
	&add($cy_lo,$prod_hi,$prod_hi);
	&FR($cy_lo);

	# c1 += high word, and propagate its carry into c2.
	&add($c1,$prod_hi,$c1);
	($cy_hi)=&NR(1);
	&cmpult($c1,$prod_hi,$cy_hi);
	&FR($prod_hi);
	&add($c2,$cy_hi,$c2);
	&FR($cy_hi);
	}
19
sub bn_mul_comba4
	{
	# Emit a 4x4 word comba multiply built from &mul_add_c:
	# r[0..7] = a[0..3]*b[0..3].
	#   $name - label of the generated function
	# The generated function takes (rp, ap, bp) as its three word parameters.
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);
	local($slot,$colno,$pair,$ai,$bi,@plan);

	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	# Load a[0..3] and b[0..3] alternately; each pointer register is
	# released right after the last load through it.
	for ($slot=0; $slot<4; $slot++)
		{
		&ld(($a[$slot])=&NR(1),&QWPw($slot,$ap));
		&FR($ap) if ($slot == 3);
		&ld(($b[$slot])=&NR(1),&QWPw($slot,$bp));
		&FR($bp) if ($slot == 3);
		}

	# Column 0 is a plain multiply; afterwards the accumulator rotates so
	# that the old c1 becomes the new c0.
	($c0,$c1,$c2)=&NR(3);
	&mov("zero",$c2);
	&mul($a[0],$b[0],$c0);
	&muh($a[0],$b[0],$c1);
	&st($c0,&QWPw(0,$rp));
	&FR($c0);
	# NOTE(review): &NR is given a register name here, not a count;
	# mul_c8.pl uses &NR(1) at the same spot - confirm what &NR does.
	($c0)=&NR($c0);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# Columns 1..5: "i,j" means accumulate a[i]*b[j], in this order.
	@plan=("0,1 1,0",
	       "1,1 0,2 2,0",
	       "0,3 1,2 2,1 3,0",
	       "1,3 2,2 3,1",
	       "2,3 3,2");
	for ($colno=1; $colno<=5; $colno++)
		{
		foreach $pair (split(/ /,$plan[$colno-1]))
			{
			($ai,$bi)=split(/,/,$pair);
			&mul_add_c($a[$ai],$b[$bi],$c0,$c1,$c2);
			# A word is used for the last time when it meets word 3
			# of the other operand.
			&FR($a[$ai]) if ($bi == 3);
			&FR($b[$bi]) if ($ai == 3);
			}
		&st($c0,&QWPw($colno,$rp));
		&FR($c0);
		($c0)=&NR($c0);
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);
		}

	# Column 6 and the final carry word.
	&mul_add_c($a[3],$b[3],$c0,$c1,$c2);
	&FR($a[3],$b[3]);
	&st($c0,&QWPw(6,$rp));
	&st($c1,&QWPw(7,$rp));

	&FR($c0,$c1,$c2);

	&function_end($name);

	&fin_pool;
	}
97
981;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl
new file mode 100644
index 0000000000..525ca7494b
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl
@@ -0,0 +1,177 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
sub bn_mul_comba8
	{
	# Emit an 8x8 word comba multiply built from &mul_add_c:
	# r[0..15] = a[0..7]*b[0..7].
	#   $name - label of the generated function
	# The generated function takes (rp, ap, bp) as its three word parameters.
	#
	# Fix: words 4..7 of both operands used to be loaded from offset 1
	# (a copy/paste slip), so a[4..7]/b[4..7] all held a[1]/b[1].  The
	# loads are now generated in a loop with the offset taken from the
	# word index.  The column code emits exactly the same call sequence
	# as the previous unrolled version.
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);
	local($slot,$colno,$first,$last);

	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	# Two stack slots hold $reg_s0/$reg_s1 for the duration of the routine;
	# releasing them with &FR puts both registers into the scratch pool.
	&stack_push(2);
	&ld(($a[0])=&NR(1),&QWPw(0,$ap));
	&ld(($b[0])=&NR(1),&QWPw(0,$bp));
	&st($reg_s0,&swtmp(0)); &FR($reg_s0);
	&st($reg_s1,&swtmp(1)); &FR($reg_s1);
	for ($slot=1; $slot<8; $slot++)
		{
		&ld(($a[$slot])=&NR(1),&QWPw($slot,$ap));
		&FR($ap) if ($slot == 7);	# last load through ap
		&ld(($b[$slot])=&NR(1),&QWPw($slot,$bp));
		&FR($bp) if ($slot == 7);	# last load through bp
		}

	# Column 0 is a plain multiply; afterwards the accumulator rotates so
	# that the old c1 becomes the new c0.
	($c0,$c1,$c2)=&NR(3);
	&mov("zero",$c2);
	&mul($a[0],$b[0],$c0);
	&muh($a[0],$b[0],$c1);
	&st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# Columns 1..13: accumulate a[i]*b[colno-i] for ascending i.
	for ($colno=1; $colno<=13; $colno++)
		{
		$first=($colno > 7) ? $colno-7 : 0;
		$last =($colno < 7) ? $colno   : 7;
		for ($slot=$first; $slot<=$last; $slot++)
			{
			&mul_add_c($a[$slot],$b[$colno-$slot],$c0,$c1,$c2);
			# A word is used for the last time when it meets word 7
			# of the other operand.
			&FR($a[$slot]) if ($colno-$slot == 7);
			&FR($b[$colno-$slot]) if ($slot == 7);
			}
		&st($c0,&QWPw($colno,$rp)); &FR($c0); ($c0)=&NR(1);
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);
		}

	# Column 14 and the final carry word.
	&mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
	&st($c0,&QWPw(14,$rp));
	&st($c1,&QWPw(15,$rp));

	&FR($c0,$c1,$c2);

	# Restore the two registers saved at entry.
	&ld($reg_s0,&swtmp(0));
	&ld($reg_s1,&swtmp(1));
	&stack_pop(2);

	&function_end($name);

	&fin_pool;
	}
176
1771;
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr.pl b/src/lib/libcrypto/bn/asm/alpha/sqr.pl
new file mode 100644
index 0000000000..a55b696906
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/sqr.pl
@@ -0,0 +1,113 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
sub bn_sqr_words
	{
	# Emit bn_sqr_words: for each of `count` input words a[i], store the
	# low and high words of a[i]*a[i] to r[2i] and r[2i+1].
	#   $name - label of the generated function
	# The generated function takes (rp, ap, count) as its word parameters.
	#
	# Only the one-word-at-a-time loop exists; there is no 4-way unrolled
	# loop, so the generated code always branches straight to "finish".
	# Changes relative to the previous revision:
	#  - the blt and the two loads that followed the unconditional branch
	#    were unreachable and took two pool registers that were never
	#    released; they are gone,
	#  - the disabled here-document (a copy of the add_words loop that
	#    referenced $bp/$b0, which do not exist here) is gone,
	#  - the misspelt `$couny` was dropped from the local list.
	local($name)=@_;
	local($cc,$a,$b,$r);

	&init_pool(3);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$count=&wparam(2);

	&function_begin($name,"");

	&comment("");
	# The bias of 4 is undone again at "finish"; it is kept so the
	# generated entry sequence stays the same as before.
	&sub($count,4,$count);
	&mov("zero",$cc);
	&br(&label("finish"));

##################################################
	# One word per iteration

	&set_label("last_loop");

	&ld(($a0)=&NR(1),&QWPw(0,$ap));	# get a
	&mul($a0,$a0,($l0)=&NR(1));
	&add($ap,$QWS,$ap);		# one input word consumed
	&add($rp,2*$QWS,$rp);		# two output words produced
	&sub($count,1,$count);
	&muh($a0,$a0,($h0)=&NR(1));	&FR($a0);
	# rp has already been advanced, hence the negative offsets.
	&st($l0,&QWPw(-2,$rp));		&FR($l0);
	&st($h0,&QWPw(-1,$rp));		&FR($h0);

	&bgt($count,&label("last_loop"));
	&function_end_A($name);

######################################################
	&set_label("finish");
	&add($count,4,$count);		# undo the bias applied at entry
	&bgt($count,&label("last_loop"));

	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
112
1131;
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl
new file mode 100644
index 0000000000..bf33f5b503
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl
@@ -0,0 +1,109 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
sub sqr_add_c
	{
	# Emit the instruction sequence that accumulates the double-word
	# square a*a into the three-register accumulator (c2,c1,c0).
	#   $a           - register holding the word to square
	#   $c0,$c1,$c2  - accumulator registers, least significant first
	#
	# Fix: the carry out of c0 used to be added to c1 *after* c1's own
	# overflow test, so a wrap caused by that final +1 never reached c2.
	# The carry is now folded into the high product word first (that
	# cannot overflow: the high word of a square is at most 2^64-2) and
	# a single overflow test covers the whole addition into c1.
	local($a,$c0,$c1,$c2)=@_;
	local($l1,$h1,$t1,$t2);

	&mul($a,$a,($l1)=&NR(1));
	&muh($a,$a,($h1)=&NR(1));
	&add($c0,$l1,$c0);
	&cmpult($c0,$l1,($t1)=&NR(1));	&FR($l1);	# carry out of c0
	&add($t1,$h1,$h1);		&FR($t1);	# high word += carry
	&add($c1,$h1,$c1);
	&cmpult($c1,$h1,($t2)=&NR(1));	&FR($h1);	# carry out of c1
	&add($c2,$t2,$c2);		&FR($t2);
	}
18
sub sqr_add_c2
	{
	# Emit the instruction sequence that accumulates 2*a*b into the
	# three-register accumulator (c2,c1,c0).
	#   $a,$b        - registers holding the two factors
	#   $c0,$c1,$c2  - accumulator registers, least significant first
	#
	# Fixes:
	#  - the carry out of c0 used to be added to c1 after c1's overflow
	#    test, so a wrap caused by that +1 never reached c2; it now gets
	#    its own overflow test.  (It cannot simply be folded into the
	#    doubled high word first, because that word can be all ones.)
	#  - $lc1/$hc1 are now local instead of leaking as globals.
	local($a,$b,$c0,$c1,$c2)=@_;
	local($l1,$h1,$t1,$t2,$lc1,$hc1);

	&mul($a,$b,($l1)=&NR(1));
	&muh($a,$b,($h1)=&NR(1));

	# Double the 128-bit product: the top bit of each half (tested as
	# "negative") is the bit shifted out of that half.
	&cmplt($l1,"zero",($lc1)=&NR(1));
	&cmplt($h1,"zero",($hc1)=&NR(1));
	&add($l1,$l1,$l1);
	&add($h1,$h1,$h1);
	&add($h1,$lc1,$h1);		&FR($lc1);	# low half's top bit
	&add($c2,$hc1,$c2);		&FR($hc1);	# high half's top bit

	# Accumulate the doubled product.
	&add($c0,$l1,$c0);
	&add($c1,$h1,$c1);
	&cmpult($c0,$l1,($lc1)=&NR(1));	&FR($l1);	# carry out of c0
	&cmpult($c1,$h1,($hc1)=&NR(1));	&FR($h1);	# carry out of c1+=h1
	&add($c2,$hc1,$c2);		&FR($hc1);

	# Propagate the c0 carry through c1 and, if c1 wraps, into c2.
	&add($c1,$lc1,$c1);
	&cmpult($c1,$lc1,($hc1)=&NR(1));	&FR($lc1);
	&add($c2,$hc1,$c2);		&FR($hc1);
	}
41
42
sub bn_sqr_comba4
	{
	# Emit a 4-word comba squaring built from &sqr_add_c / &sqr_add_c2:
	# r[0..7] = a[0..3]^2.
	#   $name - label of the generated function
	# The generated function takes (rp, ap) as its two word parameters.
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);
	local($slot,$colno,$term,$i1,$i2,@plan);

	$cnt=1;
	&init_pool(2);

	$rp=&wparam(0);
	$ap=&wparam(1);

	&function_begin($name,"");

	&comment("");

	# Load the four input words, then release the pointer register.
	for ($slot=0; $slot<4; $slot++)
		{
		&ld(($a[$slot])=&NR(1),&QWPw($slot,$ap));
		}
	&FR($ap);

	($c0,$c1,$c2)=&NR(3);

	# Column 0 is a plain square; afterwards the accumulator rotates so
	# that the old c1 becomes the new c0.
	&mov("zero",$c2);
	&mul($a[0],$a[0],$c0);
	&muh($a[0],$a[0],$c1);
	&st($c0,&QWPw(0,$rp));
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# Columns 1..5.  A bare index "i" is the square a[i]^2; a pair "i,j"
	# is the doubled cross product 2*a[i]*a[j].  Terms are emitted in the
	# order listed.
	@plan=("0,1",
	       "1 2,0",
	       "3,0 2,1",
	       "2 3,1",
	       "3,2");
	for ($colno=1; $colno<=5; $colno++)
		{
		foreach $term (split(/ /,$plan[$colno-1]))
			{
			($i1,$i2)=split(/,/,$term);
			if ($term =~ /,/)
				{
				&sqr_add_c2($a[$i1],$a[$i2],$c0,$c1,$c2);
				}
			else
				{
				&sqr_add_c($a[$i1],$c0,$c1,$c2);
				}
			}
		&st($c0,&QWPw($colno,$rp));
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);
		}

	# Column 6 and the final carry word.
	&sqr_add_c($a[3],$c0,$c1,$c2);
	&st($c0,&QWPw(6,$rp));
	&st($c1,&QWPw(7,$rp));

	&function_end($name);

	&fin_pool;
	}
108
1091;
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl
new file mode 100644
index 0000000000..b4afe085f1
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl
@@ -0,0 +1,132 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
sub bn_sqr_comba8
	{
	# Emit an 8-word comba squaring built from &sqr_add_c / &sqr_add_c2:
	# r[0..15] = a[0..7]^2.
	#   $name - label of the generated function
	# The generated function takes (rp, ap) as its two word parameters.
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);
	local($slot,$colno,$last);

	$cnt=1;
	&init_pool(2);

	$rp=&wparam(0);
	$ap=&wparam(1);

	&function_begin($name,"");

	&comment("");

	# Load the eight input words, then release the pointer register.
	for ($slot=0; $slot<8; $slot++)
		{
		&ld(($a[$slot])=&NR(1),&QWPw($slot,$ap));
		}
	&FR($ap);

	($c0,$c1,$c2)=&NR(3);

	# Column 0 is a plain square; afterwards the accumulator rotates so
	# that the old c1 becomes the new c0.
	&mov("zero",$c2);
	&mul($a[0],$a[0],$c0);
	&muh($a[0],$a[0],$c1);
	&st($c0,&QWPw(0,$rp));
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# Columns 1..13.  An even column starts with the square of its middle
	# word; then come the doubled cross products a[i]*a[colno-i] for
	# ascending i above the middle.
	for ($colno=1; $colno<=13; $colno++)
		{
		if ($colno % 2 == 0)
			{
			&sqr_add_c($a[$colno/2],$c0,$c1,$c2);
			}
		$last=($colno < 7) ? $colno : 7;
		for ($slot=int($colno/2)+1; $slot<=$last; $slot++)
			{
			&sqr_add_c2($a[$slot],$a[$colno-$slot],$c0,$c1,$c2);
			}
		&st($c0,&QWPw($colno,$rp));
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);
		}

	# Column 14 and the final carry word.
	&sqr_add_c($a[7],$c0,$c1,$c2);
	&st($c0,&QWPw(14,$rp));
	&st($c1,&QWPw(15,$rp));

	&function_end($name);

	&fin_pool;
	}
131
1321;
diff --git a/src/lib/libcrypto/bn/asm/alpha/sub.pl b/src/lib/libcrypto/bn/asm/alpha/sub.pl
new file mode 100644
index 0000000000..d998da5c21
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/sub.pl
@@ -0,0 +1,108 @@
1#!/usr/local/bin/perl
2# alpha assembler
3
sub bn_sub_words
	{
	# Emit bn_sub_words: r[i] = a[i] - b[i] - borrow over `count` words,
	# with the running borrow kept in $cc (register r0).
	#   $name - label of the generated function
	# The generated function takes (rp, ap, bp, count) as word parameters.
	# A 4-way unrolled main loop is followed by a one-word tail loop.
	local($name)=@_;
	local($cc,$a,$b,$r);

	&init_pool(4);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);
	$count=&wparam(3);

	&function_begin($name,"");

	&comment("");
	&sub($count,4,$count);			# bias count by 4
	&mov("zero",$cc);			# no borrow yet
	&blt($count,&label("finish"));		# fewer than 4 words in total

	($a0,$b0)=&NR(2);
	&ld($a0,&QWPw(0,$ap));
	&ld($b0,&QWPw(0,$bp));

##########################################################
	# Main loop, 4 words per pass.  Loads for later words are
	# interleaved with the arithmetic on earlier ones.
	&set_label("loop");

	($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);

	# --- word 0 ---
	&ld($a1,&QWPw(1,$ap));
	&cmpult($a0,$b0,$tmp);			# borrow from a0-b0
	&ld($b1,&QWPw(1,$bp));
	&sub($a0,$b0,$a0);			# a0 -= b0
	&ld($a2,&QWPw(2,$ap));
	&cmpult($a0,$cc,$b0);			# borrow from taking off the old borrow
	&ld($b2,&QWPw(2,$bp));
	&sub($a0,$cc,$o0);			# result word 0
	&ld($a3,&QWPw(3,$ap));
	&add($b0,$tmp,$cc);			# new borrow
	($t1,$o1)=&NR(2);
	&FR($tmp);

	# --- word 1 ---
	&cmpult($a1,$b1,$t1);			# borrow from a1-b1
	&sub($a1,$b1,$a1);			# a1 -= b1
	&ld($b3,&QWPw(3,$bp));
	&cmpult($a1,$cc,$b1);			# borrow from taking off the old borrow
	&sub($a1,$cc,$o1);			# result word 1
	&add($b1,$t1,$cc);			# new borrow
	($tmp,$o2)=&NR(2);
	&FR($t1,$a1,$b1);

	# --- word 2 ---
	&cmpult($a2,$b2,$tmp);			# borrow from a2-b2
	&sub($a2,$b2,$a2);			# a2 -= b2
	&st($o0,&QWPw(0,$rp));			# store result word 0
	&FR($o0);
	&cmpult($a2,$cc,$b2);			# borrow from taking off the old borrow
	&sub($a2,$cc,$o2);			# result word 2
	&add($b2,$tmp,$cc);			# new borrow
	($t3,$o3)=&NR(2);
	&FR($tmp,$a2,$b2);

	# --- word 3 ---
	&cmpult($a3,$b3,$t3);			# borrow from a3-b3
	&sub($a3,$b3,$a3);			# a3 -= b3
	&st($o1,&QWPw(1,$rp));			# store result word 1
	&FR($o1);
	&cmpult($a3,$cc,$b3);			# borrow from taking off the old borrow
	&sub($a3,$cc,$o3);			# result word 3
	&add($b3,$t3,$cc);			# new borrow
	&FR($t3,$a3,$b3);

	&st($o2,&QWPw(2,$rp));			# store result word 2
	&FR($o2);
	&sub($count,4,$count);			# count-=4
	&st($o3,&QWPw(3,$rp));			# store result word 3
	&FR($o3);
	&add($ap,4*$QWS,$ap);			# ap += 4 words
	&add($bp,4*$QWS,$bp);			# bp += 4 words
	&add($rp,4*$QWS,$rp);			# rp += 4 words

	&blt($count,&label("finish"));
	&ld($a0,&QWPw(0,$ap));			# preload word 0 of the next pass
	&ld($b0,&QWPw(0,$bp));
	&br(&label("loop"));

##################################################
	# Tail loop for the last 0..3 words, one word per pass

	&set_label("last_loop");

	&ld($a0,&QWPw(0,$ap));			# get a
	&ld($b0,&QWPw(0,$bp));			# get b
	&cmpult($a0,$b0,$tmp);			# borrow from a-b
	&sub($a0,$b0,$a0);			# a -= b
	&cmpult($a0,$cc,$b0);			# borrow from taking off the old borrow
	&sub($a0,$cc,$a0);			# result word
	&st($a0,&QWPw(0,$rp));			# save
	&add($b0,$tmp,$cc);			# add the borrows

	&add($ap,$QWS,$ap);
	&add($bp,$QWS,$bp);
	&add($rp,$QWS,$rp);
	&sub($count,1,$count);
	&bgt($count,&label("last_loop"));
	&function_end_A($name);

######################################################
	&set_label("finish");
	&add($count,4,$count);			# undo the bias of 4
	&bgt($count,&label("last_loop"));

	&FR($a0,$b0);
	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
107
1081;
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
new file mode 100644
index 0000000000..c4de4a2bee
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/bn-586.pl
@@ -0,0 +1,593 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
# Driver: initialise the perlasm back end with the first command-line
# argument and this script's own name, call each generator sub defined
# below once with the symbol name to emit, then finish the output.
6&asm_init($ARGV[0],$0);
7
8&bn_mul_add_words("bn_mul_add_words");
9&bn_mul_words("bn_mul_words");
10&bn_sqr_words("bn_sqr_words");
11&bn_div_words("bn_div_words");
12&bn_add_words("bn_add_words");
13&bn_sub_words("bn_sub_words");
14#&bn_sub_part_words("bn_sub_part_words"); # generator exists below but is not emitted
15
16&asm_finish();
17
# bn_mul_add_words($name): emit an x86 routine called $name that, for each of
# the arg2 words, computes r[i] = low(a[i]*w + r[i] + carry) and carries the
# high part forward.  Arguments read: arg0 = r, arg1 = a, arg2 = word count,
# arg3 = w.  The final carry is moved to eax before the epilogue.
# Words are processed eight at a time, followed by up to seven tail words.
18sub bn_mul_add_words
19 {
20 local($name)=@_;
21
22 &function_begin($name,"");
23
24 &comment("");
 # NOTE: the register-name scalars below are not declared with local().
 # $Low and $High are not referenced again in this sub; the code spells
 # "eax"/"edx" out literally.
25 $Low="eax";
26 $High="edx";
27 $a="ebx";
28 $w="ebp";
29 $r="edi";
30 $c="esi";
31
32 &xor($c,$c); # clear carry
33 &mov($r,&wparam(0)); # r
34
35 &mov("ecx",&wparam(2)); # num
36 &mov($a,&wparam(1)); # a
37
38 &and("ecx",0xfffffff8); # num rounded down to a multiple of 8
39 &mov($w,&wparam(3)); # w
40
41 &push("ecx"); # reserve a stack slot for a tmp variable
42
 # the mov and push above leave the flags alone, so this still tests the AND
43 &jz(&label("maw_finish"));
44
45 &set_label("maw_loop",0);
46
47 &mov(&swtmp(0),"ecx"); # park the remaining count in the stack slot
48
49 for ($i=0; $i<32; $i+=4)
50 {
51 &comment("Round $i");
52
53 &mov("eax",&DWP($i,$a,"",0)); # *a
54 &mul($w); # edx:eax = *a * w
55 &add("eax",$c); # L(t)+=c (carry in)
56 &mov($c,&DWP($i,$r,"",0)); # reuse c's register to hold *r
57 &adc("edx",0); # H(t)+=carry
58 &add("eax",$c); # L(t)+= *r
59 &adc("edx",0); # H(t)+=carry
60 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
61 &mov($c,"edx"); # c= H(t);
62 }
63
64 &comment("");
65 &mov("ecx",&swtmp(0)); # reload the remaining count
66 &add($a,32);
67 &add($r,32);
68 &sub("ecx",8);
69 &jnz(&label("maw_loop"));
70
71 &set_label("maw_finish",0);
72 &mov("ecx",&wparam(2)); # get num
73 &and("ecx",7); # words left over after the blocks of 8
74 &jnz(&label("maw_finish2")); # helps branch prediction
75 &jmp(&label("maw_end"));
76
77 &set_label("maw_finish2",1);
 # Up to seven tail words; every round except the last decrements ecx
 # and leaves as soon as it reaches zero.
78 for ($i=0; $i<7; $i++)
79 {
80 &comment("Tail Round $i");
81 &mov("eax",&DWP($i*4,$a,"",0));# *a
82 &mul($w); # edx:eax = *a * w
83 &add("eax",$c); # L(t)+=c
84 &mov($c,&DWP($i*4,$r,"",0)); # reuse c's register to hold *r
85 &adc("edx",0); # H(t)+=carry
86 &add("eax",$c); # L(t)+= *r
87 &adc("edx",0); # H(t)+=carry
88 &dec("ecx") if ($i != 7-1); # sets ZF for the jz below; the movs keep it
89 &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
90 &mov($c,"edx"); # c= H(t);
91 &jz(&label("maw_end")) if ($i != 7-1);
92 }
93 &set_label("maw_end",0);
94 &mov("eax",$c); # final carry goes to eax
95
96 &pop("ecx"); # release the stack tmp variable
97
98 &function_end($name);
99 }
100
# bn_mul_words($name): emit an x86 routine called $name that, for each of the
# arg2 words, stores r[i] = low(a[i]*w + carry) and carries the high part
# forward.  Arguments read: arg0 = r, arg1 = a, arg2 = word count, arg3 = w.
# The final carry is moved to eax before the epilogue.
101sub bn_mul_words
102 {
103 local($name)=@_;
104
105 &function_begin($name,"");
106
107 &comment("");
 # NOTE: these register-name scalars are not declared with local();
 # $Low and $High are not referenced again in this sub.
108 $Low="eax";
109 $High="edx";
110 $a="ebx";
111 $w="ecx";
112 $r="edi";
113 $c="esi";
114 $num="ebp";
115
116 &xor($c,$c); # clear carry
117 &mov($r,&wparam(0)); # r
118 &mov($a,&wparam(1)); # a
119 &mov($num,&wparam(2)); # num
120 &mov($w,&wparam(3)); # w
121
122 &and($num,0xfffffff8); # num rounded down to a multiple of 8
123 &jz(&label("mw_finish"));
124
125 &set_label("mw_loop",0);
126 for ($i=0; $i<32; $i+=4)
127 {
128 &comment("Round $i");
129
130 &mov("eax",&DWP($i,$a,"",0)); # *a
131 &mul($w); # edx:eax = *a * w
132 &add("eax",$c); # L(t)+=c
133 # XXX
134
135 &adc("edx",0); # H(t)+=carry
136 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
137
138 &mov($c,"edx"); # c= H(t);
139 }
140
141 &comment("");
142 &add($a,32);
143 &add($r,32);
144 &sub($num,8);
145 &jz(&label("mw_finish")); # all full blocks done
146 &jmp(&label("mw_loop"));
147
148 &set_label("mw_finish",0);
149 &mov($num,&wparam(2)); # get num
150 &and($num,7); # words left over after the blocks of 8
151 &jnz(&label("mw_finish2"));
152 &jmp(&label("mw_end"));
153
154 &set_label("mw_finish2",1);
 # Up to seven tail words; every round except the last decrements the
 # count and leaves as soon as it reaches zero.
155 for ($i=0; $i<7; $i++)
156 {
157 &comment("Tail Round $i");
158 &mov("eax",&DWP($i*4,$a,"",0));# *a
159 &mul($w); # edx:eax = *a * w
160 &add("eax",$c); # L(t)+=c
161 # XXX
162 &adc("edx",0); # H(t)+=carry
163 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
164 &mov($c,"edx"); # c= H(t);
165 &dec($num) if ($i != 7-1);
166 &jz(&label("mw_end")) if ($i != 7-1);
167 }
168 &set_label("mw_end",0);
169 &mov("eax",$c); # final carry goes to eax
170
171 &function_end($name);
172 }
173
# bn_sqr_words($name): emit an x86 routine called $name that squares each of
# the arg2 input words and stores the double-width result as two consecutive
# words: r[2*i] = low(a[i]^2), r[2*i+1] = high(a[i]^2).
# Arguments read: arg0 = r, arg1 = a, arg2 = word count.
174sub bn_sqr_words
175 {
176 local($name)=@_;
177
178 &function_begin($name,"");
179
180 &comment("");
 # NOTE: these register-name scalars are not declared with local().
181 $r="esi";
182 $a="edi";
183 $num="ebx";
184
185 &mov($r,&wparam(0)); # r
186 &mov($a,&wparam(1)); # a
187 &mov($num,&wparam(2)); # num
188
189 &and($num,0xfffffff8); # num rounded down to a multiple of 8
190 &jz(&label("sw_finish"));
191
192 &set_label("sw_loop",0);
193 for ($i=0; $i<32; $i+=4)
194 {
195 &comment("Round $i");
196 &mov("eax",&DWP($i,$a,"",0)); # *a
197 # XXX
198 &mul("eax"); # edx:eax = *a * *a
199 &mov(&DWP($i*2,$r,"",0),"eax"); # low word of the square
200 &mov(&DWP($i*2+4,$r,"",0),"edx");# high word of the square
201 }
202
203 &comment("");
204 &add($a,32); # 8 input words consumed
205 &add($r,64); # 16 output words produced
206 &sub($num,8);
207 &jnz(&label("sw_loop"));
208
209 &set_label("sw_finish",0);
210 &mov($num,&wparam(2)); # get num
211 &and($num,7); # words left over after the blocks of 8
212 &jz(&label("sw_end"));
213
 # Up to seven tail words; every round except the last decrements the
 # count and leaves as soon as it reaches zero.
214 for ($i=0; $i<7; $i++)
215 {
216 &comment("Tail Round $i");
217 &mov("eax",&DWP($i*4,$a,"",0)); # *a
218 # XXX
219 &mul("eax"); # edx:eax = *a * *a
220 &mov(&DWP($i*8,$r,"",0),"eax"); # low word of the square
221 &dec($num) if ($i != 7-1); # sets ZF for the jz below; the mov keeps it
222 &mov(&DWP($i*8+4,$r,"",0),"edx"); # high word of the square
223 &jz(&label("sw_end")) if ($i != 7-1);
224 }
225 &set_label("sw_end",0);
226
227 &function_end($name);
228 }
229
# bn_div_words($name): emit an x86 routine called $name that loads arg0 into
# edx and arg1 into eax, then divides that edx:eax pair by arg2 with a single
# DIV instruction (quotient is left in eax, remainder in edx).
230sub bn_div_words
231 {
232 local($name)=@_;
233
234 &function_begin($name,"");
235 &mov("edx",&wparam(0)); # high half of the dividend
236 &mov("eax",&wparam(1)); # low half of the dividend
237 &mov("ebx",&wparam(2)); # divisor
238 &div("ebx");
239 &function_end($name);
240 }
241
# bn_add_words($name): emit an x86 routine called $name that stores
# r[i] = a[i] + b[i] + carry for each of the arg3 words.
# Arguments read: arg0 = r, arg1 = a, arg2 = b, arg3 = word count.
# The carry lives in eax throughout, so no final move to eax is emitted.
242sub bn_add_words
243 {
244 local($name)=@_;
245
246 &function_begin($name,"");
247
248 &comment("");
 # NOTE: these register-name scalars are not declared with local().
249 $a="esi";
250 $b="edi";
251 $c="eax";
252 $r="ebx";
253 $tmp1="ecx";
254 $tmp2="edx";
255 $num="ebp";
256
257 &mov($r,&wparam(0)); # get r
258 &mov($a,&wparam(1)); # get a
259 &mov($b,&wparam(2)); # get b
260 &mov($num,&wparam(3)); # get num
261 &xor($c,$c); # clear carry
262 &and($num,0xfffffff8); # num rounded down to a multiple of 8
263
264 &jz(&label("aw_finish"));
265
266 &set_label("aw_loop",0);
267 for ($i=0; $i<8; $i++)
268 {
269 &comment("Round $i");
270
271 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
272 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
273 &add($tmp1,$c); # *a + carry in
274 &mov($c,0); # zero c without touching the flags
275 &adc($c,$c); # c = carry out of the add above
276 &add($tmp1,$tmp2); # + *b
277 &adc($c,0); # c += carry out of the second add
278 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
279 }
280
281 &comment("");
282 &add($a,32);
283 &add($b,32);
284 &add($r,32);
285 &sub($num,8);
286 &jnz(&label("aw_loop"));
287
288 &set_label("aw_finish",0);
289 &mov($num,&wparam(3)); # get num
290 &and($num,7); # words left over after the blocks of 8
291 &jz(&label("aw_end"));
292
 # Up to seven tail words; every round except the last decrements the
 # count and leaves as soon as it reaches zero.
293 for ($i=0; $i<7; $i++)
294 {
295 &comment("Tail Round $i");
296 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
297 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
298 &add($tmp1,$c); # *a + carry in
299 &mov($c,0); # zero c without touching the flags
300 &adc($c,$c); # c = carry out of the add above
301 &add($tmp1,$tmp2); # + *b
302 &adc($c,0); # c += carry out of the second add
303 &dec($num) if ($i != 6); # sets ZF for the jz below; the mov keeps it
304 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
305 &jz(&label("aw_end")) if ($i != 6);
306 }
307 &set_label("aw_end",0);
308
309# &mov("eax",$c); # $c is "eax"
310
311 &function_end($name);
312 }
313
# bn_sub_words($name): emit an x86 routine called $name that stores
# r[i] = a[i] - b[i] - borrow for each of the arg3 words.
# Arguments read: arg0 = r, arg1 = a, arg2 = b, arg3 = word count.
# The borrow lives in eax throughout, so no final move to eax is emitted.
# The label names ("aw_...") are the same ones bn_add_words uses.
314sub bn_sub_words
315 {
316 local($name)=@_;
317
318 &function_begin($name,"");
319
320 &comment("");
 # NOTE: these register-name scalars are not declared with local().
321 $a="esi";
322 $b="edi";
323 $c="eax";
324 $r="ebx";
325 $tmp1="ecx";
326 $tmp2="edx";
327 $num="ebp";
328
329 &mov($r,&wparam(0)); # get r
330 &mov($a,&wparam(1)); # get a
331 &mov($b,&wparam(2)); # get b
332 &mov($num,&wparam(3)); # get num
333 &xor($c,$c); # clear borrow
334 &and($num,0xfffffff8); # num rounded down to a multiple of 8
335
336 &jz(&label("aw_finish"));
337
338 &set_label("aw_loop",0);
339 for ($i=0; $i<8; $i++)
340 {
341 &comment("Round $i");
342
343 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
344 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
345 &sub($tmp1,$c); # *a - borrow in
346 &mov($c,0); # zero c without touching the flags
347 &adc($c,$c); # c = borrow out of the sub above
348 &sub($tmp1,$tmp2); # - *b
349 &adc($c,0); # c += borrow out of the second sub
350 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
351 }
352
353 &comment("");
354 &add($a,32);
355 &add($b,32);
356 &add($r,32);
357 &sub($num,8);
358 &jnz(&label("aw_loop"));
359
360 &set_label("aw_finish",0);
361 &mov($num,&wparam(3)); # get num
362 &and($num,7); # words left over after the blocks of 8
363 &jz(&label("aw_end"));
364
 # Up to seven tail words; every round except the last decrements the
 # count and leaves as soon as it reaches zero.
365 for ($i=0; $i<7; $i++)
366 {
367 &comment("Tail Round $i");
368 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
369 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
370 &sub($tmp1,$c); # *a - borrow in
371 &mov($c,0); # zero c without touching the flags
372 &adc($c,$c); # c = borrow out of the sub above
373 &sub($tmp1,$tmp2); # - *b
374 &adc($c,0); # c += borrow out of the second sub
375 &dec($num) if ($i != 6); # sets ZF for the jz below; the mov keeps it
376 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
377 &jz(&label("aw_end")) if ($i != 6);
378 }
379 &set_label("aw_end",0);
380
381# &mov("eax",$c); # $c is "eax"
382
383 &function_end($name);
384 }
385
# bn_sub_part_words($name): emit an x86 routine called $name that first does
# r[i] = a[i] - b[i] - borrow for arg3 words (same scheme as bn_sub_words) and
# then processes |arg4| further words depending on the sign of arg4 ("dl"):
#   dl < 0 : r[i] = 0 - b[i] - borrow   (only b is read)
#   dl > 0 : r[i] = a[i] - borrow       (only a is read); once a word produces
#            no borrow the code jumps into a plain copy loop for the rest.
#   dl == 0: nothing further is done.
# Arguments read: arg0 = r, arg1 = a, arg2 = b, arg3 = common word count,
# arg4 = dl.  The borrow lives in eax.
# NOTE(review): the call to this generator at the top of the script is
# commented out, so this routine is not emitted by the script as shown.
386sub bn_sub_part_words
387 {
388 local($name)=@_;
389
390 &function_begin($name,"");
391
392 &comment("");
 # NOTE: these register-name scalars are not declared with local().
393 $a="esi";
394 $b="edi";
395 $c="eax";
396 $r="ebx";
397 $tmp1="ecx";
398 $tmp2="edx";
399 $num="ebp";
400
401 &mov($r,&wparam(0)); # get r
402 &mov($a,&wparam(1)); # get a
403 &mov($b,&wparam(2)); # get b
404 &mov($num,&wparam(3)); # get num
405 &xor($c,$c); # clear borrow
406 &and($num,0xfffffff8); # num rounded down to a multiple of 8
407
408 &jz(&label("aw_finish"));
409
410 &set_label("aw_loop",0);
411 for ($i=0; $i<8; $i++)
412 {
413 &comment("Round $i");
414
415 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
416 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
417 &sub($tmp1,$c); # *a - borrow in
418 &mov($c,0); # zero c without touching the flags
419 &adc($c,$c); # c = borrow out of the sub above
420 &sub($tmp1,$tmp2); # - *b
421 &adc($c,0); # c += borrow out of the second sub
422 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
423 }
424
425 &comment("");
426 &add($a,32);
427 &add($b,32);
428 &add($r,32);
429 &sub($num,8);
430 &jnz(&label("aw_loop"));
431
432 &set_label("aw_finish",0);
433 &mov($num,&wparam(3)); # get num
434 &and($num,7); # words left over after the blocks of 8
435 &jz(&label("aw_end"));
436
 # Unlike bn_sub_words, the tail rounds use offset 0 and advance the
 # three pointers by one word each round, so a, b and r point just past
 # the common part when the dl section below starts.
437 for ($i=0; $i<7; $i++)
438 {
439 &comment("Tail Round $i");
440 &mov($tmp1,&DWP(0,$a,"",0)); # *a
441 &mov($tmp2,&DWP(0,$b,"",0));# *b
442 &sub($tmp1,$c); # *a - borrow in
443 &mov($c,0); # zero c without touching the flags
444 &adc($c,$c); # c = borrow out of the sub above
445 &sub($tmp1,$tmp2); # - *b
446 &adc($c,0); # c += borrow out of the second sub
447 &mov(&DWP(0,$r,"",0),$tmp1); # *r
448 &add($a, 4);
449 &add($b, 4);
450 &add($r, 4);
451 &dec($num) if ($i != 6);
452 &jz(&label("aw_end")) if ($i != 6);
453 }
454 &set_label("aw_end",0);
455
456 &cmp(&wparam(4),0); # dl == 0: nothing more to do
457 &je(&label("pw_end"));
458
459 &mov($num,&wparam(4)); # get dl
460 &cmp($num,0);
461 &je(&label("pw_end"));
462 &jge(&label("pw_pos"));
463
464 &comment("pw_neg");
 # dl < 0: negate it to get the number of extra words taken from b
465 &mov($tmp2,0);
466 &sub($tmp2,$num);
467 &mov($num,$tmp2);
468 &and($num,0xfffffff8); # -dl rounded down to a multiple of 8
469 &jz(&label("pw_neg_finish"));
470
471 &set_label("pw_neg_loop",0);
472 for ($i=0; $i<8; $i++)
473 {
474 &comment("dl<0 Round $i");
475
476 &mov($tmp1,0); # minuend is zero
477 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
478 &sub($tmp1,$c); # 0 - borrow in
479 &mov($c,0); # zero c without touching the flags
480 &adc($c,$c); # c = borrow out of the sub above
481 &sub($tmp1,$tmp2); # - *b
482 &adc($c,0); # c += borrow out of the second sub
483 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
484 }
485
486 &comment("");
487 &add($b,32);
488 &add($r,32);
489 &sub($num,8);
490 &jnz(&label("pw_neg_loop"));
491
492 &set_label("pw_neg_finish",0);
493 &mov($tmp2,&wparam(4)); # get dl
494 &mov($num,0);
495 &sub($num,$tmp2); # num = -dl
496 &and($num,7); # words left over after the blocks of 8
497 &jz(&label("pw_end"));
498
499 for ($i=0; $i<7; $i++)
500 {
501 &comment("dl<0 Tail Round $i");
502 &mov($tmp1,0); # minuend is zero
503 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
504 &sub($tmp1,$c); # 0 - borrow in
505 &mov($c,0); # zero c without touching the flags
506 &adc($c,$c); # c = borrow out of the sub above
507 &sub($tmp1,$tmp2); # - *b
508 &adc($c,0); # c += borrow out of the second sub
509 &dec($num) if ($i != 6); # sets ZF for the jz below; the mov keeps it
510 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
511 &jz(&label("pw_end")) if ($i != 6);
512 }
513
514 &jmp(&label("pw_end"));
515
516 &set_label("pw_pos",0);
517
 # dl > 0: num still holds dl here
518 &and($num,0xfffffff8); # dl rounded down to a multiple of 8
519 &jz(&label("pw_pos_finish"));
520
521 &set_label("pw_pos_loop",0);
522
523 for ($i=0; $i<8; $i++)
524 {
525 &comment("dl>0 Round $i");
526
527 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
528 &sub($tmp1,$c); # *a - borrow in
529 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
 # no borrow out: continue in the copy loop right after word $i
530 &jnc(&label("pw_nc".$i));
531 }
532
533 &comment("");
534 &add($a,32);
535 &add($r,32);
536 &sub($num,8);
537 &jnz(&label("pw_pos_loop"));
538
539 &set_label("pw_pos_finish",0);
540 &mov($num,&wparam(4)); # get dl
541 &and($num,7); # words left over after the blocks of 8
542 &jz(&label("pw_end"));
543
544 for ($i=0; $i<7; $i++)
545 {
546 &comment("dl>0 Tail Round $i");
547 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
548 &sub($tmp1,$c); # *a - borrow in
549 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
 # no borrow out: continue in the tail copy code right after word $i
550 &jnc(&label("pw_tail_nc".$i));
551 &dec($num) if ($i != 6);
552 &jz(&label("pw_end")) if ($i != 6);
553 }
554 &mov($c,1); # reached only when the seventh tail word still borrowed
555 &jmp(&label("pw_end"));
556
 # Copy loop used once the borrow has died out: r[i] = a[i].
 # The pw_nc<i> labels are entry points placed after the copy of word i.
557 &set_label("pw_nc_loop",0);
558 for ($i=0; $i<8; $i++)
559 {
560 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
561 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
562 &set_label("pw_nc".$i,0);
563 }
564
565 &comment("");
566 &add($a,32);
567 &add($r,32);
568 &sub($num,8);
569 &jnz(&label("pw_nc_loop"));
570
571 &mov($num,&wparam(4)); # get dl
572 &and($num,7); # words left over after the blocks of 8
573 &jz(&label("pw_nc_end"));
574
575 for ($i=0; $i<7; $i++)
576 {
577 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
578 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
579 &set_label("pw_tail_nc".$i,0);
580 &dec($num) if ($i != 6);
581 &jz(&label("pw_nc_end")) if ($i != 6);
582 }
583
584 &set_label("pw_nc_end",0);
585 &mov($c,0); # the copy path ends with no borrow
586
587 &set_label("pw_end",0);
588
589# &mov("eax",$c); # $c is "eax"
590
591 &function_end($name);
592 }
593
diff --git a/src/lib/libcrypto/bn/asm/bn-alpha.pl b/src/lib/libcrypto/bn/asm/bn-alpha.pl
new file mode 100644
index 0000000000..302edf2376
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/bn-alpha.pl
@@ -0,0 +1,571 @@
1#!/usr/local/bin/perl
2# I have this in perl so I can use more useful register names and then convert
3# them into alpha registers.
4#
5
# Fetch the assembler template from data() and turn the symbolic register
# names into Alpha register numbers, then print the result.  Each s///g is a
# plain substring replacement applied to the whole template (comments
# included), so e.g. "$CC" becomes "$0" and "$A1" becomes "$20".
6$d=&data();
7$d =~ s/CC/0/g; # carry
8$d =~ s/R1/1/g; # R1..R4 -> $1..$4
9$d =~ s/R2/2/g;
10$d =~ s/R3/3/g;
11$d =~ s/R4/4/g;
12$d =~ s/L1/5/g; # L1..L4 -> $5..$8
13$d =~ s/L2/6/g;
14$d =~ s/L3/7/g;
15$d =~ s/L4/8/g;
16$d =~ s/O1/22/g; # O1..O4 -> $22..$25
17$d =~ s/O2/23/g;
18$d =~ s/O3/24/g;
19$d =~ s/O4/25/g;
20$d =~ s/A1/20/g; # A1..A4 -> $20, $21, $27, $28
21$d =~ s/A2/21/g;
22$d =~ s/A3/27/g;
23$d =~ s/A4/28/g;
 # empty, never-taken branch left in place
24if (0){
25}
26
27print $d;
28
29sub data
30 {
31 local($data)=<<'EOF';
32
33 # DEC Alpha assembler
34 # The bn_div_words is actually gcc output but the other parts are hand done.
35 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
36 # bn_div_words.
37 # I've gone back and re-done most of the routines.
38 # The key thing to remember for the 164 CPU is that while a
39 # multiply operation takes 8 cycles, another one can only be issued
40 # after 4 cycles have elapsed. I've made modifications to help
41 # improve this. Also, normally, a ld instruction will not be available
42 # for about 3 cycles.
43 .file 1 "bn_asm.c"
44 .set noat
45gcc2_compiled.:
46__gnu_compiled_c:
47 .text
48 .align 3
 # bn_mul_add_words: for each of the $18 words, r[i] = low(a[i]*w + r[i] + carry)
 # with the high part carried forward.  $16 = r, $17 = a, $18 = word count,
 # $19 = w.  The running carry is kept in $CC.
 # Four words are handled per pass of the $42 loop; $45 handles one word
 # at a time for the remainder.
49 .globl bn_mul_add_words
50 .ent bn_mul_add_words
51bn_mul_add_words:
52bn_mul_add_words..ng:
53 .frame $30,0,$26,0
54 .prologue 0
55 .align 5
56 subq $18,4,$18
57 bis $31,$31,$CC
58 blt $18,$43 # fewer than 4 words: count is now -1, -2, -3 or -4, goto tail code
59 ldq $A1,0($17) # 1 1
60 ldq $R1,0($16) # 1 1
61 .align 3
62$42:
63 mulq $A1,$19,$L1 # 1 2 1 ######
64 ldq $A2,8($17) # 2 1
65 ldq $R2,8($16) # 2 1
66 umulh $A1,$19,$A1 # 1 2 ######
67 ldq $A3,16($17) # 3 1
68 ldq $R3,16($16) # 3 1
69 mulq $A2,$19,$L2 # 2 2 1 ######
70 ldq $A4,24($17) # 4 1
71 addq $R1,$L1,$R1 # 1 2 2
72 ldq $R4,24($16) # 4 1
73 umulh $A2,$19,$A2 # 2 2 ######
74 cmpult $R1,$L1,$O1 # 1 2 3 1
75 addq $A1,$O1,$A1 # 1 3 1
76 addq $R1,$CC,$R1 # 1 2 3 1
77 mulq $A3,$19,$L3 # 3 2 1 ######
78 cmpult $R1,$CC,$CC # 1 2 3 2
79 addq $R2,$L2,$R2 # 2 2 2
80 addq $A1,$CC,$CC # 1 3 2
81 cmpult $R2,$L2,$O2 # 2 2 3 1
82 addq $A2,$O2,$A2 # 2 3 1
83 umulh $A3,$19,$A3 # 3 2 ######
84 addq $R2,$CC,$R2 # 2 2 3 1
85 cmpult $R2,$CC,$CC # 2 2 3 2
86 subq $18,4,$18
87 mulq $A4,$19,$L4 # 4 2 1 ######
88 addq $A2,$CC,$CC # 2 3 2
89 addq $R3,$L3,$R3 # 3 2 2
90 addq $16,32,$16
91 cmpult $R3,$L3,$O3 # 3 2 3 1
92 stq $R1,-32($16) # 1 2 4
93 umulh $A4,$19,$A4 # 4 2 ######
94 addq $A3,$O3,$A3 # 3 3 1
95 addq $R3,$CC,$R3 # 3 2 3 1
96 stq $R2,-24($16) # 2 2 4
97 cmpult $R3,$CC,$CC # 3 2 3 2
98 stq $R3,-16($16) # 3 2 4
99 addq $R4,$L4,$R4 # 4 2 2
100 addq $A3,$CC,$CC # 3 3 2
101 cmpult $R4,$L4,$O4 # 4 2 3 1
102 addq $17,32,$17
103 addq $A4,$O4,$A4 # 4 3 1
104 addq $R4,$CC,$R4 # 4 2 3 1
105 cmpult $R4,$CC,$CC # 4 2 3 2
106 stq $R4,-8($16) # 4 2 4
107 addq $A4,$CC,$CC # 4 3 2
108 blt $18,$43
109
110 ldq $A1,0($17) # 1 1
111 ldq $R1,0($16) # 1 1
112
113 br $42
114
115 .align 4
 # one word per pass
116$45:
117 ldq $A1,0($17) # 4 1
118 ldq $R1,0($16) # 4 1
119 mulq $A1,$19,$L1 # 4 2 1
120 subq $18,1,$18
121 addq $16,8,$16
122 addq $17,8,$17
123 umulh $A1,$19,$A1 # 4 2
124 addq $R1,$L1,$R1 # 4 2 2
125 cmpult $R1,$L1,$O1 # 4 2 3 1
126 addq $A1,$O1,$A1 # 4 3 1
127 addq $R1,$CC,$R1 # 4 2 3 1
128 cmpult $R1,$CC,$CC # 4 2 3 2
129 addq $A1,$CC,$CC # 4 3 2
130 stq $R1,-8($16) # 4 2 4
131 bgt $18,$45
132 ret $31,($26),1 # else exit
133
134 .align 4
135$43:
136 addq $18,4,$18 # undo the bias of 4 to get the leftover word count
137 bgt $18,$45 # goto tail code
138 ret $31,($26),1 # else exit
139
140 .end bn_mul_add_words
 # bn_mul_words: for each of the $18 words, r[i] = low(a[i]*w + carry) with the
 # high part carried forward.  $16 = r, $17 = a, $18 = word count, $19 = w.
 # The running carry is kept in $CC.  Four words are handled per pass of the
 # $142 loop; $145 handles one word at a time for the remainder.
141 .align 3
142 .globl bn_mul_words
143 .ent bn_mul_words
144bn_mul_words:
145bn_mul_words..ng:
146 .frame $30,0,$26,0
147 .prologue 0
148 .align 5
149 subq $18,4,$18
150 bis $31,$31,$CC
151 blt $18,$143 # fewer than 4 words: count is now -1, -2, -3 or -4, goto tail code
152 ldq $A1,0($17) # 1 1
153 .align 3
154$142:
155
156 mulq $A1,$19,$L1 # 1 2 1 #####
157 ldq $A2,8($17) # 2 1
158 ldq $A3,16($17) # 3 1
159 umulh $A1,$19,$A1 # 1 2 #####
160 ldq $A4,24($17) # 4 1
161 mulq $A2,$19,$L2 # 2 2 1 #####
162 addq $L1,$CC,$L1 # 1 2 3 1
163 subq $18,4,$18
164 cmpult $L1,$CC,$CC # 1 2 3 2
165 umulh $A2,$19,$A2 # 2 2 #####
166 addq $A1,$CC,$CC # 1 3 2
167 addq $17,32,$17
168 addq $L2,$CC,$L2 # 2 2 3 1
169 mulq $A3,$19,$L3 # 3 2 1 #####
170 cmpult $L2,$CC,$CC # 2 2 3 2
171 addq $A2,$CC,$CC # 2 3 2
172 addq $16,32,$16
173 umulh $A3,$19,$A3 # 3 2 #####
174 stq $L1,-32($16) # 1 2 4
175 mulq $A4,$19,$L4 # 4 2 1 #####
176 addq $L3,$CC,$L3 # 3 2 3 1
177 stq $L2,-24($16) # 2 2 4
178 cmpult $L3,$CC,$CC # 3 2 3 2
179 umulh $A4,$19,$A4 # 4 2 #####
180 addq $A3,$CC,$CC # 3 3 2
181 stq $L3,-16($16) # 3 2 4
182 addq $L4,$CC,$L4 # 4 2 3 1
183 cmpult $L4,$CC,$CC # 4 2 3 2
184
185 addq $A4,$CC,$CC # 4 3 2
186
187 stq $L4,-8($16) # 4 2 4
188
189 blt $18,$143
190
191 ldq $A1,0($17) # 1 1
192
193 br $142
194
195 .align 4
 # one word per pass
196$145:
197 ldq $A1,0($17) # 4 1
198 mulq $A1,$19,$L1 # 4 2 1
199 subq $18,1,$18
200 umulh $A1,$19,$A1 # 4 2
201 addq $L1,$CC,$L1 # 4 2 3 1
202 addq $16,8,$16
203 cmpult $L1,$CC,$CC # 4 2 3 2
204 addq $17,8,$17
205 addq $A1,$CC,$CC # 4 3 2
206 stq $L1,-8($16) # 4 2 4
207
208 bgt $18,$145
209 ret $31,($26),1 # else exit
210
211 .align 4
212$143:
213 addq $18,4,$18 # undo the bias of 4 to get the leftover word count
214 bgt $18,$145 # goto tail code
215 ret $31,($26),1 # else exit
216
217 .end bn_mul_words
218 .align 3
219 .globl bn_sqr_words
220 .ent bn_sqr_words
221bn_sqr_words:
222bn_sqr_words..ng:
223 .frame $30,0,$26,0
224 .prologue 0
225
226 subq $18,4,$18
227 blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
228 ldq $A1,0($17) # 1 1
229 .align 3
230$542:
231 mulq $A1,$A1,$L1 ######
232 ldq $A2,8($17) # 1 1
233 subq $18,4
234 umulh $A1,$A1,$R1 ######
235 ldq $A3,16($17) # 1 1
236 mulq $A2,$A2,$L2 ######
237 ldq $A4,24($17) # 1 1
238 stq $L1,0($16) # r[0]
239 umulh $A2,$A2,$R2 ######
240 stq $R1,8($16) # r[1]
241 mulq $A3,$A3,$L3 ######
242 stq $L2,16($16) # r[0]
243 umulh $A3,$A3,$R3 ######
244 stq $R2,24($16) # r[1]
245 mulq $A4,$A4,$L4 ######
246 stq $L3,32($16) # r[0]
247 umulh $A4,$A4,$R4 ######
248 stq $R3,40($16) # r[1]
249
250 addq $16,64,$16
251 addq $17,32,$17
252 stq $L4,-16($16) # r[0]
253 stq $R4,-8($16) # r[1]
254
255 blt $18,$543
256 ldq $A1,0($17) # 1 1
257 br $542
258
259$442:
260 ldq $A1,0($17) # a[0]
261 mulq $A1,$A1,$L1 # a[0]*w low part r2
262 addq $16,16,$16
263 addq $17,8,$17
264 subq $18,1,$18
265 umulh $A1,$A1,$R1 # a[0]*w high part r3
266 stq $L1,-16($16) # r[0]
267 stq $R1,-8($16) # r[1]
268
269 bgt $18,$442
270 ret $31,($26),1 # else exit
271
272 .align 4
273$543:
274 addq $18,4,$18
275 bgt $18,$442 # goto tail code
276 ret $31,($26),1 # else exit
277 .end bn_sqr_words
278
279 .align 3
280 .globl bn_add_words
281 .ent bn_add_words
282bn_add_words:
283bn_add_words..ng:
284 .frame $30,0,$26,0
285 .prologue 0
286
287 subq $19,4,$19
288 bis $31,$31,$CC # carry = 0
289 blt $19,$900
290 ldq $L1,0($17) # a[0]
291 ldq $R1,0($18) # b[1]
292 .align 3
293$901:
294 addq $R1,$L1,$R1 # r=a+b;
295 ldq $L2,8($17) # a[1]
296 cmpult $R1,$L1,$O1 # did we overflow?
297 ldq $R2,8($18) # b[1]
298 addq $R1,$CC,$R1 # c+= overflow
299 ldq $L3,16($17) # a[2]
300 cmpult $R1,$CC,$CC # overflow?
301 ldq $R3,16($18) # b[2]
302 addq $CC,$O1,$CC
303 ldq $L4,24($17) # a[3]
304 addq $R2,$L2,$R2 # r=a+b;
305 ldq $R4,24($18) # b[3]
306 cmpult $R2,$L2,$O2 # did we overflow?
307 addq $R3,$L3,$R3 # r=a+b;
308 addq $R2,$CC,$R2 # c+= overflow
309 cmpult $R3,$L3,$O3 # did we overflow?
310 cmpult $R2,$CC,$CC # overflow?
311 addq $R4,$L4,$R4 # r=a+b;
312 addq $CC,$O2,$CC
313 cmpult $R4,$L4,$O4 # did we overflow?
314 addq $R3,$CC,$R3 # c+= overflow
315 stq $R1,0($16) # r[0]=c
316 cmpult $R3,$CC,$CC # overflow?
317 stq $R2,8($16) # r[1]=c
318 addq $CC,$O3,$CC
319 stq $R3,16($16) # r[2]=c
320 addq $R4,$CC,$R4 # c+= overflow
321 subq $19,4,$19 # loop--
322 cmpult $R4,$CC,$CC # overflow?
323 addq $17,32,$17 # a++
324 addq $CC,$O4,$CC
325 stq $R4,24($16) # r[3]=c
326 addq $18,32,$18 # b++
327 addq $16,32,$16 # r++
328
329 blt $19,$900
330 ldq $L1,0($17) # a[0]
331 ldq $R1,0($18) # b[1]
332 br $901
333 .align 4
334$945:
335 ldq $L1,0($17) # a[0]
336 ldq $R1,0($18) # b[1]
337 addq $R1,$L1,$R1 # r=a+b;
338 subq $19,1,$19 # loop--
339 addq $R1,$CC,$R1 # c+= overflow
340 addq $17,8,$17 # a++
341 cmpult $R1,$L1,$O1 # did we overflow?
342 cmpult $R1,$CC,$CC # overflow?
343 addq $18,8,$18 # b++
344 stq $R1,0($16) # r[0]=c
345 addq $CC,$O1,$CC
346 addq $16,8,$16 # r++
347
348 bgt $19,$945
349 ret $31,($26),1 # else exit
350
351$900:
352 addq $19,4,$19
353 bgt $19,$945 # goto tail code
354 ret $31,($26),1 # else exit
355 .end bn_add_words
356
 # bn_sub_words: r[i] = a[i] - b[i] - borrow for each of the $19 words.
 # $16 = r, $17 = a, $18 = b, $19 = word count.  The running borrow is kept
 # in $CC.
 # Only the one-word-per-pass loop at $845 is ever executed: the unconditional
 # "br $800" below skips the four-way unrolled loop, and $801 is referenced
 # only from inside that skipped region.  The skipped region still contains
 # the addition code of bn_add_words (addq, not subq), so it must not be
 # enabled as it stands.
357 .align 3
358 .globl bn_sub_words
359 .ent bn_sub_words
360bn_sub_words:
361bn_sub_words..ng:
362 .frame $30,0,$26,0
363 .prologue 0
364
365 subq $19,4,$19
366 bis $31,$31,$CC # borrow = 0
367 br $800 # always use the tail code; nothing below is reached until $845
368 blt $19,$800
369 ldq $L1,0($17) # a[0]
370 ldq $R1,0($18) # b[0]
371 .align 3
 # unreachable: addition code copied from bn_add_words
372$801:
373 addq $R1,$L1,$R1 # r=a+b;
374 ldq $L2,8($17) # a[1]
375 cmpult $R1,$L1,$O1 # did we overflow?
376 ldq $R2,8($18) # b[1]
377 addq $R1,$CC,$R1 # c+= overflow
378 ldq $L3,16($17) # a[2]
379 cmpult $R1,$CC,$CC # overflow?
380 ldq $R3,16($18) # b[2]
381 addq $CC,$O1,$CC
382 ldq $L4,24($17) # a[3]
383 addq $R2,$L2,$R2 # r=a+b;
384 ldq $R4,24($18) # b[3]
385 cmpult $R2,$L2,$O2 # did we overflow?
386 addq $R3,$L3,$R3 # r=a+b;
387 addq $R2,$CC,$R2 # c+= overflow
388 cmpult $R3,$L3,$O3 # did we overflow?
389 cmpult $R2,$CC,$CC # overflow?
390 addq $R4,$L4,$R4 # r=a+b;
391 addq $CC,$O2,$CC
392 cmpult $R4,$L4,$O4 # did we overflow?
393 addq $R3,$CC,$R3 # c+= overflow
394 stq $R1,0($16) # r[0]=c
395 cmpult $R3,$CC,$CC # overflow?
396 stq $R2,8($16) # r[1]=c
397 addq $CC,$O3,$CC
398 stq $R3,16($16) # r[2]=c
399 addq $R4,$CC,$R4 # c+= overflow
400 subq $19,4,$19 # loop--
401 cmpult $R4,$CC,$CC # overflow?
402 addq $17,32,$17 # a++
403 addq $CC,$O4,$CC
404 stq $R4,24($16) # r[3]=c
405 addq $18,32,$18 # b++
406 addq $16,32,$16 # r++
407
408 blt $19,$800
409 ldq $L1,0($17) # a[0]
410 ldq $R1,0($18) # b[0]
411 br $801
412 .align 4
 # one word per pass (the only code path actually used)
413$845:
414 ldq $L1,0($17) # a[0]
415 ldq $R1,0($18) # b[0]
416 cmpult $L1,$R1,$O1 # will a-b borrow?
417 subq $L1,$R1,$R1 # r=a-b;
418 subq $19,1,$19 # loop--
419 cmpult $R1,$CC,$O2 # will subtracting the borrow-in borrow?
420 subq $R1,$CC,$R1 # r-= borrow in
421 addq $17,8,$17 # a++
422 addq $18,8,$18 # b++
423 stq $R1,0($16) # r[0]=c
424 addq $O2,$O1,$CC # new borrow
425 addq $16,8,$16 # r++
426
427 bgt $19,$845
428 ret $31,($26),1 # else exit
429
430$800:
431 addq $19,4,$19 # undo the bias of 4 to get the full word count
432 bgt $19,$845 # goto tail code
433 ret $31,($26),1 # else exit
434 .end bn_sub_words
435
436 #
437 # What follows was taken directly from the C compiler with a few
438 # hacks to redo the labels.
439 #
 # bn_div_words: compiler-generated division routine.  The three arguments
 # $16, $17, $18 are copied to $9, $10, $11; a zero third argument returns -1
 # in $0.  The routine calls BN_num_bits_word and, on a failed range check,
 # abort.  It saves and restores $26 and $9..$13 in a 48-byte stack frame.
 # NOTE(review): the symbolic-register substitutions done by the enclosing
 # script use names such as $CC or $R1; none of them occur in this routine.
440.text
441 .align 3
442 .globl bn_div_words
443 .ent bn_div_words
444bn_div_words:
445 ldgp $29,0($27)
446bn_div_words..ng:
447 lda $30,-48($30)
448 .frame $30,48,$26,0
449 stq $26,0($30)
450 stq $9,8($30)
451 stq $10,16($30)
452 stq $11,24($30)
453 stq $12,32($30)
454 stq $13,40($30)
455 .mask 0x4003e00,-48
456 .prologue 1
457 bis $16,$16,$9
458 bis $17,$17,$10
459 bis $18,$18,$11
460 bis $31,$31,$13
461 bis $31,2,$12
462 bne $11,$119
463 lda $0,-1
464 br $31,$136
465 .align 4
466$119:
467 bis $11,$11,$16
468 jsr $26,BN_num_bits_word
469 ldgp $29,0($26)
470 subq $0,64,$1
471 beq $1,$120
472 bis $31,1,$1
473 sll $1,$0,$1
474 cmpule $9,$1,$1
475 bne $1,$120
476 # lda $16,_IO_stderr_
477 # lda $17,$C32
478 # bis $0,$0,$18
479 # jsr $26,fprintf
480 # ldgp $29,0($26)
481 jsr $26,abort
482 ldgp $29,0($26)
483 .align 4
484$120:
485 bis $31,64,$3
486 cmpult $9,$11,$2
487 subq $3,$0,$1
488 addl $1,$31,$0
489 subq $9,$11,$1
490 cmoveq $2,$1,$9
491 beq $0,$122
492 zapnot $0,15,$2
493 subq $3,$0,$1
494 sll $11,$2,$11
495 sll $9,$2,$3
496 srl $10,$1,$1
497 sll $10,$2,$10
498 bis $3,$1,$9
499$122:
500 srl $11,32,$5
501 zapnot $11,15,$6
502 lda $7,-1
503 .align 5
504$123:
505 srl $9,32,$1
506 subq $1,$5,$1
507 bne $1,$126
508 zapnot $7,15,$27
509 br $31,$127
510 .align 4
511$126:
512 bis $9,$9,$24
513 bis $5,$5,$25
514 divqu $24,$25,$27
515$127:
516 srl $10,32,$4
517 .align 5
518$128:
519 mulq $27,$5,$1
520 subq $9,$1,$3
521 zapnot $3,240,$1
522 bne $1,$129
523 mulq $6,$27,$2
524 sll $3,32,$1
525 addq $1,$4,$1
526 cmpule $2,$1,$2
527 bne $2,$129
528 subq $27,1,$27
529 br $31,$128
530 .align 4
531$129:
532 mulq $27,$6,$1
533 mulq $27,$5,$4
534 srl $1,32,$3
535 sll $1,32,$1
536 addq $4,$3,$4
537 cmpult $10,$1,$2
538 subq $10,$1,$10
539 addq $2,$4,$2
540 cmpult $9,$2,$1
541 bis $2,$2,$4
542 beq $1,$134
543 addq $9,$11,$9
544 subq $27,1,$27
545$134:
546 subl $12,1,$12
547 subq $9,$4,$9
548 beq $12,$124
549 sll $27,32,$13
550 sll $9,32,$2
551 srl $10,32,$1
552 sll $10,32,$10
553 bis $2,$1,$9
554 br $31,$123
555 .align 4
556$124:
557 bis $13,$27,$0
558$136:
559 ldq $26,0($30)
560 ldq $9,8($30)
561 ldq $10,16($30)
562 ldq $11,24($30)
563 ldq $12,32($30)
564 ldq $13,40($30)
565 addq $30,48,$30
566 ret $31,($26),1
567 .end bn_div_words
568EOF
569 return($data);
570 }
571
diff --git a/src/lib/libcrypto/bn/asm/ca.pl b/src/lib/libcrypto/bn/asm/ca.pl
new file mode 100644
index 0000000000..c1ce67a6b4
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/ca.pl
@@ -0,0 +1,33 @@
1#!/usr/local/bin/perl
2# I have this in perl so I can use more useful register names and then convert
3# them into alpha registers.
4#
5
6push(@INC,"perlasm","../../perlasm");
7require "alpha.pl";
8require "alpha/mul_add.pl";
9require "alpha/mul.pl";
10require "alpha/sqr.pl";
11require "alpha/add.pl";
12require "alpha/sub.pl";
13require "alpha/mul_c8.pl";
14require "alpha/mul_c4.pl";
15require "alpha/sqr_c4.pl";
16require "alpha/sqr_c8.pl";
17require "alpha/div.pl";
18
# Driver: initialise the Alpha perlasm back end with the first command-line
# argument and this script's own name, call each generator once with the
# symbol name to emit, then finish the output.  The generators come from the
# files pulled in by the require lines above.
19&asm_init($ARGV[0],$0);
20
21&bn_mul_words("bn_mul_words");
22&bn_sqr_words("bn_sqr_words");
23&bn_mul_add_words("bn_mul_add_words");
24&bn_add_words("bn_add_words");
25&bn_sub_words("bn_sub_words");
26&bn_div_words("bn_div_words");
27&bn_mul_comba8("bn_mul_comba8");
28&bn_mul_comba4("bn_mul_comba4");
29&bn_sqr_comba4("bn_sqr_comba4");
30&bn_sqr_comba8("bn_sqr_comba8");
31
32&asm_finish();
33
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
new file mode 100644
index 0000000000..5d962cb957
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/co-586.pl
@@ -0,0 +1,286 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
# Driver: initialise the perlasm back end, emit the 8-word and 4-word comba
# multiply and square routines (the second argument is the operand size in
# words), then finish the output.
6&asm_init($ARGV[0],$0);
7
8&bn_mul_comba("bn_mul_comba8",8);
9&bn_mul_comba("bn_mul_comba4",4);
10&bn_sqr_comba("bn_sqr_comba8",8);
11&bn_sqr_comba("bn_sqr_comba4",4);
12
13&asm_finish();
14
# mul_add_c: emit one multiply-accumulate step, (c2:c1:c0) += eax * edx,
# where eax and edx were loaded by the previous step.  Depending on $pos it
# also emits the loads for the next step and the store of the finished
# result word:
#   $pos == 0 : load a[$na] into eax and b[$nb] into edx for the next step
#   $pos == 1 : store c0 to r[$i], then load a[$na] and b[$nb]
#   $pos  > 1 : store c0 to r[$i] only (last step, nothing more to load)
# $a/$b are the registers holding the operand pointers; $ai/$bi are only used
# in the emitted comment.  r is fetched from arg0 into eax when storing.
15sub mul_add_c
16 {
17 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
18
19 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
20 # words, and 1 if load return value
21
22 &comment("mul a[$ai]*b[$bi]");
23
24 # "eax" and "edx" will always be pre-loaded.
25 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
26 # &mov("edx",&DWP($bi*4,$b,"",0));
27
28 &mul("edx");
29 &add($c0,"eax");
30 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
31 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
32 ###
33 &adc($c1,"edx");
34 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b
35 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b
36 ###
37 &adc($c2,0);
38 # if pos > 1, it means it is the last loop
39 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
40 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
41 }
42
# sqr_add_c: emit one accumulate step for a product that is counted once,
# (c2:c1:c0) += eax * eax when $ai == $bi, otherwise eax * edx.
# Depending on $pos it also emits the store of the finished result word to
# r[$i] ($pos > 0) and the loads of a[$na] / a[$nb] for the next step.
# $r and $a are the registers holding the result and operand pointers.
43sub sqr_add_c
44 {
45 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
46
47 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
48 # words, and 1 if load return value
49
50 &comment("sqr a[$ai]*a[$bi]");
51
52 # "eax" and "edx" will always be pre-loaded.
53 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
54 # &mov("edx",&DWP($bi*4,$b,"",0));
55
56 if ($ai == $bi)
57 { &mul("eax");}
58 else
59 { &mul("edx");}
60 &add($c0,"eax");
61 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
62 ###
63 &adc($c1,"edx");
64 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); # second operand only needed when it differs
65 ###
66 &adc($c2,0);
67 # if pos > 1, it means it is the last loop
68 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
69 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
70 }
71
# sqr_add_c2: emit one accumulate step for a cross product that is counted
# twice: the edx:eax product is doubled (add/adc, with the overflow bit going
# to c2) and then added into (c2:c1:c0).
# Depending on $pos it also emits the store of the finished result word to
# r[$i] ($pos > 0) and the loads of a[$na] / a[$nb] for the next step.
# $r and $a are the registers holding the result and operand pointers.
72sub sqr_add_c2
73 {
74 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
75
76 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
77 # words, and 1 if load return value
78
79 &comment("sqr a[$ai]*a[$bi]");
80
81 # "eax" and "edx" will always be pre-loaded.
82 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
83 # &mov("edx",&DWP($bi*4,$a,"",0));
84
85 if ($ai == $bi)
86 { &mul("eax");}
87 else
88 { &mul("edx");}
89 &add("eax","eax"); # double the low word
90 ###
91 &adc("edx","edx"); # double the high word, pulling in the carry
92 ###
93 &adc($c2,0); # bit shifted out of the doubled product
94 &add($c0,"eax");
95 &adc($c1,"edx");
96 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
97 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
98 &adc($c2,0);
99 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
100 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb); # second operand only needed when it differs
101 ###
102 }
103
# bn_mul_comba($name,$num): emit a fully unrolled x86 routine called $name
# that multiplies two $num-word operands column by column.
# Arguments read by the emitted code: arg0 = r, arg1 = a, arg2 = b.
# The generator walks the 2*$num-1 product columns; for column $i it emits one
# mul_add_c step per a[$ai]*b[$bi] pair with $ai+$bi == $i, accumulating into
# three registers that are rotated after each column so the old c1 becomes
# the new c0.  One more word is stored after the last column.
# NOTE: $j, $v, $na and $nb are not declared with local().
104sub bn_mul_comba
105 {
106 local($name,$num)=@_;
107 local($a,$b,$c0,$c1,$c2);
108 local($i,$as,$ae,$bs,$be,$ai,$bi);
109 local($tot,$end);
110
111 &function_begin_B($name,"");
112
113 $c0="ebx";
114 $c1="ecx";
115 $c2="ebp";
116 $a="esi";
117 $b="edi";
118
 # $as/$bs: first a and b index of the current column; $be: last b index
119 $as=0;
120 $ae=0;
121 $bs=0;
122 $be=0;
123 $tot=$num+$num-1; # number of product columns
124
125 &push("esi");
126 &mov($a,&wparam(1));
127 &push("edi");
128 &mov($b,&wparam(2));
129 &push("ebp");
130 &push("ebx");
131
132 &xor($c0,$c0);
133 &mov("eax",&DWP(0,$a,"",0)); # load the first word of a
134 &xor($c1,$c1);
135 &mov("edx",&DWP(0,$b,"",0)); # load the first word of b
136
137 for ($i=0; $i<$tot; $i++)
138 {
139 $ai=$as;
140 $bi=$bs;
141 $end=$be+1;
142
143 &comment("################## Calculate word $i");
144
145 for ($j=$bs; $j<$end; $j++)
146 {
147 &xor($c2,$c2) if ($j == $bs); # fresh top accumulator for each column
 # $v is the $pos argument of mul_add_c: 0 inside a column,
 # 1 on the last product of a column, 2 on the very last product
148 if (($j+1) == $end)
149 {
150 $v=1;
151 $v=2 if (($i+1) == $tot);
152 }
153 else
154 { $v=0; }
 # indices of the operands the NEXT step will need
155 if (($j+1) != $end)
156 {
157 $na=($ai-1);
158 $nb=($bi+1);
159 }
160 else
161 {
162 $na=$as+($i < ($num-1));
163 $nb=$bs+($i >= ($num-1));
164 }
165#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
166 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
167 if ($v)
168 {
169 &comment("saved r[$i]");
170 # &mov("eax",&wparam(0));
171 # &mov(&DWP($i*4,"eax","",0),$c0);
172 ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the accumulators
173 }
174 $ai--;
175 $bi++;
176 }
177 $as++ if ($i < ($num-1));
178 $ae++ if ($i >= ($num-1));
179
180 $bs++ if ($i >= ($num-1));
181 $be++ if ($i < ($num-1));
182 }
183 &comment("save r[$i]");
184 # &mov("eax",&wparam(0));
 # eax still holds r: the final mul_add_c step ($pos == 2) loaded it
 # and did not reload eax afterwards
185 &mov(&DWP($i*4,"eax","",0),$c0);
186
187 &pop("ebx");
188 &pop("ebp");
189 &pop("edi");
190 &pop("esi");
191 &ret();
192 &function_end_B($name);
193 }
194
# bn_sqr_comba($name,$num): emit a fully unrolled x86 routine called $name
# that squares a $num-word operand column by column.
# Arguments read by the emitted code: arg0 = r, arg1 = a.
# The generator walks the 2*$num-1 product columns; within a column it pairs
# a[$ai] with a[$bi] ($ai counting down, $bi counting up) and stops once the
# next pair would cross over.  Diagonal terms go through sqr_add_c, cross
# terms through sqr_add_c2 (which doubles them).  The three accumulator
# registers are rotated after each column so the old c1 becomes the new c0.
# One more word is stored after the last column.
# NOTE: $j, $v, $na and $nb are not declared with local().
195sub bn_sqr_comba
196 {
197 local($name,$num)=@_;
 # plain declaration: these are register names assigned below, not arguments
198 local($r,$a,$c0,$c1,$c2);
199 local($i,$as,$ae,$bs,$be,$ai,$bi);
200 local($b,$tot,$end,$half);
201
202 &function_begin_B($name,"");
203
204 $c0="ebx";
205 $c1="ecx";
206 $c2="ebp";
207 $a="esi";
208 $r="edi";
209
210 &push("esi");
211 &push("edi");
212 &push("ebp");
213 &push("ebx");
214 &mov($r,&wparam(0));
215 &mov($a,&wparam(1));
216 &xor($c0,$c0);
217 &xor($c1,$c1);
218 &mov("eax",&DWP(0,$a,"",0)); # load the first word
219
 # $as/$bs: first index pair of the current column; $be: last second index
220 $as=0;
221 $ae=0;
222 $bs=0;
223 $be=0;
224 $tot=$num+$num-1; # number of product columns
225
226 for ($i=0; $i<$tot; $i++)
227 {
228 $ai=$as;
229 $bi=$bs;
230 $end=$be+1;
231
232 &comment("############### Calculate word $i");
233 for ($j=$bs; $j<$end; $j++)
234 {
235 &xor($c2,$c2) if ($j == $bs); # fresh top accumulator for each column
 # $v is the $pos argument of the helpers: 0 inside a column,
 # 1 on the last pair of a column, 2 on the very last pair
236 if (($ai-1) < ($bi+1))
237 {
238 $v=1;
239 $v=2 if ($i+1) == $tot;
240 }
241 else
242 { $v=0; }
 # indices of the operands the NEXT step will need
243 if (!$v)
244 {
245 $na=$ai-1;
246 $nb=$bi+1;
247 }
248 else
249 {
250 $na=$as+($i < ($num-1));
251 $nb=$bs+($i >= ($num-1));
252 }
253 if ($ai == $bi)
254 {
255 &sqr_add_c($r,$a,$ai,$bi,
256 $c0,$c1,$c2,$v,$i,$na,$nb);
257 }
258 else
259 {
260 &sqr_add_c2($r,$a,$ai,$bi,
261 $c0,$c1,$c2,$v,$i,$na,$nb);
262 }
263 if ($v)
264 {
265 &comment("saved r[$i]");
266 #&mov(&DWP($i*4,$r,"",0),$c0);
267 ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the accumulators
268 last; # remaining pairs of this column are mirror images
269 }
270 $ai--;
271 $bi++;
272 }
273 $as++ if ($i < ($num-1));
274 $ae++ if ($i >= ($num-1));
275
276 $bs++ if ($i >= ($num-1));
277 $be++ if ($i < ($num-1));
278 }
 # store the top result word, r[2*$num-1]
279 &mov(&DWP($i*4,$r,"",0),$c0);
280 &pop("ebx");
281 &pop("ebp");
282 &pop("edi");
283 &pop("esi");
284 &ret();
285 &function_end_B($name);
286 }
diff --git a/src/lib/libcrypto/bn/asm/co-alpha.pl b/src/lib/libcrypto/bn/asm/co-alpha.pl
new file mode 100644
index 0000000000..67dad3e3d5
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/co-alpha.pl
@@ -0,0 +1,116 @@
1#!/usr/local/bin/perl
2# I have this in perl so I can use more useful register names and then convert
3# them into alpha registers.
4#
5
6push(@INC,"perlasm","../../perlasm"); # directories searched for the perlasm helper below
7require "alpha.pl";
8
9&asm_init($ARGV[0],$0); # first command-line argument and the script name are passed to the helper
10
11print &bn_sub_words("bn_sub_words"); # generate the routine using the sub defined in this file
12
13&asm_finish();
14
15sub bn_sub_words # emit $name: multi-word subtract with borrow, unrolled four words per pass
16 {
17 local($name)=@_;
18 local($cc,$a,$b,$r); # NOTE(review): only $cc is used below; the other register variables are not localized
19
20 $cc="r0"; # running borrow
21 $a0="r1"; $b0="r5"; $r0="r9"; $tmp="r13"; # NOTE(review): $r0-$r3 are assigned but not referenced below
22 $a1="r2"; $b1="r6"; $r1="r10"; $t1="r14";
23 $a2="r3"; $b2="r7"; $r2="r11";
24 $a3="r4"; $b3="r8"; $r3="r12"; $t3="r15";
25
26 $rp=&wparam(0);
27 $ap=&wparam(1);
28 $bp=&wparam(2);
29 $count=&wparam(3);
30
31 &function_begin($name,"");
32
33 &comment("");
34 &sub($count,4,$count); # bias count by -4; negative means fewer than 4 words remain
35 &mov("zero",$cc); # start with no borrow
36 &blt($count,&label("finish"));
37
38 &ld($a0,&QWPw(0,$ap));
39 &ld($b0,&QWPw(0,$bp));
40
41##########################################################
42 &set_label("loop");
43
44 &ld($a1,&QWPw(1,$ap));
45 &cmpult($a0,$b0,$tmp); # will a-b borrow?
46 &ld($b1,&QWPw(1,$bp));
47 &sub($a0,$b0,$a0); # do the subtract
48 &ld($a2,&QWPw(2,$ap));
49 &cmpult($a0,$cc,$b0); # will subtracting the incoming borrow borrow?
50 &ld($b2,&QWPw(2,$bp));
51 &sub($a0,$cc,$a0); # subtract the incoming borrow
52 &ld($a3,&QWPw(3,$ap));
53 &add($b0,$tmp,$cc); # add the borrows
54
55 &cmpult($a1,$b1,$t1); # will a-b borrow?
56 &sub($a1,$b1,$a1); # do the subtract
57 &ld($b3,&QWPw(3,$bp));
58 &cmpult($a1,$cc,$b1); # will subtracting the incoming borrow borrow?
59 &sub($a1,$cc,$a1); # subtract the incoming borrow
60 &add($b1,$t1,$cc); # add the borrows
61
62 &cmpult($a2,$b2,$tmp); # will a-b borrow?
63 &sub($a2,$b2,$a2); # do the subtract
64 &st($a0,&QWPw(0,$rp)); # save
65 &cmpult($a2,$cc,$b2); # will subtracting the incoming borrow borrow?
66 &sub($a2,$cc,$a2); # subtract the incoming borrow
67 &add($b2,$tmp,$cc); # add the borrows
68
69 &cmpult($a3,$b3,$t3); # will a-b borrow?
70 &sub($a3,$b3,$a3); # do the subtract
71 &st($a1,&QWPw(1,$rp)); # save
72 &cmpult($a3,$cc,$b3); # will subtracting the incoming borrow borrow?
73 &sub($a3,$cc,$a3); # subtract the incoming borrow
74 &add($b3,$t3,$cc); # add the borrows
75
76 &st($a2,&QWPw(2,$rp)); # save
77 &sub($count,4,$count); # count-=4
78 &st($a3,&QWPw(3,$rp)); # save
79 &add($ap,4*$QWS,$ap); # advance ap by 4*$QWS
80 &add($bp,4*$QWS,$bp); # advance bp by 4*$QWS
81 &add($rp,4*$QWS,$rp); # advance rp by 4*$QWS
82
83 &blt($count,&label("finish"));
84 &ld($a0,&QWPw(0,$ap));
85 &ld($b0,&QWPw(0,$bp));
86 &br(&label("loop"));
87##################################################
88 # Do the last 0..3 words
89
90 &set_label("last_loop");
91
92 &ld($a0,&QWPw(0,$ap)); # get a
93 &ld($b0,&QWPw(0,$bp)); # get b
94 &cmpult($a0,$b0,$tmp); # will a-b borrow?
95 &sub($a0,$b0,$a0); # do the subtract
96 &cmpult($a0,$cc,$b0); # will subtracting the incoming borrow borrow?
97 &sub($a0,$cc,$a0); # subtract the incoming borrow
98 &st($a0,&QWPw(0,$rp)); # save
99 &add($b0,$tmp,$cc); # add the borrows
100
101 &add($ap,$QWS,$ap);
102 &add($bp,$QWS,$bp);
103 &add($rp,$QWS,$rp);
104 &sub($count,1,$count);
105 &bgt($count,&label("last_loop"));
106 &function_end_A($name); # NOTE(review): presumably emits the return path so execution does not fall into "finish" - helper not visible here
107
108######################################################
109 &set_label("finish");
110 &add($count,4,$count); # undo the -4 bias: count is now the number of leftover words
111 &bgt($count,&label("last_loop"));
112
113 &set_label("end");
114 &function_end($name);
115 }
116
diff --git a/src/lib/libcrypto/bn/asm/ia64.S b/src/lib/libcrypto/bn/asm/ia64.S
new file mode 100644
index 0000000000..7b82b820e6
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/ia64.S
@@ -0,0 +1,1560 @@
1.explicit
2.text
3.ident "ia64.S, Version 2.1"
4.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
5
6//
7// ====================================================================
8// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
9// project.
10//
11// Rights for redistribution and usage in source and binary forms are
12// granted according to the OpenSSL license. Warranty of any kind is
13// disclaimed.
14// ====================================================================
15//
16// Version 2.x is Itanium2 re-tune. Few words about how Itanium2 is
17// different from Itanium to this module viewpoint. Most notably, is it
18// "wider" than Itanium? Can you experience loop scalability as
19// discussed in commentary sections? Not really:-( Itanium2 has 6
20// integer ALU ports, i.e. it's 2 ports wider, but it's not enough to
21// spin twice as fast, as I need 8 IALU ports. Amount of floating point
22// ports is the same, i.e. 2, while I need 4. In other words, to this
23// module Itanium2 remains effectively as "wide" as Itanium. Yet it's
24// essentially different in respect to this module, and a re-tune was
25// required. Well, because some instruction latencies have changed. Most
26// noticeably those intensively used:
27//
28// Itanium Itanium2
29// ldf8 9 6 L2 hit
30// ld8 2 1 L1 hit
31// getf 2 5
32// xma[->getf] 7[+1] 4[+0]
33// add[->st8] 1[+1] 1[+0]
34//
35// What does it mean? You might ratiocinate that the original code
36// should run just faster... Because sum of latencies is smaller...
37// Wrong! Note that getf latency increased. This means that if a loop is
38// scheduled for lower latency (as they were), then it will suffer from
39// stall condition and the code will therefore turn anti-scalable, e.g.
40// original bn_mul_words spun at 5*n or 2.5 times slower than expected
41// on Itanium2! What to do? Reschedule loops for Itanium2? But then
42// Itanium would exhibit anti-scalability. So I've chosen to reschedule
43// for worst latency for every instruction aiming for best *all-round*
44// performance.
45
46// Q. How much faster does it get?
47// A. Here is the output from 'openssl speed rsa dsa' for vanilla
48// 0.9.6a compiled with gcc version 2.96 20000731 (Red Hat
49// Linux 7.1 2.96-81):
50//
51// sign verify sign/s verify/s
52// rsa 512 bits 0.0036s 0.0003s 275.3 2999.2
53// rsa 1024 bits 0.0203s 0.0011s 49.3 894.1
54// rsa 2048 bits 0.1331s 0.0040s 7.5 250.9
55// rsa 4096 bits 0.9270s 0.0147s 1.1 68.1
56// sign verify sign/s verify/s
57// dsa 512 bits 0.0035s 0.0043s 288.3 234.8
58// dsa 1024 bits 0.0111s 0.0135s 90.0 74.2
59//
60// And here is similar output but for this assembler
61// implementation:-)
62//
63// sign verify sign/s verify/s
64// rsa 512 bits 0.0021s 0.0001s 549.4 9638.5
65// rsa 1024 bits 0.0055s 0.0002s 183.8 4481.1
66// rsa 2048 bits 0.0244s 0.0006s 41.4 1726.3
67// rsa 4096 bits 0.1295s 0.0018s 7.7 561.5
68// sign verify sign/s verify/s
69// dsa 512 bits 0.0012s 0.0013s 891.9 756.6
70// dsa 1024 bits 0.0023s 0.0028s 440.4 376.2
71//
72// Yes, you may argue that it's not fair comparison as it's
73// possible to craft the C implementation with BN_UMULT_HIGH
74// inline assembler macro. But of course! Here is the output
75// with the macro:
76//
77// sign verify sign/s verify/s
78// rsa 512 bits 0.0020s 0.0002s 495.0 6561.0
79// rsa 1024 bits 0.0086s 0.0004s 116.2 2235.7
80// rsa 2048 bits 0.0519s 0.0015s 19.3 667.3
81// rsa 4096 bits 0.3464s 0.0053s 2.9 187.7
82// sign verify sign/s verify/s
83// dsa 512 bits 0.0016s 0.0020s 613.1 510.5
84// dsa 1024 bits 0.0045s 0.0054s 221.0 183.9
85//
86// My code is still way faster, huh:-) And I believe that even
87// higher performance can be achieved. Note that as keys get
88// longer, performance gain is larger. Why? According to the
89// profiler there is another player in the field, namely
90// BN_from_montgomery consuming larger and larger portion of CPU
91// time as keysize decreases. I therefore consider putting effort
92// to assembler implementation of the following routine:
93//
94// void bn_mul_add_mont (BN_ULONG *rp,BN_ULONG *np,int nl,BN_ULONG n0)
95// {
96// int i,j;
97// BN_ULONG v;
98//
99// for (i=0; i<nl; i++)
100// {
101// v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
102// nrp++;
103// rp++;
104// if (((nrp[-1]+=v)&BN_MASK2) < v)
105// for (j=0; ((++nrp[j])&BN_MASK2) == 0; j++) ;
106// }
107// }
108//
109// It might as well be beneficial to implement even combaX
110// variants, as it appears as it can literally unleash the
111// performance (see comment section to bn_mul_comba8 below).
112//
113// And finally for your reference the output for 0.9.6a compiled
114// with SGIcc version 0.01.0-12 (keep in mind that for the moment
115// of this writing it's not possible to convince SGIcc to use
116// BN_UMULT_HIGH inline assembler macro, yet the code is fast,
117// i.e. for a compiler generated one:-):
118//
119// sign verify sign/s verify/s
120// rsa 512 bits 0.0022s 0.0002s 452.7 5894.3
121// rsa 1024 bits 0.0097s 0.0005s 102.7 2002.9
122// rsa 2048 bits 0.0578s 0.0017s 17.3 600.2
123// rsa 4096 bits 0.3838s 0.0061s 2.6 164.5
124// sign verify sign/s verify/s
125// dsa 512 bits 0.0018s 0.0022s 547.3 459.6
126// dsa 1024 bits 0.0051s 0.0062s 196.6 161.3
127//
128// Oh! Benchmarks were performed on 733MHz Lion-class Itanium
129// system running Redhat Linux 7.1 (very special thanks to Ray
130// McCaffity of Williams Communications for providing an account).
131//
132// Q. What's the heck with 'rum 1<<5' at the end of every function?
133// A. Well, by clearing the "upper FP registers written" bit of the
134// User Mask I want to excuse the kernel from preserving upper
135// (f32-f127) FP register bank over process context switch, thus
136// minimizing bus bandwidth consumption during the switch (i.e.
137// after PKI operation completes and the program is off doing
138// something else like bulk symmetric encryption). Having said
139// this, I also want to point out that it might be good idea
140// to compile the whole toolkit (as well as majority of the
141// programs for that matter) with -mfixed-range=f32-f127 command
142// line option. No, it doesn't prevent the compiler from writing
143// to upper bank, but at least discourages to do so. If you don't
144// like the idea you have the option to compile the module with
145// -Drum=nop.m in command line.
146//
147
148#if defined(_HPUX_SOURCE) && !defined(_LP64)
149#define ADDP addp4
150#else
151#define ADDP add
152#endif
153
154#if 1
155//
156// bn_[add|sub]_words routines.
157//
158// Loops are spinning in 2*(n+5) ticks on Itanium (provided that the
159// data reside in L1 cache, i.e. 2 ticks away). It's possible to
160// compress the epilogue and get down to 2*n+6, but at the cost of
161// scalability (the neat feature of this implementation is that it
162// shall automagically spin in n+5 on "wider" IA-64 implementations:-)
163// I consider that the epilogue is short enough as it is to trade tiny
164// performance loss on Itanium for scalability.
165//
166// BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
167//
168.global bn_add_words#
169.proc bn_add_words#
170.align 64
171.skip 32 // makes the loop body aligned at 64-byte boundary
172bn_add_words:
173 .prologue
174 .fframe 0
175 .save ar.pfs,r2
176{ .mii; alloc r2=ar.pfs,4,12,0,16
177 cmp4.le p6,p0=r35,r0 };; // p6 = (num <= 0)
178{ .mfb; mov r8=r0 // return value
179(p6) br.ret.spnt.many b0 };; // nothing to add: return 0
180
181 .save ar.lc,r3
182{ .mib; sub r10=r35,r0,1 // r10 = num-1
183 mov r3=ar.lc // save caller's ar.lc
184 brp.loop.imp .L_bn_add_words_ctop,.L_bn_add_words_cend-16
185 }
186 .body
187{ .mib; ADDP r14=0,r32 // rp
188 mov r9=pr };; // save caller's predicates
189{ .mii; ADDP r15=0,r33 // ap
190 mov ar.lc=r10 // loop count
191 mov ar.ec=6 } // epilogue count
192{ .mib; ADDP r16=0,r34 // bp
193 mov pr.rot=1<<16 };; // only p16 set: start the software pipeline
194
195.L_bn_add_words_ctop:
196{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
197 (p18) add r39=r37,r34 // r=a+b
198 (p19) cmp.ltu.unc p56,p0=r40,r38 } // carry out: r<a
199{ .mfb; (p0) nop.m 0x0
200 (p0) nop.f 0x0
201 (p0) nop.b 0x0 }
202{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
203 (p58) cmp.eq.or p57,p0=-1,r41 // (p20)
204 (p58) add r41=1,r41 } // (p20)
205{ .mfb; (p21) st8 [r14]=r42,8 // *(rp++)=r
206 (p0) nop.f 0x0
207 br.ctop.sptk .L_bn_add_words_ctop };;
208.L_bn_add_words_cend:
209
210{ .mii;
211(p59) add r8=1,r8 // return value
212 mov pr=r9,0x1ffff // restore predicates
213 mov ar.lc=r3 } // restore ar.lc
214{ .mbb; nop.b 0x0
215 br.ret.sptk.many b0 };;
216.endp bn_add_words#
217
218//
219// BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
220//
221.global bn_sub_words#
222.proc bn_sub_words#
223.align 64
224.skip 32 // makes the loop body aligned at 64-byte boundary
225bn_sub_words:
226 .prologue
227 .fframe 0
228 .save ar.pfs,r2
229{ .mii; alloc r2=ar.pfs,4,12,0,16
230 cmp4.le p6,p0=r35,r0 };; // p6 = (num <= 0)
231{ .mfb; mov r8=r0 // return value
232(p6) br.ret.spnt.many b0 };; // nothing to subtract: return 0
233
234 .save ar.lc,r3
235{ .mib; sub r10=r35,r0,1 // r10 = num-1
236 mov r3=ar.lc // save caller's ar.lc
237 brp.loop.imp .L_bn_sub_words_ctop,.L_bn_sub_words_cend-16
238 }
239 .body
240{ .mib; ADDP r14=0,r32 // rp
241 mov r9=pr };; // save caller's predicates
242{ .mii; ADDP r15=0,r33 // ap
243 mov ar.lc=r10 // loop count
244 mov ar.ec=6 } // epilogue count
245{ .mib; ADDP r16=0,r34 // bp
246 mov pr.rot=1<<16 };; // only p16 set: start the software pipeline
247
248.L_bn_sub_words_ctop:
249{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
250 (p18) sub r39=r37,r34 // r=a-b
251 (p19) cmp.gtu.unc p56,p0=r40,r38 } // borrow out: r>a
252{ .mfb; (p0) nop.m 0x0
253 (p0) nop.f 0x0
254 (p0) nop.b 0x0 }
255{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
256 (p58) cmp.eq.or p57,p0=0,r41 // (p20)
257 (p58) add r41=-1,r41 } // (p20)
258{ .mbb; (p21) st8 [r14]=r42,8 // *(rp++)=r
259 (p0) nop.b 0x0
260 br.ctop.sptk .L_bn_sub_words_ctop };;
261.L_bn_sub_words_cend:
262
263{ .mii;
264(p59) add r8=1,r8 // return value
265 mov pr=r9,0x1ffff // restore predicates
266 mov ar.lc=r3 } // restore ar.lc
267{ .mbb; nop.b 0x0
268 br.ret.sptk.many b0 };;
269.endp bn_sub_words#
270#endif
271
272#if 0
273#define XMA_TEMPTATION
274#endif
275
276#if 1
277//
278// BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
279//
280.global bn_mul_words#
281.proc bn_mul_words#
282.align 64
283.skip 32 // makes the loop body aligned at 64-byte boundary
284bn_mul_words:
285 .prologue
286 .fframe 0
287 .save ar.pfs,r2
288#ifdef XMA_TEMPTATION
289{ .mfi; alloc r2=ar.pfs,4,0,0,0 };;
290#else
291{ .mfi; alloc r2=ar.pfs,4,12,0,16 };;
292#endif
293{ .mib; mov r8=r0 // return value
294 cmp4.le p6,p0=r34,r0 // p6 = (num <= 0)
295(p6) br.ret.spnt.many b0 };;
296
297 .save ar.lc,r3
298{ .mii; sub r10=r34,r0,1 // r10 = num-1
299 mov r3=ar.lc // save caller's ar.lc
300 mov r9=pr };; // save caller's predicates
301
302 .body
303{ .mib; setf.sig f8=r35 // w
304 mov pr.rot=0x800001<<16
305 // ------^----- serves as (p50) at first (p27)
306 brp.loop.imp .L_bn_mul_words_ctop,.L_bn_mul_words_cend-16
307 }
308
309#ifndef XMA_TEMPTATION
310
311{ .mmi; ADDP r14=0,r32 // rp
312 ADDP r15=0,r33 // ap
313 mov ar.lc=r10 } // loop count
314{ .mmi; mov r40=0 // serves as r35 at first (p27)
315 mov ar.ec=13 };; // epilogue count
316
317// This loop spins in 2*(n+12) ticks. It's scheduled for data in Itanium
318// L2 cache (i.e. 9 ticks away) as floating point load/store instructions
319// bypass L1 cache and L2 latency is actually best-case scenario for
320// ldf8. The loop is not scalable and shall run in 2*(n+12) even on
321// "wider" IA-64 implementations. It's a trade-off here. n+24 loop
322// would give us ~5% in *overall* performance improvement on "wider"
323// IA-64, but would hurt Itanium for about same because of longer
324// epilogue. As it's a matter of few percents in either case I've
325// chosen to trade the scalability for development time (you can see
326// this very instruction sequence in bn_mul_add_words loop which in
327// turn is scalable).
328.L_bn_mul_words_ctop:
329{ .mfi; (p25) getf.sig r36=f52 // low
330 (p21) xmpy.lu f48=f37,f8
331 (p28) cmp.ltu p54,p50=r41,r39 }
332{ .mfi; (p16) ldf8 f32=[r15],8
333 (p21) xmpy.hu f40=f37,f8
334 (p0) nop.i 0x0 };;
335{ .mii; (p25) getf.sig r32=f44 // high
336 .pred.rel "mutex",p50,p54
337 (p50) add r40=r38,r35 // (p27)
338 (p54) add r40=r38,r35,1 } // (p27)
339{ .mfb; (p28) st8 [r14]=r41,8
340 (p0) nop.f 0x0
341 br.ctop.sptk .L_bn_mul_words_ctop };;
342.L_bn_mul_words_cend:
343
344{ .mii; nop.m 0x0
345.pred.rel "mutex",p51,p55
346(p51) add r8=r36,r0
347(p55) add r8=r36,r0,1 }
348{ .mfb; nop.m 0x0
349 nop.f 0x0
350 nop.b 0x0 }
351
352#else // XMA_TEMPTATION
353
354 setf.sig f37=r0 // serves as carry at (p18) tick
355 mov ar.lc=r10
356 mov ar.ec=5;;
357
358// Most of you examining this code very likely wonder why in the name
359// of Intel the following loop is commented out? Indeed, it looks so
360// neat that you find it hard to believe that it's something wrong
361// with it, right? The catch is that every iteration depends on the
362// result from previous one and the latter isn't available instantly.
363// The loop therefore spins at the latency of xma minus 1, or in other
364// words at 6*(n+4) ticks:-( Compare to the "production" loop above
365// that runs in 2*(n+12) where the low latency problem is worked around
366// by moving the dependency to one-tick latent integer ALU. Note that
367// "distance" between ldf8 and xma is not latency of ldf8, but the
368// *difference* between xma and ldf8 latencies.
369.L_bn_mul_words_ctop:
370{ .mfi; (p16) ldf8 f32=[r33],8
371 (p18) xma.hu f38=f34,f8,f39 }
372{ .mfb; (p20) stf8 [r32]=f37,8
373 (p18) xma.lu f35=f34,f8,f39
374 br.ctop.sptk .L_bn_mul_words_ctop };;
375.L_bn_mul_words_cend:
376
377 getf.sig r8=f41 // the return value
378
379#endif // XMA_TEMPTATION
380
381{ .mii; nop.m 0x0
382 mov pr=r9,0x1ffff // restore predicates
383 mov ar.lc=r3 } // restore ar.lc
384{ .mfb; rum 1<<5 // clear um.mfh
385 nop.f 0x0
386 br.ret.sptk.many b0 };;
387.endp bn_mul_words#
388#endif
389
390#if 1
391//
392// BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
393//
394.global bn_mul_add_words#
395.proc bn_mul_add_words#
396.align 64
397.skip 48 // makes the loop body aligned at 64-byte boundary
398bn_mul_add_words:
399 .prologue
400 .fframe 0
401 .save ar.pfs,r2
402 .save ar.lc,r3
403 .save pr,r9
404{ .mmi; alloc r2=ar.pfs,4,4,0,8
405 cmp4.le p6,p0=r34,r0 // p6 = (num <= 0)
406 mov r3=ar.lc };; // save caller's ar.lc
407{ .mib; mov r8=r0 // return value
408 sub r10=r34,r0,1 // r10 = num-1
409(p6) br.ret.spnt.many b0 };;
410
411 .body
412{ .mib; setf.sig f8=r35 // w
413 mov r9=pr // save caller's predicates
414 brp.loop.imp .L_bn_mul_add_words_ctop,.L_bn_mul_add_words_cend-16
415 }
416{ .mmi; ADDP r14=0,r32 // rp
417 ADDP r15=0,r33 // ap
418 mov ar.lc=r10 } // loop count
419{ .mii; ADDP r16=0,r32 // rp copy
420 mov pr.rot=0x2001<<16
421 // ------^----- serves as (p40) at first (p27)
422 mov ar.ec=11 };; // epilogue count
423
424// This loop spins in 3*(n+10) ticks on Itanium and in 2*(n+10) on
425// Itanium 2. Yes, unlike previous versions it scales:-) Previous
426// version was performing *all* additions in IALU and was starving
427// for those even on Itanium 2. In this version one addition is
428// moved to FPU and is folded with multiplication. This is at cost
429// of propagating the result from previous call to this subroutine
430// to L2 cache... In other words negligible even for shorter keys.
431// *Overall* performance improvement [over previous version] varies
432// from 11 to 22 percent depending on key length.
433.L_bn_mul_add_words_ctop:
434.pred.rel "mutex",p40,p42
435{ .mfi; (p23) getf.sig r36=f45 // low
436 (p20) xma.lu f42=f36,f8,f50 // low
437 (p40) add r39=r39,r35 } // (p27)
438{ .mfi; (p16) ldf8 f32=[r15],8 // *(ap++)
439 (p20) xma.hu f36=f36,f8,f50 // high
440 (p42) add r39=r39,r35,1 };; // (p27)
441{ .mmi; (p24) getf.sig r32=f40 // high
442 (p16) ldf8 f46=[r16],8 // *(rp1++)
443 (p40) cmp.ltu p41,p39=r39,r35 } // (p27)
444{ .mib; (p26) st8 [r14]=r39,8 // *(rp2++)
445 (p42) cmp.leu p41,p39=r39,r35 // (p27)
446 br.ctop.sptk .L_bn_mul_add_words_ctop};;
447.L_bn_mul_add_words_cend:
448
449{ .mmi; .pred.rel "mutex",p40,p42
450(p40) add r8=r35,r0
451(p42) add r8=r35,r0,1
452 mov pr=r9,0x1ffff } // restore predicates
453{ .mib; rum 1<<5 // clear um.mfh
454 mov ar.lc=r3 // restore ar.lc
455 br.ret.sptk.many b0 };;
456.endp bn_mul_add_words#
457#endif
458
459#if 1
460//
461// void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
462//
463.global bn_sqr_words#
464.proc bn_sqr_words#
465.align 64
466.skip 32 // makes the loop body aligned at 64-byte boundary
467bn_sqr_words:
468 .prologue
469 .fframe 0
470 .save ar.pfs,r2
471{ .mii; alloc r2=ar.pfs,3,0,0,0
472 sxt4 r34=r34 };; // sign-extend the int num
473{ .mii; cmp.le p6,p0=r34,r0 // p6 = (num <= 0)
474 mov r8=r0 } // return value
475{ .mfb; ADDP r32=0,r32
476 nop.f 0x0
477(p6) br.ret.spnt.many b0 };;
478
479 .save ar.lc,r3
480{ .mii; sub r10=r34,r0,1 // r10 = num-1
481 mov r3=ar.lc // save caller's ar.lc
482 mov r9=pr };; // save caller's predicates
483
484 .body
485{ .mib; ADDP r33=0,r33
486 mov pr.rot=1<<16
487 brp.loop.imp .L_bn_sqr_words_ctop,.L_bn_sqr_words_cend-16
488 }
489{ .mii; add r34=8,r32 // second store pointer, one word past rp
490 mov ar.lc=r10 // loop count
491 mov ar.ec=18 };; // epilogue count
492
493// 2*(n+17) on Itanium, (n+17) on "wider" IA-64 implementations. It's
494// possible to compress the epilogue (I'm getting tired to write this
495// comment over and over) and get down to 2*n+16 at the cost of
496// scalability. The decision will very likely be reconsidered after the
497// benchmark program is profiled. I.e. if performance gain on Itanium
498// will appear larger than loss on "wider" IA-64, then the loop should
499// be explicitly split and the epilogue compressed.
500.L_bn_sqr_words_ctop:
501{ .mfi; (p16) ldf8 f32=[r33],8
502 (p25) xmpy.lu f42=f41,f41
503 (p0) nop.i 0x0 }
504{ .mib; (p33) stf8 [r32]=f50,16
505 (p0) nop.i 0x0
506 (p0) nop.b 0x0 }
507{ .mfi; (p0) nop.m 0x0
508 (p25) xmpy.hu f52=f41,f41
509 (p0) nop.i 0x0 }
510{ .mib; (p33) stf8 [r34]=f60,16
511 (p0) nop.i 0x0
512 br.ctop.sptk .L_bn_sqr_words_ctop };;
513.L_bn_sqr_words_cend:
514
515{ .mii; nop.m 0x0
516 mov pr=r9,0x1ffff // restore predicates
517 mov ar.lc=r3 } // restore ar.lc
518{ .mfb; rum 1<<5 // clear um.mfh
519 nop.f 0x0
520 br.ret.sptk.many b0 };;
521.endp bn_sqr_words#
522#endif
523
524#if 1
525// Apparently we win nothing by implementing special bn_sqr_comba8.
526// Yes, it is possible to reduce the number of multiplications by
527// almost factor of two, but then the amount of additions would
528// increase by factor of two (as we would have to perform those
529// otherwise performed by xma ourselves). Normally we would trade
530// anyway as multiplications are way more expensive, but not this
531// time... Multiplication kernel is fully pipelined and as we drain
532// one 128-bit multiplication result per clock cycle multiplications
533// are effectively as inexpensive as additions. Special implementation
534// might become of interest for "wider" IA-64 implementation as you'll
535// be able to get through the multiplication phase faster (there won't
536// be any stall issues as discussed in the commentary section below and
537// you therefore will be able to employ all 4 FP units)... But these
538// Itanium days it's simply too hard to justify the effort so I just
539// drop down to bn_mul_comba8 code:-)
540//
541// void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
542//
543.global bn_sqr_comba8#
544.proc bn_sqr_comba8#
545.align 64
546bn_sqr_comba8:
547 .prologue
548 .fframe 0
549 .save ar.pfs,r2
550#if defined(_HPUX_SOURCE) && !defined(_LP64)
551{ .mii; alloc r2=ar.pfs,2,1,0,0
552 addp4 r33=0,r33
553 addp4 r32=0,r32 };;
554{ .mii;
555#else
556{ .mii; alloc r2=ar.pfs,2,1,0,0
557#endif
558 mov r34=r33 // b=a: r34 is the b pointer of bn_mul_comba8
559 add r14=8,r33 };;
560 .body
561{ .mii; add r17=8,r34
562 add r15=16,r33
563 add r18=16,r34 }
564{ .mfb; add r16=24,r33
565 br .L_cheat_entry_point8 };; // continue inside bn_mul_comba8 with both operands equal
566.endp bn_sqr_comba8#
567#endif
568
569#if 1
570// I've estimated this routine to run in ~120 ticks, but in reality
571// (i.e. according to ar.itc) it takes ~160 ticks. Are those extra
572// cycles consumed for instructions fetch? Or did I misinterpret some
573// clause in Itanium µ-architecture manual? Comments are welcomed and
574// highly appreciated.
575//
576// On Itanium 2 it takes ~190 ticks. This is because of stalls on
577// result from getf.sig. I do nothing about it at this point for
578// reasons depicted below.
579//
580// However! It should be noted that even 160 ticks is darn good result
581// as it's over 10 (yes, ten, spelled as t-e-n) times faster than the
582// C version (compiled with gcc with inline assembler). I really
583// kicked compiler's butt here, didn't I? Yeah! This brings us to the
584// following statement. It's damn shame that this routine isn't called
585// very often nowadays! According to the profiler most CPU time is
586// consumed by bn_mul_add_words called from BN_from_montgomery. In
587// order to estimate what we're missing, I've compared the performance
588// of this routine against "traditional" implementation, i.e. against
589// following routine:
590//
591// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
592// { r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
593// r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
594// r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
595// r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
596// r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
597// r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
598// r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
599// r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
600// }
601//
602// The one below is over 8 times faster than the one above:-( Even
603// more reasons to "combafy" bn_mul_add_mont...
604//
605// And yes, this routine really made me wish there were an optimizing
606// assembler! It also feels like it deserves a dedication.
607//
608// To my wife for being there and to my kids...
609//
610// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
611//
612#define carry1 r14
613#define carry2 r15
614#define carry3 r34
615.global bn_mul_comba8#
616.proc bn_mul_comba8#
617.align 64
618bn_mul_comba8:
619 .prologue
620 .fframe 0
621 .save ar.pfs,r2
622#if defined(_HPUX_SOURCE) && !defined(_LP64)
623{ .mii; alloc r2=ar.pfs,3,0,0,0
624 addp4 r33=0,r33
625 addp4 r34=0,r34 };;
626{ .mii; addp4 r32=0,r32
627#else
628{ .mii; alloc r2=ar.pfs,3,0,0,0
629#endif
630 add r14=8,r33
631 add r17=8,r34 }
632 .body
633{ .mii; add r15=16,r33
634 add r18=16,r34
635 add r16=24,r33 }
636.L_cheat_entry_point8:
637{ .mmi; add r19=24,r34
638
639 ldf8 f32=[r33],32 };;
640
641{ .mmi; ldf8 f120=[r34],32
642 ldf8 f121=[r17],32 }
643{ .mmi; ldf8 f122=[r18],32
644 ldf8 f123=[r19],32 };;
645{ .mmi; ldf8 f124=[r34]
646 ldf8 f125=[r17] }
647{ .mmi; ldf8 f126=[r18]
648 ldf8 f127=[r19] }
649
650{ .mmi; ldf8 f33=[r14],32
651 ldf8 f34=[r15],32 }
652{ .mmi; ldf8 f35=[r16],32;;
653 ldf8 f36=[r33] }
654{ .mmi; ldf8 f37=[r14]
655 ldf8 f38=[r15] }
656{ .mfi; ldf8 f39=[r16]
657// -------\ Entering multiplier's heaven /-------
658// ------------\ /------------
659// -----------------\ /-----------------
660// ----------------------\/----------------------
661 xma.hu f41=f32,f120,f0 }
662{ .mfi; xma.lu f40=f32,f120,f0 };; // (*)
663{ .mfi; xma.hu f51=f32,f121,f0 }
664{ .mfi; xma.lu f50=f32,f121,f0 };;
665{ .mfi; xma.hu f61=f32,f122,f0 }
666{ .mfi; xma.lu f60=f32,f122,f0 };;
667{ .mfi; xma.hu f71=f32,f123,f0 }
668{ .mfi; xma.lu f70=f32,f123,f0 };;
669{ .mfi; xma.hu f81=f32,f124,f0 }
670{ .mfi; xma.lu f80=f32,f124,f0 };;
671{ .mfi; xma.hu f91=f32,f125,f0 }
672{ .mfi; xma.lu f90=f32,f125,f0 };;
673{ .mfi; xma.hu f101=f32,f126,f0 }
674{ .mfi; xma.lu f100=f32,f126,f0 };;
675{ .mfi; xma.hu f111=f32,f127,f0 }
676{ .mfi; xma.lu f110=f32,f127,f0 };;//
677// (*) You can argue that splitting at every second bundle would
678// prevent "wider" IA-64 implementations from achieving the peak
679// performance. Well, not really... The catch is that if you
680// intend to keep 4 FP units busy by splitting at every fourth
681// bundle and thus perform these 16 multiplications in 4 ticks,
682// the first bundle *below* would stall because the result from
683// the first xma bundle *above* won't be available for another 3
684// ticks (if not more, being an optimist, I assume that "wider"
685// implementation will have same latency:-). This stall will hold
686// you back and the performance would be as if every second bundle
687// were split *anyway*...
688{ .mfi; getf.sig r16=f40
689 xma.hu f42=f33,f120,f41
690 add r33=8,r32 }
691{ .mfi; xma.lu f41=f33,f120,f41 };;
692{ .mfi; getf.sig r24=f50
693 xma.hu f52=f33,f121,f51 }
694{ .mfi; xma.lu f51=f33,f121,f51 };;
695{ .mfi; st8 [r32]=r16,16
696 xma.hu f62=f33,f122,f61 }
697{ .mfi; xma.lu f61=f33,f122,f61 };;
698{ .mfi; xma.hu f72=f33,f123,f71 }
699{ .mfi; xma.lu f71=f33,f123,f71 };;
700{ .mfi; xma.hu f82=f33,f124,f81 }
701{ .mfi; xma.lu f81=f33,f124,f81 };;
702{ .mfi; xma.hu f92=f33,f125,f91 }
703{ .mfi; xma.lu f91=f33,f125,f91 };;
704{ .mfi; xma.hu f102=f33,f126,f101 }
705{ .mfi; xma.lu f101=f33,f126,f101 };;
706{ .mfi; xma.hu f112=f33,f127,f111 }
707{ .mfi; xma.lu f111=f33,f127,f111 };;//
708//-------------------------------------------------//
709{ .mfi; getf.sig r25=f41
710 xma.hu f43=f34,f120,f42 }
711{ .mfi; xma.lu f42=f34,f120,f42 };;
712{ .mfi; getf.sig r16=f60
713 xma.hu f53=f34,f121,f52 }
714{ .mfi; xma.lu f52=f34,f121,f52 };;
715{ .mfi; getf.sig r17=f51
716 xma.hu f63=f34,f122,f62
717 add r25=r25,r24 }
718{ .mfi; xma.lu f62=f34,f122,f62
719 mov carry1=0 };;
720{ .mfi; cmp.ltu p6,p0=r25,r24
721 xma.hu f73=f34,f123,f72 }
722{ .mfi; xma.lu f72=f34,f123,f72 };;
723{ .mfi; st8 [r33]=r25,16
724 xma.hu f83=f34,f124,f82
725(p6) add carry1=1,carry1 }
726{ .mfi; xma.lu f82=f34,f124,f82 };;
727{ .mfi; xma.hu f93=f34,f125,f92 }
728{ .mfi; xma.lu f92=f34,f125,f92 };;
729{ .mfi; xma.hu f103=f34,f126,f102 }
730{ .mfi; xma.lu f102=f34,f126,f102 };;
731{ .mfi; xma.hu f113=f34,f127,f112 }
732{ .mfi; xma.lu f112=f34,f127,f112 };;//
733//-------------------------------------------------//
734{ .mfi; getf.sig r18=f42
735 xma.hu f44=f35,f120,f43
736 add r17=r17,r16 }
737{ .mfi; xma.lu f43=f35,f120,f43 };;
738{ .mfi; getf.sig r24=f70
739 xma.hu f54=f35,f121,f53 }
740{ .mfi; mov carry2=0
741 xma.lu f53=f35,f121,f53 };;
742{ .mfi; getf.sig r25=f61
743 xma.hu f64=f35,f122,f63
744 cmp.ltu p7,p0=r17,r16 }
745{ .mfi; add r18=r18,r17
746 xma.lu f63=f35,f122,f63 };;
747{ .mfi; getf.sig r26=f52
748 xma.hu f74=f35,f123,f73
749(p7) add carry2=1,carry2 }
750{ .mfi; cmp.ltu p7,p0=r18,r17
751 xma.lu f73=f35,f123,f73
752 add r18=r18,carry1 };;
753{ .mfi;
754 xma.hu f84=f35,f124,f83
755(p7) add carry2=1,carry2 }
756{ .mfi; cmp.ltu p7,p0=r18,carry1
757 xma.lu f83=f35,f124,f83 };;
758{ .mfi; st8 [r32]=r18,16
759 xma.hu f94=f35,f125,f93
760(p7) add carry2=1,carry2 }
761{ .mfi; xma.lu f93=f35,f125,f93 };;
762{ .mfi; xma.hu f104=f35,f126,f103 }
763{ .mfi; xma.lu f103=f35,f126,f103 };;
764{ .mfi; xma.hu f114=f35,f127,f113 }
765{ .mfi; mov carry1=0
766 xma.lu f113=f35,f127,f113
767 add r25=r25,r24 };;//
768//-------------------------------------------------//
769{ .mfi; getf.sig r27=f43
770 xma.hu f45=f36,f120,f44
771 cmp.ltu p6,p0=r25,r24 }
772{ .mfi; xma.lu f44=f36,f120,f44
773 add r26=r26,r25 };;
774{ .mfi; getf.sig r16=f80
775 xma.hu f55=f36,f121,f54
776(p6) add carry1=1,carry1 }
777{ .mfi; xma.lu f54=f36,f121,f54 };;
778{ .mfi; getf.sig r17=f71
779 xma.hu f65=f36,f122,f64
780 cmp.ltu p6,p0=r26,r25 }
781{ .mfi; xma.lu f64=f36,f122,f64
782 add r27=r27,r26 };;
783{ .mfi; getf.sig r18=f62
784 xma.hu f75=f36,f123,f74
785(p6) add carry1=1,carry1 }
786{ .mfi; cmp.ltu p6,p0=r27,r26
787 xma.lu f74=f36,f123,f74
788 add r27=r27,carry2 };;
789{ .mfi; getf.sig r19=f53
790 xma.hu f85=f36,f124,f84
791(p6) add carry1=1,carry1 }
792{ .mfi; xma.lu f84=f36,f124,f84
793 cmp.ltu p6,p0=r27,carry2 };;
794{ .mfi; st8 [r33]=r27,16
795 xma.hu f95=f36,f125,f94
796(p6) add carry1=1,carry1 }
797{ .mfi; xma.lu f94=f36,f125,f94 };;
798{ .mfi; xma.hu f105=f36,f126,f104 }
799{ .mfi; mov carry2=0
800 xma.lu f104=f36,f126,f104
801 add r17=r17,r16 };;
802{ .mfi; xma.hu f115=f36,f127,f114
803 cmp.ltu p7,p0=r17,r16 }
804{ .mfi; xma.lu f114=f36,f127,f114
805 add r18=r18,r17 };;//
806//-------------------------------------------------//
807{ .mfi; getf.sig r20=f44
808 xma.hu f46=f37,f120,f45
809(p7) add carry2=1,carry2 }
810{ .mfi; cmp.ltu p7,p0=r18,r17
811 xma.lu f45=f37,f120,f45
812 add r19=r19,r18 };;
813{ .mfi; getf.sig r24=f90
814 xma.hu f56=f37,f121,f55 }
815{ .mfi; xma.lu f55=f37,f121,f55 };;
816{ .mfi; getf.sig r25=f81
817 xma.hu f66=f37,f122,f65
818(p7) add carry2=1,carry2 }
819{ .mfi; cmp.ltu p7,p0=r19,r18
820 xma.lu f65=f37,f122,f65
821 add r20=r20,r19 };;
822{ .mfi; getf.sig r26=f72
823 xma.hu f76=f37,f123,f75
824(p7) add carry2=1,carry2 }
825{ .mfi; cmp.ltu p7,p0=r20,r19
826 xma.lu f75=f37,f123,f75
827 add r20=r20,carry1 };;
828{ .mfi; getf.sig r27=f63
829 xma.hu f86=f37,f124,f85
830(p7) add carry2=1,carry2 }
831{ .mfi; xma.lu f85=f37,f124,f85
832 cmp.ltu p7,p0=r20,carry1 };;
833{ .mfi; getf.sig r28=f54
834 xma.hu f96=f37,f125,f95
835(p7) add carry2=1,carry2 }
836{ .mfi; st8 [r32]=r20,16
837 xma.lu f95=f37,f125,f95 };;
838{ .mfi; xma.hu f106=f37,f126,f105 }
839{ .mfi; mov carry1=0
840 xma.lu f105=f37,f126,f105
841 add r25=r25,r24 };;
842{ .mfi; xma.hu f116=f37,f127,f115
843 cmp.ltu p6,p0=r25,r24 }
844{ .mfi; xma.lu f115=f37,f127,f115
845 add r26=r26,r25 };;//
846//-------------------------------------------------//
847{ .mfi; getf.sig r29=f45
848 xma.hu f47=f38,f120,f46
849(p6) add carry1=1,carry1 }
850{ .mfi; cmp.ltu p6,p0=r26,r25
851 xma.lu f46=f38,f120,f46
852 add r27=r27,r26 };;
853{ .mfi; getf.sig r16=f100
854 xma.hu f57=f38,f121,f56
855(p6) add carry1=1,carry1 }
856{ .mfi; cmp.ltu p6,p0=r27,r26
857 xma.lu f56=f38,f121,f56
858 add r28=r28,r27 };;
859{ .mfi; getf.sig r17=f91
860 xma.hu f67=f38,f122,f66
861(p6) add carry1=1,carry1 }
862{ .mfi; cmp.ltu p6,p0=r28,r27
863 xma.lu f66=f38,f122,f66
864 add r29=r29,r28 };;
865{ .mfi; getf.sig r18=f82
866 xma.hu f77=f38,f123,f76
867(p6) add carry1=1,carry1 }
868{ .mfi; cmp.ltu p6,p0=r29,r28
869 xma.lu f76=f38,f123,f76
870 add r29=r29,carry2 };;
871{ .mfi; getf.sig r19=f73
872 xma.hu f87=f38,f124,f86
873(p6) add carry1=1,carry1 }
874{ .mfi; xma.lu f86=f38,f124,f86
875 cmp.ltu p6,p0=r29,carry2 };;
876{ .mfi; getf.sig r20=f64
877 xma.hu f97=f38,f125,f96
878(p6) add carry1=1,carry1 }
879{ .mfi; st8 [r33]=r29,16
880 xma.lu f96=f38,f125,f96 };;
881{ .mfi; getf.sig r21=f55
882 xma.hu f107=f38,f126,f106 }
883{ .mfi; mov carry2=0
884 xma.lu f106=f38,f126,f106
885 add r17=r17,r16 };;
886{ .mfi; xma.hu f117=f38,f127,f116
887 cmp.ltu p7,p0=r17,r16 }
888{ .mfi; xma.lu f116=f38,f127,f116
889 add r18=r18,r17 };;//
890//-------------------------------------------------//
891{ .mfi; getf.sig r22=f46
892 xma.hu f48=f39,f120,f47
893(p7) add carry2=1,carry2 }
894{ .mfi; cmp.ltu p7,p0=r18,r17
895 xma.lu f47=f39,f120,f47
896 add r19=r19,r18 };;
897{ .mfi; getf.sig r24=f110
898 xma.hu f58=f39,f121,f57
899(p7) add carry2=1,carry2 }
900{ .mfi; cmp.ltu p7,p0=r19,r18
901 xma.lu f57=f39,f121,f57
902 add r20=r20,r19 };;
903{ .mfi; getf.sig r25=f101
904 xma.hu f68=f39,f122,f67
905(p7) add carry2=1,carry2 }
906{ .mfi; cmp.ltu p7,p0=r20,r19
907 xma.lu f67=f39,f122,f67
908 add r21=r21,r20 };;
909{ .mfi; getf.sig r26=f92
910 xma.hu f78=f39,f123,f77
911(p7) add carry2=1,carry2 }
912{ .mfi; cmp.ltu p7,p0=r21,r20
913 xma.lu f77=f39,f123,f77
914 add r22=r22,r21 };;
915{ .mfi; getf.sig r27=f83
916 xma.hu f88=f39,f124,f87
917(p7) add carry2=1,carry2 }
918{ .mfi; cmp.ltu p7,p0=r22,r21
919 xma.lu f87=f39,f124,f87
920 add r22=r22,carry1 };;
921{ .mfi; getf.sig r28=f74
922 xma.hu f98=f39,f125,f97
923(p7) add carry2=1,carry2 }
924{ .mfi; xma.lu f97=f39,f125,f97
925 cmp.ltu p7,p0=r22,carry1 };;
926{ .mfi; getf.sig r29=f65
927 xma.hu f108=f39,f126,f107
928(p7) add carry2=1,carry2 }
929{ .mfi; st8 [r32]=r22,16
930 xma.lu f107=f39,f126,f107 };;
931{ .mfi; getf.sig r30=f56
932 xma.hu f118=f39,f127,f117 }
933{ .mfi; xma.lu f117=f39,f127,f117 };;//
934//-------------------------------------------------//
935// Leaving multiplier's heaven... Quite a ride, huh?
936
937{ .mii; getf.sig r31=f47
938 add r25=r25,r24
939 mov carry1=0 };;
940{ .mii; getf.sig r16=f111
941 cmp.ltu p6,p0=r25,r24
942 add r26=r26,r25 };;
943{ .mfb; getf.sig r17=f102 }
944{ .mii;
945(p6) add carry1=1,carry1
946 cmp.ltu p6,p0=r26,r25
947 add r27=r27,r26 };;
948{ .mfb; nop.m 0x0 }
949{ .mii;
950(p6) add carry1=1,carry1
951 cmp.ltu p6,p0=r27,r26
952 add r28=r28,r27 };;
953{ .mii; getf.sig r18=f93
954 add r17=r17,r16
955 mov carry3=0 }
956{ .mii;
957(p6) add carry1=1,carry1
958 cmp.ltu p6,p0=r28,r27
959 add r29=r29,r28 };;
960{ .mii; getf.sig r19=f84
961 cmp.ltu p7,p0=r17,r16 }
962{ .mii;
963(p6) add carry1=1,carry1
964 cmp.ltu p6,p0=r29,r28
965 add r30=r30,r29 };;
966{ .mii; getf.sig r20=f75
967 add r18=r18,r17 }
968{ .mii;
969(p6) add carry1=1,carry1
970 cmp.ltu p6,p0=r30,r29
971 add r31=r31,r30 };;
972{ .mfb; getf.sig r21=f66 }
973{ .mii; (p7) add carry3=1,carry3
974 cmp.ltu p7,p0=r18,r17
975 add r19=r19,r18 }
976{ .mfb; nop.m 0x0 }
977{ .mii;
978(p6) add carry1=1,carry1
979 cmp.ltu p6,p0=r31,r30
980 add r31=r31,carry2 };;
981{ .mfb; getf.sig r22=f57 }
982{ .mii; (p7) add carry3=1,carry3
983 cmp.ltu p7,p0=r19,r18
984 add r20=r20,r19 }
985{ .mfb; nop.m 0x0 }
986{ .mii;
987(p6) add carry1=1,carry1
988 cmp.ltu p6,p0=r31,carry2 };;
989{ .mfb; getf.sig r23=f48 }
990{ .mii; (p7) add carry3=1,carry3
991 cmp.ltu p7,p0=r20,r19
992 add r21=r21,r20 }
993{ .mii;
994(p6) add carry1=1,carry1 }
995{ .mfb; st8 [r33]=r31,16 };;
996
997{ .mfb; getf.sig r24=f112 }
998{ .mii; (p7) add carry3=1,carry3
999 cmp.ltu p7,p0=r21,r20
1000 add r22=r22,r21 };;
1001{ .mfb; getf.sig r25=f103 }
1002{ .mii; (p7) add carry3=1,carry3
1003 cmp.ltu p7,p0=r22,r21
1004 add r23=r23,r22 };;
1005{ .mfb; getf.sig r26=f94 }
1006{ .mii; (p7) add carry3=1,carry3
1007 cmp.ltu p7,p0=r23,r22
1008 add r23=r23,carry1 };;
1009{ .mfb; getf.sig r27=f85 }
1010{ .mii; (p7) add carry3=1,carry3
1011 cmp.ltu p7,p8=r23,carry1};;
1012{ .mii; getf.sig r28=f76
1013 add r25=r25,r24
1014 mov carry1=0 }
1015{ .mii; st8 [r32]=r23,16
1016 (p7) add carry2=1,carry3
1017 (p8) add carry2=0,carry3 };;
1018
1019{ .mfb; nop.m 0x0 }
1020{ .mii; getf.sig r29=f67
1021 cmp.ltu p6,p0=r25,r24
1022 add r26=r26,r25 };;
1023{ .mfb; getf.sig r30=f58 }
1024{ .mii;
1025(p6) add carry1=1,carry1
1026 cmp.ltu p6,p0=r26,r25
1027 add r27=r27,r26 };;
1028{ .mfb; getf.sig r16=f113 }
1029{ .mii;
1030(p6) add carry1=1,carry1
1031 cmp.ltu p6,p0=r27,r26
1032 add r28=r28,r27 };;
1033{ .mfb; getf.sig r17=f104 }
1034{ .mii;
1035(p6) add carry1=1,carry1
1036 cmp.ltu p6,p0=r28,r27
1037 add r29=r29,r28 };;
1038{ .mfb; getf.sig r18=f95 }
1039{ .mii;
1040(p6) add carry1=1,carry1
1041 cmp.ltu p6,p0=r29,r28
1042 add r30=r30,r29 };;
1043{ .mii; getf.sig r19=f86
1044 add r17=r17,r16
1045 mov carry3=0 }
1046{ .mii;
1047(p6) add carry1=1,carry1
1048 cmp.ltu p6,p0=r30,r29
1049 add r30=r30,carry2 };;
1050{ .mii; getf.sig r20=f77
1051 cmp.ltu p7,p0=r17,r16
1052 add r18=r18,r17 }
1053{ .mii;
1054(p6) add carry1=1,carry1
1055 cmp.ltu p6,p0=r30,carry2 };;
1056{ .mfb; getf.sig r21=f68 }
1057{ .mii; st8 [r33]=r30,16
1058(p6) add carry1=1,carry1 };;
1059
1060{ .mfb; getf.sig r24=f114 }
1061{ .mii; (p7) add carry3=1,carry3
1062 cmp.ltu p7,p0=r18,r17
1063 add r19=r19,r18 };;
1064{ .mfb; getf.sig r25=f105 }
1065{ .mii; (p7) add carry3=1,carry3
1066 cmp.ltu p7,p0=r19,r18
1067 add r20=r20,r19 };;
1068{ .mfb; getf.sig r26=f96 }
1069{ .mii; (p7) add carry3=1,carry3
1070 cmp.ltu p7,p0=r20,r19
1071 add r21=r21,r20 };;
1072{ .mfb; getf.sig r27=f87 }
1073{ .mii; (p7) add carry3=1,carry3
1074 cmp.ltu p7,p0=r21,r20
1075 add r21=r21,carry1 };;
1076{ .mib; getf.sig r28=f78
1077 add r25=r25,r24 }
1078{ .mib; (p7) add carry3=1,carry3
1079 cmp.ltu p7,p8=r21,carry1};;
1080{ .mii; st8 [r32]=r21,16
1081 (p7) add carry2=1,carry3
1082 (p8) add carry2=0,carry3 }
1083
1084{ .mii; mov carry1=0
1085 cmp.ltu p6,p0=r25,r24
1086 add r26=r26,r25 };;
1087{ .mfb; getf.sig r16=f115 }
1088{ .mii;
1089(p6) add carry1=1,carry1
1090 cmp.ltu p6,p0=r26,r25
1091 add r27=r27,r26 };;
1092{ .mfb; getf.sig r17=f106 }
1093{ .mii;
1094(p6) add carry1=1,carry1
1095 cmp.ltu p6,p0=r27,r26
1096 add r28=r28,r27 };;
1097{ .mfb; getf.sig r18=f97 }
1098{ .mii;
1099(p6) add carry1=1,carry1
1100 cmp.ltu p6,p0=r28,r27
1101 add r28=r28,carry2 };;
1102{ .mib; getf.sig r19=f88
1103 add r17=r17,r16 }
1104{ .mib;
1105(p6) add carry1=1,carry1
1106 cmp.ltu p6,p0=r28,carry2 };;
1107{ .mii; st8 [r33]=r28,16
1108(p6) add carry1=1,carry1 }
1109
1110{ .mii; mov carry2=0
1111 cmp.ltu p7,p0=r17,r16
1112 add r18=r18,r17 };;
1113{ .mfb; getf.sig r24=f116 }
1114{ .mii; (p7) add carry2=1,carry2
1115 cmp.ltu p7,p0=r18,r17
1116 add r19=r19,r18 };;
1117{ .mfb; getf.sig r25=f107 }
1118{ .mii; (p7) add carry2=1,carry2
1119 cmp.ltu p7,p0=r19,r18
1120 add r19=r19,carry1 };;
1121{ .mfb; getf.sig r26=f98 }
1122{ .mii; (p7) add carry2=1,carry2
1123 cmp.ltu p7,p0=r19,carry1};;
1124{ .mii; st8 [r32]=r19,16
1125 (p7) add carry2=1,carry2 }
1126
1127{ .mfb; add r25=r25,r24 };;
1128
1129{ .mfb; getf.sig r16=f117 }
1130{ .mii; mov carry1=0
1131 cmp.ltu p6,p0=r25,r24
1132 add r26=r26,r25 };;
1133{ .mfb; getf.sig r17=f108 }
1134{ .mii;
1135(p6) add carry1=1,carry1
1136 cmp.ltu p6,p0=r26,r25
1137 add r26=r26,carry2 };;
1138{ .mfb; nop.m 0x0 }
1139{ .mii;
1140(p6) add carry1=1,carry1
1141 cmp.ltu p6,p0=r26,carry2 };;
1142{ .mii; st8 [r33]=r26,16
1143(p6) add carry1=1,carry1 }
1144
1145{ .mfb; add r17=r17,r16 };;
1146{ .mfb; getf.sig r24=f118 }
1147{ .mii; mov carry2=0
1148 cmp.ltu p7,p0=r17,r16
1149 add r17=r17,carry1 };;
1150{ .mii; (p7) add carry2=1,carry2
1151 cmp.ltu p7,p0=r17,carry1};;
1152{ .mii; st8 [r32]=r17
1153 (p7) add carry2=1,carry2 };;
1154{ .mfb; add r24=r24,carry2 };;
1155{ .mib; st8 [r33]=r24 }
1156
1157{ .mib; rum 1<<5 // clear um.mfh
1158 br.ret.sptk.many b0 };;
1159.endp bn_mul_comba8#
1160#undef carry3
1161#undef carry2
1162#undef carry1
1163#endif
1164
1165#if 1
1166// It's possible to make it faster (see comment to bn_sqr_comba8), but
1167// I reckon it isn't worth the effort. Basically because the routine
1168// (actually both of them) is practically never called... So I just play
1169// the same trick as with bn_sqr_comba8.
1170//
1171// void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1172//
// Squares by multiplying a with itself: a (r33) is copied into r34, the
// register bn_mul_comba4 uses for b, the element addresses r14..r18 are
// set up exactly as in bn_mul_comba4's own prologue, and control then
// branches into bn_mul_comba4 at .L_cheat_entry_point4. That routine
// stores the result and performs the return.
1173.global bn_sqr_comba4#
1174.proc bn_sqr_comba4#
1175.align 64
1176bn_sqr_comba4:
1177 .prologue
1178 .fframe 0
1179 .save ar.pfs,r2
// The HP-UX non-LP64 build additionally runs both pointer arguments
// through addp4 before they are used.
1180#if defined(_HPUX_SOURCE) && !defined(_LP64)
1181{ .mii; alloc r2=ar.pfs,2,1,0,0
1182 addp4 r32=0,r32
1183 addp4 r33=0,r33 };;
1184{ .mii;
1185#else
1186{ .mii; alloc r2=ar.pfs,2,1,0,0
1187#endif
1188 mov r34=r33 // b = a
1189 add r14=8,r33 };; // &a[1]
1190 .body
1191{ .mii; add r17=8,r34 // &b[1]
1192 add r15=16,r33 // &a[2]
1193 add r18=16,r34 } // &b[2]
1194{ .mfb; add r16=24,r33 // &a[3]
1195 br .L_cheat_entry_point4 };;
1196.endp bn_sqr_comba4#
1197#endif
1198
1199#if 1
1200// Runs in ~115 cycles and ~4.5 times faster than C. Well, whatever...
1201//
1202// void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1203//
// Computes the 8-word product r[0..7] of the 4-word operands a and b.
// a[0..3] are loaded into f32-f35 and b[0..3] into f120-f123. Each
// xma.lu/xma.hu pair yields the low/high 64 bits of a[i]*b[j] plus an
// accumulated term; the integer unit then sums the pieces column by
// column, with carry1/carry2 collecting the carries. Result words are
// stored alternately through r32 and r33 (= r+8), each post-incremented
// by 16.
//
// Note: carry1/carry2 alias r14/r15, which the prologue first uses as the
// addresses of a[1]/a[2]. They are zeroed and reused as carry
// accumulators only after those loads have been issued.
//
// bn_sqr_comba4 branches into this routine at .L_cheat_entry_point4 with
// r14..r18 and r32..r34 already set up.
1204#define carry1 r14
1205#define carry2 r15
1206.global bn_mul_comba4#
1207.proc bn_mul_comba4#
1208.align 64
1209bn_mul_comba4:
1210 .prologue
1211 .fframe 0
1212 .save ar.pfs,r2
1213#if defined(_HPUX_SOURCE) && !defined(_LP64)
1214{ .mii; alloc r2=ar.pfs,3,0,0,0
1215 addp4 r33=0,r33
1216 addp4 r34=0,r34 };;
1217{ .mii; addp4 r32=0,r32
1218#else
1219{ .mii; alloc r2=ar.pfs,3,0,0,0
1220#endif
1221 add r14=8,r33
1222 add r17=8,r34 }
1223 .body
1224{ .mii; add r15=16,r33
1225 add r18=16,r34
1226 add r16=24,r33 };;
1227.L_cheat_entry_point4:
1228{ .mmi; add r19=24,r34
1229
1230 ldf8 f32=[r33] }
1231
1232{ .mmi; ldf8 f120=[r34]
1233 ldf8 f121=[r17] };;
1234{ .mmi; ldf8 f122=[r18]
1235 ldf8 f123=[r19] }
1236
1237{ .mmi; ldf8 f33=[r14]
1238 ldf8 f34=[r15] }
1239{ .mfi; ldf8 f35=[r16]
1240
1241 xma.hu f41=f32,f120,f0 }
1242{ .mfi; xma.lu f40=f32,f120,f0 };;
1243{ .mfi; xma.hu f51=f32,f121,f0 }
1244{ .mfi; xma.lu f50=f32,f121,f0 };;
1245{ .mfi; xma.hu f61=f32,f122,f0 }
1246{ .mfi; xma.lu f60=f32,f122,f0 };;
1247{ .mfi; xma.hu f71=f32,f123,f0 }
1248{ .mfi; xma.lu f70=f32,f123,f0 };;//
1249// Major stall takes place here, and 3 more places below. Result from
1250// first xma is not available for another 3 ticks.
1251{ .mfi; getf.sig r16=f40
1252 xma.hu f42=f33,f120,f41
1253 add r33=8,r32 } // r33 now walks the odd result words
1254{ .mfi; xma.lu f41=f33,f120,f41 };;
1255{ .mfi; getf.sig r24=f50
1256 xma.hu f52=f33,f121,f51 }
1257{ .mfi; xma.lu f51=f33,f121,f51 };;
1258{ .mfi; st8 [r32]=r16,16 // r[0]
1259 xma.hu f62=f33,f122,f61 }
1260{ .mfi; xma.lu f61=f33,f122,f61 };;
1261{ .mfi; xma.hu f72=f33,f123,f71 }
1262{ .mfi; xma.lu f71=f33,f123,f71 };;//
1263//-------------------------------------------------//
1264{ .mfi; getf.sig r25=f41
1265 xma.hu f43=f34,f120,f42 }
1266{ .mfi; xma.lu f42=f34,f120,f42 };;
1267{ .mfi; getf.sig r16=f60
1268 xma.hu f53=f34,f121,f52 }
1269{ .mfi; xma.lu f52=f34,f121,f52 };;
1270{ .mfi; getf.sig r17=f51
1271 xma.hu f63=f34,f122,f62
1272 add r25=r25,r24 }
1273{ .mfi; mov carry1=0
1274 xma.lu f62=f34,f122,f62 };;
1275{ .mfi; st8 [r33]=r25,16 // r[1]
1276 xma.hu f73=f34,f123,f72
1277 cmp.ltu p6,p0=r25,r24 }
1278{ .mfi; xma.lu f72=f34,f123,f72 };;//
1279//-------------------------------------------------//
1280{ .mfi; getf.sig r18=f42
1281 xma.hu f44=f35,f120,f43
1282(p6) add carry1=1,carry1 }
1283{ .mfi; add r17=r17,r16
1284 xma.lu f43=f35,f120,f43
1285 mov carry2=0 };;
1286{ .mfi; getf.sig r24=f70
1287 xma.hu f54=f35,f121,f53
1288 cmp.ltu p7,p0=r17,r16 }
1289{ .mfi; xma.lu f53=f35,f121,f53 };;
1290{ .mfi; getf.sig r25=f61
1291 xma.hu f64=f35,f122,f63
1292 add r18=r18,r17 }
1293{ .mfi; xma.lu f63=f35,f122,f63
1294(p7) add carry2=1,carry2 };;
1295{ .mfi; getf.sig r26=f52
1296 xma.hu f74=f35,f123,f73
1297 cmp.ltu p7,p0=r18,r17 }
1298{ .mfi; xma.lu f73=f35,f123,f73
1299 add r18=r18,carry1 };;
1300//-------------------------------------------------//
1301{ .mii; st8 [r32]=r18,16 // r[2]
1302(p7) add carry2=1,carry2
1303 cmp.ltu p7,p0=r18,carry1 };;
1304
1305{ .mfi; getf.sig r27=f43 // last major stall
1306(p7) add carry2=1,carry2 };;
1307{ .mii; getf.sig r16=f71
1308 add r25=r25,r24
1309 mov carry1=0 };;
1310{ .mii; getf.sig r17=f62
1311 cmp.ltu p6,p0=r25,r24
1312 add r26=r26,r25 };;
1313{ .mii;
1314(p6) add carry1=1,carry1
1315 cmp.ltu p6,p0=r26,r25
1316 add r27=r27,r26 };;
1317{ .mii;
1318(p6) add carry1=1,carry1
1319 cmp.ltu p6,p0=r27,r26
1320 add r27=r27,carry2 };;
1321{ .mii; getf.sig r18=f53
1322(p6) add carry1=1,carry1
1323 cmp.ltu p6,p0=r27,carry2 };;
1324{ .mfi; st8 [r33]=r27,16 // r[3]
1325(p6) add carry1=1,carry1 }
1326
1327{ .mii; getf.sig r19=f44
1328 add r17=r17,r16
1329 mov carry2=0 };;
1330{ .mii; getf.sig r24=f72
1331 cmp.ltu p7,p0=r17,r16
1332 add r18=r18,r17 };;
1333{ .mii; (p7) add carry2=1,carry2
1334 cmp.ltu p7,p0=r18,r17
1335 add r19=r19,r18 };;
1336{ .mii; (p7) add carry2=1,carry2
1337 cmp.ltu p7,p0=r19,r18
1338 add r19=r19,carry1 };;
1339{ .mii; getf.sig r25=f63
1340 (p7) add carry2=1,carry2
1341 cmp.ltu p7,p0=r19,carry1};;
1342{ .mii; st8 [r32]=r19,16 // r[4]
1343 (p7) add carry2=1,carry2 }
1344
1345{ .mii; getf.sig r26=f54
1346 add r25=r25,r24
1347 mov carry1=0 };;
1348{ .mii; getf.sig r16=f73
1349 cmp.ltu p6,p0=r25,r24
1350 add r26=r26,r25 };;
1351{ .mii;
1352(p6) add carry1=1,carry1
1353 cmp.ltu p6,p0=r26,r25
1354 add r26=r26,carry2 };;
1355{ .mii; getf.sig r17=f64
1356(p6) add carry1=1,carry1
1357 cmp.ltu p6,p0=r26,carry2 };;
1358{ .mii; st8 [r33]=r26,16 // r[5]
1359(p6) add carry1=1,carry1 }
1360
1361{ .mii; getf.sig r24=f74
1362 add r17=r17,r16
1363 mov carry2=0 };;
1364{ .mii; cmp.ltu p7,p0=r17,r16
1365 add r17=r17,carry1 };;
1366
1367{ .mii; (p7) add carry2=1,carry2
1368 cmp.ltu p7,p0=r17,carry1};;
1369{ .mii; st8 [r32]=r17,16 // r[6]
1370 (p7) add carry2=1,carry2 };;
1371
1372{ .mii; add r24=r24,carry2 };;
1373{ .mii; st8 [r33]=r24 } // r[7]
1374
1375{ .mib; rum 1<<5 // clear um.mfh
1376 br.ret.sptk.many b0 };;
1377.endp bn_mul_comba4#
1378#undef carry2
1379#undef carry1
1380#endif
1381
1382#if 1
1383//
1384// BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
1385//
1386// In a nutshell it's a port of my MIPS III/IV implementation.
1387//
// Returns in r8 the quotient of the two-word value h:l divided by d, or
// all ones if d is zero. The remainder is left in r9.
// Outline, as far as the code below shows:
//  1. .L_divw_shift shifts d left until its top bit is set and counts
//     the shift. NOTE(review): the loop uses br.wtop with rotating
//     registers, so the r33..r36 names presumably refer to different
//     physical registers on each pass - confirm before touching it.
//  2. h:l is shifted left by the same count; abort() is called if
//     non-zero bits would be shifted out of h.
//  3. Two rounds each produce 32 quotient bits: an estimate from
//     .L_udiv64_32_b6 (or 0xffffffff when the upper halves of H and D
//     are equal), followed by a correction loop (.L_divw_1st_iter and
//     .L_divw_2nd_iter).
1388#define AT r14
1389#define H r16
1390#define HH r20
1391#define L r17
1392#define D r18
1393#define DH r22
1394#define I r21
1395
1396#if 0
1397// Some preprocessors (most notably HP-UX) appear to be allergic to
1398// macros enclosed in parentheses [as these three were].
1399#define cont p16
1400#define break p0 // p20
1401#define equ p24
1402#else
1403cont=p16
1404break=p0
1405equ=p24
1406#endif
1407
1408.global abort#
1409.global bn_div_words#
1410.proc bn_div_words#
1411.align 64
1412bn_div_words:
1413 .prologue
1414 .fframe 0
1415 .save ar.pfs,r2
1416 .save b0,r3
1417{ .mii; alloc r2=ar.pfs,3,5,0,8
1418 mov r3=b0
1419 mov r10=pr };; // predicates are restored from r10 at exit
1420{ .mmb; cmp.eq p6,p0=r34,r0 // d == 0 ?
1421 mov r8=-1
1422(p6) br.ret.spnt.many b0 };; // yes: return all ones
1423
1424 .body
1425{ .mii; mov H=r32 // save h
1426 mov ar.ec=0 // don't rotate at exit
1427 mov pr.rot=0 }
1428{ .mii; mov L=r33 // save l
1429 mov r36=r0 };;
1430
1431.L_divw_shift: // -vv- note signed comparison
1432{ .mfi; (p0) cmp.lt p16,p0=r0,r34 // d
1433 (p0) shladd r33=r34,1,r0 }
1434{ .mfb; (p0) add r35=1,r36
1435 (p0) nop.f 0x0
1436(p16) br.wtop.dpnt .L_divw_shift };;
1437
1438{ .mii; mov D=r34
1439 shr.u DH=r34,32
1440 sub r35=64,r36 };;
1441{ .mii; setf.sig f7=DH
1442 shr.u AT=H,r35
1443 mov I=r36 };;
1444{ .mib; cmp.ne p6,p0=r0,AT
1445 shl H=H,r36
1446(p6) br.call.spnt.clr b0=abort };; // overflow, die...
1447
1448{ .mfi; fcvt.xuf.s1 f7=f7
1449 shr.u AT=L,r35 };;
1450{ .mii; shl L=L,r36
1451 or H=H,AT };;
1452
1453{ .mii; nop.m 0x0
1454 cmp.leu p6,p0=D,H;;
1455(p6) sub H=H,D }
1456
1457{ .mlx; setf.sig f14=D
1458 movl AT=0xffffffff };;
1459///////////////////////////////////////////////////////////
// First round: estimate the upper 32 quotient bits.
1460{ .mii; setf.sig f6=H
1461 shr.u HH=H,32;;
1462 cmp.eq p6,p7=HH,DH };;
1463{ .mfb;
1464(p6) setf.sig f8=AT
1465(p7) fcvt.xuf.s1 f6=f6
1466(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1467
1468{ .mfi; getf.sig r33=f8 // q
1469 xmpy.lu f9=f8,f14 }
1470{ .mfi; xmpy.hu f10=f8,f14
1471 shrp H=H,L,32 };;
1472
1473{ .mmi; getf.sig r35=f9 // tl
1474 getf.sig r31=f10 };; // th
1475
1476.L_divw_1st_iter:
1477{ .mii; (p0) add r32=-1,r33
1478 (p0) cmp.eq equ,cont=HH,r31 };;
1479{ .mii; (p0) cmp.ltu p8,p0=r35,D
1480 (p0) sub r34=r35,D
1481 (equ) cmp.leu break,cont=r35,H };;
1482{ .mib; (cont) cmp.leu cont,break=HH,r31
1483 (p8) add r31=-1,r31
1484(cont) br.wtop.spnt .L_divw_1st_iter };;
1485///////////////////////////////////////////////////////////
1486{ .mii; sub H=H,r35
1487 shl r8=r33,32 // upper half of the result
1488 shl L=L,32 };;
1489///////////////////////////////////////////////////////////
// Second round: same sequence for the lower 32 quotient bits.
1490{ .mii; setf.sig f6=H
1491 shr.u HH=H,32;;
1492 cmp.eq p6,p7=HH,DH };;
1493{ .mfb;
1494(p6) setf.sig f8=AT
1495(p7) fcvt.xuf.s1 f6=f6
1496(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1497
1498{ .mfi; getf.sig r33=f8 // q
1499 xmpy.lu f9=f8,f14 }
1500{ .mfi; xmpy.hu f10=f8,f14
1501 shrp H=H,L,32 };;
1502
1503{ .mmi; getf.sig r35=f9 // tl
1504 getf.sig r31=f10 };; // th
1505
1506.L_divw_2nd_iter:
1507{ .mii; (p0) add r32=-1,r33
1508 (p0) cmp.eq equ,cont=HH,r31 };;
1509{ .mii; (p0) cmp.ltu p8,p0=r35,D
1510 (p0) sub r34=r35,D
1511 (equ) cmp.leu break,cont=r35,H };;
1512{ .mib; (cont) cmp.leu cont,break=HH,r31
1513 (p8) add r31=-1,r31
1514(cont) br.wtop.spnt .L_divw_2nd_iter };;
1515///////////////////////////////////////////////////////////
1516{ .mii; sub H=H,r35
1517 or r8=r8,r33 // combine both quotient halves
1518 mov ar.pfs=r2 };;
1519{ .mii; shr.u r9=H,I // remainder if anybody wants it
1520 mov pr=r10,0x1ffff }
1521{ .mfb; br.ret.sptk.many b0 };;
1522
1523// Unsigned 64 by 32 (well, by 64 for the moment) bit integer division
1524// procedure.
1525//
1526// inputs: f6 = (double)a, f7 = (double)b
1527// output: f8 = (int)(a/b)
1528// clobbered: f8,f9,f10,f11,pred
1529pred=p15
1530// One can argue that this snippet is copyrighted to Intel
1531// Corporation, as it's essentially identical to one of those
1532// found in "Divide, Square Root and Remainder" section at
1533// http://www.intel.com/software/products/opensource/libraries/num.htm.
1534// Yes, I admit that the referred code was used as template,
1535// but after I realized that there hardly is any other instruction
1536// sequence which would perform this operation. I mean I figure that
1537// any independent attempt to implement high-performance division
1538// will result in code virtually identical to the Intel code. It
1539// should be noted though that below division kernel is 1 cycle
1540// faster than Intel one (note commented splits:-), not to mention
1541// original prologue (rather lack of one) and epilogue.
1542.align 32
1543.skip 16
1544.L_udiv64_32_b6:
1545 frcpa.s1 f8,pred=f6,f7;; // [0] y0 = 1 / b
1546
1547(pred) fnma.s1 f9=f7,f8,f1 // [5] e0 = 1 - b * y0
1548(pred) fmpy.s1 f10=f6,f8;; // [5] q0 = a * y0
1549(pred) fmpy.s1 f11=f9,f9 // [10] e1 = e0 * e0
1550(pred) fma.s1 f10=f9,f10,f10;; // [10] q1 = q0 + e0 * q0
1551(pred) fma.s1 f8=f9,f8,f8 //;; // [15] y1 = y0 + e0 * y0
1552(pred) fma.s1 f9=f11,f10,f10;; // [15] q2 = q1 + e1 * q1
1553(pred) fma.s1 f8=f11,f8,f8 //;; // [20] y2 = y1 + e1 * y1
1554(pred) fnma.s1 f10=f7,f9,f6;; // [20] r2 = a - b * q2
1555(pred) fma.s1 f8=f10,f8,f9;; // [25] q3 = q2 + r2 * y2
1556
1557 fcvt.fxu.trunc.s1 f8=f8 // [30] q = trunc(q3)
1558 br.ret.sptk.many b6;;
1559.endp bn_div_words#
1560#endif
diff --git a/src/lib/libcrypto/bn/asm/mips1.s b/src/lib/libcrypto/bn/asm/mips1.s
new file mode 100644
index 0000000000..44fa1254c7
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/mips1.s
@@ -0,0 +1,539 @@
1/* This assembler is for R2000/R3000 machines, or higher ones that do
2 * not want to do any 64 bit arithmetic.
3 * Make sure that the SSLeay bignum library is compiled with
4 * THIRTY_TWO_BIT set.
5 * This must either be compiled with the system CC, or, if you use GNU gas,
6 * cc -E mips1.s|gas -o mips1.o
7 */
8 .set reorder
9 .set noat
10
/*
 * Register aliases used by the word routines below.
 * R1-R4 and L1-L4 hold words loaded from the operand arrays (L1-L4 also
 * receive the low product halves via mflo), H1-H4 receive the high
 * product halves via mfhi, and CC is the running carry.
 * R1 is $1, the register the assembler normally reserves for itself,
 * hence the ".set noat" above.
 */
11#define R1 $1
12#define CC $2
13#define R2 $3
14#define R3 $8
15#define R4 $9
16#define L1 $10
17#define L2 $11
18#define L3 $12
19#define L4 $13
20#define H1 $14
21#define H2 $15
22#define H3 $24
23#define H4 $25
24
/*
 * P1-P4 are $4-$7, the first four integer argument registers in the
 * standard MIPS calling convention.
 */
25#define P1 $4
26#define P2 $5
27#define P3 $6
28#define P4 $7
29
/*
 * bn_mul_add_words: for each of the P3 words, P1[i] += P2[i] * P4 plus
 * the running carry. The final carry is left in CC ($2).
 * The loop at $lab2 handles four words per pass while at least four
 * remain; $lab33 handles the remaining words one at a time. A count of
 * zero or less returns immediately with CC = 0.
 */
30 .align 2
31 .ent bn_mul_add_words
32 .globl bn_mul_add_words
33.text
34bn_mul_add_words:
35 .frame $sp,0,$31
36 .mask 0x00000000,0
37 .fmask 0x00000000,0
38
39 #blt P3,4,$lab34
40
41 subu R1,P3,4
42 move CC,$0
43 bltz R1,$lab34
44$lab2:
45 lw R1,0(P1)
46 lw L1,0(P2)
47 lw R2,4(P1)
48 lw L2,4(P2)
49 lw R3,8(P1)
50 lw L3,8(P2)
51 lw R4,12(P1)
52 lw L4,12(P2)
/* word 0: R1 += carry, then += low(L1*P4); new carry = high + both overflows */
53 multu L1,P4
54 addu R1,R1,CC
55 mflo L1
56 sltu CC,R1,CC
57 addu R1,R1,L1
58 mfhi H1
59 sltu L1,R1,L1
60 sw R1,0(P1)
61 addu CC,CC,L1
/* word 1 */
62 multu L2,P4
63 addu CC,H1,CC
64 mflo L2
65 addu R2,R2,CC
66 sltu CC,R2,CC
67 mfhi H2
68 addu R2,R2,L2
69 addu P2,P2,16
70 sltu L2,R2,L2
71 sw R2,4(P1)
72 addu CC,CC,L2
/* word 2 (P1 is advanced mid-way, hence the negative store offsets) */
73 multu L3,P4
74 addu CC,H2,CC
75 mflo L3
76 addu R3,R3,CC
77 sltu CC,R3,CC
78 mfhi H3
79 addu R3,R3,L3
80 addu P1,P1,16
81 sltu L3,R3,L3
82 sw R3,-8(P1)
83 addu CC,CC,L3
/* word 3 */
84 multu L4,P4
85 addu CC,H3,CC
86 mflo L4
87 addu R4,R4,CC
88 sltu CC,R4,CC
89 mfhi H4
90 addu R4,R4,L4
91 subu P3,P3,4
92 sltu L4,R4,L4
93 addu CC,CC,L4
94 addu CC,H4,CC
95
96 subu R1,P3,4
97 sw R4,-4(P1) # delay slot
98 bgez R1,$lab2
99
100 bleu P3,0,$lab3
101 .align 2
/* one word per pass for the remaining P3 words */
102$lab33:
103 lw L1,0(P2)
104 lw R1,0(P1)
105 multu L1,P4
106 addu R1,R1,CC
107 sltu CC,R1,CC
108 addu P1,P1,4
109 mflo L1
110 mfhi H1
111 addu R1,R1,L1
112 addu P2,P2,4
113 sltu L1,R1,L1
114 subu P3,P3,1
115 addu CC,CC,L1
116 sw R1,-4(P1)
117 addu CC,H1,CC
118 bgtz P3,$lab33
119 j $31
120 .align 2
121$lab3:
122 j $31
123 .align 2
/* entry path for fewer than four words */
124$lab34:
125 bgt P3,0,$lab33
126 j $31
127 .end bn_mul_add_words
128
/*
 * bn_mul_words: for each of the P3 words, P1[i] = low word of
 * (P2[i] * P4 + carry); the high word plus any overflow becomes the next
 * carry. The final carry is left in CC ($2).
 * P3 is biased by -4 on entry so that the four-way unrolled loop at
 * $lab44 can test it with bgez; $lab45 removes the bias and $lab46
 * handles the leftover words one at a time.
 */
129 .align 2
130 # Program Unit: bn_mul_words
131 .ent bn_mul_words
132 .globl bn_mul_words
133.text
134bn_mul_words:
135 .frame $sp,0,$31
136 .mask 0x00000000,0
137 .fmask 0x00000000,0
138
139 subu P3,P3,4
140 move CC,$0
141 bltz P3,$lab45
142$lab44:
143 lw L1,0(P2)
144 lw L2,4(P2)
145 lw L3,8(P2)
146 lw L4,12(P2)
147 multu L1,P4
148 subu P3,P3,4
149 mflo L1
150 mfhi H1
151 addu L1,L1,CC
152 multu L2,P4
153 sltu CC,L1,CC
154 sw L1,0(P1)
155 addu CC,H1,CC
156 mflo L2
157 mfhi H2
158 addu L2,L2,CC
159 multu L3,P4
160 sltu CC,L2,CC
161 sw L2,4(P1)
162 addu CC,H2,CC
163 mflo L3
164 mfhi H3
165 addu L3,L3,CC
166 multu L4,P4
167 sltu CC,L3,CC
168 sw L3,8(P1)
169 addu CC,H3,CC
170 mflo L4
171 mfhi H4
172 addu L4,L4,CC
173 addu P1,P1,16
174 sltu CC,L4,CC
175 addu P2,P2,16
176 addu CC,H4,CC
177 sw L4,-4(P1)
178
179 bgez P3,$lab44
180 b $lab45
/* one word per pass */
181$lab46:
182 lw L1,0(P2)
183 addu P1,P1,4
184 multu L1,P4
185 addu P2,P2,4
186 mflo L1
187 mfhi H1
188 addu L1,L1,CC
189 subu P3,P3,1
190 sltu CC,L1,CC
191 sw L1,-4(P1)
192 addu CC,H1,CC
193 bgtz P3,$lab46
194 j $31
/* undo the -4 bias; anything left goes through $lab46 */
195$lab45:
196 addu P3,P3,4
197 bgtz P3,$lab46
198 j $31
199 .align 2
200 .end bn_mul_words
201
/*
 * bn_sqr_words: for each of the P3 input words at P2, stores the
 * two-word square at P1 (low word first), so P1 advances twice as fast
 * as P2. No carry is propagated between words.
 * $lab54 squares four words per pass; $lab56 squares one word per pass.
 * NOTE(review): the unrolled loop repeats only while the biased count is
 * strictly positive (bgtz), so a trailing group of exactly four words is
 * finished by $lab56 rather than $lab54; the result is the same.
 */
202 # Program Unit: bn_sqr_words
203 .ent bn_sqr_words
204 .globl bn_sqr_words
205.text
206bn_sqr_words:
207 .frame $sp,0,$31
208 .mask 0x00000000,0
209 .fmask 0x00000000,0
210
211 subu P3,P3,4
212 bltz P3,$lab55
213$lab54:
214 lw L1,0(P2)
215 lw L2,4(P2)
216 lw L3,8(P2)
217 lw L4,12(P2)
218
219 multu L1,L1
220 subu P3,P3,4
221 mflo L1
222 mfhi H1
223 sw L1,0(P1)
224 sw H1,4(P1)
225
226 multu L2,L2
227 addu P1,P1,32
228 mflo L2
229 mfhi H2
230 sw L2,-24(P1)
231 sw H2,-20(P1)
232
233 multu L3,L3
234 addu P2,P2,16
235 mflo L3
236 mfhi H3
237 sw L3,-16(P1)
238 sw H3,-12(P1)
239
240 multu L4,L4
241
242 mflo L4
243 mfhi H4
244 sw L4,-8(P1)
245 sw H4,-4(P1)
246
247 bgtz P3,$lab54
248 b $lab55
/* one word per pass */
249$lab56:
250 lw L1,0(P2)
251 addu P1,P1,8
252 multu L1,L1
253 addu P2,P2,4
254 subu P3,P3,1
255 mflo L1
256 mfhi H1
257 sw L1,-8(P1)
258 sw H1,-4(P1)
259
260 bgtz P3,$lab56
261 j $31
/* undo the -4 bias; anything left goes through $lab56 */
262$lab55:
263 addu P3,P3,4
264 bgtz P3,$lab56
265 j $31
266 .align 2
267 .end bn_sqr_words
268
/*
 * bn_add_words: for each of the P4 words, P1[i] = P2[i] + P3[i] + carry.
 * The carry out of the last word is left in CC ($2).
 * Each word is added in two steps (first the carry, then the second
 * operand); sltu after each step detects the wrap-around and the two
 * results are summed into the next carry.
 * $lab64 handles four words per pass, $lab66 one word per pass.
 */
269 # Program Unit: bn_add_words
270 .ent bn_add_words
271 .globl bn_add_words
272.text
273bn_add_words: # 0x590
274 .frame $sp,0,$31
275 .mask 0x00000000,0
276 .fmask 0x00000000,0
277
278 subu P4,P4,4
279 move CC,$0
280 bltz P4,$lab65
281$lab64:
282 lw L1,0(P2)
283 lw R1,0(P3)
284 lw L2,4(P2)
285 lw R2,4(P3)
286
287 addu L1,L1,CC
288 lw L3,8(P2)
289 sltu CC,L1,CC
290 addu L1,L1,R1
291 sltu R1,L1,R1
292 lw R3,8(P3)
293 addu CC,CC,R1
294 lw L4,12(P2)
295
296 addu L2,L2,CC
297 lw R4,12(P3)
298 sltu CC,L2,CC
299 addu L2,L2,R2
300 sltu R2,L2,R2
301 sw L1,0(P1)
302 addu CC,CC,R2
303 addu P1,P1,16
304 addu L3,L3,CC
305 sw L2,-12(P1)
306
307 sltu CC,L3,CC
308 addu L3,L3,R3
309 sltu R3,L3,R3
310 addu P2,P2,16
311 addu CC,CC,R3
312
313 addu L4,L4,CC
314 addu P3,P3,16
315 sltu CC,L4,CC
316 addu L4,L4,R4
317 subu P4,P4,4
318 sltu R4,L4,R4
319 sw L3,-8(P1)
320 addu CC,CC,R4
321 sw L4,-4(P1)
322
323 bgtz P4,$lab64
324 b $lab65
/* one word per pass */
325$lab66:
326 lw L1,0(P2)
327 lw R1,0(P3)
328 addu L1,L1,CC
329 addu P1,P1,4
330 sltu CC,L1,CC
331 addu P2,P2,4
332 addu P3,P3,4
333 addu L1,L1,R1
334 subu P4,P4,1
335 sltu R1,L1,R1
336 sw L1,-4(P1)
337 addu CC,CC,R1
338
339 bgtz P4,$lab66
340 j $31
/* undo the -4 bias; anything left goes through $lab66 */
341$lab65:
342 addu P4,P4,4
343 bgtz P4,$lab66
344 j $31
345 .end bn_add_words
346
/*
 * bn_div64: divides the two-word value h:l ($4:$5) by d ($6) and returns
 * the one-word quotient in $2, or -1 when d is zero. The interleaved
 * "# nnn" comments quote the C source this code was compiled from.
 *
 * Register use: $9 = h, $12 = l, $16 = d, $13 = count, $5 = q, and $31
 * doubles as the "ret" accumulator. Because jal overwrites $31, it is
 * spilled to 16($sp) around each call; the real return address is kept
 * at 56($sp) for the whole routine.
 *
 * Fix: the spill of $31 around the abort() call used offset 26, which is
 * not word-aligned and overlaps the 24($sp) slot holding $9. It now uses
 * 16($sp), the same slot used around the BN_num_bits_word call.
 */
347 # Program Unit: bn_div64
348 .set at
349 .set reorder
350 .text
351 .align 2
352 .globl bn_div64
353 # 321 {
354 .ent bn_div64 2
355bn_div64:
356 subu $sp, 64
357 sw $31, 56($sp)
358 sw $16, 48($sp)
359 .mask 0x80010000, -56
360 .frame $sp, 64, $31
361 move $9, $4
362 move $12, $5
363 move $16, $6
364 # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t;
365 move $31, $0
366 # 323 int i,count=2;
367 li $13, 2
368 # 324
369 # 325 if (d == 0) return(BN_MASK2);
370 bne $16, 0, $80
371 li $2, -1
372 b $93
373$80:
374 # 326
375 # 327 i=BN_num_bits_word(d);
376 move $4, $16
377 sw $31, 16($sp)
378 sw $9, 24($sp)
379 sw $12, 32($sp)
380 sw $13, 40($sp)
381 .livereg 0x800ff0e,0xfff
382 jal BN_num_bits_word
383 li $4, 32
384 lw $31, 16($sp)
385 lw $9, 24($sp)
386 lw $12, 32($sp)
387 lw $13, 40($sp)
388 move $3, $2
389 # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
390 beq $2, $4, $81
391 li $14, 1
392 sll $15, $14, $2
393 bleu $9, $15, $81
394 # 329 {
395 # 330 #if !defined(NO_STDIO) && !defined(WIN16)
396 # 331 fprintf(stderr,"Division would overflow (%d)\n",i);
397 # 332 #endif
398 # 333 abort();
399 sw $3, 8($sp)
400 sw $9, 24($sp)
401 sw $12, 32($sp)
402 sw $13, 40($sp)
403 sw $31, 16($sp)
404 .livereg 0xff0e,0xfff
405 jal abort
406 lw $3, 8($sp)
407 li $4, 32
408 lw $9, 24($sp)
409 lw $12, 32($sp)
410 lw $13, 40($sp)
411 lw $31, 16($sp)
412 # 334 }
413$81:
414 # 335 i=BN_BITS2-i;
415 subu $3, $4, $3
416 # 336 if (h >= d) h-=d;
417 bltu $9, $16, $82
418 subu $9, $9, $16
419$82:
420 # 337
421 # 338 if (i)
422 beq $3, 0, $83
423 # 339 {
424 # 340 d<<=i;
425 sll $16, $16, $3
426 # 341 h=(h<<i)|(l>>(BN_BITS2-i));
427 sll $24, $9, $3
428 subu $25, $4, $3
429 srl $14, $12, $25
430 or $9, $24, $14
431 # 342 l<<=i;
432 sll $12, $12, $3
433 # 343 }
434$83:
435 # 344 dh=(d&BN_MASK2h)>>BN_BITS4;
436 # 345 dl=(d&BN_MASK2l);
437 and $8, $16, -65536
438 srl $8, $8, 16
439 and $10, $16, 65535
440 li $6, -65536
441$84:
442 # 346 for (;;)
443 # 347 {
444 # 348 if ((h>>BN_BITS4) == dh)
445 srl $15, $9, 16
446 bne $8, $15, $85
447 # 349 q=BN_MASK2l;
448 li $5, 65535
449 b $86
450$85:
451 # 350 else
452 # 351 q=h/dh;
453 divu $5, $9, $8
454$86:
455 # 352
456 # 353 for (;;)
457 # 354 {
458 # 355 t=(h-q*dh);
459 mul $4, $5, $8
460 subu $2, $9, $4
461 move $3, $2
462 # 356 if ((t&BN_MASK2h) ||
463 # 357 ((dl*q) <= (
464 # 358 (t<<BN_BITS4)+
465 # 359 ((l&BN_MASK2h)>>BN_BITS4))))
466 and $25, $2, $6
467 bne $25, $0, $87
468 mul $24, $10, $5
469 sll $14, $3, 16
470 and $15, $12, $6
471 srl $25, $15, 16
472 addu $15, $14, $25
473 bgtu $24, $15, $88
474$87:
475 # 360 break;
476 mul $3, $10, $5
477 b $89
478$88:
479 # 361 q--;
480 addu $5, $5, -1
481 # 362 }
482 b $86
483$89:
484 # 363 th=q*dh;
485 # 364 tl=q*dl;
486 # 365 t=(tl>>BN_BITS4);
487 # 366 tl=(tl<<BN_BITS4)&BN_MASK2h;
488 sll $14, $3, 16
489 and $2, $14, $6
490 move $11, $2
491 # 367 th+=t;
492 srl $25, $3, 16
493 addu $7, $4, $25
494 # 368
495 # 369 if (l < tl) th++;
496 bgeu $12, $2, $90
497 addu $7, $7, 1
498$90:
499 # 370 l-=tl;
500 subu $12, $12, $11
501 # 371 if (h < th)
502 bgeu $9, $7, $91
503 # 372 {
504 # 373 h+=d;
505 addu $9, $9, $16
506 # 374 q--;
507 addu $5, $5, -1
508 # 375 }
509$91:
510 # 376 h-=th;
511 subu $9, $9, $7
512 # 377
513 # 378 if (--count == 0) break;
514 addu $13, $13, -1
515 beq $13, 0, $92
516 # 379
517 # 380 ret=q<<BN_BITS4;
518 sll $31, $5, 16
519 # 381 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
520 sll $24, $9, 16
521 srl $15, $12, 16
522 or $9, $24, $15
523 # 382 l=(l&BN_MASK2l)<<BN_BITS4;
524 and $12, $12, 65535
525 sll $12, $12, 16
526 # 383 }
527 b $84
528$92:
529 # 384 ret|=q;
530 or $31, $31, $5
531 # 385 return(ret);
532 move $2, $31
533$93:
534 lw $16, 48($sp)
535 lw $31, 56($sp)
536 addu $sp, 64
537 j $31
538 .end bn_div64
539
diff --git a/src/lib/libcrypto/bn/asm/mips3.s b/src/lib/libcrypto/bn/asm/mips3.s
new file mode 100644
index 0000000000..dca4105c7d
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/mips3.s
@@ -0,0 +1,2201 @@
1.rdata
2.asciiz "mips3.s, Version 1.1"
3.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
4
5/*
6 * ====================================================================
7 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
8 * project.
9 *
10 * Rights for redistribution and usage in source and binary forms are
11 * granted according to the OpenSSL license. Warranty of any kind is
12 * disclaimed.
13 * ====================================================================
14 */
15
16/*
17 * This is my modest contribution to the OpenSSL project (see
18 * http://www.openssl.org/ for more information about it) and is
19 * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
20 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
21 *
22 * The module is designed to work with either of the "new" MIPS ABI(5),
23 * namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
24 * IRIX 5.x not only because it doesn't support new ABIs but also
25 * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
26 * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
27 * cause illegal instruction exception:-(
28 *
29 * In addition the code depends on preprocessor flags set up by MIPSpro
30 * compiler driver (either as or cc) and therefore (probably?) can't be
31 * compiled by the GNU assembler. GNU C driver manages fine though...
32 * I mean as long as -mmips-as is specified or is the default option,
33 * because then it simply invokes /usr/bin/as which in turn takes
34 * perfect care of the preprocessor definitions. Another neat feature
35 * offered by the MIPSpro assembler is an optimization pass. This gave
36 * me the opportunity to have the code looking more regular as all those
37 * architecture dependent instruction rescheduling details were left to
38 * the assembler. Cool, huh?
39 *
40 * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
41 * goes way over 3 times faster!
42 *
43 * <appro@fy.chalmers.se>
44 */
45#include <asm.h>
46#include <regdef.h>
47
48#if _MIPS_ISA>=4 /* MOVNZ(cond,dst,src): copy src into dst when cond is nonzero; ISA level 4 and up uses movn */
49#define MOVNZ(cond,dst,src) \
50 movn dst,src,cond
51#else /* lower ISA levels: a branch-likely over the next instruction, with the move sitting in its delay slot */
52#define MOVNZ(cond,dst,src) \
53 .set noreorder; \
54 bnezl cond,.+8; \
55 move dst,src; \
56 .set reorder
57#endif /* _MIPS_ISA>=4 */
58
59.text
60
61.set noat
62.set reorder
63
64#define MINUS4 v1
65
66.align 5
67LEAF(bn_mul_add_words) /* a0[i] += a1[i]*a3 for a2 words; the carry word left at the end is returned in v0 */
68 .set noreorder
69 bgtzl a2,.L_bn_mul_add_words_proceed /* count > 0: carry on (branch-likely: delay slot runs only when taken) */
70 ld t0,0(a1) /* delay slot: preload the first source word */
71 jr ra /* count <= 0: nothing to do */
72 move v0,zero /* delay slot: return 0 */
73 .set reorder
74
75.L_bn_mul_add_words_proceed:
76 li MINUS4,-4 /* mask that clears the two low bits */
77 and ta0,a2,MINUS4 /* ta0 = count with its two low bits cleared */
78 move v0,zero /* running carry = 0 */
79 beqz ta0,.L_bn_mul_add_words_tail /* fewer than 4 words: only the tail code is needed */
80
81.L_bn_mul_add_words_loop: /* main loop: four words per pass, t0 already holds the first source word */
82 dmultu t0,a3
83 ld t1,0(a0)
84 ld t2,8(a1)
85 ld t3,8(a0)
86 ld ta0,16(a1)
87 ld ta1,16(a0)
88 daddu t1,v0 /* destination word + incoming carry */
89 sltu v0,t1,v0 /* All manuals say it "compares 32-bit
90 * values", but it seems to work fine
91 * even on 64-bit registers. */
92 mflo AT /* low half of the product */
93 mfhi t0 /* high half of the product */
94 daddu t1,AT
95 daddu v0,t0 /* new carry = high half + carry out of the first add */
96 sltu AT,t1,AT /* carry out of adding the low half */
97 sd t1,0(a0)
98 daddu v0,AT
99
100 dmultu t2,a3 /* second word, same pattern */
101 ld ta2,24(a1)
102 ld ta3,24(a0)
103 daddu t3,v0
104 sltu v0,t3,v0
105 mflo AT
106 mfhi t2
107 daddu t3,AT
108 daddu v0,t2
109 sltu AT,t3,AT
110 sd t3,8(a0)
111 daddu v0,AT
112
113 dmultu ta0,a3 /* third word */
114 subu a2,4 /* account for the four words of this pass */
115 PTR_ADD a0,32 /* both pointers move on by 32 bytes here, ... */
116 PTR_ADD a1,32
117 daddu ta1,v0
118 sltu v0,ta1,v0
119 mflo AT
120 mfhi ta0
121 daddu ta1,AT
122 daddu v0,ta0
123 sltu AT,ta1,AT
124 sd ta1,-16(a0) /* ... so the remaining stores use negative offsets */
125 daddu v0,AT
126
127
128 dmultu ta2,a3 /* fourth word */
129 and ta0,a2,MINUS4 /* remaining count with the two low bits cleared */
130 daddu ta3,v0
131 sltu v0,ta3,v0
132 mflo AT
133 mfhi ta2
134 daddu ta3,AT
135 daddu v0,ta2
136 sltu AT,ta3,AT
137 sd ta3,-8(a0)
138 daddu v0,AT
139 .set noreorder
140 bgtzl ta0,.L_bn_mul_add_words_loop /* another full group of four left: loop */
141 ld t0,0(a1) /* delay slot (taken only): preload the next source word */
142
143 bnezl a2,.L_bn_mul_add_words_tail /* some words left over: finish them in the tail */
144 ld t0,0(a1) /* delay slot (taken only): preload the next source word */
145 .set reorder
146
147.L_bn_mul_add_words_return:
148 jr ra /* v0 holds the carry */
149
150.L_bn_mul_add_words_tail: /* up to three leftover words, no pointer updates */
151 dmultu t0,a3
152 ld t1,0(a0)
153 subu a2,1
154 daddu t1,v0
155 sltu v0,t1,v0
156 mflo AT
157 mfhi t0
158 daddu t1,AT
159 daddu v0,t0
160 sltu AT,t1,AT
161 sd t1,0(a0)
162 daddu v0,AT
163 beqz a2,.L_bn_mul_add_words_return
164
165 ld t0,8(a1)
166 dmultu t0,a3
167 ld t1,8(a0)
168 subu a2,1
169 daddu t1,v0
170 sltu v0,t1,v0
171 mflo AT
172 mfhi t0
173 daddu t1,AT
174 daddu v0,t0
175 sltu AT,t1,AT
176 sd t1,8(a0)
177 daddu v0,AT
178 beqz a2,.L_bn_mul_add_words_return
179
180 ld t0,16(a1) /* third leftover word: the count is not touched any more */
181 dmultu t0,a3
182 ld t1,16(a0)
183 daddu t1,v0
184 sltu v0,t1,v0
185 mflo AT
186 mfhi t0
187 daddu t1,AT
188 daddu v0,t0
189 sltu AT,t1,AT
190 sd t1,16(a0)
191 daddu v0,AT
192 jr ra
193END(bn_mul_add_words)
194
195.align 5
196LEAF(bn_mul_words) /* a0[i] = low word of (a1[i]*a3 + carry) for a2 words; the final carry word is returned in v0 */
197 .set noreorder
198 bgtzl a2,.L_bn_mul_words_proceed /* count > 0: carry on (branch-likely: delay slot runs only when taken) */
199 ld t0,0(a1) /* delay slot: preload the first source word */
200 jr ra /* count <= 0: nothing to do */
201 move v0,zero /* delay slot: return 0 */
202 .set reorder
203
204.L_bn_mul_words_proceed:
205 li MINUS4,-4 /* mask that clears the two low bits */
206 and ta0,a2,MINUS4 /* ta0 = count with its two low bits cleared */
207 move v0,zero /* running carry = 0 */
208 beqz ta0,.L_bn_mul_words_tail /* fewer than 4 words: only the tail code is needed */
209
210.L_bn_mul_words_loop: /* main loop: four words per pass */
211 dmultu t0,a3
212 ld t2,8(a1)
213 ld ta0,16(a1)
214 ld ta2,24(a1)
215 mflo AT /* low half of the product */
216 mfhi t0 /* high half of the product */
217 daddu v0,AT /* result word = carry + low half */
218 sltu t1,v0,AT /* carry out of that add */
219 sd v0,0(a0)
220 daddu v0,t1,t0 /* next carry = high half + carry out */
221
222 dmultu t2,a3
223 subu a2,4 /* account for the four words of this pass */
224 PTR_ADD a0,32 /* pointers move on here, so later stores use negative offsets */
225 PTR_ADD a1,32
226 mflo AT
227 mfhi t2
228 daddu v0,AT
229 sltu t3,v0,AT
230 sd v0,-24(a0)
231 daddu v0,t3,t2
232
233 dmultu ta0,a3
234 mflo AT
235 mfhi ta0
236 daddu v0,AT
237 sltu ta1,v0,AT
238 sd v0,-16(a0)
239 daddu v0,ta1,ta0
240
241
242 dmultu ta2,a3
243 and ta0,a2,MINUS4 /* remaining count with the two low bits cleared */
244 mflo AT
245 mfhi ta2
246 daddu v0,AT
247 sltu ta3,v0,AT
248 sd v0,-8(a0)
249 daddu v0,ta3,ta2
250 .set noreorder
251 bgtzl ta0,.L_bn_mul_words_loop /* another full group of four left: loop */
252 ld t0,0(a1) /* delay slot (taken only): preload the next source word */
253
254 bnezl a2,.L_bn_mul_words_tail /* some words left over: finish them in the tail */
255 ld t0,0(a1) /* delay slot (taken only): preload the next source word */
256 .set reorder
257
258.L_bn_mul_words_return:
259 jr ra /* v0 holds the carry */
260
261.L_bn_mul_words_tail: /* up to three leftover words, no pointer updates */
262 dmultu t0,a3
263 subu a2,1
264 mflo AT
265 mfhi t0
266 daddu v0,AT
267 sltu t1,v0,AT
268 sd v0,0(a0)
269 daddu v0,t1,t0
270 beqz a2,.L_bn_mul_words_return
271
272 ld t0,8(a1)
273 dmultu t0,a3
274 subu a2,1
275 mflo AT
276 mfhi t0
277 daddu v0,AT
278 sltu t1,v0,AT
279 sd v0,8(a0)
280 daddu v0,t1,t0
281 beqz a2,.L_bn_mul_words_return
282
283 ld t0,16(a1) /* third leftover word: the count is not touched any more */
284 dmultu t0,a3
285 mflo AT
286 mfhi t0
287 daddu v0,AT
288 sltu t1,v0,AT
289 sd v0,16(a0)
290 daddu v0,t1,t0
291 jr ra
292END(bn_mul_words)
293
294.align 5
295LEAF(bn_sqr_words) /* for each of the a2 words at a1: store the low and high halves of its square as two consecutive words at a0 */
296 .set noreorder
297 bgtzl a2,.L_bn_sqr_words_proceed /* count > 0: carry on (branch-likely: delay slot runs only when taken) */
298 ld t0,0(a1) /* delay slot: preload the first source word */
299 jr ra /* count <= 0: nothing to do */
300 move v0,zero /* delay slot: v0 = 0 */
301 .set reorder
302
303.L_bn_sqr_words_proceed:
304 li MINUS4,-4 /* mask that clears the two low bits */
305 and ta0,a2,MINUS4 /* ta0 = count with its two low bits cleared */
306 move v0,zero /* v0 is never written again, so every exit leaves it 0 */
307 beqz ta0,.L_bn_sqr_words_tail /* fewer than 4 words: only the tail code is needed */
308
309.L_bn_sqr_words_loop: /* main loop: four source words -> eight result words per pass */
310 dmultu t0,t0
311 ld t2,8(a1)
312 ld ta0,16(a1)
313 ld ta2,24(a1)
314 mflo t1 /* low half of the square */
315 mfhi t0 /* high half of the square */
316 sd t1,0(a0)
317 sd t0,8(a0)
318
319 dmultu t2,t2
320 subu a2,4 /* account for the four words of this pass */
321 PTR_ADD a0,64 /* result pointer moves by 8 words, ... */
322 PTR_ADD a1,32 /* ... source pointer by 4; later stores use negative offsets */
323 mflo t3
324 mfhi t2
325 sd t3,-48(a0)
326 sd t2,-40(a0)
327
328 dmultu ta0,ta0
329 mflo ta1
330 mfhi ta0
331 sd ta1,-32(a0)
332 sd ta0,-24(a0)
333
334
335 dmultu ta2,ta2
336 and ta0,a2,MINUS4 /* remaining count with the two low bits cleared */
337 mflo ta3
338 mfhi ta2
339 sd ta3,-16(a0)
340 sd ta2,-8(a0)
341
342 .set noreorder
343 bgtzl ta0,.L_bn_sqr_words_loop /* another full group of four left: loop */
344 ld t0,0(a1) /* delay slot (taken only): preload the next source word */
345
346 bnezl a2,.L_bn_sqr_words_tail /* some words left over: finish them in the tail */
347 ld t0,0(a1) /* delay slot (taken only): preload the next source word */
348 .set reorder
349
350.L_bn_sqr_words_return:
351 move v0,zero
352 jr ra
353
354.L_bn_sqr_words_tail: /* up to three leftover words, no pointer updates */
355 dmultu t0,t0
356 subu a2,1
357 mflo t1
358 mfhi t0
359 sd t1,0(a0)
360 sd t0,8(a0)
361 beqz a2,.L_bn_sqr_words_return
362
363 ld t0,8(a1)
364 dmultu t0,t0
365 subu a2,1
366 mflo t1
367 mfhi t0
368 sd t1,16(a0)
369 sd t0,24(a0)
370 beqz a2,.L_bn_sqr_words_return
371
372 ld t0,16(a1) /* third leftover word: the count is not touched any more */
373 dmultu t0,t0
374 mflo t1
375 mfhi t0
376 sd t1,32(a0)
377 sd t0,40(a0)
378 jr ra
379END(bn_sqr_words)
380
381.align 5
382LEAF(bn_add_words) /* a0[i] = a1[i] + a2[i] + carry for a3 words; the final carry is returned in v0 */
383 .set noreorder
384 bgtzl a3,.L_bn_add_words_proceed /* count > 0: carry on (branch-likely: delay slot runs only when taken) */
385 ld t0,0(a1) /* delay slot: preload the first word of the first operand */
386 jr ra /* count <= 0: nothing to do */
387 move v0,zero /* delay slot: return 0 */
388 .set reorder
389
390.L_bn_add_words_proceed:
391 li MINUS4,-4 /* mask that clears the two low bits */
392 and AT,a3,MINUS4 /* AT = count with its two low bits cleared */
393 move v0,zero /* running carry = 0 */
394 beqz AT,.L_bn_add_words_tail /* fewer than 4 words: only the tail code is needed */
395
396.L_bn_add_words_loop: /* main loop: four words per pass; loads and pointer updates are interleaved */
397 ld ta0,0(a2)
398 subu a3,4 /* account for the four words of this pass */
399 ld t1,8(a1)
400 and AT,a3,MINUS4 /* remaining count with the two low bits cleared, tested at the loop end */
401 ld t2,16(a1)
402 PTR_ADD a2,32 /* pointers move on early, so later accesses use negative offsets */
403 ld t3,24(a1)
404 PTR_ADD a0,32
405 ld ta1,-24(a2)
406 PTR_ADD a1,32
407 ld ta2,-16(a2)
408 ld ta3,-8(a2)
409 daddu ta0,t0 /* sum of the two operand words */
410 sltu t8,ta0,t0 /* carry out of that sum */
411 daddu t0,ta0,v0 /* add the incoming carry */
412 sltu v0,t0,ta0 /* carry out of adding the carry */
413 sd t0,-32(a0)
414 daddu v0,t8 /* combined carry for the next word */
415
416 daddu ta1,t1
417 sltu t9,ta1,t1
418 daddu t1,ta1,v0
419 sltu v0,t1,ta1
420 sd t1,-24(a0)
421 daddu v0,t9
422
423 daddu ta2,t2
424 sltu t8,ta2,t2
425 daddu t2,ta2,v0
426 sltu v0,t2,ta2
427 sd t2,-16(a0)
428 daddu v0,t8
429
430 daddu ta3,t3
431 sltu t9,ta3,t3
432 daddu t3,ta3,v0
433 sltu v0,t3,ta3
434 sd t3,-8(a0)
435 daddu v0,t9
436
437 .set noreorder
438 bgtzl AT,.L_bn_add_words_loop /* another full group of four left: loop */
439 ld t0,0(a1) /* delay slot (taken only): preload the next word */
440
441 bnezl a3,.L_bn_add_words_tail /* some words left over: finish them in the tail */
442 ld t0,0(a1) /* delay slot (taken only): preload the next word */
443 .set reorder
444
445.L_bn_add_words_return:
446 jr ra /* v0 holds the carry */
447
448.L_bn_add_words_tail: /* up to three leftover words, no pointer updates */
449 ld ta0,0(a2)
450 daddu ta0,t0
451 subu a3,1
452 sltu t8,ta0,t0
453 daddu t0,ta0,v0
454 sltu v0,t0,ta0
455 sd t0,0(a0)
456 daddu v0,t8
457 beqz a3,.L_bn_add_words_return
458
459 ld t1,8(a1)
460 ld ta1,8(a2)
461 daddu ta1,t1
462 subu a3,1
463 sltu t9,ta1,t1
464 daddu t1,ta1,v0
465 sltu v0,t1,ta1
466 sd t1,8(a0)
467 daddu v0,t9
468 beqz a3,.L_bn_add_words_return
469
470 ld t2,16(a1) /* third leftover word: the count is not touched any more */
471 ld ta2,16(a2)
472 daddu ta2,t2
473 sltu t8,ta2,t2
474 daddu t2,ta2,v0
475 sltu v0,t2,ta2
476 sd t2,16(a0)
477 daddu v0,t8
478 jr ra
479END(bn_add_words)
480
481.align 5
482LEAF(bn_sub_words) /* a0[i] = a1[i] - a2[i] - borrow for a3 words; the final borrow is returned in v0 */
483 .set noreorder
484 bgtzl a3,.L_bn_sub_words_proceed /* count > 0: carry on (branch-likely: delay slot runs only when taken) */
485 ld t0,0(a1) /* delay slot: preload the first word of the minuend */
486 jr ra /* count <= 0: nothing to do */
487 move v0,zero /* delay slot: return 0 */
488 .set reorder
489
490.L_bn_sub_words_proceed:
491 li MINUS4,-4 /* mask that clears the two low bits */
492 and AT,a3,MINUS4 /* AT = count with its two low bits cleared */
493 move v0,zero /* running borrow = 0 */
494 beqz AT,.L_bn_sub_words_tail /* fewer than 4 words: only the tail code is needed */
495
496.L_bn_sub_words_loop: /* main loop: four words per pass; loads and pointer updates are interleaved */
497 ld ta0,0(a2)
498 subu a3,4 /* account for the four words of this pass */
499 ld t1,8(a1)
500 and AT,a3,MINUS4 /* remaining count with the two low bits cleared, tested at the loop end */
501 ld t2,16(a1)
502 PTR_ADD a2,32 /* pointers move on early, so later accesses use negative offsets */
503 ld t3,24(a1)
504 PTR_ADD a0,32
505 ld ta1,-24(a2)
506 PTR_ADD a1,32
507 ld ta2,-16(a2)
508 ld ta3,-8(a2)
509 sltu t8,t0,ta0 /* t8 = 1 if the plain difference borrows */
510 dsubu t0,ta0 /* t0 = plain difference of the two words */
511 dsubu ta0,t0,v0 /* result = difference - incoming borrow */
512 sd ta0,-32(a0)
513 MOVNZ (t0,v0,t8) /* difference != 0: borrow = t8; difference == 0: the incoming borrow is kept */
514
515 sltu t9,t1,ta1
516 dsubu t1,ta1
517 dsubu ta1,t1,v0
518 sd ta1,-24(a0)
519 MOVNZ (t1,v0,t9)
520
521
522 sltu t8,t2,ta2
523 dsubu t2,ta2
524 dsubu ta2,t2,v0
525 sd ta2,-16(a0)
526 MOVNZ (t2,v0,t8)
527
528 sltu t9,t3,ta3
529 dsubu t3,ta3
530 dsubu ta3,t3,v0
531 sd ta3,-8(a0)
532 MOVNZ (t3,v0,t9)
533
534 .set noreorder
535 bgtzl AT,.L_bn_sub_words_loop /* another full group of four left: loop */
536 ld t0,0(a1) /* delay slot (taken only): preload the next word */
537
538 bnezl a3,.L_bn_sub_words_tail /* some words left over: finish them in the tail */
539 ld t0,0(a1) /* delay slot (taken only): preload the next word */
540 .set reorder
541
542.L_bn_sub_words_return:
543 jr ra /* v0 holds the borrow */
544
545.L_bn_sub_words_tail: /* up to three leftover words, no pointer updates */
546 ld ta0,0(a2)
547 subu a3,1
548 sltu t8,t0,ta0
549 dsubu t0,ta0
550 dsubu ta0,t0,v0
551 MOVNZ (t0,v0,t8)
552 sd ta0,0(a0)
553 beqz a3,.L_bn_sub_words_return
554
555 ld t1,8(a1)
556 subu a3,1
557 ld ta1,8(a2)
558 sltu t9,t1,ta1
559 dsubu t1,ta1
560 dsubu ta1,t1,v0
561 MOVNZ (t1,v0,t9)
562 sd ta1,8(a0)
563 beqz a3,.L_bn_sub_words_return
564
565 ld t2,16(a1) /* third leftover word: the count is not touched any more */
566 ld ta2,16(a2)
567 sltu t8,t2,ta2
568 dsubu t2,ta2
569 dsubu ta2,t2,v0
570 MOVNZ (t2,v0,t8)
571 sd ta2,16(a0)
572 jr ra
573END(bn_sub_words)
574
575#undef MINUS4
576
577.align 5
578LEAF(bn_div_3_words) /* quotient estimate in v0 from the words at 0, -8 and -16 off pointer a0 and the two divisor words a1 and a2 */
579 .set reorder
580 move a3,a0 /* we know that bn_div_words doesn't
581 * touch a3, ta2, ta3 and preserves a2
582 * so that we can save two arguments
583 * and return address in registers
584 * instead of stack:-)
585 */
586 ld a0,(a3) /* top word becomes the high half of the dividend */
587 move ta2,a1 /* keep the second divisor word */
588 ld a1,-8(a3) /* next word becomes the low half of the dividend */
589 bne a0,a2,.L_bn_div_3_words_proceed /* top word differs from a2: do the real division */
590 li v0,-1 /* top word equals a2: answer with all ones */
591 jr ra
592.L_bn_div_3_words_proceed:
593 move ta3,ra /* return address is parked in ta3 across the call */
594 bal bn_div_words /* v0 = quotient, v1 = remainder */
595 move ra,ta3
596 dmultu ta2,v0 /* t1:t0 = second divisor word * quotient */
597 ld t2,-16(a3) /* third dividend word */
598 move ta0,zero
599 mfhi t1
600 mflo t0
601 sltu t8,t1,v1 /* t8 = 1 when the product's high half is below the remainder */
602.L_bn_div_3_words_inner_loop: /* lowers the quotient until the product t1:t0 is no larger than remainder:t2 */
603 bnez t8,.L_bn_div_3_words_inner_loop_done
604 sgeu AT,t2,t0
605 seq t9,t1,v1
606 and AT,t9 /* AT = 1 when the high halves match and t2 >= low half: quotient is final */
607 sltu t3,t0,ta2 /* borrow out of the low-half subtraction below */
608 daddu v1,a2 /* remainder grows by a2 for the next round */
609 dsubu t1,t3
610 dsubu t0,ta2 /* product shrinks by the second divisor word */
611 sltu t8,t1,v1
612 sltu ta0,v1,a2 /* the remainder addition wrapped around */
613 or t8,ta0 /* either condition ends the loop on the next test */
614 .set noreorder
615 beqzl AT,.L_bn_div_3_words_inner_loop /* not final yet: go round again */
616 dsubu v0,1 /* delay slot (taken only): quotient-- */
617 .set reorder
618.L_bn_div_3_words_inner_loop_done:
619 jr ra
620END(bn_div_3_words)
621
622.align 5
623LEAF(bn_div_words) /* divides the two-word value a0:a1 by a2, one 32-bit quotient half at a time; quotient in v0, remainder in v1 */
624 .set noreorder
625 bnezl a2,.L_bn_div_words_proceed /* divisor != 0: carry on (branch-likely: delay slot runs only when taken) */
626 move v1,zero /* delay slot (taken only): v1 = 0 */
627 jr ra
628 li v0,-1 /* I'd rather signal div-by-zero
629 * which can be done with 'break 7' */
630
631.L_bn_div_words_proceed:
632 bltz a2,.L_bn_div_words_body /* top bit of the divisor already set: skip the normalisation */
633 move t9,v1 /* delay slot: t9 (shift count) = 0, since v1 was just cleared */
634 dsll a2,1 /* shift the divisor left ... */
635 bgtz a2,.-4 /* ... back to the dsll until its top bit is set */
636 addu t9,1 /* delay slot: count each shift */
637
638 .set reorder
639 negu t1,t9 /* t1 = -t9; used as a shift amount below, presumably taken mod 64 (i.e. 64-t9) - confirm */
640 li t2,-1
641 dsll t2,t1
642 and t2,a0 /* bits of a0 selected by that mask; nonzero is treated as overflow below */
643 dsrl AT,a1,t1 /* bits of a1 that get or-ed into a0 after the shift */
644 .set noreorder
645 bnezl t2,.+8 /* skips to the next instruction either way; the delay slot runs only when t2 != 0 */
646 break 6 /* signal overflow */
647 .set reorder
648 dsll a0,t9 /* shift the dividend left by the same count as the divisor */
649 dsll a1,t9
650 or a0,AT
651
652#define QT ta0
653#define HH ta1
654#define DH v1
655.L_bn_div_words_body:
656 dsrl DH,a2,32 /* DH = upper 32 bits of the divisor */
657 sgeu AT,a0,a2
658 .set noreorder
659 bnezl AT,.+8 /* a0 >= divisor: ... */
660 dsubu a0,a2 /* ... subtract it once (delay slot, taken only) */
661 .set reorder
662
663 li QT,-1
664 dsrl HH,a0,32 /* HH = upper 32 bits of a0 */
665 dsrl QT,32 /* q=0xffffffff */
666 beq DH,HH,.L_bn_div_words_skip_div1 /* upper halves equal: keep q=0xffffffff */
667 ddivu zero,a0,DH /* otherwise q = a0 / DH */
668 mflo QT
669.L_bn_div_words_skip_div1:
670 dmultu a2,QT /* t1:t0 = divisor * q */
671 dsll t3,a0,32
672 dsrl AT,a1,32
673 or t3,AT /* t3 = (a0<<32) | (a1>>32) */
674 mflo t0
675 mfhi t1
676.L_bn_div_words_inner_loop1: /* while HH:t3 < t1:t0: take the divisor off the product and lower q */
677 sltu t2,t3,t0
678 seq t8,HH,t1
679 sltu AT,HH,t1
680 and t2,t8
681 sltu v0,t0,a2 /* borrow for the low-half subtraction below */
682 or AT,t2 /* AT = 1 when the product is still too large */
683 .set noreorder
684 beqz AT,.L_bn_div_words_inner_loop1_done
685 dsubu t1,v0 /* delay slot: runs on both paths; t1 is not read after the loop exits */
686 dsubu t0,a2
687 b .L_bn_div_words_inner_loop1
688 dsubu QT,1 /* delay slot: q-- */
689 .set reorder
690.L_bn_div_words_inner_loop1_done:
691
692 dsll a1,32 /* move on to the low 32 bits of a1 */
693 dsubu a0,t3,t0 /* partial remainder after the first quotient half */
694 dsll v0,QT,32 /* first q becomes the upper half of the result */
695
696 li QT,-1
697 dsrl HH,a0,32
698 dsrl QT,32 /* q=0xffffffff */
699 beq DH,HH,.L_bn_div_words_skip_div2 /* upper halves equal: keep q=0xffffffff */
700 ddivu zero,a0,DH /* otherwise q = a0 / DH */
701 mflo QT
702.L_bn_div_words_skip_div2:
703#undef DH
704 dmultu a2,QT /* t1:t0 = divisor * q */
705 dsll t3,a0,32
706 dsrl AT,a1,32
707 or t3,AT
708 mflo t0
709 mfhi t1
710.L_bn_div_words_inner_loop2: /* same correction loop as above; v1 now serves as the borrow scratch */
711 sltu t2,t3,t0
712 seq t8,HH,t1
713 sltu AT,HH,t1
714 and t2,t8
715 sltu v1,t0,a2
716 or AT,t2
717 .set noreorder
718 beqz AT,.L_bn_div_words_inner_loop2_done
719 dsubu t1,v1 /* delay slot: runs on both paths; t1 is not read after the loop exits */
720 dsubu t0,a2
721 b .L_bn_div_words_inner_loop2
722 dsubu QT,1 /* delay slot: q-- */
723 .set reorder
724.L_bn_div_words_inner_loop2_done:
725#undef HH
726
727 dsubu a0,t3,t0 /* final remainder, still shifted left by t9 */
728 or v0,QT /* second q fills the lower half of the result */
729 dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
730 dsrl a2,t9 /* restore a2 */
731 jr ra
732#undef QT
733END(bn_div_words)
734
735#define a_0 t0 /* a_0..a_3 and b_0..b_3: registers holding the first four words of each operand */
736#define a_1 t1
737#define a_2 t2
738#define a_3 t3
739#define b_0 ta0
740#define b_1 ta1
741#define b_2 ta2
742#define b_3 ta3
743
744#define a_4 s0 /* words 4..6 of both operands use s0-s5, which bn_mul_comba8 saves and restores on its stack frame */
745#define a_5 s2
746#define a_6 s4
747#define a_7 a1 /* once we load a[7] we don't need a anymore */
748#define b_4 s1
749#define b_5 s3
750#define b_6 s5
751#define b_7 a2 /* once we load b[7] we don't need b anymore */
752
753#define t_1 t8 /* receives the low half of each product (mflo) */
754#define t_2 t9 /* receives the high half of each product (mfhi) */
755
756#define c_1 v0 /* c_1..c_3: three accumulator words whose roles rotate from one result column to the next */
757#define c_2 v1
758#define c_3 a3
759
760#define FRAME_SIZE 48 /* bytes taken off sp by bn_mul_comba8: six 8-byte slots for s0-s5 */
761
762.align 5
763LEAF(bn_mul_comba8) /* 16 result words at a0 = product of the 8 words at a1 and the 8 words at a2, computed column by column */
764 .set noreorder
765 PTR_SUB sp,FRAME_SIZE /* room to save s0-s5 */
766 .frame sp,FRAME_SIZE,ra /* declared frame size now matches the amount actually taken off sp (was a literal 64) */
767 .set reorder
768 ld a_0,0(a1) /* If compiled with -mips3 option on
769 * R5000 box assembler barks on this
770 * line with "shouldn't have mult/div
771 * as last instruction in bb (R10K
772 * bug)" warning. If anybody out there
773 * has a clue about how to circumvent
774 * this do send me a note.
775 * <appro@fy.chalmers.se>
776 */
777 ld b_0,0(a2)
778 ld a_1,8(a1)
779 ld a_2,16(a1)
780 ld a_3,24(a1)
781 ld b_1,8(a2)
782 ld b_2,16(a2)
783 ld b_3,24(a2)
784 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
785 sd s0,0(sp) /* save s0-s5 before they are reused for a[4..6] and b[4..6] */
786 sd s1,8(sp)
787 sd s2,16(sp)
788 sd s3,24(sp)
789 sd s4,32(sp)
790 sd s5,40(sp)
791 mflo c_1
792 mfhi c_2
793
794 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
795 ld a_4,32(a1)
796 ld a_5,40(a1)
797 ld a_6,48(a1)
798 ld a_7,56(a1)
799 ld b_4,32(a2)
800 ld b_5,40(a2)
801 mflo t_1
802 mfhi t_2
803 daddu c_2,t_1
804 sltu AT,c_2,t_1
805 daddu c_3,t_2,AT
806 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
807 ld b_6,48(a2)
808 ld b_7,56(a2)
809 sd c_1,0(a0) /* r[0]=c1; */
810 mflo t_1
811 mfhi t_2
812 daddu c_2,t_1
813 sltu AT,c_2,t_1
814 daddu t_2,AT
815 daddu c_3,t_2
816 sltu c_1,c_3,t_2
817 sd c_2,8(a0) /* r[1]=c2; */
818
819 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
820 mflo t_1
821 mfhi t_2
822 daddu c_3,t_1
823 sltu AT,c_3,t_1
824 daddu t_2,AT
825 daddu c_1,t_2 /* no carry-out check: c_1 is only 0 or 1 here and this sum is presumably too small to wrap - confirm */
826 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
827 mflo t_1
828 mfhi t_2
829 daddu c_3,t_1
830 sltu AT,c_3,t_1
831 daddu t_2,AT
832 daddu c_1,t_2
833 sltu c_2,c_1,t_2
834 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
835 mflo t_1
836 mfhi t_2
837 daddu c_3,t_1
838 sltu AT,c_3,t_1
839 daddu t_2,AT
840 daddu c_1,t_2
841 sltu AT,c_1,t_2
842 daddu c_2,AT
843 sd c_3,16(a0) /* r[2]=c3; */
844
845 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
846 mflo t_1
847 mfhi t_2
848 daddu c_1,t_1
849 sltu AT,c_1,t_1
850 daddu t_2,AT
851 daddu c_2,t_2
852 sltu c_3,c_2,t_2
853 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
854 mflo t_1
855 mfhi t_2
856 daddu c_1,t_1
857 sltu AT,c_1,t_1
858 daddu t_2,AT
859 daddu c_2,t_2
860 sltu AT,c_2,t_2
861 daddu c_3,AT
862 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
863 mflo t_1
864 mfhi t_2
865 daddu c_1,t_1
866 sltu AT,c_1,t_1
867 daddu t_2,AT
868 daddu c_2,t_2
869 sltu AT,c_2,t_2
870 daddu c_3,AT
871 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
872 mflo t_1
873 mfhi t_2
874 daddu c_1,t_1
875 sltu AT,c_1,t_1
876 daddu t_2,AT
877 daddu c_2,t_2
878 sltu AT,c_2,t_2
879 daddu c_3,AT
880 sd c_1,24(a0) /* r[3]=c1; */
881
882 dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */
883 mflo t_1
884 mfhi t_2
885 daddu c_2,t_1
886 sltu AT,c_2,t_1
887 daddu t_2,AT
888 daddu c_3,t_2
889 sltu c_1,c_3,t_2
890 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
891 mflo t_1
892 mfhi t_2
893 daddu c_2,t_1
894 sltu AT,c_2,t_1
895 daddu t_2,AT
896 daddu c_3,t_2
897 sltu AT,c_3,t_2
898 daddu c_1,AT
899 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
900 mflo t_1
901 mfhi t_2
902 daddu c_2,t_1
903 sltu AT,c_2,t_1
904 daddu t_2,AT
905 daddu c_3,t_2
906 sltu AT,c_3,t_2
907 daddu c_1,AT
908 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
909 mflo t_1
910 mfhi t_2
911 daddu c_2,t_1
912 sltu AT,c_2,t_1
913 daddu t_2,AT
914 daddu c_3,t_2
915 sltu AT,c_3,t_2
916 daddu c_1,AT
917 dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */
918 mflo t_1
919 mfhi t_2
920 daddu c_2,t_1
921 sltu AT,c_2,t_1
922 daddu t_2,AT
923 daddu c_3,t_2
924 sltu AT,c_3,t_2
925 daddu c_1,AT
926 sd c_2,32(a0) /* r[4]=c2; */
927
928 dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */
929 mflo t_1
930 mfhi t_2
931 daddu c_3,t_1
932 sltu AT,c_3,t_1
933 daddu t_2,AT
934 daddu c_1,t_2
935 sltu c_2,c_1,t_2
936 dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */
937 mflo t_1
938 mfhi t_2
939 daddu c_3,t_1
940 sltu AT,c_3,t_1
941 daddu t_2,AT
942 daddu c_1,t_2
943 sltu AT,c_1,t_2
944 daddu c_2,AT
945 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
946 mflo t_1
947 mfhi t_2
948 daddu c_3,t_1
949 sltu AT,c_3,t_1
950 daddu t_2,AT
951 daddu c_1,t_2
952 sltu AT,c_1,t_2
953 daddu c_2,AT
954 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
955 mflo t_1
956 mfhi t_2
957 daddu c_3,t_1
958 sltu AT,c_3,t_1
959 daddu t_2,AT
960 daddu c_1,t_2
961 sltu AT,c_1,t_2
962 daddu c_2,AT
963 dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */
964 mflo t_1
965 mfhi t_2
966 daddu c_3,t_1
967 sltu AT,c_3,t_1
968 daddu t_2,AT
969 daddu c_1,t_2
970 sltu AT,c_1,t_2
971 daddu c_2,AT
972 dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */
973 mflo t_1
974 mfhi t_2
975 daddu c_3,t_1
976 sltu AT,c_3,t_1
977 daddu t_2,AT
978 daddu c_1,t_2
979 sltu AT,c_1,t_2
980 daddu c_2,AT
981 sd c_3,40(a0) /* r[5]=c3; */
982
983 dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */
984 mflo t_1
985 mfhi t_2
986 daddu c_1,t_1
987 sltu AT,c_1,t_1
988 daddu t_2,AT
989 daddu c_2,t_2
990 sltu c_3,c_2,t_2
991 dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */
992 mflo t_1
993 mfhi t_2
994 daddu c_1,t_1
995 sltu AT,c_1,t_1
996 daddu t_2,AT
997 daddu c_2,t_2
998 sltu AT,c_2,t_2
999 daddu c_3,AT
1000 dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */
1001 mflo t_1
1002 mfhi t_2
1003 daddu c_1,t_1
1004 sltu AT,c_1,t_1
1005 daddu t_2,AT
1006 daddu c_2,t_2
1007 sltu AT,c_2,t_2
1008 daddu c_3,AT
1009 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1010 mflo t_1
1011 mfhi t_2
1012 daddu c_1,t_1
1013 sltu AT,c_1,t_1
1014 daddu t_2,AT
1015 daddu c_2,t_2
1016 sltu AT,c_2,t_2
1017 daddu c_3,AT
1018 dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */
1019 mflo t_1
1020 mfhi t_2
1021 daddu c_1,t_1
1022 sltu AT,c_1,t_1
1023 daddu t_2,AT
1024 daddu c_2,t_2
1025 sltu AT,c_2,t_2
1026 daddu c_3,AT
1027 dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */
1028 mflo t_1
1029 mfhi t_2
1030 daddu c_1,t_1
1031 sltu AT,c_1,t_1
1032 daddu t_2,AT
1033 daddu c_2,t_2
1034 sltu AT,c_2,t_2
1035 daddu c_3,AT
1036 dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */
1037 mflo t_1
1038 mfhi t_2
1039 daddu c_1,t_1
1040 sltu AT,c_1,t_1
1041 daddu t_2,AT
1042 daddu c_2,t_2
1043 sltu AT,c_2,t_2
1044 daddu c_3,AT
1045 sd c_1,48(a0) /* r[6]=c1; */
1046
1047 dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */
1048 mflo t_1
1049 mfhi t_2
1050 daddu c_2,t_1
1051 sltu AT,c_2,t_1
1052 daddu t_2,AT
1053 daddu c_3,t_2
1054 sltu c_1,c_3,t_2
1055 dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */
1056 mflo t_1
1057 mfhi t_2
1058 daddu c_2,t_1
1059 sltu AT,c_2,t_1
1060 daddu t_2,AT
1061 daddu c_3,t_2
1062 sltu AT,c_3,t_2
1063 daddu c_1,AT
1064 dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */
1065 mflo t_1
1066 mfhi t_2
1067 daddu c_2,t_1
1068 sltu AT,c_2,t_1
1069 daddu t_2,AT
1070 daddu c_3,t_2
1071 sltu AT,c_3,t_2
1072 daddu c_1,AT
1073 dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */
1074 mflo t_1
1075 mfhi t_2
1076 daddu c_2,t_1
1077 sltu AT,c_2,t_1
1078 daddu t_2,AT
1079 daddu c_3,t_2
1080 sltu AT,c_3,t_2
1081 daddu c_1,AT
1082 dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */
1083 mflo t_1
1084 mfhi t_2
1085 daddu c_2,t_1
1086 sltu AT,c_2,t_1
1087 daddu t_2,AT
1088 daddu c_3,t_2
1089 sltu AT,c_3,t_2
1090 daddu c_1,AT
1091 dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */
1092 mflo t_1
1093 mfhi t_2
1094 daddu c_2,t_1
1095 sltu AT,c_2,t_1
1096 daddu t_2,AT
1097 daddu c_3,t_2
1098 sltu AT,c_3,t_2
1099 daddu c_1,AT
1100 dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */
1101 mflo t_1
1102 mfhi t_2
1103 daddu c_2,t_1
1104 sltu AT,c_2,t_1
1105 daddu t_2,AT
1106 daddu c_3,t_2
1107 sltu AT,c_3,t_2
1108 daddu c_1,AT
1109 dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */
1110 mflo t_1
1111 mfhi t_2
1112 daddu c_2,t_1
1113 sltu AT,c_2,t_1
1114 daddu t_2,AT
1115 daddu c_3,t_2
1116 sltu AT,c_3,t_2
1117 daddu c_1,AT
1118 sd c_2,56(a0) /* r[7]=c2; */
1119
1120 dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */
1121 mflo t_1
1122 mfhi t_2
1123 daddu c_3,t_1
1124 sltu AT,c_3,t_1
1125 daddu t_2,AT
1126 daddu c_1,t_2
1127 sltu c_2,c_1,t_2
1128 dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */
1129 mflo t_1
1130 mfhi t_2
1131 daddu c_3,t_1
1132 sltu AT,c_3,t_1
1133 daddu t_2,AT
1134 daddu c_1,t_2
1135 sltu AT,c_1,t_2
1136 daddu c_2,AT
1137 dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */
1138 mflo t_1
1139 mfhi t_2
1140 daddu c_3,t_1
1141 sltu AT,c_3,t_1
1142 daddu t_2,AT
1143 daddu c_1,t_2
1144 sltu AT,c_1,t_2
1145 daddu c_2,AT
1146 dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1147 mflo t_1
1148 mfhi t_2
1149 daddu c_3,t_1
1150 sltu AT,c_3,t_1
1151 daddu t_2,AT
1152 daddu c_1,t_2
1153 sltu AT,c_1,t_2
1154 daddu c_2,AT
1155 dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */
1156 mflo t_1
1157 mfhi t_2
1158 daddu c_3,t_1
1159 sltu AT,c_3,t_1
1160 daddu t_2,AT
1161 daddu c_1,t_2
1162 sltu AT,c_1,t_2
1163 daddu c_2,AT
1164 dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */
1165 mflo t_1
1166 mfhi t_2
1167 daddu c_3,t_1
1168 sltu AT,c_3,t_1
1169 daddu t_2,AT
1170 daddu c_1,t_2
1171 sltu AT,c_1,t_2
1172 daddu c_2,AT
1173 dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */
1174 mflo t_1
1175 mfhi t_2
1176 daddu c_3,t_1
1177 sltu AT,c_3,t_1
1178 daddu t_2,AT
1179 daddu c_1,t_2
1180 sltu AT,c_1,t_2
1181 daddu c_2,AT
1182 sd c_3,64(a0) /* r[8]=c3; */
1183
1184 dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */
1185 mflo t_1
1186 mfhi t_2
1187 daddu c_1,t_1
1188 sltu AT,c_1,t_1
1189 daddu t_2,AT
1190 daddu c_2,t_2
1191 sltu c_3,c_2,t_2
1192 dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */
1193 mflo t_1
1194 mfhi t_2
1195 daddu c_1,t_1
1196 sltu AT,c_1,t_1
1197 daddu t_2,AT
1198 daddu c_2,t_2
1199 sltu AT,c_2,t_2
1200 daddu c_3,AT
1201 dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */
1202 mflo t_1
1203 mfhi t_2
1204 daddu c_1,t_1
1205 sltu AT,c_1,t_1
1206 daddu t_2,AT
1207 daddu c_2,t_2
1208 sltu AT,c_2,t_2
1209 daddu c_3,AT
1210 dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */
1211 mflo t_1
1212 mfhi t_2
1213 daddu c_1,t_1
1214 sltu AT,c_1,t_1
1215 daddu t_2,AT
1216 daddu c_2,t_2
1217 sltu AT,c_2,t_2
1218 daddu c_3,AT
1219 dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */
1220 mflo t_1
1221 mfhi t_2
1222 daddu c_1,t_1
1223 sltu AT,c_1,t_1
1224 daddu t_2,AT
1225 daddu c_2,t_2
1226 sltu AT,c_2,t_2
1227 daddu c_3,AT
1228 dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */
1229 mflo t_1
1230 mfhi t_2
1231 daddu c_1,t_1
1232 sltu AT,c_1,t_1
1233 daddu t_2,AT
1234 daddu c_2,t_2
1235 sltu AT,c_2,t_2
1236 daddu c_3,AT
1237 sd c_1,72(a0) /* r[9]=c1; */
1238
1239 dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */
1240 mflo t_1
1241 mfhi t_2
1242 daddu c_2,t_1
1243 sltu AT,c_2,t_1
1244 daddu t_2,AT
1245 daddu c_3,t_2
1246 sltu c_1,c_3,t_2
1247 dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */
1248 mflo t_1
1249 mfhi t_2
1250 daddu c_2,t_1
1251 sltu AT,c_2,t_1
1252 daddu t_2,AT
1253 daddu c_3,t_2
1254 sltu AT,c_3,t_2
1255 daddu c_1,AT
1256 dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1257 mflo t_1
1258 mfhi t_2
1259 daddu c_2,t_1
1260 sltu AT,c_2,t_1
1261 daddu t_2,AT
1262 daddu c_3,t_2
1263 sltu AT,c_3,t_2
1264 daddu c_1,AT
1265 dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */
1266 mflo t_1
1267 mfhi t_2
1268 daddu c_2,t_1
1269 sltu AT,c_2,t_1
1270 daddu t_2,AT
1271 daddu c_3,t_2
1272 sltu AT,c_3,t_2
1273 daddu c_1,AT
1274 dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */
1275 mflo t_1
1276 mfhi t_2
1277 daddu c_2,t_1
1278 sltu AT,c_2,t_1
1279 daddu t_2,AT
1280 daddu c_3,t_2
1281 sltu AT,c_3,t_2
1282 daddu c_1,AT
1283 sd c_2,80(a0) /* r[10]=c2; */
1284
1285 dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */
1286 mflo t_1
1287 mfhi t_2
1288 daddu c_3,t_1
1289 sltu AT,c_3,t_1
1290 daddu t_2,AT
1291 daddu c_1,t_2
1292 sltu c_2,c_1,t_2
1293 dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */
1294 mflo t_1
1295 mfhi t_2
1296 daddu c_3,t_1
1297 sltu AT,c_3,t_1
1298 daddu t_2,AT
1299 daddu c_1,t_2
1300 sltu AT,c_1,t_2
1301 daddu c_2,AT
1302 dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */
1303 mflo t_1
1304 mfhi t_2
1305 daddu c_3,t_1
1306 sltu AT,c_3,t_1
1307 daddu t_2,AT
1308 daddu c_1,t_2
1309 sltu AT,c_1,t_2
1310 daddu c_2,AT
1311 dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */
1312 mflo t_1
1313 mfhi t_2
1314 daddu c_3,t_1
1315 sltu AT,c_3,t_1
1316 daddu t_2,AT
1317 daddu c_1,t_2
1318 sltu AT,c_1,t_2
1319 daddu c_2,AT
1320 sd c_3,88(a0) /* r[11]=c3; */
1321
1322 dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */
1323 mflo t_1
1324 mfhi t_2
1325 daddu c_1,t_1
1326 sltu AT,c_1,t_1
1327 daddu t_2,AT
1328 daddu c_2,t_2
1329 sltu c_3,c_2,t_2
1330 dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
1331 mflo t_1
1332 mfhi t_2
1333 daddu c_1,t_1
1334 sltu AT,c_1,t_1
1335 daddu t_2,AT
1336 daddu c_2,t_2
1337 sltu AT,c_2,t_2
1338 daddu c_3,AT
1339 dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */
1340 mflo t_1
1341 mfhi t_2
1342 daddu c_1,t_1
1343 sltu AT,c_1,t_1
1344 daddu t_2,AT
1345 daddu c_2,t_2
1346 sltu AT,c_2,t_2
1347 daddu c_3,AT
1348 sd c_1,96(a0) /* r[12]=c1; */
1349
1350 dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */
1351 mflo t_1
1352 mfhi t_2
1353 daddu c_2,t_1
1354 sltu AT,c_2,t_1
1355 daddu t_2,AT
1356 daddu c_3,t_2
1357 sltu c_1,c_3,t_2
1358 dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */
1359 mflo t_1
1360 mfhi t_2
1361 daddu c_2,t_1
1362 sltu AT,c_2,t_1
1363 daddu t_2,AT
1364 daddu c_3,t_2
1365 sltu AT,c_3,t_2
1366 daddu c_1,AT
1367 sd c_2,104(a0) /* r[13]=c2; */
1368
1369 dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
1370 ld s0,0(sp) /* s0-s5 are no longer needed as operands: restore them while the multiply runs */
1371 ld s1,8(sp)
1372 ld s2,16(sp)
1373 ld s3,24(sp)
1374 ld s4,32(sp)
1375 ld s5,40(sp)
1376 mflo t_1
1377 mfhi t_2
1378 daddu c_3,t_1
1379 sltu AT,c_3,t_1
1380 daddu t_2,AT
1381 daddu c_1,t_2
1382 sd c_3,112(a0) /* r[14]=c3; */
1383 sd c_1,120(a0) /* r[15]=c1; */
1384
1385 PTR_ADD sp,FRAME_SIZE /* release the save area */
1386
1387 jr ra
1388END(bn_mul_comba8)
1389
1390.align 5
1391LEAF(bn_mul_comba4) /* 8 result words at a0 = product of the 4 words at a1 and the 4 words at a2, computed column by column */
1392 .set reorder
1393 ld a_0,0(a1)
1394 ld b_0,0(a2)
1395 ld a_1,8(a1)
1396 ld a_2,16(a1)
1397 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1398 ld a_3,24(a1)
1399 ld b_1,8(a2)
1400 ld b_2,16(a2)
1401 ld b_3,24(a2)
1402 mflo c_1
1403 mfhi c_2
1404 sd c_1,0(a0) /* r[0]=c1; */
1405
1406 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
1407 mflo t_1
1408 mfhi t_2
1409 daddu c_2,t_1
1410 sltu AT,c_2,t_1
1411 daddu c_3,t_2,AT
1412 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
1413 mflo t_1
1414 mfhi t_2
1415 daddu c_2,t_1
1416 sltu AT,c_2,t_1
1417 daddu t_2,AT
1418 daddu c_3,t_2
1419 sltu c_1,c_3,t_2
1420 sd c_2,8(a0) /* r[1]=c2; */
1421
1422 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
1423 mflo t_1
1424 mfhi t_2
1425 daddu c_3,t_1
1426 sltu AT,c_3,t_1
1427 daddu t_2,AT
1428 daddu c_1,t_2 /* no carry-out check: c_1 is only 0 or 1 here and this sum is presumably too small to wrap - confirm */
1429 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1430 mflo t_1
1431 mfhi t_2
1432 daddu c_3,t_1
1433 sltu AT,c_3,t_1
1434 daddu t_2,AT
1435 daddu c_1,t_2
1436 sltu c_2,c_1,t_2
1437 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
1438 mflo t_1
1439 mfhi t_2
1440 daddu c_3,t_1
1441 sltu AT,c_3,t_1
1442 daddu t_2,AT
1443 daddu c_1,t_2
1444 sltu AT,c_1,t_2
1445 daddu c_2,AT
1446 sd c_3,16(a0) /* r[2]=c3; */
1447
1448 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
1449 mflo t_1
1450 mfhi t_2
1451 daddu c_1,t_1
1452 sltu AT,c_1,t_1
1453 daddu t_2,AT
1454 daddu c_2,t_2
1455 sltu c_3,c_2,t_2
1456 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
1457 mflo t_1
1458 mfhi t_2
1459 daddu c_1,t_1
1460 sltu AT,c_1,t_1
1461 daddu t_2,AT
1462 daddu c_2,t_2
1463 sltu AT,c_2,t_2
1464 daddu c_3,AT
1465 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
1466 mflo t_1
1467 mfhi t_2
1468 daddu c_1,t_1
1469 sltu AT,c_1,t_1
1470 daddu t_2,AT
1471 daddu c_2,t_2
1472 sltu AT,c_2,t_2
1473 daddu c_3,AT
1474 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
1475 mflo t_1
1476 mfhi t_2
1477 daddu c_1,t_1
1478 sltu AT,c_1,t_1
1479 daddu t_2,AT
1480 daddu c_2,t_2
1481 sltu AT,c_2,t_2
1482 daddu c_3,AT
1483 sd c_1,24(a0) /* r[3]=c1; */
1484
1485 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
1486 mflo t_1
1487 mfhi t_2
1488 daddu c_2,t_1
1489 sltu AT,c_2,t_1
1490 daddu t_2,AT
1491 daddu c_3,t_2
1492 sltu c_1,c_3,t_2
1493 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1494 mflo t_1
1495 mfhi t_2
1496 daddu c_2,t_1
1497 sltu AT,c_2,t_1
1498 daddu t_2,AT
1499 daddu c_3,t_2
1500 sltu AT,c_3,t_2
1501 daddu c_1,AT
1502 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
1503 mflo t_1
1504 mfhi t_2
1505 daddu c_2,t_1
1506 sltu AT,c_2,t_1
1507 daddu t_2,AT
1508 daddu c_3,t_2
1509 sltu AT,c_3,t_2
1510 daddu c_1,AT
1511 sd c_2,32(a0) /* r[4]=c2; */
1512
1513 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
1514 mflo t_1
1515 mfhi t_2
1516 daddu c_3,t_1
1517 sltu AT,c_3,t_1
1518 daddu t_2,AT
1519 daddu c_1,t_2
1520 sltu c_2,c_1,t_2
1521 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
1522 mflo t_1
1523 mfhi t_2
1524 daddu c_3,t_1
1525 sltu AT,c_3,t_1
1526 daddu t_2,AT
1527 daddu c_1,t_2
1528 sltu AT,c_1,t_2
1529 daddu c_2,AT
1530 sd c_3,40(a0) /* r[5]=c3; */
1531
1532 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1533 mflo t_1
1534 mfhi t_2
1535 daddu c_1,t_1
1536 sltu AT,c_1,t_1
1537 daddu t_2,AT
1538 daddu c_2,t_2
1539 sd c_1,48(a0) /* r[6]=c1; */
1540 sd c_2,56(a0) /* r[7]=c2; */
1541
1542 jr ra
1543END(bn_mul_comba4)
1544
/*
 * bn_sqr_comba8: comba-style squaring of an 8-word operand.
 *
 * Eight 64-bit words are loaded from 0..56(a1) and sixteen 64-bit result
 * words are stored to 0..120(a0), one result column at a time.  As the
 * inline annotations indicate, each column sums its partial products into
 * a rotating three-word accumulator (c_1, c_2, c_3):
 *   mul_add_c  - add the product once (the a[i]*a[i] terms);
 *   mul_add_c2 - add the product doubled (the a[i]*a[j], i != j, terms).
 * The "b[j]" in the annotations names the same input array; every dmultu
 * operand in this routine is an a_N register.
 *
 * a_4..a_7 are re-#defined just below as aliases of b_0..b_3.  The
 * definitions of a_0..a_3, b_N, c_N, t_1 and t_2 are outside this part of
 * the file.  a2 and AT are used as scratch registers.
 *
 * NOTE(review): in the doubled steps t_2 holds (high word << 1) plus the
 * bit shifted out of the low word, and "daddu t_2,AT" then adds the
 * low-word carry with no wrap check.  If t_2 is all ones at that point the
 * carry into the third accumulator word appears to be lost.  Confirm
 * against upstream; this may be the squaring issue fixed for
 * CVE-2014-3570.
 */
1545#undef a_4
1546#undef a_5
1547#undef a_6
1548#undef a_7
1549#define a_4 b_0
1550#define a_5 b_1
1551#define a_6 b_2
1552#define a_7 b_3
1553
1554.align 5
1555LEAF(bn_sqr_comba8)
1556 .set reorder
1557 ld a_0,0(a1)
1558 ld a_1,8(a1)
1559 ld a_2,16(a1)
1560 ld a_3,24(a1)
1561
1562 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1563 ld a_4,32(a1)
1564 ld a_5,40(a1)
1565 ld a_6,48(a1)
1566 ld a_7,56(a1)
1567 mflo c_1
1568 mfhi c_2
1569 sd c_1,0(a0)
1570
/*
 * Shape of every doubled (mul_add_c2) step below:
 *   slt  cX,t_2,zero             - keep bit 63 of the high product word,
 *                                  which the following shift discards
 *                                  (later steps use "slt AT,..." plus
 *                                  "daddu cX,AT" because cX is already live)
 *   dsll t_2,1                   - double the high word
 *   slt a2,t_1,zero/daddu t_2,a2 - carry bit 63 of the low word into it
 *   dsll t_1,1                   - double the low word
 *   daddu/sltu pairs             - add into the accumulator, AT = carry
 * In this first column the third accumulator word is created by the
 * three-operand "daddu c_3,t_2,AT" instead of being added to.
 */
1571 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
1572 mflo t_1
1573 mfhi t_2
1574 slt c_1,t_2,zero
1575 dsll t_2,1
1576 slt a2,t_1,zero
1577 daddu t_2,a2
1578 dsll t_1,1
1579 daddu c_2,t_1
1580 sltu AT,c_2,t_1
1581 daddu c_3,t_2,AT
1582 sd c_2,8(a0)
1583
1584 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
1585 mflo t_1
1586 mfhi t_2
1587 slt c_2,t_2,zero
1588 dsll t_2,1
1589 slt a2,t_1,zero
1590 daddu t_2,a2
1591 dsll t_1,1
1592 daddu c_3,t_1
1593 sltu AT,c_3,t_1
1594 daddu t_2,AT
1595 daddu c_1,t_2
1596 sltu AT,c_1,t_2
1597 daddu c_2,AT
1598 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1599 mflo t_1
1600 mfhi t_2
1601 daddu c_3,t_1
1602 sltu AT,c_3,t_1
1603 daddu t_2,AT
1604 daddu c_1,t_2
1605 sltu AT,c_1,t_2
1606 daddu c_2,AT
1607 sd c_3,16(a0)
1608
1609 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
1610 mflo t_1
1611 mfhi t_2
1612 slt c_3,t_2,zero
1613 dsll t_2,1
1614 slt a2,t_1,zero
1615 daddu t_2,a2
1616 dsll t_1,1
1617 daddu c_1,t_1
1618 sltu AT,c_1,t_1
1619 daddu t_2,AT
1620 daddu c_2,t_2
1621 sltu AT,c_2,t_2
1622 daddu c_3,AT
1623 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
1624 mflo t_1
1625 mfhi t_2
1626 slt AT,t_2,zero
1627 daddu c_3,AT
1628 dsll t_2,1
1629 slt a2,t_1,zero
1630 daddu t_2,a2
1631 dsll t_1,1
1632 daddu c_1,t_1
1633 sltu AT,c_1,t_1
1634 daddu t_2,AT
1635 daddu c_2,t_2
1636 sltu AT,c_2,t_2
1637 daddu c_3,AT
1638 sd c_1,24(a0)
1639
1640 dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */
1641 mflo t_1
1642 mfhi t_2
1643 slt c_1,t_2,zero
1644 dsll t_2,1
1645 slt a2,t_1,zero
1646 daddu t_2,a2
1647 dsll t_1,1
1648 daddu c_2,t_1
1649 sltu AT,c_2,t_1
1650 daddu t_2,AT
1651 daddu c_3,t_2
1652 sltu AT,c_3,t_2
1653 daddu c_1,AT
1654 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
1655 mflo t_1
1656 mfhi t_2
1657 slt AT,t_2,zero
1658 daddu c_1,AT
1659 dsll t_2,1
1660 slt a2,t_1,zero
1661 daddu t_2,a2
1662 dsll t_1,1
1663 daddu c_2,t_1
1664 sltu AT,c_2,t_1
1665 daddu t_2,AT
1666 daddu c_3,t_2
1667 sltu AT,c_3,t_2
1668 daddu c_1,AT
1669 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1670 mflo t_1
1671 mfhi t_2
1672 daddu c_2,t_1
1673 sltu AT,c_2,t_1
1674 daddu t_2,AT
1675 daddu c_3,t_2
1676 sltu AT,c_3,t_2
1677 daddu c_1,AT
1678 sd c_2,32(a0)
1679
1680 dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */
1681 mflo t_1
1682 mfhi t_2
1683 slt c_2,t_2,zero
1684 dsll t_2,1
1685 slt a2,t_1,zero
1686 daddu t_2,a2
1687 dsll t_1,1
1688 daddu c_3,t_1
1689 sltu AT,c_3,t_1
1690 daddu t_2,AT
1691 daddu c_1,t_2
1692 sltu AT,c_1,t_2
1693 daddu c_2,AT
1694 dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */
1695 mflo t_1
1696 mfhi t_2
1697 slt AT,t_2,zero
1698 daddu c_2,AT
1699 dsll t_2,1
1700 slt a2,t_1,zero
1701 daddu t_2,a2
1702 dsll t_1,1
1703 daddu c_3,t_1
1704 sltu AT,c_3,t_1
1705 daddu t_2,AT
1706 daddu c_1,t_2
1707 sltu AT,c_1,t_2
1708 daddu c_2,AT
1709 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
1710 mflo t_1
1711 mfhi t_2
1712 slt AT,t_2,zero
1713 daddu c_2,AT
1714 dsll t_2,1
1715 slt a2,t_1,zero
1716 daddu t_2,a2
1717 dsll t_1,1
1718 daddu c_3,t_1
1719 sltu AT,c_3,t_1
1720 daddu t_2,AT
1721 daddu c_1,t_2
1722 sltu AT,c_1,t_2
1723 daddu c_2,AT
1724 sd c_3,40(a0)
1725
1726 dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */
1727 mflo t_1
1728 mfhi t_2
1729 slt c_3,t_2,zero
1730 dsll t_2,1
1731 slt a2,t_1,zero
1732 daddu t_2,a2
1733 dsll t_1,1
1734 daddu c_1,t_1
1735 sltu AT,c_1,t_1
1736 daddu t_2,AT
1737 daddu c_2,t_2
1738 sltu AT,c_2,t_2
1739 daddu c_3,AT
1740 dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */
1741 mflo t_1
1742 mfhi t_2
1743 slt AT,t_2,zero
1744 daddu c_3,AT
1745 dsll t_2,1
1746 slt a2,t_1,zero
1747 daddu t_2,a2
1748 dsll t_1,1
1749 daddu c_1,t_1
1750 sltu AT,c_1,t_1
1751 daddu t_2,AT
1752 daddu c_2,t_2
1753 sltu AT,c_2,t_2
1754 daddu c_3,AT
1755 dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */
1756 mflo t_1
1757 mfhi t_2
1758 slt AT,t_2,zero
1759 daddu c_3,AT
1760 dsll t_2,1
1761 slt a2,t_1,zero
1762 daddu t_2,a2
1763 dsll t_1,1
1764 daddu c_1,t_1
1765 sltu AT,c_1,t_1
1766 daddu t_2,AT
1767 daddu c_2,t_2
1768 sltu AT,c_2,t_2
1769 daddu c_3,AT
1770 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1771 mflo t_1
1772 mfhi t_2
1773 daddu c_1,t_1
1774 sltu AT,c_1,t_1
1775 daddu t_2,AT
1776 daddu c_2,t_2
1777 sltu AT,c_2,t_2
1778 daddu c_3,AT
1779 sd c_1,48(a0)
1780
1781 dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */
1782 mflo t_1
1783 mfhi t_2
1784 slt c_1,t_2,zero
1785 dsll t_2,1
1786 slt a2,t_1,zero
1787 daddu t_2,a2
1788 dsll t_1,1
1789 daddu c_2,t_1
1790 sltu AT,c_2,t_1
1791 daddu t_2,AT
1792 daddu c_3,t_2
1793 sltu AT,c_3,t_2
1794 daddu c_1,AT
1795 dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */
1796 mflo t_1
1797 mfhi t_2
1798 slt AT,t_2,zero
1799 daddu c_1,AT
1800 dsll t_2,1
1801 slt a2,t_1,zero
1802 daddu t_2,a2
1803 dsll t_1,1
1804 daddu c_2,t_1
1805 sltu AT,c_2,t_1
1806 daddu t_2,AT
1807 daddu c_3,t_2
1808 sltu AT,c_3,t_2
1809 daddu c_1,AT
1810 dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */
1811 mflo t_1
1812 mfhi t_2
1813 slt AT,t_2,zero
1814 daddu c_1,AT
1815 dsll t_2,1
1816 slt a2,t_1,zero
1817 daddu t_2,a2
1818 dsll t_1,1
1819 daddu c_2,t_1
1820 sltu AT,c_2,t_1
1821 daddu t_2,AT
1822 daddu c_3,t_2
1823 sltu AT,c_3,t_2
1824 daddu c_1,AT
1825 dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */
1826 mflo t_1
1827 mfhi t_2
1828 slt AT,t_2,zero
1829 daddu c_1,AT
1830 dsll t_2,1
1831 slt a2,t_1,zero
1832 daddu t_2,a2
1833 dsll t_1,1
1834 daddu c_2,t_1
1835 sltu AT,c_2,t_1
1836 daddu t_2,AT
1837 daddu c_3,t_2
1838 sltu AT,c_3,t_2
1839 daddu c_1,AT
1840 sd c_2,56(a0)
1841
1842 dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */
1843 mflo t_1
1844 mfhi t_2
1845 slt c_2,t_2,zero
1846 dsll t_2,1
1847 slt a2,t_1,zero
1848 daddu t_2,a2
1849 dsll t_1,1
1850 daddu c_3,t_1
1851 sltu AT,c_3,t_1
1852 daddu t_2,AT
1853 daddu c_1,t_2
1854 sltu AT,c_1,t_2
1855 daddu c_2,AT
1856 dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */
1857 mflo t_1
1858 mfhi t_2
1859 slt AT,t_2,zero
1860 daddu c_2,AT
1861 dsll t_2,1
1862 slt a2,t_1,zero
1863 daddu t_2,a2
1864 dsll t_1,1
1865 daddu c_3,t_1
1866 sltu AT,c_3,t_1
1867 daddu t_2,AT
1868 daddu c_1,t_2
1869 sltu AT,c_1,t_2
1870 daddu c_2,AT
1871 dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */
1872 mflo t_1
1873 mfhi t_2
1874 slt AT,t_2,zero
1875 daddu c_2,AT
1876 dsll t_2,1
1877 slt a2,t_1,zero
1878 daddu t_2,a2
1879 dsll t_1,1
1880 daddu c_3,t_1
1881 sltu AT,c_3,t_1
1882 daddu t_2,AT
1883 daddu c_1,t_2
1884 sltu AT,c_1,t_2
1885 daddu c_2,AT
1886 dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1887 mflo t_1
1888 mfhi t_2
1889 daddu c_3,t_1
1890 sltu AT,c_3,t_1
1891 daddu t_2,AT
1892 daddu c_1,t_2
1893 sltu AT,c_1,t_2
1894 daddu c_2,AT
1895 sd c_3,64(a0)
1896
1897 dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */
1898 mflo t_1
1899 mfhi t_2
1900 slt c_3,t_2,zero
1901 dsll t_2,1
1902 slt a2,t_1,zero
1903 daddu t_2,a2
1904 dsll t_1,1
1905 daddu c_1,t_1
1906 sltu AT,c_1,t_1
1907 daddu t_2,AT
1908 daddu c_2,t_2
1909 sltu AT,c_2,t_2
1910 daddu c_3,AT
1911 dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */
1912 mflo t_1
1913 mfhi t_2
1914 slt AT,t_2,zero
1915 daddu c_3,AT
1916 dsll t_2,1
1917 slt a2,t_1,zero
1918 daddu t_2,a2
1919 dsll t_1,1
1920 daddu c_1,t_1
1921 sltu AT,c_1,t_1
1922 daddu t_2,AT
1923 daddu c_2,t_2
1924 sltu AT,c_2,t_2
1925 daddu c_3,AT
1926 dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */
1927 mflo t_1
1928 mfhi t_2
1929 slt AT,t_2,zero
1930 daddu c_3,AT
1931 dsll t_2,1
1932 slt a2,t_1,zero
1933 daddu t_2,a2
1934 dsll t_1,1
1935 daddu c_1,t_1
1936 sltu AT,c_1,t_1
1937 daddu t_2,AT
1938 daddu c_2,t_2
1939 sltu AT,c_2,t_2
1940 daddu c_3,AT
1941 sd c_1,72(a0)
1942
1943 dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */
1944 mflo t_1
1945 mfhi t_2
1946 slt c_1,t_2,zero
1947 dsll t_2,1
1948 slt a2,t_1,zero
1949 daddu t_2,a2
1950 dsll t_1,1
1951 daddu c_2,t_1
1952 sltu AT,c_2,t_1
1953 daddu t_2,AT
1954 daddu c_3,t_2
1955 sltu AT,c_3,t_2
1956 daddu c_1,AT
1957 dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */
1958 mflo t_1
1959 mfhi t_2
1960 slt AT,t_2,zero
1961 daddu c_1,AT
1962 dsll t_2,1
1963 slt a2,t_1,zero
1964 daddu t_2,a2
1965 dsll t_1,1
1966 daddu c_2,t_1
1967 sltu AT,c_2,t_1
1968 daddu t_2,AT
1969 daddu c_3,t_2
1970 sltu AT,c_3,t_2
1971 daddu c_1,AT
1972 dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1973 mflo t_1
1974 mfhi t_2
1975 daddu c_2,t_1
1976 sltu AT,c_2,t_1
1977 daddu t_2,AT
1978 daddu c_3,t_2
1979 sltu AT,c_3,t_2
1980 daddu c_1,AT
1981 sd c_2,80(a0)
1982
1983 dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */
1984 mflo t_1
1985 mfhi t_2
1986 slt c_2,t_2,zero
1987 dsll t_2,1
1988 slt a2,t_1,zero
1989 daddu t_2,a2
1990 dsll t_1,1
1991 daddu c_3,t_1
1992 sltu AT,c_3,t_1
1993 daddu t_2,AT
1994 daddu c_1,t_2
1995 sltu AT,c_1,t_2
1996 daddu c_2,AT
1997 dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */
1998 mflo t_1
1999 mfhi t_2
2000 slt AT,t_2,zero
2001 daddu c_2,AT
2002 dsll t_2,1
2003 slt a2,t_1,zero
2004 daddu t_2,a2
2005 dsll t_1,1
2006 daddu c_3,t_1
2007 sltu AT,c_3,t_1
2008 daddu t_2,AT
2009 daddu c_1,t_2
2010 sltu AT,c_1,t_2
2011 daddu c_2,AT
2012 sd c_3,88(a0)
2013
2014 dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */
2015 mflo t_1
2016 mfhi t_2
2017 slt c_3,t_2,zero
2018 dsll t_2,1
2019 slt a2,t_1,zero
2020 daddu t_2,a2
2021 dsll t_1,1
2022 daddu c_1,t_1
2023 sltu AT,c_1,t_1
2024 daddu t_2,AT
2025 daddu c_2,t_2
2026 sltu AT,c_2,t_2
2027 daddu c_3,AT
2028 dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
2029 mflo t_1
2030 mfhi t_2
2031 daddu c_1,t_1
2032 sltu AT,c_1,t_1
2033 daddu t_2,AT
2034 daddu c_2,t_2
2035 sltu AT,c_2,t_2
2036 daddu c_3,AT
2037 sd c_1,96(a0)
2038
2039 dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */
2040 mflo t_1
2041 mfhi t_2
2042 slt c_1,t_2,zero
2043 dsll t_2,1
2044 slt a2,t_1,zero
2045 daddu t_2,a2
2046 dsll t_1,1
2047 daddu c_2,t_1
2048 sltu AT,c_2,t_1
2049 daddu t_2,AT
2050 daddu c_3,t_2
2051 sltu AT,c_3,t_2
2052 daddu c_1,AT
2053 sd c_2,104(a0)
2054
2055 dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
2056 mflo t_1
2057 mfhi t_2
2058 daddu c_3,t_1
2059 sltu AT,c_3,t_1
2060 daddu t_2,AT
2061 daddu c_1,t_2
2062 sd c_3,112(a0)
2063 sd c_1,120(a0)
2064
2065 jr ra
2066END(bn_sqr_comba8)
2067
/*
 * bn_sqr_comba4: comba-style squaring of a 4-word operand.
 *
 * Four 64-bit words are loaded from 0..24(a1) and eight 64-bit result
 * words are stored to 0..56(a0), one result column at a time.  As the
 * inline annotations indicate, each column sums its partial products into
 * a rotating three-word accumulator (c_1, c_2, c_3): mul_add_c adds a
 * product once (the a[i]*a[i] terms), mul_add_c2 adds it doubled (the
 * a[i]*a[j], i != j, terms).  The "b[j]" in the annotations names the same
 * input array; every dmultu operand here is an a_N register.
 *
 * Doubling is done by shifting both product words left by one: slt against
 * zero captures the bit 63 that each shift discards (a2 for the low word,
 * the third accumulator word or AT for the high word).  AT also carries
 * between accumulator words.
 *
 * NOTE(review): in the doubled steps "daddu t_2,AT" adds the low-word
 * carry to the already doubled high word with no wrap check.  If t_2 is
 * all ones at that point the carry into the third accumulator word appears
 * to be lost.  Confirm against upstream; this may be the squaring issue
 * fixed for CVE-2014-3570.
 */
2068.align 5
2069LEAF(bn_sqr_comba4)
2070 .set reorder
2071 ld a_0,0(a1)
2072 ld a_1,8(a1)
2073 ld a_2,16(a1)
2074 ld a_3,24(a1)
2075 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
2076 mflo c_1
2077 mfhi c_2
2078 sd c_1,0(a0)
2079
2080 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
2081 mflo t_1
2082 mfhi t_2
2083 slt c_1,t_2,zero
2084 dsll t_2,1
2085 slt a2,t_1,zero
2086 daddu t_2,a2
2087 dsll t_1,1
2088 daddu c_2,t_1
2089 sltu AT,c_2,t_1
2090 daddu c_3,t_2,AT
2091 sd c_2,8(a0)
2092
2093 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
2094 mflo t_1
2095 mfhi t_2
2096 slt c_2,t_2,zero
2097 dsll t_2,1
2098 slt a2,t_1,zero
2099 daddu t_2,a2
2100 dsll t_1,1
2101 daddu c_3,t_1
2102 sltu AT,c_3,t_1
2103 daddu t_2,AT
2104 daddu c_1,t_2
2105 sltu AT,c_1,t_2
2106 daddu c_2,AT
2107 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
2108 mflo t_1
2109 mfhi t_2
2110 daddu c_3,t_1
2111 sltu AT,c_3,t_1
2112 daddu t_2,AT
2113 daddu c_1,t_2
2114 sltu AT,c_1,t_2
2115 daddu c_2,AT
2116 sd c_3,16(a0)
2117
2118 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
2119 mflo t_1
2120 mfhi t_2
2121 slt c_3,t_2,zero
2122 dsll t_2,1
2123 slt a2,t_1,zero
2124 daddu t_2,a2
2125 dsll t_1,1
2126 daddu c_1,t_1
2127 sltu AT,c_1,t_1
2128 daddu t_2,AT
2129 daddu c_2,t_2
2130 sltu AT,c_2,t_2
2131 daddu c_3,AT
2132 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
2133 mflo t_1
2134 mfhi t_2
2135 slt AT,t_2,zero
2136 daddu c_3,AT
2137 dsll t_2,1
2138 slt a2,t_1,zero
2139 daddu t_2,a2
2140 dsll t_1,1
2141 daddu c_1,t_1
2142 sltu AT,c_1,t_1
2143 daddu t_2,AT
2144 daddu c_2,t_2
2145 sltu AT,c_2,t_2
2146 daddu c_3,AT
2147 sd c_1,24(a0)
2148
2149 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
2150 mflo t_1
2151 mfhi t_2
2152 slt c_1,t_2,zero
2153 dsll t_2,1
2154 slt a2,t_1,zero
2155 daddu t_2,a2
2156 dsll t_1,1
2157 daddu c_2,t_1
2158 sltu AT,c_2,t_1
2159 daddu t_2,AT
2160 daddu c_3,t_2
2161 sltu AT,c_3,t_2
2162 daddu c_1,AT
2163 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
2164 mflo t_1
2165 mfhi t_2
2166 daddu c_2,t_1
2167 sltu AT,c_2,t_1
2168 daddu t_2,AT
2169 daddu c_3,t_2
2170 sltu AT,c_3,t_2
2171 daddu c_1,AT
2172 sd c_2,32(a0)
2173
2174 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
2175 mflo t_1
2176 mfhi t_2
2177 slt c_2,t_2,zero
2178 dsll t_2,1
2179 slt a2,t_1,zero
2180 daddu t_2,a2
2181 dsll t_1,1
2182 daddu c_3,t_1
2183 sltu AT,c_3,t_1
2184 daddu t_2,AT
2185 daddu c_1,t_2
2186 sltu AT,c_1,t_2
2187 daddu c_2,AT
2188 sd c_3,40(a0)
2189
2190 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
2191 mflo t_1
2192 mfhi t_2
2193 daddu c_1,t_1
2194 sltu AT,c_1,t_1
2195 daddu t_2,AT
2196 daddu c_2,t_2
2197 sd c_1,48(a0)
2198 sd c_2,56(a0)
2199
2200 jr ra
2201END(bn_sqr_comba4)
diff --git a/src/lib/libcrypto/bn/asm/pa-risc.s b/src/lib/libcrypto/bn/asm/pa-risc.s
new file mode 100644
index 0000000000..775130a191
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/pa-risc.s
@@ -0,0 +1,710 @@
1 .SPACE $PRIVATE$
2 .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
3 .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
4 .SPACE $TEXT$
5 .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
6 .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
7 .IMPORT $global$,DATA
8 .IMPORT $$dyncall,MILLICODE
9; gcc_compiled.:
10 .SPACE $TEXT$
11 .SUBSPA $CODE$
12
13 .align 4
; bn_mul_add_words
;
; Register use, as visible in the code below:
;   %r26  pointer to the words that are read, added to and written back
;   %r25  pointer to the words that are multiplied
;   %r24  word count; decremented after every word, exit when it hits 0
;   %r23  32-bit multiplier on entry (afterwards reused as %r26+12)
;   %r28  running carry word; starts at 0 and is left for the caller
;
; The multiplier is split into 16-bit halves (%fr11R = low, %fr11L = high)
; and so is each input word, so every 32x32 product is assembled from four
; xmpyu partial products.  Values move between the integer and FP register
; files through the scratch word at -16(%r30).  %r31 holds 0x10000, which
; is added to the high part when the sum of the two middle partial
; products wraps.  The comclr,<<= instructions skip the following carry
; adjustment when no unsigned wrap occurred.
;
; The loop body is unrolled four times (L$0002 ... bl L$0002); words 1..3
; of each group are addressed through %r29 = %r25+12 and %r23 = %r26+12.
; The instruction after each addib / bl is the store or pointer update
; that belongs to that branch.
;
; %r2 is used as a temporary, so it is saved at -20(%r30) on entry and
; reloaded at L$0003 before the return.
;
; NOTE(review): a count of 0 on entry is decremented to -1 and never
; matches the addib,= exit test -- confirm callers never pass 0.
14 .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
15bn_mul_add_words
16 .PROC
17 .CALLINFO FRAME=0,CALLS,SAVE_RP
18 .ENTRY
19 stw %r2,-20(0,%r30)
20 ldi 0,%r28
21 extru %r23,31,16,%r2
22 stw %r2,-16(0,%r30)
23 extru %r23,15,16,%r23
24 ldil L'65536,%r31
25 fldws -16(0,%r30),%fr11R
26 stw %r23,-16(0,%r30)
27 ldo 12(%r25),%r29
28 ldo 12(%r26),%r23
29 fldws -16(0,%r30),%fr11L
30L$0002
31 ldw 0(0,%r25),%r19
32 extru %r19,31,16,%r20
33 stw %r20,-16(0,%r30)
34 extru %r19,15,16,%r19
35 fldws -16(0,%r30),%fr22L
36 stw %r19,-16(0,%r30)
37 xmpyu %fr22L,%fr11R,%fr8
38 fldws -16(0,%r30),%fr22L
39 fstws %fr8R,-16(0,%r30)
40 xmpyu %fr11R,%fr22L,%fr10
41 ldw -16(0,%r30),%r2
42 stw %r20,-16(0,%r30)
43 xmpyu %fr22L,%fr11L,%fr9
44 fldws -16(0,%r30),%fr22L
45 fstws %fr10R,-16(0,%r30)
46 copy %r2,%r22
47 ldw -16(0,%r30),%r2
48 fstws %fr9R,-16(0,%r30)
49 xmpyu %fr11L,%fr22L,%fr8
50 copy %r2,%r19
51 ldw -16(0,%r30),%r2
52 fstws %fr8R,-16(0,%r30)
53 copy %r2,%r20
54 ldw -16(0,%r30),%r2
55 addl %r2,%r19,%r21
56 comclr,<<= %r19,%r21,0
57 addl %r20,%r31,%r20
58L$0005
59 extru %r21,15,16,%r19
60 addl %r20,%r19,%r20
61 zdep %r21,15,16,%r19
62 addl %r22,%r19,%r22
63 comclr,<<= %r19,%r22,0
64 addi,tr 1,%r20,%r19
65 copy %r20,%r19
66 addl %r22,%r28,%r20
67 comclr,<<= %r28,%r20,0
68 addi 1,%r19,%r19
69 ldw 0(0,%r26),%r28
70 addl %r20,%r28,%r20
71 comclr,<<= %r28,%r20,0
72 addi,tr 1,%r19,%r28
73 copy %r19,%r28
74 addib,= -1,%r24,L$0003
75 stw %r20,0(0,%r26)
76 ldw -8(0,%r29),%r19
77 extru %r19,31,16,%r20
78 stw %r20,-16(0,%r30)
79 extru %r19,15,16,%r19
80 fldws -16(0,%r30),%fr22L
81 stw %r19,-16(0,%r30)
82 xmpyu %fr22L,%fr11R,%fr8
83 fldws -16(0,%r30),%fr22L
84 fstws %fr8R,-16(0,%r30)
85 xmpyu %fr11R,%fr22L,%fr10
86 ldw -16(0,%r30),%r2
87 stw %r20,-16(0,%r30)
88 xmpyu %fr22L,%fr11L,%fr9
89 fldws -16(0,%r30),%fr22L
90 fstws %fr10R,-16(0,%r30)
91 copy %r2,%r22
92 ldw -16(0,%r30),%r2
93 fstws %fr9R,-16(0,%r30)
94 xmpyu %fr11L,%fr22L,%fr8
95 copy %r2,%r19
96 ldw -16(0,%r30),%r2
97 fstws %fr8R,-16(0,%r30)
98 copy %r2,%r20
99 ldw -16(0,%r30),%r2
100 addl %r2,%r19,%r21
101 comclr,<<= %r19,%r21,0
102 addl %r20,%r31,%r20
103L$0010
104 extru %r21,15,16,%r19
105 addl %r20,%r19,%r20
106 zdep %r21,15,16,%r19
107 addl %r22,%r19,%r22
108 comclr,<<= %r19,%r22,0
109 addi,tr 1,%r20,%r19
110 copy %r20,%r19
111 addl %r22,%r28,%r20
112 comclr,<<= %r28,%r20,0
113 addi 1,%r19,%r19
114 ldw -8(0,%r23),%r28
115 addl %r20,%r28,%r20
116 comclr,<<= %r28,%r20,0
117 addi,tr 1,%r19,%r28
118 copy %r19,%r28
119 addib,= -1,%r24,L$0003
120 stw %r20,-8(0,%r23)
121 ldw -4(0,%r29),%r19
122 extru %r19,31,16,%r20
123 stw %r20,-16(0,%r30)
124 extru %r19,15,16,%r19
125 fldws -16(0,%r30),%fr22L
126 stw %r19,-16(0,%r30)
127 xmpyu %fr22L,%fr11R,%fr8
128 fldws -16(0,%r30),%fr22L
129 fstws %fr8R,-16(0,%r30)
130 xmpyu %fr11R,%fr22L,%fr10
131 ldw -16(0,%r30),%r2
132 stw %r20,-16(0,%r30)
133 xmpyu %fr22L,%fr11L,%fr9
134 fldws -16(0,%r30),%fr22L
135 fstws %fr10R,-16(0,%r30)
136 copy %r2,%r22
137 ldw -16(0,%r30),%r2
138 fstws %fr9R,-16(0,%r30)
139 xmpyu %fr11L,%fr22L,%fr8
140 copy %r2,%r19
141 ldw -16(0,%r30),%r2
142 fstws %fr8R,-16(0,%r30)
143 copy %r2,%r20
144 ldw -16(0,%r30),%r2
145 addl %r2,%r19,%r21
146 comclr,<<= %r19,%r21,0
147 addl %r20,%r31,%r20
148L$0015
149 extru %r21,15,16,%r19
150 addl %r20,%r19,%r20
151 zdep %r21,15,16,%r19
152 addl %r22,%r19,%r22
153 comclr,<<= %r19,%r22,0
154 addi,tr 1,%r20,%r19
155 copy %r20,%r19
156 addl %r22,%r28,%r20
157 comclr,<<= %r28,%r20,0
158 addi 1,%r19,%r19
159 ldw -4(0,%r23),%r28
160 addl %r20,%r28,%r20
161 comclr,<<= %r28,%r20,0
162 addi,tr 1,%r19,%r28
163 copy %r19,%r28
164 addib,= -1,%r24,L$0003
165 stw %r20,-4(0,%r23)
166 ldw 0(0,%r29),%r19
167 extru %r19,31,16,%r20
168 stw %r20,-16(0,%r30)
169 extru %r19,15,16,%r19
170 fldws -16(0,%r30),%fr22L
171 stw %r19,-16(0,%r30)
172 xmpyu %fr22L,%fr11R,%fr8
173 fldws -16(0,%r30),%fr22L
174 fstws %fr8R,-16(0,%r30)
175 xmpyu %fr11R,%fr22L,%fr10
176 ldw -16(0,%r30),%r2
177 stw %r20,-16(0,%r30)
178 xmpyu %fr22L,%fr11L,%fr9
179 fldws -16(0,%r30),%fr22L
180 fstws %fr10R,-16(0,%r30)
181 copy %r2,%r22
182 ldw -16(0,%r30),%r2
183 fstws %fr9R,-16(0,%r30)
184 xmpyu %fr11L,%fr22L,%fr8
185 copy %r2,%r19
186 ldw -16(0,%r30),%r2
187 fstws %fr8R,-16(0,%r30)
188 copy %r2,%r20
189 ldw -16(0,%r30),%r2
190 addl %r2,%r19,%r21
191 comclr,<<= %r19,%r21,0
192 addl %r20,%r31,%r20
193L$0020
194 extru %r21,15,16,%r19
195 addl %r20,%r19,%r20
196 zdep %r21,15,16,%r19
197 addl %r22,%r19,%r22
198 comclr,<<= %r19,%r22,0
199 addi,tr 1,%r20,%r19
200 copy %r20,%r19
201 addl %r22,%r28,%r20
202 comclr,<<= %r28,%r20,0
203 addi 1,%r19,%r19
204 ldw 0(0,%r23),%r28
205 addl %r20,%r28,%r20
206 comclr,<<= %r28,%r20,0
207 addi,tr 1,%r19,%r28
208 copy %r19,%r28
209 addib,= -1,%r24,L$0003
210 stw %r20,0(0,%r23)
211 ldo 16(%r29),%r29
212 ldo 16(%r25),%r25
213 ldo 16(%r23),%r23
214 bl L$0002,0
215 ldo 16(%r26),%r26
216L$0003
217 ldw -20(0,%r30),%r2
218 bv,n 0(%r2)
219 .EXIT
220 .PROCEND
221 .align 4
; bn_mul_words
;
; Register use, as visible in the code below:
;   %r26  pointer to the words that are written
;   %r25  pointer to the words that are multiplied
;   %r24  word count; decremented after every word, exit when it hits 0
;   %r23  32-bit multiplier on entry (afterwards reused as %r25+12)
;   %r28  running carry word; starts at 0 and is left for the caller
;
; Same scheme as bn_mul_add_words, except that the destination word is
; only stored, never read: the multiplier and each input word are split
; into 16-bit halves, four xmpyu partial products are combined, and the
; previous carry (%r28) is added to the low word.  Values move between the
; integer and FP register files through the scratch word at -16(%r30).
; %r31 holds 0x10000, added to the high part when the sum of the two
; middle partial products wraps.
;
; The loop body is unrolled four times (L$0026 ... bl L$0026); words 1..3
; of each group are read through %r23 = %r25+12 and written through
; %r29 = %r26+12.  The instruction after each addib / bl is the store or
; pointer update that belongs to that branch.
;
; %r2 is used as a temporary, so it is saved at -20(%r30) on entry and
; reloaded at L$0027 before the return.
;
; NOTE(review): a count of 0 on entry is decremented to -1 and never
; matches the addib,= exit test -- confirm callers never pass 0.
222 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
223bn_mul_words
224 .PROC
225 .CALLINFO FRAME=0,CALLS,SAVE_RP
226 .ENTRY
227 stw %r2,-20(0,%r30)
228 ldi 0,%r28
229 extru %r23,31,16,%r2
230 stw %r2,-16(0,%r30)
231 extru %r23,15,16,%r23
232 ldil L'65536,%r31
233 fldws -16(0,%r30),%fr11R
234 stw %r23,-16(0,%r30)
235 ldo 12(%r26),%r29
236 ldo 12(%r25),%r23
237 fldws -16(0,%r30),%fr11L
238L$0026
239 ldw 0(0,%r25),%r19
240 extru %r19,31,16,%r20
241 stw %r20,-16(0,%r30)
242 extru %r19,15,16,%r19
243 fldws -16(0,%r30),%fr22L
244 stw %r19,-16(0,%r30)
245 xmpyu %fr22L,%fr11R,%fr8
246 fldws -16(0,%r30),%fr22L
247 fstws %fr8R,-16(0,%r30)
248 xmpyu %fr11R,%fr22L,%fr10
249 ldw -16(0,%r30),%r2
250 stw %r20,-16(0,%r30)
251 xmpyu %fr22L,%fr11L,%fr9
252 fldws -16(0,%r30),%fr22L
253 fstws %fr10R,-16(0,%r30)
254 copy %r2,%r22
255 ldw -16(0,%r30),%r2
256 fstws %fr9R,-16(0,%r30)
257 xmpyu %fr11L,%fr22L,%fr8
258 copy %r2,%r19
259 ldw -16(0,%r30),%r2
260 fstws %fr8R,-16(0,%r30)
261 copy %r2,%r20
262 ldw -16(0,%r30),%r2
263 addl %r2,%r19,%r21
264 comclr,<<= %r19,%r21,0
265 addl %r20,%r31,%r20
266L$0029
267 extru %r21,15,16,%r19
268 addl %r20,%r19,%r20
269 zdep %r21,15,16,%r19
270 addl %r22,%r19,%r22
271 comclr,<<= %r19,%r22,0
272 addi,tr 1,%r20,%r19
273 copy %r20,%r19
274 addl %r22,%r28,%r20
275 comclr,<<= %r28,%r20,0
276 addi,tr 1,%r19,%r28
277 copy %r19,%r28
278 addib,= -1,%r24,L$0027
279 stw %r20,0(0,%r26)
280 ldw -8(0,%r23),%r19
281 extru %r19,31,16,%r20
282 stw %r20,-16(0,%r30)
283 extru %r19,15,16,%r19
284 fldws -16(0,%r30),%fr22L
285 stw %r19,-16(0,%r30)
286 xmpyu %fr22L,%fr11R,%fr8
287 fldws -16(0,%r30),%fr22L
288 fstws %fr8R,-16(0,%r30)
289 xmpyu %fr11R,%fr22L,%fr10
290 ldw -16(0,%r30),%r2
291 stw %r20,-16(0,%r30)
292 xmpyu %fr22L,%fr11L,%fr9
293 fldws -16(0,%r30),%fr22L
294 fstws %fr10R,-16(0,%r30)
295 copy %r2,%r22
296 ldw -16(0,%r30),%r2
297 fstws %fr9R,-16(0,%r30)
298 xmpyu %fr11L,%fr22L,%fr8
299 copy %r2,%r19
300 ldw -16(0,%r30),%r2
301 fstws %fr8R,-16(0,%r30)
302 copy %r2,%r20
303 ldw -16(0,%r30),%r2
304 addl %r2,%r19,%r21
305 comclr,<<= %r19,%r21,0
306 addl %r20,%r31,%r20
307L$0033
308 extru %r21,15,16,%r19
309 addl %r20,%r19,%r20
310 zdep %r21,15,16,%r19
311 addl %r22,%r19,%r22
312 comclr,<<= %r19,%r22,0
313 addi,tr 1,%r20,%r19
314 copy %r20,%r19
315 addl %r22,%r28,%r20
316 comclr,<<= %r28,%r20,0
317 addi,tr 1,%r19,%r28
318 copy %r19,%r28
319 addib,= -1,%r24,L$0027
320 stw %r20,-8(0,%r29)
321 ldw -4(0,%r23),%r19
322 extru %r19,31,16,%r20
323 stw %r20,-16(0,%r30)
324 extru %r19,15,16,%r19
325 fldws -16(0,%r30),%fr22L
326 stw %r19,-16(0,%r30)
327 xmpyu %fr22L,%fr11R,%fr8
328 fldws -16(0,%r30),%fr22L
329 fstws %fr8R,-16(0,%r30)
330 xmpyu %fr11R,%fr22L,%fr10
331 ldw -16(0,%r30),%r2
332 stw %r20,-16(0,%r30)
333 xmpyu %fr22L,%fr11L,%fr9
334 fldws -16(0,%r30),%fr22L
335 fstws %fr10R,-16(0,%r30)
336 copy %r2,%r22
337 ldw -16(0,%r30),%r2
338 fstws %fr9R,-16(0,%r30)
339 xmpyu %fr11L,%fr22L,%fr8
340 copy %r2,%r19
341 ldw -16(0,%r30),%r2
342 fstws %fr8R,-16(0,%r30)
343 copy %r2,%r20
344 ldw -16(0,%r30),%r2
345 addl %r2,%r19,%r21
346 comclr,<<= %r19,%r21,0
347 addl %r20,%r31,%r20
348L$0037
349 extru %r21,15,16,%r19
350 addl %r20,%r19,%r20
351 zdep %r21,15,16,%r19
352 addl %r22,%r19,%r22
353 comclr,<<= %r19,%r22,0
354 addi,tr 1,%r20,%r19
355 copy %r20,%r19
356 addl %r22,%r28,%r20
357 comclr,<<= %r28,%r20,0
358 addi,tr 1,%r19,%r28
359 copy %r19,%r28
360 addib,= -1,%r24,L$0027
361 stw %r20,-4(0,%r29)
362 ldw 0(0,%r23),%r19
363 extru %r19,31,16,%r20
364 stw %r20,-16(0,%r30)
365 extru %r19,15,16,%r19
366 fldws -16(0,%r30),%fr22L
367 stw %r19,-16(0,%r30)
368 xmpyu %fr22L,%fr11R,%fr8
369 fldws -16(0,%r30),%fr22L
370 fstws %fr8R,-16(0,%r30)
371 xmpyu %fr11R,%fr22L,%fr10
372 ldw -16(0,%r30),%r2
373 stw %r20,-16(0,%r30)
374 xmpyu %fr22L,%fr11L,%fr9
375 fldws -16(0,%r30),%fr22L
376 fstws %fr10R,-16(0,%r30)
377 copy %r2,%r22
378 ldw -16(0,%r30),%r2
379 fstws %fr9R,-16(0,%r30)
380 xmpyu %fr11L,%fr22L,%fr8
381 copy %r2,%r19
382 ldw -16(0,%r30),%r2
383 fstws %fr8R,-16(0,%r30)
384 copy %r2,%r20
385 ldw -16(0,%r30),%r2
386 addl %r2,%r19,%r21
387 comclr,<<= %r19,%r21,0
388 addl %r20,%r31,%r20
389L$0041
390 extru %r21,15,16,%r19
391 addl %r20,%r19,%r20
392 zdep %r21,15,16,%r19
393 addl %r22,%r19,%r22
394 comclr,<<= %r19,%r22,0
395 addi,tr 1,%r20,%r19
396 copy %r20,%r19
397 addl %r22,%r28,%r20
398 comclr,<<= %r28,%r20,0
399 addi,tr 1,%r19,%r28
400 copy %r19,%r28
401 addib,= -1,%r24,L$0027
402 stw %r20,0(0,%r29)
403 ldo 16(%r23),%r23
404 ldo 16(%r25),%r25
405 ldo 16(%r29),%r29
406 bl L$0026,0
407 ldo 16(%r26),%r26
408L$0027
409 ldw -20(0,%r30),%r2
410 bv,n 0(%r2)
411 .EXIT
412 .PROCEND
413 .align 4
; bn_sqr_words
;
; Register use, as visible in the code below:
;   %r26  pointer to the output words (two are written per input word)
;   %r25  pointer to the input words
;   %r24  word count; decremented after every word, exit when it hits 0
;
; Each input word is split into 16-bit halves and three xmpyu products are
; formed: low*high, low*low and high*high.  The cross product is needed
; twice, so it is applied shifted by 17 bits: "zdep %r19,14,15" gives the
; part added to low*low and "extru %r19,16,17" gives the part added to
; high*high.  comclr,<<= skips the "+1" on the high word when the low-word
; addition did not wrap.  Values move between the integer and FP register
; files through the scratch word at -16(%r30).
;
; The loop body is unrolled four times (L$0046 ... bl L$0046).  Input words
; 1..3 of each group are read through %r28 = %r25+12; all outputs except
; the first word of a group are written through %r23 = %r26+28.  Each trip
; advances the input pointers by 16 bytes and the output pointers by 32.
; The instruction after each addib / bl is the store or pointer update
; that belongs to that branch.
;
; %r2 is not modified here, so it is not saved; the routine returns with
; bv,n 0(%r2).
;
; NOTE(review): a count of 0 on entry is decremented to -1 and never
; matches the addib,= exit test -- confirm callers never pass 0.
414 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
415bn_sqr_words
416 .PROC
417 .CALLINFO FRAME=0,NO_CALLS
418 .ENTRY
419 ldo 28(%r26),%r23
420 ldo 12(%r25),%r28
421L$0046
422 ldw 0(0,%r25),%r21
423 extru %r21,31,16,%r22
424 stw %r22,-16(0,%r30)
425 extru %r21,15,16,%r21
426 fldws -16(0,%r30),%fr10L
427 stw %r21,-16(0,%r30)
428 fldws -16(0,%r30),%fr10R
429 xmpyu %fr10L,%fr10R,%fr8
430 fstws %fr8R,-16(0,%r30)
431 ldw -16(0,%r30),%r29
432 stw %r22,-16(0,%r30)
433 fldws -16(0,%r30),%fr10R
434 stw %r21,-16(0,%r30)
435 copy %r29,%r19
436 xmpyu %fr10L,%fr10R,%fr8
437 fldws -16(0,%r30),%fr10L
438 stw %r21,-16(0,%r30)
439 fldws -16(0,%r30),%fr10R
440 fstws %fr8R,-16(0,%r30)
441 extru %r19,16,17,%r20
442 zdep %r19,14,15,%r19
443 ldw -16(0,%r30),%r29
444 xmpyu %fr10L,%fr10R,%fr9
445 addl %r29,%r19,%r22
446 stw %r22,0(0,%r26)
447 fstws %fr9R,-16(0,%r30)
448 ldw -16(0,%r30),%r29
449 addl %r29,%r20,%r21
450 comclr,<<= %r19,%r22,0
451 addi 1,%r21,%r21
452 addib,= -1,%r24,L$0057
453 stw %r21,-24(0,%r23)
454 ldw -8(0,%r28),%r21
455 extru %r21,31,16,%r22
456 stw %r22,-16(0,%r30)
457 extru %r21,15,16,%r21
458 fldws -16(0,%r30),%fr10L
459 stw %r21,-16(0,%r30)
460 fldws -16(0,%r30),%fr10R
461 xmpyu %fr10L,%fr10R,%fr8
462 fstws %fr8R,-16(0,%r30)
463 ldw -16(0,%r30),%r29
464 stw %r22,-16(0,%r30)
465 fldws -16(0,%r30),%fr10R
466 stw %r21,-16(0,%r30)
467 copy %r29,%r19
468 xmpyu %fr10L,%fr10R,%fr8
469 fldws -16(0,%r30),%fr10L
470 stw %r21,-16(0,%r30)
471 fldws -16(0,%r30),%fr10R
472 fstws %fr8R,-16(0,%r30)
473 extru %r19,16,17,%r20
474 zdep %r19,14,15,%r19
475 ldw -16(0,%r30),%r29
476 xmpyu %fr10L,%fr10R,%fr9
477 addl %r29,%r19,%r22
478 stw %r22,-20(0,%r23)
479 fstws %fr9R,-16(0,%r30)
480 ldw -16(0,%r30),%r29
481 addl %r29,%r20,%r21
482 comclr,<<= %r19,%r22,0
483 addi 1,%r21,%r21
484 addib,= -1,%r24,L$0057
485 stw %r21,-16(0,%r23)
486 ldw -4(0,%r28),%r21
487 extru %r21,31,16,%r22
488 stw %r22,-16(0,%r30)
489 extru %r21,15,16,%r21
490 fldws -16(0,%r30),%fr10L
491 stw %r21,-16(0,%r30)
492 fldws -16(0,%r30),%fr10R
493 xmpyu %fr10L,%fr10R,%fr8
494 fstws %fr8R,-16(0,%r30)
495 ldw -16(0,%r30),%r29
496 stw %r22,-16(0,%r30)
497 fldws -16(0,%r30),%fr10R
498 stw %r21,-16(0,%r30)
499 copy %r29,%r19
500 xmpyu %fr10L,%fr10R,%fr8
501 fldws -16(0,%r30),%fr10L
502 stw %r21,-16(0,%r30)
503 fldws -16(0,%r30),%fr10R
504 fstws %fr8R,-16(0,%r30)
505 extru %r19,16,17,%r20
506 zdep %r19,14,15,%r19
507 ldw -16(0,%r30),%r29
508 xmpyu %fr10L,%fr10R,%fr9
509 addl %r29,%r19,%r22
510 stw %r22,-12(0,%r23)
511 fstws %fr9R,-16(0,%r30)
512 ldw -16(0,%r30),%r29
513 addl %r29,%r20,%r21
514 comclr,<<= %r19,%r22,0
515 addi 1,%r21,%r21
516 addib,= -1,%r24,L$0057
517 stw %r21,-8(0,%r23)
518 ldw 0(0,%r28),%r21
519 extru %r21,31,16,%r22
520 stw %r22,-16(0,%r30)
521 extru %r21,15,16,%r21
522 fldws -16(0,%r30),%fr10L
523 stw %r21,-16(0,%r30)
524 fldws -16(0,%r30),%fr10R
525 xmpyu %fr10L,%fr10R,%fr8
526 fstws %fr8R,-16(0,%r30)
527 ldw -16(0,%r30),%r29
528 stw %r22,-16(0,%r30)
529 fldws -16(0,%r30),%fr10R
530 stw %r21,-16(0,%r30)
531 copy %r29,%r19
532 xmpyu %fr10L,%fr10R,%fr8
533 fldws -16(0,%r30),%fr10L
534 stw %r21,-16(0,%r30)
535 fldws -16(0,%r30),%fr10R
536 fstws %fr8R,-16(0,%r30)
537 extru %r19,16,17,%r20
538 zdep %r19,14,15,%r19
539 ldw -16(0,%r30),%r29
540 xmpyu %fr10L,%fr10R,%fr9
541 addl %r29,%r19,%r22
542 stw %r22,-4(0,%r23)
543 fstws %fr9R,-16(0,%r30)
544 ldw -16(0,%r30),%r29
545 addl %r29,%r20,%r21
546 comclr,<<= %r19,%r22,0
547 addi 1,%r21,%r21
548 addib,= -1,%r24,L$0057
549 stw %r21,0(0,%r23)
550 ldo 16(%r28),%r28
551 ldo 16(%r25),%r25
552 ldo 32(%r23),%r23
553 bl L$0046,0
554 ldo 32(%r26),%r26
555L$0057
556 bv,n 0(%r2)
557 .EXIT
558 .PROCEND
559 .IMPORT BN_num_bits_word,CODE
560 .IMPORT fprintf,CODE
561 .IMPORT __iob,DATA
562 .SPACE $TEXT$
563 .SUBSPA $LIT$
564
565 .align 4
566L$C0000
567 .STRING "Division would overflow\x0a\x00"
568 .IMPORT abort,CODE
569 .SPACE $TEXT$
570 .SUBSPA $CODE$
571
572 .align 4
; bn_div64
;
; Register use, as visible in the code below:
;   %r26 -> %r3  first argument; compared and shifted as the upper word
;   %r25 -> %r4  second argument; shifted in from below as the lower word
;   %r24 -> %r5  third argument; the value divided by
;   %r6          pass counter, starts at 2
;   %r7          result digit from the first pass, placed in bits 16..31
;   %r29         current 16-bit result digit
;   %r28         value left for the caller
;
; Outline:
;   - If the third argument is 0, -1 is returned at once (the "ldi -1,%r28"
;     following "bl L$0076").
;   - BN_num_bits_word is called on the third argument.  Unless it reports
;     32 bits, the first argument is checked against a limit built from
;     that bit count (presumably 1 << bits -- confirm); if the limit is
;     below the first argument, "Division would overflow" is passed to
;     fprintf with the stream at __iob+32 and abort is called.
;   - L$0060..L$0062: one conditional subtraction of the divisor, then all
;     three values are shifted left by (32 - bits) when that is non-zero.
;   - L$0063..L$0074, executed twice: a 16-bit digit is estimated -- 0xffff
;     when the top halves of %r3 and %r5 match, otherwise through the
;     $$divU millicode routine (whose result is read from %r29) -- then
;     lowered in the L$0068 loop while it is too large, and digit * divisor
;     is subtracted from the running remainder.  After the first pass the
;     digit is moved into %r7 and the remainder is shifted left by 16 bits.
;   - L$0064: the two digits are combined into %r28.
;
; This appears to mirror the two-digit schoolbook division of the portable
; C implementation -- confirm against the C source.
;
; Frame: stwm allocates 128 bytes and saves %r8; %r3-%r7 are saved at
; -108..-124(%r30) and %r2 at -20(%r30) before the allocation, hence the
; reload from -148(%r30) at L$0076.  The final ldwm restores %r8 and
; releases the frame.
573 .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR
574bn_div64
575 .PROC
576 .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8
577 .ENTRY
578 stw %r2,-20(0,%r30)
579 stwm %r8,128(0,%r30)
580 stw %r7,-124(0,%r30)
581 stw %r4,-112(0,%r30)
582 stw %r3,-108(0,%r30)
583 copy %r26,%r3
584 copy %r25,%r4
585 stw %r6,-120(0,%r30)
586 ldi 0,%r7
587 stw %r5,-116(0,%r30)
588 movb,<> %r24,%r5,L$0059
589 ldi 2,%r6
590 bl L$0076,0
591 ldi -1,%r28
592L$0059
593 .CALL ARGW0=GR
594 bl BN_num_bits_word,%r2
595 copy %r5,%r26
596 ldi 32,%r19
597 comb,= %r19,%r28,L$0060
598 subi 31,%r28,%r19
599 mtsar %r19
600 zvdepi 1,32,%r19
601 comb,>>= %r19,%r3,L$0060
602 addil LR'__iob-$global$+32,%r27
603 ldo RR'__iob-$global$+32(%r1),%r26
604 ldil LR'L$C0000,%r25
605 .CALL ARGW0=GR,ARGW1=GR
606 bl fprintf,%r2
607 ldo RR'L$C0000(%r25),%r25
608 .CALL
609 bl abort,%r2
610 nop
611L$0060
612 comb,>> %r5,%r3,L$0061
613 subi 32,%r28,%r28
614 sub %r3,%r5,%r3
615L$0061
616 comib,= 0,%r28,L$0062
617 subi 31,%r28,%r19
618 mtsar %r19
619 zvdep %r5,32,%r5
620 zvdep %r3,32,%r21
621 subi 32,%r28,%r20
622 mtsar %r20
623 vshd 0,%r4,%r20
624 or %r21,%r20,%r3
625 mtsar %r19
626 zvdep %r4,32,%r4
627L$0062
628 extru %r5,15,16,%r23
629 extru %r5,31,16,%r28
630L$0063
631 extru %r3,15,16,%r19
632 comb,<> %r23,%r19,L$0066
633 copy %r3,%r26
634 bl L$0067,0
635 zdepi -1,31,16,%r29
636L$0066
637 .IMPORT $$divU,MILLICODE
638 bl $$divU,%r31
639 copy %r23,%r25
640L$0067
641 stw %r29,-16(0,%r30)
642 fldws -16(0,%r30),%fr10L
643 stw %r28,-16(0,%r30)
644 fldws -16(0,%r30),%fr10R
645 stw %r23,-16(0,%r30)
646 xmpyu %fr10L,%fr10R,%fr8
647 fldws -16(0,%r30),%fr10R
648 fstws %fr8R,-16(0,%r30)
649 xmpyu %fr10L,%fr10R,%fr9
650 ldw -16(0,%r30),%r8
651 fstws %fr9R,-16(0,%r30)
652 copy %r8,%r22
653 ldw -16(0,%r30),%r8
654 extru %r4,15,16,%r24
655 copy %r8,%r21
656L$0068
657 sub %r3,%r21,%r20
658 copy %r20,%r19
659 depi 0,31,16,%r19
660 comib,<> 0,%r19,L$0069
661 zdep %r20,15,16,%r19
662 addl %r19,%r24,%r19
663 comb,>>= %r19,%r22,L$0069
664 sub %r22,%r28,%r22
665 sub %r21,%r23,%r21
666 bl L$0068,0
667 ldo -1(%r29),%r29
668L$0069
669 stw %r29,-16(0,%r30)
670 fldws -16(0,%r30),%fr10L
671 stw %r28,-16(0,%r30)
672 fldws -16(0,%r30),%fr10R
673 xmpyu %fr10L,%fr10R,%fr8
674 fstws %fr8R,-16(0,%r30)
675 ldw -16(0,%r30),%r8
676 stw %r23,-16(0,%r30)
677 fldws -16(0,%r30),%fr10R
678 copy %r8,%r19
679 xmpyu %fr10L,%fr10R,%fr8
680 fstws %fr8R,-16(0,%r30)
681 extru %r19,15,16,%r20
682 ldw -16(0,%r30),%r8
683 zdep %r19,15,16,%r19
684 addl %r8,%r20,%r20
685 comclr,<<= %r19,%r4,0
686 addi 1,%r20,%r20
687 comb,<<= %r20,%r3,L$0074
688 sub %r4,%r19,%r4
689 addl %r3,%r5,%r3
690 ldo -1(%r29),%r29
691L$0074
692 addib,= -1,%r6,L$0064
693 sub %r3,%r20,%r3
694 zdep %r29,15,16,%r7
695 shd %r3,%r4,16,%r3
696 bl L$0063,0
697 zdep %r4,15,16,%r4
698L$0064
699 or %r7,%r29,%r28
700L$0076
701 ldw -148(0,%r30),%r2
702 ldw -124(0,%r30),%r7
703 ldw -120(0,%r30),%r6
704 ldw -116(0,%r30),%r5
705 ldw -112(0,%r30),%r4
706 ldw -108(0,%r30),%r3
707 bv 0(%r2)
708 ldwm -128(0,%r30),%r8
709 .EXIT
710 .PROCEND
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s
new file mode 100644
index 0000000000..f3b16290eb
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/pa-risc2.s
@@ -0,0 +1,1618 @@
1;
2; PA-RISC 2.0 implementation of bn_asm code, based on the
3; 64-bit version of the code. This code is effectively the
4; same as the 64-bit version except the register model is
5; slightly different given all values must be 32-bit between
6; function calls. Thus the 64-bit return values are returned
7; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit
8;
9;
10; This code is approximately 2x faster than the C version
11; for RSA/DSA.
12;
13; See http://devresource.hp.com/ for more details on the PA-RISC
14; architecture. Also see the book "PA-RISC 2.0 Architecture"
15; by Gerry Kane for information on the instruction set architecture.
16;
17; Code written by Chris Ruemmler (with some help from the HP C
18; compiler).
19;
20; The code compiles with HP's assembler
21;
22
23 .level 2.0N ; PA-RISC 2.0 instruction set (the routines below use 64-bit LDD/STD/DEPD/EXTRD)
24 .space $TEXT$
25 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
26
27;
28; Global Register definitions used for the routines.
29;
30; Some information about HP's runtime architecture for 32-bits.
31;
32; "Caller save" means the calling function must save the register
33; if it wants the register to be preserved.
34; "Callee save" means if a function uses the register, it must save
35; the value before using it.
36;
37; For the floating point registers
38;
39; "caller save" registers: fr4-fr11, fr22-fr31
40; "callee save" registers: fr12-fr21
41; "special" registers: fr0-fr3 (status and exception registers)
42;
43; For the integer registers
44; value zero : r0
45; "caller save" registers: r1,r19-r26
46; "callee save" registers: r3-r18
47; return register : r2 (rp)
48; return values ; r28,r29 (ret0,ret1)
49; Stack pointer ; r30 (sp)
50; millicode return ptr ; r31 (also a caller save register)
51
52
53;
54; Arguments to the routines
55;
56r_ptr .reg %r26 ; arg0: result pointer
57a_ptr .reg %r25 ; arg1: source pointer
58b_ptr .reg %r24 ; arg2: second source pointer (bn_add_words / bn_sub_words)
59num .reg %r24 ; arg2: word count (same register as b_ptr)
60n .reg %r23 ; arg3: word count (bn_add_words / bn_sub_words)
61
62;
63; Note that the "w" argument for bn_mul_add_words and bn_mul_words
64; is passed on the stack at a delta of -56 from the top of stack
65; as the routine is entered.
66;
67
68;
69; Globals used in some routines
70;
71
72top_overflow .reg %r23 ; value 1 << 32 (same register as n; never used together with n)
73high_mask .reg %r22 ; value 0xffffffff80000000L
74
75
74
75
76;------------------------------------------------------------------------------
77;
78; bn_mul_add_words
79;
80;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
81; int num, BN_ULONG w)
82;
83; arg0 = r_ptr
84; arg1 = a_ptr
85; arg2 = num
86; -56(sp) = w
87;
88; Local register definitions
89;
90
91fm1 .reg %fr22 ; m1[0] = fht_0*fw_l in the multiply routines
92fm .reg %fr23 ; m[0] = flt_0*fw_h (bn_sqr_words: fht_0*flt_0)
93ht_temp .reg %fr24 ; high product for word 0
94ht_temp_1 .reg %fr25 ; high product for word 1
95lt_temp .reg %fr26 ; low product for word 0
96lt_temp_1 .reg %fr27 ; low product for word 1
97fm1_1 .reg %fr28 ; m1[1] = fht_1*fw_l in the multiply routines
98fm_1 .reg %fr29 ; m[1] = flt_1*fw_h (bn_sqr_words: fht_1*flt_1)
99
100fw_h .reg %fr7L ; left half of fw
101fw_l .reg %fr7R ; right half of fw
102fw .reg %fr7 ; the 64-bit multiplier w, loaded from the stack
103
104fht_0 .reg %fr8L ; ht half of a[0]
105flt_0 .reg %fr8R ; lt half of a[0]
106t_float_0 .reg %fr8 ; a[0] as loaded by FLDD
107
108fht_1 .reg %fr9L ; ht half of a[1]
109flt_1 .reg %fr9R ; lt half of a[1]
110t_float_1 .reg %fr9 ; a[1] as loaded by FLDD
111
112tmp_0 .reg %r31
113tmp_1 .reg %r21
114m_0 .reg %r20
115m_1 .reg %r19
116ht_0 .reg %r1
117ht_1 .reg %r3 ; r3-r9 are callee-save: each routine saves the ones it uses
118lt_0 .reg %r4
119lt_1 .reg %r5
120m1_0 .reg %r6
121m1_1 .reg %r7
122rp_val .reg %r8 ; rp[0] as loaded by bn_mul_add_words
123rp_val_1 .reg %r9 ; rp[1] as loaded by bn_mul_add_words
124
125bn_mul_add_words ; rp[i] += ap[i]*w for num words; the final carry word is returned
126 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
127 .proc
128 .callinfo frame=128
129 .entry
130 .align 64
131
132 STD %r3,0(%sp) ; save r3
133 STD %r4,8(%sp) ; save r4
134 NOP ; Needed to make the loop 16-byte aligned
135 NOP ; needed to make the loop 16-byte aligned
136
137 STD %r5,16(%sp) ; save r5
138 NOP ; presumably alignment padding as well
139 STD %r6,24(%sp) ; save r6
140 STD %r7,32(%sp) ; save r7
141
142 STD %r8,40(%sp) ; save r8
143 STD %r9,48(%sp) ; save r9
144 COPY %r0,%ret1 ; return 0 by default
145 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
146
147 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
148 LDO 128(%sp),%sp ; bump stack (delay slot: done on both paths, the exit code pops it)
149
150 ;
151 ; The loop is unrolled twice, so if there is only 1 number
152 ; then go straight to the cleanup code.
153 ;
154 CMPIB,= 1,num,bn_mul_add_words_single_top
155 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l); delay slot, done on both paths
156
157 ;
158 ; This loop is unrolled 2 times (64-byte aligned as well)
159 ;
160 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
161 ; two 32-bit multiplies can be issued per cycle.
162 ;
163bn_mul_add_words_unroll2
164
165 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
166 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9L) ht(L)/lt(R)
167 LDD 0(r_ptr),rp_val ; rp[0]
168 LDD 8(r_ptr),rp_val_1 ; rp[1]
169
170 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
171 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
172 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
173 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
174
175 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
176 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
177 FSTD fm,-8(%sp) ; -8(sp) = m[0]
178 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
179
180 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
181 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
182 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
183 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
184
185 XMPYU flt_0,fw_l,lt_temp ; lt_temp = flt_0*fw_l
186 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l
187 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
188 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
189
190 LDD -8(%sp),m_0 ; m[0]
191 LDD -40(%sp),m_1 ; m[1]
192 LDD -16(%sp),m1_0 ; m1[0]
193 LDD -48(%sp),m1_1 ; m1[1]
194
195 LDD -24(%sp),ht_0 ; ht[0]
196 LDD -56(%sp),ht_1 ; ht[1]
197 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
198 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
199
200 LDD -32(%sp),lt_0 ; lt[0]
201 LDD -64(%sp),lt_1 ; lt[1]
202 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (tmp_0 < m1[0]), i.e. m[0]+m1[0] wrapped
203 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
204
205 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (tmp_1 < m1[1]), i.e. m[1]+m1[1] wrapped
206 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
207 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
208 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
209
210 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
211 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
212 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
213 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
214
215 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
216 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
217 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
218 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
219
220 ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c;
221 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
222 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
223 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
224
225 LDO -2(num),num ; num = num - 2;
226 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
227 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
228 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
229
230 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
231 ADD,DC ht_1,%r0,%ret1 ; carry out (ret1) = ht[1] + carry
232 LDO 16(a_ptr),a_ptr ; a_ptr += 2
233
234 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
235 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
236 LDO 16(r_ptr),r_ptr ; r_ptr += 2 (delay slot)
237
238 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
239
240 ;
241 ; Handle a single word: entered directly when num == 1, or by
242 ; falling through when one word is left after the unrolled loop.
243bn_mul_add_words_single_top
244 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
245 LDD 0(r_ptr),rp_val ; rp[0]
246 LDO 8(a_ptr),a_ptr ; a_ptr++
247 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
248 FSTD fm1,-16(%sp) ; -16(sp) = m1
249 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
250 FSTD fm,-8(%sp) ; -8(sp) = m
251 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
252 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
253 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
254 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
255
256 LDD -8(%sp),m_0 ; m
257 LDD -16(%sp),m1_0 ; m1 = temp1
258 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
259 LDD -24(%sp),ht_0 ; ht
260 LDD -32(%sp),lt_0 ; lt
261
262 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (tmp_0 < m1), i.e. m+m1 wrapped
263 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
264
265 EXTRD,U tmp_0,31,32,m_0 ; m>>32
266 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
267
268 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
269 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
270 ADD,DC ht_0,%r0,ht_0 ; ht += carry
271 ADD %ret1,tmp_0,lt_0 ; lt = lt + c;
272 ADD,DC ht_0,%r0,ht_0 ; ht += carry
273 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
274 ADD,DC ht_0,%r0,%ret1 ; carry out (ret1) = ht + carry
275 STD lt_0,0(r_ptr) ; rp[0] = lt
276
277bn_mul_add_words_exit
278 .EXIT
279
280 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
281 LDD -80(%sp),%r9 ; restore r9
282 LDD -88(%sp),%r8 ; restore r8
283 LDD -96(%sp),%r7 ; restore r7
284 LDD -104(%sp),%r6 ; restore r6
285 LDD -112(%sp),%r5 ; restore r5
286 LDD -120(%sp),%r4 ; restore r4
287 BVE (%rp)
288 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame (delay slot)
289 .PROCEND ;in=23,24,25,26,29;out=28;
290
291;----------------------------------------------------------------------------
292;
293;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
294;
295; arg0 = rp
296; arg1 = ap
297; arg2 = num
298; w on stack at -56(sp)
299
300bn_mul_words ; rp[i] = low word of ap[i]*w + carry for num words; the final carry word is returned
301 .proc
302 .callinfo frame=128
303 .entry
304 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
305 .align 64
306
307 STD %r3,0(%sp) ; save r3
308 STD %r4,8(%sp) ; save r4
309 NOP
310 STD %r5,16(%sp) ; save r5
311
312 STD %r6,24(%sp) ; save r6
313 STD %r7,32(%sp) ; save r7
314 COPY %r0,%ret1 ; return 0 by default
315 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
316
317 CMPIB,>= 0,num,bn_mul_words_exit ; if (num <= 0) then exit
318 LDO 128(%sp),%sp ; bump stack (delay slot: done on both paths, the exit code pops it)
319
320 ;
321 ; See if only 1 word to do, thus just do cleanup
322 ;
323 CMPIB,= 1,num,bn_mul_words_single_top
324 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l); delay slot, done on both paths
325
326 ;
327 ; This loop is unrolled 2 times (64-byte aligned as well)
328 ;
329 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
330 ; two 32-bit multiplies can be issued per cycle.
331 ;
332bn_mul_words_unroll2
333
334 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
335 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9L) ht(L)/lt(R)
336 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
337 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
338
339 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
340 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
341 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
342 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
343
344 FSTD fm,-8(%sp) ; -8(sp) = m[0]
345 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
346 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
347 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
348
349 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
350 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
351 XMPYU flt_0,fw_l,lt_temp ; lt_temp = flt_0*fw_l
352 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l
353
354 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
355 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
356 LDD -8(%sp),m_0 ; m[0]
357 LDD -40(%sp),m_1 ; m[1]
358
359 LDD -16(%sp),m1_0 ; m1[0]
360 LDD -48(%sp),m1_1 ; m1[1]
361 LDD -24(%sp),ht_0 ; ht[0]
362 LDD -56(%sp),ht_1 ; ht[1]
363
364 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
365 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
366 LDD -32(%sp),lt_0 ; lt[0]
367 LDD -64(%sp),lt_1 ; lt[1]
368
369 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (tmp_0 < m1[0]), i.e. m[0]+m1[0] wrapped
370 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
371 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (tmp_1 < m1[1]), i.e. m[1]+m1[1] wrapped
372 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
373
374 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
375 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
376 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
377 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
378
379 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
380 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
381 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
382 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
383
384 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
385 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
386 ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c (ret1);
387 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
388
389 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + c (ht_0)
390 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
391 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
392 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
393
394 COPY ht_1,%ret1 ; carry = ht[1]
395 LDO -2(num),num ; num = num - 2;
396 LDO 16(a_ptr),a_ptr ; ap += 2
397 CMPIB,<= 2,num,bn_mul_words_unroll2 ; go again if more to do
398 LDO 16(r_ptr),r_ptr ; rp += 2 (delay slot)
399
400 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
401
402 ;
403 ; Handle a single word: entered directly when num == 1, or by
404 ; falling through when one word is left after the unrolled loop.
405bn_mul_words_single_top
406 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
407
408 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
409 FSTD fm1,-16(%sp) ; -16(sp) = m1
410 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
411 FSTD fm,-8(%sp) ; -8(sp) = m
412 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
413 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
414 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
415 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
416
417 LDD -8(%sp),m_0 ; m
418 LDD -16(%sp),m1_0 ; m1
419 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
420 LDD -24(%sp),ht_0 ; ht
421 LDD -32(%sp),lt_0 ; lt
422
423 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (tmp_0 < m1), i.e. m+m1 wrapped
424 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
425
426 EXTRD,U tmp_0,31,32,m_0 ; m>>32
427 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
428
429 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
430 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
431 ADD,DC ht_0,%r0,ht_0 ; ht += carry
432
433 ADD %ret1,lt_0,lt_0 ; lt = lt + c;
434 ADD,DC ht_0,%r0,ht_0 ; ht += carry
435
436 COPY ht_0,%ret1 ; copy carry
437 STD lt_0,0(r_ptr) ; rp[0] = lt
438
439bn_mul_words_exit
440 .EXIT
441 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
442 LDD -96(%sp),%r7 ; restore r7
443 LDD -104(%sp),%r6 ; restore r6
444 LDD -112(%sp),%r5 ; restore r5
445 LDD -120(%sp),%r4 ; restore r4
446 BVE (%rp)
447 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame (delay slot)
448 .PROCEND
449
450;----------------------------------------------------------------------------
451;
452;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
453;
454; arg0 = rp
455; arg1 = ap
456; arg2 = num
457;
458
459bn_sqr_words ; (rp[2i], rp[2i+1]) = low/high words of ap[i]^2 for num words
460 .proc
461 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
462 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
463 .entry
464 .align 64
465
466 STD %r3,0(%sp) ; save r3
467 STD %r4,8(%sp) ; save r4
468 NOP
469 STD %r5,16(%sp) ; save r5
470
471 CMPIB,>= 0,num,bn_sqr_words_exit ; if (num <= 0) then exit
472 LDO 128(%sp),%sp ; bump stack (delay slot: done on both paths, the exit code pops it)
473
474 ;
475 ; If only 1, then go straight to cleanup
476 ;
477 CMPIB,= 1,num,bn_sqr_words_single_top
478 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L (delay slot: also set for the single-word path)
479
480 ;
481 ; This loop is unrolled 2 times (64-byte aligned as well)
482 ;
483
484bn_sqr_words_unroll2
485 FLDD 0(a_ptr),t_float_0 ; a[0]
486 FLDD 8(a_ptr),t_float_1 ; a[1]
487 XMPYU fht_0,flt_0,fm ; m[0]
488 XMPYU fht_1,flt_1,fm_1 ; m[1]
489
490 FSTD fm,-24(%sp) ; store m[0]
491 FSTD fm_1,-56(%sp) ; store m[1]
492 XMPYU flt_0,flt_0,lt_temp ; lt[0]
493 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
494
495 FSTD lt_temp,-16(%sp) ; store lt[0]
496 FSTD lt_temp_1,-48(%sp) ; store lt[1]
497 XMPYU fht_0,fht_0,ht_temp ; ht[0]
498 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
499
500 FSTD ht_temp,-8(%sp) ; store ht[0]
501 FSTD ht_temp_1,-40(%sp) ; store ht[1]
502 LDD -24(%sp),m_0 ; load m[0]
503 LDD -56(%sp),m_1 ; load m[1]
504
505 AND m_0,high_mask,tmp_0 ; m[0] & Mask
506 AND m_1,high_mask,tmp_1 ; m[1] & Mask
507 DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
508 DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
509
510 LDD -16(%sp),lt_0 ; lt[0]
511 LDD -48(%sp),lt_1 ; lt[1]
512 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
513 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
514
515 LDD -8(%sp),ht_0 ; ht[0]
516 LDD -40(%sp),ht_1 ; ht[1]
517 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
518 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
519
520 ADD lt_0,m_0,lt_0 ; lt[0] = lt[0]+m[0]
521 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
522 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
523 STD ht_0,8(r_ptr) ; rp[1] = ht[0]
524
525 ADD lt_1,m_1,lt_1 ; lt[1] = lt[1]+m[1]
526 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
527 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
528 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
529
530 LDO -2(num),num ; num = num - 2;
531 LDO 16(a_ptr),a_ptr ; ap += 2
532 CMPIB,<= 2,num,bn_sqr_words_unroll2 ; go again if more to do
533 LDO 32(r_ptr),r_ptr ; rp += 4 (delay slot)
534
535 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
536
537 ;
538 ; Handle a single word: entered directly when num == 1, or by
539 ; falling through when one word is left after the unrolled loop.
540bn_sqr_words_single_top
541 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
542
543 XMPYU fht_0,flt_0,fm ; m
544 FSTD fm,-24(%sp) ; store m
545
546 XMPYU flt_0,flt_0,lt_temp ; lt
547 FSTD lt_temp,-16(%sp) ; store lt
548
549 XMPYU fht_0,fht_0,ht_temp ; ht
550 FSTD ht_temp,-8(%sp) ; store ht
551
552 LDD -24(%sp),m_0 ; load m
553 AND m_0,high_mask,tmp_0 ; m & Mask
554 DEPD,Z m_0,30,31,m_0 ; m << 32+1
555 LDD -16(%sp),lt_0 ; lt
556
557 LDD -8(%sp),ht_0 ; ht
558 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
559 ADD m_0,lt_0,lt_0 ; lt = lt+m
560 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 (ADD,L keeps the carry from the ADD above)
561 ADD,DC ht_0,%r0,ht_0 ; ht += carry from lt+m
562
563 STD lt_0,0(r_ptr) ; rp[0] = lt
564 STD ht_0,8(r_ptr) ; rp[1] = ht
565
566bn_sqr_words_exit
567 .EXIT
568 LDD -112(%sp),%r5 ; restore r5
569 LDD -120(%sp),%r4 ; restore r4
570 BVE (%rp)
571 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame (delay slot)
572 .PROCEND ;in=23,24,25,26,29;out=28;
573
574
575;----------------------------------------------------------------------------
576;
577;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
578;
579; arg0 = rp
580; arg1 = ap
581; arg2 = bp
582; arg3 = n
583
584t .reg %r22 ; current word of a (plus incoming carry)
585b .reg %r21 ; current word of b
586l .reg %r20 ; sum word stored to r
587
588bn_add_words ; r[i] = a[i] + b[i] + carry for n words; the final carry is returned
589 .proc
590 .entry
591 .callinfo
592 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
593 .align 64
594
595 CMPIB,>= 0,n,bn_add_words_exit ; if (n <= 0) then exit
596 COPY %r0,%ret1 ; return 0 by default
597
598 ;
599 ; If 2 or more numbers do the loop
600 ;
601 CMPIB,= 1,n,bn_add_words_single_top
602 NOP ; fills the branch delay slot
603
604 ;
605 ; This loop is unrolled 2 times (64-byte aligned as well)
606 ;
607bn_add_words_unroll2
608 LDD 0(a_ptr),t ; a[0]
609 LDD 0(b_ptr),b ; b[0]
610 ADD t,%ret1,t ; t = t+c;
611 ADD,DC %r0,%r0,%ret1 ; set c to carry
612 ADD t,b,l ; l = t + b[0]
613 ADD,DC %ret1,%r0,%ret1 ; c+= carry
614 STD l,0(r_ptr) ; r[0] = l
615
616 LDD 8(a_ptr),t ; a[1]
617 LDD 8(b_ptr),b ; b[1]
618 ADD t,%ret1,t ; t = t+c;
619 ADD,DC %r0,%r0,%ret1 ; set c to carry
620 ADD t,b,l ; l = t + b[1]
621 ADD,DC %ret1,%r0,%ret1 ; c+= carry
622 STD l,8(r_ptr) ; r[1] = l
623
624 LDO -2(n),n ; n = n - 2
625 LDO 16(a_ptr),a_ptr ; a += 2
626 LDO 16(b_ptr),b_ptr ; b += 2
627
628 CMPIB,<= 2,n,bn_add_words_unroll2 ; go again if more to do
629 LDO 16(r_ptr),r_ptr ; r += 2 (delay slot)
630
631 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
632
633bn_add_words_single_top
634 LDD 0(a_ptr),t ; a[0]
635 LDD 0(b_ptr),b ; b[0]
636
637 ADD t,%ret1,t ; t = t+c;
638 ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??)
639 ADD t,b,l ; l = t + b[0]
640 ADD,DC %ret1,%r0,%ret1 ; c+= carry
641 STD l,0(r_ptr) ; r[0] = l
642
643bn_add_words_exit
644 .EXIT
645 BVE (%rp)
646 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
647 .PROCEND ;in=23,24,25,26,29;out=28;
648
649;----------------------------------------------------------------------------
650;
651;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
652;
653; arg0 = rp
654; arg1 = ap
655; arg2 = bp
656; arg3 = n
657
658t1 .reg %r22 ; current word of a
659t2 .reg %r21 ; current word of b
660sub_tmp1 .reg %r20 ; difference word stored to r
661sub_tmp2 .reg %r19 ; candidate borrow: 1 unless t1 > t2
662
663
664bn_sub_words ; r[i] = a[i] - b[i] - borrow for n words; the final borrow is returned
665 .proc
666 .callinfo
667 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
668 .entry
669 .align 64
670
671 CMPIB,>= 0,n,bn_sub_words_exit ; if (n <= 0) then exit
672 COPY %r0,%ret1 ; return 0 by default
673
674 ;
675 ; If 2 or more numbers do the loop
676 ;
677 CMPIB,= 1,n,bn_sub_words_single_top
678 NOP ; fills the branch delay slot
679
680 ;
681 ; This loop is unrolled 2 times (64-byte aligned as well)
682 ;
683bn_sub_words_unroll2
684 LDD 0(a_ptr),t1 ; a[0]
685 LDD 0(b_ptr),t2 ; b[0]
686 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
687 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
688
689 CMPCLR,*>> t1,t2,sub_tmp2 ; sub_tmp2 = 0; skip the next insn if t1 > t2
690 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
691
692 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 keep the incoming borrow
693 COPY sub_tmp2,%ret1 ; else c = (t1 < t2)
694 STD sub_tmp1,0(r_ptr) ; r[0] = t3
695
696 LDD 8(a_ptr),t1 ; a[1]
697 LDD 8(b_ptr),t2 ; b[1]
698 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
699 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
700 CMPCLR,*>> t1,t2,sub_tmp2 ; sub_tmp2 = 0; skip the next insn if t1 > t2
701 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
702
703 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 keep the incoming borrow
704 COPY sub_tmp2,%ret1 ; else c = (t1 < t2)
705 STD sub_tmp1,8(r_ptr) ; r[1] = t3
706
707 LDO -2(n),n ; n = n - 2
708 LDO 16(a_ptr),a_ptr ; a += 2
709 LDO 16(b_ptr),b_ptr ; b += 2
710
711 CMPIB,<= 2,n,bn_sub_words_unroll2 ; go again if more to do
712 LDO 16(r_ptr),r_ptr ; r += 2 (delay slot)
713
714 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
715
716bn_sub_words_single_top
717 LDD 0(a_ptr),t1 ; a[0]
718 LDD 0(b_ptr),t2 ; b[0]
719 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
720 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
721 CMPCLR,*>> t1,t2,sub_tmp2 ; sub_tmp2 = 0; skip the next insn if t1 > t2
722 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
723
724 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 keep the incoming borrow
725 COPY sub_tmp2,%ret1 ; else c = (t1 < t2)
726
727 STD sub_tmp1,0(r_ptr) ; r[0] = t3
728
729bn_sub_words_exit
730 .EXIT
731 BVE (%rp)
732 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
733 .PROCEND ;in=23,24,25,26,29;out=28;
734
735;------------------------------------------------------------------------------
736;
737; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
738;
739; arg0 = h
740; arg1 = l
741; arg2 = d
742;
743; This is mainly just output from the HP C compiler.
744;
745;------------------------------------------------------------------------------
746bn_div_words ; mostly HP C compiler output (see header); the notes added below are reviewer annotations
747 .PROC
748 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN
749 .IMPORT BN_num_bits_word,CODE
750 ;--- not PIC .IMPORT __iob,DATA
751 ;--- not PIC .IMPORT fprintf,CODE
752 .IMPORT abort,CODE
753 .IMPORT $$div2U,MILLICODE
754 .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
755 .ENTRY
756 STW %r2,-20(%r30) ;offset 0x8ec - save rp
757 STW,MA %r3,192(%r30) ;offset 0x8f0 - save r3, then advance sp by 192
758 STW %r4,-188(%r30) ;offset 0x8f4 - save r4
759 DEPD %r5,31,32,%r6 ;offset 0x8f8 - pack r5 into the upper half of r6
760 STD %r6,-184(%r30) ;offset 0x8fc - save r5/r6 as one doubleword
761 DEPD %r7,31,32,%r8 ;offset 0x900 - pack r7 into the upper half of r8
762 STD %r8,-176(%r30) ;offset 0x904 - save r7/r8 as one doubleword
763 STW %r9,-168(%r30) ;offset 0x908 - save r9
764 LDD -248(%r30),%r3 ;offset 0x90c - 64-bit load from entry sp-56 (presumably d; confirm)
765 COPY %r26,%r4 ;offset 0x910
766 COPY %r24,%r5 ;offset 0x914
767 DEPD %r25,31,32,%r4 ;offset 0x918
768 CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c - continue at $0006000C when r3 != 0
769 DEPD %r23,31,32,%r5 ;offset 0x920
770 MOVIB,TR -1,%r29,$00060002 ;offset 0x924 - r3 == 0: return with r29 = -1
771 EXTRD,U %r29,31,32,%r28 ;offset 0x928
772$0006002A
773 LDO -1(%r29),%r29 ;offset 0x92c
774 SUB %r23,%r7,%r23 ;offset 0x930
775$00060024
776 SUB %r4,%r31,%r25 ;offset 0x934
777 AND %r25,%r19,%r26 ;offset 0x938
778 CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c
779 DEPD,Z %r25,31,32,%r20 ;offset 0x940
780 OR %r20,%r24,%r21 ;offset 0x944
781 CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948
782 SUB %r31,%r2,%r31 ;offset 0x94c
783$00060046
784$0006002E
785 DEPD,Z %r23,31,32,%r25 ;offset 0x950
786 EXTRD,U %r23,31,32,%r26 ;offset 0x954
787 AND %r25,%r19,%r24 ;offset 0x958
788 ADD,L %r31,%r26,%r31 ;offset 0x95c
789 CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960
790 LDO 1(%r31),%r31 ;offset 0x964
791$00060032
792 CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968
793 LDO -1(%r29),%r29 ;offset 0x96c
794 ADD,L %r4,%r3,%r4 ;offset 0x970
795$00060036
796 ADDIB,=,N -1,%r8,$D0 ;offset 0x974 - decrement the pass counter in r8; leave via $D0 at zero
797 SUB %r5,%r24,%r28 ;offset 0x978
798$0006003A
799 SUB %r4,%r31,%r24 ;offset 0x97c
800 SHRPD %r24,%r28,32,%r4 ;offset 0x980
801 DEPD,Z %r29,31,32,%r9 ;offset 0x984
802 DEPD,Z %r28,31,32,%r5 ;offset 0x988
803$0006001C
804 EXTRD,U %r4,31,32,%r31 ;offset 0x98c
805 CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990
806 MOVB,TR %r6,%r29,$D1 ;offset 0x994
807 STD %r29,-152(%r30) ;offset 0x998
808$0006000C
809 EXTRD,U %r3,31,32,%r25 ;offset 0x99c
810 COPY %r3,%r26 ;offset 0x9a0
811 EXTRD,U %r3,31,32,%r9 ;offset 0x9a4
812 EXTRD,U %r4,31,32,%r8 ;offset 0x9a8
813 .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28;
814 B,L BN_num_bits_word,%r2 ;offset 0x9ac - call BN_num_bits_word with r25/r26 taken from r3
815 EXTRD,U %r5,31,32,%r7 ;offset 0x9b0
816 LDI 64,%r20 ;offset 0x9b4
817 DEPD %r7,31,32,%r5 ;offset 0x9b8
818 DEPD %r8,31,32,%r4 ;offset 0x9bc
819 DEPD %r9,31,32,%r3 ;offset 0x9c0
820 CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 - result == 64: skip the range check
821 COPY %r28,%r24 ;offset 0x9c8
822 MTSARCM %r24 ;offset 0x9cc
823 DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0
824 CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 - r4 > r19: go to the abort path
825$00060012
826 SUBI 64,%r24,%r31 ;offset 0x9d8
827 CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc
828 SUB %r4,%r3,%r4 ;offset 0x9e0
829$00060016
830 CMPB,= %r31,%r0,$0006001A ;offset 0x9e4
831 COPY %r0,%r9 ;offset 0x9e8
832 MTSARCM %r31 ;offset 0x9ec
833 DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0
834 SUBI 64,%r31,%r26 ;offset 0x9f4
835 MTSAR %r26 ;offset 0x9f8
836 SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc
837 MTSARCM %r31 ;offset 0xa00
838 DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04
839$0006001A
840 DEPDI,Z -1,31,32,%r19 ;offset 0xa08
841 AND %r3,%r19,%r29 ;offset 0xa0c
842 EXTRD,U %r29,31,32,%r2 ;offset 0xa10
843 DEPDI,Z -1,63,32,%r6 ;offset 0xa14
844 MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 - r8 = 2 (pass counter), enter the loop at $0006001C
845 EXTRD,U %r3,63,32,%r7 ;offset 0xa1c
846$D2
847 ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20
848 ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24
849 ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28
850 ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28;
851 ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c
852 ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30
853 .CALL ;
854 B,L abort,%r2 ;offset 0xa34 - call abort()
855 NOP ;offset 0xa38
856 B $D3 ;offset 0xa3c
857 LDW -212(%r30),%r2 ;offset 0xa40
858$00060020
859 COPY %r4,%r26 ;offset 0xa44
860 EXTRD,U %r4,31,32,%r25 ;offset 0xa48
861 COPY %r2,%r24 ;offset 0xa4c
862 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
863 B,L $$div2U,%r31 ;offset 0xa50 - millicode divide
864 EXTRD,U %r2,31,32,%r23 ;offset 0xa54
865 DEPD %r28,31,32,%r29 ;offset 0xa58
866$00060022
867 STD %r29,-152(%r30) ;offset 0xa5c
868$D1
869 AND %r5,%r19,%r24 ;offset 0xa60
870 EXTRD,U %r24,31,32,%r24 ;offset 0xa64
871 STW %r2,-160(%r30) ;offset 0xa68
872 STW %r7,-128(%r30) ;offset 0xa6c
873 FLDD -152(%r30),%fr4 ;offset 0xa70
874 FLDD -152(%r30),%fr7 ;offset 0xa74
875 FLDW -160(%r30),%fr8L ;offset 0xa78
876 FLDW -128(%r30),%fr5L ;offset 0xa7c
877 XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80
878 FSTD %fr10,-136(%r30) ;offset 0xa84
879 XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88
880 FSTD %fr22,-144(%r30) ;offset 0xa8c
881 XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90
882 XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94
883 FSTD %fr11,-112(%r30) ;offset 0xa98
884 FSTD %fr23,-120(%r30) ;offset 0xa9c
885 LDD -136(%r30),%r28 ;offset 0xaa0
886 DEPD,Z %r28,31,32,%r31 ;offset 0xaa4
887 LDD -144(%r30),%r20 ;offset 0xaa8
888 ADD,L %r20,%r31,%r31 ;offset 0xaac
889 LDD -112(%r30),%r22 ;offset 0xab0
890 DEPD,Z %r22,31,32,%r22 ;offset 0xab4
891 LDD -120(%r30),%r21 ;offset 0xab8
892 B $00060024 ;offset 0xabc
893 ADD,L %r21,%r22,%r23 ;offset 0xac0
894$D0
895 OR %r9,%r29,%r29 ;offset 0xac4
896$00060040
897 EXTRD,U %r29,31,32,%r28 ;offset 0xac8 - upper half of the result goes in r28 (ret0)
898$00060002
899$L2
900 LDW -212(%r30),%r2 ;offset 0xacc - reload rp (entry sp-20)
901$D3
902 LDW -168(%r30),%r9 ;offset 0xad0 - restore r9
903 LDD -176(%r30),%r8 ;offset 0xad4 - restore r8 (r7 is packed in its upper half)
904 EXTRD,U %r8,31,32,%r7 ;offset 0xad8 - restore r7
905 LDD -184(%r30),%r6 ;offset 0xadc - restore r6 (r5 is packed in its upper half)
906 EXTRD,U %r6,31,32,%r5 ;offset 0xae0 - restore r5
907 LDW -188(%r30),%r4 ;offset 0xae4 - restore r4
908 BVE (%r2) ;offset 0xae8
909 .EXIT
910 LDW,MB -192(%r30),%r3 ;offset 0xaec - restore r3 and pop the 192-byte frame (delay slot)
911 .PROCEND ;in=23,25;out=28,29;fpin=105,107;
912
913
914
915
916;----------------------------------------------------------------------------
917;
918; Registers to hold 64-bit values to manipulate. The "L" part
919; of the register corresponds to the upper 32-bits, while the "R"
920; part corresponds to the lower 32-bits
921;
922; Note, that when using b6 and b7, the code must save these before
923; using them because they are callee save registers
924;
925;
926; Floating point registers to use to save values that
927; are manipulated. These don't collide with ftemp1-6 and
928; are all caller save registers
929;
930a0 .reg %fr22 ; a[0]; aNL = upper 32 bits, aNR = lower 32 bits (same for a1-a7)
931a0L .reg %fr22L
932a0R .reg %fr22R
933
934a1 .reg %fr23
935a1L .reg %fr23L
936a1R .reg %fr23R
937
938a2 .reg %fr24
939a2L .reg %fr24L
940a2R .reg %fr24R
941
942a3 .reg %fr25
943a3L .reg %fr25L
944a3R .reg %fr25R
945
946a4 .reg %fr26
947a4L .reg %fr26L
948a4R .reg %fr26R
949
950a5 .reg %fr27
951a5L .reg %fr27L
952a5R .reg %fr27R
953
954a6 .reg %fr28
955a6L .reg %fr28L
956a6R .reg %fr28R
957
958a7 .reg %fr29
959a7L .reg %fr29L
960a7R .reg %fr29R
961
962b0 .reg %fr30
963b0L .reg %fr30L
964b0R .reg %fr30R
965
966b1 .reg %fr31
967b1L .reg %fr31L
968b1R .reg %fr31R
969
970;
971; Temporary floating point variables, these are all caller save
972; registers
973;
974ftemp1 .reg %fr4
975ftemp2 .reg %fr5
976ftemp3 .reg %fr6
977ftemp4 .reg %fr7
978
979;
980; The B set of registers when used.
981;
982
983b2 .reg %fr8
984b2L .reg %fr8L
985b2R .reg %fr8R
986
987b3 .reg %fr9
988b3L .reg %fr9L
989b3R .reg %fr9R
990
991b4 .reg %fr10
992b4L .reg %fr10L
993b4R .reg %fr10R
994
995b5 .reg %fr11
996b5L .reg %fr11L
997b5R .reg %fr11R
998
999b6 .reg %fr12 ; fr12 is callee-save: must be saved before use
1000b6L .reg %fr12L
1001b6R .reg %fr12R
1002
1003b7 .reg %fr13 ; fr13 is callee-save: must be saved before use
1004b7L .reg %fr13L
1005b7R .reg %fr13R
1006
1007c1 .reg %r21 ; only reg
1008temp1 .reg %r20 ; only reg
1009temp2 .reg %r19 ; only reg
1010temp3 .reg %r31 ; only reg
1011
1012m1 .reg %r28 ; same register as %ret0
1013c2 .reg %r23
1014high_one .reg %r1 ; value 1 << 32, set up by the comba routine before the macros run
1015ht .reg %r6 ; r3-r6 are callee-save: saved in the comba prologue
1016lt .reg %r5
1017m .reg %r4
1018c3 .reg %r3
1019
1020SQR_ADD_C .macro A0L,A0R,C1,C2,C3 ; (C3:C2:C1) += (A0L:A0R)^2; needs high_mask set by the caller
1021 XMPYU A0L,A0R,ftemp1 ; m = A0L*A0R
1022 FSTD ftemp1,-24(%sp) ; store m
1023
1024 XMPYU A0R,A0R,ftemp2 ; lt = A0R*A0R
1025 FSTD ftemp2,-16(%sp) ; store lt
1026
1027 XMPYU A0L,A0L,ftemp3 ; ht = A0L*A0L
1028 FSTD ftemp3,-8(%sp) ; store ht
1029
1030 LDD -24(%sp),m ; load m
1031 AND m,high_mask,temp2 ; m & Mask
1032 DEPD,Z m,30,31,temp3 ; m << 32+1
1033 LDD -16(%sp),lt ; lt
1034
1035 LDD -8(%sp),ht ; ht
1036 EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
1037 ADD temp3,lt,lt ; lt = lt+m
1038 ADD,L ht,temp1,ht ; ht += temp1 (ADD,L keeps the carry from the ADD above)
1039 ADD,DC ht,%r0,ht ; ht += carry from lt+m
1040
1041 ADD C1,lt,C1 ; c1=c1+lt
1042 ADD,DC ht,%r0,ht ; ht += carry
1043
1044 ADD C2,ht,C2 ; c2=c2+ht
1045 ADD,DC C3,%r0,C3 ; c3 += carry
1046.endm
1047
1048SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 ; (C3:C2:C1) += 2*(A0L:A0R)*(A1L:A1R); needs high_one set by the caller
1049 XMPYU A0L,A1R,ftemp1 ; m1 = A0L*A1R
1050 FSTD ftemp1,-16(%sp) ; store m1
1051 XMPYU A0R,A1L,ftemp2 ; m = A0R*A1L
1052 FSTD ftemp2,-8(%sp) ; store m
1053 XMPYU A0R,A1R,ftemp3 ; lt = A0R*A1R
1054 FSTD ftemp3,-32(%sp) ; store lt
1055 XMPYU A0L,A1L,ftemp4 ; ht = A0L*A1L
1056 FSTD ftemp4,-24(%sp) ; store ht
1057
1058 LDD -8(%sp),m ; load m
1059 LDD -16(%sp),m1 ; load m1
1060 ADD,L m,m1,m ; m+m1
1061
1062 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1063 LDD -24(%sp),ht ; load ht
1064
1065 CMPCLR,*>>= m,m1,%r0 ; if (m < m1), i.e. m+m1 wrapped
1066 ADD,L ht,high_one,ht ; ht+=high_one
1067
1068 EXTRD,U m,31,32,temp1 ; m >> 32
1069 LDD -32(%sp),lt ; lt
1070 ADD,L ht,temp1,ht ; ht+= m>>32
1071 ADD lt,temp3,lt ; lt = lt + (m<<32)
1072 ADD,DC ht,%r0,ht ; ht += carry
1073
1074 ADD ht,ht,ht ; ht=ht+ht;
1075 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1076
1077 ADD lt,lt,lt ; lt=lt+lt;
1078 ADD,DC ht,%r0,ht ; add in carry (ht++)
1079
1080 ADD C1,lt,C1 ; c1=c1+lt
1081 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1082 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1083
1084 ADD C2,ht,C2 ; c2 = c2 + ht
1085 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1086.endm
1087
1088;
1089;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1090; arg0 = r_ptr
1091; arg1 = a_ptr
1092;
1093
1094bn_sqr_comba8 ; r[0..15] = a[0..7] squared, summed one result word at a time in c1/c2/c3
1095 .PROC
1096 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1097 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1098 .ENTRY
1099 .align 64
1100
1101 STD %r3,0(%sp) ; save r3
1102 STD %r4,8(%sp) ; save r4
1103 STD %r5,16(%sp) ; save r5
1104 STD %r6,24(%sp) ; save r6
1105
1106 ;
1107 ; Zero out carries
1108 ;
1109 COPY %r0,c1
1110 COPY %r0,c2
1111 COPY %r0,c3
1112
1113 LDO 128(%sp),%sp ; bump stack
1114 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1115 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1116
1117 ;
1118 ; Load up all of the values we are going to use
1119 ;
1120 FLDD 0(a_ptr),a0 ; a[0]
1121 FLDD 8(a_ptr),a1 ; a[1]
1122 FLDD 16(a_ptr),a2 ; a[2]
1123 FLDD 24(a_ptr),a3 ; a[3]
1124 FLDD 32(a_ptr),a4 ; a[4]
1125 FLDD 40(a_ptr),a5 ; a[5]
1126 FLDD 48(a_ptr),a6 ; a[6]
1127 FLDD 56(a_ptr),a7 ; a[7]
1128
1129 SQR_ADD_C a0L,a0R,c1,c2,c3
1130 STD c1,0(r_ptr) ; r[0] = c1;
1131 COPY %r0,c1
1132
1133 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1134 STD c2,8(r_ptr) ; r[1] = c2;
1135 COPY %r0,c2
1136
1137 SQR_ADD_C a1L,a1R,c3,c1,c2
1138 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1139 STD c3,16(r_ptr) ; r[2] = c3;
1140 COPY %r0,c3
1141
1142 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1143 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1144 STD c1,24(r_ptr) ; r[3] = c1;
1145 COPY %r0,c1
1146
1147 SQR_ADD_C a2L,a2R,c2,c3,c1
1148 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1149 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
1150 STD c2,32(r_ptr) ; r[4] = c2;
1151 COPY %r0,c2
1152
1153 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
1154 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
1155 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1156 STD c3,40(r_ptr) ; r[5] = c3;
1157 COPY %r0,c3
1158
1159 SQR_ADD_C a3L,a3R,c1,c2,c3
1160 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
1161 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
1162 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
1163 STD c1,48(r_ptr) ; r[6] = c1;
1164 COPY %r0,c1
1165
1166 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
1167 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
1168 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
1169 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
1170 STD c2,56(r_ptr) ; r[7] = c2;
1171 COPY %r0,c2
1172
1173 SQR_ADD_C a4L,a4R,c3,c1,c2
1174 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
1175 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
1176 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
1177 STD c3,64(r_ptr) ; r[8] = c3;
1178 COPY %r0,c3
1179
1180 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
1181 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
1182 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
1183 STD c1,72(r_ptr) ; r[9] = c1;
1184 COPY %r0,c1
1185
1186 SQR_ADD_C a5L,a5R,c2,c3,c1
1187 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
1188 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
1189 STD c2,80(r_ptr) ; r[10] = c2;
1190 COPY %r0,c2
1191
1192 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
1193 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
1194 STD c3,88(r_ptr) ; r[11] = c3;
1195 COPY %r0,c3
1196
1197 SQR_ADD_C a6L,a6R,c1,c2,c3
1198 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
1199 STD c1,96(r_ptr) ; r[12] = c1;
1200 COPY %r0,c1
1201
1202 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
1203 STD c2,104(r_ptr) ; r[13] = c2;
1204 COPY %r0,c2
1205
1206 SQR_ADD_C a7L,a7R,c3,c1,c2
1207 STD c3, 112(r_ptr) ; r[14] = c3
1208 STD c1, 120(r_ptr) ; r[15] = c1
1209
1210 .EXIT
1211 LDD -104(%sp),%r6 ; restore r6
1212 LDD -112(%sp),%r5 ; restore r5
1213 LDD -120(%sp),%r4 ; restore r4
1214 BVE (%rp)
1215 LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame (delay slot)
1216
1217 .PROCEND
1218
1219;-----------------------------------------------------------------------------
1220; Squares a 4-word number using the SQR_ADD_C / SQR_ADD_C2 column macros.
1221;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1222; arg0 = r_ptr (8 result words are stored at offsets 0..56)
1223; arg1 = a_ptr (4 input words are loaded from offsets 0..24)
1224; c1, c2 and c3 rotate roles from one result column to the next.
1225
1226bn_sqr_comba4
1227 .proc
1228 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1229 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1230 .entry
1231 .align 64
1232 STD %r3,0(%sp) ; save r3 at the base of the new frame
1233 STD %r4,8(%sp) ; save r4
1234 STD %r5,16(%sp) ; save r5
1235 STD %r6,24(%sp) ; save r6
1236
1237 ;
1238 ; Zero out carries (the three column accumulators)
1239 ;
1240 COPY %r0,c1
1241 COPY %r0,c2
1242 COPY %r0,c3
1243
1244 LDO 128(%sp),%sp ; bump stack by the 128-byte frame
1245 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1246 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1247
1248 ;
1249 ; Load up all of the values we are going to use.
1250 ; Only a[0]..a[3] are loaded: the input has four words, so reading
 ; offsets 32..56 would run past the end of the caller's array, and
 ; a4..a7 are not used by any column below.
 ;
1251 FLDD 0(a_ptr),a0
1252 FLDD 8(a_ptr),a1
1253 FLDD 16(a_ptr),a2
1254 FLDD 24(a_ptr),a3
1259
1260 SQR_ADD_C a0L,a0R,c1,c2,c3
1261
1262 STD c1,0(r_ptr) ; r[0] = c1;
1263 COPY %r0,c1 ; c1 becomes the top word of the next column
1264
1265 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1266
1267 STD c2,8(r_ptr) ; r[1] = c2;
1268 COPY %r0,c2
1269
1270 SQR_ADD_C a1L,a1R,c3,c1,c2
1271 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1272
1273 STD c3,16(r_ptr) ; r[2] = c3;
1274 COPY %r0,c3
1275
1276 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1277 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1278
1279 STD c1,24(r_ptr) ; r[3] = c1;
1280 COPY %r0,c1
1281
1282 SQR_ADD_C a2L,a2R,c2,c3,c1
1283 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1284
1285 STD c2,32(r_ptr) ; r[4] = c2;
1286 COPY %r0,c2
1287
1288 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1289 STD c3,40(r_ptr) ; r[5] = c3;
1290 COPY %r0,c3
1291
1292 SQR_ADD_C a3L,a3R,c1,c2,c3
1293 STD c1,48(r_ptr) ; r[6] = c1;
1294 STD c2,56(r_ptr) ; r[7] = c2 (carry out of the last column)
1295
1296 .EXIT
1297 LDD -104(%sp),%r6 ; restore r6 (saved at frame offset 24)
1298 LDD -112(%sp),%r5 ; restore r5
1299 LDD -120(%sp),%r4 ; restore r4
1300 BVE (%rp) ; return to caller
1301 LDD,MB -128(%sp),%r3 ; delay slot: pop the 128-byte frame and restore r3
1302
1303 .PROCEND
1304
1305
1306;---------------------------------------------------------------------------
1307
; MUL_ADD_C: adds the 128-bit product of two 64-bit operands, each given as
; a left/right pair of 32-bit FP register halves, into the accumulator C1/C2/C3.
; The four 32x32 partial products go through the stack scratch slots
; -8/-16/-24/-32(%sp) to reach the integer registers.
1308MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
1309 XMPYU A0L,B0R,ftemp1 ; m1 = A0L*B0R (cross product)
1310 FSTD ftemp1,-16(%sp) ; spill m1
1311 XMPYU A0R,B0L,ftemp2 ; m = A0R*B0L (cross product)
1312 FSTD ftemp2,-8(%sp) ; spill m
1313 XMPYU A0R,B0R,ftemp3 ; lt = A0R*B0R
1314 FSTD ftemp3,-32(%sp)
1315 XMPYU A0L,B0L,ftemp4 ; ht = A0L*B0L
1316 FSTD ftemp4,-24(%sp) ; spill ht
1317
1318 LDD -8(%sp),m ; r21 = m
1319 LDD -16(%sp),m1 ; r19 = m1
1320 ADD,L m,m1,m ; m = m+m1 (ADD,L leaves the carry bits alone)
1321
1322 DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32
1323 LDD -24(%sp),ht ; r24 = ht
1324
1325 CMPCLR,*>>= m,m1,%r0 ; skip next unless the sum wrapped (m < m1)
1326 ADD,L ht,high_one,ht ; ht+=high_one
1327
1328 EXTRD,U m,31,32,temp1 ; temp1 = m >> 32
1329 LDD -32(%sp),lt ; lt
1330 ADD,L ht,temp1,ht ; ht+= m>>32
1331 ADD lt,temp3,lt ; lt = lt + (m << 32)
1332 ADD,DC ht,%r0,ht ; ht += carry out of the lt addition
1333
1334 ADD C1,lt,C1 ; c1=c1+lt
1335 ADD,DC ht,%r0,ht ; ht += carry out of c1+lt
1336
1337 ADD C2,ht,C2 ; c2 = c2 + ht
1338 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1339.endm
1340
1341
1342; Multiplies two 8-word numbers column by column with MUL_ADD_C.
1343;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1344; arg0 = r_ptr (16 result words are stored at offsets 0..120)
1345; arg1 = a_ptr (8 input words are loaded from offsets 0..56)
1346; arg2 = b_ptr (8 input words are loaded from offsets 0..56)
1347; c1, c2 and c3 rotate roles from one result column to the next.
1348
1349bn_mul_comba8
1350 .proc
1351 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1352 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1353 .entry
1354 .align 64
1355
1356 STD %r3,0(%sp) ; save r3 at the base of the new frame
1357 STD %r4,8(%sp) ; save r4
1358 STD %r5,16(%sp) ; save r5
1359 STD %r6,24(%sp) ; save r6
1360 FSTD %fr12,32(%sp) ; save fr12
1361 FSTD %fr13,40(%sp) ; save fr13
1362
1363 ;
1364 ; Zero out carries (the three column accumulators)
1365 ;
1366 COPY %r0,c1
1367 COPY %r0,c2
1368 COPY %r0,c3
1369
1370 LDO 128(%sp),%sp ; bump stack by the 128-byte frame
1371 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1372
1373 ;
1374 ; Load up all of the values we are going to use: a[0]..a[7], b[0]..b[7]
1375 ;
1376 FLDD 0(a_ptr),a0
1377 FLDD 8(a_ptr),a1
1378 FLDD 16(a_ptr),a2
1379 FLDD 24(a_ptr),a3
1380 FLDD 32(a_ptr),a4
1381 FLDD 40(a_ptr),a5
1382 FLDD 48(a_ptr),a6
1383 FLDD 56(a_ptr),a7
1384
1385 FLDD 0(b_ptr),b0
1386 FLDD 8(b_ptr),b1
1387 FLDD 16(b_ptr),b2
1388 FLDD 24(b_ptr),b3
1389 FLDD 32(b_ptr),b4
1390 FLDD 40(b_ptr),b5
1391 FLDD 48(b_ptr),b6
1392 FLDD 56(b_ptr),b7
1393
1394 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1395 STD c1,0(r_ptr) ; r[0] = c1
1396 COPY %r0,c1 ; c1 becomes the top word of the next column
1397
1398 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1399 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1400 STD c2,8(r_ptr) ; r[1] = c2
1401 COPY %r0,c2
1402
1403 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1404 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1405 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1406 STD c3,16(r_ptr) ; r[2] = c3
1407 COPY %r0,c3
1408
1409 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1410 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1411 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1412 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1413 STD c1,24(r_ptr) ; r[3] = c1
1414 COPY %r0,c1
1415
1416 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1417 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1418 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1419 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1420 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1421 STD c2,32(r_ptr) ; r[4] = c2
1422 COPY %r0,c2
1423
1424 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1425 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1426 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1427 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1428 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1429 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1430 STD c3,40(r_ptr) ; r[5] = c3
1431 COPY %r0,c3
1432
1433 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1434 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1435 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1436 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1437 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1438 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1439 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1440 STD c1,48(r_ptr) ; r[6] = c1
1441 COPY %r0,c1
1442
1443 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1444 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1445 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1446 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1447 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1448 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1449 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1450 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1451 STD c2,56(r_ptr) ; r[7] = c2
1452 COPY %r0,c2
1453
1454 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1455 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1456 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1457 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1458 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1459 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1460 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1461 STD c3,64(r_ptr) ; r[8] = c3
1462 COPY %r0,c3
1463
1464 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1465 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1466 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1467 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1468 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1469 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1470 STD c1,72(r_ptr) ; r[9] = c1
1471 COPY %r0,c1
1472
1473 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1474 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1475 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1476 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1477 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1478 STD c2,80(r_ptr) ; r[10] = c2
1479 COPY %r0,c2
1480
1481 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1482 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1483 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1484 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1485 STD c3,88(r_ptr) ; r[11] = c3
1486 COPY %r0,c3
1487
1488 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1489 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1490 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1491 STD c1,96(r_ptr) ; r[12] = c1
1492 COPY %r0,c1
1493
1494 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1495 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1496 STD c2,104(r_ptr) ; r[13] = c2
1497 COPY %r0,c2
1498
1499 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1500 STD c3,112(r_ptr) ; r[14] = c3
1501 STD c1,120(r_ptr) ; r[15] = c1 (carry out of the last column)
1502
1503 .EXIT
1504 FLDD -88(%sp),%fr13 ; restore fr13 (saved at frame offset 40)
1505 FLDD -96(%sp),%fr12 ; restore fr12 (saved at frame offset 32)
1506 LDD -104(%sp),%r6 ; restore r6
1507 LDD -112(%sp),%r5 ; restore r5
1508 LDD -120(%sp),%r4 ; restore r4
1509 BVE (%rp) ; return to caller
1510 LDD,MB -128(%sp),%r3 ; delay slot: pop the 128-byte frame and restore r3
1511
1512 .PROCEND
1513
1514;-----------------------------------------------------------------------------
1515; Multiplies two 4-word numbers column by column with MUL_ADD_C.
1516;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1517; arg0 = r_ptr (8 result words are stored at offsets 0..56)
1518; arg1 = a_ptr (4 input words are loaded from offsets 0..24)
1519; arg2 = b_ptr (4 input words are loaded from offsets 0..24)
1520; c1, c2 and c3 rotate roles from one result column to the next.
1521
1522bn_mul_comba4
1523 .proc
1524 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1525 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1526 .entry
1527 .align 64
1528
1529 STD %r3,0(%sp) ; save r3 at the base of the new frame
1530 STD %r4,8(%sp) ; save r4
1531 STD %r5,16(%sp) ; save r5
1532 STD %r6,24(%sp) ; save r6
1533 FSTD %fr12,32(%sp) ; save fr12
1534 FSTD %fr13,40(%sp) ; save fr13
1535
1536 ;
1537 ; Zero out carries (the three column accumulators)
1538 ;
1539 COPY %r0,c1
1540 COPY %r0,c2
1541 COPY %r0,c3
1542
1543 LDO 128(%sp),%sp ; bump stack by the 128-byte frame
1544 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1545
1546 ;
1547 ; Load up all of the values we are going to use: a[0]..a[3], b[0]..b[3]
1548 ;
1549 FLDD 0(a_ptr),a0
1550 FLDD 8(a_ptr),a1
1551 FLDD 16(a_ptr),a2
1552 FLDD 24(a_ptr),a3
1553
1554 FLDD 0(b_ptr),b0
1555 FLDD 8(b_ptr),b1
1556 FLDD 16(b_ptr),b2
1557 FLDD 24(b_ptr),b3
1558
1559 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1560 STD c1,0(r_ptr) ; r[0] = c1
1561 COPY %r0,c1 ; c1 becomes the top word of the next column
1562
1563 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1564 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1565 STD c2,8(r_ptr) ; r[1] = c2
1566 COPY %r0,c2
1567
1568 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1569 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1570 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1571 STD c3,16(r_ptr) ; r[2] = c3
1572 COPY %r0,c3
1573
1574 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1575 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1576 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1577 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1578 STD c1,24(r_ptr) ; r[3] = c1
1579 COPY %r0,c1
1580
1581 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1582 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1583 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1584 STD c2,32(r_ptr) ; r[4] = c2
1585 COPY %r0,c2
1586
1587 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1588 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1589 STD c3,40(r_ptr) ; r[5] = c3
1590 COPY %r0,c3
1591
1592 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1593 STD c1,48(r_ptr) ; r[6] = c1
1594 STD c2,56(r_ptr) ; r[7] = c2 (carry out of the last column)
1595
1596 .EXIT
1597 FLDD -88(%sp),%fr13 ; restore fr13 (saved at frame offset 40)
1598 FLDD -96(%sp),%fr12 ; restore fr12 (saved at frame offset 32)
1599 LDD -104(%sp),%r6 ; restore r6
1600 LDD -112(%sp),%r5 ; restore r5
1601 LDD -120(%sp),%r4 ; restore r4
1602 BVE (%rp) ; return to caller
1603 LDD,MB -128(%sp),%r3 ; delay slot: pop the 128-byte frame and restore r3
1604
1605 .PROCEND
1606
1607
1608;--- not PIC .SPACE $TEXT$
1609;--- not PIC .SUBSPA $CODE$
1610;--- not PIC .SPACE $PRIVATE$,SORT=16
1611;--- not PIC .IMPORT $global$,DATA
1612;--- not PIC .SPACE $TEXT$
1613;--- not PIC .SUBSPA $CODE$
1614;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c
1615;--- not PIC C$7
1616;--- not PIC .ALIGN 8
1617;--- not PIC .STRINGZ "Division would overflow (%d)\n"
1618 .END
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s
new file mode 100644
index 0000000000..a99545754d
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/pa-risc2W.s
@@ -0,0 +1,1605 @@
1;
2; PA-RISC 64-bit implementation of bn_asm code
3;
4; This code is approximately 2x faster than the C version
5; for RSA/DSA.
6;
7; See http://devresource.hp.com/ for more details on the PA-RISC
8; architecture. Also see the book "PA-RISC 2.0 Architecture"
9; by Gerry Kane for information on the instruction set architecture.
10;
11; Code written by Chris Ruemmler (with some help from the HP C
12; compiler).
13;
14; The code compiles with HP's assembler
15;
16
17 .level 2.0W
18 .space $TEXT$
19 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
20
21;
22; Global Register definitions used for the routines.
23;
24; Some information about HP's runtime architecture for 64-bits.
25;
26; "Caller save" means the calling function must save the register
27; if it wants the register to be preserved.
28; "Callee save" means if a function uses the register, it must save
29; the value before using it.
30;
31; For the floating point registers
32;
33; "caller save" registers: fr4-fr11, fr22-fr31
34; "callee save" registers: fr12-fr21
35; "special" registers: fr0-fr3 (status and exception registers)
36;
37; For the integer registers
38; value zero : r0
39; "caller save" registers: r1,r19-r26
40; "callee save" registers: r3-r18
41; return register : r2 (rp)
42; return values ; r28 (ret0,ret1)
43; Stack pointer ; r30 (sp)
44; global data pointer ; r27 (dp)
45; argument pointer ; r29 (ap)
46; millicode return ptr ; r31 (also a caller save register)
47
48
49;
50; Arguments to the routines. Two aliases share %r24 and two share %r23
51; because arg2/arg3 mean different things in different routines.
52r_ptr .reg %r26 ; arg0: result pointer
53a_ptr .reg %r25 ; arg1: source pointer
54b_ptr .reg %r24 ; arg2 when it is a second source pointer
55num .reg %r24 ; arg2 when it is a word count
56w .reg %r23 ; arg3 when it is the multiplier word
57n .reg %r23 ; arg3 when it is a word count
58
59
60;
61; Globals used in some routines
62;
63
64top_overflow .reg %r29 ; set to 1 << 32 by the multiply routines
65high_mask .reg %r22 ; value 0xffffffff80000000L
66
67
68;------------------------------------------------------------------------------
69;
70; bn_mul_add_words
71;
72;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
73; int num, BN_ULONG w)
74;
75; arg0 = r_ptr
76; arg1 = a_ptr
77; arg2 = num
78; arg3 = w
79;
80; Local register definitions
81;
82
; Register aliases for the word-multiply routines. The "_1" names are the
; second lane of the 2x unrolled loops.
83fm1 .reg %fr22 ; FP product ht*fw_l (m1), lane 0
84fm .reg %fr23 ; FP product lt*fw_h (m), lane 0
85ht_temp .reg %fr24 ; FP high partial product, lane 0
86ht_temp_1 .reg %fr25 ; FP high partial product, lane 1
87lt_temp .reg %fr26 ; FP low partial product, lane 0
88lt_temp_1 .reg %fr27 ; FP low partial product, lane 1
89fm1_1 .reg %fr28 ; FP product m1, lane 1
90fm_1 .reg %fr29 ; FP product m, lane 1
91
92fw_h .reg %fr7L ; left (high) half of w
93fw_l .reg %fr7R ; right (low) half of w
94fw .reg %fr7 ; w as loaded from the stack
95
96fht_0 .reg %fr8L ; left (high) half of the lane-0 source word
97flt_0 .reg %fr8R ; right (low) half of the lane-0 source word
98t_float_0 .reg %fr8 ; lane-0 source word
99
100fht_1 .reg %fr9L ; left (high) half of the lane-1 source word
101flt_1 .reg %fr9R ; right (low) half of the lane-1 source word
102t_float_1 .reg %fr9 ; lane-1 source word
103
104tmp_0 .reg %r31 ; integer scratch, lane 0
105tmp_1 .reg %r21 ; integer scratch, lane 1
106m_0 .reg %r20
107m_1 .reg %r19
108ht_0 .reg %r1
109ht_1 .reg %r3 ; %r3 and up are saved by each routine before use
110lt_0 .reg %r4
111lt_1 .reg %r5
112m1_0 .reg %r6
113m1_1 .reg %r7
114rp_val .reg %r8 ; rp[0] as loaded by bn_mul_add_words
115rp_val_1 .reg %r9 ; rp[1] as loaded by bn_mul_add_words
116
; bn_mul_add_words: for each of num words, rp[i] = rp[i] + ap[i]*w + carry.
; The final carry word is returned in %ret0; num <= 0 returns 0.
117bn_mul_add_words
118 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
119 .proc
120 .callinfo frame=128
121 .entry
122 .align 64
123
124 STD %r3,0(%sp) ; save r3
125 STD %r4,8(%sp) ; save r4
126 NOP ; Needed to make the loop 16-byte aligned
127 NOP ; Needed to make the loop 16-byte aligned
128
129 STD %r5,16(%sp) ; save r5
130 STD %r6,24(%sp) ; save r6
131 STD %r7,32(%sp) ; save r7
132 STD %r8,40(%sp) ; save r8
133
134 STD %r9,48(%sp) ; save r9
135 COPY %r0,%ret0 ; return 0 by default
136 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
137 STD w,56(%sp) ; store w on stack
138
139 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
140 LDO 128(%sp),%sp ; bump stack (delay slot, runs on both paths)
141
142 ;
143 ; The loop is unrolled twice, so if there is only 1 number
144 ; then go straight to the cleanup code.
145 ;
146 CMPIB,= 1,num,bn_mul_add_words_single_top
147 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
148
149 ;
150 ; This loop is unrolled 2 times (64-byte aligned as well)
151 ;
152 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
153 ; two 32-bit multiplies can be issued per cycle.
154 ;
155bn_mul_add_words_unroll2
156
157 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8) ht(L)/lt(R)
158 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9) ht(L)/lt(R)
159 LDD 0(r_ptr),rp_val ; rp[0]
160 LDD 8(r_ptr),rp_val_1 ; rp[1]
161
162 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
163 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
164 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
165 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
166
167 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
168 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
169 FSTD fm,-8(%sp) ; -8(sp) = m[0]
170 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
171
172 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
173 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
174 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
175 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
176
177 XMPYU flt_0,fw_l,lt_temp ; lt_temp = flt_0*fw_l
178 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l
179 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
180 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
181
182 LDD -8(%sp),m_0 ; m[0]
183 LDD -40(%sp),m_1 ; m[1]
184 LDD -16(%sp),m1_0 ; m1[0]
185 LDD -48(%sp),m1_1 ; m1[1]
186
187 LDD -24(%sp),ht_0 ; ht[0]
188 LDD -56(%sp),ht_1 ; ht[1]
189 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
190 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
191
192 LDD -32(%sp),lt_0
193 LDD -64(%sp),lt_1
194 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if the sum wrapped (tmp_0 < m1[0])
195 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
196
197 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if the sum wrapped (tmp_1 < m1[1])
198 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
199 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
200 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
201
202 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
203 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
204 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
205 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
206
207 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
208 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
209 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
210 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
211
212 ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c;
213 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
214 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
215 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
216
217 LDO -2(num),num ; num = num - 2;
218 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
219 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
220 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
221
222 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
223 ADD,DC ht_1,%r0,%ret0 ; carry out = ht[1] + carry
224 LDO 16(a_ptr),a_ptr ; a_ptr += 2
225
226 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
227 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
228 LDO 16(r_ptr),r_ptr ; r_ptr += 2
229
230 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
231
232 ;
233 ; Top of loop aligned on 64-byte boundary
234 ;
235bn_mul_add_words_single_top
236 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8) ht(L)/lt(R)
237 LDD 0(r_ptr),rp_val ; rp[0]
238 LDO 8(a_ptr),a_ptr ; a_ptr++
239 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
240 FSTD fm1,-16(%sp) ; -16(sp) = m1
241 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
242 FSTD fm,-8(%sp) ; -8(sp) = m
243 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
244 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
245 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
246 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
247
248 LDD -8(%sp),m_0
249 LDD -16(%sp),m1_0 ; m1 = temp1
250 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
251 LDD -24(%sp),ht_0
252 LDD -32(%sp),lt_0
253
254 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if the sum wrapped (tmp_0 < m1)
255 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
256
257 EXTRD,U tmp_0,31,32,m_0 ; m>>32
258 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
259
260 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
261 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
262 ADD,DC ht_0,%r0,ht_0 ; ht += carry
263 ADD %ret0,tmp_0,lt_0 ; lt = lt + c;
264 ADD,DC ht_0,%r0,ht_0 ; ht += carry
265 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
266 ADD,DC ht_0,%r0,%ret0 ; carry out = ht + carry
267 STD lt_0,0(r_ptr) ; rp[0] = lt
268
269bn_mul_add_words_exit
270 .EXIT
271 LDD -80(%sp),%r9 ; restore r9
272 LDD -88(%sp),%r8 ; restore r8
273 LDD -96(%sp),%r7 ; restore r7
274 LDD -104(%sp),%r6 ; restore r6
275 LDD -112(%sp),%r5 ; restore r5
276 LDD -120(%sp),%r4 ; restore r4
277 BVE (%rp) ; return to caller
278 LDD,MB -128(%sp),%r3 ; restore r3 and pop the frame (delay slot)
279 .PROCEND ;in=23,24,25,26,29;out=28;
280
281;----------------------------------------------------------------------------
282;
283;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
284;
285; arg0 = rp
286; arg1 = ap
287; arg2 = num
288; arg3 = w
289
; bn_mul_words: for each of num words, rp[i] = low word of ap[i]*w + carry.
; The final carry word is returned in %ret0; num <= 0 returns 0.
290bn_mul_words
291 .proc
292 .callinfo frame=128
293 .entry
294 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
295 .align 64
296
297 STD %r3,0(%sp) ; save r3
298 STD %r4,8(%sp) ; save r4
299 STD %r5,16(%sp) ; save r5
300 STD %r6,24(%sp) ; save r6
301
302 STD %r7,32(%sp) ; save r7
303 COPY %r0,%ret0 ; return 0 by default
304 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
305 STD w,56(%sp) ; w on stack
306
307 CMPIB,>= 0,num,bn_mul_words_exit ; if (num <= 0) then exit
308 LDO 128(%sp),%sp ; bump stack (delay slot, runs on both paths)
309
310 ;
311 ; See if only 1 word to do, thus just do cleanup
312 ;
313 CMPIB,= 1,num,bn_mul_words_single_top
314 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
315
316 ;
317 ; This loop is unrolled 2 times (64-byte aligned as well)
318 ;
319 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
320 ; two 32-bit multiplies can be issued per cycle.
321 ;
322bn_mul_words_unroll2
323
324 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8) ht(L)/lt(R)
325 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9) ht(L)/lt(R)
326 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
327 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
328
329 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
330 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
331 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
332 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
333
334 FSTD fm,-8(%sp) ; -8(sp) = m[0]
335 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
336 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
337 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
338
339 FSTD ht_temp,-24(%sp) ; -24(sp) = ht[0]
340 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht[1]
341 XMPYU flt_0,fw_l,lt_temp ; lt_temp = flt_0*fw_l
342 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l
343
344 FSTD lt_temp,-32(%sp) ; -32(sp) = lt[0]
345 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt[1]
346 LDD -8(%sp),m_0
347 LDD -40(%sp),m_1
348
349 LDD -16(%sp),m1_0
350 LDD -48(%sp),m1_1
351 LDD -24(%sp),ht_0
352 LDD -56(%sp),ht_1
353
354 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
355 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
356 LDD -32(%sp),lt_0
357 LDD -64(%sp),lt_1
358
359 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if the sum wrapped (tmp_0 < m1[0])
360 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
361 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if the sum wrapped (tmp_1 < m1[1])
362 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
363
364 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
365 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
366 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
367 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
368
369 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
370 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
371 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
372 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
373
374 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
375 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
376 ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c (ret0);
377 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
378
379 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + c (ht_0)
380 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
381 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
382 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
383
384 COPY ht_1,%ret0 ; carry = ht[1]
385 LDO -2(num),num ; num = num - 2;
386 LDO 16(a_ptr),a_ptr ; ap += 2
387 CMPIB,<= 2,num,bn_mul_words_unroll2 ; go again if more to do
388 LDO 16(r_ptr),r_ptr ; rp += 2
389
390 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
391
392 ;
393 ; Top of loop aligned on 64-byte boundary
394 ;
395bn_mul_words_single_top
396 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8) ht(L)/lt(R)
397
398 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
399 FSTD fm1,-16(%sp) ; -16(sp) = m1
400 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
401 FSTD fm,-8(%sp) ; -8(sp) = m
402 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
403 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
404 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
405 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
406
407 LDD -8(%sp),m_0
408 LDD -16(%sp),m1_0
409 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
410 LDD -24(%sp),ht_0
411 LDD -32(%sp),lt_0
412
413 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if the sum wrapped (tmp_0 < m1)
414 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
415
416 EXTRD,U tmp_0,31,32,m_0 ; m>>32
417 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
418
419 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
420 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
421 ADD,DC ht_0,%r0,ht_0 ; ht += carry
422
423 ADD %ret0,lt_0,lt_0 ; lt = lt + c;
424 ADD,DC ht_0,%r0,ht_0 ; ht += carry
425
426 COPY ht_0,%ret0 ; copy carry
427 STD lt_0,0(r_ptr) ; rp[0] = lt
428
429bn_mul_words_exit
430 .EXIT
431 LDD -96(%sp),%r7 ; restore r7
432 LDD -104(%sp),%r6 ; restore r6
433 LDD -112(%sp),%r5 ; restore r5
434 LDD -120(%sp),%r4 ; restore r4
435 BVE (%rp) ; return to caller
436 LDD,MB -128(%sp),%r3 ; restore r3 and pop the frame (delay slot)
437 .PROCEND ;in=23,24,25,26,29;out=28;
438
439;----------------------------------------------------------------------------
440;
441;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
442;
443; arg0 = rp
444; arg1 = ap
445; arg2 = num
446;
447
; bn_sqr_words: for each of num input words, stores the two-word square:
; rp[2*i] = low word of ap[i]^2, rp[2*i+1] = high word. No return value is set.
448bn_sqr_words
449 .proc
450 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
451 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
452 .entry
453 .align 64
454
455 STD %r3,0(%sp) ; save r3
456 STD %r4,8(%sp) ; save r4
457 NOP
458 STD %r5,16(%sp) ; save r5
459
460 CMPIB,>= 0,num,bn_sqr_words_exit ; if (num <= 0) then exit
461 LDO 128(%sp),%sp ; bump stack (delay slot, runs on both paths)
462
463 ;
464 ; If only 1, then go straight to cleanup
465 ;
466 CMPIB,= 1,num,bn_sqr_words_single_top
467 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
468
469 ;
470 ; This loop is unrolled 2 times (64-byte aligned as well)
471 ;
472
473bn_sqr_words_unroll2
474 FLDD 0(a_ptr),t_float_0 ; a[0]
475 FLDD 8(a_ptr),t_float_1 ; a[1]
476 XMPYU fht_0,flt_0,fm ; m[0] = high half * low half of a[0]
477 XMPYU fht_1,flt_1,fm_1 ; m[1] = high half * low half of a[1]
478
479 FSTD fm,-24(%sp) ; store m[0]
480 FSTD fm_1,-56(%sp) ; store m[1]
481 XMPYU flt_0,flt_0,lt_temp ; lt[0]
482 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
483
484 FSTD lt_temp,-16(%sp) ; store lt[0]
485 FSTD lt_temp_1,-48(%sp) ; store lt[1]
486 XMPYU fht_0,fht_0,ht_temp ; ht[0]
487 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
488
489 FSTD ht_temp,-8(%sp) ; store ht[0]
490 FSTD ht_temp_1,-40(%sp) ; store ht[1]
491 LDD -24(%sp),m_0
492 LDD -56(%sp),m_1
493
494 AND m_0,high_mask,tmp_0 ; m[0] & Mask
495 AND m_1,high_mask,tmp_1 ; m[1] & Mask
496 DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
497 DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
498
499 LDD -16(%sp),lt_0
500 LDD -48(%sp),lt_1
501 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
502 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
503
504 LDD -8(%sp),ht_0
505 LDD -40(%sp),ht_1
506 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
507 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
508
509 ADD lt_0,m_0,lt_0 ; lt[0] = lt[0]+m[0]
510 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
511 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
512 STD ht_0,8(r_ptr) ; rp[1] = ht[0]
513
514 ADD lt_1,m_1,lt_1 ; lt[1] = lt[1]+m[1]
515 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
516 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
517 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
518
519 LDO -2(num),num ; num = num - 2;
520 LDO 16(a_ptr),a_ptr ; ap += 2
521 CMPIB,<= 2,num,bn_sqr_words_unroll2 ; go again if more to do
522 LDO 32(r_ptr),r_ptr ; rp += 4
523
524 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
525
526 ;
527 ; Top of loop aligned on 64-byte boundary
528 ;
529bn_sqr_words_single_top
530 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8) ht(L)/lt(R)
531
532 XMPYU fht_0,flt_0,fm ; m
533 FSTD fm,-24(%sp) ; store m
534
535 XMPYU flt_0,flt_0,lt_temp ; lt
536 FSTD lt_temp,-16(%sp) ; store lt
537
538 XMPYU fht_0,fht_0,ht_temp ; ht
539 FSTD ht_temp,-8(%sp) ; store ht
540
541 LDD -24(%sp),m_0 ; load m
542 AND m_0,high_mask,tmp_0 ; m & Mask
543 DEPD,Z m_0,30,31,m_0 ; m << 32+1
544 LDD -16(%sp),lt_0 ; lt
545
546 LDD -8(%sp),ht_0 ; ht
547 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
548 ADD m_0,lt_0,lt_0 ; lt = lt+m
549 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 (ADD,L keeps the carry from lt+m)
550 ADD,DC ht_0,%r0,ht_0 ; ht += carry
551
552 STD lt_0,0(r_ptr) ; rp[0] = lt
553 STD ht_0,8(r_ptr) ; rp[1] = ht
554
555bn_sqr_words_exit
556 .EXIT
557 LDD -112(%sp),%r5 ; restore r5
558 LDD -120(%sp),%r4 ; restore r4
559 BVE (%rp) ; return to caller
560 LDD,MB -128(%sp),%r3 ; restore r3 and pop the frame (delay slot)
561 .PROCEND ;in=23,24,25,26,29;out=28;
562
563
564;----------------------------------------------------------------------------
565;
566;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
567;
568; arg0 = rp
569; arg1 = ap
570; arg2 = bp
571; arg3 = n
572
; bn_add_words: r[i] = a[i] + b[i] + carry for n words; the final carry
; (0 or 1) is returned in %ret0. n <= 0 returns 0. No stack frame is used.
573t .reg %r22 ; a[i], then a[i] + carry in
574b .reg %r21 ; b[i]
575l .reg %r20 ; result word
576
577bn_add_words
578 .proc
579 .entry
580 .callinfo
581 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
582 .align 64
583
584 CMPIB,>= 0,n,bn_add_words_exit ; if (n <= 0) then exit
585 COPY %r0,%ret0 ; return 0 by default
586
587 ;
588 ; If 2 or more numbers do the loop
589 ;
590 CMPIB,= 1,n,bn_add_words_single_top
591 NOP
592
593 ;
594 ; This loop is unrolled 2 times (64-byte aligned as well)
595 ;
596bn_add_words_unroll2
597 LDD 0(a_ptr),t
598 LDD 0(b_ptr),b
599 ADD t,%ret0,t ; t = t+c;
600 ADD,DC %r0,%r0,%ret0 ; set c to carry
601 ADD t,b,l ; l = t + b[0]
602 ADD,DC %ret0,%r0,%ret0 ; c+= carry
603 STD l,0(r_ptr)
604
605 LDD 8(a_ptr),t
606 LDD 8(b_ptr),b
607 ADD t,%ret0,t ; t = t+c;
608 ADD,DC %r0,%r0,%ret0 ; set c to carry
609 ADD t,b,l ; l = t + b[1]
610 ADD,DC %ret0,%r0,%ret0 ; c+= carry
611 STD l,8(r_ptr)
612
613 LDO -2(n),n ; n = n - 2
614 LDO 16(a_ptr),a_ptr ; a_ptr += 2
615 LDO 16(b_ptr),b_ptr ; b_ptr += 2
616
617 CMPIB,<= 2,n,bn_add_words_unroll2 ; go again if more to do
618 LDO 16(r_ptr),r_ptr ; r_ptr += 2
619
620 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
621
622bn_add_words_single_top
623 LDD 0(a_ptr),t
624 LDD 0(b_ptr),b
625
626 ADD t,%ret0,t ; t = t+c;
627 ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??)
628 ADD t,b,l ; l = t + b[0]
629 ADD,DC %ret0,%r0,%ret0 ; c+= carry
630 STD l,0(r_ptr)
631
632bn_add_words_exit
633 .EXIT
634 BVE (%rp) ; return to caller
635 NOP
636 .PROCEND ;in=23,24,25,26,29;out=28;
637
638;----------------------------------------------------------------------------
639;
640;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
641;
642; arg0 = rp
643; arg1 = ap
644; arg2 = bp
645; arg3 = n
646
; bn_sub_words: r[i] = a[i] - b[i] - borrow for n words; the final borrow
; is returned in %ret0. n <= 0 returns 0. No stack frame is used.
; Borrow rule per word: if a[i] == b[i] the borrow is left unchanged,
; otherwise it becomes 1 when the "t1 > t2" test fails and 0 when it holds.
647t1 .reg %r22 ; a[i]
648t2 .reg %r21 ; b[i]
649sub_tmp1 .reg %r20 ; result word
650sub_tmp2 .reg %r19 ; candidate borrow for this word
651
652
653bn_sub_words
654 .proc
655 .callinfo
656 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
657 .entry
658 .align 64
659
660 CMPIB,>= 0,n,bn_sub_words_exit ; if (n <= 0) then exit
661 COPY %r0,%ret0 ; return 0 by default
662
663 ;
664 ; If 2 or more numbers do the loop
665 ;
666 CMPIB,= 1,n,bn_sub_words_single_top
667 NOP
668
669 ;
670 ; This loop is unrolled 2 times (64-byte aligned as well)
671 ;
672bn_sub_words_unroll2
673 LDD 0(a_ptr),t1
674 LDD 0(b_ptr),t2
675 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
676 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
677
678 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
679 LDO 1(%r0),sub_tmp2 ; otherwise candidate borrow = 1
680
681 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 keep the incoming borrow
682 COPY sub_tmp2,%ret0 ; else borrow = candidate
683 STD sub_tmp1,0(r_ptr)
684
685 LDD 8(a_ptr),t1
686 LDD 8(b_ptr),t2
687 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
688 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
689 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
690 LDO 1(%r0),sub_tmp2 ; otherwise candidate borrow = 1
691
692 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 keep the incoming borrow
693 COPY sub_tmp2,%ret0 ; else borrow = candidate
694 STD sub_tmp1,8(r_ptr)
695
696 LDO -2(n),n ; n = n - 2
697 LDO 16(a_ptr),a_ptr ; a_ptr += 2
698 LDO 16(b_ptr),b_ptr ; b_ptr += 2
699
700 CMPIB,<= 2,n,bn_sub_words_unroll2 ; go again if more to do
701 LDO 16(r_ptr),r_ptr ; r_ptr += 2
702
703 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
704
705bn_sub_words_single_top
706 LDD 0(a_ptr),t1
707 LDD 0(b_ptr),t2
708 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
709 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
710 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
711 LDO 1(%r0),sub_tmp2 ; otherwise candidate borrow = 1
712
713 CMPCLR,*= t1,t2,%r0 ; if t1 == t2 keep the incoming borrow
714 COPY sub_tmp2,%ret0 ; else borrow = candidate
715
716 STD sub_tmp1,0(r_ptr)
717
718bn_sub_words_exit
719 .EXIT
720 BVE (%rp) ; return to caller
721 NOP
722 .PROCEND ;in=23,24,25,26,29;out=28;
723
724;------------------------------------------------------------------------------
725;
726; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
727;
728; arg0 = h
729; arg1 = l
730; arg2 = d
731;
732; This is mainly just modified assembly from the compiler, thus the
733; lack of variable names.
734;
735;------------------------------------------------------------------------------
736bn_div_words
737 .proc
738 .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
739 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
740 .IMPORT BN_num_bits_word,CODE,NO_RELOCATION
741 .IMPORT __iob,DATA
742 .IMPORT fprintf,CODE,NO_RELOCATION
743 .IMPORT abort,CODE,NO_RELOCATION
744 .IMPORT $$div2U,MILLICODE
745 .entry
746 STD %r2,-16(%r30) ; save return pointer (reloaded from -368(%r30) once sp has moved)
747 STD,MA %r3,352(%r30) ; save r3, then bump the stack pointer by 352
748 STD %r4,-344(%r30) ; save r4..r10 in the slots following r3
749 STD %r5,-336(%r30)
750 STD %r6,-328(%r30)
751 STD %r7,-320(%r30)
752 STD %r8,-312(%r30)
753 STD %r9,-304(%r30)
754 STD %r10,-296(%r30)
755
756 STD %r27,-288(%r30) ; save gp
757
758 COPY %r24,%r3 ; save d
759 COPY %r26,%r4 ; save h (high 64-bits)
760 LDO -1(%r0),%ret0 ; return -1 by default
761
762 CMPB,*= %r0,%arg2,$D3 ; if (d == 0) return the default -1
763 COPY %r25,%r5 ; save l (low 64-bits)
764
765 LDO -48(%r30),%r29 ; create ap
766 .CALL ;in=26,29;out=28;
767 B,L BN_num_bits_word,%r2
768 COPY %r3,%r26 ; argument: d
769 LDD -288(%r30),%r27 ; restore gp
770 LDI 64,%r21
771
772 CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward)
773 COPY %ret0,%r24 ; i
774 MTSARCM %r24
775 DEPDI,Z -1,%sar,1,%r29
776 CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward)
777
778$00000012
779 SUBI 64,%r24,%r31 ; i = 64 - i;
780 CMPCLR,*<< %r4,%r3,%r0 ; if (h >= d)
781 SUB %r4,%r3,%r4 ; h -= d
782 CMPB,= %r31,%r0,$0000001A ; if (i)
783 COPY %r0,%r10 ; ret = 0
784 MTSARCM %r31 ; i to shift
785 DEPD,Z %r3,%sar,64,%r3 ; d <<= i;
786 SUBI 64,%r31,%r19 ; 64 - i; redundant
787 MTSAR %r19 ; (64 -i) to shift
788 SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i)
789 MTSARCM %r31 ; i to shift
790 DEPD,Z %r5,%sar,64,%r5 ; l <<= i;
791
792$0000001A
793 DEPDI,Z -1,31,32,%r19 ; %r19 = 0xffffffff00000000 (high-half mask)
794 EXTRD,U %r3,31,32,%r6 ; dh = d >> 32 (upper 32 bits of d)
795 EXTRD,U %r3,63,32,%r8 ; dl = d & 0xffffffff (lower 32 bits of d)
796 LDO 2(%r0),%r9 ; count = 2 passes (decremented at $00000036)
797 STD %r3,-280(%r30) ; "d" to stack
798
799$0000001C
800 DEPDI,Z -1,63,32,%r29 ; q = 0xffffffff, kept when (h>>32) == dh
801 EXTRD,U %r4,31,32,%r31 ; h >> 32
802 CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div
803 COPY %r4,%r26
804 EXTRD,U %r4,31,32,%r25
805 COPY %r6,%r24
806 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
807 B,L $$div2U,%r2
808 EXTRD,U %r6,31,32,%r23
809 DEPD %r28,31,32,%r29
810$D2
811 STD %r29,-272(%r30) ; q
812 AND %r5,%r19,%r24 ; t & 0xffffffff00000000;
813 EXTRD,U %r24,31,32,%r24 ; move those upper 32 bits of l down to the low half
814 FLDD -272(%r30),%fr7 ; q
815 FLDD -280(%r30),%fr8 ; d
816 XMPYU %fr8L,%fr7L,%fr10
817 FSTD %fr10,-256(%r30)
818 XMPYU %fr8L,%fr7R,%fr22
819 FSTD %fr22,-264(%r30)
820 XMPYU %fr8R,%fr7L,%fr11
821 XMPYU %fr8R,%fr7R,%fr23
822 FSTD %fr11,-232(%r30)
823 FSTD %fr23,-240(%r30)
824 LDD -256(%r30),%r28
825 DEPD,Z %r28,31,32,%r2
826 LDD -264(%r30),%r20
827 ADD,L %r20,%r2,%r31
828 LDD -232(%r30),%r22
829 DEPD,Z %r22,31,32,%r22
830 LDD -240(%r30),%r21
831 B $00000024 ; enter loop
832 ADD,L %r21,%r22,%r23
833
834$0000002A
835 LDO -1(%r29),%r29
836 SUB %r23,%r8,%r23
837$00000024
838 SUB %r4,%r31,%r25
839 AND %r25,%r19,%r26
840 CMPB,*<>,N %r0,%r26,$00000046 ; (forward)
841 DEPD,Z %r25,31,32,%r20
842 OR %r20,%r24,%r21
843 CMPB,*<<,N %r21,%r23,$0000002A ;(backward)
844 SUB %r31,%r6,%r31
845;-------------Break path---------------------
846
847$00000046
848 DEPD,Z %r23,31,32,%r25 ;tl
849 EXTRD,U %r23,31,32,%r26 ;t
850 AND %r25,%r19,%r24 ;tl = (tl<<32)&0xffffffff00000000L
851 ADD,L %r31,%r26,%r31 ;th += t;
852 CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl)
853 LDO 1(%r31),%r31 ; th++;
854 CMPB,*<<=,N %r31,%r4,$00000036 ;if (n < th) (forward)
855 LDO -1(%r29),%r29 ;q--;
856 ADD,L %r4,%r3,%r4 ;h += d;
857$00000036
858 ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward)
859 SUB %r5,%r24,%r28 ; l -= tl;
860 SUB %r4,%r31,%r24 ; h -= th;
861 SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32));
862 DEPD,Z %r29,31,32,%r10 ; ret = q<<32
863 b $0000001C
864 DEPD,Z %r28,31,32,%r5 ; l = l << 32
865
866$D1
867 OR %r10,%r29,%r28 ; ret |= q
868$D3
869 LDD -368(%r30),%r2 ; reload the return pointer saved at entry
870$D0
871 LDD -296(%r30),%r10 ; restore r10..r4 from their entry slots
872 LDD -304(%r30),%r9
873 LDD -312(%r30),%r8
874 LDD -320(%r30),%r7
875 LDD -328(%r30),%r6
876 LDD -336(%r30),%r5
877 LDD -344(%r30),%r4
878 BVE (%r2)
879 .EXIT
880 LDD,MB -352(%r30),%r3 ; pop the 352-byte adjustment and restore r3
881
882bn_div_err_case
883 MFIA %r6
884 ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1
885 LDO R'bn_div_words-bn_div_err_case(%r1),%r6
886 ADDIL LT'__iob,%r27,%r1
887 LDD RT'__iob(%r1),%r26
888 ADDIL L'C$4-bn_div_words,%r6,%r1
889 LDO R'C$4-bn_div_words(%r1),%r25 ; format string at label C$4
890 LDO 64(%r26),%r26 ; stream = __iob + 64 (presumably stderr -- confirm)
891 .CALL ;in=24,25,26,29;out=28;
892 B,L fprintf,%r2 ; %r24 still holds i from the bit-count check above
893 LDO -48(%r30),%r29
894 LDD -288(%r30),%r27 ; restore gp
895 .CALL ;in=29;
896 B,L abort,%r2
897 LDO -48(%r30),%r29
898 LDD -288(%r30),%r27 ; restore gp
899 B $D0 ; fall back to the common epilogue
900 LDD -368(%r30),%r2 ; reload the return pointer saved at entry
901 .PROCEND ;in=24,25,26,29;out=28;
902
903;----------------------------------------------------------------------------
904;
905; Registers to hold 64-bit values to manipulate. The "L" part
906; of the register corresponds to the upper 32-bits, while the "R"
907; part corresponds to the lower 32-bits
908;
909; Note, that when using b6 and b7, the code must save these before
910; using them because they are callee save registers
911;
912;
913; Floating point registers to use to save values that
914; are manipulated. These don't collide with ftemp1-6 and
915; are all caller save registers
916;
917a0 .reg %fr22
918a0L .reg %fr22L
919a0R .reg %fr22R
920
921a1 .reg %fr23
922a1L .reg %fr23L
923a1R .reg %fr23R
924
925a2 .reg %fr24
926a2L .reg %fr24L
927a2R .reg %fr24R
928
929a3 .reg %fr25
930a3L .reg %fr25L
931a3R .reg %fr25R
932
933a4 .reg %fr26
934a4L .reg %fr26L
935a4R .reg %fr26R
936
937a5 .reg %fr27
938a5L .reg %fr27L
939a5R .reg %fr27R
940
941a6 .reg %fr28
942a6L .reg %fr28L
943a6R .reg %fr28R
944
945a7 .reg %fr29
946a7L .reg %fr29L
947a7R .reg %fr29R
948
949b0 .reg %fr30
950b0L .reg %fr30L
951b0R .reg %fr30R
952
953b1 .reg %fr31
954b1L .reg %fr31L
955b1R .reg %fr31R
956
957;
958; Temporary floating point variables, these are all caller save
959; registers
960;
961ftemp1 .reg %fr4
962ftemp2 .reg %fr5
963ftemp3 .reg %fr6
964ftemp4 .reg %fr7
965
966;
967; The B set of registers when used.
968;
969
970b2 .reg %fr8
971b2L .reg %fr8L
972b2R .reg %fr8R
973
974b3 .reg %fr9
975b3L .reg %fr9L
976b3R .reg %fr9R
977
978b4 .reg %fr10
979b4L .reg %fr10L
980b4R .reg %fr10R
981
982b5 .reg %fr11
983b5L .reg %fr11L
984b5R .reg %fr11R
985
986b6 .reg %fr12
987b6L .reg %fr12L
988b6R .reg %fr12R
989
990b7 .reg %fr13
991b7L .reg %fr13L
992b7R .reg %fr13R
993
994c1 .reg %r21 ; only reg
995temp1 .reg %r20 ; only reg
996temp2 .reg %r19 ; only reg
997temp3 .reg %r31 ; only reg
998
999m1 .reg %r28
1000c2 .reg %r23
1001high_one .reg %r1
1002ht .reg %r6
1003lt .reg %r5
1004m .reg %r4
1005c3 .reg %r3
1006
1007SQR_ADD_C .macro A0L,A0R,C1,C2,C3
1008 XMPYU A0L,A0R,ftemp1 ; m = A0L*A0R (upper half x lower half)
1009 FSTD ftemp1,-24(%sp) ; store m
1010
1011 XMPYU A0R,A0R,ftemp2 ; lt = A0R*A0R (lower half squared)
1012 FSTD ftemp2,-16(%sp) ; store lt
1013
1014 XMPYU A0L,A0L,ftemp3 ; ht = A0L*A0L (upper half squared)
1015 FSTD ftemp3,-8(%sp) ; store ht
1016
1017 LDD -24(%sp),m ; load m
1018 AND m,high_mask,temp2 ; m & Mask
1019 DEPD,Z m,30,31,temp3 ; temp3 = m << (32+1)
1020 LDD -16(%sp),lt ; lt
1021
1022 LDD -8(%sp),ht ; ht
1023 EXTRD,U temp2,32,33,temp1 ; temp1 = (m & Mask) >> (32-1)
1024 ADD temp3,lt,lt ; lt = lt + (m << 33)
1025 ADD,L ht,temp1,ht ; ht += temp1
1026 ADD,DC ht,%r0,ht ; ht += carry out of the lt addition
1027
1028 ADD C1,lt,C1 ; c1=c1+lt
1029 ADD,DC ht,%r0,ht ; ht += carry out of c1+lt
1030
1031 ADD C2,ht,C2 ; c2=c2+ht
1032 ADD,DC C3,%r0,C3 ; c3 += carry out of c2+ht
1033.endm
1034
1035SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
1036 XMPYU A0L,A1R,ftemp1 ; m1 = A0L*A1R (upper half of A0 x lower half of A1)
1037 FSTD ftemp1,-16(%sp) ;
1038 XMPYU A0R,A1L,ftemp2 ; m = A0R*A1L (lower half of A0 x upper half of A1)
1039 FSTD ftemp2,-8(%sp) ;
1040 XMPYU A0R,A1R,ftemp3 ; lt = A0R*A1R (lower x lower)
1041 FSTD ftemp3,-32(%sp)
1042 XMPYU A0L,A1L,ftemp4 ; ht = A0L*A1L (upper x upper)
1043 FSTD ftemp4,-24(%sp) ;
1044
1045 LDD -8(%sp),m ; m (register alias for %r4)
1046 LDD -16(%sp),m1 ; m1 (register alias for %r28)
1047 ADD,L m,m1,m ; m+m1
1048
1049 DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32
1050 LDD -24(%sp),ht ; ht (register alias for %r6)
1051
1052 CMPCLR,*>>= m,m1,%r0 ; if (m < m1), i.e. m+m1 wrapped
1053 ADD,L ht,high_one,ht ; ht+=high_one
1054
1055 EXTRD,U m,31,32,temp1 ; m >> 32
1056 LDD -32(%sp),lt ; lt
1057 ADD,L ht,temp1,ht ; ht+= m>>32
1058 ADD lt,temp3,lt ; lt = lt + ((m+m1) << 32)
1059 ADD,DC ht,%r0,ht ; ht += carry out of the lt addition
1060
1061 ADD ht,ht,ht ; ht=ht+ht;
1062 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1063
1064 ADD lt,lt,lt ; lt=lt+lt;
1065 ADD,DC ht,%r0,ht ; add in carry (ht++)
1066
1067 ADD C1,lt,C1 ; c1=c1+lt
1068 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1069 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1070
1071 ADD C2,ht,C2 ; c2 = c2 + ht
1072 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1073.endm
1074
1075;
1076;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1077; arg0 = r_ptr
1078; arg1 = a_ptr
1079;
1080
1081bn_sqr_comba8
1082 .PROC
1083 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1084 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1085 .ENTRY
1086 .align 64
1087
1088 STD %r3,0(%sp) ; save r3
1089 STD %r4,8(%sp) ; save r4
1090 STD %r5,16(%sp) ; save r5
1091 STD %r6,24(%sp) ; save r6
1092
1093 ;
1094 ; Zero out carries
1095 ;
1096 COPY %r0,c1
1097 COPY %r0,c2
1098 COPY %r0,c3
1099
1100 LDO 128(%sp),%sp ; bump stack (saved registers now sit at -128..-104(%sp))
1101 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1102 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1103
1104 ;
1105 ; Load up all of the values we are going to use
1106 ;
1107 FLDD 0(a_ptr),a0
1108 FLDD 8(a_ptr),a1
1109 FLDD 16(a_ptr),a2
1110 FLDD 24(a_ptr),a3
1111 FLDD 32(a_ptr),a4
1112 FLDD 40(a_ptr),a5
1113 FLDD 48(a_ptr),a6
1114 FLDD 56(a_ptr),a7
1115
1116 SQR_ADD_C a0L,a0R,c1,c2,c3
1117 STD c1,0(r_ptr) ; r[0] = c1;
1118 COPY %r0,c1 ; the stored register becomes the new top carry word
1119
1120 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1121 STD c2,8(r_ptr) ; r[1] = c2;
1122 COPY %r0,c2
1123
1124 SQR_ADD_C a1L,a1R,c3,c1,c2
1125 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1126 STD c3,16(r_ptr) ; r[2] = c3;
1127 COPY %r0,c3
1128
1129 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1130 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1131 STD c1,24(r_ptr) ; r[3] = c1;
1132 COPY %r0,c1
1133
1134 SQR_ADD_C a2L,a2R,c2,c3,c1
1135 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1136 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
1137 STD c2,32(r_ptr) ; r[4] = c2;
1138 COPY %r0,c2
1139
1140 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
1141 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
1142 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1143 STD c3,40(r_ptr) ; r[5] = c3;
1144 COPY %r0,c3
1145
1146 SQR_ADD_C a3L,a3R,c1,c2,c3
1147 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
1148 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
1149 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
1150 STD c1,48(r_ptr) ; r[6] = c1;
1151 COPY %r0,c1
1152
1153 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
1154 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
1155 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
1156 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
1157 STD c2,56(r_ptr) ; r[7] = c2;
1158 COPY %r0,c2
1159
1160 SQR_ADD_C a4L,a4R,c3,c1,c2
1161 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
1162 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
1163 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
1164 STD c3,64(r_ptr) ; r[8] = c3;
1165 COPY %r0,c3
1166
1167 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
1168 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
1169 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
1170 STD c1,72(r_ptr) ; r[9] = c1;
1171 COPY %r0,c1
1172
1173 SQR_ADD_C a5L,a5R,c2,c3,c1
1174 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
1175 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
1176 STD c2,80(r_ptr) ; r[10] = c2;
1177 COPY %r0,c2
1178
1179 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
1180 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
1181 STD c3,88(r_ptr) ; r[11] = c3;
1182 COPY %r0,c3
1183
1184 SQR_ADD_C a6L,a6R,c1,c2,c3
1185 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
1186 STD c1,96(r_ptr) ; r[12] = c1;
1187 COPY %r0,c1
1188
1189 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
1190 STD c2,104(r_ptr) ; r[13] = c2;
1191 COPY %r0,c2
1192
1193 SQR_ADD_C a7L,a7R,c3,c1,c2
1194 STD c3, 112(r_ptr) ; r[14] = c3
1195 STD c1, 120(r_ptr) ; r[15] = c1
1196
1197 .EXIT
1198 LDD -104(%sp),%r6 ; restore r6
1199 LDD -112(%sp),%r5 ; restore r5
1200 LDD -120(%sp),%r4 ; restore r4
1201 BVE (%rp)
1202 LDD,MB -128(%sp),%r3 ; pop the 128-byte frame and restore r3
1203
1204 .PROCEND
1205
1206;-----------------------------------------------------------------------------
1207;
1208;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1209; arg0 = r_ptr
1210; arg1 = a_ptr
1211;
1212
1213bn_sqr_comba4
1214 .proc
1215 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1216 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1217 .entry
1218 .align 64
1219 STD %r3,0(%sp) ; save r3
1220 STD %r4,8(%sp) ; save r4
1221 STD %r5,16(%sp) ; save r5
1222 STD %r6,24(%sp) ; save r6
1223
1224 ;
1225 ; Zero out carries
1226 ;
1227 COPY %r0,c1
1228 COPY %r0,c2
1229 COPY %r0,c3
1230
1231 LDO 128(%sp),%sp ; bump stack (saved registers now sit at -128..-104(%sp))
1232 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1233 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1234
1235 ;
1236 ; Load up all of the values we are going to use
1237 ;
1238 FLDD 0(a_ptr),a0
1239 FLDD 8(a_ptr),a1
1240 FLDD 16(a_ptr),a2
1241 FLDD 24(a_ptr),a3
1242 ;
1243 ; Only a[0]..a[3] belong to the 4-word operand, so nothing beyond
1244 ; 24(a_ptr) is loaded; a4..a7 are never referenced in this routine.
1245 ;
1246
1247 SQR_ADD_C a0L,a0R,c1,c2,c3
1248
1249 STD c1,0(r_ptr) ; r[0] = c1;
1250 COPY %r0,c1 ; the stored register becomes the new top carry word
1251
1252 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1253
1254 STD c2,8(r_ptr) ; r[1] = c2;
1255 COPY %r0,c2
1256
1257 SQR_ADD_C a1L,a1R,c3,c1,c2
1258 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1259
1260 STD c3,16(r_ptr) ; r[2] = c3;
1261 COPY %r0,c3
1262
1263 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1264 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1265
1266 STD c1,24(r_ptr) ; r[3] = c1;
1267 COPY %r0,c1
1268
1269 SQR_ADD_C a2L,a2R,c2,c3,c1
1270 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1271
1272 STD c2,32(r_ptr) ; r[4] = c2;
1273 COPY %r0,c2
1274
1275 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1276 STD c3,40(r_ptr) ; r[5] = c3;
1277 COPY %r0,c3
1278
1279 SQR_ADD_C a3L,a3R,c1,c2,c3
1280 STD c1,48(r_ptr) ; r[6] = c1;
1281 STD c2,56(r_ptr) ; r[7] = c2;
1282
1283 .EXIT
1284 LDD -104(%sp),%r6 ; restore r6
1285 LDD -112(%sp),%r5 ; restore r5
1286 LDD -120(%sp),%r4 ; restore r4
1287 BVE (%rp)
1288 LDD,MB -128(%sp),%r3 ; pop the 128-byte frame and restore r3
1289
1290 .PROCEND
1291
1292
1293;---------------------------------------------------------------------------
1294
1295MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
1296 XMPYU A0L,B0R,ftemp1 ; m1 = A0L*B0R (upper half of A x lower half of B)
1297 FSTD ftemp1,-16(%sp) ;
1298 XMPYU A0R,B0L,ftemp2 ; m = A0R*B0L (lower half of A x upper half of B)
1299 FSTD ftemp2,-8(%sp) ;
1300 XMPYU A0R,B0R,ftemp3 ; lt = A0R*B0R (lower x lower)
1301 FSTD ftemp3,-32(%sp)
1302 XMPYU A0L,B0L,ftemp4 ; ht = A0L*B0L (upper x upper)
1303 FSTD ftemp4,-24(%sp) ;
1304
1305 LDD -8(%sp),m ; m (register alias for %r4)
1306 LDD -16(%sp),m1 ; m1 (register alias for %r28)
1307 ADD,L m,m1,m ; m+m1
1308
1309 DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32
1310 LDD -24(%sp),ht ; ht (register alias for %r6)
1311
1312 CMPCLR,*>>= m,m1,%r0 ; if (m < m1), i.e. m+m1 wrapped
1313 ADD,L ht,high_one,ht ; ht+=high_one
1314
1315 EXTRD,U m,31,32,temp1 ; m >> 32
1316 LDD -32(%sp),lt ; lt
1317 ADD,L ht,temp1,ht ; ht+= m>>32
1318 ADD lt,temp3,lt ; lt = lt + ((m+m1) << 32)
1319 ADD,DC ht,%r0,ht ; ht += carry out of the lt addition
1320
1321 ADD C1,lt,C1 ; c1=c1+lt
1322 ADD,DC ht,%r0,ht ; add in carry (ht++)
1323
1324 ADD C2,ht,C2 ; c2 = c2 + ht
1325 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1326.endm
1327
1328
1329;
1330;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1331; arg0 = r_ptr
1332; arg1 = a_ptr
1333; arg2 = b_ptr
1334;
1335
1336bn_mul_comba8
1337 .proc
1338 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1339 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1340 .entry
1341 .align 64
1342
1343 STD %r3,0(%sp) ; save r3
1344 STD %r4,8(%sp) ; save r4
1345 STD %r5,16(%sp) ; save r5
1346 STD %r6,24(%sp) ; save r6
1347 FSTD %fr12,32(%sp) ; save fr12 (aliased as b6)
1348 FSTD %fr13,40(%sp) ; save fr13 (aliased as b7)
1349
1350 ;
1351 ; Zero out carries
1352 ;
1353 COPY %r0,c1
1354 COPY %r0,c2
1355 COPY %r0,c3
1356
1357 LDO 128(%sp),%sp ; bump stack (saved registers now sit at -128..-88(%sp))
1358 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1359
1360 ;
1361 ; Load up all of the values we are going to use
1362 ;
1363 FLDD 0(a_ptr),a0
1364 FLDD 8(a_ptr),a1
1365 FLDD 16(a_ptr),a2
1366 FLDD 24(a_ptr),a3
1367 FLDD 32(a_ptr),a4
1368 FLDD 40(a_ptr),a5
1369 FLDD 48(a_ptr),a6
1370 FLDD 56(a_ptr),a7
1371
1372 FLDD 0(b_ptr),b0
1373 FLDD 8(b_ptr),b1
1374 FLDD 16(b_ptr),b2
1375 FLDD 24(b_ptr),b3
1376 FLDD 32(b_ptr),b4
1377 FLDD 40(b_ptr),b5
1378 FLDD 48(b_ptr),b6
1379 FLDD 56(b_ptr),b7
1380
1381 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1382 STD c1,0(r_ptr) ; r[0] = c1
1383 COPY %r0,c1 ; the stored register becomes the new top carry word
1384
1385 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1386 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1387 STD c2,8(r_ptr) ; r[1] = c2
1388 COPY %r0,c2
1389
1390 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1391 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1392 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1393 STD c3,16(r_ptr) ; r[2] = c3
1394 COPY %r0,c3
1395
1396 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1397 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1398 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1399 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1400 STD c1,24(r_ptr) ; r[3] = c1
1401 COPY %r0,c1
1402
1403 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1404 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1405 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1406 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1407 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1408 STD c2,32(r_ptr) ; r[4] = c2
1409 COPY %r0,c2
1410
1411 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1412 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1413 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1414 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1415 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1416 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1417 STD c3,40(r_ptr) ; r[5] = c3
1418 COPY %r0,c3
1419
1420 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1421 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1422 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1423 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1424 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1425 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1426 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1427 STD c1,48(r_ptr) ; r[6] = c1
1428 COPY %r0,c1
1429
1430 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1431 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1432 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1433 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1434 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1435 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1436 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1437 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1438 STD c2,56(r_ptr) ; r[7] = c2
1439 COPY %r0,c2
1440
1441 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1442 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1443 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1444 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1445 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1446 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1447 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1448 STD c3,64(r_ptr) ; r[8] = c3
1449 COPY %r0,c3
1450
1451 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1452 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1453 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1454 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1455 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1456 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1457 STD c1,72(r_ptr) ; r[9] = c1
1458 COPY %r0,c1
1459
1460 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1461 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1462 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1463 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1464 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1465 STD c2,80(r_ptr) ; r[10] = c2
1466 COPY %r0,c2
1467
1468 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1469 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1470 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1471 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1472 STD c3,88(r_ptr) ; r[11] = c3
1473 COPY %r0,c3
1474
1475 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1476 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1477 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1478 STD c1,96(r_ptr) ; r[12] = c1
1479 COPY %r0,c1
1480
1481 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1482 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1483 STD c2,104(r_ptr) ; r[13] = c2
1484 COPY %r0,c2
1485
1486 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1487 STD c3,112(r_ptr) ; r[14] = c3
1488 STD c1,120(r_ptr) ; r[15] = c1
1489
1490 .EXIT
1491 FLDD -88(%sp),%fr13 ; restore fr13
1492 FLDD -96(%sp),%fr12 ; restore fr12
1493 LDD -104(%sp),%r6 ; restore r6
1494 LDD -112(%sp),%r5 ; restore r5
1495 LDD -120(%sp),%r4 ; restore r4
1496 BVE (%rp)
1497 LDD,MB -128(%sp),%r3 ; pop the 128-byte frame and restore r3
1498
1499 .PROCEND
1500
1501;-----------------------------------------------------------------------------
1502;
1503;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1504; arg0 = r_ptr
1505; arg1 = a_ptr
1506; arg2 = b_ptr
1507;
1508
1509bn_mul_comba4
1510 .proc
1511 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1512 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1513 .entry
1514 .align 64
1515
1516 STD %r3,0(%sp) ; save r3
1517 STD %r4,8(%sp) ; save r4
1518 STD %r5,16(%sp) ; save r5
1519 STD %r6,24(%sp) ; save r6
1520 FSTD %fr12,32(%sp) ; save fr12 (aliased as b6; not referenced in this routine)
1521 FSTD %fr13,40(%sp) ; save fr13 (aliased as b7; not referenced in this routine)
1522
1523 ;
1524 ; Zero out carries
1525 ;
1526 COPY %r0,c1
1527 COPY %r0,c2
1528 COPY %r0,c3
1529
1530 LDO 128(%sp),%sp ; bump stack (saved registers now sit at -128..-88(%sp))
1531 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1532
1533 ;
1534 ; Load up all of the values we are going to use
1535 ;
1536 FLDD 0(a_ptr),a0
1537 FLDD 8(a_ptr),a1
1538 FLDD 16(a_ptr),a2
1539 FLDD 24(a_ptr),a3
1540
1541 FLDD 0(b_ptr),b0
1542 FLDD 8(b_ptr),b1
1543 FLDD 16(b_ptr),b2
1544 FLDD 24(b_ptr),b3
1545
1546 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1547 STD c1,0(r_ptr) ; r[0] = c1
1548 COPY %r0,c1 ; the stored register becomes the new top carry word
1549
1550 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1551 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1552 STD c2,8(r_ptr) ; r[1] = c2
1553 COPY %r0,c2
1554
1555 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1556 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1557 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1558 STD c3,16(r_ptr) ; r[2] = c3
1559 COPY %r0,c3
1560
1561 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1562 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1563 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1564 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1565 STD c1,24(r_ptr) ; r[3] = c1
1566 COPY %r0,c1
1567
1568 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1569 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1570 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1571 STD c2,32(r_ptr) ; r[4] = c2
1572 COPY %r0,c2
1573
1574 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1575 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1576 STD c3,40(r_ptr) ; r[5] = c3
1577 COPY %r0,c3
1578
1579 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1580 STD c1,48(r_ptr) ; r[6] = c1
1581 STD c2,56(r_ptr) ; r[7] = c2
1582
1583 .EXIT
1584 FLDD -88(%sp),%fr13 ; restore fr13
1585 FLDD -96(%sp),%fr12 ; restore fr12
1586 LDD -104(%sp),%r6 ; restore r6
1587 LDD -112(%sp),%r5 ; restore r5
1588 LDD -120(%sp),%r4 ; restore r4
1589 BVE (%rp)
1590 LDD,MB -128(%sp),%r3 ; pop the 128-byte frame and restore r3
1591
1592 .PROCEND
1593
1594
1595 .SPACE $TEXT$
1596 .SUBSPA $CODE$
1597 .SPACE $PRIVATE$,SORT=16
1598 .IMPORT $global$,DATA
1599 .SPACE $TEXT$
1600 .SUBSPA $CODE$
1601 .SUBSPA $LIT$,ACCESS=0x2c
1602C$4
1603 .ALIGN 8
1604 .STRINGZ "Division would overflow (%d)\n"
1605 .END
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl
new file mode 100644
index 0000000000..08e0053473
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/ppc.pl
@@ -0,0 +1,2078 @@
1#!/usr/bin/env perl
2#
3# Implemented as a Perl wrapper as we want to support several different
4# architectures with single file. We pick up the target based on the
5# file name we are asked to generate.
6#
7# It should be noted though that this perl code is nothing like
8# <openssl>/crypto/perlasm/x86*. In this case perl is used pretty much
9# as pre-processor to cover for platform differences in name decoration,
10# linker tables, 32-/64-bit instruction sets...
11#
12# As you might know there're several PowerPC ABI in use. Most notably
13# Linux and AIX use different 32-bit ABIs. Good news are that these ABIs
14# are similar enough to implement leaf(!) functions, which would be ABI
15# neutral. And that's what you find here: ABI neutral leaf functions.
16# In case you wonder what that is...
17#
18# AIX performance
19#
20# MEASUREMENTS WITH cc ON a 200 MhZ PowerPC 604e.
21#
22# The following is the performance of 32-bit compiler
23# generated code:
24#
25# OpenSSL 0.9.6c 21 dec 2001
26# built on: Tue Jun 11 11:06:51 EDT 2002
27# options:bn(64,32) ...
28#compiler: cc -DTHREADS -DAIX -DB_ENDIAN -DBN_LLONG -O3
29# sign verify sign/s verify/s
30#rsa 512 bits 0.0098s 0.0009s 102.0 1170.6
31#rsa 1024 bits 0.0507s 0.0026s 19.7 387.5
32#rsa 2048 bits 0.3036s 0.0085s 3.3 117.1
33#rsa 4096 bits 2.0040s 0.0299s 0.5 33.4
34#dsa 512 bits 0.0087s 0.0106s 114.3 94.5
35#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0
36#
37# Same benchmark with this assembler code:
38#
39#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2
40#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1
41#rsa 2048 bits 0.1744s 0.0050s 5.7 201.2
42#rsa 4096 bits 1.1644s 0.0179s 0.9 55.7
43#dsa 512 bits 0.0052s 0.0062s 191.6 162.0
44#dsa 1024 bits 0.0149s 0.0180s 67.0 55.5
45#
46# Number of operations increases by almost 75%
47#
48# Here are performance numbers for 64-bit compiler
49# generated code:
50#
51# OpenSSL 0.9.6g [engine] 9 Aug 2002
52# built on: Fri Apr 18 16:59:20 EDT 2003
53# options:bn(64,64) ...
54# compiler: cc -DTHREADS -D_REENTRANT -q64 -DB_ENDIAN -O3
55# sign verify sign/s verify/s
56#rsa 512 bits 0.0028s 0.0003s 357.1 3844.4
57#rsa 1024 bits 0.0148s 0.0008s 67.5 1239.7
58#rsa 2048 bits 0.0963s 0.0028s 10.4 353.0
59#rsa 4096 bits 0.6538s 0.0102s 1.5 98.1
60#dsa 512 bits 0.0026s 0.0032s 382.5 313.7
61#dsa 1024 bits 0.0081s 0.0099s 122.8 100.6
62#
63# Same benchmark with this assembler code:
64#
65#rsa 512 bits 0.0020s 0.0002s 510.4 6273.7
66#rsa 1024 bits 0.0088s 0.0005s 114.1 2128.3
67#rsa 2048 bits 0.0540s 0.0016s 18.5 622.5
68#rsa 4096 bits 0.3700s 0.0058s 2.7 171.0
69#dsa 512 bits 0.0016s 0.0020s 610.7 507.1
70#dsa 1024 bits 0.0047s 0.0058s 212.5 173.2
71#
72# Again, performance increases by about 75%
73#
74# Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code)
75# OpenSSL 0.9.7c 30 Sep 2003
76#
77# Original code.
78#
79#rsa 512 bits 0.0011s 0.0001s 906.1 11012.5
80#rsa 1024 bits 0.0060s 0.0003s 166.6 3363.1
81#rsa 2048 bits 0.0370s 0.0010s 27.1 982.4
82#rsa 4096 bits 0.2426s 0.0036s 4.1 280.4
83#dsa 512 bits 0.0010s 0.0012s 1038.1 841.5
84#dsa 1024 bits 0.0030s 0.0037s 329.6 269.7
85#dsa 2048 bits 0.0101s 0.0127s 98.9 78.6
86#
87# Same benchmark with this assembler code:
88#
89#rsa 512 bits 0.0007s 0.0001s 1416.2 16645.9
90#rsa 1024 bits 0.0036s 0.0002s 274.4 5380.6
91#rsa 2048 bits 0.0222s 0.0006s 45.1 1589.5
92#rsa 4096 bits 0.1469s 0.0022s 6.8 449.6
93#dsa 512 bits 0.0006s 0.0007s 1664.2 1376.2
94#dsa 1024 bits 0.0018s 0.0023s 545.0 442.2
95#dsa 2048 bits 0.0061s 0.0075s 163.5 132.8
96#
97# Performance increase of ~60%
98#
99# If you have comments or suggestions to improve code send
100# me a note at schari@us.ibm.com
101#
102
103$opf = shift; # first argument: output file name; its name selects the target
104
105if ($opf =~ /32\.s/) { # name contains "32.s": 32-bit mnemonics
106 $BITS= 32;
107 $BNSZ= $BITS/8; # size of one BN word in bytes
108 $ISA= "\"ppc\"";
109
110 $LD= "lwz"; # load
111 $LDU= "lwzu"; # load and update
112 $ST= "stw"; # store
113 $STU= "stwu"; # store and update
114 $UMULL= "mullw"; # unsigned multiply low
115 $UMULH= "mulhwu"; # unsigned multiply high
116 $UDIV= "divwu"; # unsigned divide
117 $UCMPI= "cmplwi"; # unsigned compare with immediate
118 $UCMP= "cmplw"; # unsigned compare
119 $CNTLZ= "cntlzw"; # count leading zeros
120 $SHL= "slw"; # shift left
121 $SHR= "srw"; # unsigned shift right
122 $SHRI= "srwi"; # unsigned shift right by immediate
123 $SHLI= "slwi"; # shift left by immediate
124 $CLRU= "clrlwi"; # clear upper bits
125 $INSR= "insrwi"; # insert right
126 $ROTL= "rotlwi"; # rotate left by immediate
127 $TR= "tw"; # conditional trap
128} elsif ($opf =~ /64\.s/) { # name contains "64.s": 64-bit mnemonics
129 $BITS= 64;
130 $BNSZ= $BITS/8; # size of one BN word in bytes
131 $ISA= "\"ppc64\"";
132
133 # same as above, but 64-bit mnemonics...
134 $LD= "ld"; # load
135 $LDU= "ldu"; # load and update
136 $ST= "std"; # store
137 $STU= "stdu"; # store and update
138 $UMULL= "mulld"; # unsigned multiply low
139 $UMULH= "mulhdu"; # unsigned multiply high
140 $UDIV= "divdu"; # unsigned divide
141 $UCMPI= "cmpldi"; # unsigned compare with immediate
142 $UCMP= "cmpld"; # unsigned compare
143 $CNTLZ= "cntlzd"; # count leading zeros
144 $SHL= "sld"; # shift left
145 $SHR= "srd"; # unsigned shift right
146 $SHRI= "srdi"; # unsigned shift right by immediate
147 $SHLI= "sldi"; # shift left by immediate
148 $CLRU= "clrldi"; # clear upper bits
149 $INSR= "insrdi"; # insert right
150 $ROTL= "rotldi"; # rotate left by immediate
151 $TR= "td"; # conditional trap
152} else { die "nonsense $opf"; } # neither pattern matched: refuse to guess a word size
153
154( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!"; # a defined second argument skips the redirect; otherwise STDOUT is reopened onto $opf
155
156# function entry points from the AIX code
157#
158# There are other, more elegant, ways to handle this. We (IBM) chose
159# this approach as it plays well with scripts we run to 'namespace'
160# OpenSSL .i.e. we add a prefix to all the public symbols so we can
161# co-exist in the same process with other implementations of OpenSSL.
162# 'cleverer' ways of doing these substitutions tend to hide data we
163# need to be obvious.
164#
165my @items = ("bn_sqr_comba4", # entry points whose ".name" labels the do_* subs rewrite
166 "bn_sqr_comba8",
167 "bn_mul_comba4",
168 "bn_mul_comba8",
169 "bn_sub_words",
170 "bn_add_words",
171 "bn_div_words",
172 "bn_sqr_words",
173 "bn_mul_words",
174 "bn_mul_add_words");
175
176if ($opf =~ /linux/) { do_linux(); } # flavour is picked from the output file name
177elsif ($opf =~ /aix/) { do_aix(); }
178elsif ($opf =~ /osx/) { do_osx(); }
179else { do_bsd(); } # anything else falls back to the BSD variant
180
181sub do_linux { # print the template with Linux symbol conventions applied
182 $d=&data(); # assembler template text
183
184 if ($BITS==64) { # 64-bit: give every entry point an ".opd" descriptor
185 foreach $t (@items) { # the ".name:" label is replaced by the descriptor plus the label
186 $d =~ s/\.$t:/\
187\t.section\t".opd","aw"\
188\t.align\t3\
189\t.globl\t$t\
190$t:\
191\t.quad\t.$t,.TOC.\@tocbase,0\
192\t.size\t$t,24\
193\t.previous\n\
194\t.type\t.$t,\@function\
195\t.globl\t.$t\
196.$t:/g;
197 }
198 }
199 else { # 32-bit: only drop the leading dot from each entry-point name
200 foreach $t (@items) {
201 $d=~s/\.$t/$t/g;
202 }
203 }
204 # hide internal labels to avoid pollution of name table...
205 $d=~s/Lppcasm_/.Lppcasm_/gm; # prefix every "Lppcasm_" label with a dot
206 print $d;
207}
208
209sub do_aix { # print the template exactly as data() returns it
210 # AIX assembler is smart enough to please the linker without
211 # making us do something special...
212 print &data(); # no symbol rewriting is applied
213}
214
215# MacOSX 32 bit
216sub do_osx { # print the template with OS X symbol and comment conventions applied
217 $d=&data(); # assembler template text
218 # Change the bn symbol prefix from '.' to '_'
219 foreach $t (@items) {
220 $d=~s/\.$t/_$t/g;
221 }
222 # Change .machine to something OS X asm will accept
223 $d=~s/\.machine.*/.text/g; # the rest of the .machine line is replaced as well
224 $d=~s/\#/;/g; # change comment from '#' to ';'
225 print $d;
226}
227
228# BSD (Untested)
229sub do_bsd { # print the template with '_'-prefixed entry points
230 $d=&data(); # assembler template text
231 foreach $t (@items) { # change the bn symbol prefix from '.' to '_'
232 $d=~s/\.$t/_$t/g;
233 }
234 print $d;
235}
236
237sub data {
238 local($data)=<<EOF;
239#--------------------------------------------------------------------
240#
241#
242#
243#
244# File: ppc32.s
245#
246# Created by: Suresh Chari
247# IBM Thomas J. Watson Research Library
248# Hawthorne, NY
249#
250#
251# Description: Optimized assembly routines for OpenSSL crypto
252# on the 32 bitPowerPC platform.
253#
254#
255# Version History
256#
257# 2. Fixed bn_add,bn_sub and bn_div_words, added comments,
258# cleaned up code. Also made a single version which can
259# be used for both the AIX and Linux compilers. See NOTE
260# below.
261# 12/05/03 Suresh Chari
262# (with lots of help from) Andy Polyakov
263##
264# 1. Initial version 10/20/02 Suresh Chari
265#
266#
267# The following file works for the xlc,cc
268# and gcc compilers.
269#
270# NOTE: To get the file to link correctly with the gcc compiler
271# you have to change the names of the routines and remove
272# the first .(dot) character. This should automatically
273# be done in the build process.
274#
275# Hand optimized assembly code for the following routines
276#
277# bn_sqr_comba4
278# bn_sqr_comba8
279# bn_mul_comba4
280# bn_mul_comba8
281# bn_sub_words
282# bn_add_words
283# bn_div_words
284# bn_sqr_words
285# bn_mul_words
286# bn_mul_add_words
287#
288# NOTE: It is possible to optimize this code more for
289# specific PowerPC or Power architectures. On the Northstar
290# architecture the optimizations in this file do
291# NOT provide much improvement.
292#
293# If you have comments or suggestions to improve code send
294# me a note at schari\@us.ibm.com
295#
296#--------------------------------------------------------------------------
297#
298# Defines to be used in the assembly code.
299#
300.set r0,0 # we use it as storage for value of 0
301.set SP,1 # preserved
302.set RTOC,2 # preserved
303.set r3,3 # 1st argument/return value
304.set r4,4 # 2nd argument/volatile register
305.set r5,5 # 3rd argument/volatile register
306.set r6,6 # ...
307.set r7,7
308.set r8,8
309.set r9,9
310.set r10,10
311.set r11,11
312.set r12,12
313.set r13,13 # not used, nor any other "below" it...
314
315.set BO_IF_NOT,4
316.set BO_IF,12
317.set BO_dCTR_NZERO,16
318.set BO_dCTR_ZERO,18
319.set BO_ALWAYS,20
320.set CR0_LT,0;
321.set CR0_GT,1;
322.set CR0_EQ,2
323.set CR1_FX,4;
324.set CR1_FEX,5;
325.set CR1_VX,6
326.set LR,8
327
328# Declare function names to be global
329# NOTE: For gcc these names MUST be changed to remove
330# the first . i.e. for example change ".bn_sqr_comba4"
331# to "bn_sqr_comba4". This should be automatically done
332# in the build.
333
334 .globl .bn_sqr_comba4
335 .globl .bn_sqr_comba8
336 .globl .bn_mul_comba4
337 .globl .bn_mul_comba8
338 .globl .bn_sub_words
339 .globl .bn_add_words
340 .globl .bn_div_words
341 .globl .bn_sqr_words
342 .globl .bn_mul_words
343 .globl .bn_mul_add_words
344
345# .text section
346
347 .machine $ISA # $ISA is a template placeholder (like $LD/$ST/$BNSZ) - presumably replaced with the target instruction set when the file is generated; TODO confirm
348
349#
350# NOTE: The following label name should be changed to
351# "bn_sqr_comba4" i.e. remove the first dot
352# for the gcc compiler. This should be automatically
353# done in the build
354#
355
356.align 4
357.bn_sqr_comba4:
358#
359# Optimized version of bn_sqr_comba4.
360# Stores the 8-word square of the 4-word input a[0..3] into r[0..7].
361# void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
362# r3 contains r
363# r4 contains a
364#
365# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
366#
367# r5,r6 are the two BN_ULONGs being multiplied.
368# r7,r8 are the low ($UMULL) and high ($UMULH) words of their product.
369# r9,r10, r11 are the equivalents of c1,c2, c3; the roles rotate
370# after each word of r is stored.
371# sqr_add_c(a,i,..) adds a[i]*a[i]; sqr_add_c2(a,i,j,..) adds 2*a[i]*a[j].
372#
373 xor r0,r0,r0 # set r0 = 0. Used in the addze
374 # instructions below
375
376 #sqr_add_c(a,0,c1,c2,c3)
377 $LD r5,`0*$BNSZ`(r4) #r5=a[0]
378 $UMULL r9,r5,r5
379 $UMULH r10,r5,r5 #in first iteration. No need
380 #to add since c1=c2=c3=0.
381 # Note c3(r11) is NOT set to 0
382 # but will be.
383
384 $ST r9,`0*$BNSZ`(r3) # r[0]=c1;
385 # sqr_add_c2(a,1,0,c2,c3,c1);
386 $LD r6,`1*$BNSZ`(r4) #r6=a[1]
387 $UMULL r7,r5,r6
388 $UMULH r8,r5,r6
389
390 addc r7,r7,r7 # compute (r7,r8)=2*(r7,r8)
391 adde r8,r8,r8
392 addze r9,r0 # catch carry if any.
393 # r9= r0(=0) and carry
394
395 addc r10,r7,r10 # now add to temp result.
396 addze r11,r8 # r11 = r8 + carry: first write to c3(r11)
397 addze r9,r9
398
399 $ST r10,`1*$BNSZ`(r3) #r[1]=c2;
400 #sqr_add_c(a,1,c3,c1,c2)
401 $UMULL r7,r6,r6
402 $UMULH r8,r6,r6
403 addc r11,r7,r11
404 adde r9,r8,r9
405 addze r10,r0 #r10 restarts as 0 + carry
406 #sqr_add_c2(a,2,0,c3,c1,c2)
407 $LD r6,`2*$BNSZ`(r4) #r6=a[2]; r5 is still a[0]
408 $UMULL r7,r5,r6
409 $UMULH r8,r5,r6
410
411 addc r7,r7,r7 #double the product (r8,r7)...
412 adde r8,r8,r8
413 addze r10,r10 #...and keep the bit shifted out of r8
414
415 addc r11,r7,r11
416 adde r9,r8,r9
417 addze r10,r10
418 $ST r11,`2*$BNSZ`(r3) #r[2]=c3
419 #sqr_add_c2(a,3,0,c1,c2,c3);
420 $LD r6,`3*$BNSZ`(r4) #r6=a[3]; r5 is still a[0]
421 $UMULL r7,r5,r6
422 $UMULH r8,r5,r6
423 addc r7,r7,r7
424 adde r8,r8,r8
425 addze r11,r0 #r11 restarts as 0 + carry
426
427 addc r9,r7,r9
428 adde r10,r8,r10
429 addze r11,r11
430 #sqr_add_c2(a,2,1,c1,c2,c3);
431 $LD r5,`1*$BNSZ`(r4)
432 $LD r6,`2*$BNSZ`(r4)
433 $UMULL r7,r5,r6
434 $UMULH r8,r5,r6
435
436 addc r7,r7,r7
437 adde r8,r8,r8
438 addze r11,r11
439 addc r9,r7,r9
440 adde r10,r8,r10
441 addze r11,r11
442 $ST r9,`3*$BNSZ`(r3) #r[3]=c1
443 #sqr_add_c(a,2,c2,c3,c1);
444 $UMULL r7,r6,r6
445 $UMULH r8,r6,r6
446 addc r10,r7,r10
447 adde r11,r8,r11
448 addze r9,r0 #r9 restarts as 0 + carry
449 #sqr_add_c2(a,3,1,c2,c3,c1);
450 $LD r6,`3*$BNSZ`(r4) #r6=a[3]; r5 is still a[1]
451 $UMULL r7,r5,r6
452 $UMULH r8,r5,r6
453 addc r7,r7,r7
454 adde r8,r8,r8
455 addze r9,r9
456
457 addc r10,r7,r10
458 adde r11,r8,r11
459 addze r9,r9
460 $ST r10,`4*$BNSZ`(r3) #r[4]=c2
461 #sqr_add_c2(a,3,2,c3,c1,c2);
462 $LD r5,`2*$BNSZ`(r4) #r5=a[2]; r6 is still a[3]
463 $UMULL r7,r5,r6
464 $UMULH r8,r5,r6
465 addc r7,r7,r7
466 adde r8,r8,r8
467 addze r10,r0 #r10 restarts as 0 + carry
468
469 addc r11,r7,r11
470 adde r9,r8,r9
471 addze r10,r10
472 $ST r11,`5*$BNSZ`(r3) #r[5] = c3
473 #sqr_add_c(a,3,c1,c2,c3);
474 $UMULL r7,r6,r6
475 $UMULH r8,r6,r6
476 addc r9,r7,r9
477 adde r10,r8,r10
478
479 $ST r9,`6*$BNSZ`(r3) #r[6]=c1
480 $ST r10,`7*$BNSZ`(r3) #r[7]=c2
481 bclr BO_ALWAYS,CR0_LT #return to the caller
482 .long 0x00000000
483
484#
485# NOTE: The following label name should be changed to
486# "bn_sqr_comba8" i.e. remove the first dot
487# for the gcc compiler. This should be automatically
488# done in the build
489#
490
491.align 4
492.bn_sqr_comba8:
493#
494# This is an optimized version of the bn_sqr_comba8 routine.
495# Tightly uses the adde instruction
496# Stores the 16-word square of the 8-word input a[0..7] into r[0..15].
497#
498# void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
499# r3 contains r
500# r4 contains a
501#
502# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
503#
504# r5,r6 are the two BN_ULONGs being multiplied.
505# r7,r8 are the low ($UMULL) and high ($UMULH) words of their product.
506# r9,r10, r11 are the equivalents of c1,c2, c3.
507# Each sqr_add_c2 cross product is added to the accumulator twice.
508# Possible optimization of loading all 8 longs of a into registers
509# doesn't provide any speedup
510#
511
512 xor r0,r0,r0 #set r0 = 0.Used in addze
513 #instructions below.
514
515 #sqr_add_c(a,0,c1,c2,c3);
516 $LD r5,`0*$BNSZ`(r4)
517 $UMULL r9,r5,r5 #1st iteration: no carries.
518 $UMULH r10,r5,r5
519 $ST r9,`0*$BNSZ`(r3) # r[0]=c1;
520 #sqr_add_c2(a,1,0,c2,c3,c1);
521 $LD r6,`1*$BNSZ`(r4)
522 $UMULL r7,r5,r6
523 $UMULH r8,r5,r6
524
525 addc r10,r7,r10 #add the two register number
526 adde r11,r8,r0 # (r8,r7) to the three register
527 addze r9,r0 # number (r9,r11,r10).NOTE:r0=0
528
529 addc r10,r7,r10 #add the two register number
530 adde r11,r8,r11 # (r8,r7) to the three register
531 addze r9,r9 # number (r9,r11,r10).
532
533 $ST r10,`1*$BNSZ`(r3) # r[1]=c2
534
535 #sqr_add_c(a,1,c3,c1,c2);
536 $UMULL r7,r6,r6
537 $UMULH r8,r6,r6
538 addc r11,r7,r11
539 adde r9,r8,r9
540 addze r10,r0 #r10 restarts as 0 + carry
541 #sqr_add_c2(a,2,0,c3,c1,c2);
542 $LD r6,`2*$BNSZ`(r4) #r6 = a[2]. r5 is still a[0].
543 $UMULL r7,r5,r6
544 $UMULH r8,r5,r6
545
546 addc r11,r7,r11
547 adde r9,r8,r9
548 addze r10,r10
549
550 addc r11,r7,r11
551 adde r9,r8,r9
552 addze r10,r10
553
554 $ST r11,`2*$BNSZ`(r3) #r[2]=c3
555 #sqr_add_c2(a,3,0,c1,c2,c3);
556 $LD r6,`3*$BNSZ`(r4) #r6 = a[3]. r5 is already a[0].
557 $UMULL r7,r5,r6
558 $UMULH r8,r5,r6
559
560 addc r9,r7,r9
561 adde r10,r8,r10
562 addze r11,r0 #r11 restarts as 0 + carry
563
564 addc r9,r7,r9
565 adde r10,r8,r10
566 addze r11,r11
567 #sqr_add_c2(a,2,1,c1,c2,c3);
568 $LD r5,`1*$BNSZ`(r4)
569 $LD r6,`2*$BNSZ`(r4)
570 $UMULL r7,r5,r6
571 $UMULH r8,r5,r6
572
573 addc r9,r7,r9
574 adde r10,r8,r10
575 addze r11,r11
576
577 addc r9,r7,r9
578 adde r10,r8,r10
579 addze r11,r11
580
581 $ST r9,`3*$BNSZ`(r3) #r[3]=c1;
582 #sqr_add_c(a,2,c2,c3,c1);
583 $UMULL r7,r6,r6
584 $UMULH r8,r6,r6
585
586 addc r10,r7,r10
587 adde r11,r8,r11
588 addze r9,r0 #r9 restarts as 0 + carry
589 #sqr_add_c2(a,3,1,c2,c3,c1);
590 $LD r6,`3*$BNSZ`(r4) #r6 = a[3]. r5 is still a[1].
591 $UMULL r7,r5,r6
592 $UMULH r8,r5,r6
593
594 addc r10,r7,r10
595 adde r11,r8,r11
596 addze r9,r9
597
598 addc r10,r7,r10
599 adde r11,r8,r11
600 addze r9,r9
601 #sqr_add_c2(a,4,0,c2,c3,c1);
602 $LD r5,`0*$BNSZ`(r4)
603 $LD r6,`4*$BNSZ`(r4)
604 $UMULL r7,r5,r6
605 $UMULH r8,r5,r6
606
607 addc r10,r7,r10
608 adde r11,r8,r11
609 addze r9,r9
610
611 addc r10,r7,r10
612 adde r11,r8,r11
613 addze r9,r9
614 $ST r10,`4*$BNSZ`(r3) #r[4]=c2;
615 #sqr_add_c2(a,5,0,c3,c1,c2);
616 $LD r6,`5*$BNSZ`(r4) #r6 = a[5]. r5 is still a[0].
617 $UMULL r7,r5,r6
618 $UMULH r8,r5,r6
619
620 addc r11,r7,r11
621 adde r9,r8,r9
622 addze r10,r0 #r10 restarts as 0 + carry
623
624 addc r11,r7,r11
625 adde r9,r8,r9
626 addze r10,r10
627 #sqr_add_c2(a,4,1,c3,c1,c2);
628 $LD r5,`1*$BNSZ`(r4)
629 $LD r6,`4*$BNSZ`(r4)
630 $UMULL r7,r5,r6
631 $UMULH r8,r5,r6
632
633 addc r11,r7,r11
634 adde r9,r8,r9
635 addze r10,r10
636
637 addc r11,r7,r11
638 adde r9,r8,r9
639 addze r10,r10
640 #sqr_add_c2(a,3,2,c3,c1,c2);
641 $LD r5,`2*$BNSZ`(r4)
642 $LD r6,`3*$BNSZ`(r4)
643 $UMULL r7,r5,r6
644 $UMULH r8,r5,r6
645
646 addc r11,r7,r11
647 adde r9,r8,r9
648 addze r10,r10
649
650 addc r11,r7,r11
651 adde r9,r8,r9
652 addze r10,r10
653 $ST r11,`5*$BNSZ`(r3) #r[5]=c3;
654 #sqr_add_c(a,3,c1,c2,c3);
655 $UMULL r7,r6,r6
656 $UMULH r8,r6,r6
657 addc r9,r7,r9
658 adde r10,r8,r10
659 addze r11,r0 #r11 restarts as 0 + carry
660 #sqr_add_c2(a,4,2,c1,c2,c3);
661 $LD r6,`4*$BNSZ`(r4) #r6 = a[4]. r5 is still a[2].
662 $UMULL r7,r5,r6
663 $UMULH r8,r5,r6
664
665 addc r9,r7,r9
666 adde r10,r8,r10
667 addze r11,r11
668
669 addc r9,r7,r9
670 adde r10,r8,r10
671 addze r11,r11
672 #sqr_add_c2(a,5,1,c1,c2,c3);
673 $LD r5,`1*$BNSZ`(r4)
674 $LD r6,`5*$BNSZ`(r4)
675 $UMULL r7,r5,r6
676 $UMULH r8,r5,r6
677
678 addc r9,r7,r9
679 adde r10,r8,r10
680 addze r11,r11
681
682 addc r9,r7,r9
683 adde r10,r8,r10
684 addze r11,r11
685 #sqr_add_c2(a,6,0,c1,c2,c3);
686 $LD r5,`0*$BNSZ`(r4)
687 $LD r6,`6*$BNSZ`(r4)
688 $UMULL r7,r5,r6
689 $UMULH r8,r5,r6
690 addc r9,r7,r9
691 adde r10,r8,r10
692 addze r11,r11
693 addc r9,r7,r9
694 adde r10,r8,r10
695 addze r11,r11
696 $ST r9,`6*$BNSZ`(r3) #r[6]=c1;
697 #sqr_add_c2(a,7,0,c2,c3,c1);
698 $LD r6,`7*$BNSZ`(r4) #r6 = a[7]. r5 is still a[0].
699 $UMULL r7,r5,r6
700 $UMULH r8,r5,r6
701
702 addc r10,r7,r10
703 adde r11,r8,r11
704 addze r9,r0 #r9 restarts as 0 + carry
705 addc r10,r7,r10
706 adde r11,r8,r11
707 addze r9,r9
708 #sqr_add_c2(a,6,1,c2,c3,c1);
709 $LD r5,`1*$BNSZ`(r4)
710 $LD r6,`6*$BNSZ`(r4)
711 $UMULL r7,r5,r6
712 $UMULH r8,r5,r6
713
714 addc r10,r7,r10
715 adde r11,r8,r11
716 addze r9,r9
717 addc r10,r7,r10
718 adde r11,r8,r11
719 addze r9,r9
720 #sqr_add_c2(a,5,2,c2,c3,c1);
721 $LD r5,`2*$BNSZ`(r4)
722 $LD r6,`5*$BNSZ`(r4)
723 $UMULL r7,r5,r6
724 $UMULH r8,r5,r6
725 addc r10,r7,r10
726 adde r11,r8,r11
727 addze r9,r9
728 addc r10,r7,r10
729 adde r11,r8,r11
730 addze r9,r9
731 #sqr_add_c2(a,4,3,c2,c3,c1);
732 $LD r5,`3*$BNSZ`(r4)
733 $LD r6,`4*$BNSZ`(r4)
734 $UMULL r7,r5,r6
735 $UMULH r8,r5,r6
736
737 addc r10,r7,r10
738 adde r11,r8,r11
739 addze r9,r9
740 addc r10,r7,r10
741 adde r11,r8,r11
742 addze r9,r9
743 $ST r10,`7*$BNSZ`(r3) #r[7]=c2;
744 #sqr_add_c(a,4,c3,c1,c2);
745 $UMULL r7,r6,r6
746 $UMULH r8,r6,r6
747 addc r11,r7,r11
748 adde r9,r8,r9
749 addze r10,r0 #r10 restarts as 0 + carry
750 #sqr_add_c2(a,5,3,c3,c1,c2);
751 $LD r6,`5*$BNSZ`(r4) #r6 = a[5]. r5 is still a[3].
752 $UMULL r7,r5,r6
753 $UMULH r8,r5,r6
754 addc r11,r7,r11
755 adde r9,r8,r9
756 addze r10,r10
757 addc r11,r7,r11
758 adde r9,r8,r9
759 addze r10,r10
760 #sqr_add_c2(a,6,2,c3,c1,c2);
761 $LD r5,`2*$BNSZ`(r4)
762 $LD r6,`6*$BNSZ`(r4)
763 $UMULL r7,r5,r6
764 $UMULH r8,r5,r6
765 addc r11,r7,r11
766 adde r9,r8,r9
767 addze r10,r10
768
769 addc r11,r7,r11
770 adde r9,r8,r9
771 addze r10,r10
772 #sqr_add_c2(a,7,1,c3,c1,c2);
773 $LD r5,`1*$BNSZ`(r4)
774 $LD r6,`7*$BNSZ`(r4)
775 $UMULL r7,r5,r6
776 $UMULH r8,r5,r6
777 addc r11,r7,r11
778 adde r9,r8,r9
779 addze r10,r10
780 addc r11,r7,r11
781 adde r9,r8,r9
782 addze r10,r10
783 $ST r11,`8*$BNSZ`(r3) #r[8]=c3;
784 #sqr_add_c2(a,7,2,c1,c2,c3);
785 $LD r5,`2*$BNSZ`(r4) #r5 = a[2]. r6 is still a[7].
786 $UMULL r7,r5,r6
787 $UMULH r8,r5,r6
788
789 addc r9,r7,r9
790 adde r10,r8,r10
791 addze r11,r0 #r11 restarts as 0 + carry
792 addc r9,r7,r9
793 adde r10,r8,r10
794 addze r11,r11
795 #sqr_add_c2(a,6,3,c1,c2,c3);
796 $LD r5,`3*$BNSZ`(r4)
797 $LD r6,`6*$BNSZ`(r4)
798 $UMULL r7,r5,r6
799 $UMULH r8,r5,r6
800 addc r9,r7,r9
801 adde r10,r8,r10
802 addze r11,r11
803 addc r9,r7,r9
804 adde r10,r8,r10
805 addze r11,r11
806 #sqr_add_c2(a,5,4,c1,c2,c3);
807 $LD r5,`4*$BNSZ`(r4)
808 $LD r6,`5*$BNSZ`(r4)
809 $UMULL r7,r5,r6
810 $UMULH r8,r5,r6
811 addc r9,r7,r9
812 adde r10,r8,r10
813 addze r11,r11
814 addc r9,r7,r9
815 adde r10,r8,r10
816 addze r11,r11
817 $ST r9,`9*$BNSZ`(r3) #r[9]=c1;
818 #sqr_add_c(a,5,c2,c3,c1);
819 $UMULL r7,r6,r6
820 $UMULH r8,r6,r6
821 addc r10,r7,r10
822 adde r11,r8,r11
823 addze r9,r0 #r9 restarts as 0 + carry
824 #sqr_add_c2(a,6,4,c2,c3,c1);
825 $LD r6,`6*$BNSZ`(r4) #r6 = a[6]. r5 is still a[4].
826 $UMULL r7,r5,r6
827 $UMULH r8,r5,r6
828 addc r10,r7,r10
829 adde r11,r8,r11
830 addze r9,r9
831 addc r10,r7,r10
832 adde r11,r8,r11
833 addze r9,r9
834 #sqr_add_c2(a,7,3,c2,c3,c1);
835 $LD r5,`3*$BNSZ`(r4)
836 $LD r6,`7*$BNSZ`(r4)
837 $UMULL r7,r5,r6
838 $UMULH r8,r5,r6
839 addc r10,r7,r10
840 adde r11,r8,r11
841 addze r9,r9
842 addc r10,r7,r10
843 adde r11,r8,r11
844 addze r9,r9
845 $ST r10,`10*$BNSZ`(r3) #r[10]=c2;
846 #sqr_add_c2(a,7,4,c3,c1,c2);
847 $LD r5,`4*$BNSZ`(r4) #r5 = a[4]. r6 is still a[7].
848 $UMULL r7,r5,r6
849 $UMULH r8,r5,r6
850 addc r11,r7,r11
851 adde r9,r8,r9
852 addze r10,r0 #r10 restarts as 0 + carry
853 addc r11,r7,r11
854 adde r9,r8,r9
855 addze r10,r10
856 #sqr_add_c2(a,6,5,c3,c1,c2);
857 $LD r5,`5*$BNSZ`(r4)
858 $LD r6,`6*$BNSZ`(r4)
859 $UMULL r7,r5,r6
860 $UMULH r8,r5,r6
861 addc r11,r7,r11
862 adde r9,r8,r9
863 addze r10,r10
864 addc r11,r7,r11
865 adde r9,r8,r9
866 addze r10,r10
867 $ST r11,`11*$BNSZ`(r3) #r[11]=c3;
868 #sqr_add_c(a,6,c1,c2,c3);
869 $UMULL r7,r6,r6
870 $UMULH r8,r6,r6
871 addc r9,r7,r9
872 adde r10,r8,r10
873 addze r11,r0 #r11 restarts as 0 + carry
874 #sqr_add_c2(a,7,5,c1,c2,c3)
875 $LD r6,`7*$BNSZ`(r4) #r6 = a[7]. r5 is still a[5].
876 $UMULL r7,r5,r6
877 $UMULH r8,r5,r6
878 addc r9,r7,r9
879 adde r10,r8,r10
880 addze r11,r11
881 addc r9,r7,r9
882 adde r10,r8,r10
883 addze r11,r11
884 $ST r9,`12*$BNSZ`(r3) #r[12]=c1;
885
886 #sqr_add_c2(a,7,6,c2,c3,c1)
887 $LD r5,`6*$BNSZ`(r4) #r5 = a[6]. r6 is still a[7].
888 $UMULL r7,r5,r6
889 $UMULH r8,r5,r6
890 addc r10,r7,r10
891 adde r11,r8,r11
892 addze r9,r0 #r9 restarts as 0 + carry
893 addc r10,r7,r10
894 adde r11,r8,r11
895 addze r9,r9
896 $ST r10,`13*$BNSZ`(r3) #r[13]=c2;
897 #sqr_add_c(a,7,c3,c1,c2);
898 $UMULL r7,r6,r6
899 $UMULH r8,r6,r6
900 addc r11,r7,r11
901 adde r9,r8,r9
902 $ST r11,`14*$BNSZ`(r3) #r[14]=c3;
903 $ST r9, `15*$BNSZ`(r3) #r[15]=c1;
904
905
906 bclr BO_ALWAYS,CR0_LT #return to the caller
907
908 .long 0x00000000
909
910#
911# NOTE: The following label name should be changed to
912# "bn_mul_comba4" i.e. remove the first dot
913# for the gcc compiler. This should be automatically
914# done in the build
915#
916
917.align 4
918.bn_mul_comba4:
919#
920# This is an optimized version of the bn_mul_comba4 routine.
921# Stores the 8-word product of a[0..3] and b[0..3] into r[0..7].
922# void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
923# r3 contains r
924# r4 contains a
925# r5 contains b
926# r6, r7 are the 2 BN_ULONGs being multiplied (r6 from a, r7 from b).
927# r8, r9 are the low ($UMULL) and high ($UMULH) words of the product.
928# r10, r11, r12 are the equivalents of c1, c2, and c3.
929# mul_add_c(x,y,lo,mid,hi) adds x*y to the three-word accumulator.
930 xor r0,r0,r0 #r0=0. Used in addze below.
931 #mul_add_c(a[0],b[0],c1,c2,c3);
932 $LD r6,`0*$BNSZ`(r4)
933 $LD r7,`0*$BNSZ`(r5)
934 $UMULL r10,r6,r7
935 $UMULH r11,r6,r7
936 $ST r10,`0*$BNSZ`(r3) #r[0]=c1
937 #mul_add_c(a[0],b[1],c2,c3,c1);
938 $LD r7,`1*$BNSZ`(r5) #b[1]; r6 is still a[0]
939 $UMULL r8,r6,r7
940 $UMULH r9,r6,r7
941 addc r11,r8,r11
942 adde r12,r9,r0 #first write to c3(r12): r9 + carry
943 addze r10,r0 #r10 restarts as 0 + carry
944 #mul_add_c(a[1],b[0],c2,c3,c1);
945 $LD r6, `1*$BNSZ`(r4)
946 $LD r7, `0*$BNSZ`(r5)
947 $UMULL r8,r6,r7
948 $UMULH r9,r6,r7
949 addc r11,r8,r11
950 adde r12,r9,r12
951 addze r10,r10
952 $ST r11,`1*$BNSZ`(r3) #r[1]=c2
953 #mul_add_c(a[2],b[0],c3,c1,c2);
954 $LD r6,`2*$BNSZ`(r4) #a[2]; r7 is still b[0]
955 $UMULL r8,r6,r7
956 $UMULH r9,r6,r7
957 addc r12,r8,r12
958 adde r10,r9,r10
959 addze r11,r0 #r11 restarts as 0 + carry
960 #mul_add_c(a[1],b[1],c3,c1,c2);
961 $LD r6,`1*$BNSZ`(r4)
962 $LD r7,`1*$BNSZ`(r5)
963 $UMULL r8,r6,r7
964 $UMULH r9,r6,r7
965 addc r12,r8,r12
966 adde r10,r9,r10
967 addze r11,r11
968 #mul_add_c(a[0],b[2],c3,c1,c2);
969 $LD r6,`0*$BNSZ`(r4)
970 $LD r7,`2*$BNSZ`(r5)
971 $UMULL r8,r6,r7
972 $UMULH r9,r6,r7
973 addc r12,r8,r12
974 adde r10,r9,r10
975 addze r11,r11
976 $ST r12,`2*$BNSZ`(r3) #r[2]=c3
977 #mul_add_c(a[0],b[3],c1,c2,c3);
978 $LD r7,`3*$BNSZ`(r5) #b[3]; r6 is still a[0]
979 $UMULL r8,r6,r7
980 $UMULH r9,r6,r7
981 addc r10,r8,r10
982 adde r11,r9,r11
983 addze r12,r0 #r12 restarts as 0 + carry
984 #mul_add_c(a[1],b[2],c1,c2,c3);
985 $LD r6,`1*$BNSZ`(r4)
986 $LD r7,`2*$BNSZ`(r5)
987 $UMULL r8,r6,r7
988 $UMULH r9,r6,r7
989 addc r10,r8,r10
990 adde r11,r9,r11
991 addze r12,r12
992 #mul_add_c(a[2],b[1],c1,c2,c3);
993 $LD r6,`2*$BNSZ`(r4)
994 $LD r7,`1*$BNSZ`(r5)
995 $UMULL r8,r6,r7
996 $UMULH r9,r6,r7
997 addc r10,r8,r10
998 adde r11,r9,r11
999 addze r12,r12
1000 #mul_add_c(a[3],b[0],c1,c2,c3);
1001 $LD r6,`3*$BNSZ`(r4)
1002 $LD r7,`0*$BNSZ`(r5)
1003 $UMULL r8,r6,r7
1004 $UMULH r9,r6,r7
1005 addc r10,r8,r10
1006 adde r11,r9,r11
1007 addze r12,r12
1008 $ST r10,`3*$BNSZ`(r3) #r[3]=c1
1009 #mul_add_c(a[3],b[1],c2,c3,c1);
1010 $LD r7,`1*$BNSZ`(r5) #b[1]; r6 is still a[3]
1011 $UMULL r8,r6,r7
1012 $UMULH r9,r6,r7
1013 addc r11,r8,r11
1014 adde r12,r9,r12
1015 addze r10,r0 #r10 restarts as 0 + carry
1016 #mul_add_c(a[2],b[2],c2,c3,c1);
1017 $LD r6,`2*$BNSZ`(r4)
1018 $LD r7,`2*$BNSZ`(r5)
1019 $UMULL r8,r6,r7
1020 $UMULH r9,r6,r7
1021 addc r11,r8,r11
1022 adde r12,r9,r12
1023 addze r10,r10
1024 #mul_add_c(a[1],b[3],c2,c3,c1);
1025 $LD r6,`1*$BNSZ`(r4)
1026 $LD r7,`3*$BNSZ`(r5)
1027 $UMULL r8,r6,r7
1028 $UMULH r9,r6,r7
1029 addc r11,r8,r11
1030 adde r12,r9,r12
1031 addze r10,r10
1032 $ST r11,`4*$BNSZ`(r3) #r[4]=c2
1033 #mul_add_c(a[2],b[3],c3,c1,c2);
1034 $LD r6,`2*$BNSZ`(r4) #a[2]; r7 is still b[3]
1035 $UMULL r8,r6,r7
1036 $UMULH r9,r6,r7
1037 addc r12,r8,r12
1038 adde r10,r9,r10
1039 addze r11,r0 #r11 restarts as 0 + carry
1040 #mul_add_c(a[3],b[2],c3,c1,c2);
1041 $LD r6,`3*$BNSZ`(r4)
1042 $LD r7,`2*$BNSZ`(r5) #b[2] must come from b (r5), not from a (r4)
1043 $UMULL r8,r6,r7
1044 $UMULH r9,r6,r7
1045 addc r12,r8,r12
1046 adde r10,r9,r10
1047 addze r11,r11
1048 $ST r12,`5*$BNSZ`(r3) #r[5]=c3
1049 #mul_add_c(a[3],b[3],c1,c2,c3);
1050 $LD r7,`3*$BNSZ`(r5) #b[3]; r6 is still a[3]
1051 $UMULL r8,r6,r7
1052 $UMULH r9,r6,r7
1053 addc r10,r8,r10
1054 adde r11,r9,r11
1055
1056 $ST r10,`6*$BNSZ`(r3) #r[6]=c1
1057 $ST r11,`7*$BNSZ`(r3) #r[7]=c2
1058 bclr BO_ALWAYS,CR0_LT #return to the caller
1059 .long 0x00000000
1060
1061#
1062# NOTE: The following label name should be changed to
1063# "bn_mul_comba8" i.e. remove the first dot
1064# for the gcc compiler. This should be automatically
1065# done in the build
1066#
1067
1068.align 4
1069.bn_mul_comba8:
1070#
1071# Optimized version of the bn_mul_comba8 routine.
1072# Stores the 16-word product of a[0..7] and b[0..7] into r[0..15].
1073# void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1074# r3 contains r
1075# r4 contains a
1076# r5 contains b
1077# r6, r7 are the 2 BN_ULONGs being multiplied (r6 from a, r7 from b).
1078# r8, r9 are the low ($UMULL) and high ($UMULH) words of the product.
1079# r10, r11, r12 are the equivalents of c1, c2, and c3.
1080#
1081 xor r0,r0,r0 #r0=0. Used in addze below.
1082
1083 #mul_add_c(a[0],b[0],c1,c2,c3);
1084 $LD r6,`0*$BNSZ`(r4) #a[0]
1085 $LD r7,`0*$BNSZ`(r5) #b[0]
1086 $UMULL r10,r6,r7
1087 $UMULH r11,r6,r7
1088 $ST r10,`0*$BNSZ`(r3) #r[0]=c1;
1089 #mul_add_c(a[0],b[1],c2,c3,c1);
1090 $LD r7,`1*$BNSZ`(r5) #b[1]; r6 is still a[0]
1091 $UMULL r8,r6,r7
1092 $UMULH r9,r6,r7
1093 addc r11,r11,r8
1094 addze r12,r9 # since we didn't set r12 to zero before.
1095 addze r10,r0 #r10 restarts as 0 + carry
1096 #mul_add_c(a[1],b[0],c2,c3,c1);
1097 $LD r6,`1*$BNSZ`(r4)
1098 $LD r7,`0*$BNSZ`(r5)
1099 $UMULL r8,r6,r7
1100 $UMULH r9,r6,r7
1101 addc r11,r11,r8
1102 adde r12,r12,r9
1103 addze r10,r10
1104 $ST r11,`1*$BNSZ`(r3) #r[1]=c2;
1105 #mul_add_c(a[2],b[0],c3,c1,c2);
1106 $LD r6,`2*$BNSZ`(r4) #a[2]; r7 is still b[0]
1107 $UMULL r8,r6,r7
1108 $UMULH r9,r6,r7
1109 addc r12,r12,r8
1110 adde r10,r10,r9
1111 addze r11,r0 #r11 restarts as 0 + carry
1112 #mul_add_c(a[1],b[1],c3,c1,c2);
1113 $LD r6,`1*$BNSZ`(r4)
1114 $LD r7,`1*$BNSZ`(r5)
1115 $UMULL r8,r6,r7
1116 $UMULH r9,r6,r7
1117 addc r12,r12,r8
1118 adde r10,r10,r9
1119 addze r11,r11
1120 #mul_add_c(a[0],b[2],c3,c1,c2);
1121 $LD r6,`0*$BNSZ`(r4)
1122 $LD r7,`2*$BNSZ`(r5)
1123 $UMULL r8,r6,r7
1124 $UMULH r9,r6,r7
1125 addc r12,r12,r8
1126 adde r10,r10,r9
1127 addze r11,r11
1128 $ST r12,`2*$BNSZ`(r3) #r[2]=c3;
1129 #mul_add_c(a[0],b[3],c1,c2,c3);
1130 $LD r7,`3*$BNSZ`(r5) #b[3]; r6 is still a[0]
1131 $UMULL r8,r6,r7
1132 $UMULH r9,r6,r7
1133 addc r10,r10,r8
1134 adde r11,r11,r9
1135 addze r12,r0 #r12 restarts as 0 + carry
1136 #mul_add_c(a[1],b[2],c1,c2,c3);
1137 $LD r6,`1*$BNSZ`(r4)
1138 $LD r7,`2*$BNSZ`(r5)
1139 $UMULL r8,r6,r7
1140 $UMULH r9,r6,r7
1141 addc r10,r10,r8
1142 adde r11,r11,r9
1143 addze r12,r12
1144
1145 #mul_add_c(a[2],b[1],c1,c2,c3);
1146 $LD r6,`2*$BNSZ`(r4)
1147 $LD r7,`1*$BNSZ`(r5)
1148 $UMULL r8,r6,r7
1149 $UMULH r9,r6,r7
1150 addc r10,r10,r8
1151 adde r11,r11,r9
1152 addze r12,r12
1153 #mul_add_c(a[3],b[0],c1,c2,c3);
1154 $LD r6,`3*$BNSZ`(r4)
1155 $LD r7,`0*$BNSZ`(r5)
1156 $UMULL r8,r6,r7
1157 $UMULH r9,r6,r7
1158 addc r10,r10,r8
1159 adde r11,r11,r9
1160 addze r12,r12
1161 $ST r10,`3*$BNSZ`(r3) #r[3]=c1;
1162 #mul_add_c(a[4],b[0],c2,c3,c1);
1163 $LD r6,`4*$BNSZ`(r4) #a[4]; r7 is still b[0]
1164 $UMULL r8,r6,r7
1165 $UMULH r9,r6,r7
1166 addc r11,r11,r8
1167 adde r12,r12,r9
1168 addze r10,r0 #r10 restarts as 0 + carry
1169 #mul_add_c(a[3],b[1],c2,c3,c1);
1170 $LD r6,`3*$BNSZ`(r4)
1171 $LD r7,`1*$BNSZ`(r5)
1172 $UMULL r8,r6,r7
1173 $UMULH r9,r6,r7
1174 addc r11,r11,r8
1175 adde r12,r12,r9
1176 addze r10,r10
1177 #mul_add_c(a[2],b[2],c2,c3,c1);
1178 $LD r6,`2*$BNSZ`(r4)
1179 $LD r7,`2*$BNSZ`(r5)
1180 $UMULL r8,r6,r7
1181 $UMULH r9,r6,r7
1182 addc r11,r11,r8
1183 adde r12,r12,r9
1184 addze r10,r10
1185 #mul_add_c(a[1],b[3],c2,c3,c1);
1186 $LD r6,`1*$BNSZ`(r4)
1187 $LD r7,`3*$BNSZ`(r5)
1188 $UMULL r8,r6,r7
1189 $UMULH r9,r6,r7
1190 addc r11,r11,r8
1191 adde r12,r12,r9
1192 addze r10,r10
1193 #mul_add_c(a[0],b[4],c2,c3,c1);
1194 $LD r6,`0*$BNSZ`(r4)
1195 $LD r7,`4*$BNSZ`(r5)
1196 $UMULL r8,r6,r7
1197 $UMULH r9,r6,r7
1198 addc r11,r11,r8
1199 adde r12,r12,r9
1200 addze r10,r10
1201 $ST r11,`4*$BNSZ`(r3) #r[4]=c2;
1202 #mul_add_c(a[0],b[5],c3,c1,c2);
1203 $LD r7,`5*$BNSZ`(r5) #b[5]; r6 is still a[0]
1204 $UMULL r8,r6,r7
1205 $UMULH r9,r6,r7
1206 addc r12,r12,r8
1207 adde r10,r10,r9
1208 addze r11,r0 #r11 restarts as 0 + carry
1209 #mul_add_c(a[1],b[4],c3,c1,c2);
1210 $LD r6,`1*$BNSZ`(r4)
1211 $LD r7,`4*$BNSZ`(r5)
1212 $UMULL r8,r6,r7
1213 $UMULH r9,r6,r7
1214 addc r12,r12,r8
1215 adde r10,r10,r9
1216 addze r11,r11
1217 #mul_add_c(a[2],b[3],c3,c1,c2);
1218 $LD r6,`2*$BNSZ`(r4)
1219 $LD r7,`3*$BNSZ`(r5)
1220 $UMULL r8,r6,r7
1221 $UMULH r9,r6,r7
1222 addc r12,r12,r8
1223 adde r10,r10,r9
1224 addze r11,r11
1225 #mul_add_c(a[3],b[2],c3,c1,c2);
1226 $LD r6,`3*$BNSZ`(r4)
1227 $LD r7,`2*$BNSZ`(r5)
1228 $UMULL r8,r6,r7
1229 $UMULH r9,r6,r7
1230 addc r12,r12,r8
1231 adde r10,r10,r9
1232 addze r11,r11
1233 #mul_add_c(a[4],b[1],c3,c1,c2);
1234 $LD r6,`4*$BNSZ`(r4)
1235 $LD r7,`1*$BNSZ`(r5)
1236 $UMULL r8,r6,r7
1237 $UMULH r9,r6,r7
1238 addc r12,r12,r8
1239 adde r10,r10,r9
1240 addze r11,r11
1241 #mul_add_c(a[5],b[0],c3,c1,c2);
1242 $LD r6,`5*$BNSZ`(r4)
1243 $LD r7,`0*$BNSZ`(r5)
1244 $UMULL r8,r6,r7
1245 $UMULH r9,r6,r7
1246 addc r12,r12,r8
1247 adde r10,r10,r9
1248 addze r11,r11
1249 $ST r12,`5*$BNSZ`(r3) #r[5]=c3;
1250 #mul_add_c(a[6],b[0],c1,c2,c3);
1251 $LD r6,`6*$BNSZ`(r4) #a[6]; r7 is still b[0]
1252 $UMULL r8,r6,r7
1253 $UMULH r9,r6,r7
1254 addc r10,r10,r8
1255 adde r11,r11,r9
1256 addze r12,r0 #r12 restarts as 0 + carry
1257 #mul_add_c(a[5],b[1],c1,c2,c3);
1258 $LD r6,`5*$BNSZ`(r4)
1259 $LD r7,`1*$BNSZ`(r5)
1260 $UMULL r8,r6,r7
1261 $UMULH r9,r6,r7
1262 addc r10,r10,r8
1263 adde r11,r11,r9
1264 addze r12,r12
1265 #mul_add_c(a[4],b[2],c1,c2,c3);
1266 $LD r6,`4*$BNSZ`(r4)
1267 $LD r7,`2*$BNSZ`(r5)
1268 $UMULL r8,r6,r7
1269 $UMULH r9,r6,r7
1270 addc r10,r10,r8
1271 adde r11,r11,r9
1272 addze r12,r12
1273 #mul_add_c(a[3],b[3],c1,c2,c3);
1274 $LD r6,`3*$BNSZ`(r4)
1275 $LD r7,`3*$BNSZ`(r5)
1276 $UMULL r8,r6,r7
1277 $UMULH r9,r6,r7
1278 addc r10,r10,r8
1279 adde r11,r11,r9
1280 addze r12,r12
1281 #mul_add_c(a[2],b[4],c1,c2,c3);
1282 $LD r6,`2*$BNSZ`(r4)
1283 $LD r7,`4*$BNSZ`(r5)
1284 $UMULL r8,r6,r7
1285 $UMULH r9,r6,r7
1286 addc r10,r10,r8
1287 adde r11,r11,r9
1288 addze r12,r12
1289 #mul_add_c(a[1],b[5],c1,c2,c3);
1290 $LD r6,`1*$BNSZ`(r4)
1291 $LD r7,`5*$BNSZ`(r5)
1292 $UMULL r8,r6,r7
1293 $UMULH r9,r6,r7
1294 addc r10,r10,r8
1295 adde r11,r11,r9
1296 addze r12,r12
1297 #mul_add_c(a[0],b[6],c1,c2,c3);
1298 $LD r6,`0*$BNSZ`(r4)
1299 $LD r7,`6*$BNSZ`(r5)
1300 $UMULL r8,r6,r7
1301 $UMULH r9,r6,r7
1302 addc r10,r10,r8
1303 adde r11,r11,r9
1304 addze r12,r12
1305 $ST r10,`6*$BNSZ`(r3) #r[6]=c1;
1306 #mul_add_c(a[0],b[7],c2,c3,c1);
1307 $LD r7,`7*$BNSZ`(r5) #b[7]; r6 is still a[0]
1308 $UMULL r8,r6,r7
1309 $UMULH r9,r6,r7
1310 addc r11,r11,r8
1311 adde r12,r12,r9
1312 addze r10,r0 #r10 restarts as 0 + carry
1313 #mul_add_c(a[1],b[6],c2,c3,c1);
1314 $LD r6,`1*$BNSZ`(r4)
1315 $LD r7,`6*$BNSZ`(r5)
1316 $UMULL r8,r6,r7
1317 $UMULH r9,r6,r7
1318 addc r11,r11,r8
1319 adde r12,r12,r9
1320 addze r10,r10
1321 #mul_add_c(a[2],b[5],c2,c3,c1);
1322 $LD r6,`2*$BNSZ`(r4)
1323 $LD r7,`5*$BNSZ`(r5)
1324 $UMULL r8,r6,r7
1325 $UMULH r9,r6,r7
1326 addc r11,r11,r8
1327 adde r12,r12,r9
1328 addze r10,r10
1329 #mul_add_c(a[3],b[4],c2,c3,c1);
1330 $LD r6,`3*$BNSZ`(r4)
1331 $LD r7,`4*$BNSZ`(r5)
1332 $UMULL r8,r6,r7
1333 $UMULH r9,r6,r7
1334 addc r11,r11,r8
1335 adde r12,r12,r9
1336 addze r10,r10
1337 #mul_add_c(a[4],b[3],c2,c3,c1);
1338 $LD r6,`4*$BNSZ`(r4)
1339 $LD r7,`3*$BNSZ`(r5)
1340 $UMULL r8,r6,r7
1341 $UMULH r9,r6,r7
1342 addc r11,r11,r8
1343 adde r12,r12,r9
1344 addze r10,r10
1345 #mul_add_c(a[5],b[2],c2,c3,c1);
1346 $LD r6,`5*$BNSZ`(r4)
1347 $LD r7,`2*$BNSZ`(r5)
1348 $UMULL r8,r6,r7
1349 $UMULH r9,r6,r7
1350 addc r11,r11,r8
1351 adde r12,r12,r9
1352 addze r10,r10
1353 #mul_add_c(a[6],b[1],c2,c3,c1);
1354 $LD r6,`6*$BNSZ`(r4)
1355 $LD r7,`1*$BNSZ`(r5)
1356 $UMULL r8,r6,r7
1357 $UMULH r9,r6,r7
1358 addc r11,r11,r8
1359 adde r12,r12,r9
1360 addze r10,r10
1361 #mul_add_c(a[7],b[0],c2,c3,c1);
1362 $LD r6,`7*$BNSZ`(r4)
1363 $LD r7,`0*$BNSZ`(r5)
1364 $UMULL r8,r6,r7
1365 $UMULH r9,r6,r7
1366 addc r11,r11,r8
1367 adde r12,r12,r9
1368 addze r10,r10
1369 $ST r11,`7*$BNSZ`(r3) #r[7]=c2;
1370 #mul_add_c(a[7],b[1],c3,c1,c2);
1371 $LD r7,`1*$BNSZ`(r5) #b[1]; r6 is still a[7]
1372 $UMULL r8,r6,r7
1373 $UMULH r9,r6,r7
1374 addc r12,r12,r8
1375 adde r10,r10,r9
1376 addze r11,r0 #r11 restarts as 0 + carry
1377 #mul_add_c(a[6],b[2],c3,c1,c2);
1378 $LD r6,`6*$BNSZ`(r4)
1379 $LD r7,`2*$BNSZ`(r5)
1380 $UMULL r8,r6,r7
1381 $UMULH r9,r6,r7
1382 addc r12,r12,r8
1383 adde r10,r10,r9
1384 addze r11,r11
1385 #mul_add_c(a[5],b[3],c3,c1,c2);
1386 $LD r6,`5*$BNSZ`(r4)
1387 $LD r7,`3*$BNSZ`(r5)
1388 $UMULL r8,r6,r7
1389 $UMULH r9,r6,r7
1390 addc r12,r12,r8
1391 adde r10,r10,r9
1392 addze r11,r11
1393 #mul_add_c(a[4],b[4],c3,c1,c2);
1394 $LD r6,`4*$BNSZ`(r4)
1395 $LD r7,`4*$BNSZ`(r5)
1396 $UMULL r8,r6,r7
1397 $UMULH r9,r6,r7
1398 addc r12,r12,r8
1399 adde r10,r10,r9
1400 addze r11,r11
1401 #mul_add_c(a[3],b[5],c3,c1,c2);
1402 $LD r6,`3*$BNSZ`(r4)
1403 $LD r7,`5*$BNSZ`(r5)
1404 $UMULL r8,r6,r7
1405 $UMULH r9,r6,r7
1406 addc r12,r12,r8
1407 adde r10,r10,r9
1408 addze r11,r11
1409 #mul_add_c(a[2],b[6],c3,c1,c2);
1410 $LD r6,`2*$BNSZ`(r4)
1411 $LD r7,`6*$BNSZ`(r5)
1412 $UMULL r8,r6,r7
1413 $UMULH r9,r6,r7
1414 addc r12,r12,r8
1415 adde r10,r10,r9
1416 addze r11,r11
1417 #mul_add_c(a[1],b[7],c3,c1,c2);
1418 $LD r6,`1*$BNSZ`(r4)
1419 $LD r7,`7*$BNSZ`(r5)
1420 $UMULL r8,r6,r7
1421 $UMULH r9,r6,r7
1422 addc r12,r12,r8
1423 adde r10,r10,r9
1424 addze r11,r11
1425 $ST r12,`8*$BNSZ`(r3) #r[8]=c3;
1426 #mul_add_c(a[2],b[7],c1,c2,c3);
1427 $LD r6,`2*$BNSZ`(r4) #a[2]; r7 is still b[7]
1428 $UMULL r8,r6,r7
1429 $UMULH r9,r6,r7
1430 addc r10,r10,r8
1431 adde r11,r11,r9
1432 addze r12,r0 #r12 restarts as 0 + carry
1433 #mul_add_c(a[3],b[6],c1,c2,c3);
1434 $LD r6,`3*$BNSZ`(r4)
1435 $LD r7,`6*$BNSZ`(r5)
1436 $UMULL r8,r6,r7
1437 $UMULH r9,r6,r7
1438 addc r10,r10,r8
1439 adde r11,r11,r9
1440 addze r12,r12
1441 #mul_add_c(a[4],b[5],c1,c2,c3);
1442 $LD r6,`4*$BNSZ`(r4)
1443 $LD r7,`5*$BNSZ`(r5)
1444 $UMULL r8,r6,r7
1445 $UMULH r9,r6,r7
1446 addc r10,r10,r8
1447 adde r11,r11,r9
1448 addze r12,r12
1449 #mul_add_c(a[5],b[4],c1,c2,c3);
1450 $LD r6,`5*$BNSZ`(r4)
1451 $LD r7,`4*$BNSZ`(r5)
1452 $UMULL r8,r6,r7
1453 $UMULH r9,r6,r7
1454 addc r10,r10,r8
1455 adde r11,r11,r9
1456 addze r12,r12
1457 #mul_add_c(a[6],b[3],c1,c2,c3);
1458 $LD r6,`6*$BNSZ`(r4)
1459 $LD r7,`3*$BNSZ`(r5)
1460 $UMULL r8,r6,r7
1461 $UMULH r9,r6,r7
1462 addc r10,r10,r8
1463 adde r11,r11,r9
1464 addze r12,r12
1465 #mul_add_c(a[7],b[2],c1,c2,c3);
1466 $LD r6,`7*$BNSZ`(r4)
1467 $LD r7,`2*$BNSZ`(r5)
1468 $UMULL r8,r6,r7
1469 $UMULH r9,r6,r7
1470 addc r10,r10,r8
1471 adde r11,r11,r9
1472 addze r12,r12
1473 $ST r10,`9*$BNSZ`(r3) #r[9]=c1;
1474 #mul_add_c(a[7],b[3],c2,c3,c1);
1475 $LD r7,`3*$BNSZ`(r5) #b[3]; r6 is still a[7]
1476 $UMULL r8,r6,r7
1477 $UMULH r9,r6,r7
1478 addc r11,r11,r8
1479 adde r12,r12,r9
1480 addze r10,r0 #r10 restarts as 0 + carry
1481 #mul_add_c(a[6],b[4],c2,c3,c1);
1482 $LD r6,`6*$BNSZ`(r4)
1483 $LD r7,`4*$BNSZ`(r5)
1484 $UMULL r8,r6,r7
1485 $UMULH r9,r6,r7
1486 addc r11,r11,r8
1487 adde r12,r12,r9
1488 addze r10,r10
1489 #mul_add_c(a[5],b[5],c2,c3,c1);
1490 $LD r6,`5*$BNSZ`(r4)
1491 $LD r7,`5*$BNSZ`(r5)
1492 $UMULL r8,r6,r7
1493 $UMULH r9,r6,r7
1494 addc r11,r11,r8
1495 adde r12,r12,r9
1496 addze r10,r10
1497 #mul_add_c(a[4],b[6],c2,c3,c1);
1498 $LD r6,`4*$BNSZ`(r4)
1499 $LD r7,`6*$BNSZ`(r5)
1500 $UMULL r8,r6,r7
1501 $UMULH r9,r6,r7
1502 addc r11,r11,r8
1503 adde r12,r12,r9
1504 addze r10,r10
1505 #mul_add_c(a[3],b[7],c2,c3,c1);
1506 $LD r6,`3*$BNSZ`(r4)
1507 $LD r7,`7*$BNSZ`(r5)
1508 $UMULL r8,r6,r7
1509 $UMULH r9,r6,r7
1510 addc r11,r11,r8
1511 adde r12,r12,r9
1512 addze r10,r10
1513 $ST r11,`10*$BNSZ`(r3) #r[10]=c2;
1514 #mul_add_c(a[4],b[7],c3,c1,c2);
1515 $LD r6,`4*$BNSZ`(r4) #a[4]; r7 is still b[7]
1516 $UMULL r8,r6,r7
1517 $UMULH r9,r6,r7
1518 addc r12,r12,r8
1519 adde r10,r10,r9
1520 addze r11,r0 #r11 restarts as 0 + carry
1521 #mul_add_c(a[5],b[6],c3,c1,c2);
1522 $LD r6,`5*$BNSZ`(r4)
1523 $LD r7,`6*$BNSZ`(r5)
1524 $UMULL r8,r6,r7
1525 $UMULH r9,r6,r7
1526 addc r12,r12,r8
1527 adde r10,r10,r9
1528 addze r11,r11
1529 #mul_add_c(a[6],b[5],c3,c1,c2);
1530 $LD r6,`6*$BNSZ`(r4)
1531 $LD r7,`5*$BNSZ`(r5)
1532 $UMULL r8,r6,r7
1533 $UMULH r9,r6,r7
1534 addc r12,r12,r8
1535 adde r10,r10,r9
1536 addze r11,r11
1537 #mul_add_c(a[7],b[4],c3,c1,c2);
1538 $LD r6,`7*$BNSZ`(r4)
1539 $LD r7,`4*$BNSZ`(r5)
1540 $UMULL r8,r6,r7
1541 $UMULH r9,r6,r7
1542 addc r12,r12,r8
1543 adde r10,r10,r9
1544 addze r11,r11
1545 $ST r12,`11*$BNSZ`(r3) #r[11]=c3;
1546 #mul_add_c(a[7],b[5],c1,c2,c3);
1547 $LD r7,`5*$BNSZ`(r5) #b[5]; r6 is still a[7]
1548 $UMULL r8,r6,r7
1549 $UMULH r9,r6,r7
1550 addc r10,r10,r8
1551 adde r11,r11,r9
1552 addze r12,r0 #r12 restarts as 0 + carry
1553 #mul_add_c(a[6],b[6],c1,c2,c3);
1554 $LD r6,`6*$BNSZ`(r4)
1555 $LD r7,`6*$BNSZ`(r5)
1556 $UMULL r8,r6,r7
1557 $UMULH r9,r6,r7
1558 addc r10,r10,r8
1559 adde r11,r11,r9
1560 addze r12,r12
1561 #mul_add_c(a[5],b[7],c1,c2,c3);
1562 $LD r6,`5*$BNSZ`(r4)
1563 $LD r7,`7*$BNSZ`(r5)
1564 $UMULL r8,r6,r7
1565 $UMULH r9,r6,r7
1566 addc r10,r10,r8
1567 adde r11,r11,r9
1568 addze r12,r12
1569 $ST r10,`12*$BNSZ`(r3) #r[12]=c1;
1570 #mul_add_c(a[6],b[7],c2,c3,c1);
1571 $LD r6,`6*$BNSZ`(r4) #a[6]; r7 is still b[7]
1572 $UMULL r8,r6,r7
1573 $UMULH r9,r6,r7
1574 addc r11,r11,r8
1575 adde r12,r12,r9
1576 addze r10,r0 #r10 restarts as 0 + carry
1577 #mul_add_c(a[7],b[6],c2,c3,c1);
1578 $LD r6,`7*$BNSZ`(r4)
1579 $LD r7,`6*$BNSZ`(r5)
1580 $UMULL r8,r6,r7
1581 $UMULH r9,r6,r7
1582 addc r11,r11,r8
1583 adde r12,r12,r9
1584 addze r10,r10
1585 $ST r11,`13*$BNSZ`(r3) #r[13]=c2;
1586 #mul_add_c(a[7],b[7],c3,c1,c2);
1587 $LD r7,`7*$BNSZ`(r5) #b[7]; r6 is still a[7]
1588 $UMULL r8,r6,r7
1589 $UMULH r9,r6,r7
1590 addc r12,r12,r8
1591 adde r10,r10,r9
1592 $ST r12,`14*$BNSZ`(r3) #r[14]=c3;
1593 $ST r10,`15*$BNSZ`(r3) #r[15]=c1;
1594 bclr BO_ALWAYS,CR0_LT #return to the caller
1595 .long 0x00000000
1596
1597#
1598# NOTE: The following label name should be changed to
1599# "bn_sub_words" i.e. remove the first dot
1600# for the gcc compiler. This should be automatically
1601# done in the build
1602#
1603#
1604.align 4
1605.bn_sub_words:
1606#
1607# Handcoded version of bn_sub_words
1608# Stores a[i]-b[i] (with borrow propagation) in r[i] for i=0..n-1; returns the final borrow (0 or 1) in r3.
1609#BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1610#
1611# r3 = r
1612# r4 = a
1613# r5 = b
1614# r6 = n
1615#
1616# Note: No loop unrolling done since this is not a performance
1617# critical loop.
1618
1619 xor r0,r0,r0 #set r0 = 0
1620#
1621# check for r6 = 0 AND set carry bit.
1622#
1623 subfc. r7,r0,r6 # If r6 is 0 then result is 0.
1624 # if r6 > 0 then result !=0
1625 # In either case carry bit is set.
1626 bc BO_IF,CR0_EQ,Lppcasm_sub_adios # n == 0: nothing to do, return 0
1627 addi r4,r4,-$BNSZ # back each pointer up one word: the loop's
1628 addi r3,r3,-$BNSZ # $LDU/$STU accesses use offset $BNSZ
1629 addi r5,r5,-$BNSZ # (presumably update forms that advance the pointer - confirm)
1630 mtctr r6 # CTR = n, the loop count
1631Lppcasm_sub_mainloop:
1632 $LDU r7,$BNSZ(r4)
1633 $LDU r8,$BNSZ(r5)
1634 subfe r6,r8,r7 # r6 = r7+carry bit + onescomplement(r8)
1635 # if carry = 1 this is r7-r8. Else it
1636 # is r7-r8 -1 as we need.
1637 $STU r6,$BNSZ(r3)
1638 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop # decrement CTR, loop while non-zero
1639Lppcasm_sub_adios:
1640 subfze r3,r0 # if carry bit is set then r3 = 0 else -1
1641 andi. r3,r3,1 # keep only last bit.
1642 bclr BO_ALWAYS,CR0_LT # return to the caller
1643 .long 0x00000000
1644
1645
1646#
1647# NOTE: The following label name should be changed to
1648# "bn_add_words" i.e. remove the first dot
1649# for the gcc compiler. This should be automatically
1650# done in the build
1651#
1652
1653.align 4
1654.bn_add_words:
1655#
1656# Handcoded version of bn_add_words
1657# Stores a[i]+b[i] (with carry propagation) in r[i] for i=0..n-1; returns the final carry (0 or 1) in r3.
1658#BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1659#
1660# r3 = r
1661# r4 = a
1662# r5 = b
1663# r6 = n
1664#
1665# Note: No loop unrolling done since this is not a performance
1666# critical loop.
1667
1668 xor r0,r0,r0 # r0 = 0, used by the final addze
1669#
1670# check for r6 = 0. Is this needed?
1671#
1672 addic. r6,r6,0 #test r6 and clear carry bit.
1673 bc BO_IF,CR0_EQ,Lppcasm_add_adios # n == 0: nothing to do, return 0
1674 addi r4,r4,-$BNSZ # back each pointer up one word: the loop's
1675 addi r3,r3,-$BNSZ # $LDU/$STU accesses use offset $BNSZ
1676 addi r5,r5,-$BNSZ # (presumably update forms that advance the pointer - confirm)
1677 mtctr r6 # CTR = n, the loop count
1678Lppcasm_add_mainloop:
1679 $LDU r7,$BNSZ(r4)
1680 $LDU r8,$BNSZ(r5)
1681 adde r8,r7,r8 # r8 = a[i] + b[i] + carry from the previous word
1682 $STU r8,$BNSZ(r3)
1683 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop # decrement CTR, loop while non-zero
1684Lppcasm_add_adios:
1685 addze r3,r0 #return carry bit.
1686 bclr BO_ALWAYS,CR0_LT # return to the caller
1687 .long 0x00000000
1688
1689#
1690# NOTE: The following label name should be changed to
1691# "bn_div_words" i.e. remove the first dot
1692# for the gcc compiler. This should be automatically
1693# done in the build
1694#
1695
1696.align 4
1697.bn_div_words:
1698# bn_div_words(h,l,d): quotient of the double word h:l by d, built as two half-word quotient digits; returns -1 if d == 0.
1699# This is a cleaned up version of code generated by
1700# the AIX compiler. The only optimization is to use
1701# the PPC instruction to count leading zeros instead
1702# of call to num_bits_word. Since this was compiled
1703# only at level -O2 we can possibly squeeze it more?
1704#
1705# r3 = h
1706# r4 = l
1707# r5 = d
1708
1709 $UCMPI 0,r5,0 # compare r5 and 0
1710 bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0
1711 li r3,-1 # d=0 return -1
1712 bclr BO_ALWAYS,CR0_LT
1713Lppcasm_div1:
1714 xor r0,r0,r0 #r0=0
1715 li r8,$BITS
1716 $CNTLZ. r7,r5 #r7 = num leading 0s in d.
1717 bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros
1718 subf r8,r7,r8 #r8 = BN_num_bits_word(d)
1719 $SHR. r9,r3,r8 #are there any bits above r8'th?
1720 $TR 16,r9,r0 #if there're, signal to dump core...
1721Lppcasm_div2:
1722 $UCMP 0,r3,r5 #h>=d?
1723 bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not
1724 subf r3,r5,r3 #h-=d ;
1725Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i
1726 cmpi 0,0,r7,0 # is (i == 0)?
1727 bc BO_IF,CR0_EQ,Lppcasm_div4
1728 $SHL r3,r3,r7 # h = (h<< i)
1729 $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i)
1730 $SHL r5,r5,r7 # d<<=i
1731 or r3,r3,r8 # h = (h<<i)|(l>>(BN_BITS2-i))
1732 $SHL r4,r4,r7 # l <<=i
1733Lppcasm_div4:
1734 $SHRI r9,r5,`$BITS/2` # r9 = dh
1735 # dl will be computed when needed
1736 # as it saves registers.
1737 li r6,2 #r6=2
1738 mtctr r6 #counter will be in count.
1739Lppcasm_divouterloop:
1740 $SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4)
1741 $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4
1742 # compute here for innerloop.
1743 $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh
1744 bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not
1745
1746 li r8,-1
1747 $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
1748 b Lppcasm_div6
1749Lppcasm_div5:
1750 $UDIV r8,r3,r9 #q = h/dh
1751Lppcasm_div6:
1752 $UMULL r12,r9,r8 #th = q*dh
1753 $CLRU r10,r5,`$BITS/2` #r10=dl
1754 $UMULL r6,r8,r10 #tl = q*dl
1755
1756Lppcasm_divinnerloop:
1757 subf r10,r12,r3 #t = h -th
1758 $SHRI r7,r10,`$BITS/2` #r7= (t &BN_MASK2H), sort of...
1759 addic. r7,r7,0 #test if r7 == 0. used below.
1760 # now want to compute
1761 # r7 = (t<<BN_BITS4)|((l&BN_MASK2h)>>BN_BITS4)
1762 # the following 2 instructions do that
1763 $SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4)
1764 or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4)
1765 $UCMP 1,r6,r7 # compare (tl <= r7)
1766 bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit # upper half of t non-zero (CR0 from the addic. above): q is final
1767 bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit # CR1 from the compare above: leave unless tl is the larger value
1768 addi r8,r8,-1 #q--
1769 subf r12,r9,r12 #th -=dh
1770 $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop.
1771 subf r6,r10,r6 #tl -=dl
1772 b Lppcasm_divinnerloop
1773Lppcasm_divinnerexit:
1774 $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4)
1775 $SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h;
1776 $UCMP 1,r4,r11 # compare l and tl
1777 add r12,r12,r10 # th+=t
1778 bc BO_IF_NOT,CR1_FX,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
1779 addi r12,r12,1 # th++
1780Lppcasm_div7:
1781 subf r11,r11,r4 #r11=l-tl
1782 $UCMP 1,r3,r12 #compare h and th
1783 bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
1784 addi r8,r8,-1 # q--
1785 add r3,r5,r3 # h+=d
1786Lppcasm_div8:
1787 subf r12,r12,r3 #r12 = h-th
1788 $SHLI r4,r11,`$BITS/2` #l=(l&BN_MASK2l)<<BN_BITS4
1789 # want to compute
1790 # h = ((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2
1791 # the following 2 instructions will do this.
1792 $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2.
1793 $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3
1794 bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ;
1795 $SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4
1796 b Lppcasm_divouterloop
1797Lppcasm_div9:
1798 or r3,r8,r0 # return value = (first digit shifted up, kept in r0) | second digit q
1799 bclr BO_ALWAYS,CR0_LT # return to caller
1800 .long 0x00000000
1801
1802#
1803# NOTE: The following label name should be changed to
1804# "bn_sqr_words" i.e. remove the first dot
1805# for the gcc compiler. This should be automatically
1806# done in the build
1807#
1808.align 4
1809.bn_sqr_words:
1810# For each of the n words of a, stores the double-word square: two consecutive words of r per input word.
1811# Optimized version of bn_sqr_words
1812#
1813# void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
1814#
1815# r3 = r
1816# r4 = a
1817# r5 = n
1818#
1819# r6 = a[i].
1820# r7,r8 = product.
1821#
1822# No unrolling done here. Not performance critical.
1823
1824 addic. r5,r5,0 #test r5.
1825 bc BO_IF,CR0_EQ,Lppcasm_sqr_adios # n == 0: nothing to do
1826 addi r4,r4,-$BNSZ # back the pointers up one word for the stepping loads/stores below
1827 addi r3,r3,-$BNSZ
1828 mtctr r5 # CTR = n
1829Lppcasm_sqr_mainloop:
1830 #sqr(r[0],r[1],a[0]);
1831 $LDU r6,$BNSZ(r4) # r6 = next word of a
1832 $UMULL r7,r6,r6 # r7 = first half of r6*r6 (presumably the low word - UMULL is defined outside this view)
1833 $UMULH r8,r6,r6 # r8 = second half of r6*r6 (presumably the high word)
1834 $STU r7,$BNSZ(r3) # r7 goes to the next word of r
1835 $STU r8,$BNSZ(r3) # r8 goes to the word after it
1836 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop # count down CTR, loop while it is non-zero
1837Lppcasm_sqr_adios:
1838 bclr BO_ALWAYS,CR0_LT # return to caller (no return value is set up)
1839 .long 0x00000000
1840
1841
1842#
1843# NOTE: The following label name should be changed to
1844# "bn_mul_words" i.e. remove the first dot
1845# for the gcc compiler. This should be automatically
1846# done in the build
1847#
1848
1849.align 4
1850.bn_mul_words:
1851# rp[i] = low word of (ap[i]*w + carry) for i = 0..num-1; the last carry word is returned in r3.
1852# BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1853#
1854# r3 = rp
1855# r4 = ap
1856# r5 = num
1857# r6 = w
1858 xor r0,r0,r0
1859 xor r12,r12,r12 # used for carry
1860 rlwinm. r7,r5,30,2,31 # num >> 2
1861 bc BO_IF,CR0_EQ,Lppcasm_mw_REM # fewer than 4 words: only the remainder code runs
1862 mtctr r7 # CTR = number of 4-word groups
1863Lppcasm_mw_LOOP:
1864 #mul(rp[0],ap[0],w,c1);
1865 $LD r8,`0*$BNSZ`(r4)
1866 $UMULL r9,r6,r8
1867 $UMULH r10,r6,r8
1868 addc r9,r9,r12
1869 #addze r10,r10 #carry is NOT ignored.
1870 #will be taken care of
1871 #in second spin below
1872 #using adde.
1873 $ST r9,`0*$BNSZ`(r3)
1874 #mul(rp[1],ap[1],w,c1);
1875 $LD r8,`1*$BNSZ`(r4)
1876 $UMULL r11,r6,r8
1877 $UMULH r12,r6,r8
1878 adde r11,r11,r10
1879 #addze r12,r12
1880 $ST r11,`1*$BNSZ`(r3)
1881 #mul(rp[2],ap[2],w,c1);
1882 $LD r8,`2*$BNSZ`(r4)
1883 $UMULL r9,r6,r8
1884 $UMULH r10,r6,r8
1885 adde r9,r9,r12
1886 #addze r10,r10
1887 $ST r9,`2*$BNSZ`(r3)
1888 #mul(rp[3],ap[3],w,c1);
1889 $LD r8,`3*$BNSZ`(r4)
1890 $UMULL r11,r6,r8
1891 $UMULH r12,r6,r8
1892 adde r11,r11,r10
1893 addze r12,r12 #this spin we collect carry into
1894 #r12
1895 $ST r11,`3*$BNSZ`(r3)
1896
1897 addi r3,r3,`4*$BNSZ` # advance rp by four words
1898 addi r4,r4,`4*$BNSZ` # advance ap by four words
1899 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP
1900
1901Lppcasm_mw_REM:
1902 andi. r5,r5,0x3 # r5 = num mod 4, the words still to do
1903 bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
1904 #mul(rp[0],ap[0],w,c1);
1905 $LD r8,`0*$BNSZ`(r4)
1906 $UMULL r9,r6,r8
1907 $UMULH r10,r6,r8
1908 addc r9,r9,r12
1909 addze r10,r10
1910 $ST r9,`0*$BNSZ`(r3)
1911 addi r12,r10,0 # r12 = carry word for the next step
1912
1913 addi r5,r5,-1
1914 cmpli 0,0,r5,0
1915 bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
1916
1917
1918 #mul(rp[1],ap[1],w,c1);
1919 $LD r8,`1*$BNSZ`(r4)
1920 $UMULL r9,r6,r8
1921 $UMULH r10,r6,r8
1922 addc r9,r9,r12
1923 addze r10,r10
1924 $ST r9,`1*$BNSZ`(r3)
1925 addi r12,r10,0
1926
1927 addi r5,r5,-1
1928 cmpli 0,0,r5,0
1929 bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
1930
1931 #mul(rp[2],ap[2],w,c1);
1932 $LD r8,`2*$BNSZ`(r4)
1933 $UMULL r9,r6,r8
1934 $UMULH r10,r6,r8
1935 addc r9,r9,r12
1936 addze r10,r10
1937 $ST r9,`2*$BNSZ`(r3)
1938 addi r12,r10,0
1939
1940Lppcasm_mw_OVER:
1941 addi r3,r12,0 # return the final carry word
1942 bclr BO_ALWAYS,CR0_LT
1943 .long 0x00000000
1944
1945#
1946# NOTE: The following label name should be changed to
1947# "bn_mul_add_words" i.e. remove the first dot
1948# for the gcc compiler. This should be automatically
1949# done in the build
1950#
1951
1952.align 4
1953.bn_mul_add_words:
1954# rp[i] = low word of (rp[i] + ap[i]*w + carry) for i = 0..num-1; the last carry word is returned in r3.
1955# BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1956#
1957# r3 = rp
1958# r4 = ap
1959# r5 = num
1960# r6 = w
1961#
1962# empirical evidence suggests that unrolled version performs best!!
1963#
1964 xor r0,r0,r0 #r0 = 0
1965 xor r12,r12,r12 #r12 = 0 . used for carry
1966 rlwinm. r7,r5,30,2,31 # num >> 2
1967 bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
1968 mtctr r7 # CTR = number of 4-word groups
1969Lppcasm_maw_mainloop:
1970 #mul_add(rp[0],ap[0],w,c1);
1971 $LD r8,`0*$BNSZ`(r4)
1972 $LD r11,`0*$BNSZ`(r3)
1973 $UMULL r9,r6,r8
1974 $UMULH r10,r6,r8
1975 addc r9,r9,r12 #r12 is carry.
1976 addze r10,r10
1977 addc r9,r9,r11
1978 #addze r10,r10
1979 #the above instruction addze
1980 #is NOT needed. Carry will NOT
1981 #be ignored. It's not affected
1982 #by multiply and will be collected
1983 #in the next spin
1984 $ST r9,`0*$BNSZ`(r3)
1985
1986 #mul_add(rp[1],ap[1],w,c1);
1987 $LD r8,`1*$BNSZ`(r4)
1988 $LD r9,`1*$BNSZ`(r3)
1989 $UMULL r11,r6,r8
1990 $UMULH r12,r6,r8
1991 adde r11,r11,r10 #r10 is carry.
1992 addze r12,r12
1993 addc r11,r11,r9
1994 #addze r12,r12
1995 $ST r11,`1*$BNSZ`(r3)
1996
1997 #mul_add(rp[2],ap[2],w,c1);
1998 $LD r8,`2*$BNSZ`(r4)
1999 $UMULL r9,r6,r8
2000 $LD r11,`2*$BNSZ`(r3)
2001 $UMULH r10,r6,r8
2002 adde r9,r9,r12
2003 addze r10,r10
2004 addc r9,r9,r11
2005 #addze r10,r10
2006 $ST r9,`2*$BNSZ`(r3)
2007
2008 #mul_add(rp[3],ap[3],w,c1);
2009 $LD r8,`3*$BNSZ`(r4)
2010 $UMULL r11,r6,r8
2011 $LD r9,`3*$BNSZ`(r3)
2012 $UMULH r12,r6,r8
2013 adde r11,r11,r10
2014 addze r12,r12
2015 addc r11,r11,r9
2016 addze r12,r12 # last step of the group: fold the pending carry bit into r12 now
2017 $ST r11,`3*$BNSZ`(r3)
2018 addi r3,r3,`4*$BNSZ` # advance rp by four words
2019 addi r4,r4,`4*$BNSZ` # advance ap by four words
2020 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop
2021
2022Lppcasm_maw_leftover:
2023 andi. r5,r5,0x3 # r5 = num mod 4, the words still to do
2024 bc BO_IF,CR0_EQ,Lppcasm_maw_adios
2025 addi r3,r3,-$BNSZ # back the pointers up one word for the stepping loads below
2026 addi r4,r4,-$BNSZ
2027 #mul_add(rp[0],ap[0],w,c1);
2028 mtctr r5 # CTR = leftover count, tested after each step
2029 $LDU r8,$BNSZ(r4)
2030 $UMULL r9,r6,r8
2031 $UMULH r10,r6,r8
2032 $LDU r11,$BNSZ(r3)
2033 addc r9,r9,r11
2034 addze r10,r10
2035 addc r9,r9,r12
2036 addze r12,r10 # r12 = carry word for the next step
2037 $ST r9,0(r3)
2038
2039 bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
2040 #mul_add(rp[1],ap[1],w,c1);
2041 $LDU r8,$BNSZ(r4)
2042 $UMULL r9,r6,r8
2043 $UMULH r10,r6,r8
2044 $LDU r11,$BNSZ(r3)
2045 addc r9,r9,r11
2046 addze r10,r10
2047 addc r9,r9,r12
2048 addze r12,r10
2049 $ST r9,0(r3)
2050
2051 bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
2052 #mul_add(rp[2],ap[2],w,c1);
2053 $LDU r8,$BNSZ(r4)
2054 $UMULL r9,r6,r8
2055 $UMULH r10,r6,r8
2056 $LDU r11,$BNSZ(r3)
2057 addc r9,r9,r11
2058 addze r10,r10
2059 addc r9,r9,r12
2060 addze r12,r10
2061 $ST r9,0(r3)
2062
2063Lppcasm_maw_adios:
2064 addi r3,r12,0 # return the final carry word
2065 bclr BO_ALWAYS,CR0_LT
2066 .long 0x00000000
2067 .align 4
2068EOF
2069 $data =~ s/\`([^\`]*)\`/eval $1/gem;
2070
2071 # if some assembler chokes on some simplified mnemonic,
2072 # this is the spot to fix it up, e.g.:
2073 # GNU as doesn't seem to accept cmplw, 32-bit unsigned compare
2074 $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;
2075 # assembler X doesn't accept li, load immediate value
2076 #$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm;
2077 return($data);
2078}
diff --git a/src/lib/libcrypto/bn/asm/r3000.s b/src/lib/libcrypto/bn/asm/r3000.s
new file mode 100644
index 0000000000..e95269afa3
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/r3000.s
@@ -0,0 +1,646 @@
1 .file 1 "../bn_mulw.c"
2 .set nobopt
3 .option pic2
4
5 # GNU C 2.6.3 [AL 1.1, MM 40] SGI running IRIX 5.0 compiled by GNU C
6
7 # Cc1 defaults:
8 # -mabicalls
9
10 # Cc1 arguments (-G value = 0, Cpu = 3000, ISA = 1):
11 # -quiet -dumpbase -O2 -o
12
13gcc2_compiled.:
14__gnu_compiled_c:
15 .rdata
16
17 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
18 .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x39,0x20
19 .byte 0x24,0x0
20
21 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
22 .byte 0x6e,0x3a,0x20,0x31,0x2e,0x33,0x34,0x20
23 .byte 0x24,0x0
24
25 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
26 .byte 0x6e,0x3a,0x20,0x31,0x2e,0x35,0x20,0x24
27 .byte 0x0
28
29 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
30 .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24
31 .byte 0x0
32
33 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
34 .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x33,0x20
35 .byte 0x24,0x0
36
37 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
38 .byte 0x6e,0x3a,0x20,0x31,0x2e,0x37,0x38,0x20
39 .byte 0x24,0x0
40
41 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
42 .byte 0x6e,0x3a,0x20,0x33,0x2e,0x37,0x30,0x20
43 .byte 0x24,0x0
44
45 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
46 .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x20,0x24
47 .byte 0x0
48
49 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
50 .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x20,0x24
51 .byte 0x0
52
53 .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
54 .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24
55 .byte 0x0
56 .text
57 .align 2
58 .globl bn_mul_add_words
59 .ent bn_mul_add_words
60bn_mul_add_words:
61 .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
62 .mask 0x00000000,0
63 .fmask 0x00000000,0
64 .set noreorder
65 .cpload $25
66 .set reorder
67 move $12,$4 # $12 = 1st argument: array that is read, updated and written back
68 move $14,$5 # $14 = 2nd argument: array of source words
69 move $9,$6 # $9 = 3rd argument: number of words left
70 move $13,$7 # $13 = 4th argument: the multiplier word
71 move $8,$0 # $8 = carry word, initially 0
72 addu $10,$12,12 # $10 = result pointer + 12; words 1..3 of a group are -8/-4/0($10)
73 addu $11,$14,12 # $11 = source pointer + 12, same addressing scheme
74$L2: # compiler-unrolled loop, four words per pass
75 lw $6,0($14) # source word 0
76 #nop
77 multu $13,$6 # 64-bit product multiplier * source word
78 mfhi $6 # $6 = high half
79 mflo $7 # $7 = low half
80 #nop
81 move $5,$8 # $5 = incoming carry
82 move $4,$0
83 lw $3,0($12) # $3 = current result word
84 addu $9,$9,-1 # one word fewer to go (decremented before the test, so an initial count of 0 is not an empty loop)
85 move $2,$0
86 addu $7,$7,$3 # low += result word
87 sltu $8,$7,$3 # $8 = 1 if that addition wrapped
88 addu $6,$6,$2
89 addu $6,$6,$8 # high += wrap bit
90 addu $7,$7,$5 # low += incoming carry
91 sltu $2,$7,$5 # $2 = 1 if that addition wrapped
92 addu $6,$6,$4
93 addu $6,$6,$2 # high += wrap bit
94 srl $3,$6,0
95 move $2,$0
96 move $8,$3 # outgoing carry = high half
97 .set noreorder
98 .set nomacro
99 beq $9,$0,$L3 # done when the count reaches zero
100 sw $7,0($12) # delay slot: store the low half (executed on both paths)
101 .set macro
102 .set reorder
103
104 lw $6,-8($11) # word 1 of the group, same sequence as above
105 #nop
106 multu $13,$6
107 mfhi $6
108 mflo $7
109 #nop
110 move $5,$8
111 move $4,$0
112 lw $3,-8($10)
113 addu $9,$9,-1
114 move $2,$0
115 addu $7,$7,$3
116 sltu $8,$7,$3
117 addu $6,$6,$2
118 addu $6,$6,$8
119 addu $7,$7,$5
120 sltu $2,$7,$5
121 addu $6,$6,$4
122 addu $6,$6,$2
123 srl $3,$6,0
124 move $2,$0
125 move $8,$3
126 .set noreorder
127 .set nomacro
128 beq $9,$0,$L3
129 sw $7,-8($10)
130 .set macro
131 .set reorder
132
133 lw $6,-4($11) # word 2 of the group
134 #nop
135 multu $13,$6
136 mfhi $6
137 mflo $7
138 #nop
139 move $5,$8
140 move $4,$0
141 lw $3,-4($10)
142 addu $9,$9,-1
143 move $2,$0
144 addu $7,$7,$3
145 sltu $8,$7,$3
146 addu $6,$6,$2
147 addu $6,$6,$8
148 addu $7,$7,$5
149 sltu $2,$7,$5
150 addu $6,$6,$4
151 addu $6,$6,$2
152 srl $3,$6,0
153 move $2,$0
154 move $8,$3
155 .set noreorder
156 .set nomacro
157 beq $9,$0,$L3
158 sw $7,-4($10)
159 .set macro
160 .set reorder
161
162 lw $6,0($11) # word 3 of the group
163 #nop
164 multu $13,$6
165 mfhi $6
166 mflo $7
167 #nop
168 move $5,$8
169 move $4,$0
170 lw $3,0($10)
171 addu $9,$9,-1
172 move $2,$0
173 addu $7,$7,$3
174 sltu $8,$7,$3
175 addu $6,$6,$2
176 addu $6,$6,$8
177 addu $7,$7,$5
178 sltu $2,$7,$5
179 addu $6,$6,$4
180 addu $6,$6,$2
181 srl $3,$6,0
182 move $2,$0
183 move $8,$3
184 .set noreorder
185 .set nomacro
186 beq $9,$0,$L3
187 sw $7,0($10)
188 .set macro
189 .set reorder
190
191 addu $11,$11,16 # advance all four pointers by one group (16 bytes)
192 addu $14,$14,16
193 addu $10,$10,16
194 .set noreorder
195 .set nomacro
196 j $L2
197 addu $12,$12,16 # delay slot
198 .set macro
199 .set reorder
200
201$L3:
202 .set noreorder
203 .set nomacro
204 j $31 # return
205 move $2,$8 # delay slot: return value = final carry word
206 .set macro
207 .set reorder
208
209 .end bn_mul_add_words
210 .align 2
211 .globl bn_mul_words
212 .ent bn_mul_words
213bn_mul_words:
214 .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
215 .mask 0x00000000,0
216 .fmask 0x00000000,0
217 .set noreorder
218 .cpload $25
219 .set reorder
220 move $11,$4 # $11 = 1st argument: destination array
221 move $12,$5 # $12 = 2nd argument: source array
222 move $8,$6 # $8 = 3rd argument: number of words left
223 move $6,$0 # $6 = carry word, initially 0 ($7, the 4th argument, stays the multiplier)
224 addu $10,$11,12 # $10 = destination + 12; words 1..3 of a group are -8/-4/0($10)
225 addu $9,$12,12 # $9 = source + 12, same addressing scheme
226$L10: # compiler-unrolled loop, four words per pass
227 lw $4,0($12) # source word 0
228 #nop
229 multu $7,$4 # 64-bit product multiplier * source word
230 mfhi $4 # $4 = high half
231 mflo $5 # $5 = low half
232 #nop
233 move $3,$6 # $3 = incoming carry
234 move $2,$0
235 addu $8,$8,-1 # one word fewer to go (decremented before the test)
236 addu $5,$5,$3 # low += incoming carry
237 sltu $6,$5,$3 # $6 = 1 if that addition wrapped
238 addu $4,$4,$2
239 addu $4,$4,$6 # high += wrap bit
240 srl $3,$4,0
241 move $2,$0
242 move $6,$3 # outgoing carry = high half
243 .set noreorder
244 .set nomacro
245 beq $8,$0,$L11 # done when the count reaches zero
246 sw $5,0($11) # delay slot: store the low half (executed on both paths)
247 .set macro
248 .set reorder
249
250 lw $4,-8($9) # word 1 of the group, same sequence as above
251 #nop
252 multu $7,$4
253 mfhi $4
254 mflo $5
255 #nop
256 move $3,$6
257 move $2,$0
258 addu $8,$8,-1
259 addu $5,$5,$3
260 sltu $6,$5,$3
261 addu $4,$4,$2
262 addu $4,$4,$6
263 srl $3,$4,0
264 move $2,$0
265 move $6,$3
266 .set noreorder
267 .set nomacro
268 beq $8,$0,$L11
269 sw $5,-8($10)
270 .set macro
271 .set reorder
272
273 lw $4,-4($9) # word 2 of the group
274 #nop
275 multu $7,$4
276 mfhi $4
277 mflo $5
278 #nop
279 move $3,$6
280 move $2,$0
281 addu $8,$8,-1
282 addu $5,$5,$3
283 sltu $6,$5,$3
284 addu $4,$4,$2
285 addu $4,$4,$6
286 srl $3,$4,0
287 move $2,$0
288 move $6,$3
289 .set noreorder
290 .set nomacro
291 beq $8,$0,$L11
292 sw $5,-4($10)
293 .set macro
294 .set reorder
295
296 lw $4,0($9) # word 3 of the group
297 #nop
298 multu $7,$4
299 mfhi $4
300 mflo $5
301 #nop
302 move $3,$6
303 move $2,$0
304 addu $8,$8,-1
305 addu $5,$5,$3
306 sltu $6,$5,$3
307 addu $4,$4,$2
308 addu $4,$4,$6
309 srl $3,$4,0
310 move $2,$0
311 move $6,$3
312 .set noreorder
313 .set nomacro
314 beq $8,$0,$L11
315 sw $5,0($10)
316 .set macro
317 .set reorder
318
319 addu $9,$9,16 # advance all four pointers by one group (16 bytes)
320 addu $12,$12,16
321 addu $10,$10,16
322 .set noreorder
323 .set nomacro
324 j $L10
325 addu $11,$11,16 # delay slot
326 .set macro
327 .set reorder
328
329$L11:
330 .set noreorder
331 .set nomacro
332 j $31 # return
333 move $2,$6 # delay slot: return value = final carry word
334 .set macro
335 .set reorder
336
337 .end bn_mul_words
338 .align 2
339 .globl bn_sqr_words
340 .ent bn_sqr_words
341bn_sqr_words:
342 .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
343 .mask 0x00000000,0
344 .fmask 0x00000000,0
345 .set noreorder
346 .cpload $25
347 .set reorder
348 move $9,$4 # $9 = 1st argument: output array (two words written per input word)
349 addu $7,$9,28 # $7 = output + 28; output words 1..7 of a group are -24..0($7)
350 addu $8,$5,12 # $8 = 2nd argument (input array) + 12; inputs 1..3 are -8/-4/0($8)
351$L18: # compiler-unrolled loop, four input words per pass; $6 (3rd argument) is the remaining count
352 lw $2,0($5) # input word 0
353 #nop
354 multu $2,$2 # 64-bit square
355 mfhi $2 # $2 = high half
356 mflo $3 # $3 = low half
357 #nop
358 addu $6,$6,-1 # one word fewer to go (decremented before the test)
359 sw $3,0($9) # low half to output word 0
360 srl $3,$2,0
361 move $2,$0
362 .set noreorder
363 .set nomacro
364 beq $6,$0,$L19 # done when the count reaches zero
365 sw $3,-24($7) # delay slot: high half to output word 1
366 .set macro
367 .set reorder
368
369 lw $2,-8($8) # input word 1 -> output words 2 and 3
370 #nop
371 multu $2,$2
372 mfhi $2
373 mflo $3
374 #nop
375 addu $6,$6,-1
376 sw $3,-20($7)
377 srl $3,$2,0
378 move $2,$0
379 .set noreorder
380 .set nomacro
381 beq $6,$0,$L19
382 sw $3,-16($7)
383 .set macro
384 .set reorder
385
386 lw $2,-4($8) # input word 2 -> output words 4 and 5
387 #nop
388 multu $2,$2
389 mfhi $2
390 mflo $3
391 #nop
392 addu $6,$6,-1
393 sw $3,-12($7)
394 srl $3,$2,0
395 move $2,$0
396 .set noreorder
397 .set nomacro
398 beq $6,$0,$L19
399 sw $3,-8($7)
400 .set macro
401 .set reorder
402
403 lw $2,0($8) # input word 3 -> output words 6 and 7
404 #nop
405 multu $2,$2
406 mfhi $2
407 mflo $3
408 #nop
409 addu $6,$6,-1
410 sw $3,-4($7)
411 srl $3,$2,0
412 move $2,$0
413 .set noreorder
414 .set nomacro
415 beq $6,$0,$L19
416 sw $3,0($7)
417 .set macro
418 .set reorder
419
420 addu $8,$8,16 # input pointers advance 16 bytes per group
421 addu $5,$5,16
422 addu $7,$7,32 # output pointers advance 32 bytes per group
423 .set noreorder
424 .set nomacro
425 j $L18
426 addu $9,$9,32 # delay slot
427 .set macro
428 .set reorder
429
430$L19:
431 j $31 # return
432 .end bn_sqr_words
433 .rdata
434 .align 2
435$LC0: # the bytes below spell "Division would overflow\n" plus a terminating 0
436
437 .byte 0x44,0x69,0x76,0x69,0x73,0x69,0x6f,0x6e
438 .byte 0x20,0x77,0x6f,0x75,0x6c,0x64,0x20,0x6f
439 .byte 0x76,0x65,0x72,0x66,0x6c,0x6f,0x77,0xa
440 .byte 0x0
441 .text
442 .align 2
443 .globl bn_div64
444 .ent bn_div64
445bn_div64: # divides the 64-bit value h:l (1st:2nd argument) by d (3rd argument); quotient returned in $2, -1 if d == 0
446 .frame $sp,56,$31 # vars= 0, regs= 7/0, args= 16, extra= 8
447 .mask 0x901f0000,-8
448 .fmask 0x00000000,0
449 .set noreorder
450 .cpload $25
451 .set reorder
452 subu $sp,$sp,56
453 .cprestore 16
454 sw $16,24($sp)
455 move $16,$4 # $16 = h
456 sw $17,28($sp)
457 move $17,$5 # $17 = l
458 sw $18,32($sp)
459 move $18,$6 # $18 = d
460 sw $20,40($sp)
461 move $20,$0 # $20 = accumulated result, initially 0
462 sw $19,36($sp)
463 li $19,0x00000002 # 2
464 sw $31,48($sp)
465 .set noreorder
466 .set nomacro
467 bne $18,$0,$L26 # d != 0: go divide
468 sw $28,44($sp)
469 .set macro
470 .set reorder
471
472 .set noreorder
473 .set nomacro
474 j $L43 # d == 0: return -1
475 li $2,-1 # 0xffffffff
476 .set macro
477 .set reorder
478
479$L26:
480 move $4,$18
481 jal BN_num_bits_word # bits = BN_num_bits_word(d)
482 move $4,$2 # $4 = bits
483 li $2,0x00000020 # 32
484 .set noreorder
485 .set nomacro
486 beq $4,$2,$L27 # bits == 32: skip the overflow check
487 li $2,0x00000001 # 1
488 .set macro
489 .set reorder
490
491 sll $2,$2,$4 # 1 << bits
492 sltu $2,$2,$16 # is h greater than (1 << bits)?
493 .set noreorder
494 .set nomacro
495 beq $2,$0,$L44 # no: continue
496 li $5,0x00000020 # 32
497 .set macro
498 .set reorder
499
500 la $4,__iob+32 # stream argument (presumably stderr in this libc - TODO confirm)
501 la $5,$LC0
502 jal fprintf # print the overflow message ...
503 jal abort # ... and abort
504$L27:
505 li $5,0x00000020 # 32
506$L44:
507 sltu $2,$16,$18 # h < d ?
508 .set noreorder
509 .set nomacro
510 bne $2,$0,$L28
511 subu $4,$5,$4 # delay slot: i = 32 - bits (normalisation shift)
512 .set macro
513 .set reorder
514
515 subu $16,$16,$18 # h >= d: h -= d
516$L28:
517 .set noreorder
518 .set nomacro
519 beq $4,$0,$L29 # i == 0: already normalised
520 li $10,-65536 # 0xffff0000
521 .set macro
522 .set reorder
523
524 sll $18,$18,$4 # d <<= i
525 sll $3,$16,$4
526 subu $2,$5,$4
527 srl $2,$17,$2
528 or $16,$3,$2 # h = (h << i) | (l >> (32 - i))
529 sll $17,$17,$4 # l <<= i
530$L29:
531 srl $7,$18,16 # $7 = dh, upper 16 bits of d
532 andi $9,$18,0xffff # $9 = dl, lower 16 bits of d
533$L30: # outer loop, run twice ($19 counts down from 2): one 16-bit quotient digit per pass
534 srl $2,$16,16
535 .set noreorder
536 .set nomacro
537 beq $2,$7,$L34 # (h >> 16) == dh: keep q = 0xffff from the delay slot
538 li $6,0x0000ffff # 65535
539 .set macro
540 .set reorder
541
542 divu $6,$16,$7 # otherwise q = h / dh
543$L34:
544 mult $6,$9
545 mflo $5 # $5 = tl = q * dl
546 #nop
547 #nop
548 mult $6,$7
549 and $2,$17,$10
550 srl $8,$2,16 # $8 = upper 16 bits of l
551 mflo $4 # $4 = th = q * dh
552$L35: # correction loop: lower q while the estimate is too large
553 subu $3,$16,$4 # t = h - th
554 and $2,$3,$10
555 .set noreorder
556 .set nomacro
557 bne $2,$0,$L36 # upper half of t non-zero: q is acceptable
558 sll $2,$3,16
559 .set macro
560 .set reorder
561
562 addu $2,$2,$8
563 sltu $2,$2,$5 # ((t << 16) + upper half of l) < tl ?
564 .set noreorder
565 .set nomacro
566 beq $2,$0,$L36 # no: q is acceptable
567 subu $5,$5,$9 # delay slot: tl -= dl (runs on both paths; $5 is recomputed at $L36)
568 .set macro
569 .set reorder
570
571 subu $4,$4,$7 # th -= dh
572 .set noreorder
573 .set nomacro
574 j $L35
575 addu $6,$6,-1 # delay slot: q--
576 .set macro
577 .set reorder
578
579$L36:
580 mult $6,$7
581 mflo $5 # $5 = th = q * dh
582 #nop
583 #nop
584 mult $6,$9
585 mflo $4 # $4 = tl = q * dl
586 #nop
587 #nop
588 srl $3,$4,16 # $3 = tl >> 16
589 sll $2,$4,16
590 and $4,$2,$10 # tl = (tl << 16) & 0xffff0000
591 sltu $2,$17,$4 # l < tl ?
592 .set noreorder
593 .set nomacro
594 beq $2,$0,$L40
595 addu $5,$5,$3 # delay slot: th += tl >> 16
596 .set macro
597 .set reorder
598
599 addu $5,$5,1 # borrow from the low word: th++
600$L40:
601 sltu $2,$16,$5 # h < th ?
602 .set noreorder
603 .set nomacro
604 beq $2,$0,$L41
605 subu $17,$17,$4 # delay slot: l -= tl
606 .set macro
607 .set reorder
608
609 addu $16,$16,$18 # h += d
610 addu $6,$6,-1 # q--
611$L41:
612 addu $19,$19,-1
613 .set noreorder
614 .set nomacro
615 beq $19,$0,$L31 # second digit done: finish
616 subu $16,$16,$5 # delay slot: h -= th
617 .set macro
618 .set reorder
619
620 sll $20,$6,16 # result = q << 16 (first digit)
621 sll $3,$16,16
622 srl $2,$17,16
623 or $16,$3,$2 # h = (h << 16) | (l >> 16)
624 .set noreorder
625 .set nomacro
626 j $L30
627 sll $17,$17,16 # delay slot: l <<= 16
628 .set macro
629 .set reorder
630
631$L31:
632 or $2,$20,$6 # return value = first digit (shifted) | second digit
633$L43: # common exit: restore saved registers and return
634 lw $31,48($sp)
635 lw $20,40($sp)
636 lw $19,36($sp)
637 lw $18,32($sp)
638 lw $17,28($sp)
639 lw $16,24($sp)
640 addu $sp,$sp,56
641 j $31
642 .end bn_div64
643
644 .globl abort .text
645 .globl fprintf .text
646 .globl BN_num_bits_word .text
diff --git a/src/lib/libcrypto/bn/asm/sparcv8.S b/src/lib/libcrypto/bn/asm/sparcv8.S
new file mode 100644
index 0000000000..88c5dc480a
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/sparcv8.S
@@ -0,0 +1,1458 @@
1.ident "sparcv8.s, Version 1.4"
2.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contribution to the OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * See bn_asm.sparc.v8plus.S for more details.
22 */
23
24/*
25 * Revision history.
26 *
27 * 1.1 - new loop unrolling model(*);
28 * 1.2 - made gas friendly;
29 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
30 * 1.4 - some retunes;
31 *
32 * (*) see bn_asm.sparc.v8plus.S for details
33 */
34
35.section ".text",#alloc,#execinstr
36.file "bn_asm.sparc.v8.S"
37
38.align 32
39
40.global bn_mul_add_words
41/* rp[i] = low word of (rp[i] + ap[i]*w + carry) for i = 0..num-1; returns the last carry word, or 0 if num <= 0.
42 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
43 * BN_ULONG *rp,*ap;
44 * int num;
45 * BN_ULONG w;
46 */
47bn_mul_add_words:
48 cmp %o2,0
49 bg,a .L_bn_mul_add_words_proceed ! num > 0: go on
50 ld [%o1],%g2 ! annulled delay slot: preload ap[0] only when the branch is taken
51 retl ! num <= 0: return 0
52 clr %o0
53
54.L_bn_mul_add_words_proceed:
55 andcc %o2,-4,%g0 ! any bits above the low two set in num?
56 bz .L_bn_mul_add_words_tail ! no: only the 1..3 word tail is needed
57 clr %o5 ! delay slot: carry = 0
58
59.L_bn_mul_add_words_loop: ! four words per pass; carry word lives in %o5
60 ld [%o0],%o4 ! rp[0]
61 ld [%o1+4],%g3 ! preload ap[1]
62 umul %o3,%g2,%g2 ! low half of w*ap[0]; high half lands in %y
63 rd %y,%g1 ! %g1 = high half
64 addcc %o4,%o5,%o4 ! rp word + carry
65 addx %g1,0,%g1 ! high += carry bit
66 addcc %o4,%g2,%o4 ! + low half of the product
67 st %o4,[%o0]
68 addx %g1,0,%o5 ! new carry word = high + carry bit
69
70 ld [%o0+4],%o4
71 ld [%o1+8],%g2
72 umul %o3,%g3,%g3
73 dec 4,%o2 ! four words accounted for in this pass
74 rd %y,%g1
75 addcc %o4,%o5,%o4
76 addx %g1,0,%g1
77 addcc %o4,%g3,%o4
78 st %o4,[%o0+4]
79 addx %g1,0,%o5
80
81 ld [%o0+8],%o4
82 ld [%o1+12],%g3
83 umul %o3,%g2,%g2
84 inc 16,%o1 ! ap += 4 words
85 rd %y,%g1
86 addcc %o4,%o5,%o4
87 addx %g1,0,%g1
88 addcc %o4,%g2,%o4
89 st %o4,[%o0+8]
90 addx %g1,0,%o5
91
92 ld [%o0+12],%o4
93 umul %o3,%g3,%g3
94 inc 16,%o0 ! rp += 4 words, so the last store below uses offset -4
95 rd %y,%g1
96 addcc %o4,%o5,%o4
97 addx %g1,0,%g1
98 addcc %o4,%g3,%o4
99 st %o4,[%o0-4]
100 addx %g1,0,%o5
101 andcc %o2,-4,%g0 ! at least four words left?
102 bnz,a .L_bn_mul_add_words_loop
103 ld [%o1],%g2 ! annulled delay slot: preload next ap[0]
104
105 tst %o2
106 bnz,a .L_bn_mul_add_words_tail ! 1..3 words left
107 ld [%o1],%g2
108.L_bn_mul_add_words_return:
109 retl
110 mov %o5,%o0 ! delay slot: return the carry word
111 nop
112
113.L_bn_mul_add_words_tail: ! up to three remaining words, fully unrolled
114 ld [%o0],%o4
115 umul %o3,%g2,%g2
116 addcc %o4,%o5,%o4
117 rd %y,%g1
118 addx %g1,0,%g1
119 addcc %o4,%g2,%o4
120 addx %g1,0,%o5
121 deccc %o2
122 bz .L_bn_mul_add_words_return
123 st %o4,[%o0] ! delay slot: executed whether or not the branch is taken
124
125 ld [%o1+4],%g2
126 ld [%o0+4],%o4
127 umul %o3,%g2,%g2
128 rd %y,%g1
129 addcc %o4,%o5,%o4
130 addx %g1,0,%g1
131 addcc %o4,%g2,%o4
132 addx %g1,0,%o5
133 deccc %o2
134 bz .L_bn_mul_add_words_return
135 st %o4,[%o0+4]
136
137 ld [%o1+8],%g2
138 ld [%o0+8],%o4
139 umul %o3,%g2,%g2
140 rd %y,%g1
141 addcc %o4,%o5,%o4
142 addx %g1,0,%g1
143 addcc %o4,%g2,%o4
144 st %o4,[%o0+8]
145 retl
146 addx %g1,0,%o0 ! delay slot: return high + carry bit directly
147
148.type bn_mul_add_words,#function
149.size bn_mul_add_words,(.-bn_mul_add_words)
150
151.align 32
152
153.global bn_mul_words
154/* rp[i] = low word of (ap[i]*w + carry) for i = 0..num-1; returns the last carry word, or 0 if num <= 0.
155 * BN_ULONG bn_mul_words(rp,ap,num,w)
156 * BN_ULONG *rp,*ap;
157 * int num;
158 * BN_ULONG w;
159 */
160bn_mul_words:
161 cmp %o2,0
162 bg,a .L_bn_mul_words_proceeed ! num > 0: go on
163 ld [%o1],%g2 ! annulled delay slot: preload ap[0] only when the branch is taken
164 retl ! num <= 0: return 0
165 clr %o0
166
167.L_bn_mul_words_proceeed:
168 andcc %o2,-4,%g0 ! any bits above the low two set in num?
169 bz .L_bn_mul_words_tail ! no: only the 1..3 word tail is needed
170 clr %o5 ! delay slot: carry = 0
171
172.L_bn_mul_words_loop: ! four words per pass; carry word lives in %o5
173 ld [%o1+4],%g3 ! preload ap[1]
174 umul %o3,%g2,%g2 ! low half of w*ap[0]; high half lands in %y
175 addcc %g2,%o5,%g2 ! low half + carry
176 rd %y,%g1
177 addx %g1,0,%o5 ! new carry word = high half + carry bit
178 st %g2,[%o0]
179
180 ld [%o1+8],%g2
181 umul %o3,%g3,%g3
182 addcc %g3,%o5,%g3
183 rd %y,%g1
184 dec 4,%o2 ! four words accounted for in this pass
185 addx %g1,0,%o5
186 st %g3,[%o0+4]
187
188 ld [%o1+12],%g3
189 umul %o3,%g2,%g2
190 addcc %g2,%o5,%g2
191 rd %y,%g1
192 inc 16,%o1 ! ap += 4 words
193 st %g2,[%o0+8]
194 addx %g1,0,%o5
195
196 umul %o3,%g3,%g3
197 addcc %g3,%o5,%g3
198 rd %y,%g1
199 inc 16,%o0 ! rp += 4 words, so the store below uses offset -4
200 addx %g1,0,%o5
201 st %g3,[%o0-4]
202 andcc %o2,-4,%g0 ! at least four words left?
203 nop
204 bnz,a .L_bn_mul_words_loop
205 ld [%o1],%g2 ! annulled delay slot: preload next ap[0]
206
207 tst %o2
208 bnz,a .L_bn_mul_words_tail ! 1..3 words left
209 ld [%o1],%g2
210.L_bn_mul_words_return:
211 retl
212 mov %o5,%o0 ! delay slot: return the carry word
213 nop
214
215.L_bn_mul_words_tail: ! up to three remaining words, fully unrolled
216 umul %o3,%g2,%g2
217 addcc %g2,%o5,%g2
218 rd %y,%g1
219 addx %g1,0,%o5
220 deccc %o2
221 bz .L_bn_mul_words_return
222 st %g2,[%o0] ! delay slot: executed whether or not the branch is taken
223 nop
224
225 ld [%o1+4],%g2
226 umul %o3,%g2,%g2
227 addcc %g2,%o5,%g2
228 rd %y,%g1
229 addx %g1,0,%o5
230 deccc %o2
231 bz .L_bn_mul_words_return
232 st %g2,[%o0+4]
233
234 ld [%o1+8],%g2
235 umul %o3,%g2,%g2
236 addcc %g2,%o5,%g2
237 rd %y,%g1
238 st %g2,[%o0+8]
239 retl
240 addx %g1,0,%o0 ! delay slot: return high half + carry bit directly
241
242.type bn_mul_words,#function
243.size bn_mul_words,(.-bn_mul_words)
244
245.align 32
246.global bn_sqr_words
247/* For i = 0..n-1 stores the 64-bit square of a[i]: low half to r[2i], high half to r[2i+1]. Nothing is stored if n <= 0.
248 * void bn_sqr_words(r,a,n)
249 * BN_ULONG *r,*a;
250 * int n;
251 */
252bn_sqr_words:
253 cmp %o2,0
254 bg,a .L_bn_sqr_words_proceeed ! n > 0: go on
255 ld [%o1],%g2 ! annulled delay slot: preload a[0] only when the branch is taken
256 retl ! n <= 0: nothing to do
257 clr %o0
258
259.L_bn_sqr_words_proceeed:
260 andcc %o2,-4,%g0 ! any bits above the low two set in n?
261 bz .L_bn_sqr_words_tail ! no: only the 1..3 word tail is needed
262 clr %o5
263
264.L_bn_sqr_words_loop: ! four input words (eight output words) per pass
265 ld [%o1+4],%g3 ! preload a[1]
266 umul %g2,%g2,%o4 ! low half of a[0] squared; high half lands in %y
267 st %o4,[%o0]
268 rd %y,%o5
269 st %o5,[%o0+4]
270
271 ld [%o1+8],%g2
272 umul %g3,%g3,%o4
273 dec 4,%o2 ! four words accounted for in this pass
274 st %o4,[%o0+8]
275 rd %y,%o5
276 st %o5,[%o0+12]
277 nop
278
279 ld [%o1+12],%g3
280 umul %g2,%g2,%o4
281 st %o4,[%o0+16]
282 rd %y,%o5
283 inc 16,%o1 ! a += 4 words
284 st %o5,[%o0+20]
285
286 umul %g3,%g3,%o4
287 inc 32,%o0 ! r += 8 words, so the stores below use offsets -8 and -4
288 st %o4,[%o0-8]
289 rd %y,%o5
290 st %o5,[%o0-4]
291 andcc %o2,-4,%g2 ! at least four words left? (result also lands in %g2, which is reloaded before its next use)
292 bnz,a .L_bn_sqr_words_loop
293 ld [%o1],%g2 ! annulled delay slot: preload next a[0]
294
295 tst %o2
296 nop
297 bnz,a .L_bn_sqr_words_tail ! 1..3 words left
298 ld [%o1],%g2
299.L_bn_sqr_words_return:
300 retl
301 clr %o0
302
303.L_bn_sqr_words_tail: ! up to three remaining words, fully unrolled
304 umul %g2,%g2,%o4
305 st %o4,[%o0]
306 deccc %o2
307 rd %y,%o5
308 bz .L_bn_sqr_words_return
309 st %o5,[%o0+4] ! delay slot: executed whether or not the branch is taken
310
311 ld [%o1+4],%g2
312 umul %g2,%g2,%o4
313 st %o4,[%o0+8]
314 deccc %o2
315 rd %y,%o5
316 nop
317 bz .L_bn_sqr_words_return
318 st %o5,[%o0+12]
319
320 ld [%o1+8],%g2
321 umul %g2,%g2,%o4
322 st %o4,[%o0+16]
323 rd %y,%o5
324 st %o5,[%o0+20]
325 retl
326 clr %o0
327
328.type bn_sqr_words,#function
329.size bn_sqr_words,(.-bn_sqr_words)
330
331.align 32
332
333.global bn_div_words
334/* Unsigned divide of the 64-bit value h:l by d; the quotient is returned in %o0.
335 * BN_ULONG bn_div_words(h,l,d)
336 * BN_ULONG h,l,d; -- arrive in %o0,%o1,%o2; d == 0 is not checked here.
337 * WRY is a delayed write on SPARC v8, so three instructions separate it from udiv. */
338bn_div_words:
339 wr %o0,%y ! %y = h, the upper half of the dividend
340 nop ! 1st instruction after the write to %y
341 nop ! 2nd
342 retl ! 3rd: return; the udiv below runs in its delay slot
343 udiv %o1,%o2,%o0 ! delay slot: %o0 = (%y:%o1) / %o2, %y is settled by now
344.type bn_div_words,#function
345.size bn_div_words,(.-bn_div_words)
346
347.align 32
348
349.global bn_add_words
350/* rp[i] = ap[i] + bp[i] + carry for i = 0..n-1; returns the final carry (0 or 1), or 0 if n <= 0.
351 * BN_ULONG bn_add_words(rp,ap,bp,n)
352 * BN_ULONG *rp,*ap,*bp;
353 * int n;
354 */
355bn_add_words:
356 cmp %o3,0
357 bg,a .L_bn_add_words_proceed ! n > 0: go on
358 ld [%o1],%o4 ! annulled delay slot: preload ap[0] only when the branch is taken
359 retl ! n <= 0: return 0
360 clr %o0
361
362.L_bn_add_words_proceed:
363 andcc %o3,-4,%g0 ! any bits above the low two set in n?
364 bz .L_bn_add_words_tail ! no: only the 1..3 word tail is needed
365 clr %g1 ! delay slot: saved carry = 0
366 ba .L_bn_add_words_warn_loop ! enter the loop past its first load (ap[0] is already in %o4)
367 addcc %g0,0,%g0 ! clear carry flag
368
369.L_bn_add_words_loop: ! four words per pass; the carry flag chains through the addxcc instructions
370 ld [%o1],%o4
371.L_bn_add_words_warn_loop:
372 ld [%o2],%o5
373 ld [%o1+4],%g3
374 ld [%o2+4],%g4
375 dec 4,%o3 ! four words accounted for in this pass
376 addxcc %o5,%o4,%o5
377 st %o5,[%o0]
378
379 ld [%o1+8],%o4
380 ld [%o2+8],%o5
381 inc 16,%o1 ! ap += 4 words, so ap[3] is loaded from offset -4 below
382 addxcc %g3,%g4,%g3
383 st %g3,[%o0+4]
384
385 ld [%o1-4],%g3
386 ld [%o2+12],%g4
387 inc 16,%o2 ! bp += 4 words
388 addxcc %o5,%o4,%o5
389 st %o5,[%o0+8]
390
391 inc 16,%o0 ! rp += 4 words
392 addxcc %g3,%g4,%g3
393 st %g3,[%o0-4]
394 addx %g0,0,%g1 ! save the carry flag in %g1 before andcc overwrites the flags
395 andcc %o3,-4,%g0 ! at least four words left?
396 bnz,a .L_bn_add_words_loop
397 addcc %g1,-1,%g0 ! annulled delay slot: carry flag = (%g1 != 0), restoring the saved carry
398
399 tst %o3
400 bnz,a .L_bn_add_words_tail ! 1..3 words left
401 ld [%o1],%o4
402.L_bn_add_words_return:
403 retl
404 mov %g1,%o0 ! delay slot: return the saved carry
405
406.L_bn_add_words_tail: ! up to three remaining words, fully unrolled
407 addcc %g1,-1,%g0 ! restore the carry flag from %g1
408 ld [%o2],%o5
409 addxcc %o5,%o4,%o5
410 addx %g0,0,%g1 ! save carry again; deccc below changes the flags
411 deccc %o3
412 bz .L_bn_add_words_return
413 st %o5,[%o0] ! delay slot: executed whether or not the branch is taken
414
415 ld [%o1+4],%o4
416 addcc %g1,-1,%g0
417 ld [%o2+4],%o5
418 addxcc %o5,%o4,%o5
419 addx %g0,0,%g1
420 deccc %o3
421 bz .L_bn_add_words_return
422 st %o5,[%o0+4]
423
424 ld [%o1+8],%o4
425 addcc %g1,-1,%g0
426 ld [%o2+8],%o5
427 addxcc %o5,%o4,%o5
428 st %o5,[%o0+8]
429 retl
430 addx %g0,0,%o0 ! delay slot: return the carry flag directly
431
432.type bn_add_words,#function
433.size bn_add_words,(.-bn_add_words)
434
435.align 32
436
437.global bn_sub_words
438/* rp[i] = ap[i] - bp[i] - borrow for i = 0..n-1; returns the final borrow (0 or 1), or 0 if n <= 0.
439 * BN_ULONG bn_sub_words(rp,ap,bp,n)
440 * BN_ULONG *rp,*ap,*bp;
441 * int n;
442 */
443bn_sub_words:
444 cmp %o3,0
445 bg,a .L_bn_sub_words_proceed ! n > 0: go on
446 ld [%o1],%o4 ! annulled delay slot: preload ap[0] only when the branch is taken
447 retl ! n <= 0: return 0
448 clr %o0
449
450.L_bn_sub_words_proceed:
451 andcc %o3,-4,%g0 ! any bits above the low two set in n?
452 bz .L_bn_sub_words_tail ! no: only the 1..3 word tail is needed
453 clr %g1 ! delay slot: saved borrow = 0
454 ba .L_bn_sub_words_warm_loop ! enter the loop past its first load (ap[0] is already in %o4)
455 addcc %g0,0,%g0 ! clear carry flag
456
457.L_bn_sub_words_loop: ! four words per pass; the borrow chains through the subxcc instructions
458 ld [%o1],%o4
459.L_bn_sub_words_warm_loop:
460 ld [%o2],%o5
461 ld [%o1+4],%g3
462 ld [%o2+4],%g4
463 dec 4,%o3 ! four words accounted for in this pass
464 subxcc %o4,%o5,%o5
465 st %o5,[%o0]
466
467 ld [%o1+8],%o4
468 ld [%o2+8],%o5
469 inc 16,%o1 ! ap += 4 words, so ap[3] is loaded from offset -4 below
470 subxcc %g3,%g4,%g4
471 st %g4,[%o0+4]
472
473 ld [%o1-4],%g3
474 ld [%o2+12],%g4
475 inc 16,%o2 ! bp += 4 words
476 subxcc %o4,%o5,%o5
477 st %o5,[%o0+8]
478
479 inc 16,%o0 ! rp += 4 words
480 subxcc %g3,%g4,%g4
481 st %g4,[%o0-4]
482 addx %g0,0,%g1 ! save the borrow (carry flag) in %g1 before andcc overwrites the flags
483 andcc %o3,-4,%g0 ! at least four words left?
484 bnz,a .L_bn_sub_words_loop
485 addcc %g1,-1,%g0 ! annulled delay slot: carry flag = (%g1 != 0), restoring the saved borrow
486
487 tst %o3
488 nop
489 bnz,a .L_bn_sub_words_tail ! 1..3 words left
490 ld [%o1],%o4
491.L_bn_sub_words_return:
492 retl
493 mov %g1,%o0 ! delay slot: return the saved borrow
494
495.L_bn_sub_words_tail: ! up to three remaining words, fully unrolled
496 addcc %g1,-1,%g0 ! restore the carry flag from %g1
497 ld [%o2],%o5
498 subxcc %o4,%o5,%o5
499 addx %g0,0,%g1 ! save borrow again; deccc below changes the flags
500 deccc %o3
501 bz .L_bn_sub_words_return
502 st %o5,[%o0] ! delay slot: executed whether or not the branch is taken
503 nop
504
505 ld [%o1+4],%o4
506 addcc %g1,-1,%g0
507 ld [%o2+4],%o5
508 subxcc %o4,%o5,%o5
509 addx %g0,0,%g1
510 deccc %o3
511 bz .L_bn_sub_words_return
512 st %o5,[%o0+4]
513
514 ld [%o1+8],%o4
515 addcc %g1,-1,%g0
516 ld [%o2+8],%o5
517 subxcc %o4,%o5,%o5
518 st %o5,[%o0+8]
519 retl
520 addx %g0,0,%o0 ! delay slot: return the borrow flag directly
521
522.type bn_sub_words,#function
523.size bn_sub_words,(.-bn_sub_words)
524
525#define FRAME_SIZE -96 /* added to %sp by the save instruction at routine entry */
526
527/*
528 * Here is the register usage map for *all* routines below: t_1/t_2 receive the low/high halves of each umul product, c_1..c_3 are the three accumulator words named c1,c2,c3 in the mul_add_c comments, ap/bp/rp index words of the arrays in %i1/%i2/%i0, and a_N/b_N hold loaded input words.
529 */
530#define t_1 %o0
531#define t_2 %o1
532#define c_1 %o2
533#define c_2 %o3
534#define c_3 %o4
535
536#define ap(I) [%i1+4*I]
537#define bp(I) [%i2+4*I]
538#define rp(I) [%i0+4*I]
539
540#define a_0 %l0
541#define a_1 %l1
542#define a_2 %l2
543#define a_3 %l3
544#define a_4 %l4
545#define a_5 %l5
546#define a_6 %l6
547#define a_7 %l7
548
549#define b_0 %i3
550#define b_1 %i4
551#define b_2 %i5
552#define b_3 %o5
553#define b_4 %g1
554#define b_5 %g2
555#define b_6 %g3
556#define b_7 %g4
557
558.align 32
559.global bn_mul_comba8
560/*
561 * void bn_mul_comba8(r,a,b)
562 * BN_ULONG *r,*a,*b;
563 */
/*
 * Fully unrolled 8x8-word multiplication: r[0..15] = a[0..7] * b[0..7].
 *
 * The result is produced one 32-bit column at a time.  For column k every
 * product a[i]*b[j] with i+j == k is formed with umul (low word in t_1,
 * high word fetched from %y into t_2) and added, with carry propagation
 * through addcc/addxcc/addx, into a three-register accumulator.  After the
 * column word has been stored the accumulator registers rotate roles, and
 * the register that becomes the new top word is re-initialised by the
 * "addx %g0,%g0,..." in the first step of the next column.
 *
 * The trailing "!" comments name the mul_add_c() step being performed and
 * the r[] word being stored; operand loads are interleaved with the
 * arithmetic.  The caller's %o0 is set to 0 by "restore %g0,%g0,%o0".
 */
564bn_mul_comba8:
565 save %sp,FRAME_SIZE,%sp
566 ld ap(0),a_0
567 ld bp(0),b_0
568 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
569 ld bp(1),b_1
570 rd %y,c_2
571 st c_1,rp(0) !r[0]=c1;
572
573 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
574 ld ap(1),a_1
575 addcc c_2,t_1,c_2
576 rd %y,t_2
577 addxcc %g0,t_2,c_3 !=
578 addx %g0,%g0,c_1
579 ld ap(2),a_2
580 umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1);
581 addcc c_2,t_1,c_2 !=
582 rd %y,t_2
583 addxcc c_3,t_2,c_3
584 st c_2,rp(1) !r[1]=c2;
585 addx c_1,%g0,c_1 !=
586
587 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
588 addcc c_3,t_1,c_3
589 rd %y,t_2
590 addxcc c_1,t_2,c_1 !=
591 addx %g0,%g0,c_2
592 ld bp(2),b_2
593 umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
594 addcc c_3,t_1,c_3 !=
595 rd %y,t_2
596 addxcc c_1,t_2,c_1
597 ld bp(3),b_3
598 addx c_2,%g0,c_2 !=
599 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
600 addcc c_3,t_1,c_3
601 rd %y,t_2
602 addxcc c_1,t_2,c_1 !=
603 addx c_2,%g0,c_2
604 st c_3,rp(2) !r[2]=c3;
605
606 umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
607 addcc c_1,t_1,c_1 !=
608 rd %y,t_2
609 addxcc c_2,t_2,c_2
610 addx %g0,%g0,c_3
611 umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
612 addcc c_1,t_1,c_1
613 rd %y,t_2
614 addxcc c_2,t_2,c_2
615 addx c_3,%g0,c_3 !=
616 ld ap(3),a_3
617 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
618 addcc c_1,t_1,c_1
619 rd %y,t_2 !=
620 addxcc c_2,t_2,c_2
621 addx c_3,%g0,c_3
622 ld ap(4),a_4
623 umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
624 addcc c_1,t_1,c_1
625 rd %y,t_2
626 addxcc c_2,t_2,c_2
627 addx c_3,%g0,c_3 !=
628 st c_1,rp(3) !r[3]=c1;
629
630 umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
631 addcc c_2,t_1,c_2
632 rd %y,t_2 !=
633 addxcc c_3,t_2,c_3
634 addx %g0,%g0,c_1
635 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
636 addcc c_2,t_1,c_2 !=
637 rd %y,t_2
638 addxcc c_3,t_2,c_3
639 addx c_1,%g0,c_1
640 umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
641 addcc c_2,t_1,c_2
642 rd %y,t_2
643 addxcc c_3,t_2,c_3
644 addx c_1,%g0,c_1 !=
645 ld bp(4),b_4
646 umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
647 addcc c_2,t_1,c_2
648 rd %y,t_2 !=
649 addxcc c_3,t_2,c_3
650 addx c_1,%g0,c_1
651 ld bp(5),b_5
652 umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1);
653 addcc c_2,t_1,c_2
654 rd %y,t_2
655 addxcc c_3,t_2,c_3
656 addx c_1,%g0,c_1 !=
657 st c_2,rp(4) !r[4]=c2;
658
659 umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
660 addcc c_3,t_1,c_3
661 rd %y,t_2 !=
662 addxcc c_1,t_2,c_1
663 addx %g0,%g0,c_2
664 umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
665 addcc c_3,t_1,c_3 !=
666 rd %y,t_2
667 addxcc c_1,t_2,c_1
668 addx c_2,%g0,c_2
669 umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2);
670 addcc c_3,t_1,c_3
671 rd %y,t_2
672 addxcc c_1,t_2,c_1
673 addx c_2,%g0,c_2 !=
674 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
675 addcc c_3,t_1,c_3
676 rd %y,t_2
677 addxcc c_1,t_2,c_1 !=
678 addx c_2,%g0,c_2
679 ld ap(5),a_5
680 umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
681 addcc c_3,t_1,c_3 !=
682 rd %y,t_2
683 addxcc c_1,t_2,c_1
684 ld ap(6),a_6
685 addx c_2,%g0,c_2 !=
686 umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2);
687 addcc c_3,t_1,c_3
688 rd %y,t_2
689 addxcc c_1,t_2,c_1 !=
690 addx c_2,%g0,c_2
691 st c_3,rp(5) !r[5]=c3;
692
693 umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
694 addcc c_1,t_1,c_1 !=
695 rd %y,t_2
696 addxcc c_2,t_2,c_2
697 addx %g0,%g0,c_3
698 umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
699 addcc c_1,t_1,c_1
700 rd %y,t_2
701 addxcc c_2,t_2,c_2
702 addx c_3,%g0,c_3 !=
703 umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3);
704 addcc c_1,t_1,c_1
705 rd %y,t_2
706 addxcc c_2,t_2,c_2 !=
707 addx c_3,%g0,c_3
708 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
709 addcc c_1,t_1,c_1
710 rd %y,t_2 !=
711 addxcc c_2,t_2,c_2
712 addx c_3,%g0,c_3
713 umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3);
714 addcc c_1,t_1,c_1 !=
715 rd %y,t_2
716 addxcc c_2,t_2,c_2
717 ld bp(6),b_6
718 addx c_3,%g0,c_3 !=
719 umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
720 addcc c_1,t_1,c_1
721 rd %y,t_2
722 addxcc c_2,t_2,c_2 !=
723 addx c_3,%g0,c_3
724 ld bp(7),b_7
725 umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
726 addcc c_1,t_1,c_1 !=
727 rd %y,t_2
728 addxcc c_2,t_2,c_2
729 st c_1,rp(6) !r[6]=c1;
730 addx c_3,%g0,c_3 !=
731
732 umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
733 addcc c_2,t_1,c_2
734 rd %y,t_2
735 addxcc c_3,t_2,c_3 !=
736 addx %g0,%g0,c_1
737 umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
738 addcc c_2,t_1,c_2
739 rd %y,t_2 !=
740 addxcc c_3,t_2,c_3
741 addx c_1,%g0,c_1
742 umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
743 addcc c_2,t_1,c_2 !=
744 rd %y,t_2
745 addxcc c_3,t_2,c_3
746 addx c_1,%g0,c_1
747 umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1);
748 addcc c_2,t_1,c_2
749 rd %y,t_2
750 addxcc c_3,t_2,c_3
751 addx c_1,%g0,c_1 !=
752 umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
753 addcc c_2,t_1,c_2
754 rd %y,t_2
755 addxcc c_3,t_2,c_3 !=
756 addx c_1,%g0,c_1
757 umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
758 addcc c_2,t_1,c_2
759 rd %y,t_2 !=
760 addxcc c_3,t_2,c_3
761 addx c_1,%g0,c_1
762 ld ap(7),a_7
763 umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
764 addcc c_2,t_1,c_2
765 rd %y,t_2
766 addxcc c_3,t_2,c_3
767 addx c_1,%g0,c_1 !=
768 umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1);
769 addcc c_2,t_1,c_2
770 rd %y,t_2
771 addxcc c_3,t_2,c_3 !=
772 addx c_1,%g0,c_1
773 st c_2,rp(7) !r[7]=c2;
774
775 umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2);
776 addcc c_3,t_1,c_3 !=
777 rd %y,t_2
778 addxcc c_1,t_2,c_1
779 addx %g0,%g0,c_2
780 umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2);
781 addcc c_3,t_1,c_3
782 rd %y,t_2
783 addxcc c_1,t_2,c_1
784 addx c_2,%g0,c_2 !=
785 umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
786 addcc c_3,t_1,c_3
787 rd %y,t_2
788 addxcc c_1,t_2,c_1 !=
789 addx c_2,%g0,c_2
790 umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
791 addcc c_3,t_1,c_3
792 rd %y,t_2 !=
793 addxcc c_1,t_2,c_1
794 addx c_2,%g0,c_2
795 umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
796 addcc c_3,t_1,c_3 !=
797 rd %y,t_2
798 addxcc c_1,t_2,c_1
799 addx c_2,%g0,c_2
800 umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2);
801 addcc c_3,t_1,c_3
802 rd %y,t_2
803 addxcc c_1,t_2,c_1
804 addx c_2,%g0,c_2 !=
805 umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
806 addcc c_3,t_1,c_3
807 rd %y,t_2
808 addxcc c_1,t_2,c_1 !
809 addx c_2,%g0,c_2
810 st c_3,rp(8) !r[8]=c3;
811
812 umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3);
813 addcc c_1,t_1,c_1 !=
814 rd %y,t_2
815 addxcc c_2,t_2,c_2
816 addx %g0,%g0,c_3
817 umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3);
818 addcc c_1,t_1,c_1
819 rd %y,t_2
820 addxcc c_2,t_2,c_2
821 addx c_3,%g0,c_3 !=
822 umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
823 addcc c_1,t_1,c_1
824 rd %y,t_2
825 addxcc c_2,t_2,c_2 !=
826 addx c_3,%g0,c_3
827 umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
828 addcc c_1,t_1,c_1
829 rd %y,t_2 !=
830 addxcc c_2,t_2,c_2
831 addx c_3,%g0,c_3
832 umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
833 addcc c_1,t_1,c_1 !=
834 rd %y,t_2
835 addxcc c_2,t_2,c_2
836 addx c_3,%g0,c_3
837 umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3);
838 addcc c_1,t_1,c_1
839 rd %y,t_2
840 addxcc c_2,t_2,c_2
841 addx c_3,%g0,c_3 !=
842 st c_1,rp(9) !r[9]=c1;
843
844 umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
845 addcc c_2,t_1,c_2
846 rd %y,t_2 !=
847 addxcc c_3,t_2,c_3
848 addx %g0,%g0,c_1
849 umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
850 addcc c_2,t_1,c_2 !=
851 rd %y,t_2
852 addxcc c_3,t_2,c_3
853 addx c_1,%g0,c_1
854 umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1);
855 addcc c_2,t_1,c_2
856 rd %y,t_2
857 addxcc c_3,t_2,c_3
858 addx c_1,%g0,c_1 !=
859 umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
860 addcc c_2,t_1,c_2
861 rd %y,t_2
862 addxcc c_3,t_2,c_3 !=
863 addx c_1,%g0,c_1
864 umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
865 addcc c_2,t_1,c_2
866 rd %y,t_2 !=
867 addxcc c_3,t_2,c_3
868 addx c_1,%g0,c_1
869 st c_2,rp(10) !r[10]=c2;
870
871 umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2);
872 addcc c_3,t_1,c_3
873 rd %y,t_2
874 addxcc c_1,t_2,c_1
875 addx %g0,%g0,c_2 !=
876 umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
877 addcc c_3,t_1,c_3
878 rd %y,t_2
879 addxcc c_1,t_2,c_1 !=
880 addx c_2,%g0,c_2
881 umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
882 addcc c_3,t_1,c_3
883 rd %y,t_2 !=
884 addxcc c_1,t_2,c_1
885 addx c_2,%g0,c_2
886 umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
887 addcc c_3,t_1,c_3 !=
888 rd %y,t_2
889 addxcc c_1,t_2,c_1
890 st c_3,rp(11) !r[11]=c3;
891 addx c_2,%g0,c_2 !=
892
893 umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
894 addcc c_1,t_1,c_1
895 rd %y,t_2
896 addxcc c_2,t_2,c_2 !=
897 addx %g0,%g0,c_3
898 umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
899 addcc c_1,t_1,c_1
900 rd %y,t_2 !=
901 addxcc c_2,t_2,c_2
902 addx c_3,%g0,c_3
903 umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
904 addcc c_1,t_1,c_1 !=
905 rd %y,t_2
906 addxcc c_2,t_2,c_2
907 st c_1,rp(12) !r[12]=c1;
908 addx c_3,%g0,c_3 !=
909
910 umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
911 addcc c_2,t_1,c_2
912 rd %y,t_2
913 addxcc c_3,t_2,c_3 !=
914 addx %g0,%g0,c_1
915 umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
916 addcc c_2,t_1,c_2
917 rd %y,t_2 !=
918 addxcc c_3,t_2,c_3
919 addx c_1,%g0,c_1
920 st c_2,rp(13) !r[13]=c2;
921
/* Last column: only a[7]*b[7] remains, so no third accumulator word is kept. */
922 umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2);
923 addcc c_3,t_1,c_3
924 rd %y,t_2
925 addxcc c_1,t_2,c_1
926 nop !=
927 st c_3,rp(14) !r[14]=c3;
928 st c_1,rp(15) !r[15]=c1;
929
930 ret
931 restore %g0,%g0,%o0
932
933.type bn_mul_comba8,#function
934.size bn_mul_comba8,(.-bn_mul_comba8)
935
936.align 32
937
938.global bn_mul_comba4
939/*
940 * void bn_mul_comba4(r,a,b)
941 * BN_ULONG *r,*a,*b;
942 */
/*
 * Fully unrolled 4x4-word multiplication: r[0..7] = a[0..3] * b[0..3].
 *
 * Works column by column: for column k each product a[i]*b[j] with
 * i+j == k is formed with umul (low word in t_1, high word read from %y
 * into t_2) and added with carry into the three-register accumulator.
 * The accumulator registers rotate roles after every stored word; the new
 * top word is zeroed by the "addx %g0,%g0,..." of the next column's first
 * step.  The caller's %o0 is set to 0 by "restore %g0,%g0,%o0".
 */
943bn_mul_comba4:
944 save %sp,FRAME_SIZE,%sp
945 ld ap(0),a_0
946 ld bp(0),b_0
947 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
948 ld bp(1),b_1
949 rd %y,c_2
950 st c_1,rp(0) !r[0]=c1;
951
952 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
953 ld ap(1),a_1
954 addcc c_2,t_1,c_2
955 rd %y,t_2 !=
956 addxcc %g0,t_2,c_3
957 addx %g0,%g0,c_1
958 ld ap(2),a_2
959 umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
960 addcc c_2,t_1,c_2
961 rd %y,t_2
962 addxcc c_3,t_2,c_3
963 addx c_1,%g0,c_1 !=
964 st c_2,rp(1) !r[1]=c2;
965
966 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
967 addcc c_3,t_1,c_3
968 rd %y,t_2 !=
969 addxcc c_1,t_2,c_1
970 addx %g0,%g0,c_2
971 ld bp(2),b_2
972 umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2);
973 addcc c_3,t_1,c_3
974 rd %y,t_2
975 addxcc c_1,t_2,c_1
976 addx c_2,%g0,c_2 !=
977 ld bp(3),b_3
978 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
979 addcc c_3,t_1,c_3
980 rd %y,t_2 !=
981 addxcc c_1,t_2,c_1
982 addx c_2,%g0,c_2
983 st c_3,rp(2) !r[2]=c3;
984
985 umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3);
986 addcc c_1,t_1,c_1
987 rd %y,t_2
988 addxcc c_2,t_2,c_2
989 addx %g0,%g0,c_3 !=
990 umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
991 addcc c_1,t_1,c_1
992 rd %y,t_2
993 addxcc c_2,t_2,c_2 !=
994 addx c_3,%g0,c_3
995 ld ap(3),a_3
996 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
997 addcc c_1,t_1,c_1 !=
998 rd %y,t_2
999 addxcc c_2,t_2,c_2
1000 addx c_3,%g0,c_3
1001 umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);
1002 addcc c_1,t_1,c_1
1003 rd %y,t_2
1004 addxcc c_2,t_2,c_2
1005 addx c_3,%g0,c_3 !=
1006 st c_1,rp(3) !r[3]=c1;
1007
1008 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1009 addcc c_2,t_1,c_2
1010 rd %y,t_2 !=
1011 addxcc c_3,t_2,c_3
1012 addx %g0,%g0,c_1
1013 umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1014 addcc c_2,t_1,c_2 !=
1015 rd %y,t_2
1016 addxcc c_3,t_2,c_3
1017 addx c_1,%g0,c_1
1018 umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1);
1019 addcc c_2,t_1,c_2
1020 rd %y,t_2
1021 addxcc c_3,t_2,c_3
1022 addx c_1,%g0,c_1 !=
1023 st c_2,rp(4) !r[4]=c2;
1024
1025 umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1026 addcc c_3,t_1,c_3
1027 rd %y,t_2 !=
1028 addxcc c_1,t_2,c_1
1029 addx %g0,%g0,c_2
1030 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1031 addcc c_3,t_1,c_3 !=
1032 rd %y,t_2
1033 addxcc c_1,t_2,c_1
1034 st c_3,rp(5) !r[5]=c3;
1035 addx c_2,%g0,c_2 !=
1036
/* Last column: only a[3]*b[3] remains, so no third accumulator word is kept. */
1037 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1038 addcc c_1,t_1,c_1
1039 rd %y,t_2
1040 addxcc c_2,t_2,c_2 !=
1041 st c_1,rp(6) !r[6]=c1;
1042 st c_2,rp(7) !r[7]=c2;
1043
1044 ret
1045 restore %g0,%g0,%o0
1046
1047.type bn_mul_comba4,#function
1048.size bn_mul_comba4,(.-bn_mul_comba4)
1049
1050.align 32
1051
1052.global bn_sqr_comba8
/*
 * bn_sqr_comba8: fully unrolled squaring of an 8-word operand,
 * r[0..15] = a[0..7]^2, with r addressed through %i0 and a through %i1
 * (see the rp()/ap() macros).
 *
 * Works column by column like the multiplication routines, but each
 * cross product a[i]*a[j] (i != j) is computed once with umul and then
 * added to the accumulator twice (the second addcc/addxcc/addx group that
 * follows each "sqr_add_c2" step), while the diagonal products a[i]*a[i]
 * ("sqr_add_c") are added once.  The three accumulator registers rotate
 * roles after every stored word.  The caller's %o0 is set to 0 by
 * "restore %g0,%g0,%o0".
 */
1053bn_sqr_comba8:
1054 save %sp,FRAME_SIZE,%sp
1055 ld ap(0),a_0
1056 ld ap(1),a_1
1057 umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3);
1058 rd %y,c_2
1059 st c_1,rp(0) !r[0]=c1;
1060
1061 ld ap(2),a_2
1062 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1063 addcc c_2,t_1,c_2
1064 rd %y,t_2
1065 addxcc %g0,t_2,c_3
1066 addx %g0,%g0,c_1 !=
1067 addcc c_2,t_1,c_2
1068 addxcc c_3,t_2,c_3
1069 st c_2,rp(1) !r[1]=c2;
1070 addx c_1,%g0,c_1 !=
1071
1072 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1073 addcc c_3,t_1,c_3
1074 rd %y,t_2
1075 addxcc c_1,t_2,c_1 !=
1076 addx %g0,%g0,c_2
1077 addcc c_3,t_1,c_3
1078 addxcc c_1,t_2,c_1
1079 addx c_2,%g0,c_2 !=
1080 ld ap(3),a_3
1081 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1082 addcc c_3,t_1,c_3
1083 rd %y,t_2 !=
1084 addxcc c_1,t_2,c_1
1085 addx c_2,%g0,c_2
1086 st c_3,rp(2) !r[2]=c3;
1087
1088 umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3);
1089 addcc c_1,t_1,c_1
1090 rd %y,t_2
1091 addxcc c_2,t_2,c_2
1092 addx %g0,%g0,c_3 !=
1093 addcc c_1,t_1,c_1
1094 addxcc c_2,t_2,c_2
1095 ld ap(4),a_4
1096 addx c_3,%g0,c_3 !=
1097 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1098 addcc c_1,t_1,c_1
1099 rd %y,t_2
1100 addxcc c_2,t_2,c_2 !=
1101 addx c_3,%g0,c_3
1102 addcc c_1,t_1,c_1
1103 addxcc c_2,t_2,c_2
1104 addx c_3,%g0,c_3 !=
1105 st c_1,rp(3) !r[3]=c1;
1106
1107 umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1108 addcc c_2,t_1,c_2
1109 rd %y,t_2 !=
1110 addxcc c_3,t_2,c_3
1111 addx %g0,%g0,c_1
1112 addcc c_2,t_1,c_2
1113 addxcc c_3,t_2,c_3 !=
1114 addx c_1,%g0,c_1
1115 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1116 addcc c_2,t_1,c_2
1117 rd %y,t_2 !=
1118 addxcc c_3,t_2,c_3
1119 addx c_1,%g0,c_1
1120 addcc c_2,t_1,c_2
1121 addxcc c_3,t_2,c_3 !=
1122 addx c_1,%g0,c_1
1123 ld ap(5),a_5
1124 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1125 addcc c_2,t_1,c_2 !=
1126 rd %y,t_2
1127 addxcc c_3,t_2,c_3
1128 st c_2,rp(4) !r[4]=c2;
1129 addx c_1,%g0,c_1 !=
1130
1131 umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1132 addcc c_3,t_1,c_3
1133 rd %y,t_2
1134 addxcc c_1,t_2,c_1 !=
1135 addx %g0,%g0,c_2
1136 addcc c_3,t_1,c_3
1137 addxcc c_1,t_2,c_1
1138 addx c_2,%g0,c_2 !=
1139 umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1140 addcc c_3,t_1,c_3
1141 rd %y,t_2
1142 addxcc c_1,t_2,c_1 !=
1143 addx c_2,%g0,c_2
1144 addcc c_3,t_1,c_3
1145 addxcc c_1,t_2,c_1
1146 addx c_2,%g0,c_2 !=
1147 ld ap(6),a_6
1148 umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1149 addcc c_3,t_1,c_3
1150 rd %y,t_2 !=
1151 addxcc c_1,t_2,c_1
1152 addx c_2,%g0,c_2
1153 addcc c_3,t_1,c_3
1154 addxcc c_1,t_2,c_1 !=
1155 addx c_2,%g0,c_2
1156 st c_3,rp(5) !r[5]=c3;
1157
1158 umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1159 addcc c_1,t_1,c_1 !=
1160 rd %y,t_2
1161 addxcc c_2,t_2,c_2
1162 addx %g0,%g0,c_3
1163 addcc c_1,t_1,c_1 !=
1164 addxcc c_2,t_2,c_2
1165 addx c_3,%g0,c_3
1166 umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1167 addcc c_1,t_1,c_1 !=
1168 rd %y,t_2
1169 addxcc c_2,t_2,c_2
1170 addx c_3,%g0,c_3
1171 addcc c_1,t_1,c_1 !=
1172 addxcc c_2,t_2,c_2
1173 addx c_3,%g0,c_3
1174 umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1175 addcc c_1,t_1,c_1 !=
1176 rd %y,t_2
1177 addxcc c_2,t_2,c_2
1178 addx c_3,%g0,c_3
1179 addcc c_1,t_1,c_1 !=
1180 addxcc c_2,t_2,c_2
1181 addx c_3,%g0,c_3
1182 ld ap(7),a_7
1183 umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1184 addcc c_1,t_1,c_1
1185 rd %y,t_2
1186 addxcc c_2,t_2,c_2
1187 addx c_3,%g0,c_3 !=
1188 st c_1,rp(6) !r[6]=c1;
1189
1190 umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1191 addcc c_2,t_1,c_2
1192 rd %y,t_2 !=
1193 addxcc c_3,t_2,c_3
1194 addx %g0,%g0,c_1
1195 addcc c_2,t_1,c_2
1196 addxcc c_3,t_2,c_3 !=
1197 addx c_1,%g0,c_1
1198 umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1199 addcc c_2,t_1,c_2
1200 rd %y,t_2 !=
1201 addxcc c_3,t_2,c_3
1202 addx c_1,%g0,c_1
1203 addcc c_2,t_1,c_2
1204 addxcc c_3,t_2,c_3 !=
1205 addx c_1,%g0,c_1
1206 umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1207 addcc c_2,t_1,c_2
1208 rd %y,t_2 !=
1209 addxcc c_3,t_2,c_3
1210 addx c_1,%g0,c_1
1211 addcc c_2,t_1,c_2
1212 addxcc c_3,t_2,c_3 !=
1213 addx c_1,%g0,c_1
1214 umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1215 addcc c_2,t_1,c_2
1216 rd %y,t_2 !=
1217 addxcc c_3,t_2,c_3
1218 addx c_1,%g0,c_1
1219 addcc c_2,t_1,c_2
1220 addxcc c_3,t_2,c_3 !=
1221 addx c_1,%g0,c_1
1222 st c_2,rp(7) !r[7]=c2;
1223
1224 umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1225 addcc c_3,t_1,c_3 !=
1226 rd %y,t_2
1227 addxcc c_1,t_2,c_1
1228 addx %g0,%g0,c_2
1229 addcc c_3,t_1,c_3 !=
1230 addxcc c_1,t_2,c_1
1231 addx c_2,%g0,c_2
1232 umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1233 addcc c_3,t_1,c_3 !=
1234 rd %y,t_2
1235 addxcc c_1,t_2,c_1
1236 addx c_2,%g0,c_2
1237 addcc c_3,t_1,c_3 !=
1238 addxcc c_1,t_2,c_1
1239 addx c_2,%g0,c_2
1240 umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1241 addcc c_3,t_1,c_3 !=
1242 rd %y,t_2
1243 addxcc c_1,t_2,c_1
1244 addx c_2,%g0,c_2
1245 addcc c_3,t_1,c_3 !=
1246 addxcc c_1,t_2,c_1
1247 addx c_2,%g0,c_2
1248 umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1249 addcc c_3,t_1,c_3 !=
1250 rd %y,t_2
1251 addxcc c_1,t_2,c_1
1252 st c_3,rp(8) !r[8]=c3;
1253 addx c_2,%g0,c_2 !=
1254
1255 umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1256 addcc c_1,t_1,c_1
1257 rd %y,t_2
1258 addxcc c_2,t_2,c_2 !=
1259 addx %g0,%g0,c_3
1260 addcc c_1,t_1,c_1
1261 addxcc c_2,t_2,c_2
1262 addx c_3,%g0,c_3 !=
1263 umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1264 addcc c_1,t_1,c_1
1265 rd %y,t_2
1266 addxcc c_2,t_2,c_2 !=
1267 addx c_3,%g0,c_3
1268 addcc c_1,t_1,c_1
1269 addxcc c_2,t_2,c_2
1270 addx c_3,%g0,c_3 !=
1271 umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1272 addcc c_1,t_1,c_1
1273 rd %y,t_2
1274 addxcc c_2,t_2,c_2 !=
1275 addx c_3,%g0,c_3
1276 addcc c_1,t_1,c_1
1277 addxcc c_2,t_2,c_2
1278 addx c_3,%g0,c_3 !=
1279 st c_1,rp(9) !r[9]=c1;
1280
1281 umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1282 addcc c_2,t_1,c_2
1283 rd %y,t_2 !=
1284 addxcc c_3,t_2,c_3
1285 addx %g0,%g0,c_1
1286 addcc c_2,t_1,c_2
1287 addxcc c_3,t_2,c_3 !=
1288 addx c_1,%g0,c_1
1289 umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1290 addcc c_2,t_1,c_2
1291 rd %y,t_2 !=
1292 addxcc c_3,t_2,c_3
1293 addx c_1,%g0,c_1
1294 addcc c_2,t_1,c_2
1295 addxcc c_3,t_2,c_3 !=
1296 addx c_1,%g0,c_1
1297 umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1298 addcc c_2,t_1,c_2
1299 rd %y,t_2 !=
1300 addxcc c_3,t_2,c_3
1301 addx c_1,%g0,c_1
1302 st c_2,rp(10) !r[10]=c2;
1303
1304 umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2);
1305 addcc c_3,t_1,c_3
1306 rd %y,t_2
1307 addxcc c_1,t_2,c_1
1308 addx %g0,%g0,c_2 !=
1309 addcc c_3,t_1,c_3
1310 addxcc c_1,t_2,c_1
1311 addx c_2,%g0,c_2
1312 umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2);
1313 addcc c_3,t_1,c_3
1314 rd %y,t_2
1315 addxcc c_1,t_2,c_1
1316 addx c_2,%g0,c_2 !=
1317 addcc c_3,t_1,c_3
1318 addxcc c_1,t_2,c_1
1319 st c_3,rp(11) !r[11]=c3;
1320 addx c_2,%g0,c_2 !=
1321
1322 umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1323 addcc c_1,t_1,c_1
1324 rd %y,t_2
1325 addxcc c_2,t_2,c_2 !=
1326 addx %g0,%g0,c_3
1327 addcc c_1,t_1,c_1
1328 addxcc c_2,t_2,c_2
1329 addx c_3,%g0,c_3 !=
1330 umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1331 addcc c_1,t_1,c_1
1332 rd %y,t_2
1333 addxcc c_2,t_2,c_2 !=
1334 addx c_3,%g0,c_3
1335 st c_1,rp(12) !r[12]=c1;
1336
1337 umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1338 addcc c_2,t_1,c_2 !=
1339 rd %y,t_2
1340 addxcc c_3,t_2,c_3
1341 addx %g0,%g0,c_1
1342 addcc c_2,t_1,c_2 !=
1343 addxcc c_3,t_2,c_3
1344 st c_2,rp(13) !r[13]=c2;
1345 addx c_1,%g0,c_1 !=
1346
/* Last column: only a[7]*a[7] remains, so no third accumulator word is kept. */
1347 umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1348 addcc c_3,t_1,c_3
1349 rd %y,t_2
1350 addxcc c_1,t_2,c_1 !=
1351 st c_3,rp(14) !r[14]=c3;
1352 st c_1,rp(15) !r[15]=c1;
1353
1354 ret
1355 restore %g0,%g0,%o0
1356
1357.type bn_sqr_comba8,#function
1358.size bn_sqr_comba8,(.-bn_sqr_comba8)
1359
1360.align 32
1361
1362.global bn_sqr_comba4
1363/*
1364 * void bn_sqr_comba4(r,a)
1365 * BN_ULONG *r,*a;
1366 */
/*
 * Fully unrolled squaring of a 4-word operand: r[0..7] = a[0..3]^2.
 *
 * Works column by column.  Each cross product a[i]*a[j] (i != j) is
 * computed once with umul and added to the three-register accumulator
 * twice (the second addcc/addxcc/addx group after each "sqr_add_c2"
 * step); each diagonal product a[i]*a[i] ("sqr_add_c") is added once.
 * The accumulator registers rotate roles after every stored word.
 * The caller's %o0 is set to 0 by "restore %g0,%g0,%o0".
 */
1367bn_sqr_comba4:
1368 save %sp,FRAME_SIZE,%sp
1369 ld ap(0),a_0
1370 umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3);
1371 ld ap(1),a_1 !=
1372 rd %y,c_2
1373 st c_1,rp(0) !r[0]=c1;
1374
1375 ld ap(2),a_2
1376 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1377 addcc c_2,t_1,c_2
1378 rd %y,t_2
1379 addxcc %g0,t_2,c_3
1380 addx %g0,%g0,c_1 !=
1381 addcc c_2,t_1,c_2
1382 addxcc c_3,t_2,c_3
1383 addx c_1,%g0,c_1 !=
1384 st c_2,rp(1) !r[1]=c2;
1385
1386 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1387 addcc c_3,t_1,c_3
1388 rd %y,t_2 !=
1389 addxcc c_1,t_2,c_1
1390 addx %g0,%g0,c_2
1391 addcc c_3,t_1,c_3
1392 addxcc c_1,t_2,c_1 !=
1393 addx c_2,%g0,c_2
1394 ld ap(3),a_3
1395 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1396 addcc c_3,t_1,c_3 !=
1397 rd %y,t_2
1398 addxcc c_1,t_2,c_1
1399 st c_3,rp(2) !r[2]=c3;
1400 addx c_2,%g0,c_2 !=
1401
1402 umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1403 addcc c_1,t_1,c_1
1404 rd %y,t_2
1405 addxcc c_2,t_2,c_2 !=
1406 addx %g0,%g0,c_3
1407 addcc c_1,t_1,c_1
1408 addxcc c_2,t_2,c_2
1409 addx c_3,%g0,c_3 !=
1410 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1411 addcc c_1,t_1,c_1
1412 rd %y,t_2
1413 addxcc c_2,t_2,c_2 !=
1414 addx c_3,%g0,c_3
1415 addcc c_1,t_1,c_1
1416 addxcc c_2,t_2,c_2
1417 addx c_3,%g0,c_3 !=
1418 st c_1,rp(3) !r[3]=c1;
1419
1420 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1421 addcc c_2,t_1,c_2
1422 rd %y,t_2 !=
1423 addxcc c_3,t_2,c_3
1424 addx %g0,%g0,c_1
1425 addcc c_2,t_1,c_2
1426 addxcc c_3,t_2,c_3 !=
1427 addx c_1,%g0,c_1
1428 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1429 addcc c_2,t_1,c_2
1430 rd %y,t_2 !=
1431 addxcc c_3,t_2,c_3
1432 addx c_1,%g0,c_1
1433 st c_2,rp(4) !r[4]=c2;
1434
1435 umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2);
1436 addcc c_3,t_1,c_3
1437 rd %y,t_2
1438 addxcc c_1,t_2,c_1
1439 addx %g0,%g0,c_2 !=
1440 addcc c_3,t_1,c_3
1441 addxcc c_1,t_2,c_1
1442 st c_3,rp(5) !r[5]=c3;
1443 addx c_2,%g0,c_2 !=
1444
/* Last column: only a[3]*a[3] remains, so no third accumulator word is kept. */
1445 umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1446 addcc c_1,t_1,c_1
1447 rd %y,t_2
1448 addxcc c_2,t_2,c_2 !=
1449 st c_1,rp(6) !r[6]=c1;
1450 st c_2,rp(7) !r[7]=c2;
1451
1452 ret
1453 restore %g0,%g0,%o0
1454
1455.type bn_sqr_comba4,#function
1456.size bn_sqr_comba4,(.-bn_sqr_comba4)
1457
1458.align 32
diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S
new file mode 100644
index 0000000000..8c56e2e7e7
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/sparcv8plus.S
@@ -0,0 +1,1547 @@
1.ident "sparcv8plus.s, Version 1.4"
2.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contribution to the OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * Questions-n-answers.
22 *
23 * Q. How to compile?
24 * A. With SC4.x/SC5.x:
25 *
26 * cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
27 *
28 * and with gcc:
29 *
30 * gcc -mcpu=ultrasparc -c bn_asm.sparc.v8plus.S -o bn_asm.o
31 *
32 * or if above fails (it does if you have gas installed):
33 *
34 * gcc -E bn_asm.sparc.v8plus.S | as -xarch=v8plus /dev/fd/0 -o bn_asm.o
35 *
36 * Quick-n-dirty way to fuse the module into the library.
37 * Provided that the library is already configured and built
38 * (in 0.9.2 case with no-asm option):
39 *
40 * # cd crypto/bn
41 * # cp /some/place/bn_asm.sparc.v8plus.S .
42 * # cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
43 * # make
44 * # cd ../..
45 * # make; make test
46 *
47 * Quick-n-dirty way to get rid of it:
48 *
49 * # cd crypto/bn
50 * # touch bn_asm.c
51 * # make
52 * # cd ../..
53 * # make; make test
54 *
55 * Q. V8plus architecture? What kind of beast is that?
56 * A. Well, it's rather a programming model than an architecture...
57 * It's actually v9-compliant, i.e. *any* UltraSPARC CPU, under
58 * special conditions, namely when kernel doesn't preserve upper
59 * 32 bits of otherwise 64-bit registers during a context switch.
60 *
61 * Q. Why just UltraSPARC? What about SuperSPARC?
62 * A. Original release did target UltraSPARC only. Now SuperSPARC
63 * version is provided alongside. Both versions share bn_*comba[48]
64 * implementations (see comment later in code for explanation).
65 * But what's so special about this UltraSPARC implementation?
66 * Why didn't I let compiler do the job? Trouble is that most of
67 * available compilers (well, SC5.0 is the only exception) don't
68 * attempt to take advantage of UltraSPARC's 64-bitness under
69 * 32-bit kernels even though it's perfectly possible (see next
70 * question).
71 *
72 * Q. 64-bit registers under 32-bit kernels? Didn't you just say it
73 * doesn't work?
74 * A. You can't address *all* registers as 64-bit wide:-( The catch is
75 * that you actually may rely upon %o0-%o5 and %g1-%g4 being fully
76 * preserved if you're in a leaf function, i.e. one that never calls
77 * any other functions. All functions in this module are leaf and
78 * 10 registers is a handful. And as a matter of fact non-"comba"
79 * routines don't require even that much and I could even afford to
80 * not allocate own stack frame for 'em:-)
81 *
82 * Q. What about 64-bit kernels?
83 * A. What about 'em? Just kidding:-) Pure 64-bit version is currently
84 * under evaluation and development...
85 *
86 * Q. What about shared libraries?
87 * A. What about 'em? Kidding again:-) Code does *not* contain any
88 * code position dependencies and it's safe to include it into
89 * shared library as is.
90 *
91 * Q. How much faster does it go?
92 * A. Do you have a good benchmark? In either case below is what I
93 * experience with crypto/bn/expspeed.c test program:
94 *
95 * v8plus module on U10/300MHz against bn_asm.c compiled with:
96 *
97 * cc-5.0 -xarch=v8plus -xO5 -xdepend +7-12%
98 * cc-4.2 -xarch=v8plus -xO5 -xdepend +25-35%
99 * egcs-1.1.2 -mcpu=ultrasparc -O3 +35-45%
100 *
101 * v8 module on SS10/60MHz against bn_asm.c compiled with:
102 *
103 * cc-5.0 -xarch=v8 -xO5 -xdepend +7-10%
104 * cc-4.2 -xarch=v8 -xO5 -xdepend +10%
105 * egcs-1.1.2 -mv8 -O3 +35-45%
106 *
107 * As you can see it's damn hard to beat the new Sun C compiler
108 * and it's in first place GNU C users who will appreciate this
109 * assembler implementation:-)
110 */
111
112/*
113 * Revision history.
114 *
115 * 1.0 - initial release;
116 * 1.1 - new loop unrolling model(*);
117 * - some more fine tuning;
118 * 1.2 - made gas friendly;
119 * - updates to documentation concerning v9;
120 * - new performance comparison matrix;
121 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
122 * 1.4 - native V9 bn_*_comba[48] implementation (15% more efficient)
123 * resulting in slight overall performance kick;
124 * - some retunes;
125 * - support for GNU as added;
126 *
127 * (*) Originally unrolled loop looked like this:
128 * for (;;) {
129 * op(p+0); if (--n==0) break;
130 * op(p+1); if (--n==0) break;
131 * op(p+2); if (--n==0) break;
132 * op(p+3); if (--n==0) break;
133 * p+=4;
134 * }
135 * I unroll according to following:
136 * while (n&~3) {
137 * op(p+0); op(p+1); op(p+2); op(p+3);
138 * p+=4; n-=4;
139 * }
140 * if (n) {
141 * op(p+0); if (--n==0) return;
142 * op(p+1); if (--n==0) return;
143 * op(p+2); return;
144 * }
145 */
146
147/*
148 * GNU assembler can't stand stuw:-(
 * so the preprocessor rewrites every "stuw" used below into "st".
149 */
150#define stuw st
151
/* Everything that follows is emitted into the allocatable, executable .text section. */
152.section ".text",#alloc,#execinstr
153.file "bn_asm.sparc.v8plus.S"
154
155.align 32
156
157.global bn_mul_add_words
158/*
159 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
160 * BN_ULONG *rp,*ap;
161 * int num;
162 * BN_ULONG w;
 *
 * For each of the num words this forms rp[i] + ap[i]*w + carry as a
 * 64-bit value, stores its low 32 bits back to rp[i] and keeps its high
 * 32 bits as the carry into the next word.  The final carry is returned
 * in %o0; 0 is returned immediately when num <= 0.
 *
 * Registers: %o0 = rp, %o1 = ap, %o2 = words remaining, %o3 = w,
 * %o5 = running carry, %g1 = current rp word, %g2/%g3 = ap words
 * (and their products), %o4 = 64-bit sum.
163 */
164bn_mul_add_words:
165 sra %o2,%g0,%o2 ! signx %o2
/* num > 0: continue; the annulled delay slot preloads ap[0] only when taken. */
166 brgz,a %o2,.L_bn_mul_add_words_proceed
167 lduw [%o1],%g2
/* num <= 0: nothing to do, return 0. */
168 retl
169 clr %o0
/* NOTE(review): these nops look like padding that puts the loop label on a
 * 32-byte boundary (see the "32 aligned" remark there) - confirm. */
170 nop
171 nop
172 nop
173
174.L_bn_mul_add_words_proceed:
175 srl %o3,%g0,%o3 ! clruw %o3
/* Fewer than 4 words (num & ~3 == 0)?  Go straight to the tail.
 * The delay slot zeroes the carry on both paths. */
176 andcc %o2,-4,%g0
177 bz,pn %icc,.L_bn_mul_add_words_tail
178 clr %o5
179
/* Main loop: four words per iteration; %g2 already holds ap[0] on entry. */
180.L_bn_mul_add_words_loop: ! wow! 32 aligned!
181 lduw [%o0],%g1
182 lduw [%o1+4],%g3
183 mulx %o3,%g2,%g2
184 add %g1,%o5,%o4
185 nop
186 add %o4,%g2,%o4
187 stuw %o4,[%o0]
188 srlx %o4,32,%o5
189
190 lduw [%o0+4],%g1
191 lduw [%o1+8],%g2
192 mulx %o3,%g3,%g3
193 add %g1,%o5,%o4
194 dec 4,%o2
195 add %o4,%g3,%o4
196 stuw %o4,[%o0+4]
197 srlx %o4,32,%o5
198
199 lduw [%o0+8],%g1
200 lduw [%o1+12],%g3
201 mulx %o3,%g2,%g2
202 add %g1,%o5,%o4
203 inc 16,%o1
204 add %o4,%g2,%o4
205 stuw %o4,[%o0+8]
206 srlx %o4,32,%o5
207
208 lduw [%o0+12],%g1
209 mulx %o3,%g3,%g3
210 add %g1,%o5,%o4
211 inc 16,%o0
212 add %o4,%g3,%o4
213 andcc %o2,-4,%g0
/* rp was already advanced by 16, so [%o0-4] is the fourth word of this group. */
214 stuw %o4,[%o0-4]
215 srlx %o4,32,%o5
/* At least 4 words left: loop again, preloading the next ap[0] in the annulled slot. */
216 bnz,a,pt %icc,.L_bn_mul_add_words_loop
217 lduw [%o1],%g2
218
/* 1..3 words left: handle them in the tail (ap[0] preloaded only when taken). */
219 brnz,a,pn %o2,.L_bn_mul_add_words_tail
220 lduw [%o1],%g2
221.L_bn_mul_add_words_return:
/* Return the running carry. */
222 retl
223 mov %o5,%o0
224
/* Tail: up to three remaining words; %g2 holds ap[0] on entry. */
225.L_bn_mul_add_words_tail:
226 lduw [%o0],%g1
227 mulx %o3,%g2,%g2
228 add %g1,%o5,%o4
229 dec %o2
230 add %o4,%g2,%o4
231 srlx %o4,32,%o5
/* The store in the delay slot executes whether or not the branch is taken. */
232 brz,pt %o2,.L_bn_mul_add_words_return
233 stuw %o4,[%o0]
234
235 lduw [%o1+4],%g2
236 lduw [%o0+4],%g1
237 mulx %o3,%g2,%g2
238 add %g1,%o5,%o4
239 dec %o2
240 add %o4,%g2,%o4
241 srlx %o4,32,%o5
242 brz,pt %o2,.L_bn_mul_add_words_return
243 stuw %o4,[%o0+4]
244
245 lduw [%o1+8],%g2
246 lduw [%o0+8],%g1
247 mulx %o3,%g2,%g2
248 add %g1,%o5,%o4
249 add %o4,%g2,%o4
250 stuw %o4,[%o0+8]
/* Third tail word: the carry goes directly into the return register. */
251 retl
252 srlx %o4,32,%o0
253
254.type bn_mul_add_words,#function
255.size bn_mul_add_words,(.-bn_mul_add_words)
256
257.align 32
258
259.global bn_mul_words
260/*
261 * BN_ULONG bn_mul_words(rp,ap,num,w)
262 * BN_ULONG *rp,*ap;
263 * int num;
264 * BN_ULONG w;
 *
 * For each of the num words this forms ap[i]*w + carry as a 64-bit
 * value, stores its low 32 bits to rp[i] and keeps its high 32 bits as
 * the carry into the next word.  The final carry is returned in %o0;
 * 0 is returned immediately when num <= 0.
 *
 * Registers: %o0 = rp, %o1 = ap, %o2 = words remaining, %o3 = w,
 * %o5 = running carry, %g2/%g3 = ap words (and their products),
 * %o4 = 64-bit sum.
265 */
266bn_mul_words:
267 sra %o2,%g0,%o2 ! signx %o2
/* num > 0: continue; the annulled delay slot preloads ap[0] only when taken. */
268 brgz,a %o2,.L_bn_mul_words_proceeed
269 lduw [%o1],%g2
/* num <= 0: nothing to do, return 0. */
270 retl
271 clr %o0
/* NOTE(review): these nops look like padding that puts the loop label on a
 * 32-byte boundary (see the "32 aligned" remark there) - confirm. */
272 nop
273 nop
274 nop
275
276.L_bn_mul_words_proceeed:
277 srl %o3,%g0,%o3 ! clruw %o3
/* Fewer than 4 words?  Go straight to the tail.  The delay slot zeroes
 * the carry on both paths. */
278 andcc %o2,-4,%g0
279 bz,pn %icc,.L_bn_mul_words_tail
280 clr %o5
281
/* Main loop: four words per iteration; %g2 already holds ap[0] on entry. */
282.L_bn_mul_words_loop: ! wow! 32 aligned!
283 lduw [%o1+4],%g3
284 mulx %o3,%g2,%g2
285 add %g2,%o5,%o4
286 nop
287 stuw %o4,[%o0]
288 srlx %o4,32,%o5
289
290 lduw [%o1+8],%g2
291 mulx %o3,%g3,%g3
292 add %g3,%o5,%o4
293 dec 4,%o2
294 stuw %o4,[%o0+4]
295 srlx %o4,32,%o5
296
297 lduw [%o1+12],%g3
298 mulx %o3,%g2,%g2
299 add %g2,%o5,%o4
300 inc 16,%o1
301 stuw %o4,[%o0+8]
302 srlx %o4,32,%o5
303
304 mulx %o3,%g3,%g3
305 add %g3,%o5,%o4
306 inc 16,%o0
/* rp was already advanced by 16, so [%o0-4] is the fourth word of this group. */
307 stuw %o4,[%o0-4]
308 srlx %o4,32,%o5
309 andcc %o2,-4,%g0
/* At least 4 words left: loop again, preloading the next ap[0] in the annulled slot. */
310 bnz,a,pt %icc,.L_bn_mul_words_loop
311 lduw [%o1],%g2
312 nop
313 nop
314
/* 1..3 words left: handle them in the tail (ap[0] preloaded only when taken). */
315 brnz,a,pn %o2,.L_bn_mul_words_tail
316 lduw [%o1],%g2
317.L_bn_mul_words_return:
/* Return the running carry. */
318 retl
319 mov %o5,%o0
320
/* Tail: up to three remaining words; %g2 holds ap[0] on entry. */
321.L_bn_mul_words_tail:
322 mulx %o3,%g2,%g2
323 add %g2,%o5,%o4
324 dec %o2
325 srlx %o4,32,%o5
/* The store in the delay slot executes whether or not the branch is taken. */
326 brz,pt %o2,.L_bn_mul_words_return
327 stuw %o4,[%o0]
328
329 lduw [%o1+4],%g2
330 mulx %o3,%g2,%g2
331 add %g2,%o5,%o4
332 dec %o2
333 srlx %o4,32,%o5
334 brz,pt %o2,.L_bn_mul_words_return
335 stuw %o4,[%o0+4]
336
337 lduw [%o1+8],%g2
338 mulx %o3,%g2,%g2
339 add %g2,%o5,%o4
340 stuw %o4,[%o0+8]
/* Third tail word: the carry goes directly into the return register. */
341 retl
342 srlx %o4,32,%o0
343
344.type bn_mul_words,#function
345.size bn_mul_words,(.-bn_mul_words)
346
347.align 32
348.global bn_sqr_words
349/*
350 * void bn_sqr_words(r,a,n)
351 * BN_ULONG *r,*a;
352 * int n;
 *
 * For each of the n input words this computes the 64-bit square
 * a[i]*a[i] and stores its low 32 bits to r[2*i] and its high 32 bits to
 * r[2*i+1].  Nothing is stored when n <= 0.  Every exit path clears %o0.
 *
 * Registers: %o0 = r, %o1 = a, %o2 = words remaining, %g2/%g3 = input
 * words, %o4 = 64-bit square, %o5 = its high half.
353 */
354bn_sqr_words:
355 sra %o2,%g0,%o2 ! signx %o2
/* n > 0: continue; the annulled delay slot preloads a[0] only when taken. */
356 brgz,a %o2,.L_bn_sqr_words_proceeed
357 lduw [%o1],%g2
/* n <= 0: nothing to do. */
358 retl
359 clr %o0
/* NOTE(review): these nops look like padding that puts the loop label on a
 * 32-byte boundary (see the "32 aligned" remark there) - confirm. */
360 nop
361 nop
362 nop
363
364.L_bn_sqr_words_proceeed:
/* Fewer than 4 words (n & ~3 == 0)?  Go straight to the tail. */
365 andcc %o2,-4,%g0
366 nop
367 bz,pn %icc,.L_bn_sqr_words_tail
368 nop
369
/* Main loop: four input words (eight output words) per iteration;
 * %g2 already holds a[0] on entry. */
370.L_bn_sqr_words_loop: ! wow! 32 aligned!
371 lduw [%o1+4],%g3
372 mulx %g2,%g2,%o4
373 stuw %o4,[%o0]
374 srlx %o4,32,%o5
375 stuw %o5,[%o0+4]
376 nop
377
378 lduw [%o1+8],%g2
379 mulx %g3,%g3,%o4
380 dec 4,%o2
381 stuw %o4,[%o0+8]
382 srlx %o4,32,%o5
383 stuw %o5,[%o0+12]
384
385 lduw [%o1+12],%g3
386 mulx %g2,%g2,%o4
387 srlx %o4,32,%o5
388 stuw %o4,[%o0+16]
389 inc 16,%o1
390 stuw %o5,[%o0+20]
391
392 mulx %g3,%g3,%o4
393 inc 32,%o0
/* r was already advanced by 32, so [%o0-8]/[%o0-4] are this group's last pair. */
394 stuw %o4,[%o0-8]
395 srlx %o4,32,%o5
/* This andcc writes its result into %g2 as well as setting the flags.
 * %g2 is reloaded by the lduw in whichever branch delay slot is taken
 * below, and is not read on the return path, so the overwrite has no
 * effect on the results. */
396 andcc %o2,-4,%g2
397 stuw %o5,[%o0-4]
398 bnz,a,pt %icc,.L_bn_sqr_words_loop
399 lduw [%o1],%g2
400 nop
401
/* 1..3 words left: handle them in the tail (a[0] preloaded only when taken). */
402 brnz,a,pn %o2,.L_bn_sqr_words_tail
403 lduw [%o1],%g2
404.L_bn_sqr_words_return:
405 retl
406 clr %o0
407
/* Tail: up to three remaining words; %g2 holds a[0] on entry. */
408.L_bn_sqr_words_tail:
409 mulx %g2,%g2,%o4
410 dec %o2
411 stuw %o4,[%o0]
412 srlx %o4,32,%o5
/* The store in the delay slot executes whether or not the branch is taken. */
413 brz,pt %o2,.L_bn_sqr_words_return
414 stuw %o5,[%o0+4]
415
416 lduw [%o1+4],%g2
417 mulx %g2,%g2,%o4
418 dec %o2
419 stuw %o4,[%o0+8]
420 srlx %o4,32,%o5
421 brz,pt %o2,.L_bn_sqr_words_return
422 stuw %o5,[%o0+12]
423
424 lduw [%o1+8],%g2
425 mulx %g2,%g2,%o4
426 srlx %o4,32,%o5
427 stuw %o4,[%o0+16]
428 stuw %o5,[%o0+20]
429 retl
430 clr %o0
431
432.type bn_sqr_words,#function
433.size bn_sqr_words,(.-bn_sqr_words)
434
435.align 32
436.global bn_div_words
437/*
438 * BN_ULONG bn_div_words(h,l,d)
439 * BN_ULONG h,l,d;
 *
 * Forms the 64-bit dividend (h << 32) | l, divides it by d with an
 * unsigned 64-bit divide and returns the low 32 bits of the quotient
 * in %o0 (any quotient bits above bit 31 are discarded).
 *
 * NOTE(review): l (%o1) and d (%o2) are used as full 64-bit registers
 * without being zero-extended first, unlike w in bn_mul_words which
 * is cleared with "srl %o3,%g0,%o3". Presumably callers always pass
 * clean upper halves -- confirm. No check for d == 0 is made here.
440 */
441bn_div_words:
442 sllx %o0,32,%o0
443 or %o0,%o1,%o0
444 udivx %o0,%o2,%o0
 ! the delay slot truncates the quotient to 32 bits
445 retl
446 srl %o0,%g0,%o0 ! clruw %o0
447
448.type bn_div_words,#function
449.size bn_div_words,(.-bn_div_words)
450
451.align 32
452
453.global bn_add_words
454/*
455 * BN_ULONG bn_add_words(rp,ap,bp,n)
456 * BN_ULONG *rp,*ap,*bp;
457 * int n;
 *
 * Multi-word addition: rp[i] = ap[i] + bp[i] + carry for i in [0,n),
 * with the carry chained through the 32-bit (icc) carry flag by
 * addccc. Returns the final carry (0 or 1) in %o0, or 0 when n <= 0.
 *
 * Register use: %o0=rp, %o1=ap, %o2=bp, %o3=words left,
 * %o4/%o5/%g1..%g4=loaded operand words.
 * Between the addccc instructions only instructions that leave the
 * condition codes alone are used (dec, inc, and, loads, stores,
 * register branches), which is what keeps the carry alive.
458 */
459bn_add_words:
460 sra %o3,%g0,%o3 ! signx %o3
 ! n > 0: continue below; the annulled delay slot preloads ap[0] only
 ! when the branch is taken
461 brgz,a %o3,.L_bn_add_words_proceed
462 lduw [%o1],%o4
 ! n <= 0: return 0
463 retl
464 clr %o0
465
466.L_bn_add_words_proceed:
 ! fewer than 4 words (n & -4 == 0): straight to the tail.
 ! The delay slot (not annulled) clears the carry on both paths.
467 andcc %o3,-4,%g0
468 bz,pn %icc,.L_bn_add_words_tail
469 addcc %g0,0,%g0 ! clear carry flag
470
471.L_bn_add_words_loop: ! wow! 32 aligned!
 ! four words per iteration; ap[0] is already in %o4
472 dec 4,%o3
473 lduw [%o2],%o5
474 lduw [%o1+4],%g1
475 lduw [%o2+4],%g2
476 lduw [%o1+8],%g3
477 lduw [%o2+8],%g4
 ! word 0
478 addccc %o5,%o4,%o5
479 stuw %o5,[%o0]
480
 ! word 1 (operands for word 3 are loaded before ap advances)
481 lduw [%o1+12],%o4
482 lduw [%o2+12],%o5
483 inc 16,%o1
484 addccc %g1,%g2,%g1
485 stuw %g1,[%o0+4]
486
 ! word 2
487 inc 16,%o2
488 addccc %g3,%g4,%g3
489 stuw %g3,[%o0+8]
490
 ! word 3; rp advances first, hence the -4 offset on the store
491 inc 16,%o0
492 addccc %o5,%o4,%o5
493 stuw %o5,[%o0-4]
 ! plain and plus a register branch: the carry flag is not disturbed
494 and %o3,-4,%g1
495 brnz,a,pt %g1,.L_bn_add_words_loop
496 lduw [%o1],%o4
497
 ! 1..3 words left: finish in the tail (ap[0] preloaded if taken)
498 brnz,a,pn %o3,.L_bn_add_words_tail
499 lduw [%o1],%o4
500.L_bn_add_words_return:
 ! return value = carry flag: 0, overwritten with 1 if carry is set
501 clr %o0
502 retl
503 movcs %icc,1,%o0
504 nop
505
506.L_bn_add_words_tail:
 ! up to three remaining words; entered with ap[0] in %o4.
 ! The stores sit in non-annulled delay slots, so they always execute.
507 lduw [%o2],%o5
508 dec %o3
509 addccc %o5,%o4,%o5
510 brz,pt %o3,.L_bn_add_words_return
511 stuw %o5,[%o0]
512
513 lduw [%o1+4],%o4
514 lduw [%o2+4],%o5
515 dec %o3
516 addccc %o5,%o4,%o5
517 brz,pt %o3,.L_bn_add_words_return
518 stuw %o5,[%o0+4]
519
 ! third and last tail word, then return the carry
520 lduw [%o1+8],%o4
521 lduw [%o2+8],%o5
522 addccc %o5,%o4,%o5
523 stuw %o5,[%o0+8]
524 clr %o0
525 retl
526 movcs %icc,1,%o0
527
528.type bn_add_words,#function
529.size bn_add_words,(.-bn_add_words)
530
531.global bn_sub_words
532/*
533 * BN_ULONG bn_sub_words(rp,ap,bp,n)
534 * BN_ULONG *rp,*ap,*bp;
535 * int n;
 *
 * Multi-word subtraction: rp[i] = ap[i] - bp[i] - borrow for i in
 * [0,n), with the borrow chained through the 32-bit (icc) carry flag
 * by subccc. Returns the final borrow (0 or 1) in %o0, or 0 when
 * n <= 0.
 *
 * Register use: %o0=rp, %o1=ap, %o2=bp, %o3=words left,
 * %o4/%o5/%g1..%g4=loaded operand words.
 * Between the subccc instructions only instructions that leave the
 * condition codes alone are used (dec, inc, and, loads, stores,
 * register branches), which is what keeps the borrow alive.
536 */
537bn_sub_words:
538 sra %o3,%g0,%o3 ! signx %o3
 ! n > 0: continue below; the annulled delay slot preloads ap[0] only
 ! when the branch is taken
539 brgz,a %o3,.L_bn_sub_words_proceed
540 lduw [%o1],%o4
 ! n <= 0: return 0
541 retl
542 clr %o0
543
544.L_bn_sub_words_proceed:
 ! fewer than 4 words (n & -4 == 0): straight to the tail.
 ! The delay slot (not annulled) clears the borrow on both paths.
545 andcc %o3,-4,%g0
546 bz,pn %icc,.L_bn_sub_words_tail
547 addcc %g0,0,%g0 ! clear carry flag
548
549.L_bn_sub_words_loop: ! wow! 32 aligned!
 ! four words per iteration; ap[0] is already in %o4
550 dec 4,%o3
551 lduw [%o2],%o5
552 lduw [%o1+4],%g1
553 lduw [%o2+4],%g2
554 lduw [%o1+8],%g3
555 lduw [%o2+8],%g4
 ! word 0: ap[0] - bp[0]
556 subccc %o4,%o5,%o5
557 stuw %o5,[%o0]
558
 ! word 1 (operands for word 3 are loaded before ap advances)
559 lduw [%o1+12],%o4
560 lduw [%o2+12],%o5
561 inc 16,%o1
562 subccc %g1,%g2,%g2
563 stuw %g2,[%o0+4]
564
 ! word 2
565 inc 16,%o2
566 subccc %g3,%g4,%g4
567 stuw %g4,[%o0+8]
568
 ! word 3; rp advances first, hence the -4 offset on the store
569 inc 16,%o0
570 subccc %o4,%o5,%o5
571 stuw %o5,[%o0-4]
 ! plain and plus a register branch: the borrow flag is not disturbed
572 and %o3,-4,%g1
573 brnz,a,pt %g1,.L_bn_sub_words_loop
574 lduw [%o1],%o4
575
 ! 1..3 words left: finish in the tail (ap[0] preloaded if taken)
576 brnz,a,pn %o3,.L_bn_sub_words_tail
577 lduw [%o1],%o4
578.L_bn_sub_words_return:
 ! return value = borrow flag: 0, overwritten with 1 if carry is set
579 clr %o0
580 retl
581 movcs %icc,1,%o0
582 nop
583
584.L_bn_sub_words_tail: ! wow! 32 aligned!
 ! up to three remaining words; entered with ap[0] in %o4.
 ! The stores sit in non-annulled delay slots, so they always execute.
585 lduw [%o2],%o5
586 dec %o3
587 subccc %o4,%o5,%o5
588 brz,pt %o3,.L_bn_sub_words_return
589 stuw %o5,[%o0]
590
591 lduw [%o1+4],%o4
592 lduw [%o2+4],%o5
593 dec %o3
594 subccc %o4,%o5,%o5
595 brz,pt %o3,.L_bn_sub_words_return
596 stuw %o5,[%o0+4]
597
 ! third and last tail word, then return the borrow
598 lduw [%o1+8],%o4
599 lduw [%o2+8],%o5
600 subccc %o4,%o5,%o5
601 stuw %o5,[%o0+8]
602 clr %o0
603 retl
604 movcs %icc,1,%o0
605
606.type bn_sub_words,#function
607.size bn_sub_words,(.-bn_sub_words)
608
609/*
610 * Code below depends on the fact that upper parts of the %l0-%l7
611 * and %i0-%i7 are zeroed by kernel after context switch. In
612 * previous versions this comment stated that "the trouble is that
613 * it's not feasible to implement the mumbo-jumbo in less V9
614 * instructions:-(" which apparently isn't true thanks to
615 * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement
616 * results not from the shorter code, but from elimination of
617 * multicycle non-pairable 'rd %y,%rd' instructions.
618 *
619 * Andy.
620 */
621
/* Stack pointer adjustment given to the "save" at the top of every
 * comba routine below. */
622#define FRAME_SIZE -96
623
624/*
625 * Here is register usage map for *all* routines below.
 *
 * t_1   scratch: the current 64-bit product, and the column sum just
 *       before its low word is stored
 * t_2   the constant 1<<32 (built with mov 1 / sllx 32 on entry)
 * c_12  64-bit running column accumulator
 * c_3   overflow count of c_12, kept in units of t_2 (1<<32)
626 */
627#define t_1 %o0
628#define t_2 %o1
629#define c_12 %o2
630#define c_3 %o3
631
/* Memory operand for word I of the a, b and r arrays; the pointers
 * are the routine arguments as seen after "save" (%i1, %i2, %i0). */
632#define ap(I) [%i1+4*I]
633#define bp(I) [%i2+4*I]
634#define rp(I) [%i0+4*I]
635
/* Registers that hold the words a[0]..a[7] once loaded. */
636#define a_0 %l0
637#define a_1 %l1
638#define a_2 %l2
639#define a_3 %l3
640#define a_4 %l4
641#define a_5 %l5
642#define a_6 %l6
643#define a_7 %l7
644
/* Registers that hold the words b[0]..b[7] once loaded. Note that
 * b_5 lives in %o7 and b_6/b_7 in global registers. */
645#define b_0 %i3
646#define b_1 %i4
647#define b_2 %i5
648#define b_3 %o4
649#define b_4 %o5
650#define b_5 %o7
651#define b_6 %g1
652#define b_7 %g4
654.align 32
655.global bn_mul_comba8
656/*
657 * void bn_mul_comba8(r,a,b)
658 * BN_ULONG *r,*a,*b;
 *
 * Fully unrolled 8x8-word multiplication: r[0..15] = a[0..7]*b[0..7].
 * The result is produced one output word (column) at a time; for
 * column k every product a[i]*b[j] with i+j == k is added to a
 * running sum, as spelled out by the mul_add_c() remarks on the mulx
 * lines.
 *
 * How a column is accumulated:
 *  - c_12 holds the running sum as a 64-bit value.
 *  - Each "addcc" adds a 64-bit product; the pair
 *        bcs,a %xcc,.+8
 *        add   c_3,t_2,c_3
 *    adds t_2 (1<<32) to c_3 only when that addition carried out of
 *    64 bits: the branch target is simply the next instruction and
 *    the annulled delay slot runs only when the branch is taken.
 *  - The last addcc of a column writes the sum to t_1; its low 32
 *    bits are stored to r[k], and the next column starts from
 *    (t_1 >> 32) | c_3.
 *
 * Words of a and b are loaded lazily, just before first use.
 * The final "restore %g0,%g0,%o0" leaves 0 in the caller's %o0.
659 */
660bn_mul_comba8:
661 save %sp,FRAME_SIZE,%sp
/* t_2 = 1<<32, the unit in which c_3 counts 64-bit overflows */
662 mov 1,t_2
663 lduw ap(0),a_0
664 sllx t_2,32,t_2
665 lduw bp(0),b_0 !=
666 lduw bp(1),b_1
/* column 0: a single product, no carry handling needed */
667 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
668 srlx t_1,32,c_12
669 stuw t_1,rp(0) !=!r[0]=c1;
670
/* column 1 */
671 lduw ap(1),a_1
672 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
673 addcc c_12,t_1,c_12
674 clr c_3 !=
/* conditional c_3 += 1<<32 on 64-bit carry (same idiom throughout) */
675 bcs,a %xcc,.+8
676 add c_3,t_2,c_3
677 lduw ap(2),a_2
678 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
679 addcc c_12,t_1,t_1
680 bcs,a %xcc,.+8
681 add c_3,t_2,c_3
/* end of column: store the low word, carry the rest forward */
682 srlx t_1,32,c_12 !=
683 stuw t_1,rp(1) !r[1]=c2;
684 or c_12,c_3,c_12
685
/* column 2 */
686 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
687 addcc c_12,t_1,c_12 !=
688 clr c_3
689 bcs,a %xcc,.+8
690 add c_3,t_2,c_3
691 lduw bp(2),b_2 !=
692 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
693 addcc c_12,t_1,c_12
694 bcs,a %xcc,.+8
695 add c_3,t_2,c_3 !=
696 lduw bp(3),b_3
697 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
698 addcc c_12,t_1,t_1
699 bcs,a %xcc,.+8 !=
700 add c_3,t_2,c_3
701 srlx t_1,32,c_12
702 stuw t_1,rp(2) !r[2]=c3;
703 or c_12,c_3,c_12 !=
704
/* column 3 */
705 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
706 addcc c_12,t_1,c_12
707 clr c_3
708 bcs,a %xcc,.+8 !=
709 add c_3,t_2,c_3
710 mulx a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
711 addcc c_12,t_1,c_12
712 bcs,a %xcc,.+8 !=
713 add c_3,t_2,c_3
714 lduw ap(3),a_3
715 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
716 addcc c_12,t_1,c_12 !=
717 bcs,a %xcc,.+8
718 add c_3,t_2,c_3
719 lduw ap(4),a_4
720 mulx a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);!=
721 addcc c_12,t_1,t_1
722 bcs,a %xcc,.+8
723 add c_3,t_2,c_3
724 srlx t_1,32,c_12 !=
725 stuw t_1,rp(3) !r[3]=c1;
726 or c_12,c_3,c_12
727
/* column 4 */
728 mulx a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
729 addcc c_12,t_1,c_12 !=
730 clr c_3
731 bcs,a %xcc,.+8
732 add c_3,t_2,c_3
733 mulx a_3,b_1,t_1 !=!mul_add_c(a[3],b[1],c2,c3,c1);
734 addcc c_12,t_1,c_12
735 bcs,a %xcc,.+8
736 add c_3,t_2,c_3
737 mulx a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
738 addcc c_12,t_1,c_12
739 bcs,a %xcc,.+8
740 add c_3,t_2,c_3
741 lduw bp(4),b_4 !=
742 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
743 addcc c_12,t_1,c_12
744 bcs,a %xcc,.+8
745 add c_3,t_2,c_3 !=
746 lduw bp(5),b_5
747 mulx a_0,b_4,t_1 !mul_add_c(a[0],b[4],c2,c3,c1);
748 addcc c_12,t_1,t_1
749 bcs,a %xcc,.+8 !=
750 add c_3,t_2,c_3
751 srlx t_1,32,c_12
752 stuw t_1,rp(4) !r[4]=c2;
753 or c_12,c_3,c_12 !=
754
/* column 5 */
755 mulx a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
756 addcc c_12,t_1,c_12
757 clr c_3
758 bcs,a %xcc,.+8 !=
759 add c_3,t_2,c_3
760 mulx a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
761 addcc c_12,t_1,c_12
762 bcs,a %xcc,.+8 !=
763 add c_3,t_2,c_3
764 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
765 addcc c_12,t_1,c_12
766 bcs,a %xcc,.+8 !=
767 add c_3,t_2,c_3
768 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
769 addcc c_12,t_1,c_12
770 bcs,a %xcc,.+8 !=
771 add c_3,t_2,c_3
772 lduw ap(5),a_5
773 mulx a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
774 addcc c_12,t_1,c_12 !=
775 bcs,a %xcc,.+8
776 add c_3,t_2,c_3
777 lduw ap(6),a_6
778 mulx a_5,b_0,t_1 !=!mul_add_c(a[5],b[0],c3,c1,c2);
779 addcc c_12,t_1,t_1
780 bcs,a %xcc,.+8
781 add c_3,t_2,c_3
782 srlx t_1,32,c_12 !=
783 stuw t_1,rp(5) !r[5]=c3;
784 or c_12,c_3,c_12
785
/* column 6 */
786 mulx a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
787 addcc c_12,t_1,c_12 !=
788 clr c_3
789 bcs,a %xcc,.+8
790 add c_3,t_2,c_3
791 mulx a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
792 addcc c_12,t_1,c_12
793 bcs,a %xcc,.+8
794 add c_3,t_2,c_3
795 mulx a_4,b_2,t_1 !=!mul_add_c(a[4],b[2],c1,c2,c3);
796 addcc c_12,t_1,c_12
797 bcs,a %xcc,.+8
798 add c_3,t_2,c_3
799 mulx a_3,b_3,t_1 !=!mul_add_c(a[3],b[3],c1,c2,c3);
800 addcc c_12,t_1,c_12
801 bcs,a %xcc,.+8
802 add c_3,t_2,c_3
803 mulx a_2,b_4,t_1 !=!mul_add_c(a[2],b[4],c1,c2,c3);
804 addcc c_12,t_1,c_12
805 bcs,a %xcc,.+8
806 add c_3,t_2,c_3
807 lduw bp(6),b_6 !=
808 mulx a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
809 addcc c_12,t_1,c_12
810 bcs,a %xcc,.+8
811 add c_3,t_2,c_3 !=
812 lduw bp(7),b_7
813 mulx a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
814 addcc c_12,t_1,t_1
815 bcs,a %xcc,.+8 !=
816 add c_3,t_2,c_3
817 srlx t_1,32,c_12
818 stuw t_1,rp(6) !r[6]=c1;
819 or c_12,c_3,c_12 !=
820
/* column 7 (the longest: eight products) */
821 mulx a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
822 addcc c_12,t_1,c_12
823 clr c_3
824 bcs,a %xcc,.+8 !=
825 add c_3,t_2,c_3
826 mulx a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
827 addcc c_12,t_1,c_12
828 bcs,a %xcc,.+8 !=
829 add c_3,t_2,c_3
830 mulx a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
831 addcc c_12,t_1,c_12
832 bcs,a %xcc,.+8 !=
833 add c_3,t_2,c_3
834 mulx a_3,b_4,t_1 !mul_add_c(a[3],b[4],c2,c3,c1);
835 addcc c_12,t_1,c_12
836 bcs,a %xcc,.+8 !=
837 add c_3,t_2,c_3
838 mulx a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
839 addcc c_12,t_1,c_12
840 bcs,a %xcc,.+8 !=
841 add c_3,t_2,c_3
842 mulx a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
843 addcc c_12,t_1,c_12
844 bcs,a %xcc,.+8 !=
845 add c_3,t_2,c_3
846 lduw ap(7),a_7
847 mulx a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
848 addcc c_12,t_1,c_12
849 bcs,a %xcc,.+8
850 add c_3,t_2,c_3
851 mulx a_7,b_0,t_1 !=!mul_add_c(a[7],b[0],c2,c3,c1);
852 addcc c_12,t_1,t_1
853 bcs,a %xcc,.+8
854 add c_3,t_2,c_3
855 srlx t_1,32,c_12 !=
856 stuw t_1,rp(7) !r[7]=c2;
857 or c_12,c_3,c_12
858
/* column 8: from here on every word of a and b is already loaded */
859 mulx a_7,b_1,t_1 !=!mul_add_c(a[7],b[1],c3,c1,c2);
860 addcc c_12,t_1,c_12
861 clr c_3
862 bcs,a %xcc,.+8
863 add c_3,t_2,c_3 !=
864 mulx a_6,b_2,t_1 !mul_add_c(a[6],b[2],c3,c1,c2);
865 addcc c_12,t_1,c_12
866 bcs,a %xcc,.+8
867 add c_3,t_2,c_3 !=
868 mulx a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
869 addcc c_12,t_1,c_12
870 bcs,a %xcc,.+8
871 add c_3,t_2,c_3 !=
872 mulx a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
873 addcc c_12,t_1,c_12
874 bcs,a %xcc,.+8
875 add c_3,t_2,c_3 !=
876 mulx a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
877 addcc c_12,t_1,c_12
878 bcs,a %xcc,.+8
879 add c_3,t_2,c_3 !=
880 mulx a_2,b_6,t_1 !mul_add_c(a[2],b[6],c3,c1,c2);
881 addcc c_12,t_1,c_12
882 bcs,a %xcc,.+8
883 add c_3,t_2,c_3 !=
884 mulx a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
885 addcc c_12,t_1,t_1
886 bcs,a %xcc,.+8
887 add c_3,t_2,c_3 !=
888 srlx t_1,32,c_12
889 stuw t_1,rp(8) !r[8]=c3;
890 or c_12,c_3,c_12
891
/* column 9 */
892 mulx a_2,b_7,t_1 !=!mul_add_c(a[2],b[7],c1,c2,c3);
893 addcc c_12,t_1,c_12
894 clr c_3
895 bcs,a %xcc,.+8
896 add c_3,t_2,c_3 !=
897 mulx a_3,b_6,t_1 !mul_add_c(a[3],b[6],c1,c2,c3);
898 addcc c_12,t_1,c_12
899 bcs,a %xcc,.+8 !=
900 add c_3,t_2,c_3
901 mulx a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
902 addcc c_12,t_1,c_12
903 bcs,a %xcc,.+8 !=
904 add c_3,t_2,c_3
905 mulx a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
906 addcc c_12,t_1,c_12
907 bcs,a %xcc,.+8 !=
908 add c_3,t_2,c_3
909 mulx a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
910 addcc c_12,t_1,c_12
911 bcs,a %xcc,.+8 !=
912 add c_3,t_2,c_3
913 mulx a_7,b_2,t_1 !mul_add_c(a[7],b[2],c1,c2,c3);
914 addcc c_12,t_1,t_1
915 bcs,a %xcc,.+8 !=
916 add c_3,t_2,c_3
917 srlx t_1,32,c_12
918 stuw t_1,rp(9) !r[9]=c1;
919 or c_12,c_3,c_12 !=
920
/* column 10 */
921 mulx a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
922 addcc c_12,t_1,c_12
923 clr c_3
924 bcs,a %xcc,.+8 !=
925 add c_3,t_2,c_3
926 mulx a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
927 addcc c_12,t_1,c_12
928 bcs,a %xcc,.+8 !=
929 add c_3,t_2,c_3
930 mulx a_5,b_5,t_1 !mul_add_c(a[5],b[5],c2,c3,c1);
931 addcc c_12,t_1,c_12
932 bcs,a %xcc,.+8 !=
933 add c_3,t_2,c_3
934 mulx a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
935 addcc c_12,t_1,c_12
936 bcs,a %xcc,.+8 !=
937 add c_3,t_2,c_3
938 mulx a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
939 addcc c_12,t_1,t_1
940 bcs,a %xcc,.+8 !=
941 add c_3,t_2,c_3
942 srlx t_1,32,c_12
943 stuw t_1,rp(10) !r[10]=c2;
944 or c_12,c_3,c_12 !=
945
/* column 11 */
946 mulx a_4,b_7,t_1 !mul_add_c(a[4],b[7],c3,c1,c2);
947 addcc c_12,t_1,c_12
948 clr c_3
949 bcs,a %xcc,.+8 !=
950 add c_3,t_2,c_3
951 mulx a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
952 addcc c_12,t_1,c_12
953 bcs,a %xcc,.+8 !=
954 add c_3,t_2,c_3
955 mulx a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
956 addcc c_12,t_1,c_12
957 bcs,a %xcc,.+8 !=
958 add c_3,t_2,c_3
959 mulx a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
960 addcc c_12,t_1,t_1
961 bcs,a %xcc,.+8 !=
962 add c_3,t_2,c_3
963 srlx t_1,32,c_12
964 stuw t_1,rp(11) !r[11]=c3;
965 or c_12,c_3,c_12 !=
966
/* column 12 */
967 mulx a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
968 addcc c_12,t_1,c_12
969 clr c_3
970 bcs,a %xcc,.+8 !=
971 add c_3,t_2,c_3
972 mulx a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
973 addcc c_12,t_1,c_12
974 bcs,a %xcc,.+8 !=
975 add c_3,t_2,c_3
976 mulx a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
977 addcc c_12,t_1,t_1
978 bcs,a %xcc,.+8 !=
979 add c_3,t_2,c_3
980 srlx t_1,32,c_12
981 stuw t_1,rp(12) !r[12]=c1;
982 or c_12,c_3,c_12 !=
983
/* column 13 */
984 mulx a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
985 addcc c_12,t_1,c_12
986 clr c_3
987 bcs,a %xcc,.+8 !=
988 add c_3,t_2,c_3
989 mulx a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
990 addcc c_12,t_1,t_1
991 bcs,a %xcc,.+8 !=
992 add c_3,t_2,c_3
993 srlx t_1,32,c_12
/* NOTE(review): "st" here where every other store in this routine is
 * spelled "stuw"; presumably the same store-word opcode -- confirm. */
994 st t_1,rp(13) !r[13]=c2;
995 or c_12,c_3,c_12 !=
996
/* columns 14 and 15: last product; no overflow count is kept because
 * the high half of the sum is stored directly as r[15] */
997 mulx a_7,b_7,t_1 !mul_add_c(a[7],b[7],c3,c1,c2);
998 addcc c_12,t_1,t_1
999 srlx t_1,32,c_12 !=
1000 stuw t_1,rp(14) !r[14]=c3;
1001 stuw c_12,rp(15) !r[15]=c1;
1002
/* return; the restore in the delay slot writes 0 to the caller %o0 */
1003 ret
1004 restore %g0,%g0,%o0 !=
1005
1006.type bn_mul_comba8,#function
1007.size bn_mul_comba8,(.-bn_mul_comba8)
1008
1009.align 32
1010
1011.global bn_mul_comba4
1012/*
1013 * void bn_mul_comba4(r,a,b)
1014 * BN_ULONG *r,*a,*b;
 *
 * Fully unrolled 4x4-word multiplication: r[0..7] = a[0..3]*b[0..3],
 * produced one output word (column) at a time; for column k every
 * product a[i]*b[j] with i+j == k is added to a running sum, as
 * spelled out by the mul_add_c() remarks on the mulx lines.
 *
 * c_12 is the 64-bit running sum. After each addcc the pair
 *       bcs,a %xcc,.+8
 *       add   c_3,t_2,c_3
 * adds t_2 (1<<32) to c_3 only when the addition carried out of 64
 * bits (the annulled delay slot runs only when the branch is taken).
 * A column ends by storing the low 32 bits of the sum to r[k] and
 * seeding the next column with (sum >> 32) | c_3.
 * The final "restore %g0,%g0,%o0" leaves 0 in the caller's %o0.
1015 */
1016bn_mul_comba4:
1017 save %sp,FRAME_SIZE,%sp
1018 lduw ap(0),a_0
/* t_2 = 1<<32, the unit in which c_3 counts 64-bit overflows */
1019 mov 1,t_2
1020 lduw bp(0),b_0
1021 sllx t_2,32,t_2 !=
1022 lduw bp(1),b_1
/* column 0: a single product, no carry handling needed */
1023 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
1024 srlx t_1,32,c_12
1025 stuw t_1,rp(0) !=!r[0]=c1;
1026
/* column 1 */
1027 lduw ap(1),a_1
1028 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
1029 addcc c_12,t_1,c_12
1030 clr c_3 !=
1031 bcs,a %xcc,.+8
1032 add c_3,t_2,c_3
1033 lduw ap(2),a_2
1034 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
1035 addcc c_12,t_1,t_1
1036 bcs,a %xcc,.+8
1037 add c_3,t_2,c_3
/* end of column: store the low word, carry the rest forward */
1038 srlx t_1,32,c_12 !=
1039 stuw t_1,rp(1) !r[1]=c2;
1040 or c_12,c_3,c_12
1041
/* column 2 */
1042 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
1043 addcc c_12,t_1,c_12 !=
1044 clr c_3
1045 bcs,a %xcc,.+8
1046 add c_3,t_2,c_3
1047 lduw bp(2),b_2 !=
1048 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
1049 addcc c_12,t_1,c_12
1050 bcs,a %xcc,.+8
1051 add c_3,t_2,c_3 !=
1052 lduw bp(3),b_3
1053 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
1054 addcc c_12,t_1,t_1
1055 bcs,a %xcc,.+8 !=
1056 add c_3,t_2,c_3
1057 srlx t_1,32,c_12
1058 stuw t_1,rp(2) !r[2]=c3;
1059 or c_12,c_3,c_12 !=
1060
/* column 3 (the longest: four products) */
1061 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
1062 addcc c_12,t_1,c_12
1063 clr c_3
1064 bcs,a %xcc,.+8 !=
1065 add c_3,t_2,c_3
1066 mulx a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
1067 addcc c_12,t_1,c_12
1068 bcs,a %xcc,.+8 !=
1069 add c_3,t_2,c_3
1070 lduw ap(3),a_3
1071 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
1072 addcc c_12,t_1,c_12 !=
1073 bcs,a %xcc,.+8
1074 add c_3,t_2,c_3
1075 mulx a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
1076 addcc c_12,t_1,t_1 !=
1077 bcs,a %xcc,.+8
1078 add c_3,t_2,c_3
1079 srlx t_1,32,c_12
1080 stuw t_1,rp(3) !=!r[3]=c1;
1081 or c_12,c_3,c_12
1082
/* column 4 */
1083 mulx a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1084 addcc c_12,t_1,c_12
1085 clr c_3 !=
1086 bcs,a %xcc,.+8
1087 add c_3,t_2,c_3
1088 mulx a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1089 addcc c_12,t_1,c_12 !=
1090 bcs,a %xcc,.+8
1091 add c_3,t_2,c_3
1092 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
1093 addcc c_12,t_1,t_1 !=
1094 bcs,a %xcc,.+8
1095 add c_3,t_2,c_3
1096 srlx t_1,32,c_12
1097 stuw t_1,rp(4) !=!r[4]=c2;
1098 or c_12,c_3,c_12
1099
/* column 5 */
1100 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1101 addcc c_12,t_1,c_12
1102 clr c_3 !=
1103 bcs,a %xcc,.+8
1104 add c_3,t_2,c_3
1105 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1106 addcc c_12,t_1,t_1 !=
1107 bcs,a %xcc,.+8
1108 add c_3,t_2,c_3
1109 srlx t_1,32,c_12
1110 stuw t_1,rp(5) !=!r[5]=c3;
1111 or c_12,c_3,c_12
1112
/* columns 6 and 7: last product; the high half of the sum is stored
 * directly as r[7], so no overflow count is kept */
1113 mulx a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1114 addcc c_12,t_1,t_1
1115 srlx t_1,32,c_12 !=
1116 stuw t_1,rp(6) !r[6]=c1;
1117 stuw c_12,rp(7) !r[7]=c2;
1118
/* return; the restore in the delay slot writes 0 to the caller %o0 */
1119 ret
1120 restore %g0,%g0,%o0
1121
1122.type bn_mul_comba4,#function
1123.size bn_mul_comba4,(.-bn_mul_comba4)
1124
1125.align 32
1126
1127.global bn_sqr_comba8
/*
 * bn_sqr_comba8: fully unrolled 8-word squaring.
 * As used by the code, r is the first argument (%i0 via rp()) and a
 * the second (%i1 via ap()); r[0..15] receives the square of a[0..7].
 *
 * The result is produced one output word (column) at a time. Within
 * a column, per the remarks on the mulx lines:
 *  - sqr_add_c(a,i,...)    adds a[i]*a[i] once;
 *  - sqr_add_c2(a,i,j,...) adds a[i]*a[j] twice, done here as two
 *    back-to-back addcc of the same product, each with its own
 *    carry check.
 *
 * c_12 is the 64-bit running sum. After each addcc the pair
 *       bcs,a %xcc,.+8
 *       add   c_3,t_2,c_3
 * adds t_2 (1<<32) to c_3 only when the addition carried out of 64
 * bits (the annulled delay slot runs only when the branch is taken).
 * A column ends by storing the low 32 bits of the sum to r[k] and
 * seeding the next column with (sum >> 32) | c_3.
 * The final "restore %g0,%g0,%o0" leaves 0 in the caller's %o0.
 */
1128bn_sqr_comba8:
1129 save %sp,FRAME_SIZE,%sp
/* t_2 = 1<<32, the unit in which c_3 counts 64-bit overflows */
1130 mov 1,t_2
1131 lduw ap(0),a_0
1132 sllx t_2,32,t_2
1133 lduw ap(1),a_1
/* column 0: a single square, no carry handling needed */
1134 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1135 srlx t_1,32,c_12
1136 stuw t_1,rp(0) !r[0]=c1;
1137
/* column 1: the cross product is added twice */
1138 lduw ap(2),a_2
1139 mulx a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1140 addcc c_12,t_1,c_12
1141 clr c_3
1142 bcs,a %xcc,.+8
1143 add c_3,t_2,c_3
1144 addcc c_12,t_1,t_1
1145 bcs,a %xcc,.+8
1146 add c_3,t_2,c_3
/* end of column: store the low word, carry the rest forward */
1147 srlx t_1,32,c_12
1148 stuw t_1,rp(1) !r[1]=c2;
1149 or c_12,c_3,c_12
1150
/* column 2 */
1151 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1152 addcc c_12,t_1,c_12
1153 clr c_3
1154 bcs,a %xcc,.+8
1155 add c_3,t_2,c_3
1156 addcc c_12,t_1,c_12
1157 bcs,a %xcc,.+8
1158 add c_3,t_2,c_3
1159 lduw ap(3),a_3
1160 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1161 addcc c_12,t_1,t_1
1162 bcs,a %xcc,.+8
1163 add c_3,t_2,c_3
1164 srlx t_1,32,c_12
1165 stuw t_1,rp(2) !r[2]=c3;
1166 or c_12,c_3,c_12
1167
/* column 3 */
1168 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1169 addcc c_12,t_1,c_12
1170 clr c_3
1171 bcs,a %xcc,.+8
1172 add c_3,t_2,c_3
1173 addcc c_12,t_1,c_12
1174 bcs,a %xcc,.+8
1175 add c_3,t_2,c_3
1176 lduw ap(4),a_4
1177 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1178 addcc c_12,t_1,c_12
1179 bcs,a %xcc,.+8
1180 add c_3,t_2,c_3
1181 addcc c_12,t_1,t_1
1182 bcs,a %xcc,.+8
1183 add c_3,t_2,c_3
1184 srlx t_1,32,c_12
/* NOTE(review): "st" here where every other store in this routine is
 * spelled "stuw"; presumably the same store-word opcode -- confirm. */
1185 st t_1,rp(3) !r[3]=c1;
1186 or c_12,c_3,c_12
1187
/* column 4 */
1188 mulx a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1189 addcc c_12,t_1,c_12
1190 clr c_3
1191 bcs,a %xcc,.+8
1192 add c_3,t_2,c_3
1193 addcc c_12,t_1,c_12
1194 bcs,a %xcc,.+8
1195 add c_3,t_2,c_3
1196 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1197 addcc c_12,t_1,c_12
1198 bcs,a %xcc,.+8
1199 add c_3,t_2,c_3
1200 addcc c_12,t_1,c_12
1201 bcs,a %xcc,.+8
1202 add c_3,t_2,c_3
1203 lduw ap(5),a_5
1204 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1205 addcc c_12,t_1,t_1
1206 bcs,a %xcc,.+8
1207 add c_3,t_2,c_3
1208 srlx t_1,32,c_12
1209 stuw t_1,rp(4) !r[4]=c2;
1210 or c_12,c_3,c_12
1211
/* column 5 */
1212 mulx a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1213 addcc c_12,t_1,c_12
1214 clr c_3
1215 bcs,a %xcc,.+8
1216 add c_3,t_2,c_3
1217 addcc c_12,t_1,c_12
1218 bcs,a %xcc,.+8
1219 add c_3,t_2,c_3
1220 mulx a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1221 addcc c_12,t_1,c_12
1222 bcs,a %xcc,.+8
1223 add c_3,t_2,c_3
1224 addcc c_12,t_1,c_12
1225 bcs,a %xcc,.+8
1226 add c_3,t_2,c_3
1227 lduw ap(6),a_6
1228 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1229 addcc c_12,t_1,c_12
1230 bcs,a %xcc,.+8
1231 add c_3,t_2,c_3
1232 addcc c_12,t_1,t_1
1233 bcs,a %xcc,.+8
1234 add c_3,t_2,c_3
1235 srlx t_1,32,c_12
1236 stuw t_1,rp(5) !r[5]=c3;
1237 or c_12,c_3,c_12
1238
/* column 6 */
1239 mulx a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1240 addcc c_12,t_1,c_12
1241 clr c_3
1242 bcs,a %xcc,.+8
1243 add c_3,t_2,c_3
1244 addcc c_12,t_1,c_12
1245 bcs,a %xcc,.+8
1246 add c_3,t_2,c_3
1247 mulx a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1248 addcc c_12,t_1,c_12
1249 bcs,a %xcc,.+8
1250 add c_3,t_2,c_3
1251 addcc c_12,t_1,c_12
1252 bcs,a %xcc,.+8
1253 add c_3,t_2,c_3
1254 mulx a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1255 addcc c_12,t_1,c_12
1256 bcs,a %xcc,.+8
1257 add c_3,t_2,c_3
1258 addcc c_12,t_1,c_12
1259 bcs,a %xcc,.+8
1260 add c_3,t_2,c_3
1261 lduw ap(7),a_7
1262 mulx a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1263 addcc c_12,t_1,t_1
1264 bcs,a %xcc,.+8
1265 add c_3,t_2,c_3
1266 srlx t_1,32,c_12
1267 stuw t_1,rp(6) !r[6]=c1;
1268 or c_12,c_3,c_12
1269
/* column 7: from here on every word of a is already loaded */
1270 mulx a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1271 addcc c_12,t_1,c_12
1272 clr c_3
1273 bcs,a %xcc,.+8
1274 add c_3,t_2,c_3
1275 addcc c_12,t_1,c_12
1276 bcs,a %xcc,.+8
1277 add c_3,t_2,c_3
1278 mulx a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1279 addcc c_12,t_1,c_12
1280 bcs,a %xcc,.+8
1281 add c_3,t_2,c_3
1282 addcc c_12,t_1,c_12
1283 bcs,a %xcc,.+8
1284 add c_3,t_2,c_3
1285 mulx a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1286 addcc c_12,t_1,c_12
1287 bcs,a %xcc,.+8
1288 add c_3,t_2,c_3
1289 addcc c_12,t_1,c_12
1290 bcs,a %xcc,.+8
1291 add c_3,t_2,c_3
1292 mulx a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1293 addcc c_12,t_1,c_12
1294 bcs,a %xcc,.+8
1295 add c_3,t_2,c_3
1296 addcc c_12,t_1,t_1
1297 bcs,a %xcc,.+8
1298 add c_3,t_2,c_3
1299 srlx t_1,32,c_12
1300 stuw t_1,rp(7) !r[7]=c2;
1301 or c_12,c_3,c_12
1302
/* column 8 */
1303 mulx a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1304 addcc c_12,t_1,c_12
1305 clr c_3
1306 bcs,a %xcc,.+8
1307 add c_3,t_2,c_3
1308 addcc c_12,t_1,c_12
1309 bcs,a %xcc,.+8
1310 add c_3,t_2,c_3
1311 mulx a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1312 addcc c_12,t_1,c_12
1313 bcs,a %xcc,.+8
1314 add c_3,t_2,c_3
1315 addcc c_12,t_1,c_12
1316 bcs,a %xcc,.+8
1317 add c_3,t_2,c_3
1318 mulx a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1319 addcc c_12,t_1,c_12
1320 bcs,a %xcc,.+8
1321 add c_3,t_2,c_3
1322 addcc c_12,t_1,c_12
1323 bcs,a %xcc,.+8
1324 add c_3,t_2,c_3
1325 mulx a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1326 addcc c_12,t_1,t_1
1327 bcs,a %xcc,.+8
1328 add c_3,t_2,c_3
1329 srlx t_1,32,c_12
1330 stuw t_1,rp(8) !r[8]=c3;
1331 or c_12,c_3,c_12
1332
/* column 9 */
1333 mulx a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1334 addcc c_12,t_1,c_12
1335 clr c_3
1336 bcs,a %xcc,.+8
1337 add c_3,t_2,c_3
1338 addcc c_12,t_1,c_12
1339 bcs,a %xcc,.+8
1340 add c_3,t_2,c_3
1341 mulx a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1342 addcc c_12,t_1,c_12
1343 bcs,a %xcc,.+8
1344 add c_3,t_2,c_3
1345 addcc c_12,t_1,c_12
1346 bcs,a %xcc,.+8
1347 add c_3,t_2,c_3
1348 mulx a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1349 addcc c_12,t_1,c_12
1350 bcs,a %xcc,.+8
1351 add c_3,t_2,c_3
1352 addcc c_12,t_1,t_1
1353 bcs,a %xcc,.+8
1354 add c_3,t_2,c_3
1355 srlx t_1,32,c_12
1356 stuw t_1,rp(9) !r[9]=c1;
1357 or c_12,c_3,c_12
1358
/* column 10 */
1359 mulx a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1360 addcc c_12,t_1,c_12
1361 clr c_3
1362 bcs,a %xcc,.+8
1363 add c_3,t_2,c_3
1364 addcc c_12,t_1,c_12
1365 bcs,a %xcc,.+8
1366 add c_3,t_2,c_3
1367 mulx a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1368 addcc c_12,t_1,c_12
1369 bcs,a %xcc,.+8
1370 add c_3,t_2,c_3
1371 addcc c_12,t_1,c_12
1372 bcs,a %xcc,.+8
1373 add c_3,t_2,c_3
1374 mulx a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1375 addcc c_12,t_1,t_1
1376 bcs,a %xcc,.+8
1377 add c_3,t_2,c_3
1378 srlx t_1,32,c_12
1379 stuw t_1,rp(10) !r[10]=c2;
1380 or c_12,c_3,c_12
1381
/* column 11 */
1382 mulx a_4,a_7,t_1 !sqr_add_c2(a,7,4,c3,c1,c2);
1383 addcc c_12,t_1,c_12
1384 clr c_3
1385 bcs,a %xcc,.+8
1386 add c_3,t_2,c_3
1387 addcc c_12,t_1,c_12
1388 bcs,a %xcc,.+8
1389 add c_3,t_2,c_3
1390 mulx a_5,a_6,t_1 !sqr_add_c2(a,6,5,c3,c1,c2);
1391 addcc c_12,t_1,c_12
1392 bcs,a %xcc,.+8
1393 add c_3,t_2,c_3
1394 addcc c_12,t_1,t_1
1395 bcs,a %xcc,.+8
1396 add c_3,t_2,c_3
1397 srlx t_1,32,c_12
1398 stuw t_1,rp(11) !r[11]=c3;
1399 or c_12,c_3,c_12
1400
/* column 12 */
1401 mulx a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1402 addcc c_12,t_1,c_12
1403 clr c_3
1404 bcs,a %xcc,.+8
1405 add c_3,t_2,c_3
1406 addcc c_12,t_1,c_12
1407 bcs,a %xcc,.+8
1408 add c_3,t_2,c_3
1409 mulx a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1410 addcc c_12,t_1,t_1
1411 bcs,a %xcc,.+8
1412 add c_3,t_2,c_3
1413 srlx t_1,32,c_12
1414 stuw t_1,rp(12) !r[12]=c1;
1415 or c_12,c_3,c_12
1416
/* column 13 */
1417 mulx a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1418 addcc c_12,t_1,c_12
1419 clr c_3
1420 bcs,a %xcc,.+8
1421 add c_3,t_2,c_3
1422 addcc c_12,t_1,t_1
1423 bcs,a %xcc,.+8
1424 add c_3,t_2,c_3
1425 srlx t_1,32,c_12
1426 stuw t_1,rp(13) !r[13]=c2;
1427 or c_12,c_3,c_12
1428
/* columns 14 and 15: last square; the high half of the sum is stored
 * directly as r[15], so no overflow count is kept */
1429 mulx a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1430 addcc c_12,t_1,t_1
1431 srlx t_1,32,c_12
1432 stuw t_1,rp(14) !r[14]=c3;
1433 stuw c_12,rp(15) !r[15]=c1;
1434
/* return; the restore in the delay slot writes 0 to the caller %o0 */
1435 ret
1436 restore %g0,%g0,%o0
1437
1438.type bn_sqr_comba8,#function
1439.size bn_sqr_comba8,(.-bn_sqr_comba8)
1440
1441.align 32
1442
1443.global bn_sqr_comba4
1444/*
1445 * void bn_sqr_comba4(r,a)
1446 * BN_ULONG *r,*a;
 *
 * Fully unrolled 4-word squaring: r[0..7] receives the square of
 * a[0..3], produced one output word (column) at a time. Within a
 * column, per the remarks on the mulx lines:
 *  - sqr_add_c(a,i,...)    adds a[i]*a[i] once;
 *  - sqr_add_c2(a,i,j,...) adds a[i]*a[j] twice, done here as two
 *    back-to-back addcc of the same product, each with its own
 *    carry check.
 *
 * c_12 is the 64-bit running sum. After each addcc the pair
 *       bcs,a %xcc,.+8
 *       add   c_3,t_2,c_3
 * adds t_2 (1<<32) to c_3 only when the addition carried out of 64
 * bits (the annulled delay slot runs only when the branch is taken).
 * A column ends by storing the low 32 bits of the sum to r[k] and
 * seeding the next column with (sum >> 32) | c_3.
 * The final "restore %g0,%g0,%o0" leaves 0 in the caller's %o0.
1447 */
1448bn_sqr_comba4:
1449 save %sp,FRAME_SIZE,%sp
/* t_2 = 1<<32, the unit in which c_3 counts 64-bit overflows */
1450 mov 1,t_2
1451 lduw ap(0),a_0
1452 sllx t_2,32,t_2
1453 lduw ap(1),a_1
/* column 0: a single square, no carry handling needed */
1454 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1455 srlx t_1,32,c_12
1456 stuw t_1,rp(0) !r[0]=c1;
1457
/* column 1: the cross product is added twice */
1458 lduw ap(2),a_2
1459 mulx a_0,a_1,t_1 !sqr_add_c2(a,1,0,c2,c3,c1);
1460 addcc c_12,t_1,c_12
1461 clr c_3
1462 bcs,a %xcc,.+8
1463 add c_3,t_2,c_3
1464 addcc c_12,t_1,t_1
1465 bcs,a %xcc,.+8
1466 add c_3,t_2,c_3
/* end of column: store the low word, carry the rest forward */
1467 srlx t_1,32,c_12
1468 stuw t_1,rp(1) !r[1]=c2;
1469 or c_12,c_3,c_12
1470
/* column 2 */
1471 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1472 addcc c_12,t_1,c_12
1473 clr c_3
1474 bcs,a %xcc,.+8
1475 add c_3,t_2,c_3
1476 addcc c_12,t_1,c_12
1477 bcs,a %xcc,.+8
1478 add c_3,t_2,c_3
1479 lduw ap(3),a_3
1480 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1481 addcc c_12,t_1,t_1
1482 bcs,a %xcc,.+8
1483 add c_3,t_2,c_3
1484 srlx t_1,32,c_12
1485 stuw t_1,rp(2) !r[2]=c3;
1486 or c_12,c_3,c_12
1487
/* column 3 */
1488 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1489 addcc c_12,t_1,c_12
1490 clr c_3
1491 bcs,a %xcc,.+8
1492 add c_3,t_2,c_3
1493 addcc c_12,t_1,c_12
1494 bcs,a %xcc,.+8
1495 add c_3,t_2,c_3
1496 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1497 addcc c_12,t_1,c_12
1498 bcs,a %xcc,.+8
1499 add c_3,t_2,c_3
1500 addcc c_12,t_1,t_1
1501 bcs,a %xcc,.+8
1502 add c_3,t_2,c_3
1503 srlx t_1,32,c_12
1504 stuw t_1,rp(3) !r[3]=c1;
1505 or c_12,c_3,c_12
1506
/* column 4 */
1507 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1508 addcc c_12,t_1,c_12
1509 clr c_3
1510 bcs,a %xcc,.+8
1511 add c_3,t_2,c_3
1512 addcc c_12,t_1,c_12
1513 bcs,a %xcc,.+8
1514 add c_3,t_2,c_3
1515 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1516 addcc c_12,t_1,t_1
1517 bcs,a %xcc,.+8
1518 add c_3,t_2,c_3
1519 srlx t_1,32,c_12
1520 stuw t_1,rp(4) !r[4]=c2;
1521 or c_12,c_3,c_12
1522
/* column 5 */
1523 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1524 addcc c_12,t_1,c_12
1525 clr c_3
1526 bcs,a %xcc,.+8
1527 add c_3,t_2,c_3
1528 addcc c_12,t_1,t_1
1529 bcs,a %xcc,.+8
1530 add c_3,t_2,c_3
1531 srlx t_1,32,c_12
1532 stuw t_1,rp(5) !r[5]=c3;
1533 or c_12,c_3,c_12
1534
/* columns 6 and 7: last square; the high half of the sum is stored
 * directly as r[7], so no overflow count is kept */
1535 mulx a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1536 addcc c_12,t_1,t_1
1537 srlx t_1,32,c_12
1538 stuw t_1,rp(6) !r[6]=c1;
1539 stuw c_12,rp(7) !r[7]=c2;
1540
/* return; the restore in the delay slot writes 0 to the caller %o0 */
1541 ret
1542 restore %g0,%g0,%o0
1543
1544.type bn_sqr_comba4,#function
1545.size bn_sqr_comba4,(.-bn_sqr_comba4)
1546
1547.align 32
diff --git a/src/lib/libcrypto/bn/asm/vms.mar b/src/lib/libcrypto/bn/asm/vms.mar
new file mode 100644
index 0000000000..aefab15cdb
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/vms.mar
@@ -0,0 +1,6440 @@
1 .title vax_bn_mul_add_words unsigned multiply & add, 32*32+32+32=>64
2;
3; w.j.m. 15-jan-1999
4;
5; it's magic ...
6; (the multiply is done signed; the unsigned result is recovered by the fixups below)
7; ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
8; ULONG c = 0;
9; int i;
10; for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
11; return c;
12; }
13
14r=4 ;(AP) r by reference (read and written)
15a=8 ;(AP) a by reference (input)
16n=12 ;(AP) n by value (input)
17w=16 ;(AP) w by value (input)
18
19
20 .psect code,nowrt
21
22.entry bn_mul_add_words,^m<r2,r3,r4,r5,r6>
23
24 moval @r(ap),r2 ; r2 -> r[]
25 moval @a(ap),r3 ; r3 -> a[]
26 movl n(ap),r4 ; r4 = loop count (assumed >0 by C code)
27 movl w(ap),r5 ; r5 = w
28 clrl r6 ; r6 = c (running carry word)
29
300$:
31 emul r5,(r3),(r2),r0 ; <r1,r0> := w * a[] + r[]; w, a[], r[] considered signed
32
33 ; fixup for "negative" r[]: add 1 back to the high word
34 tstl (r2) ; r[] has not been overwritten yet
35 bgeq 10$
36 incl r1
3710$:
38
39 ; add in c as an unsigned 32-bit value
40 addl2 r6,r0
41 adwc #0,r1 ; propagate the carry into the high word
42
43 ; combined fixup for "negative" w, a[]: add the other factor to the high word
44 tstl r5 ; w
45 bgeq 20$
46 addl2 (r3),r1 ; a[]
4720$:
48 tstl (r3) ; a[]
49 bgeq 30$
50 addl2 r5,r1 ; w
5130$:
52
53 movl r0,(r2)+ ; store lo result in r[] & advance
54 addl #4,r3 ; advance a[]
55 movl r1,r6 ; store hi result => c
56
57 sobgtr r4,0$ ; decrement count, loop while it is still > 0
58
59 movl r6,r0 ; return c
60 ret
61
62 .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64
63;
64; w.j.m. 15-jan-1999
65;
66; it's magic ...
67; (the multiply is done signed; the unsigned result is recovered by the fixups below)
68; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
69; ULONG c = 0;
70; int i;
71; for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;
72; return(c);
73; }
74
75r=4 ;(AP) r by reference (output)
76a=8 ;(AP) a by reference (input)
77n=12 ;(AP) n by value (input)
78w=16 ;(AP) w by value (input)
79
80
81 .psect code,nowrt
82
83.entry bn_mul_words,^m<r2,r3,r4,r5,r6>
84
85 moval @r(ap),r2 ; r2 -> r[]
86 moval @a(ap),r3 ; r3 -> a[]
87 movl n(ap),r4 ; r4 = loop count (assumed >0 by C code)
88 movl w(ap),r5 ; r5 = w
89 clrl r6 ; r6 = c
90
910$:
92 ; <r1,r0> := w * a[] + c
93 emul r5,(r3),r6,r0 ; w, a[], c considered signed
94
95 ; fixup for "negative" c: add 1 back to the high word
96 tstl r6 ; c
97 bgeq 10$
98 incl r1
9910$:
100
101 ; combined fixup for "negative" w, a[]: add the other factor to the high word
102 tstl r5 ; w
103 bgeq 20$
104 addl2 (r3),r1 ; a[]
10520$:
106 tstl (r3) ; a[]
107 bgeq 30$
108 addl2 r5,r1 ; w
10930$:
110
111 movl r0,(r2)+ ; store lo result in r[] & advance
112 addl #4,r3 ; advance a[]
113 movl r1,r6 ; store hi result => c
114
115 sobgtr r4,0$ ; decrement count, loop while it is still > 0
116
117 movl r6,r0 ; return c
118 ret
119
120 .title vax_bn_sqr_words unsigned square, 32*32=>64
121;
122; w.j.m. 15-jan-1999
123;
124; it's magic ...
125; (the multiply is done signed; the unsigned result is recovered by the fixup below)
126; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
127; int i;
128; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
129; }
130
131r=4 ;(AP) r by reference (output, two longwords per element of a[])
132a=8 ;(AP) a by reference (input)
133n=12 ;(AP) n by value (input)
134
135
136 .psect code,nowrt
137
138.entry bn_sqr_words,^m<r2,r3,r4,r5>
139
140 moval @r(ap),r2 ; r2 -> r[]
141 moval @a(ap),r3 ; r3 -> a[]
142 movl n(ap),r4 ; r4 = n (assumed >0 by C code)
143
1440$:
145 movl (r3)+,r5 ; r5 = a[] & advance
146
147 ; <r1,r0> := a[] * a[]
148 emul r5,r5,#0,r0 ; a[] considered signed
149
150 ; fixup for "negative" a[]: both factors are a[], so a[] is added to the high word twice
151 tstl r5 ; a[]
152 bgeq 30$
153 addl2 r5,r1 ; a[]
154 addl2 r5,r1 ; a[]
15530$:
156
157 movl r0,(r2)+ ; store lo result in r[] & advance
158 movl r1,(r2)+ ; store hi result in r[] & advance
159
160 sobgtr r4,0$ ; decrement count, loop while it is still > 0
161
162 movl #1,r0 ; return SS$_NORMAL (r0 is set although the C prototype above is void)
163 ret
164
165 .title vax_bn_div_words unsigned divide
166;
167; Richard Levitte 20-Nov-2000
168;
169; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
170; {
171; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
172; }
173;
174; Using EDIV would be very easy, if it didn't do signed calculations.
175; Any time any of the input numbers has its sign bit set, there are problems,
176; usually with integer overflow, at which point it returns useless
177; data (the quotient gets the value of l, and the remainder becomes 0).
178;
179; If it was just for the dividend, it would be very easy, just divide
180; it by 2 (unsigned), do the division, multiply the resulting quotient
181; and remainder by 2, add the bit that was dropped when dividing by 2
182; to the remainder, and do some adjustment so the remainder doesn't
183; end up larger than the divisor. For some cases when the divisor is
184; negative (from EDIV's point of view, i.e. when the highest bit is set),
185; dividing the dividend by 2 isn't enough, and since some operations
186; might generate integer overflows even when the dividend is divided by
187; 4 (when the high part of the shifted down dividend ends up being exactly
188; half of the divisor, the result is the quotient 0x80000000, which is
189; negative...) it needs to be divided by 8. Furthermore, the divisor needs
190; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
191; In this case, a little extra fiddling with the remainder is required.
192;
193; So, the simplest way to handle this is always to divide the dividend
194; by 8, and to divide the divisor by 2 if its highest bit is set.
195; After EDIV has been used, the quotient gets multiplied by 8 if the
196; original divisor was positive, otherwise 4. The remainder, oddly
197; enough, is *always* multiplied by 8.
198; NOTE: in the case mentioned above, where the high part of the shifted
199; down dividend ends up being exactly half the shifted down divisor, we
200; end up with a 33 bit quotient. That's no problem however, it usually
201; means we have ended up with a too large remainder as well, and the
202; problem is fixed by the last part of the algorithm (next paragraph).
203;
204; The routine ends with comparing the resulting remainder with the
205; original divisor and if the remainder is larger, subtract the
206; original divisor from it, and increase the quotient by 1. This is
207; done until the remainder is smaller than the divisor.
208;
209; The complete algorithm looks like this:
210;
211; d' = d; if (d < 0) d = d >> 1 # unsigned shift, d' keeps the original
212; l' = l & 7
213; [h,l] = [h,l] >> 3
214; [q,r] = floor([h,l] / d) # This is the EDIV operation
215; if (q < 0) q = -q # I doubt this is necessary any more
216;
217; r' = r >> 29
218; if (d' >= 0)
219; q' = q >> 29
220; q = q << 3
221; else
222; q' = q >> 30
223; q = q << 2
224; r = (r << 3) + l'
225;
226; if (d' < 0 && (d' & 1))
227; {
228; [r',r] = [r',r] - [q',q]
229; while ([r',r] < 0)
230; {
231; [r',r] = [r',r] + d'
232; [q',q] = [q',q] - 1
233; }
234; }
235;
236; while ([r',r] >= d')
237; {
238; [r',r] = [r',r] - d'
239; [q',q] = [q',q] + 1
240; }
241;
242; return q
243
244h=4 ;(AP) h by value (input)
245l=8 ;(AP) l by value (input)
246d=12 ;(AP) d by value (input)
247
248;r2 = l, q
249;r3 = h, r
250;r4 = d (shifted right by one bit when its top bit is set)
251;r5 = l'
252;r6 = r' (also used as scratch for h & 7 during the initial shift)
253;r7 = d'
254;r8 = q'
255
256 .psect code,nowrt
257
258.entry bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8>
259 movl l(ap),r2
260 movl h(ap),r3
261 movl d(ap),r4
262
263 bicl3 #^XFFFFFFF8,r2,r5 ; l' = l & 7
264 bicl3 #^X00000007,r2,r2 ; clear the low 3 bits of l
265
266 bicl3 #^XFFFFFFF8,r3,r6 ; r6 = h & 7, the bits that move down into l
267 bicl3 #^X00000007,r3,r3 ; clear the low 3 bits of h
268
269 addl r6,r2 ; park h's low 3 bits in the cleared low bits of l
270
271 rotl #-3,r2,r2 ; l = l >> 3 (the rotate brings h's 3 bits in at the top)
272 rotl #-3,r3,r3 ; h = h >> 3
273
274 movl r4,r7 ; d' = d
275
276 movl #0,r6 ; r' = 0
277 movl #0,r8 ; q' = 0
278
279 tstl r4
280 beql 666$ ; Uh-oh, the divisor is 0...
281 bgtr 1$
282 rotl #-1,r4,r4 ; If d is negative, shift it right.
283 bicl2 #^X80000000,r4 ; Since d is then a large number, the
284 ; lowest bit is insignificant
285 ; (contradict that, and I'll fix the problem!)
2861$:
287 ediv r4,r2,r2,r3 ; Do the actual division: quotient -> r2, remainder -> r3
288
289 tstl r2
290 bgeq 3$
291 mnegl r2,r2 ; if q < 0, negate it
2923$:
293 tstl r7
294 blss 4$ ; original divisor had its top bit set: scale q by 4 instead of 8
295 rotl #3,r2,r2 ; q = q << 3
296 bicl3 #^XFFFFFFF8,r2,r8 ; q' gets the high bits from q
297 bicl3 #^X00000007,r2,r2
298 bsb 41$ ; skip the else arm. NOTE(review): the other jumps here use brb; confirm bsb does not push a return address
2994$: ; else
300 rotl #2,r2,r2 ; q = q << 2
301 bicl3 #^XFFFFFFFC,r2,r8 ; q' gets the high bits from q
302 bicl3 #^X00000003,r2,r2
30341$:
304 rotl #3,r3,r3 ; r = r << 3
305 bicl3 #^XFFFFFFF8,r3,r6 ; r' gets the high bits from r
306 bicl3 #^X00000007,r3,r3
307 addl r5,r3 ; r = r + l'
308
309 tstl r7
310 bgeq 5$
311 bitl #1,r7
312 beql 5$ ; if d' < 0 && d' & 1
313 subl r2,r3 ; [r',r] = [r',r] - [q',q]
314 sbwc r8,r6
31545$:
316 bgeq 5$ ; while r < 0
317 decl r2 ; [q',q] = [q',q] - 1
318 sbwc #0,r8
319 addl r7,r3 ; [r',r] = [r',r] + d'
320 adwc #0,r6 ; the loop test at 45$ uses the condition codes left by this add
321 brb 45$
322
323; The return points are placed in the middle to keep a short distance from
324; all the branch points
32542$:
326; movl r3,r1
327 movl r2,r0 ; return q
328 ret
329666$:
330 movl #^XFFFFFFFF,r0 ; division by zero: return all ones
331 ret
332
3335$:
334 tstl r6 ; r' != 0 means [r',r] is certainly >= d'
335 bneq 6$
336 cmpl r3,r7
337 blssu 42$ ; while [r',r] >= d'
3386$:
339 subl r7,r3 ; [r',r] = [r',r] - d'
340 sbwc #0,r6
341 incl r2 ; [q',q] = [q',q] + 1
342 adwc #0,r8
343 brb 5$
344
345 .title vax_bn_add_words unsigned add of two arrays
346;
347; Richard Levitte 20-Nov-2000
348;
349; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
350; ULONG c = 0;
351; int i;
352; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
353; return(c);
354; }
355; The carry between words lives in the C condition bit for the whole loop.
356r=4 ;(AP) r by reference (output)
357a=8 ;(AP) a by reference (input)
358b=12 ;(AP) b by reference (input)
359n=16 ;(AP) n by value (input)
360
361
362 .psect code,nowrt
363
364.entry bn_add_words,^m<r2,r3,r4,r5,r6>
365
366 moval @r(ap),r2 ; r2 -> r[]
367 moval @a(ap),r3 ; r3 -> a[]
368 moval @b(ap),r4 ; r4 -> b[]
369 movl n(ap),r5 ; r5 = loop count (assumed >0 by C code)
370 clrl r0 ; c
371
372 tstl r5 ; carry = 0
373 bleq 666$ ; n <= 0: nothing to add, return c = 0
374
3750$:
376 movl (r3)+,r6 ; r6 = a[] & advance; carry untouched
377 adwc (r4)+,r6 ; r6 += b[] + carry; carry used and touched
378 movl r6,(r2)+ ; store in r[] & advance; carry untouched
379 sobgtr r5,0$ ; carry untouched
380
381 adwc #0,r0 ; r0 = final carry out
382666$:
383 ret
384
385 .title vax_bn_sub_words unsigned subtract of two arrays
386;
387; Richard Levitte 20-Nov-2000
388;
389; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
390; ULONG c = 0;
391; int i;
392; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
393; return(c);
394; }
395; The borrow between words lives in the C condition bit for the whole loop.
396r=4 ;(AP) r by reference (output)
397a=8 ;(AP) a by reference (input)
398b=12 ;(AP) b by reference (input)
399n=16 ;(AP) n by value (input)
400
401
402 .psect code,nowrt
403
404.entry bn_sub_words,^m<r2,r3,r4,r5,r6>
405
406 moval @r(ap),r2 ; r2 -> r[]
407 moval @a(ap),r3 ; r3 -> a[]
408 moval @b(ap),r4 ; r4 -> b[]
409 movl n(ap),r5 ; r5 = loop count (assumed >0 by C code)
410 clrl r0 ; c
411
412 tstl r5 ; borrow = 0
413 bleq 666$ ; n <= 0: nothing to subtract, return c = 0
414
4150$:
416 movl (r3)+,r6 ; r6 = a[] & advance; borrow untouched
417 sbwc (r4)+,r6 ; r6 -= b[] + borrow; borrow used and touched
418 movl r6,(r2)+ ; store in r[] & advance; borrow untouched
419 sobgtr r5,0$ ; borrow untouched
420
421 adwc #0,r0 ; r0 = final borrow out
422666$:
423 ret
424
425
426;r=4 ;(AP)
427;a=8 ;(AP)
428;b=12 ;(AP)
429;n=16 ;(AP) n by value (input)
430
431 .psect code,nowrt
432
433.entry BN_MUL_COMBA8,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
434 movab -924(sp),sp
435 clrq r8
436
437 clrl r10
438
439 movl 8(ap),r6
440 movzwl 2(r6),r3
441 movl 12(ap),r7
442 bicl3 #-65536,(r7),r2
443 movzwl 2(r7),r0
444 bicl2 #-65536,r0
445 bicl3 #-65536,(r6),-12(fp)
446 bicl3 #-65536,r3,-16(fp)
447 mull3 r0,-12(fp),-4(fp)
448 mull2 r2,-12(fp)
449 mull3 r2,-16(fp),-8(fp)
450 mull2 r0,-16(fp)
451 addl3 -4(fp),-8(fp),r0
452 bicl3 #0,r0,-4(fp)
453 cmpl -4(fp),-8(fp)
454 bgequ noname.45
455 addl2 #65536,-16(fp)
456noname.45:
457 movzwl -2(fp),r0
458 bicl2 #-65536,r0
459 addl2 r0,-16(fp)
460 bicl3 #-65536,-4(fp),r0
461 ashl #16,r0,-8(fp)
462 addl3 -8(fp),-12(fp),r0
463 bicl3 #0,r0,-12(fp)
464 cmpl -12(fp),-8(fp)
465 bgequ noname.46
466 incl -16(fp)
467noname.46:
468 movl -12(fp),r1
469 movl -16(fp),r2
470 addl2 r1,r9
471 bicl2 #0,r9
472 cmpl r9,r1
473 bgequ noname.47
474 incl r2
475noname.47:
476 addl2 r2,r8
477 bicl2 #0,r8
478 cmpl r8,r2
479 bgequ noname.48
480 incl r10
481noname.48:
482
483 movl 4(ap),r11
484 movl r9,(r11)
485
486 clrl r9
487
488 movzwl 2(r6),r2
489 bicl3 #-65536,4(r7),r3
490 movzwl 6(r7),r0
491 bicl2 #-65536,r0
492 bicl3 #-65536,(r6),-28(fp)
493 bicl3 #-65536,r2,-32(fp)
494 mull3 r0,-28(fp),-20(fp)
495 mull2 r3,-28(fp)
496 mull3 r3,-32(fp),-24(fp)
497 mull2 r0,-32(fp)
498 addl3 -20(fp),-24(fp),r0
499 bicl3 #0,r0,-20(fp)
500 cmpl -20(fp),-24(fp)
501 bgequ noname.49
502 addl2 #65536,-32(fp)
503noname.49:
504 movzwl -18(fp),r0
505 bicl2 #-65536,r0
506 addl2 r0,-32(fp)
507 bicl3 #-65536,-20(fp),r0
508 ashl #16,r0,-24(fp)
509 addl3 -24(fp),-28(fp),r0
510 bicl3 #0,r0,-28(fp)
511 cmpl -28(fp),-24(fp)
512 bgequ noname.50
513 incl -32(fp)
514noname.50:
515 movl -28(fp),r1
516 movl -32(fp),r2
517 addl2 r1,r8
518 bicl2 #0,r8
519 cmpl r8,r1
520 bgequ noname.51
521 incl r2
522noname.51:
523 addl2 r2,r10
524 bicl2 #0,r10
525 cmpl r10,r2
526 bgequ noname.52
527 incl r9
528noname.52:
529
530 movzwl 6(r6),r2
531 bicl3 #-65536,(r7),r3
532 movzwl 2(r7),r0
533 bicl2 #-65536,r0
534 bicl3 #-65536,4(r6),-44(fp)
535 bicl3 #-65536,r2,-48(fp)
536 mull3 r0,-44(fp),-36(fp)
537 mull2 r3,-44(fp)
538 mull3 r3,-48(fp),-40(fp)
539 mull2 r0,-48(fp)
540 addl3 -36(fp),-40(fp),r0
541 bicl3 #0,r0,-36(fp)
542 cmpl -36(fp),-40(fp)
543 bgequ noname.53
544 addl2 #65536,-48(fp)
545noname.53:
546 movzwl -34(fp),r0
547 bicl2 #-65536,r0
548 addl2 r0,-48(fp)
549 bicl3 #-65536,-36(fp),r0
550 ashl #16,r0,-40(fp)
551 addl3 -40(fp),-44(fp),r0
552 bicl3 #0,r0,-44(fp)
553 cmpl -44(fp),-40(fp)
554 bgequ noname.54
555 incl -48(fp)
556noname.54:
557 movl -44(fp),r1
558 movl -48(fp),r2
559 addl2 r1,r8
560 bicl2 #0,r8
561 cmpl r8,r1
562 bgequ noname.55
563 incl r2
564noname.55:
565 addl2 r2,r10
566 bicl2 #0,r10
567 cmpl r10,r2
568 bgequ noname.56
569 incl r9
570noname.56:
571
572 movl r8,4(r11)
573
574 clrl r8
575
576 movzwl 10(r6),r2
577 bicl3 #-65536,(r7),r3
578 movzwl 2(r7),r0
579 bicl2 #-65536,r0
580 bicl3 #-65536,8(r6),-60(fp)
581 bicl3 #-65536,r2,-64(fp)
582 mull3 r0,-60(fp),-52(fp)
583 mull2 r3,-60(fp)
584 mull3 r3,-64(fp),-56(fp)
585 mull2 r0,-64(fp)
586 addl3 -52(fp),-56(fp),r0
587 bicl3 #0,r0,-52(fp)
588 cmpl -52(fp),-56(fp)
589 bgequ noname.57
590 addl2 #65536,-64(fp)
591noname.57:
592 movzwl -50(fp),r0
593 bicl2 #-65536,r0
594 addl2 r0,-64(fp)
595 bicl3 #-65536,-52(fp),r0
596 ashl #16,r0,-56(fp)
597 addl3 -56(fp),-60(fp),r0
598 bicl3 #0,r0,-60(fp)
599 cmpl -60(fp),-56(fp)
600 bgequ noname.58
601 incl -64(fp)
602noname.58:
603 movl -60(fp),r1
604 movl -64(fp),r2
605 addl2 r1,r10
606 bicl2 #0,r10
607 cmpl r10,r1
608 bgequ noname.59
609 incl r2
610noname.59:
611 addl2 r2,r9
612 bicl2 #0,r9
613 cmpl r9,r2
614 bgequ noname.60
615 incl r8
616noname.60:
617
618 movzwl 6(r6),r2
619 bicl3 #-65536,4(r7),r3
620 movzwl 6(r7),r0
621 bicl2 #-65536,r0
622 bicl3 #-65536,4(r6),-76(fp)
623 bicl3 #-65536,r2,-80(fp)
624 mull3 r0,-76(fp),-68(fp)
625 mull2 r3,-76(fp)
626 mull3 r3,-80(fp),-72(fp)
627 mull2 r0,-80(fp)
628 addl3 -68(fp),-72(fp),r0
629 bicl3 #0,r0,-68(fp)
630 cmpl -68(fp),-72(fp)
631 bgequ noname.61
632 addl2 #65536,-80(fp)
633noname.61:
634 movzwl -66(fp),r0
635 bicl2 #-65536,r0
636 addl2 r0,-80(fp)
637 bicl3 #-65536,-68(fp),r0
638 ashl #16,r0,-72(fp)
639 addl3 -72(fp),-76(fp),r0
640 bicl3 #0,r0,-76(fp)
641 cmpl -76(fp),-72(fp)
642 bgequ noname.62
643 incl -80(fp)
644noname.62:
645 movl -76(fp),r1
646 movl -80(fp),r2
647 addl2 r1,r10
648 bicl2 #0,r10
649 cmpl r10,r1
650 bgequ noname.63
651 incl r2
652noname.63:
653 addl2 r2,r9
654 bicl2 #0,r9
655 cmpl r9,r2
656 bgequ noname.64
657 incl r8
658noname.64:
659
660 movzwl 2(r6),r2
661 bicl3 #-65536,8(r7),r3
662 movzwl 10(r7),r0
663 bicl2 #-65536,r0
664 bicl3 #-65536,(r6),-92(fp)
665 bicl3 #-65536,r2,-96(fp)
666 mull3 r0,-92(fp),-84(fp)
667 mull2 r3,-92(fp)
668 mull3 r3,-96(fp),-88(fp)
669 mull2 r0,-96(fp)
670 addl3 -84(fp),-88(fp),r0
671 bicl3 #0,r0,-84(fp)
672 cmpl -84(fp),-88(fp)
673 bgequ noname.65
674 addl2 #65536,-96(fp)
675noname.65:
676 movzwl -82(fp),r0
677 bicl2 #-65536,r0
678 addl2 r0,-96(fp)
679 bicl3 #-65536,-84(fp),r0
680 ashl #16,r0,-88(fp)
681 addl3 -88(fp),-92(fp),r0
682 bicl3 #0,r0,-92(fp)
683 cmpl -92(fp),-88(fp)
684 bgequ noname.66
685 incl -96(fp)
686noname.66:
687 movl -92(fp),r1
688 movl -96(fp),r2
689 addl2 r1,r10
690 bicl2 #0,r10
691 cmpl r10,r1
692 bgequ noname.67
693 incl r2
694noname.67:
695 addl2 r2,r9
696 bicl2 #0,r9
697 cmpl r9,r2
698 bgequ noname.68
699 incl r8
700noname.68:
701
702 movl r10,8(r11)
703
704 clrl r10
705
706 movzwl 2(r6),r2
707 bicl3 #-65536,12(r7),r3
708 movzwl 14(r7),r0
709 bicl2 #-65536,r0
710 bicl3 #-65536,(r6),-108(fp)
711 bicl3 #-65536,r2,-112(fp)
712 mull3 r0,-108(fp),-100(fp)
713 mull2 r3,-108(fp)
714 mull3 r3,-112(fp),-104(fp)
715 mull2 r0,-112(fp)
716 addl3 -100(fp),-104(fp),r0
717 bicl3 #0,r0,-100(fp)
718 cmpl -100(fp),-104(fp)
719 bgequ noname.69
720 addl2 #65536,-112(fp)
721noname.69:
722 movzwl -98(fp),r0
723 bicl2 #-65536,r0
724 addl2 r0,-112(fp)
725 bicl3 #-65536,-100(fp),r0
726 ashl #16,r0,-104(fp)
727 addl3 -104(fp),-108(fp),r0
728 bicl3 #0,r0,-108(fp)
729 cmpl -108(fp),-104(fp)
730 bgequ noname.70
731 incl -112(fp)
732noname.70:
733 movl -108(fp),r1
734 movl -112(fp),r2
735 addl2 r1,r9
736 bicl2 #0,r9
737 cmpl r9,r1
738 bgequ noname.71
739 incl r2
740noname.71:
741 addl2 r2,r8
742 bicl2 #0,r8
743 cmpl r8,r2
744 bgequ noname.72
745 incl r10
746noname.72:
747
748 movzwl 6(r6),r2
749 bicl3 #-65536,8(r7),r3
750 movzwl 10(r7),r0
751 bicl2 #-65536,r0
752 bicl3 #-65536,4(r6),-124(fp)
753 bicl3 #-65536,r2,-128(fp)
754 mull3 r0,-124(fp),-116(fp)
755 mull2 r3,-124(fp)
756 mull3 r3,-128(fp),-120(fp)
757 mull2 r0,-128(fp)
758 addl3 -116(fp),-120(fp),r0
759 bicl3 #0,r0,-116(fp)
760 cmpl -116(fp),-120(fp)
761 bgequ noname.73
762 addl2 #65536,-128(fp)
763noname.73:
764 movzwl -114(fp),r0
765 bicl2 #-65536,r0
766 addl2 r0,-128(fp)
767 bicl3 #-65536,-116(fp),r0
768 ashl #16,r0,-120(fp)
769 addl3 -120(fp),-124(fp),r0
770 bicl3 #0,r0,-124(fp)
771 cmpl -124(fp),-120(fp)
772 bgequ noname.74
773 incl -128(fp)
774noname.74:
775 movl -124(fp),r1
776 movl -128(fp),r2
777 addl2 r1,r9
778 bicl2 #0,r9
779 cmpl r9,r1
780 bgequ noname.75
781 incl r2
782noname.75:
783 addl2 r2,r8
784 bicl2 #0,r8
785 cmpl r8,r2
786 bgequ noname.76
787 incl r10
788noname.76:
789
790 movzwl 10(r6),r2
791 bicl3 #-65536,4(r7),r3
792 movzwl 6(r7),r0
793 bicl2 #-65536,r0
794 bicl3 #-65536,8(r6),-140(fp)
795 bicl3 #-65536,r2,-144(fp)
796 mull3 r0,-140(fp),-132(fp)
797 mull2 r3,-140(fp)
798 mull3 r3,-144(fp),-136(fp)
799 mull2 r0,-144(fp)
800 addl3 -132(fp),-136(fp),r0
801 bicl3 #0,r0,-132(fp)
802 cmpl -132(fp),-136(fp)
803 bgequ noname.77
804 addl2 #65536,-144(fp)
805noname.77:
806 movzwl -130(fp),r0
807 bicl2 #-65536,r0
808 addl2 r0,-144(fp)
809 bicl3 #-65536,-132(fp),r0
810 ashl #16,r0,-136(fp)
811 addl3 -136(fp),-140(fp),r0
812 bicl3 #0,r0,-140(fp)
813 cmpl -140(fp),-136(fp)
814 bgequ noname.78
815 incl -144(fp)
816noname.78:
817 movl -140(fp),r1
818 movl -144(fp),r2
819 addl2 r1,r9
820 bicl2 #0,r9
821 cmpl r9,r1
822 bgequ noname.79
823 incl r2
824noname.79:
825 addl2 r2,r8
826 bicl2 #0,r8
827 cmpl r8,r2
828 bgequ noname.80
829 incl r10
830noname.80:
831
832 movzwl 14(r6),r2
833 bicl3 #-65536,(r7),r3
834 movzwl 2(r7),r0
835 bicl2 #-65536,r0
836 bicl3 #-65536,12(r6),-156(fp)
837 bicl3 #-65536,r2,-160(fp)
838 mull3 r0,-156(fp),-148(fp)
839 mull2 r3,-156(fp)
840 mull3 r3,-160(fp),-152(fp)
841 mull2 r0,-160(fp)
842 addl3 -148(fp),-152(fp),r0
843 bicl3 #0,r0,-148(fp)
844 cmpl -148(fp),-152(fp)
845 bgequ noname.81
846 addl2 #65536,-160(fp)
847noname.81:
848 movzwl -146(fp),r0
849 bicl2 #-65536,r0
850 addl2 r0,-160(fp)
851 bicl3 #-65536,-148(fp),r0
852 ashl #16,r0,-152(fp)
853 addl3 -152(fp),-156(fp),r0
854 bicl3 #0,r0,-156(fp)
855 cmpl -156(fp),-152(fp)
856 bgequ noname.82
857 incl -160(fp)
858noname.82:
859 movl -156(fp),r1
860 movl -160(fp),r2
861 addl2 r1,r9
862 bicl2 #0,r9
863 cmpl r9,r1
864 bgequ noname.83
865 incl r2
866noname.83:
867 addl2 r2,r8
868 bicl2 #0,r8
869 cmpl r8,r2
870 bgequ noname.84
871 incl r10
872noname.84:
873
874 movl r9,12(r11)
875
876 clrl r9
877
878 movzwl 18(r6),r2
879 bicl3 #-65536,(r7),r3
880 movzwl 2(r7),r0
881 bicl2 #-65536,r0
882 bicl3 #-65536,16(r6),-172(fp)
883 bicl3 #-65536,r2,-176(fp)
884 mull3 r0,-172(fp),-164(fp)
885 mull2 r3,-172(fp)
886 mull3 r3,-176(fp),-168(fp)
887 mull2 r0,-176(fp)
888 addl3 -164(fp),-168(fp),r0
889 bicl3 #0,r0,-164(fp)
890 cmpl -164(fp),-168(fp)
891 bgequ noname.85
892 addl2 #65536,-176(fp)
893noname.85:
894 movzwl -162(fp),r0
895 bicl2 #-65536,r0
896 addl2 r0,-176(fp)
897 bicl3 #-65536,-164(fp),r0
898 ashl #16,r0,-168(fp)
899 addl3 -168(fp),-172(fp),r0
900 bicl3 #0,r0,-172(fp)
901 cmpl -172(fp),-168(fp)
902 bgequ noname.86
903 incl -176(fp)
904noname.86:
905 movl -172(fp),r1
906 movl -176(fp),r2
907 addl2 r1,r8
908 bicl2 #0,r8
909 cmpl r8,r1
910 bgequ noname.87
911 incl r2
912noname.87:
913 addl2 r2,r10
914 bicl2 #0,r10
915 cmpl r10,r2
916 bgequ noname.88
917 incl r9
918noname.88:
919
920 movzwl 14(r6),r2
921 bicl3 #-65536,4(r7),r3
922 movzwl 6(r7),r0
923 bicl2 #-65536,r0
924 bicl3 #-65536,12(r6),-188(fp)
925 bicl3 #-65536,r2,-192(fp)
926 mull3 r0,-188(fp),-180(fp)
927 mull2 r3,-188(fp)
928 mull3 r3,-192(fp),-184(fp)
929 mull2 r0,-192(fp)
930 addl3 -180(fp),-184(fp),r0
931 bicl3 #0,r0,-180(fp)
932 cmpl -180(fp),-184(fp)
933 bgequ noname.89
934 addl2 #65536,-192(fp)
935noname.89:
936 movzwl -178(fp),r0
937 bicl2 #-65536,r0
938 addl2 r0,-192(fp)
939 bicl3 #-65536,-180(fp),r0
940 ashl #16,r0,-184(fp)
941 addl3 -184(fp),-188(fp),r0
942 bicl3 #0,r0,-188(fp)
943 cmpl -188(fp),-184(fp)
944 bgequ noname.90
945 incl -192(fp)
946noname.90:
947 movl -188(fp),r1
948 movl -192(fp),r2
949 addl2 r1,r8
950 bicl2 #0,r8
951 cmpl r8,r1
952 bgequ noname.91
953 incl r2
954noname.91:
955 addl2 r2,r10
956 bicl2 #0,r10
957 cmpl r10,r2
958 bgequ noname.92
959 incl r9
960noname.92:
961
962 movzwl 10(r6),r2
963 bicl3 #-65536,8(r7),r3
964 movzwl 10(r7),r0
965 bicl2 #-65536,r0
966 bicl3 #-65536,8(r6),-204(fp)
967 bicl3 #-65536,r2,-208(fp)
968 mull3 r0,-204(fp),-196(fp)
969 mull2 r3,-204(fp)
970 mull3 r3,-208(fp),-200(fp)
971 mull2 r0,-208(fp)
972 addl3 -196(fp),-200(fp),r0
973 bicl3 #0,r0,-196(fp)
974 cmpl -196(fp),-200(fp)
975 bgequ noname.93
976 addl2 #65536,-208(fp)
977noname.93:
978 movzwl -194(fp),r0
979 bicl2 #-65536,r0
980 addl2 r0,-208(fp)
981 bicl3 #-65536,-196(fp),r0
982 ashl #16,r0,-200(fp)
983 addl3 -200(fp),-204(fp),r0
984 bicl3 #0,r0,-204(fp)
985 cmpl -204(fp),-200(fp)
986 bgequ noname.94
987 incl -208(fp)
988noname.94:
989 movl -204(fp),r1
990 movl -208(fp),r2
991 addl2 r1,r8
992 bicl2 #0,r8
993 cmpl r8,r1
994 bgequ noname.95
995 incl r2
996noname.95:
997 addl2 r2,r10
998 bicl2 #0,r10
999 cmpl r10,r2
1000 bgequ noname.96
1001 incl r9
1002noname.96:
1003
1004 movzwl 6(r6),r2
1005 bicl3 #-65536,12(r7),r3
1006 movzwl 14(r7),r0
1007 bicl2 #-65536,r0
1008 bicl3 #-65536,4(r6),-220(fp)
1009 bicl3 #-65536,r2,-224(fp)
1010 mull3 r0,-220(fp),-212(fp)
1011 mull2 r3,-220(fp)
1012 mull3 r3,-224(fp),-216(fp)
1013 mull2 r0,-224(fp)
1014 addl3 -212(fp),-216(fp),r0
1015 bicl3 #0,r0,-212(fp)
1016 cmpl -212(fp),-216(fp)
1017 bgequ noname.97
1018 addl2 #65536,-224(fp)
1019noname.97:
1020 movzwl -210(fp),r0
1021 bicl2 #-65536,r0
1022 addl2 r0,-224(fp)
1023 bicl3 #-65536,-212(fp),r0
1024 ashl #16,r0,-216(fp)
1025 addl3 -216(fp),-220(fp),r0
1026 bicl3 #0,r0,-220(fp)
1027 cmpl -220(fp),-216(fp)
1028 bgequ noname.98
1029 incl -224(fp)
1030noname.98:
1031 movl -220(fp),r1
1032 movl -224(fp),r2
1033 addl2 r1,r8
1034 bicl2 #0,r8
1035 cmpl r8,r1
1036 bgequ noname.99
1037 incl r2
1038noname.99:
1039 addl2 r2,r10
1040 bicl2 #0,r10
1041 cmpl r10,r2
1042 bgequ noname.100
1043 incl r9
1044noname.100:
1045
1046 movzwl 2(r6),r2
1047 bicl3 #-65536,16(r7),r3
1048 movzwl 18(r7),r0
1049 bicl2 #-65536,r0
1050 bicl3 #-65536,(r6),-236(fp)
1051 bicl3 #-65536,r2,-240(fp)
1052 mull3 r0,-236(fp),-228(fp)
1053 mull2 r3,-236(fp)
1054 mull3 r3,-240(fp),-232(fp)
1055 mull2 r0,-240(fp)
1056 addl3 -228(fp),-232(fp),r0
1057 bicl3 #0,r0,-228(fp)
1058 cmpl -228(fp),-232(fp)
1059 bgequ noname.101
1060 addl2 #65536,-240(fp)
1061noname.101:
1062 movzwl -226(fp),r0
1063 bicl2 #-65536,r0
1064 addl2 r0,-240(fp)
1065 bicl3 #-65536,-228(fp),r0
1066 ashl #16,r0,-232(fp)
1067 addl3 -232(fp),-236(fp),r0
1068 bicl3 #0,r0,-236(fp)
1069 cmpl -236(fp),-232(fp)
1070 bgequ noname.102
1071 incl -240(fp)
1072noname.102:
1073 movl -236(fp),r1
1074 movl -240(fp),r2
1075 addl2 r1,r8
1076 bicl2 #0,r8
1077 cmpl r8,r1
1078 bgequ noname.103
1079 incl r2
1080noname.103:
1081 addl2 r2,r10
1082 bicl2 #0,r10
1083 cmpl r10,r2
1084 bgequ noname.104
1085 incl r9
1086noname.104:
1087
1088 movl r8,16(r11)
1089
1090 clrl r8
1091
1092 movzwl 2(r6),r2
1093 bicl3 #-65536,20(r7),r3
1094 movzwl 22(r7),r0
1095 bicl2 #-65536,r0
1096 bicl3 #-65536,(r6),-252(fp)
1097 bicl3 #-65536,r2,-256(fp)
1098 mull3 r0,-252(fp),-244(fp)
1099 mull2 r3,-252(fp)
1100 mull3 r3,-256(fp),-248(fp)
1101 mull2 r0,-256(fp)
1102 addl3 -244(fp),-248(fp),r0
1103 bicl3 #0,r0,-244(fp)
1104 cmpl -244(fp),-248(fp)
1105 bgequ noname.105
1106 addl2 #65536,-256(fp)
1107noname.105:
1108 movzwl -242(fp),r0
1109 bicl2 #-65536,r0
1110 addl2 r0,-256(fp)
1111 bicl3 #-65536,-244(fp),r0
1112 ashl #16,r0,-248(fp)
1113 addl3 -248(fp),-252(fp),r0
1114 bicl3 #0,r0,-252(fp)
1115 cmpl -252(fp),-248(fp)
1116 bgequ noname.106
1117 incl -256(fp)
1118noname.106:
1119 movl -252(fp),r1
1120 movl -256(fp),r2
1121 addl2 r1,r10
1122 bicl2 #0,r10
1123 cmpl r10,r1
1124 bgequ noname.107
1125 incl r2
1126noname.107:
1127 addl2 r2,r9
1128 bicl2 #0,r9
1129 cmpl r9,r2
1130 bgequ noname.108
1131 incl r8
1132noname.108:
1133
1134 movzwl 6(r6),r2
1135 bicl3 #-65536,16(r7),r3
1136 movzwl 18(r7),r0
1137 bicl2 #-65536,r0
1138 bicl3 #-65536,4(r6),-268(fp)
1139 bicl3 #-65536,r2,-272(fp)
1140 mull3 r0,-268(fp),-260(fp)
1141 mull2 r3,-268(fp)
1142 mull3 r3,-272(fp),-264(fp)
1143 mull2 r0,-272(fp)
1144 addl3 -260(fp),-264(fp),r0
1145 bicl3 #0,r0,-260(fp)
1146 cmpl -260(fp),-264(fp)
1147 bgequ noname.109
1148 addl2 #65536,-272(fp)
1149noname.109:
1150 movzwl -258(fp),r0
1151 bicl2 #-65536,r0
1152 addl2 r0,-272(fp)
1153 bicl3 #-65536,-260(fp),r0
1154 ashl #16,r0,-264(fp)
1155 addl3 -264(fp),-268(fp),r0
1156 bicl3 #0,r0,-268(fp)
1157 cmpl -268(fp),-264(fp)
1158 bgequ noname.110
1159 incl -272(fp)
1160noname.110:
1161 movl -268(fp),r1
1162 movl -272(fp),r2
1163 addl2 r1,r10
1164 bicl2 #0,r10
1165 cmpl r10,r1
1166 bgequ noname.111
1167 incl r2
1168noname.111:
1169 addl2 r2,r9
1170 bicl2 #0,r9
1171 cmpl r9,r2
1172 bgequ noname.112
1173 incl r8
1174noname.112:
1175
1176 movzwl 10(r6),r2
1177 bicl3 #-65536,12(r7),r3
1178 movzwl 14(r7),r0
1179 bicl2 #-65536,r0
1180 bicl3 #-65536,8(r6),-284(fp)
1181 bicl3 #-65536,r2,-288(fp)
1182 mull3 r0,-284(fp),-276(fp)
1183 mull2 r3,-284(fp)
1184 mull3 r3,-288(fp),-280(fp)
1185 mull2 r0,-288(fp)
1186 addl3 -276(fp),-280(fp),r0
1187 bicl3 #0,r0,-276(fp)
1188 cmpl -276(fp),-280(fp)
1189 bgequ noname.113
1190 addl2 #65536,-288(fp)
1191noname.113:
1192 movzwl -274(fp),r0
1193 bicl2 #-65536,r0
1194 addl2 r0,-288(fp)
1195 bicl3 #-65536,-276(fp),r0
1196 ashl #16,r0,-280(fp)
1197 addl3 -280(fp),-284(fp),r0
1198 bicl3 #0,r0,-284(fp)
1199 cmpl -284(fp),-280(fp)
1200 bgequ noname.114
1201 incl -288(fp)
1202noname.114:
1203 movl -284(fp),r1
1204 movl -288(fp),r2
1205 addl2 r1,r10
1206 bicl2 #0,r10
1207 cmpl r10,r1
1208 bgequ noname.115
1209 incl r2
1210noname.115:
1211 addl2 r2,r9
1212 bicl2 #0,r9
1213 cmpl r9,r2
1214 bgequ noname.116
1215 incl r8
1216noname.116:
1217
1218 movzwl 14(r6),r2
1219 bicl3 #-65536,8(r7),r3
1220 movzwl 10(r7),r0
1221 bicl2 #-65536,r0
1222 bicl3 #-65536,12(r6),-300(fp)
1223 bicl3 #-65536,r2,-304(fp)
1224 mull3 r0,-300(fp),-292(fp)
1225 mull2 r3,-300(fp)
1226 mull3 r3,-304(fp),-296(fp)
1227 mull2 r0,-304(fp)
1228 addl3 -292(fp),-296(fp),r0
1229 bicl3 #0,r0,-292(fp)
1230 cmpl -292(fp),-296(fp)
1231 bgequ noname.117
1232 addl2 #65536,-304(fp)
1233noname.117:
1234 movzwl -290(fp),r0
1235 bicl2 #-65536,r0
1236 addl2 r0,-304(fp)
1237 bicl3 #-65536,-292(fp),r0
1238 ashl #16,r0,-296(fp)
1239 addl3 -296(fp),-300(fp),r0
1240 bicl3 #0,r0,-300(fp)
1241 cmpl -300(fp),-296(fp)
1242 bgequ noname.118
1243 incl -304(fp)
1244noname.118:
1245 movl -300(fp),r1
1246 movl -304(fp),r2
1247 addl2 r1,r10
1248 bicl2 #0,r10
1249 cmpl r10,r1
1250 bgequ noname.119
1251 incl r2
1252noname.119:
1253 addl2 r2,r9
1254 bicl2 #0,r9
1255 cmpl r9,r2
1256 bgequ noname.120
1257 incl r8
1258noname.120:
1259
1260 movzwl 18(r6),r2
1261 bicl3 #-65536,4(r7),r3
1262 movzwl 6(r7),r0
1263 bicl2 #-65536,r0
1264 bicl3 #-65536,16(r6),-316(fp)
1265 bicl3 #-65536,r2,-320(fp)
1266 mull3 r0,-316(fp),-308(fp)
1267 mull2 r3,-316(fp)
1268 mull3 r3,-320(fp),-312(fp)
1269 mull2 r0,-320(fp)
1270 addl3 -308(fp),-312(fp),r0
1271 bicl3 #0,r0,-308(fp)
1272 cmpl -308(fp),-312(fp)
1273 bgequ noname.121
1274 addl2 #65536,-320(fp)
1275noname.121:
1276 movzwl -306(fp),r0
1277 bicl2 #-65536,r0
1278 addl2 r0,-320(fp)
1279 bicl3 #-65536,-308(fp),r0
1280 ashl #16,r0,-312(fp)
1281 addl3 -312(fp),-316(fp),r0
1282 bicl3 #0,r0,-316(fp)
1283 cmpl -316(fp),-312(fp)
1284 bgequ noname.122
1285 incl -320(fp)
1286noname.122:
1287 movl -316(fp),r1
1288 movl -320(fp),r2
1289 addl2 r1,r10
1290 bicl2 #0,r10
1291 cmpl r10,r1
1292 bgequ noname.123
1293 incl r2
1294
1295noname.123:
1296 addl2 r2,r9
1297 bicl2 #0,r9
1298 cmpl r9,r2
1299 bgequ noname.124
1300 incl r8
1301noname.124:
1302
1303 movzwl 22(r6),r2
1304 bicl3 #-65536,(r7),r3
1305 movzwl 2(r7),r0
1306 bicl2 #-65536,r0
1307 bicl3 #-65536,20(r6),-332(fp)
1308 bicl3 #-65536,r2,-336(fp)
1309 mull3 r0,-332(fp),-324(fp)
1310 mull2 r3,-332(fp)
1311 mull3 r3,-336(fp),-328(fp)
1312 mull2 r0,-336(fp)
1313 addl3 -324(fp),-328(fp),r0
1314 bicl3 #0,r0,-324(fp)
1315 cmpl -324(fp),-328(fp)
1316 bgequ noname.125
1317 addl2 #65536,-336(fp)
1318noname.125:
1319 movzwl -322(fp),r0
1320 bicl2 #-65536,r0
1321 addl2 r0,-336(fp)
1322 bicl3 #-65536,-324(fp),r0
1323 ashl #16,r0,-328(fp)
1324 addl3 -328(fp),-332(fp),r0
1325 bicl3 #0,r0,-332(fp)
1326 cmpl -332(fp),-328(fp)
1327 bgequ noname.126
1328 incl -336(fp)
1329noname.126:
1330 movl -332(fp),r1
1331 movl -336(fp),r2
1332 addl2 r1,r10
1333 bicl2 #0,r10
1334 cmpl r10,r1
1335 bgequ noname.127
1336 incl r2
1337noname.127:
1338 addl2 r2,r9
1339 bicl2 #0,r9
1340 cmpl r9,r2
1341 bgequ noname.128
1342 incl r8
1343noname.128:
1344
1345 movl r10,20(r11)
1346
1347 clrl r10
1348
1349 movzwl 26(r6),r2
1350 bicl3 #-65536,(r7),r3
1351 movzwl 2(r7),r0
1352 bicl2 #-65536,r0
1353 bicl3 #-65536,24(r6),-348(fp)
1354 bicl3 #-65536,r2,-352(fp)
1355 mull3 r0,-348(fp),-340(fp)
1356 mull2 r3,-348(fp)
1357 mull3 r3,-352(fp),-344(fp)
1358 mull2 r0,-352(fp)
1359 addl3 -340(fp),-344(fp),r0
1360 bicl3 #0,r0,-340(fp)
1361 cmpl -340(fp),-344(fp)
1362 bgequ noname.129
1363 addl2 #65536,-352(fp)
1364noname.129:
1365 movzwl -338(fp),r0
1366 bicl2 #-65536,r0
1367 addl2 r0,-352(fp)
1368 bicl3 #-65536,-340(fp),r0
1369 ashl #16,r0,-344(fp)
1370 addl3 -344(fp),-348(fp),r0
1371 bicl3 #0,r0,-348(fp)
1372 cmpl -348(fp),-344(fp)
1373 bgequ noname.130
1374 incl -352(fp)
1375noname.130:
1376 movl -348(fp),r1
1377 movl -352(fp),r2
1378 addl2 r1,r9
1379 bicl2 #0,r9
1380 cmpl r9,r1
1381 bgequ noname.131
1382 incl r2
1383noname.131:
1384 addl2 r2,r8
1385 bicl2 #0,r8
1386 cmpl r8,r2
1387 bgequ noname.132
1388 incl r10
1389noname.132:
1390
1391 movzwl 22(r6),r2
1392 bicl3 #-65536,4(r7),r3
1393 movzwl 6(r7),r0
1394 bicl2 #-65536,r0
1395 bicl3 #-65536,20(r6),-364(fp)
1396 bicl3 #-65536,r2,-368(fp)
1397 mull3 r0,-364(fp),-356(fp)
1398 mull2 r3,-364(fp)
1399 mull3 r3,-368(fp),-360(fp)
1400 mull2 r0,-368(fp)
1401 addl3 -356(fp),-360(fp),r0
1402 bicl3 #0,r0,-356(fp)
1403 cmpl -356(fp),-360(fp)
1404 bgequ noname.133
1405 addl2 #65536,-368(fp)
1406noname.133:
1407 movzwl -354(fp),r0
1408 bicl2 #-65536,r0
1409 addl2 r0,-368(fp)
1410 bicl3 #-65536,-356(fp),r0
1411 ashl #16,r0,-360(fp)
1412 addl3 -360(fp),-364(fp),r0
1413 bicl3 #0,r0,-364(fp)
1414 cmpl -364(fp),-360(fp)
1415 bgequ noname.134
1416 incl -368(fp)
1417noname.134:
1418 movl -364(fp),r1
1419 movl -368(fp),r2
1420 addl2 r1,r9
1421 bicl2 #0,r9
1422 cmpl r9,r1
1423 bgequ noname.135
1424 incl r2
1425noname.135:
1426 addl2 r2,r8
1427 bicl2 #0,r8
1428 cmpl r8,r2
1429 bgequ noname.136
1430 incl r10
1431noname.136:
1432
1433 movzwl 18(r6),r2
1434 bicl3 #-65536,8(r7),r3
1435 movzwl 10(r7),r0
1436 bicl2 #-65536,r0
1437 bicl3 #-65536,16(r6),-380(fp)
1438 bicl3 #-65536,r2,-384(fp)
1439 mull3 r0,-380(fp),-372(fp)
1440 mull2 r3,-380(fp)
1441 mull3 r3,-384(fp),-376(fp)
1442 mull2 r0,-384(fp)
1443 addl3 -372(fp),-376(fp),r0
1444 bicl3 #0,r0,-372(fp)
1445 cmpl -372(fp),-376(fp)
1446 bgequ noname.137
1447 addl2 #65536,-384(fp)
1448noname.137:
1449 movzwl -370(fp),r0
1450 bicl2 #-65536,r0
1451 addl2 r0,-384(fp)
1452 bicl3 #-65536,-372(fp),r0
1453 ashl #16,r0,-376(fp)
1454 addl3 -376(fp),-380(fp),r0
1455 bicl3 #0,r0,-380(fp)
1456 cmpl -380(fp),-376(fp)
1457 bgequ noname.138
1458 incl -384(fp)
1459noname.138:
1460 movl -380(fp),r1
1461 movl -384(fp),r2
1462 addl2 r1,r9
1463 bicl2 #0,r9
1464 cmpl r9,r1
1465 bgequ noname.139
1466 incl r2
1467noname.139:
1468 addl2 r2,r8
1469 bicl2 #0,r8
1470 cmpl r8,r2
1471 bgequ noname.140
1472 incl r10
1473noname.140:
1474
1475 movzwl 14(r6),r2
1476 bicl3 #-65536,12(r7),r3
1477 movzwl 14(r7),r0
1478 bicl2 #-65536,r0
1479 bicl3 #-65536,12(r6),-396(fp)
1480 bicl3 #-65536,r2,-400(fp)
1481 mull3 r0,-396(fp),-388(fp)
1482 mull2 r3,-396(fp)
1483 mull3 r3,-400(fp),-392(fp)
1484 mull2 r0,-400(fp)
1485 addl3 -388(fp),-392(fp),r0
1486 bicl3 #0,r0,-388(fp)
1487 cmpl -388(fp),-392(fp)
1488 bgequ noname.141
1489 addl2 #65536,-400(fp)
1490noname.141:
1491 movzwl -386(fp),r0
1492 bicl2 #-65536,r0
1493 addl2 r0,-400(fp)
1494 bicl3 #-65536,-388(fp),r0
1495 ashl #16,r0,-392(fp)
1496 addl3 -392(fp),-396(fp),r0
1497 bicl3 #0,r0,-396(fp)
1498 cmpl -396(fp),-392(fp)
1499 bgequ noname.142
1500 incl -400(fp)
1501noname.142:
1502 movl -396(fp),r1
1503 movl -400(fp),r2
1504 addl2 r1,r9
1505 bicl2 #0,r9
1506 cmpl r9,r1
1507 bgequ noname.143
1508 incl r2
1509noname.143:
1510 addl2 r2,r8
1511 bicl2 #0,r8
1512 cmpl r8,r2
1513 bgequ noname.144
1514 incl r10
1515noname.144:
1516
1517 movzwl 10(r6),r2
1518 bicl3 #-65536,16(r7),r3
1519 movzwl 18(r7),r0
1520 bicl2 #-65536,r0
1521 bicl3 #-65536,8(r6),-412(fp)
1522 bicl3 #-65536,r2,-416(fp)
1523 mull3 r0,-412(fp),-404(fp)
1524 mull2 r3,-412(fp)
1525 mull3 r3,-416(fp),-408(fp)
1526 mull2 r0,-416(fp)
1527 addl3 -404(fp),-408(fp),r0
1528 bicl3 #0,r0,-404(fp)
1529 cmpl -404(fp),-408(fp)
1530 bgequ noname.145
1531 addl2 #65536,-416(fp)
1532noname.145:
1533 movzwl -402(fp),r0
1534 bicl2 #-65536,r0
1535 addl2 r0,-416(fp)
1536 bicl3 #-65536,-404(fp),r0
1537 ashl #16,r0,-408(fp)
1538 addl3 -408(fp),-412(fp),r0
1539 bicl3 #0,r0,-412(fp)
1540 cmpl -412(fp),-408(fp)
1541 bgequ noname.146
1542 incl -416(fp)
1543noname.146:
1544 movl -412(fp),r1
1545 movl -416(fp),r2
1546 addl2 r1,r9
1547 bicl2 #0,r9
1548 cmpl r9,r1
1549 bgequ noname.147
1550 incl r2
1551noname.147:
1552 addl2 r2,r8
1553 bicl2 #0,r8
1554 cmpl r8,r2
1555 bgequ noname.148
1556 incl r10
1557noname.148:
1558
1559 movzwl 6(r6),r2
1560 bicl3 #-65536,20(r7),r3
1561 movzwl 22(r7),r0
1562 bicl2 #-65536,r0
1563 bicl3 #-65536,4(r6),-428(fp)
1564 bicl3 #-65536,r2,-432(fp)
1565 mull3 r0,-428(fp),-420(fp)
1566 mull2 r3,-428(fp)
1567 mull3 r3,-432(fp),-424(fp)
1568 mull2 r0,-432(fp)
1569 addl3 -420(fp),-424(fp),r0
1570 bicl3 #0,r0,-420(fp)
1571 cmpl -420(fp),-424(fp)
1572 bgequ noname.149
1573 addl2 #65536,-432(fp)
1574noname.149:
1575 movzwl -418(fp),r0
1576 bicl2 #-65536,r0
1577 addl2 r0,-432(fp)
1578 bicl3 #-65536,-420(fp),r0
1579 ashl #16,r0,-424(fp)
1580 addl3 -424(fp),-428(fp),r0
1581 bicl3 #0,r0,-428(fp)
1582 cmpl -428(fp),-424(fp)
1583 bgequ noname.150
1584 incl -432(fp)
1585noname.150:
1586 movl -428(fp),r1
1587 movl -432(fp),r2
1588 addl2 r1,r9
1589 bicl2 #0,r9
1590 cmpl r9,r1
1591 bgequ noname.151
1592 incl r2
1593noname.151:
1594 addl2 r2,r8
1595 bicl2 #0,r8
1596 cmpl r8,r2
1597 bgequ noname.152
1598 incl r10
1599noname.152:
1600
1601 movzwl 2(r6),r2
1602 bicl3 #-65536,24(r7),r3
1603 movzwl 26(r7),r0
1604 bicl2 #-65536,r0
1605 bicl3 #-65536,(r6),-444(fp)
1606 bicl3 #-65536,r2,-448(fp)
1607 mull3 r0,-444(fp),-436(fp)
1608 mull2 r3,-444(fp)
1609 mull3 r3,-448(fp),-440(fp)
1610 mull2 r0,-448(fp)
1611 addl3 -436(fp),-440(fp),r0
1612 bicl3 #0,r0,-436(fp)
1613 cmpl -436(fp),-440(fp)
1614 bgequ noname.153
1615 addl2 #65536,-448(fp)
1616noname.153:
1617 movzwl -434(fp),r0
1618 bicl2 #-65536,r0
1619 addl2 r0,-448(fp)
1620 bicl3 #-65536,-436(fp),r0
1621 ashl #16,r0,-440(fp)
1622 addl3 -440(fp),-444(fp),r0
1623 bicl3 #0,r0,-444(fp)
1624 cmpl -444(fp),-440(fp)
1625 bgequ noname.154
1626 incl -448(fp)
1627noname.154:
1628 movl -444(fp),r1
1629 movl -448(fp),r2
1630 addl2 r1,r9
1631 bicl2 #0,r9
1632 cmpl r9,r1
1633 bgequ noname.155
1634 incl r2
1635noname.155:
1636 addl2 r2,r8
1637 bicl2 #0,r8
1638 cmpl r8,r2
1639 bgequ noname.156
1640 incl r10
1641noname.156:
1642
1643 movl r9,24(r11)
1644
1645 clrl r9
1646
1647 movzwl 2(r6),r2
1648 bicl3 #-65536,28(r7),r3
1649 movzwl 30(r7),r0
1650 bicl2 #-65536,r0
1651 bicl3 #-65536,(r6),-460(fp)
1652 bicl3 #-65536,r2,-464(fp)
1653 mull3 r0,-460(fp),-452(fp)
1654 mull2 r3,-460(fp)
1655 mull3 r3,-464(fp),-456(fp)
1656 mull2 r0,-464(fp)
1657 addl3 -452(fp),-456(fp),r0
1658 bicl3 #0,r0,-452(fp)
1659 cmpl -452(fp),-456(fp)
1660 bgequ noname.157
1661 addl2 #65536,-464(fp)
1662noname.157:
1663 movzwl -450(fp),r0
1664 bicl2 #-65536,r0
1665 addl2 r0,-464(fp)
1666 bicl3 #-65536,-452(fp),r0
1667 ashl #16,r0,-456(fp)
1668 addl3 -456(fp),-460(fp),r0
1669 bicl3 #0,r0,-460(fp)
1670 cmpl -460(fp),-456(fp)
1671 bgequ noname.158
1672 incl -464(fp)
1673noname.158:
1674 movl -460(fp),r1
1675 movl -464(fp),r2
1676 addl2 r1,r8
1677 bicl2 #0,r8
1678 cmpl r8,r1
1679 bgequ noname.159
1680 incl r2
1681noname.159:
1682 addl2 r2,r10
1683 bicl2 #0,r10
1684 cmpl r10,r2
1685 bgequ noname.160
1686 incl r9
1687noname.160:
1688
1689 movzwl 6(r6),r2
1690 bicl3 #-65536,24(r7),r3
1691 movzwl 26(r7),r0
1692 bicl2 #-65536,r0
1693 bicl3 #-65536,4(r6),-476(fp)
1694 bicl3 #-65536,r2,-480(fp)
1695 mull3 r0,-476(fp),-468(fp)
1696 mull2 r3,-476(fp)
1697 mull3 r3,-480(fp),-472(fp)
1698 mull2 r0,-480(fp)
1699 addl3 -468(fp),-472(fp),r0
1700 bicl3 #0,r0,-468(fp)
1701 cmpl -468(fp),-472(fp)
1702 bgequ noname.161
1703 addl2 #65536,-480(fp)
1704noname.161:
1705 movzwl -466(fp),r0
1706 bicl2 #-65536,r0
1707 addl2 r0,-480(fp)
1708 bicl3 #-65536,-468(fp),r0
1709 ashl #16,r0,-472(fp)
1710 addl3 -472(fp),-476(fp),r0
1711 bicl3 #0,r0,-476(fp)
1712 cmpl -476(fp),-472(fp)
1713 bgequ noname.162
1714 incl -480(fp)
1715noname.162:
1716 movl -476(fp),r1
1717 movl -480(fp),r2
1718 addl2 r1,r8
1719 bicl2 #0,r8
1720 cmpl r8,r1
1721 bgequ noname.163
1722 incl r2
1723noname.163:
1724 addl2 r2,r10
1725 bicl2 #0,r10
1726 cmpl r10,r2
1727 bgequ noname.164
1728 incl r9
1729noname.164:
1730
1731 movzwl 10(r6),r2
1732 bicl3 #-65536,20(r7),r3
1733 movzwl 22(r7),r0
1734 bicl2 #-65536,r0
1735 bicl3 #-65536,8(r6),-492(fp)
1736 bicl3 #-65536,r2,-496(fp)
1737 mull3 r0,-492(fp),-484(fp)
1738 mull2 r3,-492(fp)
1739 mull3 r3,-496(fp),-488(fp)
1740 mull2 r0,-496(fp)
1741 addl3 -484(fp),-488(fp),r0
1742 bicl3 #0,r0,-484(fp)
1743 cmpl -484(fp),-488(fp)
1744 bgequ noname.165
1745 addl2 #65536,-496(fp)
1746noname.165:
1747 movzwl -482(fp),r0
1748 bicl2 #-65536,r0
1749 addl2 r0,-496(fp)
1750 bicl3 #-65536,-484(fp),r0
1751 ashl #16,r0,-488(fp)
1752 addl3 -488(fp),-492(fp),r0
1753 bicl3 #0,r0,-492(fp)
1754 cmpl -492(fp),-488(fp)
1755 bgequ noname.166
1756 incl -496(fp)
1757noname.166:
1758 movl -492(fp),r1
1759 movl -496(fp),r2
1760 addl2 r1,r8
1761 bicl2 #0,r8
1762 cmpl r8,r1
1763 bgequ noname.167
1764 incl r2
1765noname.167:
1766 addl2 r2,r10
1767 bicl2 #0,r10
1768 cmpl r10,r2
1769 bgequ noname.168
1770 incl r9
1771noname.168:
1772
1773 movzwl 14(r6),r2
1774 bicl3 #-65536,16(r7),r3
1775 movzwl 18(r7),r0
1776 bicl2 #-65536,r0
1777 bicl3 #-65536,12(r6),-508(fp)
1778 bicl3 #-65536,r2,-512(fp)
1779 mull3 r0,-508(fp),-500(fp)
1780 mull2 r3,-508(fp)
1781 mull3 r3,-512(fp),-504(fp)
1782 mull2 r0,-512(fp)
1783 addl3 -500(fp),-504(fp),r0
1784 bicl3 #0,r0,-500(fp)
1785 cmpl -500(fp),-504(fp)
1786 bgequ noname.169
1787 addl2 #65536,-512(fp)
1788noname.169:
1789 movzwl -498(fp),r0
1790 bicl2 #-65536,r0
1791 addl2 r0,-512(fp)
1792 bicl3 #-65536,-500(fp),r0
1793 ashl #16,r0,-504(fp)
1794 addl3 -504(fp),-508(fp),r0
1795 bicl3 #0,r0,-508(fp)
1796 cmpl -508(fp),-504(fp)
1797 bgequ noname.170
1798 incl -512(fp)
1799noname.170:
1800 movl -508(fp),r1
1801 movl -512(fp),r2
1802 addl2 r1,r8
1803 bicl2 #0,r8
1804 cmpl r8,r1
1805 bgequ noname.171
1806 incl r2
1807noname.171:
1808 addl2 r2,r10
1809 bicl2 #0,r10
1810 cmpl r10,r2
1811 bgequ noname.172
1812 incl r9
1813noname.172:
1814
1815 movzwl 18(r6),r2
1816 bicl3 #-65536,12(r7),r3
1817 movzwl 14(r7),r0
1818 bicl2 #-65536,r0
1819 bicl3 #-65536,16(r6),-524(fp)
1820 bicl3 #-65536,r2,-528(fp)
1821 mull3 r0,-524(fp),-516(fp)
1822 mull2 r3,-524(fp)
1823 mull3 r3,-528(fp),-520(fp)
1824 mull2 r0,-528(fp)
1825 addl3 -516(fp),-520(fp),r0
1826 bicl3 #0,r0,-516(fp)
1827 cmpl -516(fp),-520(fp)
1828 bgequ noname.173
1829 addl2 #65536,-528(fp)
1830noname.173:
1831 movzwl -514(fp),r0
1832 bicl2 #-65536,r0
1833 addl2 r0,-528(fp)
1834 bicl3 #-65536,-516(fp),r0
1835 ashl #16,r0,-520(fp)
1836 addl3 -520(fp),-524(fp),r0
1837 bicl3 #0,r0,-524(fp)
1838 cmpl -524(fp),-520(fp)
1839 bgequ noname.174
1840 incl -528(fp)
1841noname.174:
1842 movl -524(fp),r1
1843 movl -528(fp),r2
1844 addl2 r1,r8
1845 bicl2 #0,r8
1846 cmpl r8,r1
1847 bgequ noname.175
1848 incl r2
1849noname.175:
1850 addl2 r2,r10
1851 bicl2 #0,r10
1852 cmpl r10,r2
1853 bgequ noname.176
1854 incl r9
1855noname.176:
1856
1857 movzwl 22(r6),r2
1858 bicl3 #-65536,8(r7),r3
1859 movzwl 10(r7),r0
1860 bicl2 #-65536,r0
1861 bicl3 #-65536,20(r6),-540(fp)
1862 bicl3 #-65536,r2,-544(fp)
1863 mull3 r0,-540(fp),-532(fp)
1864 mull2 r3,-540(fp)
1865 mull3 r3,-544(fp),-536(fp)
1866 mull2 r0,-544(fp)
1867 addl3 -532(fp),-536(fp),r0
1868 bicl3 #0,r0,-532(fp)
1869 cmpl -532(fp),-536(fp)
1870 bgequ noname.177
1871 addl2 #65536,-544(fp)
1872noname.177:
1873 movzwl -530(fp),r0
1874 bicl2 #-65536,r0
1875 addl2 r0,-544(fp)
1876 bicl3 #-65536,-532(fp),r0
1877 ashl #16,r0,-536(fp)
1878 addl3 -536(fp),-540(fp),r0
1879 bicl3 #0,r0,-540(fp)
1880 cmpl -540(fp),-536(fp)
1881 bgequ noname.178
1882 incl -544(fp)
1883noname.178:
1884 movl -540(fp),r1
1885 movl -544(fp),r2
1886 addl2 r1,r8
1887 bicl2 #0,r8
1888 cmpl r8,r1
1889 bgequ noname.179
1890 incl r2
1891noname.179:
1892 addl2 r2,r10
1893 bicl2 #0,r10
1894 cmpl r10,r2
1895 bgequ noname.180
1896 incl r9
1897noname.180:
1898
1899 movzwl 26(r6),r2
1900 bicl3 #-65536,4(r7),r3
1901 movzwl 6(r7),r0
1902 bicl2 #-65536,r0
1903 bicl3 #-65536,24(r6),-556(fp)
1904 bicl3 #-65536,r2,-560(fp)
1905 mull3 r0,-556(fp),-548(fp)
1906 mull2 r3,-556(fp)
1907 mull3 r3,-560(fp),-552(fp)
1908 mull2 r0,-560(fp)
1909 addl3 -548(fp),-552(fp),r0
1910 bicl3 #0,r0,-548(fp)
1911 cmpl -548(fp),-552(fp)
1912 bgequ noname.181
1913 addl2 #65536,-560(fp)
1914noname.181:
1915 movzwl -546(fp),r0
1916 bicl2 #-65536,r0
1917 addl2 r0,-560(fp)
1918 bicl3 #-65536,-548(fp),r0
1919 ashl #16,r0,-552(fp)
1920 addl3 -552(fp),-556(fp),r0
1921 bicl3 #0,r0,-556(fp)
1922 cmpl -556(fp),-552(fp)
1923 bgequ noname.182
1924 incl -560(fp)
1925noname.182:
1926 movl -556(fp),r1
1927 movl -560(fp),r2
1928 addl2 r1,r8
1929 bicl2 #0,r8
1930 cmpl r8,r1
1931 bgequ noname.183
1932 incl r2
1933noname.183:
1934 addl2 r2,r10
1935 bicl2 #0,r10
1936 cmpl r10,r2
1937 bgequ noname.184
1938 incl r9
1939noname.184:
1940
1941 movzwl 30(r6),r2
1942 bicl3 #-65536,(r7),r3
1943 movzwl 2(r7),r0
1944 bicl2 #-65536,r0
1945 bicl3 #-65536,28(r6),-572(fp)
1946 bicl3 #-65536,r2,-576(fp)
1947 mull3 r0,-572(fp),-564(fp)
1948 mull2 r3,-572(fp)
1949 mull3 r3,-576(fp),-568(fp)
1950 mull2 r0,-576(fp)
1951 addl3 -564(fp),-568(fp),r0
1952 bicl3 #0,r0,-564(fp)
1953 cmpl -564(fp),-568(fp)
1954 bgequ noname.185
1955 addl2 #65536,-576(fp)
1956noname.185:
1957 movzwl -562(fp),r0
1958 bicl2 #-65536,r0
1959 addl2 r0,-576(fp)
1960 bicl3 #-65536,-564(fp),r0
1961 ashl #16,r0,-568(fp)
1962 addl3 -568(fp),-572(fp),r0
1963 bicl3 #0,r0,-572(fp)
1964 cmpl -572(fp),-568(fp)
1965 bgequ noname.186
1966 incl -576(fp)
1967noname.186:
1968 movl -572(fp),r1
1969 movl -576(fp),r2
1970 addl2 r1,r8
1971 bicl2 #0,r8
1972 cmpl r8,r1
1973 bgequ noname.187
1974 incl r2
1975noname.187:
1976 addl2 r2,r10
1977 bicl2 #0,r10
1978 cmpl r10,r2
1979 bgequ noname.188
1980 incl r9
1981noname.188:
1982
1983 movl r8,28(r11)
1984
1985 clrl r8
1986
1987 movzwl 30(r6),r2
1988 bicl3 #-65536,4(r7),r3
1989 movzwl 6(r7),r0
1990 bicl2 #-65536,r0
1991 bicl3 #-65536,28(r6),-588(fp)
1992 bicl3 #-65536,r2,-592(fp)
1993 mull3 r0,-588(fp),-580(fp)
1994 mull2 r3,-588(fp)
1995 mull3 r3,-592(fp),-584(fp)
1996 mull2 r0,-592(fp)
1997 addl3 -580(fp),-584(fp),r0
1998 bicl3 #0,r0,-580(fp)
1999 cmpl -580(fp),-584(fp)
2000 bgequ noname.189
2001 addl2 #65536,-592(fp)
2002noname.189:
2003 movzwl -578(fp),r0
2004 bicl2 #-65536,r0
2005 addl2 r0,-592(fp)
2006 bicl3 #-65536,-580(fp),r0
2007 ashl #16,r0,-584(fp)
2008 addl3 -584(fp),-588(fp),r0
2009 bicl3 #0,r0,-588(fp)
2010 cmpl -588(fp),-584(fp)
2011 bgequ noname.190
2012 incl -592(fp)
2013noname.190:
2014 movl -588(fp),r1
2015 movl -592(fp),r2
2016 addl2 r1,r10
2017 bicl2 #0,r10
2018 cmpl r10,r1
2019 bgequ noname.191
2020 incl r2
2021noname.191:
2022 addl2 r2,r9
2023 bicl2 #0,r9
2024 cmpl r9,r2
2025 bgequ noname.192
2026 incl r8
2027noname.192:
2028
2029 movzwl 26(r6),r2
2030 bicl3 #-65536,8(r7),r3
2031 movzwl 10(r7),r0
2032 bicl2 #-65536,r0
2033 bicl3 #-65536,24(r6),-604(fp)
2034 bicl3 #-65536,r2,-608(fp)
2035 mull3 r0,-604(fp),-596(fp)
2036 mull2 r3,-604(fp)
2037 mull3 r3,-608(fp),-600(fp)
2038 mull2 r0,-608(fp)
2039 addl3 -596(fp),-600(fp),r0
2040 bicl3 #0,r0,-596(fp)
2041 cmpl -596(fp),-600(fp)
2042 bgequ noname.193
2043 addl2 #65536,-608(fp)
2044noname.193:
2045 movzwl -594(fp),r0
2046 bicl2 #-65536,r0
2047 addl2 r0,-608(fp)
2048 bicl3 #-65536,-596(fp),r0
2049 ashl #16,r0,-600(fp)
2050 addl3 -600(fp),-604(fp),r0
2051 bicl3 #0,r0,-604(fp)
2052 cmpl -604(fp),-600(fp)
2053 bgequ noname.194
2054 incl -608(fp)
2055noname.194:
2056 movl -604(fp),r1
2057 movl -608(fp),r2
2058 addl2 r1,r10
2059 bicl2 #0,r10
2060 cmpl r10,r1
2061 bgequ noname.195
2062 incl r2
2063noname.195:
2064 addl2 r2,r9
2065 bicl2 #0,r9
2066 cmpl r9,r2
2067 bgequ noname.196
2068 incl r8
2069noname.196:
2070
2071 movzwl 22(r6),r2
2072 bicl3 #-65536,12(r7),r3
2073 movzwl 14(r7),r0
2074 bicl2 #-65536,r0
2075 bicl3 #-65536,20(r6),-620(fp)
2076 bicl3 #-65536,r2,-624(fp)
2077 mull3 r0,-620(fp),-612(fp)
2078 mull2 r3,-620(fp)
2079 mull3 r3,-624(fp),-616(fp)
2080 mull2 r0,-624(fp)
2081 addl3 -612(fp),-616(fp),r0
2082 bicl3 #0,r0,-612(fp)
2083 cmpl -612(fp),-616(fp)
2084 bgequ noname.197
2085 addl2 #65536,-624(fp)
2086noname.197:
2087 movzwl -610(fp),r0
2088 bicl2 #-65536,r0
2089 addl2 r0,-624(fp)
2090 bicl3 #-65536,-612(fp),r0
2091 ashl #16,r0,-616(fp)
2092 addl3 -616(fp),-620(fp),r0
2093 bicl3 #0,r0,-620(fp)
2094 cmpl -620(fp),-616(fp)
2095 bgequ noname.198
2096 incl -624(fp)
2097noname.198:
2098 movl -620(fp),r1
2099 movl -624(fp),r2
2100 addl2 r1,r10
2101 bicl2 #0,r10
2102 cmpl r10,r1
2103 bgequ noname.199
2104 incl r2
2105noname.199:
2106 addl2 r2,r9
2107 bicl2 #0,r9
2108 cmpl r9,r2
2109 bgequ noname.200
2110 incl r8
2111noname.200:
2112
2113 movzwl 18(r6),r2
2114 bicl3 #-65536,16(r7),r3
2115 movzwl 18(r7),r0
2116 bicl2 #-65536,r0
2117 bicl3 #-65536,16(r6),-636(fp)
2118 bicl3 #-65536,r2,-640(fp)
2119 mull3 r0,-636(fp),-628(fp)
2120 mull2 r3,-636(fp)
2121 mull3 r3,-640(fp),-632(fp)
2122 mull2 r0,-640(fp)
2123 addl3 -628(fp),-632(fp),r0
2124 bicl3 #0,r0,-628(fp)
2125 cmpl -628(fp),-632(fp)
2126 bgequ noname.201
2127 addl2 #65536,-640(fp)
2128noname.201:
2129 movzwl -626(fp),r0
2130 bicl2 #-65536,r0
2131 addl2 r0,-640(fp)
2132 bicl3 #-65536,-628(fp),r0
2133 ashl #16,r0,-632(fp)
2134 addl3 -632(fp),-636(fp),r0
2135 bicl3 #0,r0,-636(fp)
2136 cmpl -636(fp),-632(fp)
2137 bgequ noname.202
2138 incl -640(fp)
2139noname.202:
2140 movl -636(fp),r1
2141 movl -640(fp),r2
2142 addl2 r1,r10
2143 bicl2 #0,r10
2144 cmpl r10,r1
2145 bgequ noname.203
2146 incl r2
2147noname.203:
2148 addl2 r2,r9
2149 bicl2 #0,r9
2150 cmpl r9,r2
2151 bgequ noname.204
2152 incl r8
2153noname.204:
2154
2155 movzwl 14(r6),r2
2156 bicl3 #-65536,20(r7),r3
2157 movzwl 22(r7),r0
2158 bicl2 #-65536,r0
2159 bicl3 #-65536,12(r6),-652(fp)
2160 bicl3 #-65536,r2,-656(fp)
2161 mull3 r0,-652(fp),-644(fp)
2162 mull2 r3,-652(fp)
2163 mull3 r3,-656(fp),-648(fp)
2164 mull2 r0,-656(fp)
2165 addl3 -644(fp),-648(fp),r0
2166 bicl3 #0,r0,-644(fp)
2167 cmpl -644(fp),-648(fp)
2168 bgequ noname.205
2169 addl2 #65536,-656(fp)
2170noname.205:
2171 movzwl -642(fp),r0
2172 bicl2 #-65536,r0
2173 addl2 r0,-656(fp)
2174 bicl3 #-65536,-644(fp),r0
2175 ashl #16,r0,-648(fp)
2176 addl3 -648(fp),-652(fp),r0
2177 bicl3 #0,r0,-652(fp)
2178 cmpl -652(fp),-648(fp)
2179 bgequ noname.206
2180 incl -656(fp)
2181noname.206:
2182 movl -652(fp),r1
2183 movl -656(fp),r2
2184 addl2 r1,r10
2185 bicl2 #0,r10
2186 cmpl r10,r1
2187 bgequ noname.207
2188 incl r2
2189noname.207:
2190 addl2 r2,r9
2191 bicl2 #0,r9
2192 cmpl r9,r2
2193 bgequ noname.208
2194 incl r8
2195noname.208:
2196
2197 movzwl 10(r6),r2
2198 bicl3 #-65536,24(r7),r3
2199 movzwl 26(r7),r0
2200 bicl2 #-65536,r0
2201 bicl3 #-65536,8(r6),-668(fp)
2202 bicl3 #-65536,r2,-672(fp)
2203 mull3 r0,-668(fp),-660(fp)
2204 mull2 r3,-668(fp)
2205 mull3 r3,-672(fp),-664(fp)
2206 mull2 r0,-672(fp)
2207 addl3 -660(fp),-664(fp),r0
2208 bicl3 #0,r0,-660(fp)
2209 cmpl -660(fp),-664(fp)
2210 bgequ noname.209
2211 addl2 #65536,-672(fp)
2212noname.209:
2213 movzwl -658(fp),r0
2214 bicl2 #-65536,r0
2215 addl2 r0,-672(fp)
2216 bicl3 #-65536,-660(fp),r0
2217 ashl #16,r0,-664(fp)
2218 addl3 -664(fp),-668(fp),r0
2219 bicl3 #0,r0,-668(fp)
2220 cmpl -668(fp),-664(fp)
2221 bgequ noname.210
2222 incl -672(fp)
2223noname.210:
2224 movl -668(fp),r1
2225 movl -672(fp),r2
2226 addl2 r1,r10
2227 bicl2 #0,r10
2228 cmpl r10,r1
2229 bgequ noname.211
2230 incl r2
2231noname.211:
2232 addl2 r2,r9
2233 bicl2 #0,r9
2234 cmpl r9,r2
2235 bgequ noname.212
2236 incl r8
2237noname.212:
2238
2239 movzwl 6(r6),r2
2240 bicl3 #-65536,28(r7),r3
2241 movzwl 30(r7),r0
2242 bicl2 #-65536,r0
2243 bicl3 #-65536,4(r6),-684(fp)
2244 bicl3 #-65536,r2,-688(fp)
2245 mull3 r0,-684(fp),-676(fp)
2246 mull2 r3,-684(fp)
2247 mull3 r3,-688(fp),-680(fp)
2248 mull2 r0,-688(fp)
2249 addl3 -676(fp),-680(fp),r0
2250 bicl3 #0,r0,-676(fp)
2251 cmpl -676(fp),-680(fp)
2252 bgequ noname.213
2253 addl2 #65536,-688(fp)
2254noname.213:
2255 movzwl -674(fp),r0
2256 bicl2 #-65536,r0
2257 addl2 r0,-688(fp)
2258 bicl3 #-65536,-676(fp),r0
2259 ashl #16,r0,-680(fp)
2260 addl3 -680(fp),-684(fp),r0
2261 bicl3 #0,r0,-684(fp)
2262 cmpl -684(fp),-680(fp)
2263 bgequ noname.214
2264 incl -688(fp)
2265noname.214:
2266 movl -684(fp),r1
2267 movl -688(fp),r2
2268 addl2 r1,r10
2269 bicl2 #0,r10
2270 cmpl r10,r1
2271 bgequ noname.215
2272 incl r2
2273noname.215:
2274 addl2 r2,r9
2275 bicl2 #0,r9
2276 cmpl r9,r2
2277 bgequ noname.216
2278 incl r8
2279noname.216:
2280
2281 movl r10,32(r11)
2282
2283 clrl r10
2284
2285 movzwl 10(r6),r2
2286 bicl3 #-65536,28(r7),r3
2287 movzwl 30(r7),r0
2288 bicl2 #-65536,r0
2289 bicl3 #-65536,8(r6),-700(fp)
2290 bicl3 #-65536,r2,-704(fp)
2291 mull3 r0,-700(fp),-692(fp)
2292 mull2 r3,-700(fp)
2293 mull3 r3,-704(fp),-696(fp)
2294 mull2 r0,-704(fp)
2295 addl3 -692(fp),-696(fp),r0
2296 bicl3 #0,r0,-692(fp)
2297 cmpl -692(fp),-696(fp)
2298 bgequ noname.217
2299 addl2 #65536,-704(fp)
2300noname.217:
2301 movzwl -690(fp),r0
2302 bicl2 #-65536,r0
2303 addl2 r0,-704(fp)
2304 bicl3 #-65536,-692(fp),r0
2305 ashl #16,r0,-696(fp)
2306 addl3 -696(fp),-700(fp),r0
2307 bicl3 #0,r0,-700(fp)
2308 cmpl -700(fp),-696(fp)
2309 bgequ noname.218
2310 incl -704(fp)
2311noname.218:
2312 movl -700(fp),r1
2313 movl -704(fp),r2
2314 addl2 r1,r9
2315 bicl2 #0,r9
2316 cmpl r9,r1
2317 bgequ noname.219
2318 incl r2
2319noname.219:
2320 addl2 r2,r8
2321 bicl2 #0,r8
2322 cmpl r8,r2
2323 bgequ noname.220
2324 incl r10
2325noname.220:
2326
2327 movzwl 14(r6),r2
2328 bicl3 #-65536,24(r7),r3
2329 movzwl 26(r7),r0
2330 bicl2 #-65536,r0
2331 bicl3 #-65536,12(r6),-716(fp)
2332 bicl3 #-65536,r2,-720(fp)
2333 mull3 r0,-716(fp),-708(fp)
2334 mull2 r3,-716(fp)
2335 mull3 r3,-720(fp),-712(fp)
2336 mull2 r0,-720(fp)
2337 addl3 -708(fp),-712(fp),r0
2338 bicl3 #0,r0,-708(fp)
2339 cmpl -708(fp),-712(fp)
2340 bgequ noname.221
2341 addl2 #65536,-720(fp)
2342noname.221:
2343 movzwl -706(fp),r0
2344 bicl2 #-65536,r0
2345 addl2 r0,-720(fp)
2346 bicl3 #-65536,-708(fp),r0
2347 ashl #16,r0,-712(fp)
2348 addl3 -712(fp),-716(fp),r0
2349 bicl3 #0,r0,-716(fp)
2350 cmpl -716(fp),-712(fp)
2351 bgequ noname.222
2352 incl -720(fp)
2353noname.222:
2354 movl -716(fp),r1
2355 movl -720(fp),r2
2356 addl2 r1,r9
2357 bicl2 #0,r9
2358 cmpl r9,r1
2359 bgequ noname.223
2360 incl r2
2361noname.223:
2362 addl2 r2,r8
2363 bicl2 #0,r8
2364 cmpl r8,r2
2365 bgequ noname.224
2366 incl r10
2367noname.224:
2368
2369 movzwl 18(r6),r2
2370 bicl3 #-65536,20(r7),r3
2371 movzwl 22(r7),r0
2372 bicl2 #-65536,r0
2373 bicl3 #-65536,16(r6),-732(fp)
2374 bicl3 #-65536,r2,-736(fp)
2375 mull3 r0,-732(fp),-724(fp)
2376 mull2 r3,-732(fp)
2377 mull3 r3,-736(fp),-728(fp)
2378 mull2 r0,-736(fp)
2379 addl3 -724(fp),-728(fp),r0
2380 bicl3 #0,r0,-724(fp)
2381 cmpl -724(fp),-728(fp)
2382 bgequ noname.225
2383 addl2 #65536,-736(fp)
2384noname.225:
2385 movzwl -722(fp),r0
2386 bicl2 #-65536,r0
2387 addl2 r0,-736(fp)
2388 bicl3 #-65536,-724(fp),r0
2389 ashl #16,r0,-728(fp)
2390 addl3 -728(fp),-732(fp),r0
2391 bicl3 #0,r0,-732(fp)
2392 cmpl -732(fp),-728(fp)
2393 bgequ noname.226
2394 incl -736(fp)
2395noname.226:
2396 movl -732(fp),r1
2397 movl -736(fp),r2
2398 addl2 r1,r9
2399 bicl2 #0,r9
2400 cmpl r9,r1
2401 bgequ noname.227
2402 incl r2
2403noname.227:
2404 addl2 r2,r8
2405 bicl2 #0,r8
2406 cmpl r8,r2
2407 bgequ noname.228
2408 incl r10
2409noname.228:
2410
2411 movzwl 22(r6),r2
2412 bicl3 #-65536,16(r7),r3
2413 movzwl 18(r7),r0
2414 bicl2 #-65536,r0
2415 bicl3 #-65536,20(r6),-748(fp)
2416 bicl3 #-65536,r2,-752(fp)
2417 mull3 r0,-748(fp),-740(fp)
2418 mull2 r3,-748(fp)
2419 mull3 r3,-752(fp),-744(fp)
2420 mull2 r0,-752(fp)
2421 addl3 -740(fp),-744(fp),r0
2422 bicl3 #0,r0,-740(fp)
2423 cmpl -740(fp),-744(fp)
2424 bgequ noname.229
2425 addl2 #65536,-752(fp)
2426noname.229:
2427 movzwl -738(fp),r0
2428 bicl2 #-65536,r0
2429 addl2 r0,-752(fp)
2430 bicl3 #-65536,-740(fp),r0
2431 ashl #16,r0,-744(fp)
2432 addl3 -744(fp),-748(fp),r0
2433 bicl3 #0,r0,-748(fp)
2434 cmpl -748(fp),-744(fp)
2435 bgequ noname.230
2436 incl -752(fp)
2437noname.230:
2438 movl -748(fp),r1
2439 movl -752(fp),r2
2440 addl2 r1,r9
2441 bicl2 #0,r9
2442 cmpl r9,r1
2443 bgequ noname.231
2444 incl r2
2445noname.231:
2446 addl2 r2,r8
2447 bicl2 #0,r8
2448 cmpl r8,r2
2449 bgequ noname.232
2450 incl r10
2451noname.232:
2452
2453 movzwl 26(r6),r2
2454 bicl3 #-65536,12(r7),r3
2455 movzwl 14(r7),r0
2456 bicl2 #-65536,r0
2457 bicl3 #-65536,24(r6),-764(fp)
2458 bicl3 #-65536,r2,-768(fp)
2459 mull3 r0,-764(fp),-756(fp)
2460 mull2 r3,-764(fp)
2461 mull3 r3,-768(fp),-760(fp)
2462 mull2 r0,-768(fp)
2463 addl3 -756(fp),-760(fp),r0
2464 bicl3 #0,r0,-756(fp)
2465 cmpl -756(fp),-760(fp)
2466 bgequ noname.233
2467 addl2 #65536,-768(fp)
2468noname.233:
2469 movzwl -754(fp),r0
2470 bicl2 #-65536,r0
2471 addl2 r0,-768(fp)
2472 bicl3 #-65536,-756(fp),r0
2473 ashl #16,r0,-760(fp)
2474 addl3 -760(fp),-764(fp),r0
2475 bicl3 #0,r0,-764(fp)
2476 cmpl -764(fp),-760(fp)
2477 bgequ noname.234
2478 incl -768(fp)
2479noname.234:
2480 movl -764(fp),r1
2481 movl -768(fp),r2
2482 addl2 r1,r9
2483 bicl2 #0,r9
2484 cmpl r9,r1
2485 bgequ noname.235
2486 incl r2
2487noname.235:
2488 addl2 r2,r8
2489 bicl2 #0,r8
2490 cmpl r8,r2
2491 bgequ noname.236
2492 incl r10
2493noname.236:
2494
2495 bicl3 #-65536,28(r6),r3
2496 movzwl 30(r6),r1
2497 bicl2 #-65536,r1
2498 bicl3 #-65536,8(r7),r2
2499 movzwl 10(r7),r0
2500 bicl2 #-65536,r0
2501 movl r3,r5
2502 movl r1,r4
2503 mull3 r0,r5,-772(fp)
2504 mull2 r2,r5
2505 mull3 r2,r4,-776(fp)
2506 mull2 r0,r4
2507 addl3 -772(fp),-776(fp),r0
2508 bicl3 #0,r0,-772(fp)
2509 cmpl -772(fp),-776(fp)
2510 bgequ noname.237
2511 addl2 #65536,r4
2512noname.237:
2513 movzwl -770(fp),r0
2514 bicl2 #-65536,r0
2515 addl2 r0,r4
2516 bicl3 #-65536,-772(fp),r0
2517 ashl #16,r0,-776(fp)
2518 addl2 -776(fp),r5
2519 bicl2 #0,r5
2520 cmpl r5,-776(fp)
2521 bgequ noname.238
2522 incl r4
2523noname.238:
2524 movl r5,r1
2525 movl r4,r2
2526 addl2 r1,r9
2527 bicl2 #0,r9
2528 cmpl r9,r1
2529 bgequ noname.239
2530 incl r2
2531noname.239:
2532 addl2 r2,r8
2533 bicl2 #0,r8
2534 cmpl r8,r2
2535 bgequ noname.240
2536 incl r10
2537noname.240:
2538
2539 movl r9,36(r11)
2540
2541 clrl r9
2542
2543 bicl3 #-65536,28(r6),r3
2544 movzwl 30(r6),r1
2545 bicl2 #-65536,r1
2546 bicl3 #-65536,12(r7),r2
2547 movzwl 14(r7),r0
2548 bicl2 #-65536,r0
2549 movl r3,r5
2550 movl r1,r4
2551 mull3 r0,r5,-780(fp)
2552 mull2 r2,r5
2553 mull3 r2,r4,-784(fp)
2554 mull2 r0,r4
2555 addl3 -780(fp),-784(fp),r0
2556 bicl3 #0,r0,-780(fp)
2557 cmpl -780(fp),-784(fp)
2558 bgequ noname.241
2559 addl2 #65536,r4
2560noname.241:
2561 movzwl -778(fp),r0
2562 bicl2 #-65536,r0
2563 addl2 r0,r4
2564 bicl3 #-65536,-780(fp),r0
2565 ashl #16,r0,-784(fp)
2566 addl2 -784(fp),r5
2567 bicl2 #0,r5
2568 cmpl r5,-784(fp)
2569 bgequ noname.242
2570 incl r4
2571noname.242:
2572 movl r5,r1
2573 movl r4,r2
2574 addl2 r1,r8
2575 bicl2 #0,r8
2576 cmpl r8,r1
2577 bgequ noname.243
2578 incl r2
2579noname.243:
2580 addl2 r2,r10
2581 bicl2 #0,r10
2582 cmpl r10,r2
2583 bgequ noname.244
2584 incl r9
2585noname.244:
2586
2587 bicl3 #-65536,24(r6),r3
2588 movzwl 26(r6),r1
2589 bicl2 #-65536,r1
2590 bicl3 #-65536,16(r7),r2
2591 movzwl 18(r7),r0
2592 bicl2 #-65536,r0
2593 movl r3,r5
2594 movl r1,r4
2595 mull3 r0,r5,-788(fp)
2596 mull2 r2,r5
2597 mull3 r2,r4,-792(fp)
2598 mull2 r0,r4
2599 addl3 -788(fp),-792(fp),r0
2600 bicl3 #0,r0,-788(fp)
2601 cmpl -788(fp),-792(fp)
2602 bgequ noname.245
2603 addl2 #65536,r4
2604noname.245:
2605 movzwl -786(fp),r0
2606 bicl2 #-65536,r0
2607 addl2 r0,r4
2608 bicl3 #-65536,-788(fp),r0
2609 ashl #16,r0,-792(fp)
2610 addl2 -792(fp),r5
2611 bicl2 #0,r5
2612 cmpl r5,-792(fp)
2613 bgequ noname.246
2614 incl r4
2615noname.246:
2616 movl r5,r1
2617 movl r4,r2
2618 addl2 r1,r8
2619 bicl2 #0,r8
2620 cmpl r8,r1
2621 bgequ noname.247
2622 incl r2
2623noname.247:
2624 addl2 r2,r10
2625 bicl2 #0,r10
2626 cmpl r10,r2
2627 bgequ noname.248
2628 incl r9
2629noname.248:
2630
2631 bicl3 #-65536,20(r6),r3
2632 movzwl 22(r6),r1
2633 bicl2 #-65536,r1
2634 bicl3 #-65536,20(r7),r2
2635 movzwl 22(r7),r0
2636 bicl2 #-65536,r0
2637 movl r3,r5
2638 movl r1,r4
2639 mull3 r0,r5,-796(fp)
2640 mull2 r2,r5
2641 mull3 r2,r4,-800(fp)
2642 mull2 r0,r4
2643 addl3 -796(fp),-800(fp),r0
2644 bicl3 #0,r0,-796(fp)
2645 cmpl -796(fp),-800(fp)
2646 bgequ noname.249
2647 addl2 #65536,r4
2648noname.249:
2649 movzwl -794(fp),r0
2650 bicl2 #-65536,r0
2651 addl2 r0,r4
2652 bicl3 #-65536,-796(fp),r0
2653 ashl #16,r0,-800(fp)
2654 addl2 -800(fp),r5
2655 bicl2 #0,r5
2656 cmpl r5,-800(fp)
2657 bgequ noname.250
2658 incl r4
2659noname.250:
2660 movl r5,r1
2661 movl r4,r2
2662 addl2 r1,r8
2663 bicl2 #0,r8
2664 cmpl r8,r1
2665 bgequ noname.251
2666 incl r2
2667noname.251:
2668 addl2 r2,r10
2669 bicl2 #0,r10
2670 cmpl r10,r2
2671 bgequ noname.252
2672 incl r9
2673noname.252:
2674
2675 bicl3 #-65536,16(r6),r3
2676 movzwl 18(r6),r1
2677 bicl2 #-65536,r1
2678 bicl3 #-65536,24(r7),r2
2679 movzwl 26(r7),r0
2680 bicl2 #-65536,r0
2681 movl r3,r5
2682 movl r1,r4
2683 mull3 r0,r5,-804(fp)
2684 mull2 r2,r5
2685 mull3 r2,r4,-808(fp)
2686 mull2 r0,r4
2687 addl3 -804(fp),-808(fp),r0
2688 bicl3 #0,r0,-804(fp)
2689 cmpl -804(fp),-808(fp)
2690 bgequ noname.253
2691 addl2 #65536,r4
2692noname.253:
2693 movzwl -802(fp),r0
2694 bicl2 #-65536,r0
2695 addl2 r0,r4
2696 bicl3 #-65536,-804(fp),r0
2697 ashl #16,r0,-808(fp)
2698 addl2 -808(fp),r5
2699 bicl2 #0,r5
2700 cmpl r5,-808(fp)
2701 bgequ noname.254
2702 incl r4
2703noname.254:
2704 movl r5,r1
2705 movl r4,r2
2706 addl2 r1,r8
2707 bicl2 #0,r8
2708 cmpl r8,r1
2709 bgequ noname.255
2710 incl r2
2711noname.255:
2712 addl2 r2,r10
2713 bicl2 #0,r10
2714 cmpl r10,r2
2715 bgequ noname.256
2716 incl r9
2717noname.256:
2718
2719 bicl3 #-65536,12(r6),r3
2720 movzwl 14(r6),r1
2721 bicl2 #-65536,r1
2722 bicl3 #-65536,28(r7),r2
2723 movzwl 30(r7),r0
2724 bicl2 #-65536,r0
2725 movl r3,r5
2726 movl r1,r4
2727 mull3 r0,r5,-812(fp)
2728 mull2 r2,r5
2729 mull3 r2,r4,-816(fp)
2730 mull2 r0,r4
2731 addl3 -812(fp),-816(fp),r0
2732 bicl3 #0,r0,-812(fp)
2733 cmpl -812(fp),-816(fp)
2734 bgequ noname.257
2735 addl2 #65536,r4
2736noname.257:
2737 movzwl -810(fp),r0
2738 bicl2 #-65536,r0
2739 addl2 r0,r4
2740 bicl3 #-65536,-812(fp),r0
2741 ashl #16,r0,-816(fp)
2742 addl2 -816(fp),r5
2743 bicl2 #0,r5
2744 cmpl r5,-816(fp)
2745 bgequ noname.258
2746 incl r4
2747noname.258:
2748 movl r5,r1
2749 movl r4,r2
2750 addl2 r1,r8
2751 bicl2 #0,r8
2752 cmpl r8,r1
2753 bgequ noname.259
2754 incl r2
2755noname.259:
2756 addl2 r2,r10
2757 bicl2 #0,r10
2758 cmpl r10,r2
2759 bgequ noname.260
2760 incl r9
2761noname.260:
2762
2763 movl r8,40(r11)
2764
2765 clrl r8
2766
2767 bicl3 #-65536,16(r6),r3
2768 movzwl 18(r6),r2
2769 bicl3 #-65536,28(r7),r1
2770 movzwl 30(r7),r0
2771 bicl2 #-65536,r0
2772 movl r3,r4
2773 bicl3 #-65536,r2,-828(fp)
2774 mull3 r0,r4,-820(fp)
2775 mull2 r1,r4
2776 mull3 r1,-828(fp),-824(fp)
2777 mull2 r0,-828(fp)
2778 addl3 -820(fp),-824(fp),r0
2779 bicl3 #0,r0,-820(fp)
2780 cmpl -820(fp),-824(fp)
2781 bgequ noname.261
2782 addl2 #65536,-828(fp)
2783noname.261:
2784 movzwl -818(fp),r0
2785 bicl2 #-65536,r0
2786 addl2 r0,-828(fp)
2787 bicl3 #-65536,-820(fp),r0
2788 ashl #16,r0,-824(fp)
2789 addl2 -824(fp),r4
2790 bicl2 #0,r4
2791 cmpl r4,-824(fp)
2792 bgequ noname.262
2793 incl -828(fp)
2794noname.262:
2795 movl r4,r1
2796 movl -828(fp),r2
2797 addl2 r1,r10
2798 bicl2 #0,r10
2799 cmpl r10,r1
2800 bgequ noname.263
2801 incl r2
2802noname.263:
2803 addl2 r2,r9
2804 bicl2 #0,r9
2805 cmpl r9,r2
2806 bgequ noname.264
2807 incl r8
2808noname.264:
2809
2810 movzwl 22(r6),r2
2811 bicl3 #-65536,24(r7),r3
2812 movzwl 26(r7),r0
2813 bicl2 #-65536,r0
2814 bicl3 #-65536,20(r6),-840(fp)
2815 bicl3 #-65536,r2,-844(fp)
2816 mull3 r0,-840(fp),-832(fp)
2817 mull2 r3,-840(fp)
2818 mull3 r3,-844(fp),-836(fp)
2819 mull2 r0,-844(fp)
2820 addl3 -832(fp),-836(fp),r0
2821 bicl3 #0,r0,-832(fp)
2822 cmpl -832(fp),-836(fp)
2823 bgequ noname.265
2824 addl2 #65536,-844(fp)
2825noname.265:
2826 movzwl -830(fp),r0
2827 bicl2 #-65536,r0
2828 addl2 r0,-844(fp)
2829 bicl3 #-65536,-832(fp),r0
2830 ashl #16,r0,-836(fp)
2831 addl3 -836(fp),-840(fp),r0
2832 bicl3 #0,r0,-840(fp)
2833 cmpl -840(fp),-836(fp)
2834 bgequ noname.266
2835 incl -844(fp)
2836noname.266:
2837 movl -840(fp),r1
2838 movl -844(fp),r2
2839 addl2 r1,r10
2840 bicl2 #0,r10
2841 cmpl r10,r1
2842 bgequ noname.267
2843 incl r2
2844noname.267:
2845 addl2 r2,r9
2846 bicl2 #0,r9
2847 cmpl r9,r2
2848 bgequ noname.268
2849 incl r8
2850noname.268:
2851
2852 bicl3 #-65536,24(r6),r3
2853 movzwl 26(r6),r1
2854 bicl2 #-65536,r1
2855 bicl3 #-65536,20(r7),r2
2856 movzwl 22(r7),r0
2857 bicl2 #-65536,r0
2858 movl r3,r5
2859 movl r1,r4
2860 mull3 r0,r5,-848(fp)
2861 mull2 r2,r5
2862 mull3 r2,r4,-852(fp)
2863 mull2 r0,r4
2864 addl3 -848(fp),-852(fp),r0
2865 bicl3 #0,r0,-848(fp)
2866 cmpl -848(fp),-852(fp)
2867 bgequ noname.269
2868 addl2 #65536,r4
2869noname.269:
2870 movzwl -846(fp),r0
2871 bicl2 #-65536,r0
2872 addl2 r0,r4
2873 bicl3 #-65536,-848(fp),r0
2874 ashl #16,r0,-852(fp)
2875 addl2 -852(fp),r5
2876 bicl2 #0,r5
2877 cmpl r5,-852(fp)
2878 bgequ noname.270
2879 incl r4
2880noname.270:
2881 movl r5,r1
2882 movl r4,r2
2883 addl2 r1,r10
2884 bicl2 #0,r10
2885 cmpl r10,r1
2886 bgequ noname.271
2887 incl r2
2888noname.271:
2889 addl2 r2,r9
2890 bicl2 #0,r9
2891 cmpl r9,r2
2892 bgequ noname.272
2893 incl r8
2894noname.272:
2895
2896 bicl3 #-65536,28(r6),r3
2897 movzwl 30(r6),r1
2898 bicl2 #-65536,r1
2899 bicl3 #-65536,16(r7),r2
2900 movzwl 18(r7),r0
2901 bicl2 #-65536,r0
2902 movl r3,r5
2903 movl r1,r4
2904 mull3 r0,r5,-856(fp)
2905 mull2 r2,r5
2906 mull3 r2,r4,-860(fp)
2907 mull2 r0,r4
2908 addl3 -856(fp),-860(fp),r0
2909 bicl3 #0,r0,-856(fp)
2910 cmpl -856(fp),-860(fp)
2911 bgequ noname.273
2912 addl2 #65536,r4
2913noname.273:
2914 movzwl -854(fp),r0
2915 bicl2 #-65536,r0
2916 addl2 r0,r4
2917 bicl3 #-65536,-856(fp),r0
2918 ashl #16,r0,-860(fp)
2919 addl2 -860(fp),r5
2920 bicl2 #0,r5
2921 cmpl r5,-860(fp)
2922 bgequ noname.274
2923 incl r4
2924noname.274:
2925 movl r5,r1
2926 movl r4,r2
2927 addl2 r1,r10
2928 bicl2 #0,r10
2929 cmpl r10,r1
2930 bgequ noname.275
2931 incl r2
2932noname.275:
2933 addl2 r2,r9
2934 bicl2 #0,r9
2935 cmpl r9,r2
2936 bgequ noname.276
2937 incl r8
2938noname.276:
2939
2940 movl r10,44(r11)
2941
2942 clrl r10
2943
2944 bicl3 #-65536,28(r6),r3
2945 movzwl 30(r6),r1
2946 bicl2 #-65536,r1
2947 bicl3 #-65536,20(r7),r2
2948 movzwl 22(r7),r0
2949 bicl2 #-65536,r0
2950 movl r3,r5
2951 movl r1,r4
2952 mull3 r0,r5,-864(fp)
2953 mull2 r2,r5
2954 mull3 r2,r4,-868(fp)
2955 mull2 r0,r4
2956 addl3 -864(fp),-868(fp),r0
2957 bicl3 #0,r0,-864(fp)
2958 cmpl -864(fp),-868(fp)
2959 bgequ noname.277
2960 addl2 #65536,r4
2961noname.277:
2962 movzwl -862(fp),r0
2963 bicl2 #-65536,r0
2964 addl2 r0,r4
2965 bicl3 #-65536,-864(fp),r0
2966 ashl #16,r0,-868(fp)
2967 addl2 -868(fp),r5
2968 bicl2 #0,r5
2969 cmpl r5,-868(fp)
2970 bgequ noname.278
2971 incl r4
2972noname.278:
2973 movl r5,r1
2974 movl r4,r2
2975 addl2 r1,r9
2976 bicl2 #0,r9
2977 cmpl r9,r1
2978 bgequ noname.279
2979 incl r2
2980noname.279:
2981 addl2 r2,r8
2982 bicl2 #0,r8
2983 cmpl r8,r2
2984 bgequ noname.280
2985 incl r10
2986noname.280:
2987
2988 bicl3 #-65536,24(r6),r3
2989 movzwl 26(r6),r1
2990 bicl2 #-65536,r1
2991 bicl3 #-65536,24(r7),r2
2992 movzwl 26(r7),r0
2993 bicl2 #-65536,r0
2994 movl r3,r5
2995 movl r1,r4
2996 mull3 r0,r5,-872(fp)
2997 mull2 r2,r5
2998 mull3 r2,r4,-876(fp)
2999 mull2 r0,r4
3000 addl3 -872(fp),-876(fp),r0
3001 bicl3 #0,r0,-872(fp)
3002 cmpl -872(fp),-876(fp)
3003 bgequ noname.281
3004 addl2 #65536,r4
3005noname.281:
3006 movzwl -870(fp),r0
3007 bicl2 #-65536,r0
3008 addl2 r0,r4
3009 bicl3 #-65536,-872(fp),r0
3010 ashl #16,r0,-876(fp)
3011 addl2 -876(fp),r5
3012 bicl2 #0,r5
3013 cmpl r5,-876(fp)
3014 bgequ noname.282
3015 incl r4
3016noname.282:
3017 movl r5,r1
3018 movl r4,r2
3019 addl2 r1,r9
3020 bicl2 #0,r9
3021 cmpl r9,r1
3022 bgequ noname.283
3023 incl r2
3024noname.283:
3025 addl2 r2,r8
3026 bicl2 #0,r8
3027 cmpl r8,r2
3028 bgequ noname.284
3029 incl r10
3030noname.284:
3031
3032 bicl3 #-65536,20(r6),r3
3033 movzwl 22(r6),r1
3034 bicl2 #-65536,r1
3035 bicl3 #-65536,28(r7),r2
3036 movzwl 30(r7),r0
3037 bicl2 #-65536,r0
3038 movl r3,r5
3039 movl r1,r4
3040 mull3 r0,r5,-880(fp)
3041 mull2 r2,r5
3042 mull3 r2,r4,-884(fp)
3043 mull2 r0,r4
3044 addl3 -880(fp),-884(fp),r0
3045 bicl3 #0,r0,-880(fp)
3046 cmpl -880(fp),-884(fp)
3047 bgequ noname.285
3048 addl2 #65536,r4
3049noname.285:
3050 movzwl -878(fp),r0
3051 bicl2 #-65536,r0
3052 addl2 r0,r4
3053 bicl3 #-65536,-880(fp),r0
3054 ashl #16,r0,-884(fp)
3055 addl2 -884(fp),r5
3056 bicl2 #0,r5
3057 cmpl r5,-884(fp)
3058 bgequ noname.286
3059 incl r4
3060noname.286:
3061 movl r5,r1
3062 movl r4,r2
3063 addl2 r1,r9
3064 bicl2 #0,r9
3065 cmpl r9,r1
3066 bgequ noname.287
3067 incl r2
3068noname.287:
3069 addl2 r2,r8
3070 bicl2 #0,r8
3071 cmpl r8,r2
3072 bgequ noname.288
3073 incl r10
3074noname.288:
3075
3076 movl r9,48(r11)
3077
3078 clrl r9
3079
3080 bicl3 #-65536,24(r6),r3
3081 movzwl 26(r6),r1
3082 bicl2 #-65536,r1
3083 bicl3 #-65536,28(r7),r2
3084 movzwl 30(r7),r0
3085 bicl2 #-65536,r0
3086 movl r3,r5
3087 movl r1,r4
3088 mull3 r0,r5,-888(fp)
3089 mull2 r2,r5
3090 mull3 r2,r4,-892(fp)
3091 mull2 r0,r4
3092 addl3 -888(fp),-892(fp),r0
3093 bicl3 #0,r0,-888(fp)
3094 cmpl -888(fp),-892(fp)
3095 bgequ noname.289
3096 addl2 #65536,r4
3097noname.289:
3098 movzwl -886(fp),r0
3099 bicl2 #-65536,r0
3100 addl2 r0,r4
3101 bicl3 #-65536,-888(fp),r0
3102 ashl #16,r0,-892(fp)
3103 addl2 -892(fp),r5
3104 bicl2 #0,r5
3105 cmpl r5,-892(fp)
3106 bgequ noname.290
3107 incl r4
3108noname.290:
3109 movl r5,r1
3110 movl r4,r2
3111 addl2 r1,r8
3112 bicl2 #0,r8
3113 cmpl r8,r1
3114 bgequ noname.291
3115 incl r2
3116noname.291:
3117 addl2 r2,r10
3118 bicl2 #0,r10
3119 cmpl r10,r2
3120 bgequ noname.292
3121 incl r9
3122noname.292:
3123
3124 movzwl 30(r6),r2
3125 bicl3 #-65536,24(r7),r3
3126 movzwl 26(r7),r0
3127 bicl2 #-65536,r0
3128 bicl3 #-65536,28(r6),-904(fp)
3129 bicl3 #-65536,r2,-908(fp)
3130 mull3 r0,-904(fp),-896(fp)
3131 mull2 r3,-904(fp)
3132 mull3 r3,-908(fp),-900(fp)
3133 mull2 r0,-908(fp)
3134 addl3 -896(fp),-900(fp),r0
3135 bicl3 #0,r0,-896(fp)
3136 cmpl -896(fp),-900(fp)
3137 bgequ noname.293
3138 addl2 #65536,-908(fp)
3139noname.293:
3140 movzwl -894(fp),r0
3141 bicl2 #-65536,r0
3142 addl2 r0,-908(fp)
3143 bicl3 #-65536,-896(fp),r0
3144 ashl #16,r0,-900(fp)
3145 addl3 -900(fp),-904(fp),r0
3146 bicl3 #0,r0,-904(fp)
3147 cmpl -904(fp),-900(fp)
3148 bgequ noname.294
3149 incl -908(fp)
3150noname.294:
3151 movl -904(fp),r1
3152 movl -908(fp),r2
3153 addl2 r1,r8
3154 bicl2 #0,r8
3155 cmpl r8,r1
3156 bgequ noname.295
3157 incl r2
3158noname.295:
3159 addl2 r2,r10
3160 bicl2 #0,r10
3161 cmpl r10,r2
3162 bgequ noname.296
3163 incl r9
3164noname.296:
3165
3166 movl r8,52(r11)
3167
3168 clrl r8
3169
3170 movzwl 30(r6),r2
3171 bicl3 #-65536,28(r7),r3
3172 movzwl 30(r7),r0
3173 bicl2 #-65536,r0
3174 bicl3 #-65536,28(r6),-920(fp)
3175 bicl3 #-65536,r2,-924(fp)
3176 mull3 r0,-920(fp),-912(fp)
3177 mull2 r3,-920(fp)
3178 mull3 r3,-924(fp),-916(fp)
3179 mull2 r0,-924(fp)
3180 addl3 -912(fp),-916(fp),r0
3181 bicl3 #0,r0,-912(fp)
3182 cmpl -912(fp),-916(fp)
3183 bgequ noname.297
3184 addl2 #65536,-924(fp)
3185noname.297:
3186 movzwl -910(fp),r0
3187 bicl2 #-65536,r0
3188 addl2 r0,-924(fp)
3189 bicl3 #-65536,-912(fp),r0
3190 ashl #16,r0,-916(fp)
3191 addl3 -916(fp),-920(fp),r0
3192 bicl3 #0,r0,-920(fp)
3193 cmpl -920(fp),-916(fp)
3194 bgequ noname.298
3195 incl -924(fp)
3196noname.298:
3197 movl -920(fp),r1
3198 movl -924(fp),r2
3199 addl2 r1,r10
3200 bicl2 #0,r10
3201 cmpl r10,r1
3202 bgequ noname.299
3203 incl r2
3204noname.299:
3205 addl2 r2,r9
3206 bicl2 #0,r9
3207 cmpl r9,r2
3208 bgequ noname.300
3209 incl r8
3210noname.300:
3211
3212 movl r10,56(r11)
3213
3214 movl r9,60(r11)
3215
3216 ret
3217
3218
3219
; BN_MUL_COMBA4 -- fully unrolled 4-longword by 4-longword multiply.
;
; Reads four longwords from a (offsets 0..12) and four from b (offsets 0..12)
; and writes eight longwords to r (offsets 0..28).  Arguments are fetched
; from the argument list: 4(ap) = r, 8(ap) = a, 12(ap) = b.
;
; Register roles inside the routine:
;   r6  = a pointer, r7 = b pointer, r11 = r pointer
;   r10, r9, r8 = three-longword running accumulator.  After each output
;                 longword is stored, the stored register is cleared and the
;                 roles rotate (low/mid/high -> r10,r9,r8 -> r9,r8,r10 ->
;                 r8,r10,r9 -> ...).
;   r0-r5 and the stack slots -4(fp)..-156(fp) are scratch.
;
; Every multiply step forms a 64-bit product of two 32-bit longwords out of
; four 16x16-bit partial products (each operand is split into a low and a
; high 16-bit half), then adds that product into the accumulator with
; explicit unsigned carry checks (cmpl / bgequ / incl).
;
; The recurring "bicl2 #0,rX" / "bicl3 #0,rX,dst" instructions use a mask of
; zero, so they clear no bits: the first leaves the value unchanged and the
; second is a plain copy.
;
3220;r=4 ;(AP)
3221;a=8 ;(AP)
3222;b=12 ;(AP)
3223;n=16 ;(AP) n by value (input) -- NOTE(review): nothing in BN_MUL_COMBA4 references 16(ap); this line looks stale, confirm against callers
3224
3225 .psect code,nowrt
3226
; Entry mask lists r2-r11 as the registers to be saved for this routine.
3227.entry BN_MUL_COMBA4,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
3228 movab -156(sp),sp ; reserve 156 bytes of stack for the scratch slots
3229
3230 clrq r9 ; quadword clear: zeroes r9 and r10
3231
3232 clrl r8 ; accumulator (r10,r9,r8) is now zero
3233
; ---- step 1: longword at a+0 times longword at b+0, added into (r10,r9,r8)
3234 movl 8(ap),r6 ; r6 = a
3235 bicl3 #-65536,(r6),r3 ; r3 = low 16 bits of a+0 (mask clears the upper 16)
3236 movzwl 2(r6),r2 ; r2 = upper 16 bits of a+0, zero-extended
3237 bicl2 #-65536,r2
3238 movl 12(ap),r7 ; r7 = b
3239 bicl3 #-65536,(r7),r1 ; r1 = low 16 bits of b+0
3240 movzwl 2(r7),r0 ; r0 = upper 16 bits of b+0
3241 bicl2 #-65536,r0
3242 movl r3,r5 ; r5 = a_lo
3243 movl r2,r4 ; r4 = a_hi
3244 mull3 r0,r5,-4(fp) ; -4(fp) = b_hi * a_lo (cross product)
3245 mull2 r1,r5 ; r5 = a_lo * b_lo (low partial product)
3246 mull3 r1,r4,-8(fp) ; -8(fp) = b_lo * a_hi (cross product)
3247 mull2 r0,r4 ; r4 = a_hi * b_hi (high partial product)
3248 addl3 -4(fp),-8(fp),r0 ; r0 = sum of the two cross products
3249 bicl3 #0,r0,-4(fp) ; copy the sum into -4(fp)
3250 cmpl -4(fp),-8(fp) ; unsigned wrap check: sum < addend means carry out
3251 bgequ noname.303
3252 addl2 #65536,r4 ; carry out of the cross sum is worth 2^16 in the high half
3253noname.303:
3254 movzwl -2(fp),r0 ; upper 16 bits of the cross sum
3255 bicl2 #-65536,r0
3256 addl2 r0,r4 ; ... go into the high longword
3257 bicl3 #-65536,-4(fp),r0 ; lower 16 bits of the cross sum
3258 ashl #16,r0,-8(fp) ; ... shifted up by 16
3259 addl2 -8(fp),r5 ; ... go into the low longword
3260 bicl2 #0,r5
3261 cmpl r5,-8(fp) ; carry out of the low longword?
3262 bgequ noname.304
3263 incl r4
3264noname.304:
3265 movl r5,r1 ; r1 = low longword of the product
3266 movl r4,r2 ; r2 = high longword of the product
3267 addl2 r1,r10 ; accumulate low longword
3268 bicl2 #0,r10
3269 cmpl r10,r1
3270 bgequ noname.305
3271 incl r2 ; carry into the high longword of the product
3272noname.305:
3273 addl2 r2,r9 ; accumulate high longword
3274 bicl2 #0,r9
3275 cmpl r9,r2
3276 bgequ noname.306
3277 incl r8 ; carry into the top accumulator longword
3278noname.306:
3279
; ---- store output longword 0, rotate accumulator to (r9,r8,r10)
3280 movl 4(ap),r11 ; r11 = r
3281 movl r10,(r11)
3282
3283 clrl r10
3284
; ---- step 2: a+0 times b+4, added into (r9,r8,r10)
; Same sequence as step 1; scratch slots are -12(fp) and -16(fp).
3285 bicl3 #-65536,(r6),r3
3286 movzwl 2(r6),r1
3287 bicl2 #-65536,r1
3288 bicl3 #-65536,4(r7),r2
3289 movzwl 6(r7),r0
3290 bicl2 #-65536,r0
3291 movl r3,r5
3292 movl r1,r4
3293 mull3 r0,r5,-12(fp)
3294 mull2 r2,r5
3295 mull3 r2,r4,-16(fp)
3296 mull2 r0,r4
3297 addl3 -12(fp),-16(fp),r0
3298 bicl3 #0,r0,-12(fp)
3299 cmpl -12(fp),-16(fp)
3300 bgequ noname.307
3301 addl2 #65536,r4
3302noname.307:
3303 movzwl -10(fp),r0
3304 bicl2 #-65536,r0
3305 addl2 r0,r4
3306 bicl3 #-65536,-12(fp),r0
3307 ashl #16,r0,-16(fp)
3308 addl2 -16(fp),r5
3309 bicl2 #0,r5
3310 cmpl r5,-16(fp)
3311 bgequ noname.308
3312 incl r4
3313noname.308:
3314 movl r5,r1
3315 movl r4,r2
3316 addl2 r1,r9
3317 bicl2 #0,r9
3318 cmpl r9,r1
3319 bgequ noname.309
3320 incl r2
3321noname.309:
3322 addl2 r2,r8
3323 bicl2 #0,r8
3324 cmpl r8,r2
3325 bgequ noname.310
3326 incl r10
3327noname.310:
3328
; ---- step 3: a+4 times b+0, added into (r9,r8,r10)
3329 bicl3 #-65536,4(r6),r3
3330 movzwl 6(r6),r1
3331 bicl2 #-65536,r1
3332 bicl3 #-65536,(r7),r2
3333 movzwl 2(r7),r0
3334 bicl2 #-65536,r0
3335 movl r3,r5
3336 movl r1,r4
3337 mull3 r0,r5,-20(fp)
3338 mull2 r2,r5
3339 mull3 r2,r4,-24(fp)
3340 mull2 r0,r4
3341 addl3 -20(fp),-24(fp),r0
3342 bicl3 #0,r0,-20(fp)
3343 cmpl -20(fp),-24(fp)
3344 bgequ noname.311
3345 addl2 #65536,r4
3346noname.311:
3347 movzwl -18(fp),r0
3348 bicl2 #-65536,r0
3349 addl2 r0,r4
3350 bicl3 #-65536,-20(fp),r0
3351 ashl #16,r0,-24(fp)
3352 addl2 -24(fp),r5
3353 bicl2 #0,r5
3354 cmpl r5,-24(fp)
3355 bgequ noname.312
3356 incl r4
3357noname.312:
3358 movl r5,r1
3359 movl r4,r2
3360 addl2 r1,r9
3361 bicl2 #0,r9
3362 cmpl r9,r1
3363 bgequ noname.313
3364 incl r2
3365noname.313:
3366 addl2 r2,r8
3367 bicl2 #0,r8
3368 cmpl r8,r2
3369 bgequ noname.314
3370 incl r10
3371noname.314:
3372
; ---- store output longword 1 (offset 4), rotate accumulator to (r8,r10,r9)
3373 movl r9,4(r11)
3374
3375 clrl r9
3376
; ---- step 4: a+8 times b+0, added into (r8,r10,r9)
3377 bicl3 #-65536,8(r6),r3
3378 movzwl 10(r6),r1
3379 bicl2 #-65536,r1
3380 bicl3 #-65536,(r7),r2
3381 movzwl 2(r7),r0
3382 bicl2 #-65536,r0
3383 movl r3,r5
3384 movl r1,r4
3385 mull3 r0,r5,-28(fp)
3386 mull2 r2,r5
3387 mull3 r2,r4,-32(fp)
3388 mull2 r0,r4
3389 addl3 -28(fp),-32(fp),r0
3390 bicl3 #0,r0,-28(fp)
3391 cmpl -28(fp),-32(fp)
3392 bgequ noname.315
3393 addl2 #65536,r4
3394noname.315:
3395 movzwl -26(fp),r0
3396 bicl2 #-65536,r0
3397 addl2 r0,r4
3398 bicl3 #-65536,-28(fp),r0
3399 ashl #16,r0,-32(fp)
3400 addl2 -32(fp),r5
3401 bicl2 #0,r5
3402 cmpl r5,-32(fp)
3403 bgequ noname.316
3404 incl r4
3405noname.316:
3406 movl r5,r1
3407 movl r4,r2
3408 addl2 r1,r8
3409 bicl2 #0,r8
3410 cmpl r8,r1
3411 bgequ noname.317
3412 incl r2
3413noname.317:
3414 addl2 r2,r10
3415 bicl2 #0,r10
3416 cmpl r10,r2
3417 bgequ noname.318
3418 incl r9
3419noname.318:
3420
; ---- step 5: a+4 times b+4, added into (r8,r10,r9)
3421 bicl3 #-65536,4(r6),r3
3422 movzwl 6(r6),r1
3423 bicl2 #-65536,r1
3424 bicl3 #-65536,4(r7),r2
3425 movzwl 6(r7),r0
3426 bicl2 #-65536,r0
3427 movl r3,r5
3428 movl r1,r4
3429 mull3 r0,r5,-36(fp)
3430 mull2 r2,r5
3431 mull3 r2,r4,-40(fp)
3432 mull2 r0,r4
3433 addl3 -36(fp),-40(fp),r0
3434 bicl3 #0,r0,-36(fp)
3435 cmpl -36(fp),-40(fp)
3436 bgequ noname.319
3437 addl2 #65536,r4
3438noname.319:
3439 movzwl -34(fp),r0
3440 bicl2 #-65536,r0
3441 addl2 r0,r4
3442 bicl3 #-65536,-36(fp),r0
3443 ashl #16,r0,-40(fp)
3444 addl2 -40(fp),r5
3445 bicl2 #0,r5
3446 cmpl r5,-40(fp)
3447 bgequ noname.320
3448 incl r4
3449noname.320:
3450 movl r5,r1
3451 movl r4,r2
3452 addl2 r1,r8
3453 bicl2 #0,r8
3454 cmpl r8,r1
3455 bgequ noname.321
3456 incl r2
3457noname.321:
3458 addl2 r2,r10
3459 bicl2 #0,r10
3460 cmpl r10,r2
3461 bgequ noname.322
3462 incl r9
3463noname.322:
3464
; ---- step 6: a+0 times b+8, added into (r8,r10,r9)
3465 bicl3 #-65536,(r6),r3
3466 movzwl 2(r6),r1
3467 bicl2 #-65536,r1
3468 bicl3 #-65536,8(r7),r2
3469 movzwl 10(r7),r0
3470 bicl2 #-65536,r0
3471 movl r3,r5
3472 movl r1,r4
3473 mull3 r0,r5,-44(fp)
3474 mull2 r2,r5
3475 mull3 r2,r4,-48(fp)
3476 mull2 r0,r4
3477 addl3 -44(fp),-48(fp),r0
3478 bicl3 #0,r0,-44(fp)
3479 cmpl -44(fp),-48(fp)
3480 bgequ noname.323
3481 addl2 #65536,r4
3482noname.323:
3483 movzwl -42(fp),r0
3484 bicl2 #-65536,r0
3485 addl2 r0,r4
3486 bicl3 #-65536,-44(fp),r0
3487 ashl #16,r0,-48(fp)
3488 addl2 -48(fp),r5
3489 bicl2 #0,r5
3490 cmpl r5,-48(fp)
3491 bgequ noname.324
3492 incl r4
3493noname.324:
3494 movl r5,r1
3495 movl r4,r2
3496 addl2 r1,r8
3497 bicl2 #0,r8
3498 cmpl r8,r1
3499 bgequ noname.325
3500 incl r2
3501noname.325:
3502 addl2 r2,r10
3503 bicl2 #0,r10
3504 cmpl r10,r2
3505 bgequ noname.326
3506 incl r9
3507noname.326:
3508
; ---- store output longword 2 (offset 8), rotate accumulator to (r10,r9,r8)
3509 movl r8,8(r11)
3510
3511 clrl r8
3512
; ---- step 7: a+0 times b+12, added into (r10,r9,r8)
; Same computation as the earlier steps, but the low partial product is
; kept in r4 and the high partial product in stack slot -60(fp).
3513 bicl3 #-65536,(r6),r3
3514 movzwl 2(r6),r2
3515 bicl3 #-65536,12(r7),r1
3516 movzwl 14(r7),r0
3517 bicl2 #-65536,r0
3518 movl r3,r4
3519 bicl3 #-65536,r2,-60(fp)
3520 mull3 r0,r4,-52(fp)
3521 mull2 r1,r4
3522 mull3 r1,-60(fp),-56(fp)
3523 mull2 r0,-60(fp)
3524 addl3 -52(fp),-56(fp),r0
3525 bicl3 #0,r0,-52(fp)
3526 cmpl -52(fp),-56(fp)
3527 bgequ noname.327
3528 addl2 #65536,-60(fp)
3529noname.327:
3530 movzwl -50(fp),r0
3531 bicl2 #-65536,r0
3532 addl2 r0,-60(fp)
3533 bicl3 #-65536,-52(fp),r0
3534 ashl #16,r0,-56(fp)
3535 addl2 -56(fp),r4
3536 bicl2 #0,r4
3537 cmpl r4,-56(fp)
3538 bgequ noname.328
3539 incl -60(fp)
3540noname.328:
3541 movl r4,r1
3542 movl -60(fp),r2
3543 addl2 r1,r10
3544 bicl2 #0,r10
3545 cmpl r10,r1
3546 bgequ noname.329
3547 incl r2
3548noname.329:
3549 addl2 r2,r9
3550 bicl2 #0,r9
3551 cmpl r9,r2
3552 bgequ noname.330
3553 incl r8
3554noname.330:
3555
; ---- step 8: a+4 times b+8, added into (r10,r9,r8)
; Both partial-product halves live in stack slots here:
; -72(fp) = low longword, -76(fp) = high longword.
3556 movzwl 6(r6),r2
3557 bicl3 #-65536,8(r7),r3
3558 movzwl 10(r7),r0
3559 bicl2 #-65536,r0
3560 bicl3 #-65536,4(r6),-72(fp)
3561 bicl3 #-65536,r2,-76(fp)
3562 mull3 r0,-72(fp),-64(fp)
3563 mull2 r3,-72(fp)
3564 mull3 r3,-76(fp),-68(fp)
3565 mull2 r0,-76(fp)
3566 addl3 -64(fp),-68(fp),r0
3567 bicl3 #0,r0,-64(fp)
3568 cmpl -64(fp),-68(fp)
3569 bgequ noname.331
3570 addl2 #65536,-76(fp)
3571noname.331:
3572 movzwl -62(fp),r0
3573 bicl2 #-65536,r0
3574 addl2 r0,-76(fp)
3575 bicl3 #-65536,-64(fp),r0
3576 ashl #16,r0,-68(fp)
3577 addl3 -68(fp),-72(fp),r0
3578 bicl3 #0,r0,-72(fp)
3579 cmpl -72(fp),-68(fp)
3580 bgequ noname.332
3581 incl -76(fp)
3582noname.332:
3583 movl -72(fp),r1
3584 movl -76(fp),r2
3585 addl2 r1,r10
3586 bicl2 #0,r10
3587 cmpl r10,r1
3588 bgequ noname.333
3589 incl r2
3590noname.333:
3591 addl2 r2,r9
3592 bicl2 #0,r9
3593 cmpl r9,r2
3594 bgequ noname.334
3595 incl r8
3596noname.334:
3597
; ---- step 9: a+8 times b+4, added into (r10,r9,r8)
3598 bicl3 #-65536,8(r6),r3
3599 movzwl 10(r6),r1
3600 bicl2 #-65536,r1
3601 bicl3 #-65536,4(r7),r2
3602 movzwl 6(r7),r0
3603 bicl2 #-65536,r0
3604 movl r3,r5
3605 movl r1,r4
3606 mull3 r0,r5,-80(fp)
3607 mull2 r2,r5
3608 mull3 r2,r4,-84(fp)
3609 mull2 r0,r4
3610 addl3 -80(fp),-84(fp),r0
3611 bicl3 #0,r0,-80(fp)
3612 cmpl -80(fp),-84(fp)
3613 bgequ noname.335
3614 addl2 #65536,r4
3615noname.335:
3616 movzwl -78(fp),r0
3617 bicl2 #-65536,r0
3618 addl2 r0,r4
3619 bicl3 #-65536,-80(fp),r0
3620 ashl #16,r0,-84(fp)
3621 addl2 -84(fp),r5
3622 bicl2 #0,r5
3623 cmpl r5,-84(fp)
3624 bgequ noname.336
3625 incl r4
3626noname.336:
3627 movl r5,r1
3628 movl r4,r2
3629 addl2 r1,r10
3630 bicl2 #0,r10
3631 cmpl r10,r1
3632 bgequ noname.337
3633 incl r2
3634noname.337:
3635 addl2 r2,r9
3636 bicl2 #0,r9
3637 cmpl r9,r2
3638 bgequ noname.338
3639 incl r8
3640noname.338:
3641
; ---- step 10: a+12 times b+0, added into (r10,r9,r8)
3642 bicl3 #-65536,12(r6),r3
3643 movzwl 14(r6),r1
3644 bicl2 #-65536,r1
3645 bicl3 #-65536,(r7),r2
3646 movzwl 2(r7),r0
3647 bicl2 #-65536,r0
3648 movl r3,r5
3649 movl r1,r4
3650 mull3 r0,r5,-88(fp)
3651 mull2 r2,r5
3652 mull3 r2,r4,-92(fp)
3653 mull2 r0,r4
3654 addl3 -88(fp),-92(fp),r0
3655 bicl3 #0,r0,-88(fp)
3656 cmpl -88(fp),-92(fp)
3657 bgequ noname.339
3658 addl2 #65536,r4
3659noname.339:
3660 movzwl -86(fp),r0
3661 bicl2 #-65536,r0
3662 addl2 r0,r4
3663 bicl3 #-65536,-88(fp),r0
3664 ashl #16,r0,-92(fp)
3665 addl2 -92(fp),r5
3666 bicl2 #0,r5
3667 cmpl r5,-92(fp)
3668 bgequ noname.340
3669 incl r4
3670noname.340:
3671 movl r5,r1
3672 movl r4,r2
3673 addl2 r1,r10
3674 bicl2 #0,r10
3675 cmpl r10,r1
3676 bgequ noname.341
3677 incl r2
3678noname.341:
3679 addl2 r2,r9
3680 bicl2 #0,r9
3681 cmpl r9,r2
3682 bgequ noname.342
3683 incl r8
3684noname.342:
3685
; ---- store output longword 3 (offset 12), rotate accumulator to (r9,r8,r10)
3686 movl r10,12(r11)
3687
3688 clrl r10
3689
; ---- step 11: a+12 times b+4, added into (r9,r8,r10)
3690 bicl3 #-65536,12(r6),r3
3691 movzwl 14(r6),r1
3692 bicl2 #-65536,r1
3693 bicl3 #-65536,4(r7),r2
3694 movzwl 6(r7),r0
3695 bicl2 #-65536,r0
3696 movl r3,r5
3697 movl r1,r4
3698 mull3 r0,r5,-96(fp)
3699 mull2 r2,r5
3700 mull3 r2,r4,-100(fp)
3701 mull2 r0,r4
3702 addl3 -96(fp),-100(fp),r0
3703 bicl3 #0,r0,-96(fp)
3704 cmpl -96(fp),-100(fp)
3705 bgequ noname.343
3706 addl2 #65536,r4
3707noname.343:
3708 movzwl -94(fp),r0
3709 bicl2 #-65536,r0
3710 addl2 r0,r4
3711 bicl3 #-65536,-96(fp),r0
3712 ashl #16,r0,-100(fp)
3713 addl2 -100(fp),r5
3714 bicl2 #0,r5
3715 cmpl r5,-100(fp)
3716 bgequ noname.344
3717 incl r4
3718noname.344:
3719 movl r5,r1
3720 movl r4,r2
3721 addl2 r1,r9
3722 bicl2 #0,r9
3723 cmpl r9,r1
3724 bgequ noname.345
3725 incl r2
3726noname.345:
3727 addl2 r2,r8
3728 bicl2 #0,r8
3729 cmpl r8,r2
3730 bgequ noname.346
3731 incl r10
3732noname.346:
3733
; ---- step 12: a+8 times b+8, added into (r9,r8,r10)
3734 bicl3 #-65536,8(r6),r3
3735 movzwl 10(r6),r1
3736 bicl2 #-65536,r1
3737 bicl3 #-65536,8(r7),r2
3738 movzwl 10(r7),r0
3739 bicl2 #-65536,r0
3740 movl r3,r5
3741 movl r1,r4
3742 mull3 r0,r5,-104(fp)
3743 mull2 r2,r5
3744 mull3 r2,r4,-108(fp)
3745 mull2 r0,r4
3746 addl3 -104(fp),-108(fp),r0
3747 bicl3 #0,r0,-104(fp)
3748 cmpl -104(fp),-108(fp)
3749 bgequ noname.347
3750 addl2 #65536,r4
3751noname.347:
3752 movzwl -102(fp),r0
3753 bicl2 #-65536,r0
3754 addl2 r0,r4
3755 bicl3 #-65536,-104(fp),r0
3756 ashl #16,r0,-108(fp)
3757 addl2 -108(fp),r5
3758 bicl2 #0,r5
3759 cmpl r5,-108(fp)
3760 bgequ noname.348
3761 incl r4
3762noname.348:
3763 movl r5,r1
3764 movl r4,r2
3765 addl2 r1,r9
3766 bicl2 #0,r9
3767 cmpl r9,r1
3768 bgequ noname.349
3769 incl r2
3770noname.349:
3771 addl2 r2,r8
3772 bicl2 #0,r8
3773 cmpl r8,r2
3774 bgequ noname.350
3775 incl r10
3776noname.350:
3777
; ---- step 13: a+4 times b+12, added into (r9,r8,r10)
3778 bicl3 #-65536,4(r6),r3
3779 movzwl 6(r6),r1
3780 bicl2 #-65536,r1
3781 bicl3 #-65536,12(r7),r2
3782 movzwl 14(r7),r0
3783 bicl2 #-65536,r0
3784 movl r3,r5
3785 movl r1,r4
3786 mull3 r0,r5,-112(fp)
3787 mull2 r2,r5
3788 mull3 r2,r4,-116(fp)
3789 mull2 r0,r4
3790 addl3 -112(fp),-116(fp),r0
3791 bicl3 #0,r0,-112(fp)
3792 cmpl -112(fp),-116(fp)
3793 bgequ noname.351
3794 addl2 #65536,r4
3795noname.351:
3796 movzwl -110(fp),r0
3797 bicl2 #-65536,r0
3798 addl2 r0,r4
3799 bicl3 #-65536,-112(fp),r0
3800 ashl #16,r0,-116(fp)
3801 addl2 -116(fp),r5
3802 bicl2 #0,r5
3803 cmpl r5,-116(fp)
3804 bgequ noname.352
3805 incl r4
3806noname.352:
3807 movl r5,r1
3808 movl r4,r2
3809 addl2 r1,r9
3810 bicl2 #0,r9
3811 cmpl r9,r1
3812 bgequ noname.353
3813 incl r2
3814noname.353:
3815 addl2 r2,r8
3816 bicl2 #0,r8
3817 cmpl r8,r2
3818 bgequ noname.354
3819 incl r10
3820noname.354:
3821
; ---- store output longword 4 (offset 16), rotate accumulator to (r8,r10,r9)
3822 movl r9,16(r11)
3823
3824 clrl r9
3825
; ---- step 14: a+8 times b+12, added into (r8,r10,r9)
3826 bicl3 #-65536,8(r6),r3
3827 movzwl 10(r6),r1
3828 bicl2 #-65536,r1
3829 bicl3 #-65536,12(r7),r2
3830 movzwl 14(r7),r0
3831 bicl2 #-65536,r0
3832 movl r3,r5
3833 movl r1,r4
3834 mull3 r0,r5,-120(fp)
3835 mull2 r2,r5
3836 mull3 r2,r4,-124(fp)
3837 mull2 r0,r4
3838 addl3 -120(fp),-124(fp),r0
3839 bicl3 #0,r0,-120(fp)
3840 cmpl -120(fp),-124(fp)
3841 bgequ noname.355
3842 addl2 #65536,r4
3843noname.355:
3844 movzwl -118(fp),r0
3845 bicl2 #-65536,r0
3846 addl2 r0,r4
3847 bicl3 #-65536,-120(fp),r0
3848 ashl #16,r0,-124(fp)
3849 addl2 -124(fp),r5
3850 bicl2 #0,r5
3851 cmpl r5,-124(fp)
3852 bgequ noname.356
3853 incl r4
3854noname.356:
3855 movl r5,r1
3856 movl r4,r2
3857 addl2 r1,r8
3858 bicl2 #0,r8
3859 cmpl r8,r1
3860 bgequ noname.357
3861 incl r2
3862noname.357:
3863 addl2 r2,r10
3864 bicl2 #0,r10
3865 cmpl r10,r2
3866 bgequ noname.358
3867 incl r9
3868noname.358:
3869
; ---- step 15: a+12 times b+8, added into (r8,r10,r9)
; Partial products are held in stack slots: -136(fp) = low longword,
; -140(fp) = high longword.
3870 movzwl 14(r6),r2
3871 bicl3 #-65536,8(r7),r3
3872 movzwl 10(r7),r0
3873 bicl2 #-65536,r0
3874 bicl3 #-65536,12(r6),-136(fp)
3875 bicl3 #-65536,r2,-140(fp)
3876 mull3 r0,-136(fp),-128(fp)
3877 mull2 r3,-136(fp)
3878 mull3 r3,-140(fp),-132(fp)
3879 mull2 r0,-140(fp)
3880 addl3 -128(fp),-132(fp),r0
3881 bicl3 #0,r0,-128(fp)
3882 cmpl -128(fp),-132(fp)
3883 bgequ noname.359
3884 addl2 #65536,-140(fp)
3885noname.359:
3886 movzwl -126(fp),r0
3887 bicl2 #-65536,r0
3888 addl2 r0,-140(fp)
3889 bicl3 #-65536,-128(fp),r0
3890 ashl #16,r0,-132(fp)
3891 addl3 -132(fp),-136(fp),r0
3892 bicl3 #0,r0,-136(fp)
3893 cmpl -136(fp),-132(fp)
3894 bgequ noname.360
3895 incl -140(fp)
3896noname.360:
3897 movl -136(fp),r1
3898 movl -140(fp),r2
3899 addl2 r1,r8
3900 bicl2 #0,r8
3901 cmpl r8,r1
3902 bgequ noname.361
3903 incl r2
3904noname.361:
3905 addl2 r2,r10
3906 bicl2 #0,r10
3907 cmpl r10,r2
3908 bgequ noname.362
3909 incl r9
3910noname.362:
3911
; ---- store output longword 5 (offset 20), rotate accumulator to (r10,r9,r8)
3912 movl r8,20(r11)
3913
3914 clrl r8
3915
; ---- step 16: a+12 times b+12, added into (r10,r9,r8)
; Partial products are held in stack slots: -152(fp) = low longword,
; -156(fp) = high longword (the last of the 156 reserved bytes).
3916 movzwl 14(r6),r2
3917 bicl3 #-65536,12(r7),r3
3918 movzwl 14(r7),r0
3919 bicl2 #-65536,r0
3920 bicl3 #-65536,12(r6),-152(fp)
3921 bicl3 #-65536,r2,-156(fp)
3922 mull3 r0,-152(fp),-144(fp)
3923 mull2 r3,-152(fp)
3924 mull3 r3,-156(fp),-148(fp)
3925 mull2 r0,-156(fp)
3926 addl3 -144(fp),-148(fp),r0
3927 bicl3 #0,r0,-144(fp)
3928 cmpl -144(fp),-148(fp)
3929 bgequ noname.363
3930 addl2 #65536,-156(fp)
3931noname.363:
3932 movzwl -142(fp),r0
3933 bicl2 #-65536,r0
3934 addl2 r0,-156(fp)
3935 bicl3 #-65536,-144(fp),r0
3936 ashl #16,r0,-148(fp)
3937 addl3 -148(fp),-152(fp),r0
3938 bicl3 #0,r0,-152(fp)
3939 cmpl -152(fp),-148(fp)
3940 bgequ noname.364
3941 incl -156(fp)
3942noname.364:
3943 movl -152(fp),r1
3944 movl -156(fp),r2
3945 addl2 r1,r10
3946 bicl2 #0,r10
3947 cmpl r10,r1
3948 bgequ noname.365
3949 incl r2
3950noname.365:
3951 addl2 r2,r9
3952 bicl2 #0,r9
3953 cmpl r9,r2
3954 bgequ noname.366
3955 incl r8
3956noname.366:
3957
; ---- store the last two output longwords (offsets 24 and 28) and return.
; r8 (the top accumulator longword at this point) is not written anywhere.
3958 movl r10,24(r11)
3959
3960 movl r9,28(r11)
3961
3962 ret
3963
3964
3965
3966;r=4 ;(AP)
3967;a=8 ;(AP)
3968;b=12 ;(AP)
3969;n=16 ;(AP) n by value (input)
3970
3971 .psect code,nowrt
3972
3973.entry BN_SQR_COMBA8,^m<r2,r3,r4,r5,r6,r7,r8,r9>
3974 movab -444(sp),sp
3975
3976 clrq r8
3977
3978 clrl r7
3979
3980 movl 8(ap),r4
3981 movl (r4),r3
3982 bicl3 #-65536,r3,-4(fp)
3983 extzv #16,#16,r3,r0
3984 bicl3 #-65536,r0,r3
3985 movl -4(fp),r0
3986 mull3 r0,r3,-8(fp)
3987 mull3 r0,r0,-4(fp)
3988 mull2 r3,r3
3989 bicl3 #32767,-8(fp),r0
3990 extzv #15,#17,r0,r0
3991 addl2 r0,r3
3992 bicl3 #-65536,-8(fp),r0
3993 ashl #17,r0,-8(fp)
3994 addl3 -4(fp),-8(fp),r0
3995 bicl3 #0,r0,-4(fp)
3996 cmpl -4(fp),-8(fp)
3997 bgequ noname.369
3998 incl r3
3999noname.369:
4000 movl -4(fp),r1
4001 movl r3,r2
4002 addl2 r1,r9
4003 bicl2 #0,r9
4004 cmpl r9,r1
4005 bgequ noname.370
4006 incl r2
4007noname.370:
4008 addl2 r2,r8
4009 bicl2 #0,r8
4010 cmpl r8,r2
4011 bgequ noname.371
4012 incl r7
4013noname.371:
4014
4015 movl r9,@4(ap)
4016
4017 clrl r9
4018
4019 movzwl 6(r4),r2
4020 bicl3 #-65536,(r4),r3
4021 movzwl 2(r4),r0
4022 bicl2 #-65536,r0
4023 bicl3 #-65536,4(r4),-20(fp)
4024 bicl3 #-65536,r2,-24(fp)
4025 mull3 r0,-20(fp),-12(fp)
4026 mull2 r3,-20(fp)
4027 mull3 r3,-24(fp),-16(fp)
4028 mull2 r0,-24(fp)
4029 addl3 -12(fp),-16(fp),r0
4030 bicl3 #0,r0,-12(fp)
4031 cmpl -12(fp),-16(fp)
4032 bgequ noname.372
4033 addl2 #65536,-24(fp)
4034noname.372:
4035 movzwl -10(fp),r0
4036 bicl2 #-65536,r0
4037 addl2 r0,-24(fp)
4038 bicl3 #-65536,-12(fp),r0
4039 ashl #16,r0,-16(fp)
4040 addl3 -16(fp),-20(fp),r0
4041 bicl3 #0,r0,-20(fp)
4042 cmpl -20(fp),-16(fp)
4043 bgequ noname.373
4044 incl -24(fp)
4045noname.373:
4046 movl -20(fp),r3
4047 movl -24(fp),r2
4048 bbc #31,r2,noname.374
4049 incl r9
4050noname.374:
4051 addl2 r2,r2
4052 bicl2 #0,r2
4053 bbc #31,r3,noname.375
4054 incl r2
4055noname.375:
4056 addl2 r3,r3
4057 bicl2 #0,r3
4058 addl2 r3,r8
4059 bicl2 #0,r8
4060 cmpl r8,r3
4061 bgequ noname.376
4062 incl r2
4063 bicl3 #0,r2,r0
4064 bneq noname.376
4065 incl r9
4066noname.376:
4067 addl2 r2,r7
4068 bicl2 #0,r7
4069 cmpl r7,r2
4070 bgequ noname.377
4071 incl r9
4072noname.377:
4073
4074 movl 4(ap),r0
4075 movl r8,4(r0)
4076
4077 clrl r8
4078
4079 movl 8(ap),r4
4080 movl 4(r4),r3
4081 bicl3 #-65536,r3,-28(fp)
4082 extzv #16,#16,r3,r0
4083 bicl3 #-65536,r0,r3
4084 movl -28(fp),r0
4085 mull3 r0,r3,-32(fp)
4086 mull3 r0,r0,-28(fp)
4087 mull2 r3,r3
4088 bicl3 #32767,-32(fp),r0
4089 extzv #15,#17,r0,r0
4090 addl2 r0,r3
4091 bicl3 #-65536,-32(fp),r0
4092 ashl #17,r0,-32(fp)
4093 addl3 -28(fp),-32(fp),r0
4094 bicl3 #0,r0,-28(fp)
4095 cmpl -28(fp),-32(fp)
4096 bgequ noname.378
4097 incl r3
4098noname.378:
4099 movl -28(fp),r1
4100 movl r3,r2
4101 addl2 r1,r7
4102 bicl2 #0,r7
4103 cmpl r7,r1
4104 bgequ noname.379
4105 incl r2
4106noname.379:
4107 addl2 r2,r9
4108 bicl2 #0,r9
4109 cmpl r9,r2
4110 bgequ noname.380
4111 incl r8
4112noname.380:
4113
4114 movzwl 10(r4),r2
4115 bicl3 #-65536,(r4),r3
4116 movzwl 2(r4),r0
4117 bicl2 #-65536,r0
4118 bicl3 #-65536,8(r4),-44(fp)
4119 bicl3 #-65536,r2,-48(fp)
4120 mull3 r0,-44(fp),-36(fp)
4121 mull2 r3,-44(fp)
4122 mull3 r3,-48(fp),-40(fp)
4123 mull2 r0,-48(fp)
4124 addl3 -36(fp),-40(fp),r0
4125 bicl3 #0,r0,-36(fp)
4126 cmpl -36(fp),-40(fp)
4127 bgequ noname.381
4128 addl2 #65536,-48(fp)
4129noname.381:
4130 movzwl -34(fp),r0
4131 bicl2 #-65536,r0
4132 addl2 r0,-48(fp)
4133 bicl3 #-65536,-36(fp),r0
4134 ashl #16,r0,-40(fp)
4135 addl3 -40(fp),-44(fp),r0
4136 bicl3 #0,r0,-44(fp)
4137 cmpl -44(fp),-40(fp)
4138 bgequ noname.382
4139 incl -48(fp)
4140noname.382:
4141 movl -44(fp),r3
4142 movl -48(fp),r2
4143 bbc #31,r2,noname.383
4144 incl r8
4145noname.383:
4146 addl2 r2,r2
4147 bicl2 #0,r2
4148 bbc #31,r3,noname.384
4149 incl r2
4150noname.384:
4151 addl2 r3,r3
4152 bicl2 #0,r3
4153 addl2 r3,r7
4154 bicl2 #0,r7
4155 cmpl r7,r3
4156 bgequ noname.385
4157 incl r2
4158 bicl3 #0,r2,r0
4159 bneq noname.385
4160 incl r8
4161noname.385:
4162 addl2 r2,r9
4163 bicl2 #0,r9
4164 cmpl r9,r2
4165 bgequ noname.386
4166 incl r8
4167noname.386:
4168
4169 movl 4(ap),r0
4170 movl r7,8(r0)
4171
4172 clrl r7
4173
4174 movl 8(ap),r0
4175 movzwl 14(r0),r2
4176 bicl3 #-65536,(r0),r3
4177 movzwl 2(r0),r1
4178 bicl2 #-65536,r1
4179 bicl3 #-65536,12(r0),-60(fp)
4180 bicl3 #-65536,r2,-64(fp)
4181 mull3 r1,-60(fp),-52(fp)
4182 mull2 r3,-60(fp)
4183 mull3 r3,-64(fp),-56(fp)
4184 mull2 r1,-64(fp)
4185 addl3 -52(fp),-56(fp),r0
4186 bicl3 #0,r0,-52(fp)
4187 cmpl -52(fp),-56(fp)
4188 bgequ noname.387
4189 addl2 #65536,-64(fp)
4190noname.387:
4191 movzwl -50(fp),r0
4192 bicl2 #-65536,r0
4193 addl2 r0,-64(fp)
4194 bicl3 #-65536,-52(fp),r0
4195 ashl #16,r0,-56(fp)
4196 addl3 -56(fp),-60(fp),r0
4197 bicl3 #0,r0,-60(fp)
4198 cmpl -60(fp),-56(fp)
4199 bgequ noname.388
4200 incl -64(fp)
4201noname.388:
4202 movl -60(fp),r3
4203 movl -64(fp),r2
4204 bbc #31,r2,noname.389
4205 incl r7
4206noname.389:
4207 addl2 r2,r2
4208 bicl2 #0,r2
4209 bbc #31,r3,noname.390
4210 incl r2
4211noname.390:
4212 addl2 r3,r3
4213 bicl2 #0,r3
4214 addl2 r3,r9
4215 bicl2 #0,r9
4216 cmpl r9,r3
4217 bgequ noname.391
4218 incl r2
4219 bicl3 #0,r2,r0
4220 bneq noname.391
4221 incl r7
4222noname.391:
4223 addl2 r2,r8
4224 bicl2 #0,r8
4225 cmpl r8,r2
4226 bgequ noname.392
4227 incl r7
4228noname.392:
4229
4230 movl 8(ap),r0
4231 movzwl 10(r0),r2
4232 bicl3 #-65536,4(r0),r3
4233 movzwl 6(r0),r1
4234 bicl2 #-65536,r1
4235 bicl3 #-65536,8(r0),-76(fp)
4236 bicl3 #-65536,r2,-80(fp)
4237 mull3 r1,-76(fp),-68(fp)
4238 mull2 r3,-76(fp)
4239 mull3 r3,-80(fp),-72(fp)
4240 mull2 r1,-80(fp)
4241 addl3 -68(fp),-72(fp),r0
4242 bicl3 #0,r0,-68(fp)
4243 cmpl -68(fp),-72(fp)
4244 bgequ noname.393
4245 addl2 #65536,-80(fp)
4246noname.393:
4247 movzwl -66(fp),r0
4248 bicl2 #-65536,r0
4249 addl2 r0,-80(fp)
4250 bicl3 #-65536,-68(fp),r0
4251 ashl #16,r0,-72(fp)
4252 addl3 -72(fp),-76(fp),r0
4253 bicl3 #0,r0,-76(fp)
4254 cmpl -76(fp),-72(fp)
4255 bgequ noname.394
4256 incl -80(fp)
4257noname.394:
4258 movl -76(fp),r3
4259 movl -80(fp),r2
4260 bbc #31,r2,noname.395
4261 incl r7
4262noname.395:
4263 addl2 r2,r2
4264 bicl2 #0,r2
4265 bbc #31,r3,noname.396
4266 incl r2
4267noname.396:
4268 addl2 r3,r3
4269 bicl2 #0,r3
4270 addl2 r3,r9
4271 bicl2 #0,r9
4272 cmpl r9,r3
4273 bgequ noname.397
4274 incl r2
4275 bicl3 #0,r2,r0
4276 bneq noname.397
4277 incl r7
4278noname.397:
4279 addl2 r2,r8
4280 bicl2 #0,r8
4281 cmpl r8,r2
4282 bgequ noname.398
4283 incl r7
4284noname.398:
4285
4286 movl 4(ap),r0
4287 movl r9,12(r0)
4288
4289 clrl r9
4290
4291 movl 8(ap),r2
4292 movl 8(r2),r4
4293 bicl3 #-65536,r4,-84(fp)
4294 extzv #16,#16,r4,r0
4295 bicl3 #-65536,r0,r4
4296 movl -84(fp),r0
4297 mull3 r0,r4,-88(fp)
4298 mull3 r0,r0,-84(fp)
4299 mull2 r4,r4
4300 bicl3 #32767,-88(fp),r0
4301 extzv #15,#17,r0,r0
4302 addl2 r0,r4
4303 bicl3 #-65536,-88(fp),r0
4304 ashl #17,r0,-88(fp)
4305 addl3 -84(fp),-88(fp),r0
4306 bicl3 #0,r0,-84(fp)
4307 cmpl -84(fp),-88(fp)
4308 bgequ noname.399
4309 incl r4
4310noname.399:
4311 movl -84(fp),r1
4312 movl r4,r3
4313 addl2 r1,r8
4314 bicl2 #0,r8
4315 cmpl r8,r1
4316 bgequ noname.400
4317 incl r3
4318noname.400:
4319 addl2 r3,r7
4320 bicl2 #0,r7
4321 cmpl r7,r3
4322 bgequ noname.401
4323 incl r9
4324noname.401:
4325
4326 movzwl 14(r2),r3
4327 bicl3 #-65536,4(r2),r1
4328 movzwl 6(r2),r0
4329 bicl2 #-65536,r0
4330 bicl3 #-65536,12(r2),-100(fp)
4331 bicl3 #-65536,r3,-104(fp)
4332 mull3 r0,-100(fp),-92(fp)
4333 mull2 r1,-100(fp)
4334 mull3 r1,-104(fp),-96(fp)
4335 mull2 r0,-104(fp)
4336 addl3 -92(fp),-96(fp),r0
4337 bicl3 #0,r0,-92(fp)
4338 cmpl -92(fp),-96(fp)
4339 bgequ noname.402
4340 addl2 #65536,-104(fp)
4341noname.402:
4342 movzwl -90(fp),r0
4343 bicl2 #-65536,r0
4344 addl2 r0,-104(fp)
4345 bicl3 #-65536,-92(fp),r0
4346 ashl #16,r0,-96(fp)
4347 addl3 -96(fp),-100(fp),r0
4348 bicl3 #0,r0,-100(fp)
4349 cmpl -100(fp),-96(fp)
4350 bgequ noname.403
4351 incl -104(fp)
4352noname.403:
4353 movl -100(fp),r3
4354 movl -104(fp),r2
4355 bbc #31,r2,noname.404
4356 incl r9
4357noname.404:
4358 addl2 r2,r2
4359 bicl2 #0,r2
4360 bbc #31,r3,noname.405
4361 incl r2
4362noname.405:
4363 addl2 r3,r3
4364 bicl2 #0,r3
4365 addl2 r3,r8
4366 bicl2 #0,r8
4367 cmpl r8,r3
4368 bgequ noname.406
4369 incl r2
4370 bicl3 #0,r2,r0
4371 bneq noname.406
4372 incl r9
4373noname.406:
4374 addl2 r2,r7
4375 bicl2 #0,r7
4376 cmpl r7,r2
4377 bgequ noname.407
4378 incl r9
4379noname.407:
4380
4381 movl 8(ap),r0
4382 movzwl 18(r0),r2
4383 bicl3 #-65536,(r0),r3
4384 movzwl 2(r0),r1
4385 bicl2 #-65536,r1
4386 bicl3 #-65536,16(r0),-116(fp)
4387 bicl3 #-65536,r2,-120(fp)
4388 mull3 r1,-116(fp),-108(fp)
4389 mull2 r3,-116(fp)
4390 mull3 r3,-120(fp),-112(fp)
4391 mull2 r1,-120(fp)
4392 addl3 -108(fp),-112(fp),r0
4393 bicl3 #0,r0,-108(fp)
4394 cmpl -108(fp),-112(fp)
4395 bgequ noname.408
4396 addl2 #65536,-120(fp)
4397noname.408:
4398 movzwl -106(fp),r0
4399 bicl2 #-65536,r0
4400 addl2 r0,-120(fp)
4401 bicl3 #-65536,-108(fp),r0
4402 ashl #16,r0,-112(fp)
4403 addl3 -112(fp),-116(fp),r0
4404 bicl3 #0,r0,-116(fp)
4405 cmpl -116(fp),-112(fp)
4406 bgequ noname.409
4407 incl -120(fp)
4408noname.409:
4409 movl -116(fp),r3
4410 movl -120(fp),r2
4411 bbc #31,r2,noname.410
4412 incl r9
4413noname.410:
4414 addl2 r2,r2
4415 bicl2 #0,r2
4416 bbc #31,r3,noname.411
4417 incl r2
4418noname.411:
4419 addl2 r3,r3
4420 bicl2 #0,r3
4421 addl2 r3,r8
4422 bicl2 #0,r8
4423 cmpl r8,r3
4424 bgequ noname.412
4425 incl r2
4426 bicl3 #0,r2,r0
4427 bneq noname.412
4428 incl r9
4429noname.412:
4430 addl2 r2,r7
4431 bicl2 #0,r7
4432 cmpl r7,r2
4433 bgequ noname.413
4434 incl r9
4435noname.413:
4436
4437 movl 4(ap),r0
4438 movl r8,16(r0)
4439
4440 clrl r8
4441
4442 movl 8(ap),r0
4443 movzwl 22(r0),r2
4444 bicl3 #-65536,(r0),r3
4445 movzwl 2(r0),r1
4446 bicl2 #-65536,r1
4447 bicl3 #-65536,20(r0),-132(fp)
4448 bicl3 #-65536,r2,-136(fp)
4449 mull3 r1,-132(fp),-124(fp)
4450 mull2 r3,-132(fp)
4451 mull3 r3,-136(fp),-128(fp)
4452 mull2 r1,-136(fp)
4453 addl3 -124(fp),-128(fp),r0
4454 bicl3 #0,r0,-124(fp)
4455 cmpl -124(fp),-128(fp)
4456 bgequ noname.414
4457 addl2 #65536,-136(fp)
4458noname.414:
4459 movzwl -122(fp),r0
4460 bicl2 #-65536,r0
4461 addl2 r0,-136(fp)
4462 bicl3 #-65536,-124(fp),r0
4463 ashl #16,r0,-128(fp)
4464 addl3 -128(fp),-132(fp),r0
4465 bicl3 #0,r0,-132(fp)
4466 cmpl -132(fp),-128(fp)
4467 bgequ noname.415
4468 incl -136(fp)
4469noname.415:
4470 movl -132(fp),r3
4471 movl -136(fp),r2
4472 bbc #31,r2,noname.416
4473 incl r8
4474noname.416:
4475 addl2 r2,r2
4476 bicl2 #0,r2
4477 bbc #31,r3,noname.417
4478 incl r2
4479noname.417:
4480 addl2 r3,r3
4481 bicl2 #0,r3
4482 addl2 r3,r7
4483 bicl2 #0,r7
4484 cmpl r7,r3
4485 bgequ noname.418
4486 incl r2
4487 bicl3 #0,r2,r0
4488 bneq noname.418
4489 incl r8
4490noname.418:
4491 addl2 r2,r9
4492 bicl2 #0,r9
4493 cmpl r9,r2
4494 bgequ noname.419
4495 incl r8
4496noname.419:
4497
4498 movl 8(ap),r0
4499 movzwl 18(r0),r2
4500 bicl3 #-65536,4(r0),r3
4501 movzwl 6(r0),r1
4502 bicl2 #-65536,r1
4503 bicl3 #-65536,16(r0),-148(fp)
4504 bicl3 #-65536,r2,-152(fp)
4505 mull3 r1,-148(fp),-140(fp)
4506 mull2 r3,-148(fp)
4507 mull3 r3,-152(fp),-144(fp)
4508 mull2 r1,-152(fp)
4509 addl3 -140(fp),-144(fp),r0
4510 bicl3 #0,r0,-140(fp)
4511 cmpl -140(fp),-144(fp)
4512 bgequ noname.420
4513 addl2 #65536,-152(fp)
4514noname.420:
4515 movzwl -138(fp),r0
4516 bicl2 #-65536,r0
4517 addl2 r0,-152(fp)
4518 bicl3 #-65536,-140(fp),r0
4519 ashl #16,r0,-144(fp)
4520 addl3 -144(fp),-148(fp),r0
4521 bicl3 #0,r0,-148(fp)
4522 cmpl -148(fp),-144(fp)
4523 bgequ noname.421
4524 incl -152(fp)
4525noname.421:
4526 movl -148(fp),r3
4527 movl -152(fp),r2
4528 bbc #31,r2,noname.422
4529 incl r8
4530noname.422:
4531 addl2 r2,r2
4532 bicl2 #0,r2
4533 bbc #31,r3,noname.423
4534 incl r2
4535noname.423:
4536 addl2 r3,r3
4537 bicl2 #0,r3
4538 addl2 r3,r7
4539 bicl2 #0,r7
4540 cmpl r7,r3
4541 bgequ noname.424
4542 incl r2
4543 bicl3 #0,r2,r0
4544 bneq noname.424
4545 incl r8
4546noname.424:
4547 addl2 r2,r9
4548 bicl2 #0,r9
4549 cmpl r9,r2
4550 bgequ noname.425
4551 incl r8
4552noname.425:
4553
4554 movl 8(ap),r0
4555 movzwl 14(r0),r2
4556 bicl3 #-65536,8(r0),r3
4557 movzwl 10(r0),r1
4558 bicl2 #-65536,r1
4559 bicl3 #-65536,12(r0),-164(fp)
4560 bicl3 #-65536,r2,-168(fp)
4561 mull3 r1,-164(fp),-156(fp)
4562 mull2 r3,-164(fp)
4563 mull3 r3,-168(fp),-160(fp)
4564 mull2 r1,-168(fp)
4565 addl3 -156(fp),-160(fp),r0
4566 bicl3 #0,r0,-156(fp)
4567 cmpl -156(fp),-160(fp)
4568 bgequ noname.426
4569 addl2 #65536,-168(fp)
4570noname.426:
4571 movzwl -154(fp),r0
4572 bicl2 #-65536,r0
4573 addl2 r0,-168(fp)
4574 bicl3 #-65536,-156(fp),r0
4575 ashl #16,r0,-160(fp)
4576 addl3 -160(fp),-164(fp),r0
4577 bicl3 #0,r0,-164(fp)
4578 cmpl -164(fp),-160(fp)
4579 bgequ noname.427
4580 incl -168(fp)
4581noname.427:
4582 movl -164(fp),r3
4583 movl -168(fp),r2
4584 bbc #31,r2,noname.428
4585 incl r8
4586noname.428:
4587 addl2 r2,r2
4588 bicl2 #0,r2
4589 bbc #31,r3,noname.429
4590 incl r2
4591noname.429:
4592 addl2 r3,r3
4593 bicl2 #0,r3
4594 addl2 r3,r7
4595 bicl2 #0,r7
4596 cmpl r7,r3
4597 bgequ noname.430
4598 incl r2
4599 bicl3 #0,r2,r0
4600 bneq noname.430
4601 incl r8
4602noname.430:
4603 addl2 r2,r9
4604 bicl2 #0,r9
4605 cmpl r9,r2
4606 bgequ noname.431
4607 incl r8
4608noname.431:
4609
4610 movl 4(ap),r0
4611 movl r7,20(r0)
4612
4613 clrl r7
4614
4615 movl 8(ap),r2
4616 movl 12(r2),r4
4617 bicl3 #-65536,r4,-172(fp)
4618 extzv #16,#16,r4,r0
4619 bicl3 #-65536,r0,r4
4620 movl -172(fp),r0
4621 mull3 r0,r4,-176(fp)
4622 mull3 r0,r0,-172(fp)
4623 mull2 r4,r4
4624 bicl3 #32767,-176(fp),r0
4625 extzv #15,#17,r0,r0
4626 addl2 r0,r4
4627 bicl3 #-65536,-176(fp),r0
4628 ashl #17,r0,-176(fp)
4629 addl3 -172(fp),-176(fp),r0
4630 bicl3 #0,r0,-172(fp)
4631 cmpl -172(fp),-176(fp)
4632 bgequ noname.432
4633 incl r4
4634noname.432:
4635 movl -172(fp),r1
4636 movl r4,r3
4637 addl2 r1,r9
4638 bicl2 #0,r9
4639 cmpl r9,r1
4640 bgequ noname.433
4641 incl r3
4642noname.433:
4643 addl2 r3,r8
4644 bicl2 #0,r8
4645 cmpl r8,r3
4646 bgequ noname.434
4647 incl r7
4648noname.434:
4649
4650 movzwl 18(r2),r3
4651 bicl3 #-65536,8(r2),r1
4652 movzwl 10(r2),r0
4653 bicl2 #-65536,r0
4654 bicl3 #-65536,16(r2),-188(fp)
4655 bicl3 #-65536,r3,-192(fp)
4656 mull3 r0,-188(fp),-180(fp)
4657 mull2 r1,-188(fp)
4658 mull3 r1,-192(fp),-184(fp)
4659 mull2 r0,-192(fp)
4660 addl3 -180(fp),-184(fp),r0
4661 bicl3 #0,r0,-180(fp)
4662 cmpl -180(fp),-184(fp)
4663 bgequ noname.435
4664 addl2 #65536,-192(fp)
4665noname.435:
4666 movzwl -178(fp),r0
4667 bicl2 #-65536,r0
4668 addl2 r0,-192(fp)
4669 bicl3 #-65536,-180(fp),r0
4670 ashl #16,r0,-184(fp)
4671 addl3 -184(fp),-188(fp),r0
4672 bicl3 #0,r0,-188(fp)
4673 cmpl -188(fp),-184(fp)
4674 bgequ noname.436
4675 incl -192(fp)
4676noname.436:
4677 movl -188(fp),r3
4678 movl -192(fp),r2
4679 bbc #31,r2,noname.437
4680 incl r7
4681noname.437:
4682 addl2 r2,r2
4683 bicl2 #0,r2
4684 bbc #31,r3,noname.438
4685 incl r2
4686noname.438:
4687 addl2 r3,r3
4688 bicl2 #0,r3
4689 addl2 r3,r9
4690 bicl2 #0,r9
4691 cmpl r9,r3
4692 bgequ noname.439
4693 incl r2
4694 bicl3 #0,r2,r0
4695 bneq noname.439
4696 incl r7
4697noname.439:
4698 addl2 r2,r8
4699 bicl2 #0,r8
4700 cmpl r8,r2
4701 bgequ noname.440
4702 incl r7
4703noname.440:
4704
4705 movl 8(ap),r0
4706 movzwl 22(r0),r2
4707 bicl3 #-65536,4(r0),r3
4708 movzwl 6(r0),r1
4709 bicl2 #-65536,r1
4710 bicl3 #-65536,20(r0),-204(fp)
4711 bicl3 #-65536,r2,-208(fp)
4712 mull3 r1,-204(fp),-196(fp)
4713 mull2 r3,-204(fp)
4714 mull3 r3,-208(fp),-200(fp)
4715 mull2 r1,-208(fp)
4716 addl3 -196(fp),-200(fp),r0
4717 bicl3 #0,r0,-196(fp)
4718 cmpl -196(fp),-200(fp)
4719 bgequ noname.441
4720 addl2 #65536,-208(fp)
4721noname.441:
4722 movzwl -194(fp),r0
4723 bicl2 #-65536,r0
4724 addl2 r0,-208(fp)
4725 bicl3 #-65536,-196(fp),r0
4726 ashl #16,r0,-200(fp)
4727 addl3 -200(fp),-204(fp),r0
4728 bicl3 #0,r0,-204(fp)
4729 cmpl -204(fp),-200(fp)
4730 bgequ noname.442
4731 incl -208(fp)
4732noname.442:
4733 movl -204(fp),r3
4734 movl -208(fp),r2
4735 bbc #31,r2,noname.443
4736 incl r7
4737noname.443:
4738 addl2 r2,r2
4739 bicl2 #0,r2
4740 bbc #31,r3,noname.444
4741 incl r2
4742noname.444:
4743 addl2 r3,r3
4744 bicl2 #0,r3
4745 addl2 r3,r9
4746 bicl2 #0,r9
4747 cmpl r9,r3
4748 bgequ noname.445
4749 incl r2
4750 bicl3 #0,r2,r0
4751 bneq noname.445
4752 incl r7
4753noname.445:
4754 addl2 r2,r8
4755 bicl2 #0,r8
4756 cmpl r8,r2
4757 bgequ noname.446
4758 incl r7
4759noname.446:
4760
4761 movl 8(ap),r0
4762 movzwl 26(r0),r2
4763 bicl3 #-65536,(r0),r3
4764 movzwl 2(r0),r1
4765 bicl2 #-65536,r1
4766 bicl3 #-65536,24(r0),-220(fp)
4767 bicl3 #-65536,r2,-224(fp)
4768 mull3 r1,-220(fp),-212(fp)
4769 mull2 r3,-220(fp)
4770 mull3 r3,-224(fp),-216(fp)
4771 mull2 r1,-224(fp)
4772 addl3 -212(fp),-216(fp),r0
4773 bicl3 #0,r0,-212(fp)
4774 cmpl -212(fp),-216(fp)
4775 bgequ noname.447
4776 addl2 #65536,-224(fp)
4777noname.447:
4778 movzwl -210(fp),r0
4779 bicl2 #-65536,r0
4780 addl2 r0,-224(fp)
4781 bicl3 #-65536,-212(fp),r0
4782 ashl #16,r0,-216(fp)
4783 addl3 -216(fp),-220(fp),r0
4784 bicl3 #0,r0,-220(fp)
4785 cmpl -220(fp),-216(fp)
4786 bgequ noname.448
4787 incl -224(fp)
4788noname.448:
4789 movl -220(fp),r3
4790 movl -224(fp),r2
4791 bbc #31,r2,noname.449
4792 incl r7
4793noname.449:
4794 addl2 r2,r2
4795 bicl2 #0,r2
4796 bbc #31,r3,noname.450
4797 incl r2
4798noname.450:
4799 addl2 r3,r3
4800 bicl2 #0,r3
4801 addl2 r3,r9
4802 bicl2 #0,r9
4803 cmpl r9,r3
4804 bgequ noname.451
4805 incl r2
4806 bicl3 #0,r2,r0
4807 bneq noname.451
4808 incl r7
4809noname.451:
4810 addl2 r2,r8
4811 bicl2 #0,r8
4812 cmpl r8,r2
4813 bgequ noname.452
4814 incl r7
4815noname.452:
4816
4817 movl 4(ap),r0
4818 movl r9,24(r0)
4819
4820 clrl r9
4821
4822 movl 8(ap),r0
4823 movzwl 30(r0),r2
4824 bicl3 #-65536,(r0),r3
4825 movzwl 2(r0),r1
4826 bicl2 #-65536,r1
4827 bicl3 #-65536,28(r0),-236(fp)
4828 bicl3 #-65536,r2,-240(fp)
4829 mull3 r1,-236(fp),-228(fp)
4830 mull2 r3,-236(fp)
4831 mull3 r3,-240(fp),-232(fp)
4832 mull2 r1,-240(fp)
4833 addl3 -228(fp),-232(fp),r0
4834 bicl3 #0,r0,-228(fp)
4835 cmpl -228(fp),-232(fp)
4836 bgequ noname.453
4837 addl2 #65536,-240(fp)
4838noname.453:
4839 movzwl -226(fp),r0
4840 bicl2 #-65536,r0
4841 addl2 r0,-240(fp)
4842 bicl3 #-65536,-228(fp),r0
4843 ashl #16,r0,-232(fp)
4844 addl3 -232(fp),-236(fp),r0
4845 bicl3 #0,r0,-236(fp)
4846 cmpl -236(fp),-232(fp)
4847 bgequ noname.454
4848 incl -240(fp)
4849noname.454:
4850 movl -236(fp),r3
4851 movl -240(fp),r2
4852 bbc #31,r2,noname.455
4853 incl r9
4854noname.455:
4855 addl2 r2,r2
4856 bicl2 #0,r2
4857 bbc #31,r3,noname.456
4858 incl r2
4859noname.456:
4860 addl2 r3,r3
4861 bicl2 #0,r3
4862 addl2 r3,r8
4863 bicl2 #0,r8
4864 cmpl r8,r3
4865 bgequ noname.457
4866 incl r2
4867 bicl3 #0,r2,r0
4868 bneq noname.457
4869 incl r9
4870noname.457:
4871 addl2 r2,r7
4872 bicl2 #0,r7
4873 cmpl r7,r2
4874 bgequ noname.458
4875 incl r9
4876noname.458:
4877
4878 movl 8(ap),r0
4879 movzwl 26(r0),r2
4880 bicl3 #-65536,4(r0),r3
4881 movzwl 6(r0),r1
4882 bicl2 #-65536,r1
4883 bicl3 #-65536,24(r0),-252(fp)
4884 bicl3 #-65536,r2,-256(fp)
4885 mull3 r1,-252(fp),-244(fp)
4886 mull2 r3,-252(fp)
4887 mull3 r3,-256(fp),-248(fp)
4888 mull2 r1,-256(fp)
4889 addl3 -244(fp),-248(fp),r0
4890 bicl3 #0,r0,-244(fp)
4891 cmpl -244(fp),-248(fp)
4892 bgequ noname.459
4893 addl2 #65536,-256(fp)
4894noname.459:
4895 movzwl -242(fp),r0
4896 bicl2 #-65536,r0
4897 addl2 r0,-256(fp)
4898 bicl3 #-65536,-244(fp),r0
4899 ashl #16,r0,-248(fp)
4900 addl3 -248(fp),-252(fp),r0
4901 bicl3 #0,r0,-252(fp)
4902 cmpl -252(fp),-248(fp)
4903 bgequ noname.460
4904 incl -256(fp)
4905noname.460:
4906 movl -252(fp),r3
4907 movl -256(fp),r2
4908 bbc #31,r2,noname.461
4909 incl r9
4910noname.461:
4911 addl2 r2,r2
4912 bicl2 #0,r2
4913 bbc #31,r3,noname.462
4914 incl r2
4915noname.462:
4916 addl2 r3,r3
4917 bicl2 #0,r3
4918 addl2 r3,r8
4919 bicl2 #0,r8
4920 cmpl r8,r3
4921 bgequ noname.463
4922 incl r2
4923 bicl3 #0,r2,r0
4924 bneq noname.463
4925 incl r9
4926noname.463:
4927 addl2 r2,r7
4928 bicl2 #0,r7
4929 cmpl r7,r2
4930 bgequ noname.464
4931 incl r9
4932noname.464:
4933
4934 movl 8(ap),r0
4935 movzwl 22(r0),r2
4936 bicl3 #-65536,8(r0),r3
4937 movzwl 10(r0),r1
4938 bicl2 #-65536,r1
4939 bicl3 #-65536,20(r0),-268(fp)
4940 bicl3 #-65536,r2,-272(fp)
4941 mull3 r1,-268(fp),-260(fp)
4942 mull2 r3,-268(fp)
4943 mull3 r3,-272(fp),-264(fp)
4944 mull2 r1,-272(fp)
4945 addl3 -260(fp),-264(fp),r0
4946 bicl3 #0,r0,-260(fp)
4947 cmpl -260(fp),-264(fp)
4948 bgequ noname.465
4949 addl2 #65536,-272(fp)
4950noname.465:
4951 movzwl -258(fp),r0
4952 bicl2 #-65536,r0
4953 addl2 r0,-272(fp)
4954 bicl3 #-65536,-260(fp),r0
4955 ashl #16,r0,-264(fp)
4956 addl3 -264(fp),-268(fp),r0
4957 bicl3 #0,r0,-268(fp)
4958 cmpl -268(fp),-264(fp)
4959 bgequ noname.466
4960 incl -272(fp)
4961noname.466:
4962 movl -268(fp),r3
4963 movl -272(fp),r2
4964 bbc #31,r2,noname.467
4965 incl r9
4966noname.467:
4967 addl2 r2,r2
4968 bicl2 #0,r2
4969 bbc #31,r3,noname.468
4970 incl r2
4971noname.468:
4972 addl2 r3,r3
4973 bicl2 #0,r3
4974 addl2 r3,r8
4975 bicl2 #0,r8
4976 cmpl r8,r3
4977 bgequ noname.469
4978 incl r2
4979 bicl3 #0,r2,r0
4980 bneq noname.469
4981 incl r9
4982noname.469:
4983 addl2 r2,r7
4984 bicl2 #0,r7
4985 cmpl r7,r2
4986 bgequ noname.470
4987 incl r9
4988noname.470:
4989
4990 movl 8(ap),r0
4991 movzwl 18(r0),r2
4992 bicl3 #-65536,12(r0),r3
4993 movzwl 14(r0),r1
4994 bicl2 #-65536,r1
4995 bicl3 #-65536,16(r0),-284(fp)
4996 bicl3 #-65536,r2,-288(fp)
4997 mull3 r1,-284(fp),-276(fp)
4998 mull2 r3,-284(fp)
4999 mull3 r3,-288(fp),-280(fp)
5000 mull2 r1,-288(fp)
5001 addl3 -276(fp),-280(fp),r0
5002 bicl3 #0,r0,-276(fp)
5003 cmpl -276(fp),-280(fp)
5004 bgequ noname.471
5005 addl2 #65536,-288(fp)
5006noname.471:
5007 movzwl -274(fp),r0
5008 bicl2 #-65536,r0
5009 addl2 r0,-288(fp)
5010 bicl3 #-65536,-276(fp),r0
5011 ashl #16,r0,-280(fp)
5012 addl3 -280(fp),-284(fp),r0
5013 bicl3 #0,r0,-284(fp)
5014 cmpl -284(fp),-280(fp)
5015 bgequ noname.472
5016 incl -288(fp)
5017noname.472:
5018 movl -284(fp),r3
5019 movl -288(fp),r2
5020 bbc #31,r2,noname.473
5021 incl r9
5022noname.473:
5023 addl2 r2,r2
5024 bicl2 #0,r2
5025 bbc #31,r3,noname.474
5026 incl r2
5027noname.474:
5028 addl2 r3,r3
5029 bicl2 #0,r3
5030 addl2 r3,r8
5031 bicl2 #0,r8
5032 cmpl r8,r3
5033 bgequ noname.475
5034 incl r2
5035 bicl3 #0,r2,r0
5036 bneq noname.475
5037 incl r9
5038noname.475:
5039 addl2 r2,r7
5040 bicl2 #0,r7
5041 cmpl r7,r2
5042 bgequ noname.476
5043 incl r9
5044noname.476:
5045
5046 movl 4(ap),r0
5047 movl r8,28(r0)
5048
5049 clrl r8
5050
5051 movl 8(ap),r3
5052 movl 16(r3),r4
5053 bicl3 #-65536,r4,r5
5054 extzv #16,#16,r4,r0
5055 bicl3 #-65536,r0,r4
5056 mull3 r5,r4,-292(fp)
5057 mull2 r5,r5
5058 mull2 r4,r4
5059 bicl3 #32767,-292(fp),r0
5060 extzv #15,#17,r0,r0
5061 addl2 r0,r4
5062 bicl3 #-65536,-292(fp),r0
5063 ashl #17,r0,-292(fp)
5064 addl2 -292(fp),r5
5065 bicl2 #0,r5
5066 cmpl r5,-292(fp)
5067 bgequ noname.477
5068 incl r4
5069noname.477:
5070 movl r5,r1
5071 movl r4,r2
5072 addl2 r1,r7
5073 bicl2 #0,r7
5074 cmpl r7,r1
5075 bgequ noname.478
5076 incl r2
5077noname.478:
5078 addl2 r2,r9
5079 bicl2 #0,r9
5080 cmpl r9,r2
5081 bgequ noname.479
5082 incl r8
5083noname.479:
5084
5085 bicl3 #-65536,20(r3),r4
5086 movzwl 22(r3),r1
5087 bicl2 #-65536,r1
5088 bicl3 #-65536,12(r3),r2
5089 movzwl 14(r3),r0
5090 bicl2 #-65536,r0
5091 movl r4,r6
5092 movl r1,r5
5093 mull3 r0,r6,-296(fp)
5094 mull2 r2,r6
5095 mull3 r2,r5,-300(fp)
5096 mull2 r0,r5
5097 addl3 -296(fp),-300(fp),r0
5098 bicl3 #0,r0,-296(fp)
5099 cmpl -296(fp),-300(fp)
5100 bgequ noname.480
5101 addl2 #65536,r5
5102noname.480:
5103 movzwl -294(fp),r0
5104 bicl2 #-65536,r0
5105 addl2 r0,r5
5106 bicl3 #-65536,-296(fp),r0
5107 ashl #16,r0,-300(fp)
5108 addl2 -300(fp),r6
5109 bicl2 #0,r6
5110 cmpl r6,-300(fp)
5111 bgequ noname.481
5112 incl r5
5113noname.481:
5114 movl r6,r3
5115 movl r5,r2
5116 bbc #31,r2,noname.482
5117 incl r8
5118noname.482:
5119 addl2 r2,r2
5120 bicl2 #0,r2
5121 bbc #31,r3,noname.483
5122 incl r2
5123noname.483:
5124 addl2 r3,r3
5125 bicl2 #0,r3
5126 addl2 r3,r7
5127 bicl2 #0,r7
5128 cmpl r7,r3
5129 bgequ noname.484
5130 incl r2
5131 bicl3 #0,r2,r0
5132 bneq noname.484
5133 incl r8
5134noname.484:
5135 addl2 r2,r9
5136 bicl2 #0,r9
5137 cmpl r9,r2
5138 bgequ noname.485
5139 incl r8
5140noname.485:
5141
5142 movl 8(ap),r0
5143 bicl3 #-65536,24(r0),r3
5144 movzwl 26(r0),r1
5145 bicl2 #-65536,r1
5146 bicl3 #-65536,8(r0),r2
5147 movzwl 10(r0),r0
5148 bicl2 #-65536,r0
5149 movl r3,r5
5150 movl r1,r4
5151 mull3 r0,r5,-304(fp)
5152 mull2 r2,r5
5153 mull3 r2,r4,-308(fp)
5154 mull2 r0,r4
5155 addl3 -304(fp),-308(fp),r0
5156 bicl3 #0,r0,-304(fp)
5157 cmpl -304(fp),-308(fp)
5158 bgequ noname.486
5159 addl2 #65536,r4
5160noname.486:
5161 movzwl -302(fp),r0
5162 bicl2 #-65536,r0
5163 addl2 r0,r4
5164 bicl3 #-65536,-304(fp),r0
5165 ashl #16,r0,-308(fp)
5166 addl2 -308(fp),r5
5167 bicl2 #0,r5
5168 cmpl r5,-308(fp)
5169 bgequ noname.487
5170 incl r4
5171noname.487:
5172 movl r5,r3
5173 movl r4,r2
5174 bbc #31,r2,noname.488
5175 incl r8
5176noname.488:
5177 addl2 r2,r2
5178 bicl2 #0,r2
5179 bbc #31,r3,noname.489
5180 incl r2
5181noname.489:
5182 addl2 r3,r3
5183 bicl2 #0,r3
5184 addl2 r3,r7
5185 bicl2 #0,r7
5186 cmpl r7,r3
5187 bgequ noname.490
5188 incl r2
5189 bicl3 #0,r2,r0
5190 bneq noname.490
5191 incl r8
5192noname.490:
5193 addl2 r2,r9
5194 bicl2 #0,r9
5195 cmpl r9,r2
5196 bgequ noname.491
5197 incl r8
5198noname.491:
5199
5200 movl 8(ap),r0
5201 bicl3 #-65536,28(r0),r3
5202 movzwl 30(r0),r1
5203 bicl2 #-65536,r1
5204 bicl3 #-65536,4(r0),r2
5205 movzwl 6(r0),r0
5206 bicl2 #-65536,r0
5207 movl r3,r5
5208 movl r1,r4
5209 mull3 r0,r5,-312(fp)
5210 mull2 r2,r5
5211 mull3 r2,r4,-316(fp)
5212 mull2 r0,r4
5213 addl3 -312(fp),-316(fp),r0
5214 bicl3 #0,r0,-312(fp)
5215 cmpl -312(fp),-316(fp)
5216 bgequ noname.492
5217 addl2 #65536,r4
5218noname.492:
5219 movzwl -310(fp),r0
5220 bicl2 #-65536,r0
5221 addl2 r0,r4
5222 bicl3 #-65536,-312(fp),r0
5223 ashl #16,r0,-316(fp)
5224 addl2 -316(fp),r5
5225 bicl2 #0,r5
5226 cmpl r5,-316(fp)
5227 bgequ noname.493
5228 incl r4
5229noname.493:
5230 movl r5,r3
5231 movl r4,r2
5232 bbc #31,r2,noname.494
5233 incl r8
5234noname.494:
5235 addl2 r2,r2
5236 bicl2 #0,r2
5237 bbc #31,r3,noname.495
5238 incl r2
5239noname.495:
5240 addl2 r3,r3
5241 bicl2 #0,r3
5242 addl2 r3,r7
5243 bicl2 #0,r7
5244 cmpl r7,r3
5245 bgequ noname.496
5246 incl r2
5247 bicl3 #0,r2,r0
5248 bneq noname.496
5249 incl r8
5250noname.496:
5251 addl2 r2,r9
5252 bicl2 #0,r9
5253 cmpl r9,r2
5254 bgequ noname.497
5255 incl r8
5256noname.497:
5257
5258 movl 4(ap),r0
5259 movl r7,32(r0)
5260
5261 clrl r7
5262
5263 movl 8(ap),r0
5264 bicl3 #-65536,28(r0),r3
5265 movzwl 30(r0),r2
5266 bicl3 #-65536,8(r0),r1
5267 movzwl 10(r0),r0
5268 bicl2 #-65536,r0
5269 movl r3,r4
5270 bicl3 #-65536,r2,-328(fp)
5271 mull3 r0,r4,-320(fp)
5272 mull2 r1,r4
5273 mull3 r1,-328(fp),-324(fp)
5274 mull2 r0,-328(fp)
5275 addl3 -320(fp),-324(fp),r0
5276 bicl3 #0,r0,-320(fp)
5277 cmpl -320(fp),-324(fp)
5278 bgequ noname.498
5279 addl2 #65536,-328(fp)
5280noname.498:
5281 movzwl -318(fp),r0
5282 bicl2 #-65536,r0
5283 addl2 r0,-328(fp)
5284 bicl3 #-65536,-320(fp),r0
5285 ashl #16,r0,-324(fp)
5286 addl2 -324(fp),r4
5287 bicl2 #0,r4
5288 cmpl r4,-324(fp)
5289 bgequ noname.499
5290 incl -328(fp)
5291noname.499:
5292 movl r4,r3
5293 movl -328(fp),r2
5294 bbc #31,r2,noname.500
5295 incl r7
5296noname.500:
5297 addl2 r2,r2
5298 bicl2 #0,r2
5299 bbc #31,r3,noname.501
5300 incl r2
5301noname.501:
5302 addl2 r3,r3
5303 bicl2 #0,r3
5304 addl2 r3,r9
5305 bicl2 #0,r9
5306 cmpl r9,r3
5307 bgequ noname.502
5308 incl r2
5309 bicl3 #0,r2,r0
5310 bneq noname.502
5311 incl r7
5312noname.502:
5313 addl2 r2,r8
5314 bicl2 #0,r8
5315 cmpl r8,r2
5316 bgequ noname.503
5317 incl r7
5318noname.503:
5319
5320 movl 8(ap),r0
5321 movzwl 26(r0),r2
5322 bicl3 #-65536,12(r0),r3
5323 movzwl 14(r0),r1
5324 bicl2 #-65536,r1
5325 bicl3 #-65536,24(r0),-340(fp)
5326 bicl3 #-65536,r2,-344(fp)
5327 mull3 r1,-340(fp),-332(fp)
5328 mull2 r3,-340(fp)
5329 mull3 r3,-344(fp),-336(fp)
5330 mull2 r1,-344(fp)
5331 addl3 -332(fp),-336(fp),r0
5332 bicl3 #0,r0,-332(fp)
5333 cmpl -332(fp),-336(fp)
5334 bgequ noname.504
5335 addl2 #65536,-344(fp)
5336noname.504:
5337 movzwl -330(fp),r0
5338 bicl2 #-65536,r0
5339 addl2 r0,-344(fp)
5340 bicl3 #-65536,-332(fp),r0
5341 ashl #16,r0,-336(fp)
5342 addl3 -336(fp),-340(fp),r0
5343 bicl3 #0,r0,-340(fp)
5344 cmpl -340(fp),-336(fp)
5345 bgequ noname.505
5346 incl -344(fp)
5347noname.505:
5348 movl -340(fp),r3
5349 movl -344(fp),r2
5350 bbc #31,r2,noname.506
5351 incl r7
5352noname.506:
5353 addl2 r2,r2
5354 bicl2 #0,r2
5355 bbc #31,r3,noname.507
5356 incl r2
5357noname.507:
5358 addl2 r3,r3
5359 bicl2 #0,r3
5360 addl2 r3,r9
5361 bicl2 #0,r9
5362 cmpl r9,r3
5363 bgequ noname.508
5364 incl r2
5365 bicl3 #0,r2,r0
5366 bneq noname.508
5367 incl r7
5368noname.508:
5369 addl2 r2,r8
5370 bicl2 #0,r8
5371 cmpl r8,r2
5372 bgequ noname.509
5373 incl r7
5374noname.509:
5375
5376 movl 8(ap),r0
5377 movzwl 22(r0),r2
5378 bicl3 #-65536,16(r0),r3
5379 movzwl 18(r0),r1
5380 bicl2 #-65536,r1
5381 bicl3 #-65536,20(r0),-356(fp)
5382 bicl3 #-65536,r2,-360(fp)
5383 mull3 r1,-356(fp),-348(fp)
5384 mull2 r3,-356(fp)
5385 mull3 r3,-360(fp),-352(fp)
5386 mull2 r1,-360(fp)
5387 addl3 -348(fp),-352(fp),r0
5388 bicl3 #0,r0,-348(fp)
5389 cmpl -348(fp),-352(fp)
5390 bgequ noname.510
5391 addl2 #65536,-360(fp)
5392noname.510:
5393 movzwl -346(fp),r0
5394 bicl2 #-65536,r0
5395 addl2 r0,-360(fp)
5396 bicl3 #-65536,-348(fp),r0
5397 ashl #16,r0,-352(fp)
5398 addl3 -352(fp),-356(fp),r0
5399 bicl3 #0,r0,-356(fp)
5400 cmpl -356(fp),-352(fp)
5401 bgequ noname.511
5402 incl -360(fp)
5403noname.511:
5404 movl -356(fp),r3
5405 movl -360(fp),r2
5406 bbc #31,r2,noname.512
5407 incl r7
5408noname.512:
5409 addl2 r2,r2
5410 bicl2 #0,r2
5411 bbc #31,r3,noname.513
5412 incl r2
5413noname.513:
5414 addl2 r3,r3
5415 bicl2 #0,r3
5416 addl2 r3,r9
5417 bicl2 #0,r9
5418 cmpl r9,r3
5419 bgequ noname.514
5420 incl r2
5421 bicl3 #0,r2,r0
5422 bneq noname.514
5423 incl r7
5424noname.514:
5425 addl2 r2,r8
5426 bicl2 #0,r8
5427 cmpl r8,r2
5428 bgequ noname.515
5429 incl r7
5430noname.515:
5431
5432 movl 4(ap),r0
5433 movl r9,36(r0)
5434
5435 clrl r9
5436
5437 movl 8(ap),r3
5438 movl 20(r3),r4
5439 bicl3 #-65536,r4,-364(fp)
5440 extzv #16,#16,r4,r0
5441 bicl3 #-65536,r0,r4
5442 movl -364(fp),r0
5443 mull3 r0,r4,-368(fp)
5444 mull3 r0,r0,-364(fp)
5445 mull2 r4,r4
5446 bicl3 #32767,-368(fp),r0
5447 extzv #15,#17,r0,r0
5448 addl2 r0,r4
5449 bicl3 #-65536,-368(fp),r0
5450 ashl #17,r0,-368(fp)
5451 addl3 -364(fp),-368(fp),r0
5452 bicl3 #0,r0,-364(fp)
5453 cmpl -364(fp),-368(fp)
5454 bgequ noname.516
5455 incl r4
5456noname.516:
5457 movl -364(fp),r1
5458 movl r4,r2
5459 addl2 r1,r8
5460 bicl2 #0,r8
5461 cmpl r8,r1
5462 bgequ noname.517
5463 incl r2
5464noname.517:
5465 addl2 r2,r7
5466 bicl2 #0,r7
5467 cmpl r7,r2
5468 bgequ noname.518
5469 incl r9
5470noname.518:
5471
5472 bicl3 #-65536,24(r3),r4
5473 movzwl 26(r3),r1
5474 bicl2 #-65536,r1
5475 bicl3 #-65536,16(r3),r2
5476 movzwl 18(r3),r0
5477 bicl2 #-65536,r0
5478 movl r4,r6
5479 movl r1,r5
5480 mull3 r0,r6,-372(fp)
5481 mull2 r2,r6
5482 mull3 r2,r5,-376(fp)
5483 mull2 r0,r5
5484 addl3 -372(fp),-376(fp),r0
5485 bicl3 #0,r0,-372(fp)
5486 cmpl -372(fp),-376(fp)
5487 bgequ noname.519
5488 addl2 #65536,r5
5489noname.519:
5490 movzwl -370(fp),r0
5491 bicl2 #-65536,r0
5492 addl2 r0,r5
5493 bicl3 #-65536,-372(fp),r0
5494 ashl #16,r0,-376(fp)
5495 addl2 -376(fp),r6
5496 bicl2 #0,r6
5497 cmpl r6,-376(fp)
5498 bgequ noname.520
5499 incl r5
5500noname.520:
5501 movl r6,r3
5502 movl r5,r2
5503 bbc #31,r2,noname.521
5504 incl r9
5505noname.521:
5506 addl2 r2,r2
5507 bicl2 #0,r2
5508 bbc #31,r3,noname.522
5509 incl r2
5510noname.522:
5511 addl2 r3,r3
5512 bicl2 #0,r3
5513 addl2 r3,r8
5514 bicl2 #0,r8
5515 cmpl r8,r3
5516 bgequ noname.523
5517 incl r2
5518 bicl3 #0,r2,r0
5519 bneq noname.523
5520 incl r9
5521noname.523:
5522 addl2 r2,r7
5523 bicl2 #0,r7
5524 cmpl r7,r2
5525 bgequ noname.524
5526 incl r9
5527noname.524:
5528
5529 movl 8(ap),r0
5530 bicl3 #-65536,28(r0),r3
5531 movzwl 30(r0),r1
5532 bicl2 #-65536,r1
5533 bicl3 #-65536,12(r0),r2
5534 movzwl 14(r0),r0
5535 bicl2 #-65536,r0
5536 movl r3,r5
5537 movl r1,r4
5538 mull3 r0,r5,-380(fp)
5539 mull2 r2,r5
5540 mull3 r2,r4,-384(fp)
5541 mull2 r0,r4
5542 addl3 -380(fp),-384(fp),r0
5543 bicl3 #0,r0,-380(fp)
5544 cmpl -380(fp),-384(fp)
5545 bgequ noname.525
5546 addl2 #65536,r4
5547noname.525:
5548 movzwl -378(fp),r0
5549 bicl2 #-65536,r0
5550 addl2 r0,r4
5551 bicl3 #-65536,-380(fp),r0
5552 ashl #16,r0,-384(fp)
5553 addl2 -384(fp),r5
5554 bicl2 #0,r5
5555 cmpl r5,-384(fp)
5556 bgequ noname.526
5557 incl r4
5558noname.526:
5559 movl r5,r3
5560 movl r4,r2
5561 bbc #31,r2,noname.527
5562 incl r9
5563noname.527:
5564 addl2 r2,r2
5565 bicl2 #0,r2
5566 bbc #31,r3,noname.528
5567 incl r2
5568noname.528:
5569 addl2 r3,r3
5570 bicl2 #0,r3
5571 addl2 r3,r8
5572 bicl2 #0,r8
5573 cmpl r8,r3
5574 bgequ noname.529
5575 incl r2
5576 bicl3 #0,r2,r0
5577 bneq noname.529
5578 incl r9
5579noname.529:
5580 addl2 r2,r7
5581 bicl2 #0,r7
5582 cmpl r7,r2
5583 bgequ noname.530
5584 incl r9
5585noname.530:
5586 movl 4(ap),r0
5587 movl r8,40(r0)
5588
5589 clrl r8
5590
5591 movl 8(ap),r0
5592 bicl3 #-65536,28(r0),r3
5593 movzwl 30(r0),r1
5594 bicl2 #-65536,r1
5595 bicl3 #-65536,16(r0),r2
5596 movzwl 18(r0),r0
5597 bicl2 #-65536,r0
5598 movl r3,r5
5599 movl r1,r4
5600 mull3 r0,r5,-388(fp)
5601 mull2 r2,r5
5602 mull3 r2,r4,-392(fp)
5603 mull2 r0,r4
5604 addl3 -388(fp),-392(fp),r0
5605 bicl3 #0,r0,-388(fp)
5606 cmpl -388(fp),-392(fp)
5607 bgequ noname.531
5608 addl2 #65536,r4
5609noname.531:
5610 movzwl -386(fp),r0
5611 bicl2 #-65536,r0
5612 addl2 r0,r4
5613 bicl3 #-65536,-388(fp),r0
5614 ashl #16,r0,-392(fp)
5615 addl2 -392(fp),r5
5616 bicl2 #0,r5
5617 cmpl r5,-392(fp)
5618 bgequ noname.532
5619 incl r4
5620noname.532:
5621 movl r5,r3
5622 movl r4,r2
5623 bbc #31,r2,noname.533
5624 incl r8
5625noname.533:
5626 addl2 r2,r2
5627 bicl2 #0,r2
5628 bbc #31,r3,noname.534
5629 incl r2
5630noname.534:
5631 addl2 r3,r3
5632 bicl2 #0,r3
5633 addl2 r3,r7
5634 bicl2 #0,r7
5635 cmpl r7,r3
5636 bgequ noname.535
5637 incl r2
5638 bicl3 #0,r2,r0
5639 bneq noname.535
5640 incl r8
5641noname.535:
5642 addl2 r2,r9
5643 bicl2 #0,r9
5644 cmpl r9,r2
5645 bgequ noname.536
5646 incl r8
5647noname.536:
5648
5649 movl 8(ap),r0
5650 bicl3 #-65536,24(r0),r3
5651 movzwl 26(r0),r1
5652 bicl2 #-65536,r1
5653 bicl3 #-65536,20(r0),r2
5654 movzwl 22(r0),r0
5655 bicl2 #-65536,r0
5656 movl r3,r5
5657 movl r1,r4
5658 mull3 r0,r5,-396(fp)
5659 mull2 r2,r5
5660 mull3 r2,r4,-400(fp)
5661 mull2 r0,r4
5662 addl3 -396(fp),-400(fp),r0
5663 bicl3 #0,r0,-396(fp)
5664 cmpl -396(fp),-400(fp)
5665 bgequ noname.537
5666 addl2 #65536,r4
5667noname.537:
5668 movzwl -394(fp),r0
5669 bicl2 #-65536,r0
5670 addl2 r0,r4
5671 bicl3 #-65536,-396(fp),r0
5672 ashl #16,r0,-400(fp)
5673 addl2 -400(fp),r5
5674 bicl2 #0,r5
5675 cmpl r5,-400(fp)
5676 bgequ noname.538
5677 incl r4
5678noname.538:
5679 movl r5,r3
5680 movl r4,r2
5681 bbc #31,r2,noname.539
5682 incl r8
5683noname.539:
5684 addl2 r2,r2
5685 bicl2 #0,r2
5686 bbc #31,r3,noname.540
5687 incl r2
5688noname.540:
5689 addl2 r3,r3
5690 bicl2 #0,r3
5691 addl2 r3,r7
5692 bicl2 #0,r7
5693 cmpl r7,r3
5694 bgequ noname.541
5695 incl r2
5696 bicl3 #0,r2,r0
5697 bneq noname.541
5698 incl r8
5699noname.541:
5700 addl2 r2,r9
5701 bicl2 #0,r9
5702 cmpl r9,r2
5703 bgequ noname.542
5704 incl r8
5705noname.542:
5706
5707 movl 4(ap),r0
5708 movl r7,44(r0)
5709
5710 clrl r7
5711
5712 movl 8(ap),r3
5713 movl 24(r3),r4
5714 bicl3 #-65536,r4,r5
5715 extzv #16,#16,r4,r0
5716 bicl3 #-65536,r0,r4
5717 mull3 r5,r4,-404(fp)
5718 mull2 r5,r5
5719 mull2 r4,r4
5720 bicl3 #32767,-404(fp),r0
5721 extzv #15,#17,r0,r0
5722 addl2 r0,r4
5723 bicl3 #-65536,-404(fp),r0
5724 ashl #17,r0,-404(fp)
5725 addl2 -404(fp),r5
5726 bicl2 #0,r5
5727 cmpl r5,-404(fp)
5728 bgequ noname.543
5729 incl r4
5730noname.543:
5731 movl r5,r1
5732 movl r4,r2
5733 addl2 r1,r9
5734 bicl2 #0,r9
5735 cmpl r9,r1
5736 bgequ noname.544
5737 incl r2
5738noname.544:
5739 addl2 r2,r8
5740 bicl2 #0,r8
5741 cmpl r8,r2
5742 bgequ noname.545
5743 incl r7
5744noname.545:
5745
5746 movzwl 30(r3),r2
5747 bicl3 #-65536,20(r3),r1
5748 movzwl 22(r3),r0
5749 bicl2 #-65536,r0
5750 bicl3 #-65536,28(r3),-416(fp)
5751 bicl3 #-65536,r2,-420(fp)
5752 mull3 r0,-416(fp),-408(fp)
5753 mull2 r1,-416(fp)
5754 mull3 r1,-420(fp),-412(fp)
5755 mull2 r0,-420(fp)
5756 addl3 -408(fp),-412(fp),r0
5757 bicl3 #0,r0,-408(fp)
5758 cmpl -408(fp),-412(fp)
5759 bgequ noname.546
5760 addl2 #65536,-420(fp)
5761noname.546:
5762 movzwl -406(fp),r0
5763 bicl2 #-65536,r0
5764 addl2 r0,-420(fp)
5765 bicl3 #-65536,-408(fp),r0
5766 ashl #16,r0,-412(fp)
5767 addl3 -412(fp),-416(fp),r0
5768 bicl3 #0,r0,-416(fp)
5769 cmpl -416(fp),-412(fp)
5770 bgequ noname.547
5771 incl -420(fp)
5772noname.547:
5773 movl -416(fp),r3
5774 movl -420(fp),r2
5775 bbc #31,r2,noname.548
5776 incl r7
5777noname.548:
5778 addl2 r2,r2
5779 bicl2 #0,r2
5780 bbc #31,r3,noname.549
5781 incl r2
5782noname.549:
5783 addl2 r3,r3
5784 bicl2 #0,r3
5785 addl2 r3,r9
5786 bicl2 #0,r9
5787 cmpl r9,r3
5788 bgequ noname.550
5789 incl r2
5790 bicl3 #0,r2,r0
5791 bneq noname.550
5792 incl r7
5793noname.550:
5794 addl2 r2,r8
5795 bicl2 #0,r8
5796 cmpl r8,r2
5797 bgequ noname.551
5798 incl r7
5799noname.551:
5800
5801 movl 4(ap),r0
5802 movl r9,48(r0)
5803
5804 clrl r9
5805
5806 movl 8(ap),r0
5807 movzwl 30(r0),r2
5808 bicl3 #-65536,24(r0),r3
5809 movzwl 26(r0),r1
5810 bicl2 #-65536,r1
5811 bicl3 #-65536,28(r0),-432(fp)
5812 bicl3 #-65536,r2,-436(fp)
5813 mull3 r1,-432(fp),-424(fp)
5814 mull2 r3,-432(fp)
5815 mull3 r3,-436(fp),-428(fp)
5816 mull2 r1,-436(fp)
5817 addl3 -424(fp),-428(fp),r0
5818 bicl3 #0,r0,-424(fp)
5819 cmpl -424(fp),-428(fp)
5820 bgequ noname.552
5821 addl2 #65536,-436(fp)
5822noname.552:
5823 movzwl -422(fp),r0
5824 bicl2 #-65536,r0
5825 addl2 r0,-436(fp)
5826 bicl3 #-65536,-424(fp),r0
5827 ashl #16,r0,-428(fp)
5828 addl3 -428(fp),-432(fp),r0
5829 bicl3 #0,r0,-432(fp)
5830 cmpl -432(fp),-428(fp)
5831 bgequ noname.553
5832 incl -436(fp)
5833noname.553:
5834 movl -432(fp),r3
5835 movl -436(fp),r2
5836 bbc #31,r2,noname.554
5837 incl r9
5838noname.554:
5839 addl2 r2,r2
5840 bicl2 #0,r2
5841 bbc #31,r3,noname.555
5842 incl r2
5843noname.555:
5844 addl2 r3,r3
5845 bicl2 #0,r3
5846 addl2 r3,r8
5847 bicl2 #0,r8
5848 cmpl r8,r3
5849 bgequ noname.556
5850 incl r2
5851 bicl3 #0,r2,r0
5852 bneq noname.556
5853 incl r9
5854noname.556:
5855 addl2 r2,r7
5856 bicl2 #0,r7
5857 cmpl r7,r2
5858 bgequ noname.557
5859 incl r9
5860noname.557:
5861
5862 movl 4(ap),r4
5863 movl r8,52(r4)
5864
5865 clrl r8
5866
5867 movl 8(ap),r0
5868 movl 28(r0),r3
5869 bicl3 #-65536,r3,-440(fp)
5870 extzv #16,#16,r3,r0
5871 bicl3 #-65536,r0,r3
5872 movl -440(fp),r0
5873 mull3 r0,r3,-444(fp)
5874 mull3 r0,r0,-440(fp)
5875 mull2 r3,r3
5876 bicl3 #32767,-444(fp),r0
5877 extzv #15,#17,r0,r0
5878 addl2 r0,r3
5879 bicl3 #-65536,-444(fp),r0
5880 ashl #17,r0,-444(fp)
5881 addl3 -440(fp),-444(fp),r0
5882 bicl3 #0,r0,-440(fp)
5883 cmpl -440(fp),-444(fp)
5884 bgequ noname.558
5885 incl r3
5886noname.558:
5887 movl -440(fp),r1
5888 movl r3,r2
5889 addl2 r1,r7
5890 bicl2 #0,r7
5891 cmpl r7,r1
5892 bgequ noname.559
5893 incl r2
5894noname.559:
5895 addl2 r2,r9
5896 bicl2 #0,r9
5897 cmpl r9,r2
5898 bgequ noname.560
5899 incl r8
5900noname.560:
5901
5902 movl r7,56(r4)
5903
5904 movl r9,60(r4)
5905
5906 ret
5907
5908
5909
5910;r=4 ;(AP)
5911;a=8 ;(AP)
5912;b=12 ;(AP)
5913;n=16 ;(AP) n by value (input)
5914
5915 .psect code,nowrt
5916
5917.entry BN_SQR_COMBA4,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10>
5918 subl2 #44,sp
5919
5920 clrq r8
5921
5922 clrl r10
5923
5924 movl 8(ap),r5
5925 movl (r5),r3
5926 bicl3 #-65536,r3,r4
5927 extzv #16,#16,r3,r0
5928 bicl3 #-65536,r0,r3
5929 mull3 r4,r3,-4(fp)
5930 mull2 r4,r4
5931 mull2 r3,r3
5932 bicl3 #32767,-4(fp),r0
5933 extzv #15,#17,r0,r0
5934 addl2 r0,r3
5935 bicl3 #-65536,-4(fp),r0
5936 ashl #17,r0,-4(fp)
5937 addl2 -4(fp),r4
5938 bicl2 #0,r4
5939 cmpl r4,-4(fp)
5940 bgequ noname.563
5941 incl r3
5942noname.563:
5943 movl r4,r1
5944 movl r3,r2
5945 addl2 r1,r9
5946 bicl2 #0,r9
5947 cmpl r9,r1
5948 bgequ noname.564
5949 incl r2
5950noname.564:
5951 addl2 r2,r8
5952 bicl2 #0,r8
5953 cmpl r8,r2
5954 bgequ noname.565
5955 incl r10
5956noname.565:
5957
5958 movl r9,@4(ap)
5959
5960 clrl r9
5961
5962 bicl3 #-65536,4(r5),r3
5963 movzwl 6(r5),r1
5964 bicl2 #-65536,r1
5965 bicl3 #-65536,(r5),r2
5966 movzwl 2(r5),r0
5967 bicl2 #-65536,r0
5968 movl r3,r6
5969 movl r1,r4
5970 mull3 r0,r6,-8(fp)
5971 mull2 r2,r6
5972 mull2 r4,r2
5973 mull2 r0,r4
5974 addl3 -8(fp),r2,r0
5975 bicl3 #0,r0,-8(fp)
5976 cmpl -8(fp),r2
5977 bgequ noname.566
5978 addl2 #65536,r4
5979noname.566:
5980 movzwl -6(fp),r0
5981 bicl2 #-65536,r0
5982 addl2 r0,r4
5983 bicl3 #-65536,-8(fp),r0
5984 ashl #16,r0,r1
5985 addl2 r1,r6
5986 bicl2 #0,r6
5987 cmpl r6,r1
5988 bgequ noname.567
5989 incl r4
5990noname.567:
5991 movl r6,r3
5992 movl r4,r2
5993 bbc #31,r2,noname.568
5994 incl r9
5995noname.568:
5996 addl2 r2,r2
5997 bicl2 #0,r2
5998 bbc #31,r3,noname.569
5999 incl r2
6000noname.569:
6001 addl2 r3,r3
6002 bicl2 #0,r3
6003 addl2 r3,r8
6004 bicl2 #0,r8
6005 cmpl r8,r3
6006 bgequ noname.570
6007 incl r2
6008 bicl3 #0,r2,r0
6009 bneq noname.570
6010 incl r9
6011noname.570:
6012 addl2 r2,r10
6013 bicl2 #0,r10
6014 cmpl r10,r2
6015 bgequ noname.571
6016 incl r9
6017noname.571:
6018
6019 movl 4(ap),r0
6020 movl r8,4(r0)
6021
6022 clrl r8
6023
6024 movl 8(ap),r4
6025 movl 4(r4),r3
6026 bicl3 #-65536,r3,r5
6027 extzv #16,#16,r3,r0
6028 bicl3 #-65536,r0,r3
6029 mull3 r5,r3,r1
6030 mull2 r5,r5
6031 mull2 r3,r3
6032 bicl3 #32767,r1,r0
6033 extzv #15,#17,r0,r0
6034 addl2 r0,r3
6035 bicl2 #-65536,r1
6036 ashl #17,r1,r1
6037 addl2 r1,r5
6038 bicl2 #0,r5
6039 cmpl r5,r1
6040 bgequ noname.572
6041 incl r3
6042noname.572:
6043 movl r5,r1
6044 movl r3,r2
6045 addl2 r1,r10
6046 bicl2 #0,r10
6047 cmpl r10,r1
6048 bgequ noname.573
6049 incl r2
6050noname.573:
6051 addl2 r2,r9
6052 bicl2 #0,r9
6053 cmpl r9,r2
6054 bgequ noname.574
6055 incl r8
6056noname.574:
6057
6058 bicl3 #-65536,8(r4),r3
6059 movzwl 10(r4),r1
6060 bicl2 #-65536,r1
6061 bicl3 #-65536,(r4),r2
6062 movzwl 2(r4),r0
6063 bicl2 #-65536,r0
6064 movl r3,r6
6065 movl r1,r5
6066 mull3 r0,r6,r7
6067 mull2 r2,r6
6068 mull2 r5,r2
6069 mull2 r0,r5
6070 addl2 r2,r7
6071 bicl2 #0,r7
6072 cmpl r7,r2
6073 bgequ noname.575
6074 addl2 #65536,r5
6075noname.575:
6076 extzv #16,#16,r7,r0
6077 bicl2 #-65536,r0
6078 addl2 r0,r5
6079 bicl3 #-65536,r7,r0
6080 ashl #16,r0,r1
6081 addl2 r1,r6
6082 bicl2 #0,r6
6083 cmpl r6,r1
6084 bgequ noname.576
6085 incl r5
6086noname.576:
6087 movl r6,r3
6088 movl r5,r2
6089 bbc #31,r2,noname.577
6090 incl r8
6091noname.577:
6092 addl2 r2,r2
6093 bicl2 #0,r2
6094 bbc #31,r3,noname.578
6095 incl r2
6096noname.578:
6097 addl2 r3,r3
6098 bicl2 #0,r3
6099 addl2 r3,r10
6100 bicl2 #0,r10
6101 cmpl r10,r3
6102 bgequ noname.579
6103 incl r2
6104 bicl3 #0,r2,r0
6105 bneq noname.579
6106 incl r8
6107noname.579:
6108 addl2 r2,r9
6109 bicl2 #0,r9
6110 cmpl r9,r2
6111 bgequ noname.580
6112 incl r8
6113noname.580:
6114
6115 movl 4(ap),r0
6116 movl r10,8(r0)
6117
6118 clrl r10
6119
6120 movl 8(ap),r0
6121 bicl3 #-65536,12(r0),r3
6122 movzwl 14(r0),r1
6123 bicl2 #-65536,r1
6124 bicl3 #-65536,(r0),r2
6125 movzwl 2(r0),r0
6126 bicl2 #-65536,r0
6127 movl r3,r5
6128 movl r1,r4
6129 mull3 r0,r5,r6
6130 mull2 r2,r5
6131 mull3 r2,r4,-12(fp)
6132 mull2 r0,r4
6133 addl2 -12(fp),r6
6134 bicl2 #0,r6
6135 cmpl r6,-12(fp)
6136 bgequ noname.581
6137 addl2 #65536,r4
6138noname.581:
6139 extzv #16,#16,r6,r0
6140 bicl2 #-65536,r0
6141 addl2 r0,r4
6142 bicl3 #-65536,r6,r0
6143 ashl #16,r0,-12(fp)
6144 addl2 -12(fp),r5
6145 bicl2 #0,r5
6146 cmpl r5,-12(fp)
6147 bgequ noname.582
6148 incl r4
6149noname.582:
6150 movl r5,r3
6151 movl r4,r2
6152 bbc #31,r2,noname.583
6153 incl r10
6154noname.583:
6155 addl2 r2,r2
6156 bicl2 #0,r2
6157 bbc #31,r3,noname.584
6158 incl r2
6159noname.584:
6160 addl2 r3,r3
6161 bicl2 #0,r3
6162 addl2 r3,r9
6163 bicl2 #0,r9
6164 cmpl r9,r3
6165 bgequ noname.585
6166 incl r2
6167 bicl3 #0,r2,r0
6168 bneq noname.585
6169 incl r10
6170noname.585:
6171 addl2 r2,r8
6172 bicl2 #0,r8
6173 cmpl r8,r2
6174 bgequ noname.586
6175 incl r10
6176noname.586:
6177
6178 movl 8(ap),r0
6179 bicl3 #-65536,8(r0),r3
6180 movzwl 10(r0),r1
6181 bicl2 #-65536,r1
6182 bicl3 #-65536,4(r0),r2
6183 movzwl 6(r0),r0
6184 bicl2 #-65536,r0
6185 movl r3,r5
6186 movl r1,r4
6187 mull3 r0,r5,-16(fp)
6188 mull2 r2,r5
6189 mull3 r2,r4,-20(fp)
6190 mull2 r0,r4
6191 addl3 -16(fp),-20(fp),r0
6192 bicl3 #0,r0,-16(fp)
6193 cmpl -16(fp),-20(fp)
6194 bgequ noname.587
6195 addl2 #65536,r4
6196noname.587:
6197 movzwl -14(fp),r0
6198 bicl2 #-65536,r0
6199 addl2 r0,r4
6200 bicl3 #-65536,-16(fp),r0
6201 ashl #16,r0,-20(fp)
6202 addl2 -20(fp),r5
6203 bicl2 #0,r5
6204 cmpl r5,-20(fp)
6205 bgequ noname.588
6206 incl r4
6207noname.588:
6208 movl r5,r3
6209 movl r4,r2
6210 bbc #31,r2,noname.589
6211 incl r10
6212noname.589:
6213 addl2 r2,r2
6214 bicl2 #0,r2
6215 bbc #31,r3,noname.590
6216 incl r2
6217noname.590:
6218 addl2 r3,r3
6219 bicl2 #0,r3
6220 addl2 r3,r9
6221 bicl2 #0,r9
6222 cmpl r9,r3
6223 bgequ noname.591
6224 incl r2
6225 bicl3 #0,r2,r0
6226 bneq noname.591
6227 incl r10
6228noname.591:
6229 addl2 r2,r8
6230 bicl2 #0,r8
6231 cmpl r8,r2
6232 bgequ noname.592
6233 incl r10
6234noname.592:
6235 movl 4(ap),r0
6236 movl r9,12(r0)
6237
6238 clrl r9
6239
6240 movl 8(ap),r3
6241 movl 8(r3),r4
6242 bicl3 #-65536,r4,r5
6243 extzv #16,#16,r4,r0
6244 bicl3 #-65536,r0,r4
6245 mull3 r5,r4,-24(fp)
6246 mull2 r5,r5
6247 mull2 r4,r4
6248 bicl3 #32767,-24(fp),r0
6249 extzv #15,#17,r0,r0
6250 addl2 r0,r4
6251 bicl3 #-65536,-24(fp),r0
6252 ashl #17,r0,-24(fp)
6253 addl2 -24(fp),r5
6254 bicl2 #0,r5
6255 cmpl r5,-24(fp)
6256 bgequ noname.593
6257 incl r4
6258noname.593:
6259 movl r5,r1
6260 movl r4,r2
6261 addl2 r1,r8
6262 bicl2 #0,r8
6263 cmpl r8,r1
6264 bgequ noname.594
6265 incl r2
6266noname.594:
6267 addl2 r2,r10
6268 bicl2 #0,r10
6269 cmpl r10,r2
6270 bgequ noname.595
6271 incl r9
6272noname.595:
6273
6274 bicl3 #-65536,12(r3),r4
6275 movzwl 14(r3),r1
6276 bicl2 #-65536,r1
6277 bicl3 #-65536,4(r3),r2
6278 movzwl 6(r3),r0
6279 bicl2 #-65536,r0
6280 movl r4,r6
6281 movl r1,r5
6282 mull3 r0,r6,-28(fp)
6283 mull2 r2,r6
6284 mull3 r2,r5,-32(fp)
6285 mull2 r0,r5
6286 addl3 -28(fp),-32(fp),r0
6287 bicl3 #0,r0,-28(fp)
6288 cmpl -28(fp),-32(fp)
6289 bgequ noname.596
6290 addl2 #65536,r5
6291noname.596:
6292 movzwl -26(fp),r0
6293 bicl2 #-65536,r0
6294 addl2 r0,r5
6295 bicl3 #-65536,-28(fp),r0
6296 ashl #16,r0,-32(fp)
6297 addl2 -32(fp),r6
6298 bicl2 #0,r6
6299 cmpl r6,-32(fp)
6300 bgequ noname.597
6301 incl r5
6302noname.597:
6303 movl r6,r3
6304 movl r5,r2
6305 bbc #31,r2,noname.598
6306 incl r9
6307noname.598:
6308 addl2 r2,r2
6309 bicl2 #0,r2
6310 bbc #31,r3,noname.599
6311 incl r2
6312noname.599:
6313 addl2 r3,r3
6314 bicl2 #0,r3
6315 addl2 r3,r8
6316 bicl2 #0,r8
6317 cmpl r8,r3
6318 bgequ noname.600
6319 incl r2
6320 bicl3 #0,r2,r0
6321 bneq noname.600
6322 incl r9
6323noname.600:
6324 addl2 r2,r10
6325 bicl2 #0,r10
6326 cmpl r10,r2
6327 bgequ noname.601
6328 incl r9
6329noname.601:
6330
6331 movl 4(ap),r0
6332 movl r8,16(r0)
6333
6334 clrl r8
6335
6336 movl 8(ap),r0
6337 bicl3 #-65536,12(r0),r3
6338 movzwl 14(r0),r1
6339 bicl2 #-65536,r1
6340 bicl3 #-65536,8(r0),r2
6341 movzwl 10(r0),r0
6342 bicl2 #-65536,r0
6343 movl r3,r5
6344 movl r1,r4
6345 mull3 r0,r5,-36(fp)
6346 mull2 r2,r5
6347 mull3 r2,r4,-40(fp)
6348 mull2 r0,r4
6349 addl3 -36(fp),-40(fp),r0
6350 bicl3 #0,r0,-36(fp)
6351 cmpl -36(fp),-40(fp)
6352 bgequ noname.602
6353 addl2 #65536,r4
6354noname.602:
6355 movzwl -34(fp),r0
6356 bicl2 #-65536,r0
6357 addl2 r0,r4
6358 bicl3 #-65536,-36(fp),r0
6359 ashl #16,r0,-40(fp)
6360 addl2 -40(fp),r5
6361 bicl2 #0,r5
6362 cmpl r5,-40(fp)
6363 bgequ noname.603
6364 incl r4
6365noname.603:
6366 movl r5,r3
6367 movl r4,r2
6368 bbc #31,r2,noname.604
6369 incl r8
6370noname.604:
6371 addl2 r2,r2
6372 bicl2 #0,r2
6373 bbc #31,r3,noname.605
6374 incl r2
6375noname.605:
6376 addl2 r3,r3
6377 bicl2 #0,r3
6378 addl2 r3,r10
6379 bicl2 #0,r10
6380 cmpl r10,r3
6381 bgequ noname.606
6382 incl r2
6383 bicl3 #0,r2,r0
6384 bneq noname.606
6385 incl r8
6386noname.606:
6387 addl2 r2,r9
6388 bicl2 #0,r9
6389 cmpl r9,r2
6390 bgequ noname.607
6391 incl r8
6392noname.607:
6393
6394 movl 4(ap),r4
6395 movl r10,20(r4)
6396
6397 clrl r10
6398
6399 movl 8(ap),r0
6400 movl 12(r0),r3
6401 bicl3 #-65536,r3,r5
6402 extzv #16,#16,r3,r0
6403 bicl3 #-65536,r0,r3
6404 mull3 r5,r3,-44(fp)
6405 mull2 r5,r5
6406 mull2 r3,r3
6407 bicl3 #32767,-44(fp),r0
6408 extzv #15,#17,r0,r0
6409 addl2 r0,r3
6410 bicl3 #-65536,-44(fp),r0
6411 ashl #17,r0,-44(fp)
6412 addl2 -44(fp),r5
6413 bicl2 #0,r5
6414 cmpl r5,-44(fp)
6415 bgequ noname.608
6416 incl r3
6417noname.608:
6418 movl r5,r1
6419 movl r3,r2
6420 addl2 r1,r9
6421 bicl2 #0,r9
6422 cmpl r9,r1
6423 bgequ noname.609
6424 incl r2
6425noname.609:
6426 addl2 r2,r8
6427 bicl2 #0,r8
6428 cmpl r8,r2
6429 bgequ noname.610
6430 incl r10
6431noname.610:
6432
6433 movl r9,24(r4)
6434
6435 movl r8,28(r4)
6436
6437 ret
6438
6439; For now, the code below doesn't work, so I end this prematurely.
6440.end
diff --git a/src/lib/libcrypto/bn/asm/x86.pl b/src/lib/libcrypto/bn/asm/x86.pl
new file mode 100644
index 0000000000..1bc4f1bb27
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86.pl
@@ -0,0 +1,28 @@
#!/usr/local/bin/perl
#
# Driver: loads the perlasm framework and the per-routine generators
# found under x86/, then emits each bignum primitive in turn.
# $ARGV[0] is passed to asm_init() untouched.

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

# Generator modules, loaded in this order.
foreach $module ("mul_add","mul","sqr","div","add","sub","comba")
	{
	require("x86/$module.pl");
	}

&asm_init($ARGV[0],$0);

# Word-array primitives: the generator sub and the emitted symbol
# share one name.
foreach $routine ("bn_mul_add_words","bn_mul_words","bn_sqr_words",
		  "bn_div_words","bn_add_words","bn_sub_words")
	{
	&$routine($routine);
	}

# Fixed-size comba routines: multiply (8 then 4 words), then square.
foreach $words (8,4)
	{ &bn_mul_comba("bn_mul_comba$words",$words); }
foreach $words (8,4)
	{ &bn_sqr_comba("bn_sqr_comba$words",$words); }

&asm_finish();
28
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl
new file mode 100644
index 0000000000..0b5cf583e3
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/add.pl
@@ -0,0 +1,76 @@
1#!/usr/local/bin/perl
2# x86 assembler
3
# Emit a function $name that adds two word arrays:
#   wparam(0) = r, wparam(1) = a, wparam(2) = b, wparam(3) = num.
# The running carry is kept in eax, so it is also the return value.
sub bn_add_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register assignment.
	($a,$b,$c,$r)=("esi","edi","eax","ebx");
	($tmp1,$tmp2,$num)=("ecx","edx","ebp");

	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($b,&wparam(2));		# b
	&mov($num,&wparam(3));		# num
	&xor($c,$c);			# carry = 0
	&and($num,0xfffffff8);		# whole groups of 8 words only

	&jz(&label("aw_finish"));

	# Main loop: eight words per pass.
	&set_label("aw_loop",0);
	$i=0;
	while ($i<8)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# a[i]
		&mov($tmp2,&DWP($i*4,$b,"",0));	# b[i]
		&add($tmp1,$c);			# a[i] + carry in
		&mov($c,0);
		&adc($c,$c);			# carry out of the first add
		&add($tmp1,$tmp2);		# ... + b[i]
		&adc($c,0);			# plus carry out of the second add
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# r[i]
		$i++;
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# reload num
	&and($num,7);
	&jz(&label("aw_end"));

	$i=0;
	while ($i<7)
		{
		# Rounds 0..5 count down and may leave early; round 6 is
		# always the last, so it needs neither dec nor jz.
		local($more)=($i != 6);

		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# a[i]
		&mov($tmp2,&DWP($i*4,$b,"",0));	# b[i]
		&add($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&add($tmp1,$tmp2);
		&adc($c,0);
		&dec($num) if $more;
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# r[i]
		&jz(&label("aw_end")) if $more;
		$i++;
		}
	&set_label("aw_end",0);

	# No move into eax needed here: $c already is eax.

	&function_end($name);
	}
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl
new file mode 100644
index 0000000000..2291253629
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/comba.pl
@@ -0,0 +1,277 @@
1#!/usr/local/bin/perl
2# x86 assembler
3
# Emit one comba multiply step: (c2,c1,c0) += eax * edx.
# eax and edx must already hold a[$ai] and b[$bi].
#
# $pos selects what is loaded or stored around the step:
#   0    - load a[$na] into eax and b[$nb] into edx for the next step
#   1    - store $c0 to r[$i] (r is fetched from wparam(0) into eax),
#          then load a[$na] / b[$nb] for the next column
#   > 1  - store $c0 to r[$i] only; nothing further is loaded
sub mul_add_c
	{
	local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("mul a[$ai]*b[$bi]");

	&mul("edx");
	&add($c0,"eax");
	if ($pos == 0)
		{ &mov("eax",&DWP(($na)*4,$a,"",0)); }	# next a
	elsif ($pos > 0)
		{ &mov("eax",&wparam(0)); }		# r[]
	###
	&adc($c1,"edx");
	if ($pos == 0 || $pos == 1)
		{ &mov("edx",&DWP(($nb)*4,$b,"",0)); }	# next b
	###
	&adc($c2,0);
	if ($pos > 0)
		{
		&mov(&DWP($i*4,"eax","",0),$c0);	# r[i] = c0
		# eax held r; reload it with the next a unless this was
		# the final step.
		&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;
		}
	}
31
# Emit one comba squaring step for a single (not doubled) product:
# (c2,c1,c0) += eax * (eax or edx).  eax must already hold a[$ai];
# when $ai != $bi, edx must hold a[$bi].
#
# $pos == 0 loads a[$na] into eax for the next step; $pos > 0 stores
# $c0 to r[$i]; $pos == 1 additionally loads a[$na] into eax and, when
# $na != $nb, a[$nb] into edx.
sub sqr_add_c
	{
	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	&mul(($ai == $bi) ? "eax" : "edx");
	&add($c0,"eax");
	if ($pos == 0)
		{ &mov("eax",&DWP(($na)*4,$a,"",0)); }	# next a
	###
	&adc($c1,"edx");
	if (($pos == 1) && ($na != $nb))
		{ &mov("edx",&DWP(($nb)*4,$a,"",0)); }
	###
	&adc($c2,0);
	if ($pos > 0)
		{
		&mov(&DWP($i*4,$r,"",0),$c0);		# r[i] = c0
		&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;
		}
	}
60
# Emit one comba squaring step for a doubled cross product:
# (c2,c1,c0) += 2 * eax * (eax or edx).  The 64-bit product is doubled
# in edx:eax first, with the bit shifted out going into $c2, and the
# doubled value is then added to (c1,c0) with its carry going to $c2.
#
# $pos of 0 or 1 loads a[$na] into eax; $pos > 0 stores $c0 to r[$i];
# $pos <= 1 loads a[$nb] into edx when $na != $nb.
sub sqr_add_c2
	{
	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	&mul(($ai == $bi) ? "eax" : "edx");
	&add("eax","eax");		# double the low word
	###
	&adc("edx","edx");		# double the high word, plus carry
	###
	&adc($c2,0);			# bit shifted out of the high word
	&add($c0,"eax");
	&adc($c1,"edx");
	if ($pos == 0 || $pos == 1)
		{ &mov("eax",&DWP(($na)*4,$a,"",0)); }	# next a
	&adc($c2,0);
	if ($pos > 0)
		{ &mov(&DWP($i*4,$r,"",0),$c0); }	# r[i] = c0
	if (($pos <= 1) && ($na != $nb))
		{ &mov("edx",&DWP(($nb)*4,$a,"",0)); }
	###
	}
92
# Emit a fully unrolled comba multiply named $name for $num-word
# operands: wparam(0) = r, wparam(1) = a, wparam(2) = b.
#
# Output words are produced column by column.  Column $i sums
# a[$ai]*b[$bi] with $ai running down from $as and $bi running up from
# $bs.  The three accumulator registers rotate roles after each column.
sub bn_mul_comba
	{
	local($name,$num)=@_;
	local($a,$b,$c0,$c1,$c2);
	local($i,$as,$ae,$bs,$be,$ai,$bi);
	local($tot,$end);
	local($rising,$final);

	&function_begin_B($name,"");

	($c0,$c1,$c2)=("ebx","ecx","ebp");
	($a,$b)=("esi","edi");

	$as=$ae=$bs=$be=0;
	$tot=2*$num-1;			# index of the last computed column + 1

	# Pushes and argument loads are interleaved; wparam() is
	# evaluated after each preceding push.
	&push("esi");
	 &mov($a,&wparam(1));
	&push("edi");
	 &mov($b,&wparam(2));
	&push("ebp");
	&push("ebx");

	&xor($c0,$c0);
	 &mov("eax",&DWP(0,$a,"",0));	# first word of a
	&xor($c1,$c1);
	 &mov("edx",&DWP(0,$b,"",0));	# first word of b

	for ($i=0; $i<$tot; $i++)
		{
		# Columns before the middle one grow by one product each.
		$rising=($i < ($num-1));
		($ai,$bi,$end)=($as,$bs,$be+1);

		&comment("################## Calculate word $i");

		for ($j=$bs; $j<$end; $j++)
			{
			&xor($c2,$c2) if ($j == $bs);

			$final=(($j+1) == $end);
			if ($final)
				{
				# Last product of the column: store r[$i] and
				# point at the start of the next column
				# (mode 2 on the very last column).
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+($rising ? 1 : 0);
				$nb=$bs+($rising ? 0 : 1);
				}
			else
				{
				$v=0;
				$na=$ai-1;
				$nb=$bi+1;
				}
#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
			&mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
			if ($final)
				{
				&comment("saved r[$i]");
				($c0,$c1,$c2)=($c1,$c2,$c0);
				}
			$ai--;
			$bi++;
			}

		if ($rising)
			{ $as++; $be++; }
		else
			{ $ae++; $bs++; }
		}

	# The top word.  eax still holds r, loaded by the last
	# mul_add_c call (mode 2 does not reload eax).
	&comment("save r[$i]");
	&mov(&DWP($i*4,"eax","",0),$c0);

	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
183
# Emit a fully unrolled comba squaring routine named $name for a
# $num-word operand: wparam(0) = r, wparam(1) = a.
#
# Each output column walks index pairs ($ai,$bi) with $ai descending
# and $bi ascending, and stops once the pairs would cross: products
# with $ai != $bi are added doubled (sqr_add_c2), the diagonal
# $ai == $bi is added once (sqr_add_c).
sub bn_sqr_comba
	{
	local($name,$num)=@_;
	# Register names only.  These must not be seeded from @_ (which
	# holds $name and $num); they are assigned just below.
	local($r,$a,$c0,$c1,$c2);
	local($i,$as,$ae,$bs,$be,$ai,$bi);
	local($b,$tot,$end,$half);

	&function_begin_B($name,"");

	$c0="ebx";
	$c1="ecx";
	$c2="ebp";
	$a="esi";
	$r="edi";

	&push("esi");
	&push("edi");
	&push("ebp");
	&push("ebx");
	&mov($r,&wparam(0));
	&mov($a,&wparam(1));
	&xor($c0,$c0);
	&xor($c1,$c1);
	&mov("eax",&DWP(0,$a,"",0)); # load the first word

	$as=0;
	$ae=0;
	$bs=0;
	$be=0;
	$tot=$num+$num-1;

	for ($i=0; $i<$tot; $i++)
		{
		$ai=$as;
		$bi=$bs;
		$end=$be+1;

		&comment("############### Calculate word $i");
		for ($j=$bs; $j<$end; $j++)
			{
			&xor($c2,$c2) if ($j == $bs);
			# $v != 0 marks the last product of this column:
			# the next pair would cross the diagonal.
			if (($ai-1) < ($bi+1))
				{
				$v=1;
				$v=2 if ($i+1) == $tot;
				}
			else
				{ $v=0; }
			if (!$v)
				{
				$na=$ai-1;
				$nb=$bi+1;
				}
			else
				{
				# first pair of the next column
				$na=$as+($i < ($num-1));
				$nb=$bs+($i >= ($num-1));
				}
			if ($ai == $bi)
				{
				&sqr_add_c($r,$a,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			else
				{
				&sqr_add_c2($r,$a,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			if ($v)
				{
				&comment("saved r[$i]");
				# rotate the accumulator roles for the next column
				($c0,$c1,$c2)=($c1,$c2,$c0);
				last;
				}
			$ai--;
			$bi++;
			}
		$as++ if ($i < ($num-1));
		$ae++ if ($i >= ($num-1));

		$bs++ if ($i >= ($num-1));
		$be++ if ($i < ($num-1));
		}
	# top word of the result
	&mov(&DWP($i*4,$r,"",0),$c0);
	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
276
2771;
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl
new file mode 100644
index 0000000000..0e90152caa
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/div.pl
@@ -0,0 +1,15 @@
1#!/usr/local/bin/perl
2# x86 assembler
3
# Emit a function $name that divides the double word
# wparam(0):wparam(1) (high:low) by wparam(2) with a single div.
# The quotient is left in eax by div.
sub bn_div_words
	{
	local($name)=@_;
	local($high,$low,$divisor)=("edx","eax","ebx");

	&function_begin($name,"");
	&mov($high,&wparam(0));		# high word of the dividend
	&mov($low,&wparam(1));		# low word of the dividend
	&mov($divisor,&wparam(2));	# divisor
	&div($divisor);
	&function_end($name);
	}
151;
diff --git a/src/lib/libcrypto/bn/asm/x86/f b/src/lib/libcrypto/bn/asm/x86/f
new file mode 100644
index 0000000000..22e4112224
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/f
@@ -0,0 +1,3 @@
1#!/usr/local/bin/perl
2# x86 assembler
3
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl
new file mode 100644
index 0000000000..674cb9b055
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/mul.pl
@@ -0,0 +1,77 @@
1#!/usr/local/bin/perl
2# x86 assembler
3
# Emit a function $name that multiplies a word array by one word:
#   wparam(0) = r, wparam(1) = a, wparam(2) = num, wparam(3) = w.
# Each step computes edx:eax = a[i]*w + c, stores eax to r[i] and keeps
# edx as the next c.  The final c is returned in eax.
sub bn_mul_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register assignment.
	($Low,$High)=("eax","edx");
	($a,$w,$r,$c,$num)=("ebx","ecx","edi","esi","ebp");

	&xor($c,$c);			# c = 0
	&mov($r,&wparam(0));
	&mov($a,&wparam(1));
	&mov($num,&wparam(2));
	&mov($w,&wparam(3));

	&and($num,0xfffffff8);		# whole groups of 8 words only
	&jz(&label("mw_finish"));

	# Main loop: eight words per pass; $i is a byte offset here.
	&set_label("mw_loop",0);
	$i=0;
	while ($i<32)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# a[i]
		&mul($w);			# a[i] * w
		&add("eax",$c);			# low += c
		&adc("edx",0);			# high += carry
		&mov(&DWP($i,$r,"",0),"eax");	# r[i] = low
		&mov($c,"edx");			# c = high
		$i+=4;
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("mw_finish",0);
	&mov($num,&wparam(2));		# reload num
	&and($num,7);
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	$i=0;
	while ($i<7)
		{
		# Round 6 is always the last: no countdown, no early exit.
		local($more)=($i != 7-1);

		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# a[i]
		&mul($w);			# a[i] * w
		&add("eax",$c);			# low += c
		&adc("edx",0);			# high += carry
		&mov(&DWP($i*4,$r,"",0),"eax");	# r[i] = low
		&mov($c,"edx");			# c = high
		&dec($num) if $more;
		&jz(&label("mw_end")) if $more;
		$i++;
		}
	&set_label("mw_end",0);
	&mov("eax",$c);			# return c

	&function_end($name);
	}
76
771;
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl
new file mode 100644
index 0000000000..61830d3a90
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/mul_add.pl
@@ -0,0 +1,87 @@
1#!/usr/local/bin/perl
2# x86 assembler
3
# Emit a function $name that multiplies a word array by one word and
# accumulates into r:
#   wparam(0) = r, wparam(1) = a, wparam(2) = num, wparam(3) = w.
# Each step computes edx:eax = a[i]*w + c + r[i], stores eax to r[i]
# and keeps edx as the next c.  The final c is returned in eax.
sub bn_mul_add_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register assignment (ecx is used directly as the counter).
	($Low,$High)=("eax","edx");
	($a,$w,$r,$c)=("ebx","ebp","edi","esi");

	&xor($c,$c);			# c = 0
	&mov($r,&wparam(0));

	&mov("ecx",&wparam(2));		# num
	&mov($a,&wparam(1));

	&and("ecx",0xfffffff8);		# whole groups of 8 words only
	&mov($w,&wparam(3));

	&push("ecx");			# one stack slot used as swtmp(0)

	# Still tests the result of the and above: only mov and push
	# were emitted in between.
	&jz(&label("maw_finish"));

	# Main loop: eight words per pass; $i is a byte offset here.
	&set_label("maw_loop",0);

	&mov(&swtmp(0),"ecx");		# park the counter

	$i=0;
	while ($i<32)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# a[i]
		&mul($w);			# a[i] * w
		&add("eax",$c);			# low += c
		&mov($c,&DWP($i,$r,"",0));	# fetch r[i] (reuses $c)
		&adc("edx",0);			# high += carry
		&add("eax",$c);			# low += r[i]
		&adc("edx",0);			# high += carry
		&mov(&DWP($i,$r,"",0),"eax");	# r[i] = low
		&mov($c,"edx");			# c = high
		$i+=4;
		}

	&comment("");
	&mov("ecx",&swtmp(0));		# restore the counter
	&add($a,32);
	&add($r,32);
	&sub("ecx",8);
	&jnz(&label("maw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));		# reload num
	&and("ecx",7);
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	$i=0;
	while ($i<7)
		{
		# Round 6 is always the last: no countdown, no early exit.
		local($more)=($i != 7-1);

		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# a[i]
		&mul($w);			# a[i] * w
		&add("eax",$c);			# low += c
		&mov($c,&DWP($i*4,$r,"",0));	# fetch r[i] (reuses $c)
		&adc("edx",0);			# high += carry
		&add("eax",$c);			# low += r[i]
		&adc("edx",0);			# high += carry
		&dec("ecx") if $more;
		&mov(&DWP($i*4,$r,"",0),"eax");	# r[i] = low
		&mov($c,"edx");			# c = high
		&jz(&label("maw_end")) if $more;
		$i++;
		}
	&set_label("maw_end",0);
	&mov("eax",$c);			# return c

	&pop("ecx");			# drop the temporary slot

	&function_end($name);
	}
86
871;
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl
new file mode 100644
index 0000000000..1f90993cf6
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/sqr.pl
@@ -0,0 +1,60 @@
1#!/usr/local/bin/perl
2# x86 assembler
3
# Emit a function $name that squares each word of an array:
#   wparam(0) = r, wparam(1) = a, wparam(2) = num.
# The double-word square of a[i] is stored to r[2*i] (low) and
# r[2*i+1] (high).
sub bn_sqr_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register assignment.
	($r,$a,$num)=("esi","edi","ebx");

	&mov($r,&wparam(0));
	&mov($a,&wparam(1));
	&mov($num,&wparam(2));

	&and($num,0xfffffff8);		# whole groups of 8 words only
	&jz(&label("sw_finish"));

	# Main loop: eight words per pass; $i is a byte offset into a,
	# and the matching offset into r is twice that.
	&set_label("sw_loop",0);
	$i=0;
	while ($i<32)
		{
		&comment("Round $i");
		&mov("eax",&DWP($i,$a,"",0));		# a[i]
		&mul("eax");				# a[i] * a[i]
		&mov(&DWP($i*2,$r,"",0),"eax");		# low word
		&mov(&DWP($i*2+4,$r,"",0),"edx");	# high word
		$i+=4;
		}

	&comment("");
	&add($a,32);
	&add($r,64);
	&sub($num,8);
	&jnz(&label("sw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("sw_finish",0);
	&mov($num,&wparam(2));		# reload num
	&and($num,7);
	&jz(&label("sw_end"));

	$i=0;
	while ($i<7)
		{
		# Round 6 is always the last: no countdown, no early exit.
		local($more)=($i != 7-1);

		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));		# a[i]
		&mul("eax");				# a[i] * a[i]
		&mov(&DWP($i*8,$r,"",0),"eax");		# low word
		&dec($num) if $more;
		&mov(&DWP($i*8+4,$r,"",0),"edx");	# high word
		&jz(&label("sw_end")) if $more;
		$i++;
		}
	&set_label("sw_end",0);

	&function_end($name);
	}
59
601;
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl
new file mode 100644
index 0000000000..837b0e1b07
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/sub.pl
@@ -0,0 +1,76 @@
1#!/usr/local/bin/perl
2# x86 assembler
3
# Emit a function $name that subtracts two word arrays:
#   wparam(0) = r, wparam(1) = a, wparam(2) = b, wparam(3) = num.
# The running borrow is kept in eax, so it is also the return value.
# The labels keep the "aw_" prefix used by the addition generator.
sub bn_sub_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register assignment.
	($a,$b,$c,$r)=("esi","edi","eax","ebx");
	($tmp1,$tmp2,$num)=("ecx","edx","ebp");

	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($b,&wparam(2));		# b
	&mov($num,&wparam(3));		# num
	&xor($c,$c);			# borrow = 0
	&and($num,0xfffffff8);		# whole groups of 8 words only

	&jz(&label("aw_finish"));

	# Main loop: eight words per pass.
	&set_label("aw_loop",0);
	$i=0;
	while ($i<8)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# a[i]
		&mov($tmp2,&DWP($i*4,$b,"",0));	# b[i]
		&sub($tmp1,$c);			# a[i] - borrow in
		&mov($c,0);
		&adc($c,$c);			# borrow out of the first sub
		&sub($tmp1,$tmp2);		# ... - b[i]
		&adc($c,0);			# plus borrow out of the second sub
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# r[i]
		$i++;
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# reload num
	&and($num,7);
	&jz(&label("aw_end"));

	$i=0;
	while ($i<7)
		{
		# Rounds 0..5 count down and may leave early; round 6 is
		# always the last, so it needs neither dec nor jz.
		local($more)=($i != 6);

		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# a[i]
		&mov($tmp2,&DWP($i*4,$b,"",0));	# b[i]
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&dec($num) if $more;
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# r[i]
		&jz(&label("aw_end")) if $more;
		$i++;
		}
	&set_label("aw_end",0);

	# No move into eax needed here: $c already is eax.

	&function_end($name);
	}
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
new file mode 100644
index 0000000000..7378344251
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
@@ -0,0 +1,593 @@
1/*
2 * x86_64 BIGNUM accelerator version 0.1, December 2002.
3 *
4 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 * project.
6 *
7 * Rights for redistribution and usage in source and binary forms are
8 * granted according to the OpenSSL license. Warranty of any kind is
9 * disclaimed.
10 *
11 * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
12 * versions, like 1.0...
13 * A. Well, that's because this code is basically a quick-n-dirty
14 * proof-of-concept hack. As you can see it's implemented with
15 * inline assembler, which means that you're bound to GCC and that
16 * there might be enough room for further improvement.
17 *
18 * Q. Why inline assembler?
19 * A. x86_64 features own ABI which I'm not familiar with. This is
20 * why I decided to let the compiler take care of subroutine
21 * prologue/epilogue as well as register allocation. For reference.
22 * Win64 implements different ABI for AMD64, different from Linux.
23 *
24 * Q. How much faster does it get?
25 * A. 'apps/openssl speed rsa dsa' output with no-asm:
26 *
27 * sign verify sign/s verify/s
28 * rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
29 * rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
30 * rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
31 * rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
32 * sign verify sign/s verify/s
33 * dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
34 * dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
35 * dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
36 *
37 * 'apps/openssl speed rsa dsa' output with this module:
38 *
39 * sign verify sign/s verify/s
40 * rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
41 * rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
42 * rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
43 * rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
44 * sign verify sign/s verify/s
45 * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
46 * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
47 * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
48 *
49 * For the reference. IA-32 assembler implementation performs
50 * very much like 64-bit code compiled with no-asm on the same
51 * machine.
52 */
53
54#define BN_ULONG unsigned long
55
56/*
57 * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
58 * "g"(0) let the compiler to decide where does it
59 * want to keep the value of zero;
60 */
/*
 * mul_add(r,a,word,carry): r = low word of (a*word + carry + r),
 * carry = high word of that sum.
 *
 * Three asm statements: mulq leaves the double-word product in
 * high:low; the second adds low into carry and folds the carry flag
 * into high; the third adds carry into r (in memory) and again folds
 * the carry flag into high.  high then becomes the new carry.
 */
61#define mul_add(r,a,word,carry) do { \
62 register BN_ULONG high,low; \
63 asm ("mulq %3" \
64 : "=a"(low),"=d"(high) \
65 : "a"(word),"m"(a) \
66 : "cc"); \
67 asm ("addq %2,%0; adcq %3,%1" \
68 : "+r"(carry),"+d"(high)\
69 : "a"(low),"g"(0) \
70 : "cc"); \
71 asm ("addq %2,%0; adcq %3,%1" \
72 : "+m"(r),"+d"(high) \
73 : "r"(carry),"g"(0) \
74 : "cc"); \
75 carry=high; \
76 } while (0)
77
/*
 * mul(r,a,word,carry): r = low word of (a*word + carry),
 * carry = high word of that sum.
 *
 * mulq leaves the double-word product in high:low; the second asm
 * adds low into carry and folds the carry flag into high.  The sum is
 * then assigned to r and high becomes the new carry.
 */
78#define mul(r,a,word,carry) do { \
79 register BN_ULONG high,low; \
80 asm ("mulq %3" \
81 : "=a"(low),"=d"(high) \
82 : "a"(word),"g"(a) \
83 : "cc"); \
84 asm ("addq %2,%0; adcq %3,%1" \
85 : "+r"(carry),"+d"(high)\
86 : "a"(low),"g"(0) \
87 : "cc"); \
88 (r)=carry, carry=high; \
89 } while (0)
90
91#define sqr(r0,r1,a) \
92 asm ("mulq %2" \
93 : "=a"(r0),"=d"(r1) \
94 : "a"(a) \
95 : "cc");
96
97BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
98 {
99 BN_ULONG c1=0;
100
101 if (num <= 0) return(c1);
102
103 while (num&~3)
104 {
105 mul_add(rp[0],ap[0],w,c1);
106 mul_add(rp[1],ap[1],w,c1);
107 mul_add(rp[2],ap[2],w,c1);
108 mul_add(rp[3],ap[3],w,c1);
109 ap+=4; rp+=4; num-=4;
110 }
111 if (num)
112 {
113 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
114 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
115 mul_add(rp[2],ap[2],w,c1); return c1;
116 }
117
118 return(c1);
119 }
120
121BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
122 {
123 BN_ULONG c1=0;
124
125 if (num <= 0) return(c1);
126
127 while (num&~3)
128 {
129 mul(rp[0],ap[0],w,c1);
130 mul(rp[1],ap[1],w,c1);
131 mul(rp[2],ap[2],w,c1);
132 mul(rp[3],ap[3],w,c1);
133 ap+=4; rp+=4; num-=4;
134 }
135 if (num)
136 {
137 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
138 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
139 mul(rp[2],ap[2],w,c1);
140 }
141 return(c1);
142 }
143
144void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
145 {
146 if (n <= 0) return;
147
148 while (n&~3)
149 {
150 sqr(r[0],r[1],a[0]);
151 sqr(r[2],r[3],a[1]);
152 sqr(r[4],r[5],a[2]);
153 sqr(r[6],r[7],a[3]);
154 a+=4; r+=8; n-=4;
155 }
156 if (n)
157 {
158 sqr(r[0],r[1],a[0]); if (--n == 0) return;
159 sqr(r[2],r[3],a[1]); if (--n == 0) return;
160 sqr(r[4],r[5],a[2]);
161 }
162 }
163
164BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
165{ BN_ULONG ret,waste;
166
167 asm ("divq %4"
168 : "=a"(ret),"=d"(waste)
169 : "a"(l),"d"(h),"g"(d)
170 : "cc");
171
172 return ret;
173}
174
175BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
176{ BN_ULONG ret=0,i=0;
177
178 if (n <= 0) return 0;
179
180 asm (
181 " subq %2,%2 \n"
182 ".align 16 \n"
183 "1: movq (%4,%2,8),%0 \n"
184 " adcq (%5,%2,8),%0 \n"
185 " movq %0,(%3,%2,8) \n"
186 " leaq 1(%2),%2 \n"
187 " loop 1b \n"
188 " sbbq %0,%0 \n"
189 : "=&a"(ret),"+c"(n),"=&r"(i)
190 : "r"(rp),"r"(ap),"r"(bp)
191 : "cc"
192 );
193
194 return ret&1;
195}
196
197#ifndef SIMICS
198BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
199{ BN_ULONG ret=0,i=0;
200
201 if (n <= 0) return 0;
202
203 asm (
204 " subq %2,%2 \n"
205 ".align 16 \n"
206 "1: movq (%4,%2,8),%0 \n"
207 " sbbq (%5,%2,8),%0 \n"
208 " movq %0,(%3,%2,8) \n"
209 " leaq 1(%2),%2 \n"
210 " loop 1b \n"
211 " sbbq %0,%0 \n"
212 : "=&a"(ret),"+c"(n),"=&r"(i)
213 : "r"(rp),"r"(ap),"r"(bp)
214 : "cc"
215 );
216
217 return ret&1;
218}
219#else
220/* Simics 1.4<7 has buggy sbbq:-( */
221#define BN_MASK2 0xffffffffffffffffL
222BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
223 {
224 BN_ULONG t1,t2;
225 int c=0;
226
227 if (n <= 0) return((BN_ULONG)0);
228
229 for (;;)
230 {
231 t1=a[0]; t2=b[0];
232 r[0]=(t1-t2-c)&BN_MASK2;
233 if (t1 != t2) c=(t1 < t2);
234 if (--n <= 0) break;
235
236 t1=a[1]; t2=b[1];
237 r[1]=(t1-t2-c)&BN_MASK2;
238 if (t1 != t2) c=(t1 < t2);
239 if (--n <= 0) break;
240
241 t1=a[2]; t2=b[2];
242 r[2]=(t1-t2-c)&BN_MASK2;
243 if (t1 != t2) c=(t1 < t2);
244 if (--n <= 0) break;
245
246 t1=a[3]; t2=b[3];
247 r[3]=(t1-t2-c)&BN_MASK2;
248 if (t1 != t2) c=(t1 < t2);
249 if (--n <= 0) break;
250
251 a+=4;
252 b+=4;
253 r+=4;
254 }
255 return(c);
256 }
257#endif
258
259/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
260/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
261/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
262/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
263
264#if 0
265/* original macros are kept for reference purposes */
/*
 * Both reference macros use t1 and t2 from the expansion site.
 * NOTE(review): in the reference mul_add_c2 below, t2 can already be
 * all ones after "t2 += (t1<t0)"; the following "t2 += (c0<t1)" then
 * wraps to zero and that carry never reaches c2.  Confirm before
 * enabling this branch.
 */
266#define mul_add_c(a,b,c0,c1,c2) { \
267 BN_ULONG ta=(a),tb=(b); \
268 t1 = ta * tb; \
269 t2 = BN_UMULT_HIGH(ta,tb); \
270 c0 += t1; t2 += (c0<t1)?1:0; \
271 c1 += t2; c2 += (c1<t2)?1:0; \
272 }
273
274#define mul_add_c2(a,b,c0,c1,c2) { \
275 BN_ULONG ta=(a),tb=(b),t0; \
276 t1 = BN_UMULT_HIGH(ta,tb); \
277 t0 = ta * tb; \
278 t2 = t1+t1; c2 += (t2<t1)?1:0; \
279 t1 = t0+t0; t2 += (t1<t0)?1:0; \
280 c0 += t1; t2 += (c0<t1)?1:0; \
281 c1 += t2; c2 += (c1<t2)?1:0; \
282 }
283#else
/*
 * mul_add_c(a,b,c0,c1,c2): (c2,c1,c0) += a*b.
 * t1 and t2 must be declared at the expansion site; they receive the
 * low and high product words.  The second asm adds t1 into c0 and
 * folds the carry flag into t2; the third adds t2 into c1 and folds
 * the carry flag into c2.  b must be a memory operand ("m").
 */
284#define mul_add_c(a,b,c0,c1,c2) do { \
285 asm ("mulq %3" \
286 : "=a"(t1),"=d"(t2) \
287 : "a"(a),"m"(b) \
288 : "cc"); \
289 asm ("addq %2,%0; adcq %3,%1" \
290 : "+r"(c0),"+d"(t2) \
291 : "a"(t1),"g"(0) \
292 : "cc"); \
293 asm ("addq %2,%0; adcq %3,%1" \
294 : "+r"(c1),"+r"(c2) \
295 : "d"(t2),"g"(0) \
296 : "cc"); \
297 } while (0)
298
/*
 * sqr_add_c(a,i,c0,c1,c2): (c2,c1,c0) += a[i]*a[i].
 * Same accumulation as mul_add_c, but the product is formed by
 * multiplying %rax (loaded with a[i]) by itself.
 */
299#define sqr_add_c(a,i,c0,c1,c2) do { \
300 asm ("mulq %2" \
301 : "=a"(t1),"=d"(t2) \
302 : "a"(a[i]) \
303 : "cc"); \
304 asm ("addq %2,%0; adcq %3,%1" \
305 : "+r"(c0),"+d"(t2) \
306 : "a"(t1),"g"(0) \
307 : "cc"); \
308 asm ("addq %2,%0; adcq %3,%1" \
309 : "+r"(c1),"+r"(c2) \
310 : "d"(t2),"g"(0) \
311 : "cc"); \
312 } while (0)
313
314#define mul_add_c2(a,b,c0,c1,c2) do { \
315 asm ("mulq %3" \
316 : "=a"(t1),"=d"(t2) \
317 : "a"(a),"m"(b) \
318 : "cc"); \
319 asm ("addq %0,%0; adcq %2,%1" \
320 : "+d"(t2),"+r"(c2) \
321 : "g"(0) \
322 : "cc"); \
323 asm ("addq %0,%0; adcq %2,%1" \
324 : "+a"(t1),"+d"(t2) \
325 : "g"(0) \
326 : "cc"); \
327 asm ("addq %2,%0; adcq %3,%1" \
328 : "+r"(c0),"+d"(t2) \
329 : "a"(t1),"g"(0) \
330 : "cc"); \
331 asm ("addq %2,%0; adcq %3,%1" \
332 : "+r"(c1),"+r"(c2) \
333 : "d"(t2),"g"(0) \
334 : "cc"); \
335 } while (0)
336#endif
337
/* sqr_add_c2(a,i,j,c0,c1,c2): (c2,c1,c0) += 2*a[i]*a[j], via mul_add_c2. */
338#define sqr_add_c2(a,i,j,c0,c1,c2) \
339 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
340
/*
 * Comba multiplication of two 8-word numbers: r[0..15] = a[0..7] * b[0..7].
 *
 * The result is built one output word (column) at a time.  For column
 * k every product a[i]*b[j] with i+j == k is added into a three-word
 * accumulator; its lowest word is then stored to r[k], that variable
 * is cleared, and the three variables c1,c2,c3 rotate roles for the
 * next column.  t1 and t2 are scratch words used by mul_add_c.
 */
341void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
342 {
343 BN_ULONG t1,t2;
344 BN_ULONG c1,c2,c3;
345
346 c1=0;
347 c2=0;
348 c3=0;
/* column 0: accumulator is (c3,c2,c1) */
349 mul_add_c(a[0],b[0],c1,c2,c3);
350 r[0]=c1;
351 c1=0;
/* column 1: accumulator is (c1,c3,c2) */
352 mul_add_c(a[0],b[1],c2,c3,c1);
353 mul_add_c(a[1],b[0],c2,c3,c1);
354 r[1]=c2;
355 c2=0;
/* column 2: accumulator is (c2,c1,c3); the same rotation repeats below */
356 mul_add_c(a[2],b[0],c3,c1,c2);
357 mul_add_c(a[1],b[1],c3,c1,c2);
358 mul_add_c(a[0],b[2],c3,c1,c2);
359 r[2]=c3;
360 c3=0;
361 mul_add_c(a[0],b[3],c1,c2,c3);
362 mul_add_c(a[1],b[2],c1,c2,c3);
363 mul_add_c(a[2],b[1],c1,c2,c3);
364 mul_add_c(a[3],b[0],c1,c2,c3);
365 r[3]=c1;
366 c1=0;
367 mul_add_c(a[4],b[0],c2,c3,c1);
368 mul_add_c(a[3],b[1],c2,c3,c1);
369 mul_add_c(a[2],b[2],c2,c3,c1);
370 mul_add_c(a[1],b[3],c2,c3,c1);
371 mul_add_c(a[0],b[4],c2,c3,c1);
372 r[4]=c2;
373 c2=0;
374 mul_add_c(a[0],b[5],c3,c1,c2);
375 mul_add_c(a[1],b[4],c3,c1,c2);
376 mul_add_c(a[2],b[3],c3,c1,c2);
377 mul_add_c(a[3],b[2],c3,c1,c2);
378 mul_add_c(a[4],b[1],c3,c1,c2);
379 mul_add_c(a[5],b[0],c3,c1,c2);
380 r[5]=c3;
381 c3=0;
382 mul_add_c(a[6],b[0],c1,c2,c3);
383 mul_add_c(a[5],b[1],c1,c2,c3);
384 mul_add_c(a[4],b[2],c1,c2,c3);
385 mul_add_c(a[3],b[3],c1,c2,c3);
386 mul_add_c(a[2],b[4],c1,c2,c3);
387 mul_add_c(a[1],b[5],c1,c2,c3);
388 mul_add_c(a[0],b[6],c1,c2,c3);
389 r[6]=c1;
390 c1=0;
/* column 7: the widest column, eight products */
391 mul_add_c(a[0],b[7],c2,c3,c1);
392 mul_add_c(a[1],b[6],c2,c3,c1);
393 mul_add_c(a[2],b[5],c2,c3,c1);
394 mul_add_c(a[3],b[4],c2,c3,c1);
395 mul_add_c(a[4],b[3],c2,c3,c1);
396 mul_add_c(a[5],b[2],c2,c3,c1);
397 mul_add_c(a[6],b[1],c2,c3,c1);
398 mul_add_c(a[7],b[0],c2,c3,c1);
399 r[7]=c2;
400 c2=0;
401 mul_add_c(a[7],b[1],c3,c1,c2);
402 mul_add_c(a[6],b[2],c3,c1,c2);
403 mul_add_c(a[5],b[3],c3,c1,c2);
404 mul_add_c(a[4],b[4],c3,c1,c2);
405 mul_add_c(a[3],b[5],c3,c1,c2);
406 mul_add_c(a[2],b[6],c3,c1,c2);
407 mul_add_c(a[1],b[7],c3,c1,c2);
408 r[8]=c3;
409 c3=0;
410 mul_add_c(a[2],b[7],c1,c2,c3);
411 mul_add_c(a[3],b[6],c1,c2,c3);
412 mul_add_c(a[4],b[5],c1,c2,c3);
413 mul_add_c(a[5],b[4],c1,c2,c3);
414 mul_add_c(a[6],b[3],c1,c2,c3);
415 mul_add_c(a[7],b[2],c1,c2,c3);
416 r[9]=c1;
417 c1=0;
418 mul_add_c(a[7],b[3],c2,c3,c1);
419 mul_add_c(a[6],b[4],c2,c3,c1);
420 mul_add_c(a[5],b[5],c2,c3,c1);
421 mul_add_c(a[4],b[6],c2,c3,c1);
422 mul_add_c(a[3],b[7],c2,c3,c1);
423 r[10]=c2;
424 c2=0;
425 mul_add_c(a[4],b[7],c3,c1,c2);
426 mul_add_c(a[5],b[6],c3,c1,c2);
427 mul_add_c(a[6],b[5],c3,c1,c2);
428 mul_add_c(a[7],b[4],c3,c1,c2);
429 r[11]=c3;
430 c3=0;
431 mul_add_c(a[7],b[5],c1,c2,c3);
432 mul_add_c(a[6],b[6],c1,c2,c3);
433 mul_add_c(a[5],b[7],c1,c2,c3);
434 r[12]=c1;
435 c1=0;
436 mul_add_c(a[6],b[7],c2,c3,c1);
437 mul_add_c(a[7],b[6],c2,c3,c1);
438 r[13]=c2;
439 c2=0;
/* column 14: single product; what is left in c1 afterwards is the top word */
440 mul_add_c(a[7],b[7],c3,c1,c2);
441 r[14]=c3;
442 r[15]=c1;
443 }
444
/*
 * bn_mul_comba4 - multiply the 4-word operands a[0..3] and b[0..3] and
 * store the 8 result words in r[0..7].
 *
 * Same column-by-column scheme as bn_mul_comba8: for each output word k the
 * products a[i]*b[j] with i+j == k go through mul_add_c, the first
 * accumulator argument is written to r[k] and cleared, and c1/c2/c3 rotate
 * through the accumulator positions.
 *
 * mul_add_c is defined outside this section; presumably it adds a*b into
 * the three-word accumulator (lowest word first) -- confirm there.
 * t1 and t2 are only declared for the accumulator macros.
 */
445void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
446 {
447 BN_ULONG t1,t2;
448 BN_ULONG c1,c2,c3;
449
450 c1=0;
451 c2=0;
452 c3=0;
453 mul_add_c(a[0],b[0],c1,c2,c3);
454 r[0]=c1;
455 c1=0;
456 mul_add_c(a[0],b[1],c2,c3,c1);
457 mul_add_c(a[1],b[0],c2,c3,c1);
458 r[1]=c2;
459 c2=0;
460 mul_add_c(a[2],b[0],c3,c1,c2);
461 mul_add_c(a[1],b[1],c3,c1,c2);
462 mul_add_c(a[0],b[2],c3,c1,c2);
463 r[2]=c3;
464 c3=0;
/* Column 3 is the widest: all four index pairs with i+j == 3. */
465 mul_add_c(a[0],b[3],c1,c2,c3);
466 mul_add_c(a[1],b[2],c1,c2,c3);
467 mul_add_c(a[2],b[1],c1,c2,c3);
468 mul_add_c(a[3],b[0],c1,c2,c3);
469 r[3]=c1;
470 c1=0;
471 mul_add_c(a[3],b[1],c2,c3,c1);
472 mul_add_c(a[2],b[2],c2,c3,c1);
473 mul_add_c(a[1],b[3],c2,c3,c1);
474 r[4]=c2;
475 c2=0;
476 mul_add_c(a[2],b[3],c3,c1,c2);
477 mul_add_c(a[3],b[2],c3,c1,c2);
478 r[5]=c3;
479 c3=0;
480 mul_add_c(a[3],b[3],c1,c2,c3);
/* Last column: c1 and c2 were the first two accumulator arguments of the
 * final mul_add_c, so they supply the top two result words. */
481 r[6]=c1;
482 r[7]=c2;
483 }
484
/*
 * bn_sqr_comba8 - square the 8-word operand a[0..7] and store the 16 result
 * words in r[0..15].
 *
 * Works column by column like bn_mul_comba8, but each pair of distinct
 * indices (i,j) with i+j == k is visited only once: sqr_add_c2(a,i,j,...)
 * forwards a[i] and a[j] to mul_add_c2, which accounts for the product
 * twice.  Diagonal terms (i == j, even columns only) go through sqr_add_c,
 * which is defined outside this section -- presumably it adds a[i]*a[i]
 * into the accumulator; confirm at its definition.
 *
 * c1, c2 and c3 rotate through the three accumulator positions; after each
 * column the first accumulator argument is stored to r[k] and cleared.
 * t1 and t2 are scratch variables referenced by name inside the macros.
 */
485void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
486 {
487 BN_ULONG t1,t2;
488 BN_ULONG c1,c2,c3;
489
490 c1=0;
491 c2=0;
492 c3=0;
493 sqr_add_c(a,0,c1,c2,c3);
494 r[0]=c1;
495 c1=0;
496 sqr_add_c2(a,1,0,c2,c3,c1);
497 r[1]=c2;
498 c2=0;
499 sqr_add_c(a,1,c3,c1,c2);
500 sqr_add_c2(a,2,0,c3,c1,c2);
501 r[2]=c3;
502 c3=0;
503 sqr_add_c2(a,3,0,c1,c2,c3);
504 sqr_add_c2(a,2,1,c1,c2,c3);
505 r[3]=c1;
506 c1=0;
507 sqr_add_c(a,2,c2,c3,c1);
508 sqr_add_c2(a,3,1,c2,c3,c1);
509 sqr_add_c2(a,4,0,c2,c3,c1);
510 r[4]=c2;
511 c2=0;
512 sqr_add_c2(a,5,0,c3,c1,c2);
513 sqr_add_c2(a,4,1,c3,c1,c2);
514 sqr_add_c2(a,3,2,c3,c1,c2);
515 r[5]=c3;
516 c3=0;
517 sqr_add_c(a,3,c1,c2,c3);
518 sqr_add_c2(a,4,2,c1,c2,c3);
519 sqr_add_c2(a,5,1,c1,c2,c3);
520 sqr_add_c2(a,6,0,c1,c2,c3);
521 r[6]=c1;
522 c1=0;
/* Column 7 (odd, so no diagonal term): four distinct index pairs. */
523 sqr_add_c2(a,7,0,c2,c3,c1);
524 sqr_add_c2(a,6,1,c2,c3,c1);
525 sqr_add_c2(a,5,2,c2,c3,c1);
526 sqr_add_c2(a,4,3,c2,c3,c1);
527 r[7]=c2;
528 c2=0;
529 sqr_add_c(a,4,c3,c1,c2);
530 sqr_add_c2(a,5,3,c3,c1,c2);
531 sqr_add_c2(a,6,2,c3,c1,c2);
532 sqr_add_c2(a,7,1,c3,c1,c2);
533 r[8]=c3;
534 c3=0;
535 sqr_add_c2(a,7,2,c1,c2,c3);
536 sqr_add_c2(a,6,3,c1,c2,c3);
537 sqr_add_c2(a,5,4,c1,c2,c3);
538 r[9]=c1;
539 c1=0;
540 sqr_add_c(a,5,c2,c3,c1);
541 sqr_add_c2(a,6,4,c2,c3,c1);
542 sqr_add_c2(a,7,3,c2,c3,c1);
543 r[10]=c2;
544 c2=0;
545 sqr_add_c2(a,7,4,c3,c1,c2);
546 sqr_add_c2(a,6,5,c3,c1,c2);
547 r[11]=c3;
548 c3=0;
549 sqr_add_c(a,6,c1,c2,c3);
550 sqr_add_c2(a,7,5,c1,c2,c3);
551 r[12]=c1;
552 c1=0;
553 sqr_add_c2(a,7,6,c2,c3,c1);
554 r[13]=c2;
555 c2=0;
556 sqr_add_c(a,7,c3,c1,c2);
/* Last column: c3 and c1 were the first two accumulator arguments of the
 * final sqr_add_c, so they supply the top two result words. */
557 r[14]=c3;
558 r[15]=c1;
559 }
560
/*
 * bn_sqr_comba4 - square the 4-word operand a[0..3] and store the 8 result
 * words in r[0..7].
 *
 * Same scheme as bn_sqr_comba8: each column k takes one sqr_add_c2 call per
 * pair of distinct indices with i+j == k (forwarded to mul_add_c2, which
 * accounts for the product twice) and, for even k, one sqr_add_c call for
 * the diagonal term.  sqr_add_c is defined outside this section --
 * presumably it adds a[i]*a[i] into the accumulator; confirm there.
 *
 * c1, c2 and c3 rotate through the accumulator positions; t1 and t2 are
 * scratch variables referenced by name inside the macros.
 */
561void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
562 {
563 BN_ULONG t1,t2;
564 BN_ULONG c1,c2,c3;
565
566 c1=0;
567 c2=0;
568 c3=0;
569 sqr_add_c(a,0,c1,c2,c3);
570 r[0]=c1;
571 c1=0;
572 sqr_add_c2(a,1,0,c2,c3,c1);
573 r[1]=c2;
574 c2=0;
575 sqr_add_c(a,1,c3,c1,c2);
576 sqr_add_c2(a,2,0,c3,c1,c2);
577 r[2]=c3;
578 c3=0;
579 sqr_add_c2(a,3,0,c1,c2,c3);
580 sqr_add_c2(a,2,1,c1,c2,c3);
581 r[3]=c1;
582 c1=0;
583 sqr_add_c(a,2,c2,c3,c1);
584 sqr_add_c2(a,3,1,c2,c3,c1);
585 r[4]=c2;
586 c2=0;
587 sqr_add_c2(a,3,2,c3,c1,c2);
588 r[5]=c3;
589 c3=0;
590 sqr_add_c(a,3,c1,c2,c3);
/* Last column: c1 and c2 were the first two accumulator arguments of the
 * final sqr_add_c, so they supply the top two result words. */
591 r[6]=c1;
592 r[7]=c2;
593 }
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
new file mode 100644
index 0000000000..1251521c54
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn.h
@@ -0,0 +1,583 @@
1/* crypto/bn/bn.h */
2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef HEADER_BN_H
60#define HEADER_BN_H
61
62#include <openssl/e_os2.h>
63#ifndef OPENSSL_NO_FP_API
64#include <stdio.h> /* FILE */
65#endif
66
67#ifdef __cplusplus
68extern "C" {
69#endif
70
71#ifdef OPENSSL_SYS_VMS
72#undef BN_LLONG /* experimental, so far... */
73#endif
74
75#define BN_MUL_COMBA
76#define BN_SQR_COMBA
77#define BN_RECURSION
78
79/* This next option uses the C libraries (2 word)/(1 word) function.
80 * If it is not defined, I use my C version (which is slower).
81 * The reason for this flag is that when the particular C compiler
82 * library routine is used, and the library is linked with a different
83 * compiler, the library is missing. This mostly happens when the
84 * library is built with gcc and then linked using normal cc. This would
85 * be a common occurrence because gcc normally produces code that is
86 * 2 times faster than system compilers for the big number stuff.
87 * For machines with only one compiler (or shared libraries), this should
88 * be on. Again this in only really a problem on machines
89 * using "long long's", are 32bit, and are not using my assembler code. */
90#if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \
91 defined(OPENSSL_SYS_WIN32) || defined(linux)
92# ifndef BN_DIV2W
93# define BN_DIV2W
94# endif
95#endif
96
97/* assuming long is 64bit - this is the DEC Alpha
98 * unsigned long long is only 64 bits :-(, don't define
99 * BN_LLONG for the DEC Alpha */
100#ifdef SIXTY_FOUR_BIT_LONG
101#define BN_ULLONG unsigned long long
102#define BN_ULONG unsigned long
103#define BN_LONG long
104#define BN_BITS 128
105#define BN_BYTES 8
106#define BN_BITS2 64
107#define BN_BITS4 32
108#define BN_MASK (0xffffffffffffffffffffffffffffffffLL)
109#define BN_MASK2 (0xffffffffffffffffL)
110#define BN_MASK2l (0xffffffffL)
111#define BN_MASK2h (0xffffffff00000000L)
112#define BN_MASK2h1 (0xffffffff80000000L)
113#define BN_TBIT (0x8000000000000000L)
114#define BN_DEC_CONV (10000000000000000000UL)
115#define BN_DEC_FMT1 "%lu"
116#define BN_DEC_FMT2 "%019lu"
117#define BN_DEC_NUM 19
118#endif
119
120/* This is where the long long data type is 64 bits, but long is 32.
121 * For machines where there are 64bit registers, this is the mode to use.
122 * IRIX, on R4000 and above should use this mode, along with the relevant
123 * assembler code :-). Do NOT define BN_LLONG.
124 */
125#ifdef SIXTY_FOUR_BIT
126#undef BN_LLONG
127#undef BN_ULLONG
128#define BN_ULONG unsigned long long
129#define BN_LONG long long
130#define BN_BITS 128
131#define BN_BYTES 8
132#define BN_BITS2 64
133#define BN_BITS4 32
134#define BN_MASK2 (0xffffffffffffffffLL)
135#define BN_MASK2l (0xffffffffL)
136#define BN_MASK2h (0xffffffff00000000LL)
137#define BN_MASK2h1 (0xffffffff80000000LL)
138#define BN_TBIT (0x8000000000000000LL)
139#define BN_DEC_CONV (10000000000000000000ULL)
140#define BN_DEC_FMT1 "%llu"
141#define BN_DEC_FMT2 "%019llu"
142#define BN_DEC_NUM 19
143#endif
144
145#ifdef THIRTY_TWO_BIT
146#if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__)
147#define BN_ULLONG unsigned _int64
148#else
149#define BN_ULLONG unsigned long long
150#endif
151#define BN_ULONG unsigned long
152#define BN_LONG long
153#define BN_BITS 64
154#define BN_BYTES 4
155#define BN_BITS2 32
156#define BN_BITS4 16
157#ifdef OPENSSL_SYS_WIN32
158/* VC++ doesn't like the LL suffix */
159#define BN_MASK (0xffffffffffffffffL)
160#else
161#define BN_MASK (0xffffffffffffffffLL)
162#endif
163#define BN_MASK2 (0xffffffffL)
164#define BN_MASK2l (0xffff)
165#define BN_MASK2h1 (0xffff8000L)
166#define BN_MASK2h (0xffff0000L)
167#define BN_TBIT (0x80000000L)
168#define BN_DEC_CONV (1000000000L)
169#define BN_DEC_FMT1 "%lu"
170#define BN_DEC_FMT2 "%09lu"
171#define BN_DEC_NUM 9
172#endif
173
/* Word-size configuration for builds that define SIXTEEN_BIT: BN_ULONG is
 * unsigned short (BN_BITS2 == 16) and the double-width type BN_ULLONG is
 * unsigned long.  BN_DIV2W is forced on if not already defined. */
174#ifdef SIXTEEN_BIT
175#ifndef BN_DIV2W
176#define BN_DIV2W
177#endif
178#define BN_ULLONG unsigned long
179#define BN_ULONG unsigned short
180#define BN_LONG short
181#define BN_BITS 32
182#define BN_BYTES 2
183#define BN_BITS2 16
184#define BN_BITS4 8
185#define BN_MASK (0xffffffff)
186#define BN_MASK2 (0xffff)
187#define BN_MASK2l (0xff)
188#define BN_MASK2h1 (0xff80)
189#define BN_MASK2h (0xff00)
190#define BN_TBIT (0x8000)
/* NOTE(review): 100000 (10^BN_DEC_NUM) is larger than BN_MASK2 (0xffff), so
 * it cannot be represented in a 16-bit BN_ULONG.  If the users of
 * BN_DEC_CONV store it or BN_DEC_NUM-digit chunks in a BN_ULONG, this
 * configuration would truncate -- confirm against the decimal conversion
 * code before relying on SIXTEEN_BIT builds. */
191#define BN_DEC_CONV (100000)
192#define BN_DEC_FMT1 "%u"
193#define BN_DEC_FMT2 "%05u"
194#define BN_DEC_NUM 5
195#endif
196
197#ifdef EIGHT_BIT
198#ifndef BN_DIV2W
199#define BN_DIV2W
200#endif
201#define BN_ULLONG unsigned short
202#define BN_ULONG unsigned char
203#define BN_LONG char
204#define BN_BITS 16
205#define BN_BYTES 1
206#define BN_BITS2 8
207#define BN_BITS4 4
208#define BN_MASK (0xffff)
209#define BN_MASK2 (0xff)
210#define BN_MASK2l (0xf)
211#define BN_MASK2h1 (0xf8)
212#define BN_MASK2h (0xf0)
213#define BN_TBIT (0x80)
214#define BN_DEC_CONV (100)
215#define BN_DEC_FMT1 "%u"
216#define BN_DEC_FMT2 "%02u"
217#define BN_DEC_NUM 2
218#endif
219
220#define BN_DEFAULT_BITS 1280
221
222#ifdef BIGNUM
223#undef BIGNUM
224#endif
225
/* Bit values for the 'flags' member tested and set by the two accessor
 * macros below. */
226#define BN_FLG_MALLOCED 0x01
227#define BN_FLG_STATIC_DATA 0x02
228#define BN_FLG_EXP_CONSTTIME 0x04 /* avoid leaking exponent information through timings
229 * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */
230#define BN_FLG_FREE 0x8000 /* used for debugging */
/* BN_set_flags ORs the bits in n into b->flags (bits are never cleared). */
231#define BN_set_flags(b,n) ((b)->flags|=(n))
/* BN_get_flags yields b->flags masked with n: non-zero if any bit of n is set. */
232#define BN_get_flags(b,n) ((b)->flags&(n))
233
234/* get a clone of a BIGNUM with changed flags, for *temporary* use only
235 * (the two BIGNUMs cannot be used in parallel!)
 *
 * The copy is shallow: dest->d is set to the same pointer as b->d, and
 * top, dmax and neg are copied by value.  The resulting dest->flags keep
 * dest's own BN_FLG_MALLOCED bit, take every other bit from b->flags, and
 * additionally have BN_FLG_STATIC_DATA and the caller-supplied bits n set.
 * dest and b are each evaluated several times, so pass side-effect-free
 * expressions. */
236#define BN_with_flags(dest,b,n) ((dest)->d=(b)->d, \
237 (dest)->top=(b)->top, \
238 (dest)->dmax=(b)->dmax, \
239 (dest)->neg=(b)->neg, \
240 (dest)->flags=(((dest)->flags & BN_FLG_MALLOCED) \
241 | ((b)->flags & ~BN_FLG_MALLOCED) \
242 | BN_FLG_STATIC_DATA \
243 | (n)))
244
/*
 * BIGNUM - a multi-word integer stored as sign plus magnitude.
 *
 * The magnitude lives in the word array d; d[0] is the least significant
 * word (BN_is_odd tests d[0] & 1).  A value with top == 0 is treated as
 * zero by BN_is_zero.
 */
245typedef struct bignum_st
246 {
247 BN_ULONG *d; /* Pointer to an array of BN_BITS2-bit words holding the magnitude. */
248 int top; /* Number of words of d in use (index of the last used word + 1). */
249 /* The next are internal book keeping for bn_expand. */
250 int dmax; /* Number of words allocated in the d array. */
251 int neg; /* one if the number is negative */
252 int flags; /* BN_FLG_* bits; see BN_set_flags / BN_get_flags */
253 } BIGNUM;
254
255/* Used for temp variables (declaration hidden in bn_lcl.h) */
256typedef struct bignum_ctx BN_CTX;
257
/*
 * BN_BLINDING - state handed to the BN_BLINDING_* functions declared later
 * in this header (new/free/update/convert/invert).
 * NOTE(review): A and Ai are presumably a blinding factor and its inverse
 * modulo 'mod' -- confirm in the BN_BLINDING implementation.
 */
258typedef struct bn_blinding_st
259 {
260 int init;
261 BIGNUM *A;
262 BIGNUM *Ai;
263 BIGNUM *mod; /* just a reference */
264 unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b;
265 * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */
266 } BN_BLINDING;
267
268/* Used for montgomery multiplication
 *
 * One context describes one modulus N.  It is filled in by
 * BN_MONT_CTX_set and then passed to BN_mod_mul_montgomery,
 * BN_from_montgomery and the BN_mod_exp_mont* functions declared later in
 * this header.  RR, N and Ni are embedded BIGNUMs, not pointers.
 */
269typedef struct bn_mont_ctx_st
270 {
271 int ri; /* number of bits in R */
272 BIGNUM RR; /* used to convert to montgomery form */
273 BIGNUM N; /* The modulus */
274 BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1
275 * (Ni is only stored for bignum algorithm) */
276 BN_ULONG n0; /* least significant word of Ni */
277 int flags; /* presumably BN_FLG_* style bits -- TODO confirm */
278 } BN_MONT_CTX;
279
280/* Used for reciprocal division/mod functions
281 * It cannot be shared between threads
 *
 * Set up with BN_RECP_CTX_set and consumed by BN_mod_mul_reciprocal and
 * BN_div_recp (declared later in this header).  N and Nr are embedded
 * BIGNUMs, not pointers.
282 */
283typedef struct bn_recp_ctx_st
284 {
285 BIGNUM N; /* the divisor */
286 BIGNUM Nr; /* the reciprocal */
287 int num_bits; /* presumably the bit length of N -- TODO confirm */
288 int shift; /* presumably the scaling applied to Nr -- TODO confirm */
289 int flags;
290 } BN_RECP_CTX;
291
292#define BN_prime_checks 0 /* default: select number of iterations
293 based on the size of the number */
294
295/* number of Miller-Rabin iterations for an error rate of less than 2^-80
296 * for random 'b'-bit input, b >= 100 (taken from table 4.4 in the Handbook
297 * of Applied Cryptography [Menezes, van Oorschot, Vanstone; CRC Press 1996];
298 * original paper: Damgaard, Landrock, Pomerance: Average case error estimates
299 * for the strong probable prime test. -- Math. Comp. 61 (1993) 177-194) */
/* Map a candidate's bit length b to an iteration count: the first threshold
 * (tested from largest to smallest) that b reaches selects the count, and
 * anything below 150 bits gets 27.  b is evaluated more than once. */
#define BN_prime_checks_for_size(b) \
	(((b) >= 1300) ?  2 : \
	 ((b) >=  850) ?  3 : \
	 ((b) >=  650) ?  4 : \
	 ((b) >=  550) ?  5 : \
	 ((b) >=  450) ?  6 : \
	 ((b) >=  400) ?  7 : \
	 ((b) >=  350) ?  8 : \
	 ((b) >=  300) ?  9 : \
	 ((b) >=  250) ? 12 : \
	 ((b) >=  200) ? 15 : \
	 ((b) >=  150) ? 18 : \
	 /* everything smaller */ 27)
312
313#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
314
315/* Note that BN_abs_is_word does not work reliably for w == 0 */
/* BN_abs_is_word: true when exactly one word of a is in use and that word
 * equals w; the sign is not examined.  A zero stored with top == 0 fails
 * this test, which is why BN_is_zero checks top == 0 separately. */
316#define BN_abs_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w)))
/* BN_is_zero: no words in use, or a single word equal to 0. */
317#define BN_is_zero(a) (((a)->top == 0) || BN_abs_is_word(a,0))
/* BN_is_one: magnitude 1 and not negative. */
318#define BN_is_one(a) (BN_abs_is_word((a),1) && !(a)->neg)
/* BN_is_word: for non-zero w, magnitude w and not negative; for w == 0,
 * defers to BN_is_zero.  w is evaluated more than once. */
319#define BN_is_word(a,w) ((w) ? BN_abs_is_word((a),(w)) && !(a)->neg : \
320 BN_is_zero((a)))
/* BN_is_odd: at least one word in use and the low bit of d[0] set. */
321#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
322
323#define BN_one(a) (BN_set_word((a),1))
324#define BN_zero(a) (BN_set_word((a),0))
325
326/*#define BN_ascii2bn(a) BN_hex2bn(a) */
327/*#define BN_bn2ascii(a) BN_bn2hex(a) */
328
329const BIGNUM *BN_value_one(void);
330char * BN_options(void);
331BN_CTX *BN_CTX_new(void);
332void BN_CTX_init(BN_CTX *c);
333void BN_CTX_free(BN_CTX *c);
334void BN_CTX_start(BN_CTX *ctx);
335BIGNUM *BN_CTX_get(BN_CTX *ctx);
336void BN_CTX_end(BN_CTX *ctx);
337int BN_rand(BIGNUM *rnd, int bits, int top,int bottom);
338int BN_pseudo_rand(BIGNUM *rnd, int bits, int top,int bottom);
339int BN_rand_range(BIGNUM *rnd, BIGNUM *range);
340int BN_pseudo_rand_range(BIGNUM *rnd, BIGNUM *range);
341int BN_num_bits(const BIGNUM *a);
342int BN_num_bits_word(BN_ULONG);
343BIGNUM *BN_new(void);
344void BN_init(BIGNUM *);
345void BN_clear_free(BIGNUM *a);
346BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b);
347void BN_swap(BIGNUM *a, BIGNUM *b);
348BIGNUM *BN_bin2bn(const unsigned char *s,int len,BIGNUM *ret);
349int BN_bn2bin(const BIGNUM *a, unsigned char *to);
350BIGNUM *BN_mpi2bn(const unsigned char *s,int len,BIGNUM *ret);
351int BN_bn2mpi(const BIGNUM *a, unsigned char *to);
352int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
353int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
354int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
355int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
356int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
357int BN_sqr(BIGNUM *r, const BIGNUM *a,BN_CTX *ctx);
358
359int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
360 BN_CTX *ctx);
361#define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
362int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx);
363int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
364int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
365int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
366int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
367int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
368 const BIGNUM *m, BN_CTX *ctx);
369int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
370int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
371int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m);
372int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx);
373int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m);
374
375BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w);
376BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w);
377int BN_mul_word(BIGNUM *a, BN_ULONG w);
378int BN_add_word(BIGNUM *a, BN_ULONG w);
379int BN_sub_word(BIGNUM *a, BN_ULONG w);
380int BN_set_word(BIGNUM *a, BN_ULONG w);
381BN_ULONG BN_get_word(const BIGNUM *a);
382
383int BN_cmp(const BIGNUM *a, const BIGNUM *b);
384void BN_free(BIGNUM *a);
385int BN_is_bit_set(const BIGNUM *a, int n);
386int BN_lshift(BIGNUM *r, const BIGNUM *a, int n);
387int BN_lshift1(BIGNUM *r, const BIGNUM *a);
388int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,BN_CTX *ctx);
389
390int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
391 const BIGNUM *m,BN_CTX *ctx);
392int BN_mod_exp_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
393 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
394int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
395 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont);
396int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p,
397 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
398int BN_mod_exp2_mont(BIGNUM *r, const BIGNUM *a1, const BIGNUM *p1,
399 const BIGNUM *a2, const BIGNUM *p2,const BIGNUM *m,
400 BN_CTX *ctx,BN_MONT_CTX *m_ctx);
401int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
402 const BIGNUM *m,BN_CTX *ctx);
403
404int BN_mask_bits(BIGNUM *a,int n);
405#ifndef OPENSSL_NO_FP_API
406int BN_print_fp(FILE *fp, const BIGNUM *a);
407#endif
408#ifdef HEADER_BIO_H
409int BN_print(BIO *fp, const BIGNUM *a);
410#else
411int BN_print(void *fp, const BIGNUM *a);
412#endif
413int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx);
414int BN_rshift(BIGNUM *r, const BIGNUM *a, int n);
415int BN_rshift1(BIGNUM *r, const BIGNUM *a);
416void BN_clear(BIGNUM *a);
417BIGNUM *BN_dup(const BIGNUM *a);
418int BN_ucmp(const BIGNUM *a, const BIGNUM *b);
419int BN_set_bit(BIGNUM *a, int n);
420int BN_clear_bit(BIGNUM *a, int n);
421char * BN_bn2hex(const BIGNUM *a);
422char * BN_bn2dec(const BIGNUM *a);
423int BN_hex2bn(BIGNUM **a, const char *str);
424int BN_dec2bn(BIGNUM **a, const char *str);
425int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx);
426int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */
427BIGNUM *BN_mod_inverse(BIGNUM *ret,
428 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
429BIGNUM *BN_mod_sqrt(BIGNUM *ret,
430 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
431BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe,
432 const BIGNUM *add, const BIGNUM *rem,
433 void (*callback)(int,int,void *),void *cb_arg);
434int BN_is_prime(const BIGNUM *p,int nchecks,
435 void (*callback)(int,int,void *),
436 BN_CTX *ctx,void *cb_arg);
437int BN_is_prime_fasttest(const BIGNUM *p,int nchecks,
438 void (*callback)(int,int,void *),BN_CTX *ctx,void *cb_arg,
439 int do_trial_division);
440
441#ifdef OPENSSL_FIPS
442int BN_X931_derive_prime(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
443 void (*cb)(int, int, void *), void *cb_arg,
444 const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
445 const BIGNUM *e, BN_CTX *ctx);
446int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx);
447int BN_X931_generate_prime(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
448 BIGNUM *Xp1, BIGNUM *Xp2,
449 const BIGNUM *Xp,
450 const BIGNUM *e, BN_CTX *ctx,
451 void (*cb)(int, int, void *), void *cb_arg);
452#endif
453
454BN_MONT_CTX *BN_MONT_CTX_new(void );
455void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
456int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,
457 BN_MONT_CTX *mont, BN_CTX *ctx);
458#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\
459 (r),(a),&((mont)->RR),(mont),(ctx))
460int BN_from_montgomery(BIGNUM *r,const BIGNUM *a,
461 BN_MONT_CTX *mont, BN_CTX *ctx);
462void BN_MONT_CTX_free(BN_MONT_CTX *mont);
463int BN_MONT_CTX_set(BN_MONT_CTX *mont,const BIGNUM *mod,BN_CTX *ctx);
464BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from);
465BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
466 const BIGNUM *mod, BN_CTX *ctx);
467
468BN_BLINDING *BN_BLINDING_new(BIGNUM *A,BIGNUM *Ai,BIGNUM *mod);
469void BN_BLINDING_free(BN_BLINDING *b);
470int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
471int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *r, BN_CTX *ctx);
472int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
473
474void BN_set_params(int mul,int high,int low,int mont);
475int BN_get_params(int which); /* 0, mul, 1 high, 2 low, 3 mont */
476
477void BN_RECP_CTX_init(BN_RECP_CTX *recp);
478BN_RECP_CTX *BN_RECP_CTX_new(void);
479void BN_RECP_CTX_free(BN_RECP_CTX *recp);
480int BN_RECP_CTX_set(BN_RECP_CTX *recp,const BIGNUM *rdiv,BN_CTX *ctx);
481int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
482 BN_RECP_CTX *recp,BN_CTX *ctx);
483int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
484 const BIGNUM *m, BN_CTX *ctx);
485int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
486 BN_RECP_CTX *recp, BN_CTX *ctx);
487
488/* library internal functions */
489
/*
 * bn_expand(a,bits):   make sure a can hold 'bits' bits.
 * bn_wexpand(a,words): make sure a can hold 'words' words.
 *
 * Both evaluate to a itself when a->dmax is already large enough and
 * otherwise to the result of bn_expand2(), which is called with the word
 * count to grow to.  bn_expand compares ceil(bits/BN_BITS2) against dmax
 * but asks bn_expand2 for bits/BN_BITS2+1 words, so it may request one
 * word more than strictly needed; that is kept as-is.
 *
 * Every use of a macro parameter is parenthesized so that an argument such
 * as (n | 1) is not re-associated with the arithmetic inside the macro
 * (the size test previously used a bare 'bits').  Arguments are evaluated
 * more than once; pass side-effect-free expressions.
 */
#define bn_expand(a,bits) \
	(((((bits)+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) ? \
		(a) : bn_expand2((a),(bits)/BN_BITS2+1))
#define bn_wexpand(a,words) \
	(((words) <= (a)->dmax) ? (a) : bn_expand2((a),(words)))
493BIGNUM *bn_expand2(BIGNUM *a, int words);
494BIGNUM *bn_dup_expand(const BIGNUM *a, int words);
495
/*
 * bn_fix_top(a): strip unused high words.  Starting at d[top-1] and walking
 * downwards, a->top is decremented for every zero word and the loop stops
 * at the first non-zero word (or when top reaches 0).  Does nothing when
 * top is already 0.  a is evaluated several times.
 *
 * NOTE(review): the body is a bare { } block rather than do { } while (0),
 * so "if (x) bn_fix_top(a); else ..." would not parse as intended; check
 * call sites before changing it.
 */
496#define bn_fix_top(a) \
497 { \
498 BN_ULONG *ftl; \
499 if ((a)->top > 0) \
500 { \
501 for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
502 if (*(ftl--)) break; \
503 } \
504 }
505
506BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
507BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
508void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
509BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
510BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
511BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
512
513#ifdef BN_DEBUG
514void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n);
515# define bn_print(a) {fprintf(stderr, #a "="); BN_print_fp(stderr,a); \
516 fprintf(stderr,"\n");}
517# define bn_dump(a,n) bn_dump1(stderr,#a,a,n);
518#else
519# define bn_print(a)
520# define bn_dump(a,b)
521#endif
522
523int BN_bntest_rand(BIGNUM *rnd, int bits, int top,int bottom);
524
525/* BEGIN ERROR CODES */
526/* The following lines are auto generated by the script mkerr.pl. Any changes
527 * made after this point may be overwritten when the script is next run.
528 */
529void ERR_load_BN_strings(void);
530
531/* Error codes for the BN functions. */
532
533/* Function codes. */
534#define BN_F_BN_BLINDING_CONVERT 100
535#define BN_F_BN_BLINDING_INVERT 101
536#define BN_F_BN_BLINDING_NEW 102
537#define BN_F_BN_BLINDING_UPDATE 103
538#define BN_F_BN_BN2DEC 104
539#define BN_F_BN_BN2HEX 105
540#define BN_F_BN_CTX_GET 116
541#define BN_F_BN_CTX_NEW 106
542#define BN_F_BN_DIV 107
543#define BN_F_BN_EXP 123
544#define BN_F_BN_EXPAND2 108
545#define BN_F_BN_EXPAND_INTERNAL 120
546#define BN_F_BN_MOD_EXP2_MONT 118
547#define BN_F_BN_MOD_EXP_MONT 109
548#define BN_F_BN_MOD_EXP_MONT_CONSTTIME 124
549#define BN_F_BN_MOD_EXP_MONT_WORD 117
550#define BN_F_BN_MOD_EXP_RECP 125
551#define BN_F_BN_MOD_EXP_SIMPLE 126
552#define BN_F_BN_MOD_INVERSE 110
553#define BN_F_BN_MOD_LSHIFT_QUICK 119
554#define BN_F_BN_MOD_MUL_RECIPROCAL 111
555#define BN_F_BN_MOD_SQRT 121
556#define BN_F_BN_MPI2BN 112
557#define BN_F_BN_NEW 113
558#define BN_F_BN_RAND 114
559#define BN_F_BN_RAND_RANGE 122
560#define BN_F_BN_USUB 115
561
562/* Reason codes. */
563#define BN_R_ARG2_LT_ARG3 100
564#define BN_R_BAD_RECIPROCAL 101
565#define BN_R_BIGNUM_TOO_LONG 114
566#define BN_R_CALLED_WITH_EVEN_MODULUS 102
567#define BN_R_DIV_BY_ZERO 103
568#define BN_R_ENCODING_ERROR 104
569#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
570#define BN_R_INPUT_NOT_REDUCED 110
571#define BN_R_INVALID_LENGTH 106
572#define BN_R_INVALID_RANGE 115
573#define BN_R_NOT_A_SQUARE 111
574#define BN_R_NOT_INITIALIZED 107
575#define BN_R_NO_INVERSE 108
576#define BN_R_P_IS_NOT_PRIME 112
577#define BN_R_TOO_MANY_ITERATIONS 113
578#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109
579
580#ifdef __cplusplus
581}
582#endif
583#endif
diff --git a/src/lib/libcrypto/bn/bn.mul b/src/lib/libcrypto/bn/bn.mul
new file mode 100644
index 0000000000..9728870d38
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn.mul
@@ -0,0 +1,19 @@
1We need
2
3* bn_mul_comba8
4* bn_mul_comba4
5* bn_mul_normal
6* bn_mul_recursive
7
8* bn_sqr_comba8
9* bn_sqr_comba4
10bn_sqr_normal -> BN_sqr
11* bn_sqr_recursive
12
13* bn_mul_low_recursive
14* bn_mul_low_normal
15* bn_mul_high
16
17* bn_mul_part_recursive # symmetric but not power of 2
18
19bn_mul_asymetric_recursive # uneven, but do the chop up.
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c
new file mode 100644
index 0000000000..6cba07e9f6
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_add.c
@@ -0,0 +1,309 @@
1/* crypto/bn/bn_add.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* r can == a or b */
64int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
65 {
66 const BIGNUM *tmp;
67 int a_neg = a->neg;
68
69 bn_check_top(a);
70 bn_check_top(b);
71
72 /* a + b a+b
73 * a + -b a-b
74 * -a + b b-a
75 * -a + -b -(a+b)
76 */
77 if (a_neg ^ b->neg)
78 {
79 /* only one is negative */
80 if (a_neg)
81 { tmp=a; a=b; b=tmp; }
82
83 /* we are now a - b */
84
85 if (BN_ucmp(a,b) < 0)
86 {
87 if (!BN_usub(r,b,a)) return(0);
88 r->neg=1;
89 }
90 else
91 {
92 if (!BN_usub(r,a,b)) return(0);
93 r->neg=0;
94 }
95 return(1);
96 }
97
98 if (!BN_uadd(r,a,b)) return(0);
99 if (a_neg) /* both are neg */
100 r->neg=1;
101 else
102 r->neg=0;
103 return(1);
104 }
105
106/* unsigned add of b to a, r must be large enough */
107int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
108 {
109 register int i;
110 int max,min;
111 BN_ULONG *ap,*bp,*rp,carry,t1;
112 const BIGNUM *tmp;
113
114 bn_check_top(a);
115 bn_check_top(b);
116
117 if (a->top < b->top)
118 { tmp=a; a=b; b=tmp; }
119 max=a->top;
120 min=b->top;
121
122 if (bn_wexpand(r,max+1) == NULL)
123 return(0);
124
125 r->top=max;
126
127
128 ap=a->d;
129 bp=b->d;
130 rp=r->d;
131 carry=0;
132
133 carry=bn_add_words(rp,ap,bp,min);
134 rp+=min;
135 ap+=min;
136 bp+=min;
137 i=min;
138
139 if (carry)
140 {
141 while (i < max)
142 {
143 i++;
144 t1= *(ap++);
145 if ((*(rp++)=(t1+1)&BN_MASK2) >= t1)
146 {
147 carry=0;
148 break;
149 }
150 }
151 if ((i >= max) && carry)
152 {
153 *(rp++)=1;
154 r->top++;
155 }
156 }
157 if (rp != ap)
158 {
159 for (; i<max; i++)
160 *(rp++)= *(ap++);
161 }
162 /* memcpy(rp,ap,sizeof(*ap)*(max-i));*/
163 r->neg = 0;
164 return(1);
165 }
166
167/* unsigned subtraction of b from a, a must be larger than b. */
168int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
169 {
170 int max,min;
171 register BN_ULONG t1,t2,*ap,*bp,*rp;
172 int i,carry;
173#if defined(IRIX_CC_BUG) && !defined(LINT)
174 int dummy;
175#endif
176
177 bn_check_top(a);
178 bn_check_top(b);
179
180 if (a->top < b->top) /* hmm... should not be happening */
181 {
182 BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3);
183 return(0);
184 }
185
186 max=a->top;
187 min=b->top;
188 if (bn_wexpand(r,max) == NULL) return(0);
189
190 ap=a->d;
191 bp=b->d;
192 rp=r->d;
193
194#if 1
195 carry=0;
196 for (i=0; i<min; i++)
197 {
198 t1= *(ap++);
199 t2= *(bp++);
200 if (carry)
201 {
202 carry=(t1 <= t2);
203 t1=(t1-t2-1)&BN_MASK2;
204 }
205 else
206 {
207 carry=(t1 < t2);
208 t1=(t1-t2)&BN_MASK2;
209 }
210#if defined(IRIX_CC_BUG) && !defined(LINT)
211 dummy=t1;
212#endif
213 *(rp++)=t1&BN_MASK2;
214 }
215#else
216 carry=bn_sub_words(rp,ap,bp,min);
217 ap+=min;
218 bp+=min;
219 rp+=min;
220 i=min;
221#endif
222 if (carry) /* subtracted */
223 {
224 while (i < max)
225 {
226 i++;
227 t1= *(ap++);
228 t2=(t1-1)&BN_MASK2;
229 *(rp++)=t2;
230 if (t1 > t2) break;
231 }
232 }
233#if 0
234 memcpy(rp,ap,sizeof(*rp)*(max-i));
235#else
236 if (rp != ap)
237 {
238 for (;;)
239 {
240 if (i++ >= max) break;
241 rp[0]=ap[0];
242 if (i++ >= max) break;
243 rp[1]=ap[1];
244 if (i++ >= max) break;
245 rp[2]=ap[2];
246 if (i++ >= max) break;
247 rp[3]=ap[3];
248 rp+=4;
249 ap+=4;
250 }
251 }
252#endif
253
254 r->top=max;
255 r->neg=0;
256 bn_fix_top(r);
257 return(1);
258 }
259
260int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
261 {
262 int max;
263 int add=0,neg=0;
264 const BIGNUM *tmp;
265
266 bn_check_top(a);
267 bn_check_top(b);
268
269 /* a - b a-b
270 * a - -b a+b
271 * -a - b -(a+b)
272 * -a - -b b-a
273 */
274 if (a->neg)
275 {
276 if (b->neg)
277 { tmp=a; a=b; b=tmp; }
278 else
279 { add=1; neg=1; }
280 }
281 else
282 {
283 if (b->neg) { add=1; neg=0; }
284 }
285
286 if (add)
287 {
288 if (!BN_uadd(r,a,b)) return(0);
289 r->neg=neg;
290 return(1);
291 }
292
293 /* We are actually doing a - b :-) */
294
295 max=(a->top > b->top)?a->top:b->top;
296 if (bn_wexpand(r,max) == NULL) return(0);
297 if (BN_ucmp(a,b) < 0)
298 {
299 if (!BN_usub(r,b,a)) return(0);
300 r->neg=1;
301 }
302 else
303 {
304 if (!BN_usub(r,a,b)) return(0);
305 r->neg=0;
306 }
307 return(1);
308 }
309
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
new file mode 100644
index 0000000000..19978085b2
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -0,0 +1,832 @@
1/* crypto/bn/bn_asm.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
70
71BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
72 {
73 BN_ULONG c1=0;
74
75 assert(num >= 0);
76 if (num <= 0) return(c1);
77
78 while (num&~3)
79 {
80 mul_add(rp[0],ap[0],w,c1);
81 mul_add(rp[1],ap[1],w,c1);
82 mul_add(rp[2],ap[2],w,c1);
83 mul_add(rp[3],ap[3],w,c1);
84 ap+=4; rp+=4; num-=4;
85 }
86 if (num)
87 {
88 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
89 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
90 mul_add(rp[2],ap[2],w,c1); return c1;
91 }
92
93 return(c1);
94 }
95
96BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
97 {
98 BN_ULONG c1=0;
99
100 assert(num >= 0);
101 if (num <= 0) return(c1);
102
103 while (num&~3)
104 {
105 mul(rp[0],ap[0],w,c1);
106 mul(rp[1],ap[1],w,c1);
107 mul(rp[2],ap[2],w,c1);
108 mul(rp[3],ap[3],w,c1);
109 ap+=4; rp+=4; num-=4;
110 }
111 if (num)
112 {
113 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
114 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
115 mul(rp[2],ap[2],w,c1);
116 }
117 return(c1);
118 }
119
120void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
121 {
122 assert(n >= 0);
123 if (n <= 0) return;
124 while (n&~3)
125 {
126 sqr(r[0],r[1],a[0]);
127 sqr(r[2],r[3],a[1]);
128 sqr(r[4],r[5],a[2]);
129 sqr(r[6],r[7],a[3]);
130 a+=4; r+=8; n-=4;
131 }
132 if (n)
133 {
134 sqr(r[0],r[1],a[0]); if (--n == 0) return;
135 sqr(r[2],r[3],a[1]); if (--n == 0) return;
136 sqr(r[4],r[5],a[2]);
137 }
138 }
139
140#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
141
142BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
143 {
144 BN_ULONG c=0;
145 BN_ULONG bl,bh;
146
147 assert(num >= 0);
148 if (num <= 0) return((BN_ULONG)0);
149
150 bl=LBITS(w);
151 bh=HBITS(w);
152
153 for (;;)
154 {
155 mul_add(rp[0],ap[0],bl,bh,c);
156 if (--num == 0) break;
157 mul_add(rp[1],ap[1],bl,bh,c);
158 if (--num == 0) break;
159 mul_add(rp[2],ap[2],bl,bh,c);
160 if (--num == 0) break;
161 mul_add(rp[3],ap[3],bl,bh,c);
162 if (--num == 0) break;
163 ap+=4;
164 rp+=4;
165 }
166 return(c);
167 }
168
169BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
170 {
171 BN_ULONG carry=0;
172 BN_ULONG bl,bh;
173
174 assert(num >= 0);
175 if (num <= 0) return((BN_ULONG)0);
176
177 bl=LBITS(w);
178 bh=HBITS(w);
179
180 for (;;)
181 {
182 mul(rp[0],ap[0],bl,bh,carry);
183 if (--num == 0) break;
184 mul(rp[1],ap[1],bl,bh,carry);
185 if (--num == 0) break;
186 mul(rp[2],ap[2],bl,bh,carry);
187 if (--num == 0) break;
188 mul(rp[3],ap[3],bl,bh,carry);
189 if (--num == 0) break;
190 ap+=4;
191 rp+=4;
192 }
193 return(carry);
194 }
195
196void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
197 {
198 assert(n >= 0);
199 if (n <= 0) return;
200 for (;;)
201 {
202 sqr64(r[0],r[1],a[0]);
203 if (--n == 0) break;
204
205 sqr64(r[2],r[3],a[1]);
206 if (--n == 0) break;
207
208 sqr64(r[4],r[5],a[2]);
209 if (--n == 0) break;
210
211 sqr64(r[6],r[7],a[3]);
212 if (--n == 0) break;
213
214 a+=4;
215 r+=8;
216 }
217 }
218
219#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
220
221#if defined(BN_LLONG) && defined(BN_DIV2W)
222
223BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
224 {
225 return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
226 }
227
228#else
229
230/* Divide h,l by d and return the result. */
231/* I need to test this some more :-( */
/* Portable variant built from single-word operations only.  The quotient
 * is produced as two half-word digits (count starts at 2): the first digit
 * is shifted into the upper half of ret, the second is OR-ed into the lower
 * half.  Returns BN_MASK2 when d is 0.
 */
232BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
233 {
234 BN_ULONG dh,dl,q,ret=0,th,tl,t;
235 int i,count=2;
236
 /* division by zero: hand back the all-ones word instead of trapping */
237 if (d == 0) return(BN_MASK2);
238
239 i=BN_num_bits_word(d);
240 assert((i == BN_BITS2) || (h <= (BN_ULONG)1<<i));
241
 /* i becomes the number of unused high bits in d */
242 i=BN_BITS2-i;
243 if (h >= d) h-=d;
244
 /* normalise: shift d, and the (h:l) pair with it, left by i bits */
245 if (i)
246 {
247 d<<=i;
248 h=(h<<i)|(l>>(BN_BITS2-i));
249 l<<=i;
250 }
 /* upper and lower halves of the (shifted) divisor */
251 dh=(d&BN_MASK2h)>>BN_BITS4;
252 dl=(d&BN_MASK2l);
253 for (;;)
254 {
 /* estimate the next half-word quotient digit from the top halves */
255 if ((h>>BN_BITS4) == dh)
256 q=BN_MASK2l;
257 else
258 q=h/dh;
259
260 th=q*dh;
261 tl=dl*q;
 /* step the estimate down until q*d no longer exceeds the remainder */
262 for (;;)
263 {
264 t=h-th;
265 if ((t&BN_MASK2h) ||
266 ((tl) <= (
267 (t<<BN_BITS4)|
268 ((l&BN_MASK2h)>>BN_BITS4))))
269 break;
270 q--;
271 th-=dh;
272 tl-=dl;
273 }
 /* re-align the low partial product and subtract q*d from (h:l) */
274 t=(tl>>BN_BITS4);
275 tl=(tl<<BN_BITS4)&BN_MASK2h;
276 th+=t;
277
278 if (l < tl) th++;
279 l-=tl;
280 if (h < th)
281 {
282 h+=d;
283 q--;
284 }
285 h-=th;
286
287 if (--count == 0) break;
288
 /* first digit done: store it high and shift the remainder up half a word */
289 ret=q<<BN_BITS4;
290 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
291 l=(l&BN_MASK2l)<<BN_BITS4;
292 }
293 ret|=q;
294 return(ret);
295 }
296#endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
297
298#ifdef BN_LLONG
299BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
300 {
301 BN_ULLONG ll=0;
302
303 assert(n >= 0);
304 if (n <= 0) return((BN_ULONG)0);
305
306 for (;;)
307 {
308 ll+=(BN_ULLONG)a[0]+b[0];
309 r[0]=(BN_ULONG)ll&BN_MASK2;
310 ll>>=BN_BITS2;
311 if (--n <= 0) break;
312
313 ll+=(BN_ULLONG)a[1]+b[1];
314 r[1]=(BN_ULONG)ll&BN_MASK2;
315 ll>>=BN_BITS2;
316 if (--n <= 0) break;
317
318 ll+=(BN_ULLONG)a[2]+b[2];
319 r[2]=(BN_ULONG)ll&BN_MASK2;
320 ll>>=BN_BITS2;
321 if (--n <= 0) break;
322
323 ll+=(BN_ULLONG)a[3]+b[3];
324 r[3]=(BN_ULONG)ll&BN_MASK2;
325 ll>>=BN_BITS2;
326 if (--n <= 0) break;
327
328 a+=4;
329 b+=4;
330 r+=4;
331 }
332 return((BN_ULONG)ll);
333 }
334#else /* !BN_LLONG */
335BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
336 {
337 BN_ULONG c,l,t;
338
339 assert(n >= 0);
340 if (n <= 0) return((BN_ULONG)0);
341
342 c=0;
343 for (;;)
344 {
345 t=a[0];
346 t=(t+c)&BN_MASK2;
347 c=(t < c);
348 l=(t+b[0])&BN_MASK2;
349 c+=(l < t);
350 r[0]=l;
351 if (--n <= 0) break;
352
353 t=a[1];
354 t=(t+c)&BN_MASK2;
355 c=(t < c);
356 l=(t+b[1])&BN_MASK2;
357 c+=(l < t);
358 r[1]=l;
359 if (--n <= 0) break;
360
361 t=a[2];
362 t=(t+c)&BN_MASK2;
363 c=(t < c);
364 l=(t+b[2])&BN_MASK2;
365 c+=(l < t);
366 r[2]=l;
367 if (--n <= 0) break;
368
369 t=a[3];
370 t=(t+c)&BN_MASK2;
371 c=(t < c);
372 l=(t+b[3])&BN_MASK2;
373 c+=(l < t);
374 r[3]=l;
375 if (--n <= 0) break;
376
377 a+=4;
378 b+=4;
379 r+=4;
380 }
381 return((BN_ULONG)c);
382 }
383#endif /* !BN_LLONG */
384
385BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
386 {
387 BN_ULONG t1,t2;
388 int c=0;
389
390 assert(n >= 0);
391 if (n <= 0) return((BN_ULONG)0);
392
393 for (;;)
394 {
395 t1=a[0]; t2=b[0];
396 r[0]=(t1-t2-c)&BN_MASK2;
397 if (t1 != t2) c=(t1 < t2);
398 if (--n <= 0) break;
399
400 t1=a[1]; t2=b[1];
401 r[1]=(t1-t2-c)&BN_MASK2;
402 if (t1 != t2) c=(t1 < t2);
403 if (--n <= 0) break;
404
405 t1=a[2]; t2=b[2];
406 r[2]=(t1-t2-c)&BN_MASK2;
407 if (t1 != t2) c=(t1 < t2);
408 if (--n <= 0) break;
409
410 t1=a[3]; t2=b[3];
411 r[3]=(t1-t2-c)&BN_MASK2;
412 if (t1 != t2) c=(t1 < t2);
413 if (--n <= 0) break;
414
415 a+=4;
416 b+=4;
417 r+=4;
418 }
419 return(c);
420 }
421
422#ifdef BN_MUL_COMBA
423
424#undef bn_mul_comba8
425#undef bn_mul_comba4
426#undef bn_sqr_comba8
427#undef bn_sqr_comba4
428
429/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
430/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
431/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
432/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
433
/* Three implementations of the comba accumulator macros follow, selected by
 * BN_LLONG, BN_UMULT_HIGH, or neither.  All of them add a two-word product
 * into the three-word accumulator (c2,c1,c0) and use scratch variables
 * (t1, t2, plus t/tt or bl/bh depending on the variant) that are NOT declared
 * by the macros themselves and must be in scope at the expansion site.
 */
434#ifdef BN_LLONG
/* BN_LLONG variant: the product is formed in the double-width variable t.
 * NOTE(review): these expand to several bare statements (no enclosing
 * braces), so they are only safe as full statements, not as the body of an
 * unbraced if/else.
 */
435#define mul_add_c(a,b,c0,c1,c2) \
436 t=(BN_ULLONG)a*b; \
437 t1=(BN_ULONG)Lw(t); \
438 t2=(BN_ULONG)Hw(t); \
439 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
440 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
441
/* Doubled product: tt = 2*t; a wrap of the doubling is counted into c2. */
442#define mul_add_c2(a,b,c0,c1,c2) \
443 t=(BN_ULLONG)a*b; \
444 tt=(t+t)&BN_MASK; \
445 if (tt < t) c2++; \
446 t1=(BN_ULONG)Lw(tt); \
447 t2=(BN_ULONG)Hw(tt); \
448 c0=(c0+t1)&BN_MASK2; \
449 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
450 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
451
/* Square of a[i] added into the accumulator. */
452#define sqr_add_c(a,i,c0,c1,c2) \
453 t=(BN_ULLONG)a[i]*a[i]; \
454 t1=(BN_ULONG)Lw(t); \
455 t2=(BN_ULONG)Hw(t); \
456 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
457 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
458
/* 2*a[i]*a[j], implemented by forwarding to mul_add_c2. */
459#define sqr_add_c2(a,i,j,c0,c1,c2) \
460 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
461
462#elif defined(BN_UMULT_HIGH)
463
/* BN_UMULT_HIGH variant: low word from a plain multiply, high word from
 * BN_UMULT_HIGH.  These versions are brace-enclosed and copy their
 * arguments into locals (ta, tb) before use.
 */
464#define mul_add_c(a,b,c0,c1,c2) { \
465 BN_ULONG ta=(a),tb=(b); \
466 t1 = ta * tb; \
467 t2 = BN_UMULT_HIGH(ta,tb); \
468 c0 += t1; t2 += (c0<t1)?1:0; \
469 c1 += t2; c2 += (c1<t2)?1:0; \
470 }
471
/* Doubled product: both halves are doubled with explicit carry checks. */
472#define mul_add_c2(a,b,c0,c1,c2) { \
473 BN_ULONG ta=(a),tb=(b),t0; \
474 t1 = BN_UMULT_HIGH(ta,tb); \
475 t0 = ta * tb; \
476 t2 = t1+t1; c2 += (t2<t1)?1:0; \
477 t1 = t0+t0; t2 += (t1<t0)?1:0; \
478 c0 += t1; t2 += (c0<t1)?1:0; \
479 c1 += t2; c2 += (c1<t2)?1:0; \
480 }
481
/* Square of a[i] added into the accumulator. */
482#define sqr_add_c(a,i,c0,c1,c2) { \
483 BN_ULONG ta=(a)[i]; \
484 t1 = ta * ta; \
485 t2 = BN_UMULT_HIGH(ta,ta); \
486 c0 += t1; t2 += (c0<t1)?1:0; \
487 c1 += t2; c2 += (c1<t2)?1:0; \
488 }
489
/* 2*a[i]*a[j], implemented by forwarding to mul_add_c2. */
490#define sqr_add_c2(a,i,j,c0,c1,c2) \
491 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
492
493#else /* !BN_LLONG */
/* Portable variant: operands are split with LBITS/HBITS and multiplied via
 * mul64, which leaves the low/high product words in t1/t2.  bl and bh must
 * be in scope at the expansion site.  Like the BN_LLONG variant these expand
 * to bare statements.
 */
494#define mul_add_c(a,b,c0,c1,c2) \
495 t1=LBITS(a); t2=HBITS(a); \
496 bl=LBITS(b); bh=HBITS(b); \
497 mul64(t1,t2,bl,bh); \
498 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
499 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
500
/* Doubled product: the top bit (BN_TBIT) of each half is carried upward
 * before that half is doubled.
 */
501#define mul_add_c2(a,b,c0,c1,c2) \
502 t1=LBITS(a); t2=HBITS(a); \
503 bl=LBITS(b); bh=HBITS(b); \
504 mul64(t1,t2,bl,bh); \
505 if (t2 & BN_TBIT) c2++; \
506 t2=(t2+t2)&BN_MASK2; \
507 if (t1 & BN_TBIT) t2++; \
508 t1=(t1+t1)&BN_MASK2; \
509 c0=(c0+t1)&BN_MASK2; \
510 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
511 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
512
/* Square of a[i] via sqr64, added into the accumulator. */
513#define sqr_add_c(a,i,c0,c1,c2) \
514 sqr64(t1,t2,(a)[i]); \
515 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
516 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
517
/* 2*a[i]*a[j], implemented by forwarding to mul_add_c2. */
518#define sqr_add_c2(a,i,j,c0,c1,c2) \
519 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
520#endif /* !BN_LLONG */
521
/* Multiply the 8-word numbers a and b, writing the 16-word product to
 * r[0..15].
 *
 * The product is built one output word at a time: for output index k, every
 * a[i]*b[j] with i+j == k is added into a three-word accumulator with
 * mul_add_c.  The lowest accumulator word is then stored to r[k] and
 * cleared, and the three variables c1,c2,c3 rotate roles for the next
 * index.  The statement order must be kept exactly as written.
 */
522void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
523 {
 /* scratch variables referenced by name inside the mul_add_c expansion */
524#ifdef BN_LLONG
525 BN_ULLONG t;
526#else
527 BN_ULONG bl,bh;
528#endif
529 BN_ULONG t1,t2;
530 BN_ULONG c1,c2,c3;
531
532 c1=0;
533 c2=0;
534 c3=0;
535 mul_add_c(a[0],b[0],c1,c2,c3);
536 r[0]=c1;
537 c1=0;
538 mul_add_c(a[0],b[1],c2,c3,c1);
539 mul_add_c(a[1],b[0],c2,c3,c1);
540 r[1]=c2;
541 c2=0;
542 mul_add_c(a[2],b[0],c3,c1,c2);
543 mul_add_c(a[1],b[1],c3,c1,c2);
544 mul_add_c(a[0],b[2],c3,c1,c2);
545 r[2]=c3;
546 c3=0;
547 mul_add_c(a[0],b[3],c1,c2,c3);
548 mul_add_c(a[1],b[2],c1,c2,c3);
549 mul_add_c(a[2],b[1],c1,c2,c3);
550 mul_add_c(a[3],b[0],c1,c2,c3);
551 r[3]=c1;
552 c1=0;
553 mul_add_c(a[4],b[0],c2,c3,c1);
554 mul_add_c(a[3],b[1],c2,c3,c1);
555 mul_add_c(a[2],b[2],c2,c3,c1);
556 mul_add_c(a[1],b[3],c2,c3,c1);
557 mul_add_c(a[0],b[4],c2,c3,c1);
558 r[4]=c2;
559 c2=0;
560 mul_add_c(a[0],b[5],c3,c1,c2);
561 mul_add_c(a[1],b[4],c3,c1,c2);
562 mul_add_c(a[2],b[3],c3,c1,c2);
563 mul_add_c(a[3],b[2],c3,c1,c2);
564 mul_add_c(a[4],b[1],c3,c1,c2);
565 mul_add_c(a[5],b[0],c3,c1,c2);
566 r[5]=c3;
567 c3=0;
568 mul_add_c(a[6],b[0],c1,c2,c3);
569 mul_add_c(a[5],b[1],c1,c2,c3);
570 mul_add_c(a[4],b[2],c1,c2,c3);
571 mul_add_c(a[3],b[3],c1,c2,c3);
572 mul_add_c(a[2],b[4],c1,c2,c3);
573 mul_add_c(a[1],b[5],c1,c2,c3);
574 mul_add_c(a[0],b[6],c1,c2,c3);
575 r[6]=c1;
576 c1=0;
577 mul_add_c(a[0],b[7],c2,c3,c1);
578 mul_add_c(a[1],b[6],c2,c3,c1);
579 mul_add_c(a[2],b[5],c2,c3,c1);
580 mul_add_c(a[3],b[4],c2,c3,c1);
581 mul_add_c(a[4],b[3],c2,c3,c1);
582 mul_add_c(a[5],b[2],c2,c3,c1);
583 mul_add_c(a[6],b[1],c2,c3,c1);
584 mul_add_c(a[7],b[0],c2,c3,c1);
585 r[7]=c2;
586 c2=0;
587 mul_add_c(a[7],b[1],c3,c1,c2);
588 mul_add_c(a[6],b[2],c3,c1,c2);
589 mul_add_c(a[5],b[3],c3,c1,c2);
590 mul_add_c(a[4],b[4],c3,c1,c2);
591 mul_add_c(a[3],b[5],c3,c1,c2);
592 mul_add_c(a[2],b[6],c3,c1,c2);
593 mul_add_c(a[1],b[7],c3,c1,c2);
594 r[8]=c3;
595 c3=0;
596 mul_add_c(a[2],b[7],c1,c2,c3);
597 mul_add_c(a[3],b[6],c1,c2,c3);
598 mul_add_c(a[4],b[5],c1,c2,c3);
599 mul_add_c(a[5],b[4],c1,c2,c3);
600 mul_add_c(a[6],b[3],c1,c2,c3);
601 mul_add_c(a[7],b[2],c1,c2,c3);
602 r[9]=c1;
603 c1=0;
604 mul_add_c(a[7],b[3],c2,c3,c1);
605 mul_add_c(a[6],b[4],c2,c3,c1);
606 mul_add_c(a[5],b[5],c2,c3,c1);
607 mul_add_c(a[4],b[6],c2,c3,c1);
608 mul_add_c(a[3],b[7],c2,c3,c1);
609 r[10]=c2;
610 c2=0;
611 mul_add_c(a[4],b[7],c3,c1,c2);
612 mul_add_c(a[5],b[6],c3,c1,c2);
613 mul_add_c(a[6],b[5],c3,c1,c2);
614 mul_add_c(a[7],b[4],c3,c1,c2);
615 r[11]=c3;
616 c3=0;
617 mul_add_c(a[7],b[5],c1,c2,c3);
618 mul_add_c(a[6],b[6],c1,c2,c3);
619 mul_add_c(a[5],b[7],c1,c2,c3);
620 r[12]=c1;
621 c1=0;
622 mul_add_c(a[6],b[7],c2,c3,c1);
623 mul_add_c(a[7],b[6],c2,c3,c1);
624 r[13]=c2;
625 c2=0;
626 mul_add_c(a[7],b[7],c3,c1,c2);
 /* the last term leaves the two most significant words in c3 and c1 */
627 r[14]=c3;
628 r[15]=c1;
629 }
630
/* Multiply the 4-word numbers a and b, writing the 8-word product to
 * r[0..7].
 *
 * Same scheme as the 8-word version: for each output index k, all a[i]*b[j]
 * with i+j == k are accumulated with mul_add_c into a three-word
 * accumulator whose roles rotate among c1,c2,c3 after each stored word.
 * The statement order must be kept exactly as written.
 */
631void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
632 {
 /* scratch variables referenced by name inside the mul_add_c expansion */
633#ifdef BN_LLONG
634 BN_ULLONG t;
635#else
636 BN_ULONG bl,bh;
637#endif
638 BN_ULONG t1,t2;
639 BN_ULONG c1,c2,c3;
640
641 c1=0;
642 c2=0;
643 c3=0;
644 mul_add_c(a[0],b[0],c1,c2,c3);
645 r[0]=c1;
646 c1=0;
647 mul_add_c(a[0],b[1],c2,c3,c1);
648 mul_add_c(a[1],b[0],c2,c3,c1);
649 r[1]=c2;
650 c2=0;
651 mul_add_c(a[2],b[0],c3,c1,c2);
652 mul_add_c(a[1],b[1],c3,c1,c2);
653 mul_add_c(a[0],b[2],c3,c1,c2);
654 r[2]=c3;
655 c3=0;
656 mul_add_c(a[0],b[3],c1,c2,c3);
657 mul_add_c(a[1],b[2],c1,c2,c3);
658 mul_add_c(a[2],b[1],c1,c2,c3);
659 mul_add_c(a[3],b[0],c1,c2,c3);
660 r[3]=c1;
661 c1=0;
662 mul_add_c(a[3],b[1],c2,c3,c1);
663 mul_add_c(a[2],b[2],c2,c3,c1);
664 mul_add_c(a[1],b[3],c2,c3,c1);
665 r[4]=c2;
666 c2=0;
667 mul_add_c(a[2],b[3],c3,c1,c2);
668 mul_add_c(a[3],b[2],c3,c1,c2);
669 r[5]=c3;
670 c3=0;
671 mul_add_c(a[3],b[3],c1,c2,c3);
 /* the last term leaves the two most significant words in c1 and c2 */
672 r[6]=c1;
673 r[7]=c2;
674 }
675
/* Square the 8-word number a, writing the 16-word result to r[0..15].
 *
 * For each output index k the accumulator receives a[i]^2 (sqr_add_c) when
 * 2*i == k and 2*a[i]*a[j] (sqr_add_c2) for each pair i > j with i+j == k,
 * so every off-diagonal product is computed once and doubled.  The
 * accumulator roles rotate among c1,c2,c3 after each stored word; the
 * statement order must be kept exactly as written.
 */
676void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
677 {
 /* scratch variables referenced by name inside the macro expansions */
678#ifdef BN_LLONG
679 BN_ULLONG t,tt;
680#else
681 BN_ULONG bl,bh;
682#endif
683 BN_ULONG t1,t2;
684 BN_ULONG c1,c2,c3;
685
686 c1=0;
687 c2=0;
688 c3=0;
689 sqr_add_c(a,0,c1,c2,c3);
690 r[0]=c1;
691 c1=0;
692 sqr_add_c2(a,1,0,c2,c3,c1);
693 r[1]=c2;
694 c2=0;
695 sqr_add_c(a,1,c3,c1,c2);
696 sqr_add_c2(a,2,0,c3,c1,c2);
697 r[2]=c3;
698 c3=0;
699 sqr_add_c2(a,3,0,c1,c2,c3);
700 sqr_add_c2(a,2,1,c1,c2,c3);
701 r[3]=c1;
702 c1=0;
703 sqr_add_c(a,2,c2,c3,c1);
704 sqr_add_c2(a,3,1,c2,c3,c1);
705 sqr_add_c2(a,4,0,c2,c3,c1);
706 r[4]=c2;
707 c2=0;
708 sqr_add_c2(a,5,0,c3,c1,c2);
709 sqr_add_c2(a,4,1,c3,c1,c2);
710 sqr_add_c2(a,3,2,c3,c1,c2);
711 r[5]=c3;
712 c3=0;
713 sqr_add_c(a,3,c1,c2,c3);
714 sqr_add_c2(a,4,2,c1,c2,c3);
715 sqr_add_c2(a,5,1,c1,c2,c3);
716 sqr_add_c2(a,6,0,c1,c2,c3);
717 r[6]=c1;
718 c1=0;
719 sqr_add_c2(a,7,0,c2,c3,c1);
720 sqr_add_c2(a,6,1,c2,c3,c1);
721 sqr_add_c2(a,5,2,c2,c3,c1);
722 sqr_add_c2(a,4,3,c2,c3,c1);
723 r[7]=c2;
724 c2=0;
725 sqr_add_c(a,4,c3,c1,c2);
726 sqr_add_c2(a,5,3,c3,c1,c2);
727 sqr_add_c2(a,6,2,c3,c1,c2);
728 sqr_add_c2(a,7,1,c3,c1,c2);
729 r[8]=c3;
730 c3=0;
731 sqr_add_c2(a,7,2,c1,c2,c3);
732 sqr_add_c2(a,6,3,c1,c2,c3);
733 sqr_add_c2(a,5,4,c1,c2,c3);
734 r[9]=c1;
735 c1=0;
736 sqr_add_c(a,5,c2,c3,c1);
737 sqr_add_c2(a,6,4,c2,c3,c1);
738 sqr_add_c2(a,7,3,c2,c3,c1);
739 r[10]=c2;
740 c2=0;
741 sqr_add_c2(a,7,4,c3,c1,c2);
742 sqr_add_c2(a,6,5,c3,c1,c2);
743 r[11]=c3;
744 c3=0;
745 sqr_add_c(a,6,c1,c2,c3);
746 sqr_add_c2(a,7,5,c1,c2,c3);
747 r[12]=c1;
748 c1=0;
749 sqr_add_c2(a,7,6,c2,c3,c1);
750 r[13]=c2;
751 c2=0;
752 sqr_add_c(a,7,c3,c1,c2);
 /* the last term leaves the two most significant words in c3 and c1 */
753 r[14]=c3;
754 r[15]=c1;
755 }
756
/* Square the 4-word number a, writing the 8-word result to r[0..7].
 *
 * For each output index k the accumulator receives a[i]^2 (sqr_add_c) when
 * 2*i == k and 2*a[i]*a[j] (sqr_add_c2) for each pair i > j with i+j == k.
 * The accumulator roles rotate among c1,c2,c3 after each stored word; the
 * statement order must be kept exactly as written.
 */
757void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
758 {
 /* scratch variables referenced by name inside the macro expansions */
759#ifdef BN_LLONG
760 BN_ULLONG t,tt;
761#else
762 BN_ULONG bl,bh;
763#endif
764 BN_ULONG t1,t2;
765 BN_ULONG c1,c2,c3;
766
767 c1=0;
768 c2=0;
769 c3=0;
770 sqr_add_c(a,0,c1,c2,c3);
771 r[0]=c1;
772 c1=0;
773 sqr_add_c2(a,1,0,c2,c3,c1);
774 r[1]=c2;
775 c2=0;
776 sqr_add_c(a,1,c3,c1,c2);
777 sqr_add_c2(a,2,0,c3,c1,c2);
778 r[2]=c3;
779 c3=0;
780 sqr_add_c2(a,3,0,c1,c2,c3);
781 sqr_add_c2(a,2,1,c1,c2,c3);
782 r[3]=c1;
783 c1=0;
784 sqr_add_c(a,2,c2,c3,c1);
785 sqr_add_c2(a,3,1,c2,c3,c1);
786 r[4]=c2;
787 c2=0;
788 sqr_add_c2(a,3,2,c3,c1,c2);
789 r[5]=c3;
790 c3=0;
791 sqr_add_c(a,3,c1,c2,c3);
 /* the last term leaves the two most significant words in c1 and c2 */
792 r[6]=c1;
793 r[7]=c2;
794 }
795#else /* !BN_MUL_COMBA */
796
797/* hmm... is it faster just to do a multiply? */
798#undef bn_sqr_comba4
799void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
800 {
801 BN_ULONG t[8];
802 bn_sqr_normal(r,a,4,t);
803 }
804
805#undef bn_sqr_comba8
806void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
807 {
808 BN_ULONG t[16];
809 bn_sqr_normal(r,a,8,t);
810 }
811
812void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
813 {
814 r[4]=bn_mul_words( &(r[0]),a,4,b[0]);
815 r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
816 r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
817 r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
818 }
819
820void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
821 {
822 r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
823 r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
824 r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
825 r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
826 r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
827 r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
828 r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
829 r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
830 }
831
832#endif /* !BN_MUL_COMBA */
diff --git a/src/lib/libcrypto/bn/bn_blind.c b/src/lib/libcrypto/bn/bn_blind.c
new file mode 100644
index 0000000000..2d287e6d1b
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_blind.c
@@ -0,0 +1,144 @@
1/* crypto/bn/bn_blind.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63BN_BLINDING *BN_BLINDING_new(BIGNUM *A, BIGNUM *Ai, BIGNUM *mod)
64 {
65 BN_BLINDING *ret=NULL;
66
67 bn_check_top(Ai);
68 bn_check_top(mod);
69
70 if ((ret=(BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL)
71 {
72 BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE);
73 return(NULL);
74 }
75 memset(ret,0,sizeof(BN_BLINDING));
76 if ((ret->A=BN_new()) == NULL) goto err;
77 if ((ret->Ai=BN_new()) == NULL) goto err;
78 if (!BN_copy(ret->A,A)) goto err;
79 if (!BN_copy(ret->Ai,Ai)) goto err;
80 ret->mod=mod;
81 return(ret);
82err:
83 if (ret != NULL) BN_BLINDING_free(ret);
84 return(NULL);
85 }
86
87void BN_BLINDING_free(BN_BLINDING *r)
88 {
89 if(r == NULL)
90 return;
91
92 if (r->A != NULL) BN_free(r->A );
93 if (r->Ai != NULL) BN_free(r->Ai);
94 OPENSSL_free(r);
95 }
96
97int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
98 {
99 int ret=0;
100
101 if ((b->A == NULL) || (b->Ai == NULL))
102 {
103 BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITIALIZED);
104 goto err;
105 }
106
107 if (!BN_mod_mul(b->A,b->A,b->A,b->mod,ctx)) goto err;
108 if (!BN_mod_mul(b->Ai,b->Ai,b->Ai,b->mod,ctx)) goto err;
109
110 ret=1;
111err:
112 return(ret);
113 }
114
115int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
116 {
117 bn_check_top(n);
118
119 if ((b->A == NULL) || (b->Ai == NULL))
120 {
121 BNerr(BN_F_BN_BLINDING_CONVERT,BN_R_NOT_INITIALIZED);
122 return(0);
123 }
124 return(BN_mod_mul(n,n,b->A,b->mod,ctx));
125 }
126
127int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
128 {
129 int ret;
130
131 bn_check_top(n);
132 if ((b->A == NULL) || (b->Ai == NULL))
133 {
134 BNerr(BN_F_BN_BLINDING_INVERT,BN_R_NOT_INITIALIZED);
135 return(0);
136 }
137 if ((ret=BN_mod_mul(n,n,b->Ai,b->mod,ctx)) >= 0)
138 {
139 if (!BN_BLINDING_update(b,ctx))
140 return(0);
141 }
142 return(ret);
143 }
144
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c
new file mode 100644
index 0000000000..7daf19eb84
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_ctx.c
@@ -0,0 +1,155 @@
1/* crypto/bn/bn_ctx.c */
2/* Written by Ulf Moeller for the OpenSSL project. */
3/* ====================================================================
4 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * 3. All advertising materials mentioning features or use of this
19 * software must display the following acknowledgment:
20 * "This product includes software developed by the OpenSSL Project
21 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
22 *
23 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
24 * endorse or promote products derived from this software without
25 * prior written permission. For written permission, please contact
26 * openssl-core@openssl.org.
27 *
28 * 5. Products derived from this software may not be called "OpenSSL"
29 * nor may "OpenSSL" appear in their names without prior written
30 * permission of the OpenSSL Project.
31 *
32 * 6. Redistributions of any form whatsoever must retain the following
33 * acknowledgment:
34 * "This product includes software developed by the OpenSSL Project
35 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
38 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
40 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
43 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
44 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
46 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
47 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
48 * OF THE POSSIBILITY OF SUCH DAMAGE.
49 * ====================================================================
50 *
51 * This product includes cryptographic software written by Eric Young
52 * (eay@cryptsoft.com). This product includes software written by Tim
53 * Hudson (tjh@cryptsoft.com).
54 *
55 */
56
57#ifndef BN_CTX_DEBUG
58# undef NDEBUG /* avoid conflicting definitions */
59# define NDEBUG
60#endif
61
62#include <stdio.h>
63#include <assert.h>
64
65#include "cryptlib.h"
66#include "bn_lcl.h"
67
68
69BN_CTX *BN_CTX_new(void)
70 {
71 BN_CTX *ret;
72
73 ret=(BN_CTX *)OPENSSL_malloc(sizeof(BN_CTX));
74 if (ret == NULL)
75 {
76 BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
77 return(NULL);
78 }
79
80 BN_CTX_init(ret);
81 ret->flags=BN_FLG_MALLOCED;
82 return(ret);
83 }
84
85void BN_CTX_init(BN_CTX *ctx)
86 {
87#if 0 /* explicit version */
88 int i;
89 ctx->tos = 0;
90 ctx->flags = 0;
91 ctx->depth = 0;
92 ctx->too_many = 0;
93 for (i = 0; i < BN_CTX_NUM; i++)
94 BN_init(&(ctx->bn[i]));
95#else
96 memset(ctx, 0, sizeof *ctx);
97#endif
98 }
99
100void BN_CTX_free(BN_CTX *ctx)
101 {
102 int i;
103
104 if (ctx == NULL) return;
105 assert(ctx->depth == 0);
106
107 for (i=0; i < BN_CTX_NUM; i++)
108 BN_clear_free(&(ctx->bn[i]));
109 if (ctx->flags & BN_FLG_MALLOCED)
110 OPENSSL_free(ctx);
111 }
112
113void BN_CTX_start(BN_CTX *ctx)
114 {
115 if (ctx->depth < BN_CTX_NUM_POS)
116 ctx->pos[ctx->depth] = ctx->tos;
117 ctx->depth++;
118 }
119
120
121BIGNUM *BN_CTX_get(BN_CTX *ctx)
122 {
123 /* Note: If BN_CTX_get is ever changed to allocate BIGNUMs dynamically,
124 * make sure that if BN_CTX_get fails once it will return NULL again
125 * until BN_CTX_end is called. (This is so that callers have to check
126 * only the last return value.)
127 */
128 if (ctx->depth > BN_CTX_NUM_POS || ctx->tos >= BN_CTX_NUM)
129 {
130 if (!ctx->too_many)
131 {
132 BNerr(BN_F_BN_CTX_GET,BN_R_TOO_MANY_TEMPORARY_VARIABLES);
133 /* disable error code until BN_CTX_end is called: */
134 ctx->too_many = 1;
135 }
136 return NULL;
137 }
138 return (&(ctx->bn[ctx->tos++]));
139 }
140
141void BN_CTX_end(BN_CTX *ctx)
142 {
143 if (ctx == NULL) return;
144 assert(ctx->depth > 0);
145 if (ctx->depth == 0)
146 /* should never happen, but we can tolerate it if not in
147 * debug mode (could be a 'goto err' in the calling function
148 * before BN_CTX_start was reached) */
149 BN_CTX_start(ctx);
150
151 ctx->too_many = 0;
152 ctx->depth--;
153 if (ctx->depth < BN_CTX_NUM_POS)
154 ctx->tos = ctx->pos[ctx->depth];
155 }
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
new file mode 100644
index 0000000000..580d1201bc
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_div.c
@@ -0,0 +1,387 @@
1/* crypto/bn/bn_div.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <openssl/bn.h>
61#include "cryptlib.h"
62#include "bn_lcl.h"
63
64
65/* The old slow way */
66#if 0
67int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
68 BN_CTX *ctx)
69 {
70 int i,nm,nd;
71 int ret = 0;
72 BIGNUM *D;
73
74 bn_check_top(m);
75 bn_check_top(d);
76 if (BN_is_zero(d))
77 {
78 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
79 return(0);
80 }
81
82 if (BN_ucmp(m,d) < 0)
83 {
84 if (rem != NULL)
85 { if (BN_copy(rem,m) == NULL) return(0); }
86 if (dv != NULL) BN_zero(dv);
87 return(1);
88 }
89
90 BN_CTX_start(ctx);
91 D = BN_CTX_get(ctx);
92 if (dv == NULL) dv = BN_CTX_get(ctx);
93 if (rem == NULL) rem = BN_CTX_get(ctx);
94 if (D == NULL || dv == NULL || rem == NULL)
95 goto end;
96
97 nd=BN_num_bits(d);
98 nm=BN_num_bits(m);
99 if (BN_copy(D,d) == NULL) goto end;
100 if (BN_copy(rem,m) == NULL) goto end;
101
102 /* The next 2 are needed so we can do a dv->d[0]|=1 later
103 * since BN_lshift1 will only work once there is a value :-) */
104 BN_zero(dv);
105 bn_wexpand(dv,1);
106 dv->top=1;
107
108 if (!BN_lshift(D,D,nm-nd)) goto end;
109 for (i=nm-nd; i>=0; i--)
110 {
111 if (!BN_lshift1(dv,dv)) goto end;
112 if (BN_ucmp(rem,D) >= 0)
113 {
114 dv->d[0]|=1;
115 if (!BN_usub(rem,rem,D)) goto end;
116 }
117/* CAN IMPROVE (and have now :=) */
118 if (!BN_rshift1(D,D)) goto end;
119 }
120 rem->neg=BN_is_zero(rem)?0:m->neg;
121 dv->neg=m->neg^d->neg;
122 ret = 1;
123 end:
124 BN_CTX_end(ctx);
125 return(ret);
126 }
127
128#else
129
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
    && !defined(PEDANTIC) && !defined(BN_DIV3W)
# if defined(__GNUC__) && __GNUC__>=2
#  if defined(__i386) || defined (__i386__)
   /*
    * There were two reasons for implementing this template:
    * - GNU C generates a call to a function (__udivdi3 to be exact)
    *   in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
    *   understand why...);
    * - divl doesn't only calculate quotient, but also leaves
    *   remainder in %edx which we can definitely use here:-)
    *
    *					<appro@fy.chalmers.se>
    *
    * Usage notes:
    * - n0 (high word) goes in %edx and n1 (low word) in %eax; the
    *   quotient comes back in %eax and the remainder in %edx.
    * - The macro is not hygienic: it assigns to variables named 'q'
    *   and 'rem' that must exist in the expanding scope, and its value
    *   is 'q'.
    * - The divisor is constrained to "rm" (register or memory).  The
    *   general "g" constraint would also admit an immediate operand,
    *   which the div instruction cannot encode.
    */
#   define bn_div_words(n0,n1,d0)		\
	({  asm volatile (			\
		"divl	%4"			\
		: "=a"(q), "=d"(rem)		\
		: "a"(n1), "d"(n0), "rm"(d0)	\
		: "cc");			\
	    q;					\
	})
#   define REMAINDER_IS_ALREADY_CALCULATED
#  elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
   /*
    * Same story here, but it's 128-bit by 64-bit division. Wow!
    *					<appro@fy.chalmers.se>
    *
    * Same operand layout and caveats as the 32-bit template above:
    * writes the caller's 'q' and 'rem', divisor restricted to "rm".
    */
#   define bn_div_words(n0,n1,d0)		\
	({  asm volatile (			\
		"divq	%4"			\
		: "=a"(q), "=d"(rem)		\
		: "a"(n1), "d"(n0), "rm"(d0)	\
		: "cc");			\
	    q;					\
	})
#   define REMAINDER_IS_ALREADY_CALCULATED
#  endif /* __<cpu> */
# endif /* __GNUC__ */
#endif /* OPENSSL_NO_ASM */
170
171
172/* BN_div computes dv := num / divisor, rounding towards zero, and sets up
173 * rm such that dv*divisor + rm = num holds.
174 * Thus:
175 * dv->neg == num->neg ^ divisor->neg (unless the result is zero)
176 * rm->neg == num->neg (unless the remainder is zero)
177 * If 'dv' or 'rm' is NULL, the respective value is not returned.
178 */
179int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
180 BN_CTX *ctx)
181 {
182 int norm_shift,i,j,loop;
183 BIGNUM *tmp,wnum,*snum,*sdiv,*res;
184 BN_ULONG *resp,*wnump;
185 BN_ULONG d0,d1;
186 int num_n,div_n;
187
188 bn_check_top(num);
189 bn_check_top(divisor);
190
191 if (BN_is_zero(divisor))
192 {
193 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
194 return(0);
195 }
196
197 if (BN_ucmp(num,divisor) < 0)
198 {
199 if (rm != NULL)
200 { if (BN_copy(rm,num) == NULL) return(0); }
201 if (dv != NULL) BN_zero(dv);
202 return(1);
203 }
204
205 BN_CTX_start(ctx);
206 tmp=BN_CTX_get(ctx);
207 snum=BN_CTX_get(ctx);
208 sdiv=BN_CTX_get(ctx);
209 if (dv == NULL)
210 res=BN_CTX_get(ctx);
211 else res=dv;
212 if (sdiv == NULL || res == NULL) goto err;
213 tmp->neg=0;
214
215 /* First we normalise the numbers */
216 norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
217 if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
218 sdiv->neg=0;
219 norm_shift+=BN_BITS2;
220 if (!(BN_lshift(snum,num,norm_shift))) goto err;
221 snum->neg=0;
222 div_n=sdiv->top;
223 num_n=snum->top;
224 loop=num_n-div_n;
225
226 /* Lets setup a 'window' into snum
227 * This is the part that corresponds to the current
228 * 'area' being divided */
229 BN_init(&wnum);
230 wnum.d= &(snum->d[loop]);
231 wnum.top= div_n;
232 wnum.dmax= snum->dmax+1; /* a bit of a lie */
233
234 /* Get the top 2 words of sdiv */
235 /* i=sdiv->top; */
236 d0=sdiv->d[div_n-1];
237 d1=(div_n == 1)?0:sdiv->d[div_n-2];
238
239 /* pointer to the 'top' of snum */
240 wnump= &(snum->d[num_n-1]);
241
242 /* Setup to 'res' */
243 res->neg= (num->neg^divisor->neg);
244 if (!bn_wexpand(res,(loop+1))) goto err;
245 res->top=loop;
246 resp= &(res->d[loop-1]);
247
248 /* space for temp */
249 if (!bn_wexpand(tmp,(div_n+1))) goto err;
250
251 if (BN_ucmp(&wnum,sdiv) >= 0)
252 {
253 if (!BN_usub(&wnum,&wnum,sdiv)) goto err;
254 *resp=1;
255 res->d[res->top-1]=1;
256 }
257 else
258 res->top--;
259 if (res->top == 0)
260 res->neg = 0;
261 resp--;
262
263 for (i=0; i<loop-1; i++)
264 {
265 BN_ULONG q,l0;
266#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
267 BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
268 q=bn_div_3_words(wnump,d1,d0);
269#else
270 BN_ULONG n0,n1,rem=0;
271
272 n0=wnump[0];
273 n1=wnump[-1];
274 if (n0 == d0)
275 q=BN_MASK2;
276 else /* n0 < d0 */
277 {
278#ifdef BN_LLONG
279 BN_ULLONG t2;
280
281#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
282 q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
283#else
284 q=bn_div_words(n0,n1,d0);
285#ifdef BN_DEBUG_LEVITTE
286 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
287X) -> 0x%08X\n",
288 n0, n1, d0, q);
289#endif
290#endif
291
292#ifndef REMAINDER_IS_ALREADY_CALCULATED
293 /*
294 * rem doesn't have to be BN_ULLONG. The least we
295 * know it's less that d0, isn't it?
296 */
297 rem=(n1-q*d0)&BN_MASK2;
298#endif
299 t2=(BN_ULLONG)d1*q;
300
301 for (;;)
302 {
303 if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
304 break;
305 q--;
306 rem += d0;
307 if (rem < d0) break; /* don't let rem overflow */
308 t2 -= d1;
309 }
310#else /* !BN_LLONG */
311 BN_ULONG t2l,t2h,ql,qh;
312
313 q=bn_div_words(n0,n1,d0);
314#ifdef BN_DEBUG_LEVITTE
315 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
316X) -> 0x%08X\n",
317 n0, n1, d0, q);
318#endif
319#ifndef REMAINDER_IS_ALREADY_CALCULATED
320 rem=(n1-q*d0)&BN_MASK2;
321#endif
322
323#if defined(BN_UMULT_LOHI)
324 BN_UMULT_LOHI(t2l,t2h,d1,q);
325#elif defined(BN_UMULT_HIGH)
326 t2l = d1 * q;
327 t2h = BN_UMULT_HIGH(d1,q);
328#else
329 t2l=LBITS(d1); t2h=HBITS(d1);
330 ql =LBITS(q); qh =HBITS(q);
331 mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
332#endif
333
334 for (;;)
335 {
336 if ((t2h < rem) ||
337 ((t2h == rem) && (t2l <= wnump[-2])))
338 break;
339 q--;
340 rem += d0;
341 if (rem < d0) break; /* don't let rem overflow */
342 if (t2l < d1) t2h--; t2l -= d1;
343 }
344#endif /* !BN_LLONG */
345 }
346#endif /* !BN_DIV3W */
347
348 l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
349 wnum.d--; wnum.top++;
350 tmp->d[div_n]=l0;
351 for (j=div_n+1; j>0; j--)
352 if (tmp->d[j-1]) break;
353 tmp->top=j;
354
355 j=wnum.top;
356 if (!BN_sub(&wnum,&wnum,tmp)) goto err;
357
358 snum->top=snum->top+wnum.top-j;
359
360 if (wnum.neg)
361 {
362 q--;
363 j=wnum.top;
364 if (!BN_add(&wnum,&wnum,sdiv)) goto err;
365 snum->top+=wnum.top-j;
366 }
367 *(resp--)=q;
368 wnump--;
369 }
370 if (rm != NULL)
371 {
372 /* Keep a copy of the neg flag in num because if rm==num
373 * BN_rshift() will overwrite it.
374 */
375 int neg = num->neg;
376 BN_rshift(rm,snum,norm_shift);
377 if (!BN_is_zero(rm))
378 rm->neg = neg;
379 }
380 BN_CTX_end(ctx);
381 return(1);
382err:
383 BN_CTX_end(ctx);
384 return(0);
385 }
386
387#endif
diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c
new file mode 100644
index 0000000000..5dfac00c88
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_err.c
@@ -0,0 +1,139 @@
1/* crypto/bn/bn_err.c */
2/* ====================================================================
3 * Copyright (c) 1999-2005 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@OpenSSL.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56/* NOTE: this file was auto generated by the mkerr.pl script: any changes
57 * made to it will be overwritten when the script next updates this file,
58 * only reason strings will be preserved.
59 */
60
61#include <stdio.h>
62#include <openssl/err.h>
63#include <openssl/bn.h>
64
65/* BEGIN ERROR CODES */
66#ifndef OPENSSL_NO_ERR
67
68#define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0)
69#define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason)
70
71static ERR_STRING_DATA BN_str_functs[]=
72 {
73{ERR_FUNC(BN_F_BN_BLINDING_CONVERT), "BN_BLINDING_convert"},
74{ERR_FUNC(BN_F_BN_BLINDING_INVERT), "BN_BLINDING_invert"},
75{ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"},
76{ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"},
77{ERR_FUNC(BN_F_BN_BN2DEC), "BN_bn2dec"},
78{ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"},
79{ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"},
80{ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"},
81{ERR_FUNC(BN_F_BN_DIV), "BN_div"},
82{ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
83{ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"},
84{ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"},
85{ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"},
86{ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"},
87{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"},
88{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"},
89{ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"},
90{ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"},
91{ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"},
92{ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"},
93{ERR_FUNC(BN_F_BN_MOD_MUL_RECIPROCAL), "BN_mod_mul_reciprocal"},
94{ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"},
95{ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"},
96{ERR_FUNC(BN_F_BN_NEW), "BN_new"},
97{ERR_FUNC(BN_F_BN_RAND), "BN_rand"},
98{ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"},
99{ERR_FUNC(BN_F_BN_USUB), "BN_usub"},
100{0,NULL}
101 };
102
103static ERR_STRING_DATA BN_str_reasons[]=
104 {
105{ERR_REASON(BN_R_ARG2_LT_ARG3) ,"arg2 lt arg3"},
106{ERR_REASON(BN_R_BAD_RECIPROCAL) ,"bad reciprocal"},
107{ERR_REASON(BN_R_BIGNUM_TOO_LONG) ,"bignum too long"},
108{ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS),"called with even modulus"},
109{ERR_REASON(BN_R_DIV_BY_ZERO) ,"div by zero"},
110{ERR_REASON(BN_R_ENCODING_ERROR) ,"encoding error"},
111{ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),"expand on static bignum data"},
112{ERR_REASON(BN_R_INPUT_NOT_REDUCED) ,"input not reduced"},
113{ERR_REASON(BN_R_INVALID_LENGTH) ,"invalid length"},
114{ERR_REASON(BN_R_INVALID_RANGE) ,"invalid range"},
115{ERR_REASON(BN_R_NOT_A_SQUARE) ,"not a square"},
116{ERR_REASON(BN_R_NOT_INITIALIZED) ,"not initialized"},
117{ERR_REASON(BN_R_NO_INVERSE) ,"no inverse"},
118{ERR_REASON(BN_R_P_IS_NOT_PRIME) ,"p is not prime"},
119{ERR_REASON(BN_R_TOO_MANY_ITERATIONS) ,"too many iterations"},
120{ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),"too many temporary variables"},
121{0,NULL}
122 };
123
124#endif
125
126void ERR_load_BN_strings(void)
127 {
128 static int init=1;
129
130 if (init)
131 {
132 init=0;
133#ifndef OPENSSL_NO_ERR
134 ERR_load_strings(0,BN_str_functs);
135 ERR_load_strings(0,BN_str_reasons);
136#endif
137
138 }
139 }
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
new file mode 100644
index 0000000000..9e1e88abe8
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_exp.c
@@ -0,0 +1,987 @@
1/* crypto/bn/bn_exp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116/* maximum precomputation table size for *variable* sliding windows */
117#define TABLE_SIZE 32
118
119/* this one works - simple but works */
120int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
121 {
122 int i,bits,ret=0;
123 BIGNUM *v,*rr;
124
125 if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
126 {
127 /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
128 BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
129 return -1;
130 }
131
132 BN_CTX_start(ctx);
133 if ((r == a) || (r == p))
134 rr = BN_CTX_get(ctx);
135 else
136 rr = r;
137 if ((v = BN_CTX_get(ctx)) == NULL) goto err;
138
139 if (BN_copy(v,a) == NULL) goto err;
140 bits=BN_num_bits(p);
141
142 if (BN_is_odd(p))
143 { if (BN_copy(rr,a) == NULL) goto err; }
144 else { if (!BN_one(rr)) goto err; }
145
146 for (i=1; i<bits; i++)
147 {
148 if (!BN_sqr(v,v,ctx)) goto err;
149 if (BN_is_bit_set(p,i))
150 {
151 if (!BN_mul(rr,rr,v,ctx)) goto err;
152 }
153 }
154 ret=1;
155err:
156 if (r != rr) BN_copy(r,rr);
157 BN_CTX_end(ctx);
158 return(ret);
159 }
160
161
162int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
163 BN_CTX *ctx)
164 {
165 int ret;
166
167 bn_check_top(a);
168 bn_check_top(p);
169 bn_check_top(m);
170
171 /* For even modulus m = 2^k*m_odd, it might make sense to compute
172 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
173 * exponentiation for the odd part), using appropriate exponent
174 * reductions, and combine the results using the CRT.
175 *
176 * For now, we use Montgomery only if the modulus is odd; otherwise,
177 * exponentiation using the reciprocal-based quick remaindering
178 * algorithm is used.
179 *
180 * (Timing obtained with expspeed.c [computations a^p mod m
181 * where a, p, m are of the same length: 256, 512, 1024, 2048,
182 * 4096, 8192 bits], compared to the running time of the
183 * standard algorithm:
184 *
185 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
186 * 55 .. 77 % [UltraSparc processor, but
187 * debug-solaris-sparcv8-gcc conf.]
188 *
189 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
190 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
191 *
192 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
193 * at 2048 and more bits, but at 512 and 1024 bits, it was
194 * slower even than the standard algorithm!
195 *
196 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
197 * should be obtained when the new Montgomery reduction code
198 * has been integrated into OpenSSL.)
199 */
200
201#define MONT_MUL_MOD
202#define MONT_EXP_WORD
203#define RECP_MUL_MOD
204
205#ifdef MONT_MUL_MOD
206 /* I have finally been able to take out this pre-condition of
207 * the top bit being set. It was caused by an error in BN_div
208 * with negatives. There was also another problem when for a^b%m
209 * a >= m. eay 07-May-97 */
210/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
211
212 if (BN_is_odd(m))
213 {
214# ifdef MONT_EXP_WORD
215 if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) == 0))
216 {
217 BN_ULONG A = a->d[0];
218 ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
219 }
220 else
221# endif
222 ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
223 }
224 else
225#endif
226#ifdef RECP_MUL_MOD
227 { ret=BN_mod_exp_recp(r,a,p,m,ctx); }
228#else
229 { ret=BN_mod_exp_simple(r,a,p,m,ctx); }
230#endif
231
232 return(ret);
233 }
234
235
236int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
237 const BIGNUM *m, BN_CTX *ctx)
238 {
239 int i,j,bits,ret=0,wstart,wend,window,wvalue;
240 int start=1,ts=0;
241 BIGNUM *aa;
242 BIGNUM val[TABLE_SIZE];
243 BN_RECP_CTX recp;
244
245 if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
246 {
247 /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
248 BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
249 return -1;
250 }
251
252 bits=BN_num_bits(p);
253
254 if (bits == 0)
255 {
256 ret = BN_one(r);
257 return ret;
258 }
259
260 BN_CTX_start(ctx);
261 if ((aa = BN_CTX_get(ctx)) == NULL) goto err;
262
263 BN_RECP_CTX_init(&recp);
264 if (m->neg)
265 {
266 /* ignore sign of 'm' */
267 if (!BN_copy(aa, m)) goto err;
268 aa->neg = 0;
269 if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
270 }
271 else
272 {
273 if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
274 }
275
276 BN_init(&(val[0]));
277 ts=1;
278
279 if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err; /* 1 */
280 if (BN_is_zero(&(val[0])))
281 {
282 ret = BN_zero(r);
283 goto err;
284 }
285
286 window = BN_window_bits_for_exponent_size(bits);
287 if (window > 1)
288 {
289 if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx))
290 goto err; /* 2 */
291 j=1<<(window-1);
292 for (i=1; i<j; i++)
293 {
294 BN_init(&val[i]);
295 if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx))
296 goto err;
297 }
298 ts=i;
299 }
300
301 start=1; /* This is used to avoid multiplication etc
302 * when there is only the value '1' in the
303 * buffer. */
304 wvalue=0; /* The 'value' of the window */
305 wstart=bits-1; /* The top bit of the window */
306 wend=0; /* The bottom bit of the window */
307
308 if (!BN_one(r)) goto err;
309
310 for (;;)
311 {
312 if (BN_is_bit_set(p,wstart) == 0)
313 {
314 if (!start)
315 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
316 goto err;
317 if (wstart == 0) break;
318 wstart--;
319 continue;
320 }
321 /* We now have wstart on a 'set' bit, we now need to work out
322 * how bit a window to do. To do this we need to scan
323 * forward until the last set bit before the end of the
324 * window */
325 j=wstart;
326 wvalue=1;
327 wend=0;
328 for (i=1; i<window; i++)
329 {
330 if (wstart-i < 0) break;
331 if (BN_is_bit_set(p,wstart-i))
332 {
333 wvalue<<=(i-wend);
334 wvalue|=1;
335 wend=i;
336 }
337 }
338
339 /* wend is the size of the current window */
340 j=wend+1;
341 /* add the 'bytes above' */
342 if (!start)
343 for (i=0; i<j; i++)
344 {
345 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
346 goto err;
347 }
348
349 /* wvalue will be an odd number < 2^window */
350 if (!BN_mod_mul_reciprocal(r,r,&(val[wvalue>>1]),&recp,ctx))
351 goto err;
352
353 /* move the 'window' down further */
354 wstart-=wend+1;
355 wvalue=0;
356 start=0;
357 if (wstart < 0) break;
358 }
359 ret=1;
360err:
361 BN_CTX_end(ctx);
362 for (i=0; i<ts; i++)
363 BN_clear_free(&(val[i]));
364 BN_RECP_CTX_free(&recp);
365 return(ret);
366 }
367
368
369int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
370 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
371 {
372 int i,j,bits,ret=0,wstart,wend,window,wvalue;
373 int start=1,ts=0;
374 BIGNUM *d,*r;
375 const BIGNUM *aa;
376 BIGNUM val[TABLE_SIZE];
377 BN_MONT_CTX *mont=NULL;
378
379 if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
380 {
381 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
382 }
383
384 bn_check_top(a);
385 bn_check_top(p);
386 bn_check_top(m);
387
388 if (!(m->d[0] & 1))
389 {
390 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
391 return(0);
392 }
393 bits=BN_num_bits(p);
394 if (bits == 0)
395 {
396 ret = BN_one(rr);
397 return ret;
398 }
399
400 BN_CTX_start(ctx);
401 d = BN_CTX_get(ctx);
402 r = BN_CTX_get(ctx);
403 if (d == NULL || r == NULL) goto err;
404
405 /* If this is not done, things will break in the montgomery
406 * part */
407
408 if (in_mont != NULL)
409 mont=in_mont;
410 else
411 {
412 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
413 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
414 }
415
416 BN_init(&val[0]);
417 ts=1;
418 if (a->neg || BN_ucmp(a,m) >= 0)
419 {
420 if (!BN_nnmod(&(val[0]),a,m,ctx))
421 goto err;
422 aa= &(val[0]);
423 }
424 else
425 aa=a;
426 if (BN_is_zero(aa))
427 {
428 ret = BN_zero(rr);
429 goto err;
430 }
431 if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */
432
433 window = BN_window_bits_for_exponent_size(bits);
434 if (window > 1)
435 {
436 if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */
437 j=1<<(window-1);
438 for (i=1; i<j; i++)
439 {
440 BN_init(&(val[i]));
441 if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx))
442 goto err;
443 }
444 ts=i;
445 }
446
447 start=1; /* This is used to avoid multiplication etc
448 * when there is only the value '1' in the
449 * buffer. */
450 wvalue=0; /* The 'value' of the window */
451 wstart=bits-1; /* The top bit of the window */
452 wend=0; /* The bottom bit of the window */
453
454 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
455 for (;;)
456 {
457 if (BN_is_bit_set(p,wstart) == 0)
458 {
459 if (!start)
460 {
461 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
462 goto err;
463 }
464 if (wstart == 0) break;
465 wstart--;
466 continue;
467 }
468 /* We now have wstart on a 'set' bit, we now need to work out
469 * how bit a window to do. To do this we need to scan
470 * forward until the last set bit before the end of the
471 * window */
472 j=wstart;
473 wvalue=1;
474 wend=0;
475 for (i=1; i<window; i++)
476 {
477 if (wstart-i < 0) break;
478 if (BN_is_bit_set(p,wstart-i))
479 {
480 wvalue<<=(i-wend);
481 wvalue|=1;
482 wend=i;
483 }
484 }
485
486 /* wend is the size of the current window */
487 j=wend+1;
488 /* add the 'bytes above' */
489 if (!start)
490 for (i=0; i<j; i++)
491 {
492 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
493 goto err;
494 }
495
496 /* wvalue will be an odd number < 2^window */
497 if (!BN_mod_mul_montgomery(r,r,&(val[wvalue>>1]),mont,ctx))
498 goto err;
499
500 /* move the 'window' down further */
501 wstart-=wend+1;
502 wvalue=0;
503 start=0;
504 if (wstart < 0) break;
505 }
506 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
507 ret=1;
508err:
509 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
510 BN_CTX_end(ctx);
511 for (i=0; i<ts; i++)
512 BN_clear_free(&(val[i]));
513 return(ret);
514 }
515
516
517/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
518 * so that accessing any of these table values shows the same access pattern as far
519 * as cache lines are concerned. The following functions are used to transfer a BIGNUM
520 * from/to that table. */
521
522static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
523 {
524 size_t i, j;
525
526 if (bn_wexpand(b, top) == NULL)
527 return 0;
528 while (b->top < top)
529 {
530 b->d[b->top++] = 0;
531 }
532
533 for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
534 {
535 buf[j] = ((unsigned char*)b->d)[i];
536 }
537
538 bn_fix_top(b);
539 return 1;
540 }
541
542static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
543 {
544 size_t i, j;
545
546 if (bn_wexpand(b, top) == NULL)
547 return 0;
548
549 for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
550 {
551 ((unsigned char*)b->d)[i] = buf[j];
552 }
553
554 b->top = top;
555 bn_fix_top(b);
556 return 1;
557 }
558
/*
 * Given a pointer value, compute the next address that is a cache line
 * multiple.  The result is always strictly greater than x_ (an already
 * aligned pointer is advanced by a full line), so the underlying buffer
 * must have MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH spare bytes.
 * The pointer is converted through size_t rather than BN_ULONG, which
 * may be narrower than a pointer on some configurations.
 */
#define MOD_EXP_CTIME_ALIGN(x_) \
        ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
562
563/* This variant of BN_mod_exp_mont() uses fixed windows and the special
564 * precomputation memory layout to limit data-dependency to a minimum
565 * to protect secret exponents (cf. the hyper-threading timing attacks
566 * pointed out by Colin Percival,
567 * http://www.daemonology.net/hyperthreading-considered-harmful/)
568 */
569int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
570 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
571 {
572 int i,bits,ret=0,idx,window,wvalue;
573 int top;
574 BIGNUM *r;
575 const BIGNUM *aa;
576 BN_MONT_CTX *mont=NULL;
577
578 int numPowers;
579 unsigned char *powerbufFree=NULL;
580 int powerbufLen = 0;
581 unsigned char *powerbuf=NULL;
582 BIGNUM *computeTemp=NULL, *am=NULL;
583
584 bn_check_top(a);
585 bn_check_top(p);
586 bn_check_top(m);
587
588 top = m->top;
589
590 if (!(m->d[0] & 1))
591 {
592 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
593 return(0);
594 }
595 bits=BN_num_bits(p);
596 if (bits == 0)
597 {
598 ret = BN_one(rr);
599 return ret;
600 }
601
602 /* Initialize BIGNUM context and allocate intermediate result */
603 BN_CTX_start(ctx);
604 r = BN_CTX_get(ctx);
605 if (r == NULL) goto err;
606
607 /* Allocate a montgomery context if it was not supplied by the caller.
608 * If this is not done, things will break in the montgomery part.
609 */
610 if (in_mont != NULL)
611 mont=in_mont;
612 else
613 {
614 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
615 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
616 }
617
618 /* Get the window size to use with size of p. */
619 window = BN_window_bits_for_ctime_exponent_size(bits);
620
621 /* Allocate a buffer large enough to hold all of the pre-computed
622 * powers of a.
623 */
624 numPowers = 1 << window;
625 powerbufLen = sizeof(m->d[0])*top*numPowers;
626 if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
627 goto err;
628
629 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
630 memset(powerbuf, 0, powerbufLen);
631
632 /* Initialize the intermediate result. Do this early to save double conversion,
633 * once each for a^0 and intermediate result.
634 */
635 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
636 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
637
638 /* Initialize computeTemp as a^1 with montgomery precalcs */
639 computeTemp = BN_CTX_get(ctx);
640 am = BN_CTX_get(ctx);
641 if (computeTemp==NULL || am==NULL) goto err;
642
643 if (a->neg || BN_ucmp(a,m) >= 0)
644 {
645 if (!BN_mod(am,a,m,ctx))
646 goto err;
647 aa= am;
648 }
649 else
650 aa=a;
651 if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
652 if (!BN_copy(computeTemp, am)) goto err;
653 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
654
655 /* If the window size is greater than 1, then calculate
656 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
657 * (even powers could instead be computed as (a^(i/2))^2
658 * to use the slight performance advantage of sqr over mul).
659 */
660 if (window > 1)
661 {
662 for (i=2; i<numPowers; i++)
663 {
664 /* Calculate a^i = a^(i-1) * a */
665 if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
666 goto err;
667 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
668 }
669 }
670
671 /* Adjust the number of bits up to a multiple of the window size.
672 * If the exponent length is not a multiple of the window size, then
673 * this pads the most significant bits with zeros to normalize the
674 * scanning loop to there's no special cases.
675 *
676 * * NOTE: Making the window size a power of two less than the native
677 * * word size ensures that the padded bits won't go past the last
678 * * word in the internal BIGNUM structure. Going past the end will
679 * * still produce the correct result, but causes a different branch
680 * * to be taken in the BN_is_bit_set function.
681 */
682 bits = ((bits+window-1)/window)*window;
683 idx=bits-1; /* The top bit of the window */
684
685 /* Scan the exponent one window at a time starting from the most
686 * significant bits.
687 */
688 while (idx >= 0)
689 {
690 wvalue=0; /* The 'value' of the window */
691
692 /* Scan the window, squaring the result as we go */
693 for (i=0; i<window; i++,idx--)
694 {
695 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err;
696 wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
697 }
698
699 /* Fetch the appropriate pre-computed value from the pre-buf */
700 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
701
702 /* Multiply the result into the intermediate result */
703 if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
704 }
705
706 /* Convert the final result from montgomery to standard format */
707 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
708 ret=1;
709err:
710 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
711 if (powerbuf!=NULL)
712 {
713 OPENSSL_cleanse(powerbuf,powerbufLen);
714 OPENSSL_free(powerbufFree);
715 }
716 if (am!=NULL) BN_clear(am);
717 if (computeTemp!=NULL) BN_clear(computeTemp);
718 BN_CTX_end(ctx);
719 return(ret);
720 }
721
722int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
723 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
724 {
725 BN_MONT_CTX *mont = NULL;
726 int b, bits, ret=0;
727 int r_is_one;
728 BN_ULONG w, next_w;
729 BIGNUM *d, *r, *t;
730 BIGNUM *swap_tmp;
731#define BN_MOD_MUL_WORD(r, w, m) \
732 (BN_mul_word(r, (w)) && \
733 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
734 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
735 /* BN_MOD_MUL_WORD is only used with 'w' large,
736 * so the BN_ucmp test is probably more overhead
737 * than always using BN_mod (which uses BN_copy if
738 * a similar test returns true). */
739 /* We can use BN_mod and do not need BN_nnmod because our
740 * accumulator is never negative (the result of BN_mod does
741 * not depend on the sign of the modulus).
742 */
743#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
744 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
745
746 if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
747 {
748 /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
749 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
750 return -1;
751 }
752
753 bn_check_top(p);
754 bn_check_top(m);
755
756 if (m->top == 0 || !(m->d[0] & 1))
757 {
758 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
759 return(0);
760 }
761 if (m->top == 1)
762 a %= m->d[0]; /* make sure that 'a' is reduced */
763
764 bits = BN_num_bits(p);
765 if (bits == 0)
766 {
767 ret = BN_one(rr);
768 return ret;
769 }
770 if (a == 0)
771 {
772 ret = BN_zero(rr);
773 return ret;
774 }
775
776 BN_CTX_start(ctx);
777 d = BN_CTX_get(ctx);
778 r = BN_CTX_get(ctx);
779 t = BN_CTX_get(ctx);
780 if (d == NULL || r == NULL || t == NULL) goto err;
781
782 if (in_mont != NULL)
783 mont=in_mont;
784 else
785 {
786 if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
787 if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
788 }
789
790 r_is_one = 1; /* except for Montgomery factor */
791
792 /* bits-1 >= 0 */
793
794 /* The result is accumulated in the product r*w. */
795 w = a; /* bit 'bits-1' of 'p' is always set */
796 for (b = bits-2; b >= 0; b--)
797 {
798 /* First, square r*w. */
799 next_w = w*w;
800 if ((next_w/w) != w) /* overflow */
801 {
802 if (r_is_one)
803 {
804 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
805 r_is_one = 0;
806 }
807 else
808 {
809 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
810 }
811 next_w = 1;
812 }
813 w = next_w;
814 if (!r_is_one)
815 {
816 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
817 }
818
819 /* Second, multiply r*w by 'a' if exponent bit is set. */
820 if (BN_is_bit_set(p, b))
821 {
822 next_w = w*a;
823 if ((next_w/a) != w) /* overflow */
824 {
825 if (r_is_one)
826 {
827 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
828 r_is_one = 0;
829 }
830 else
831 {
832 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
833 }
834 next_w = a;
835 }
836 w = next_w;
837 }
838 }
839
840 /* Finally, set r:=r*w. */
841 if (w != 1)
842 {
843 if (r_is_one)
844 {
845 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
846 r_is_one = 0;
847 }
848 else
849 {
850 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
851 }
852 }
853
854 if (r_is_one) /* can happen only if a == 1*/
855 {
856 if (!BN_one(rr)) goto err;
857 }
858 else
859 {
860 if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
861 }
862 ret = 1;
863err:
864 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
865 BN_CTX_end(ctx);
866 return(ret);
867 }
868
869
870/* The old fallback, simple version :-) */
871int BN_mod_exp_simple(BIGNUM *r,
872 const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
873 BN_CTX *ctx)
874 {
875 int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0;
876 int start=1;
877 BIGNUM *d;
878 BIGNUM val[TABLE_SIZE];
879
880 if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
881 {
882 /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
883 BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
884 return -1;
885 }
886
887 bits=BN_num_bits(p);
888
889 if (bits == 0)
890 {
891 ret = BN_one(r);
892 return ret;
893 }
894
895 BN_CTX_start(ctx);
896 if ((d = BN_CTX_get(ctx)) == NULL) goto err;
897
898 BN_init(&(val[0]));
899 ts=1;
900 if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err; /* 1 */
901 if (BN_is_zero(&(val[0])))
902 {
903 ret = BN_zero(r);
904 goto err;
905 }
906
907 window = BN_window_bits_for_exponent_size(bits);
908 if (window > 1)
909 {
910 if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx))
911 goto err; /* 2 */
912 j=1<<(window-1);
913 for (i=1; i<j; i++)
914 {
915 BN_init(&(val[i]));
916 if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx))
917 goto err;
918 }
919 ts=i;
920 }
921
922 start=1; /* This is used to avoid multiplication etc
923 * when there is only the value '1' in the
924 * buffer. */
925 wvalue=0; /* The 'value' of the window */
926 wstart=bits-1; /* The top bit of the window */
927 wend=0; /* The bottom bit of the window */
928
929 if (!BN_one(r)) goto err;
930
931 for (;;)
932 {
933 if (BN_is_bit_set(p,wstart) == 0)
934 {
935 if (!start)
936 if (!BN_mod_mul(r,r,r,m,ctx))
937 goto err;
938 if (wstart == 0) break;
939 wstart--;
940 continue;
941 }
942 /* We now have wstart on a 'set' bit, we now need to work out
943 * how bit a window to do. To do this we need to scan
944 * forward until the last set bit before the end of the
945 * window */
946 j=wstart;
947 wvalue=1;
948 wend=0;
949 for (i=1; i<window; i++)
950 {
951 if (wstart-i < 0) break;
952 if (BN_is_bit_set(p,wstart-i))
953 {
954 wvalue<<=(i-wend);
955 wvalue|=1;
956 wend=i;
957 }
958 }
959
960 /* wend is the size of the current window */
961 j=wend+1;
962 /* add the 'bytes above' */
963 if (!start)
964 for (i=0; i<j; i++)
965 {
966 if (!BN_mod_mul(r,r,r,m,ctx))
967 goto err;
968 }
969
970 /* wvalue will be an odd number < 2^window */
971 if (!BN_mod_mul(r,r,&(val[wvalue>>1]),m,ctx))
972 goto err;
973
974 /* move the 'window' down further */
975 wstart-=wend+1;
976 wvalue=0;
977 start=0;
978 if (wstart < 0) break;
979 }
980 ret=1;
981err:
982 BN_CTX_end(ctx);
983 for (i=0; i<ts; i++)
984 BN_clear_free(&(val[i]));
985 return(ret);
986 }
987
diff --git a/src/lib/libcrypto/bn/bn_exp2.c b/src/lib/libcrypto/bn/bn_exp2.c
new file mode 100644
index 0000000000..73ccd58a83
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_exp2.c
@@ -0,0 +1,313 @@
1/* crypto/bn/bn_exp2.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116#define TABLE_SIZE 32
117
118int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
119 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
120 BN_CTX *ctx, BN_MONT_CTX *in_mont)
121 {
122 int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2;
123 int r_is_one=1,ts1=0,ts2=0;
124 BIGNUM *d,*r;
125 const BIGNUM *a_mod_m;
126 BIGNUM val1[TABLE_SIZE], val2[TABLE_SIZE];
127 BN_MONT_CTX *mont=NULL;
128
129 bn_check_top(a1);
130 bn_check_top(p1);
131 bn_check_top(a2);
132 bn_check_top(p2);
133 bn_check_top(m);
134
135 if (!(m->d[0] & 1))
136 {
137 BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
138 return(0);
139 }
140 bits1=BN_num_bits(p1);
141 bits2=BN_num_bits(p2);
142 if ((bits1 == 0) && (bits2 == 0))
143 {
144 ret = BN_one(rr);
145 return ret;
146 }
147
148 bits=(bits1 > bits2)?bits1:bits2;
149
150 BN_CTX_start(ctx);
151 d = BN_CTX_get(ctx);
152 r = BN_CTX_get(ctx);
153 if (d == NULL || r == NULL) goto err;
154
155 if (in_mont != NULL)
156 mont=in_mont;
157 else
158 {
159 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
160 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
161 }
162
163 window1 = BN_window_bits_for_exponent_size(bits1);
164 window2 = BN_window_bits_for_exponent_size(bits2);
165
166 /*
167 * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)
168 */
169 BN_init(&val1[0]);
170 ts1=1;
171 if (a1->neg || BN_ucmp(a1,m) >= 0)
172 {
173 if (!BN_mod(&(val1[0]),a1,m,ctx))
174 goto err;
175 a_mod_m = &(val1[0]);
176 }
177 else
178 a_mod_m = a1;
179 if (BN_is_zero(a_mod_m))
180 {
181 ret = BN_zero(rr);
182 goto err;
183 }
184
185 if (!BN_to_montgomery(&(val1[0]),a_mod_m,mont,ctx)) goto err;
186 if (window1 > 1)
187 {
188 if (!BN_mod_mul_montgomery(d,&(val1[0]),&(val1[0]),mont,ctx)) goto err;
189
190 j=1<<(window1-1);
191 for (i=1; i<j; i++)
192 {
193 BN_init(&(val1[i]));
194 if (!BN_mod_mul_montgomery(&(val1[i]),&(val1[i-1]),d,mont,ctx))
195 goto err;
196 }
197 ts1=i;
198 }
199
200
201 /*
202 * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)
203 */
204 BN_init(&val2[0]);
205 ts2=1;
206 if (a2->neg || BN_ucmp(a2,m) >= 0)
207 {
208 if (!BN_mod(&(val2[0]),a2,m,ctx))
209 goto err;
210 a_mod_m = &(val2[0]);
211 }
212 else
213 a_mod_m = a2;
214 if (BN_is_zero(a_mod_m))
215 {
216 ret = BN_zero(rr);
217 goto err;
218 }
219 if (!BN_to_montgomery(&(val2[0]),a_mod_m,mont,ctx)) goto err;
220 if (window2 > 1)
221 {
222 if (!BN_mod_mul_montgomery(d,&(val2[0]),&(val2[0]),mont,ctx)) goto err;
223
224 j=1<<(window2-1);
225 for (i=1; i<j; i++)
226 {
227 BN_init(&(val2[i]));
228 if (!BN_mod_mul_montgomery(&(val2[i]),&(val2[i-1]),d,mont,ctx))
229 goto err;
230 }
231 ts2=i;
232 }
233
234
235 /* Now compute the power product, using independent windows. */
236 r_is_one=1;
237 wvalue1=0; /* The 'value' of the first window */
238 wvalue2=0; /* The 'value' of the second window */
239 wpos1=0; /* If wvalue1 > 0, the bottom bit of the first window */
240 wpos2=0; /* If wvalue2 > 0, the bottom bit of the second window */
241
242 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
243 for (b=bits-1; b>=0; b--)
244 {
245 if (!r_is_one)
246 {
247 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
248 goto err;
249 }
250
251 if (!wvalue1)
252 if (BN_is_bit_set(p1, b))
253 {
254 /* consider bits b-window1+1 .. b for this window */
255 i = b-window1+1;
256 while (!BN_is_bit_set(p1, i)) /* works for i<0 */
257 i++;
258 wpos1 = i;
259 wvalue1 = 1;
260 for (i = b-1; i >= wpos1; i--)
261 {
262 wvalue1 <<= 1;
263 if (BN_is_bit_set(p1, i))
264 wvalue1++;
265 }
266 }
267
268 if (!wvalue2)
269 if (BN_is_bit_set(p2, b))
270 {
271 /* consider bits b-window2+1 .. b for this window */
272 i = b-window2+1;
273 while (!BN_is_bit_set(p2, i))
274 i++;
275 wpos2 = i;
276 wvalue2 = 1;
277 for (i = b-1; i >= wpos2; i--)
278 {
279 wvalue2 <<= 1;
280 if (BN_is_bit_set(p2, i))
281 wvalue2++;
282 }
283 }
284
285 if (wvalue1 && b == wpos1)
286 {
287 /* wvalue1 is odd and < 2^window1 */
288 if (!BN_mod_mul_montgomery(r,r,&(val1[wvalue1>>1]),mont,ctx))
289 goto err;
290 wvalue1 = 0;
291 r_is_one = 0;
292 }
293
294 if (wvalue2 && b == wpos2)
295 {
296 /* wvalue2 is odd and < 2^window2 */
297 if (!BN_mod_mul_montgomery(r,r,&(val2[wvalue2>>1]),mont,ctx))
298 goto err;
299 wvalue2 = 0;
300 r_is_one = 0;
301 }
302 }
303 BN_from_montgomery(rr,r,mont,ctx);
304 ret=1;
305err:
306 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
307 BN_CTX_end(ctx);
308 for (i=0; i<ts1; i++)
309 BN_clear_free(&(val1[i]));
310 for (i=0; i<ts2; i++)
311 BN_clear_free(&(val2[i]));
312 return(ret);
313 }
diff --git a/src/lib/libcrypto/bn/bn_gcd.c b/src/lib/libcrypto/bn/bn_gcd.c
new file mode 100644
index 0000000000..7649f63fd2
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_gcd.c
@@ -0,0 +1,490 @@
1/* crypto/bn/bn_gcd.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include "cryptlib.h"
113#include "bn_lcl.h"
114
115static BIGNUM *euclid(BIGNUM *a, BIGNUM *b);
116
117int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
118 {
119 BIGNUM *a,*b,*t;
120 int ret=0;
121
122 bn_check_top(in_a);
123 bn_check_top(in_b);
124
125 BN_CTX_start(ctx);
126 a = BN_CTX_get(ctx);
127 b = BN_CTX_get(ctx);
128 if (a == NULL || b == NULL) goto err;
129
130 if (BN_copy(a,in_a) == NULL) goto err;
131 if (BN_copy(b,in_b) == NULL) goto err;
132 a->neg = 0;
133 b->neg = 0;
134
135 if (BN_cmp(a,b) < 0) { t=a; a=b; b=t; }
136 t=euclid(a,b);
137 if (t == NULL) goto err;
138
139 if (BN_copy(r,t) == NULL) goto err;
140 ret=1;
141err:
142 BN_CTX_end(ctx);
143 return(ret);
144 }
145
146static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
147 {
148 BIGNUM *t;
149 int shifts=0;
150
151 bn_check_top(a);
152 bn_check_top(b);
153
154 /* 0 <= b <= a */
155 while (!BN_is_zero(b))
156 {
157 /* 0 < b <= a */
158
159 if (BN_is_odd(a))
160 {
161 if (BN_is_odd(b))
162 {
163 if (!BN_sub(a,a,b)) goto err;
164 if (!BN_rshift1(a,a)) goto err;
165 if (BN_cmp(a,b) < 0)
166 { t=a; a=b; b=t; }
167 }
168 else /* a odd - b even */
169 {
170 if (!BN_rshift1(b,b)) goto err;
171 if (BN_cmp(a,b) < 0)
172 { t=a; a=b; b=t; }
173 }
174 }
175 else /* a is even */
176 {
177 if (BN_is_odd(b))
178 {
179 if (!BN_rshift1(a,a)) goto err;
180 if (BN_cmp(a,b) < 0)
181 { t=a; a=b; b=t; }
182 }
183 else /* a even - b even */
184 {
185 if (!BN_rshift1(a,a)) goto err;
186 if (!BN_rshift1(b,b)) goto err;
187 shifts++;
188 }
189 }
190 /* 0 <= b <= a */
191 }
192
193 if (shifts)
194 {
195 if (!BN_lshift(a,a,shifts)) goto err;
196 }
197 return(a);
198err:
199 return(NULL);
200 }
201
202
203/* solves ax == 1 (mod n) */
204BIGNUM *BN_mod_inverse(BIGNUM *in,
205 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
206 {
207 BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
208 BIGNUM *ret=NULL;
209 int sign;
210
211 bn_check_top(a);
212 bn_check_top(n);
213
214 BN_CTX_start(ctx);
215 A = BN_CTX_get(ctx);
216 B = BN_CTX_get(ctx);
217 X = BN_CTX_get(ctx);
218 D = BN_CTX_get(ctx);
219 M = BN_CTX_get(ctx);
220 Y = BN_CTX_get(ctx);
221 T = BN_CTX_get(ctx);
222 if (T == NULL) goto err;
223
224 if (in == NULL)
225 R=BN_new();
226 else
227 R=in;
228 if (R == NULL) goto err;
229
230 BN_one(X);
231 BN_zero(Y);
232 if (BN_copy(B,a) == NULL) goto err;
233 if (BN_copy(A,n) == NULL) goto err;
234 A->neg = 0;
235 if (B->neg || (BN_ucmp(B, A) >= 0))
236 {
237 if (!BN_nnmod(B, B, A, ctx)) goto err;
238 }
239 sign = -1;
240 /* From B = a mod |n|, A = |n| it follows that
241 *
242 * 0 <= B < A,
243 * -sign*X*a == B (mod |n|),
244 * sign*Y*a == A (mod |n|).
245 */
246
247 if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048)))
248 {
249 /* Binary inversion algorithm; requires odd modulus.
250 * This is faster than the general algorithm if the modulus
251 * is sufficiently small (about 400 .. 500 bits on 32-bit
252 * sytems, but much more on 64-bit systems) */
253 int shift;
254
255 while (!BN_is_zero(B))
256 {
257 /*
258 * 0 < B < |n|,
259 * 0 < A <= |n|,
260 * (1) -sign*X*a == B (mod |n|),
261 * (2) sign*Y*a == A (mod |n|)
262 */
263
264 /* Now divide B by the maximum possible power of two in the integers,
265 * and divide X by the same value mod |n|.
266 * When we're done, (1) still holds. */
267 shift = 0;
268 while (!BN_is_bit_set(B, shift)) /* note that 0 < B */
269 {
270 shift++;
271
272 if (BN_is_odd(X))
273 {
274 if (!BN_uadd(X, X, n)) goto err;
275 }
276 /* now X is even, so we can easily divide it by two */
277 if (!BN_rshift1(X, X)) goto err;
278 }
279 if (shift > 0)
280 {
281 if (!BN_rshift(B, B, shift)) goto err;
282 }
283
284
285 /* Same for A and Y. Afterwards, (2) still holds. */
286 shift = 0;
287 while (!BN_is_bit_set(A, shift)) /* note that 0 < A */
288 {
289 shift++;
290
291 if (BN_is_odd(Y))
292 {
293 if (!BN_uadd(Y, Y, n)) goto err;
294 }
295 /* now Y is even */
296 if (!BN_rshift1(Y, Y)) goto err;
297 }
298 if (shift > 0)
299 {
300 if (!BN_rshift(A, A, shift)) goto err;
301 }
302
303
304 /* We still have (1) and (2).
305 * Both A and B are odd.
306 * The following computations ensure that
307 *
308 * 0 <= B < |n|,
309 * 0 < A < |n|,
310 * (1) -sign*X*a == B (mod |n|),
311 * (2) sign*Y*a == A (mod |n|),
312 *
313 * and that either A or B is even in the next iteration.
314 */
315 if (BN_ucmp(B, A) >= 0)
316 {
317 /* -sign*(X + Y)*a == B - A (mod |n|) */
318 if (!BN_uadd(X, X, Y)) goto err;
319 /* NB: we could use BN_mod_add_quick(X, X, Y, n), but that
320 * actually makes the algorithm slower */
321 if (!BN_usub(B, B, A)) goto err;
322 }
323 else
324 {
325 /* sign*(X + Y)*a == A - B (mod |n|) */
326 if (!BN_uadd(Y, Y, X)) goto err;
327 /* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */
328 if (!BN_usub(A, A, B)) goto err;
329 }
330 }
331 }
332 else
333 {
334 /* general inversion algorithm */
335
336 while (!BN_is_zero(B))
337 {
338 BIGNUM *tmp;
339
340 /*
341 * 0 < B < A,
342 * (*) -sign*X*a == B (mod |n|),
343 * sign*Y*a == A (mod |n|)
344 */
345
346 /* (D, M) := (A/B, A%B) ... */
347 if (BN_num_bits(A) == BN_num_bits(B))
348 {
349 if (!BN_one(D)) goto err;
350 if (!BN_sub(M,A,B)) goto err;
351 }
352 else if (BN_num_bits(A) == BN_num_bits(B) + 1)
353 {
354 /* A/B is 1, 2, or 3 */
355 if (!BN_lshift1(T,B)) goto err;
356 if (BN_ucmp(A,T) < 0)
357 {
358 /* A < 2*B, so D=1 */
359 if (!BN_one(D)) goto err;
360 if (!BN_sub(M,A,B)) goto err;
361 }
362 else
363 {
364 /* A >= 2*B, so D=2 or D=3 */
365 if (!BN_sub(M,A,T)) goto err;
366 if (!BN_add(D,T,B)) goto err; /* use D (:= 3*B) as temp */
367 if (BN_ucmp(A,D) < 0)
368 {
369 /* A < 3*B, so D=2 */
370 if (!BN_set_word(D,2)) goto err;
371 /* M (= A - 2*B) already has the correct value */
372 }
373 else
374 {
375 /* only D=3 remains */
376 if (!BN_set_word(D,3)) goto err;
377 /* currently M = A - 2*B, but we need M = A - 3*B */
378 if (!BN_sub(M,M,B)) goto err;
379 }
380 }
381 }
382 else
383 {
384 if (!BN_div(D,M,A,B,ctx)) goto err;
385 }
386
387 /* Now
388 * A = D*B + M;
389 * thus we have
390 * (**) sign*Y*a == D*B + M (mod |n|).
391 */
392
393 tmp=A; /* keep the BIGNUM object, the value does not matter */
394
395 /* (A, B) := (B, A mod B) ... */
396 A=B;
397 B=M;
398 /* ... so we have 0 <= B < A again */
399
400 /* Since the former M is now B and the former B is now A,
401 * (**) translates into
402 * sign*Y*a == D*A + B (mod |n|),
403 * i.e.
404 * sign*Y*a - D*A == B (mod |n|).
405 * Similarly, (*) translates into
406 * -sign*X*a == A (mod |n|).
407 *
408 * Thus,
409 * sign*Y*a + D*sign*X*a == B (mod |n|),
410 * i.e.
411 * sign*(Y + D*X)*a == B (mod |n|).
412 *
413 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
414 * -sign*X*a == B (mod |n|),
415 * sign*Y*a == A (mod |n|).
416 * Note that X and Y stay non-negative all the time.
417 */
418
419 /* most of the time D is very small, so we can optimize tmp := D*X+Y */
420 if (BN_is_one(D))
421 {
422 if (!BN_add(tmp,X,Y)) goto err;
423 }
424 else
425 {
426 if (BN_is_word(D,2))
427 {
428 if (!BN_lshift1(tmp,X)) goto err;
429 }
430 else if (BN_is_word(D,4))
431 {
432 if (!BN_lshift(tmp,X,2)) goto err;
433 }
434 else if (D->top == 1)
435 {
436 if (!BN_copy(tmp,X)) goto err;
437 if (!BN_mul_word(tmp,D->d[0])) goto err;
438 }
439 else
440 {
441 if (!BN_mul(tmp,D,X,ctx)) goto err;
442 }
443 if (!BN_add(tmp,tmp,Y)) goto err;
444 }
445
446 M=Y; /* keep the BIGNUM object, the value does not matter */
447 Y=X;
448 X=tmp;
449 sign = -sign;
450 }
451 }
452
453 /*
454 * The while loop (Euclid's algorithm) ends when
455 * A == gcd(a,n);
456 * we have
457 * sign*Y*a == A (mod |n|),
458 * where Y is non-negative.
459 */
460
461 if (sign < 0)
462 {
463 if (!BN_sub(Y,n,Y)) goto err;
464 }
465 /* Now Y*a == A (mod |n|). */
466
467
468 if (BN_is_one(A))
469 {
470 /* Y*a == 1 (mod |n|) */
471 if (!Y->neg && BN_ucmp(Y,n) < 0)
472 {
473 if (!BN_copy(R,Y)) goto err;
474 }
475 else
476 {
477 if (!BN_nnmod(R,Y,n,ctx)) goto err;
478 }
479 }
480 else
481 {
482 BNerr(BN_F_BN_MOD_INVERSE,BN_R_NO_INVERSE);
483 goto err;
484 }
485 ret=R;
486err:
487 if ((ret == NULL) && (in == NULL)) BN_free(R);
488 BN_CTX_end(ctx);
489 return(ret);
490 }
diff --git a/src/lib/libcrypto/bn/bn_kron.c b/src/lib/libcrypto/bn/bn_kron.c
new file mode 100644
index 0000000000..49f75594ae
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_kron.c
@@ -0,0 +1,182 @@
1/* crypto/bn/bn_kron.c */
2/* ====================================================================
3 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include "bn_lcl.h"
57
58
/* least significant word of n; evaluates to 0 when n has no words in use */
#define BN_lsw(n) (((n)->top != 0) ? (n)->d[0] : (BN_ULONG) 0)
61
62/* Returns -2 for errors because both -1 and 0 are valid results. */
63int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
64 {
65 int i;
66 int ret = -2; /* avoid 'uninitialized' warning */
67 int err = 0;
68 BIGNUM *A, *B, *tmp;
69 /* In 'tab', only odd-indexed entries are relevant:
70 * For any odd BIGNUM n,
71 * tab[BN_lsw(n) & 7]
72 * is $(-1)^{(n^2-1)/8}$ (using TeX notation).
73 * Note that the sign of n does not matter.
74 */
75 static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};
76
77 BN_CTX_start(ctx);
78 A = BN_CTX_get(ctx);
79 B = BN_CTX_get(ctx);
80 if (B == NULL) goto end;
81
82 err = !BN_copy(A, a);
83 if (err) goto end;
84 err = !BN_copy(B, b);
85 if (err) goto end;
86
87 /*
88 * Kronecker symbol, imlemented according to Henri Cohen,
89 * "A Course in Computational Algebraic Number Theory"
90 * (algorithm 1.4.10).
91 */
92
93 /* Cohen's step 1: */
94
95 if (BN_is_zero(B))
96 {
97 ret = BN_abs_is_word(A, 1);
98 goto end;
99 }
100
101 /* Cohen's step 2: */
102
103 if (!BN_is_odd(A) && !BN_is_odd(B))
104 {
105 ret = 0;
106 goto end;
107 }
108
109 /* now B is non-zero */
110 i = 0;
111 while (!BN_is_bit_set(B, i))
112 i++;
113 err = !BN_rshift(B, B, i);
114 if (err) goto end;
115 if (i & 1)
116 {
117 /* i is odd */
118 /* (thus B was even, thus A must be odd!) */
119
120 /* set 'ret' to $(-1)^{(A^2-1)/8}$ */
121 ret = tab[BN_lsw(A) & 7];
122 }
123 else
124 {
125 /* i is even */
126 ret = 1;
127 }
128
129 if (B->neg)
130 {
131 B->neg = 0;
132 if (A->neg)
133 ret = -ret;
134 }
135
136 /* now B is positive and odd, so what remains to be done is
137 * to compute the Jacobi symbol (A/B) and multiply it by 'ret' */
138
139 while (1)
140 {
141 /* Cohen's step 3: */
142
143 /* B is positive and odd */
144
145 if (BN_is_zero(A))
146 {
147 ret = BN_is_one(B) ? ret : 0;
148 goto end;
149 }
150
151 /* now A is non-zero */
152 i = 0;
153 while (!BN_is_bit_set(A, i))
154 i++;
155 err = !BN_rshift(A, A, i);
156 if (err) goto end;
157 if (i & 1)
158 {
159 /* i is odd */
160 /* multiply 'ret' by $(-1)^{(B^2-1)/8}$ */
161 ret = ret * tab[BN_lsw(B) & 7];
162 }
163
164 /* Cohen's step 4: */
165 /* multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$ */
166 if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2)
167 ret = -ret;
168
169 /* (A, B) := (B mod |A|, |A|) */
170 err = !BN_nnmod(B, B, A, ctx);
171 if (err) goto end;
172 tmp = A; A = B; B = tmp;
173 tmp->neg = 0;
174 }
175
176 end:
177 BN_CTX_end(ctx);
178 if (err)
179 return -2;
180 else
181 return ret;
182 }
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h
new file mode 100644
index 0000000000..a84998f2bd
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_lcl.h
@@ -0,0 +1,492 @@
1/* crypto/bn/bn_lcl.h */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#ifndef HEADER_BN_LCL_H
113#define HEADER_BN_LCL_H
114
115#include <openssl/bn.h>
116
117#ifdef __cplusplus
118extern "C" {
119#endif
120
121
/*
 * BN_CTX: a fixed-size pool of BN_CTX_NUM temporary BIGNUMs ('bn') together
 * with an array of BN_CTX_NUM_POS ints ('pos') and a few scalar bookkeeping
 * fields.
 * NOTE(review): 'tos' is presumably the index of the next unused bn[] slot,
 * 'pos'/'depth' presumably a stack of saved 'tos' values for nested users,
 * and 'too_many' presumably an overflow indicator -- none of that is visible
 * in this header; confirm against the code that manipulates the context.
 */
122/* Used for temp variables */
123#define BN_CTX_NUM 32
124#define BN_CTX_NUM_POS 12
125struct bignum_ctx
126 {
127 int tos;
128 BIGNUM bn[BN_CTX_NUM];
129 int flags;
130 int depth;
131 int pos[BN_CTX_NUM_POS];
132 int too_many;
133 } /* BN_CTX */;
134
135
136/*
137 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
138 *
139 *
140 * For window size 'w' (w >= 2) and a random 'b' bits exponent,
141 * the number of multiplications is a constant plus on average
142 *
143 * 2^(w-1) + (b-w)/(w+1);
144 *
145 * here 2^(w-1) is for precomputing the table (we actually need
146 * entries only for windows that have the lowest bit set), and
147 * (b-w)/(w+1) is an approximation for the expected number of
148 * w-bit windows, not counting the first one.
149 *
150 * Thus we should use
151 *
152 * w >= 6 if b > 671
153 * w = 5 if 671 > b > 239
154 * w = 4 if 239 > b > 79
155 * w = 3 if 79 > b > 23
156 * w <= 2 if 23 > b
157 *
158 * (with draws in between). Very small exponents are often selected
159 * with low Hamming weight, so we use w = 1 for b <= 23.
160 */
#if 1
/*
 * Map an exponent size 'b' (in bits) to a sliding-window width.
 * Thresholds: > 671 -> 6, > 239 -> 5, > 79 -> 4, > 23 -> 3, otherwise 1.
 */
#define BN_window_bits_for_exponent_size(b) \
		(((b) > 671) ? 6 : \
		 (((b) > 239) ? 5 : \
		  (((b) >  79) ? 4 : \
		   (((b) >  23) ? 3 : 1))))
#else
/*
 * Old SSLeay/OpenSSL table.
 * Maximum window size was 5, so this table differs for b==1024;
 * but it coincides for other interesting values (b==160, b==512).
 */
#define BN_window_bits_for_exponent_size(b) \
		(((b) > 255) ? 5 : \
		 (((b) > 127) ? 4 : \
		  (((b) >  17) ? 3 : 1)))
#endif
177
178
179
/*
 * BN_mod_exp_mont_consttime is based on the assumption that the
 * L1 data cache line width of the target processor is at least
 * the following value (the mask is simply width - 1).
 */
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH	(64)
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK	(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)

/*
 * Window sizes optimized for the fixed window size modular exponentiation
 * algorithm (BN_mod_exp_mont_consttime).
 *
 * To achieve the security goals of BN_mod_exp_mont_consttime, the
 * maximum size of the window must not exceed
 * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
 *
 * Thresholds are provided for cache line sizes of 32 and 64 only
 * (log_2(32)=5 and log_2(64)=6 respectively); for any other width
 * neither macro below is defined.  A window size of 7 should only be
 * used on processors that have a 128 byte or greater cache line size.
 */
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

#  define BN_window_bits_for_ctime_exponent_size(b) \
		(((b) > 937) ? 6 : \
		 (((b) > 306) ? 5 : \
		  (((b) >  89) ? 4 : \
		   (((b) >  22) ? 3 : 1))))
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE	(6)

#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

#  define BN_window_bits_for_ctime_exponent_size(b) \
		(((b) > 306) ? 5 : \
		 (((b) >  89) ? 4 : \
		  (((b) >  22) ? 3 : 1)))
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE	(5)

#endif
217
218
/*
 * Default size thresholds.  Historical tuning notes from the original
 * author, in the same order as the defines below:
 *	Pentium pro	16,16,16,32,64
 *	Alpha		16,16,16,16.64
 * NOTE(review): the trailing "32" remarks look like earlier/alternative
 * values, and the units are presumably BN_ULONG words -- confirm against
 * the users of these constants.
 */
#define BN_MULL_SIZE_NORMAL			(16)	/* 32 */
#define BN_MUL_RECURSIVE_SIZE_NORMAL		(16)	/* 32 less than */
#define BN_SQR_RECURSIVE_SIZE_NORMAL		(16)	/* 32 */
#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL	(32)	/* 32 */
#define BN_MONT_CTX_SET_SIZE_WORD		(64)	/* 32 */
226
/*
 * Summary: when inline assembler is allowed (none of OPENSSL_NO_ASM,
 * OPENSSL_NO_INLINE_ASM, PEDANTIC defined), this section may define
 *   BN_UMULT_HIGH(a,b)          - expression yielding the upper word of a*b
 *   BN_UMULT_LOHI(low,high,a,b) - statement storing both halves of a*b
 * for a few CPU/compiler combinations.  If no branch matches, neither macro
 * is defined and the code further down falls back to other implementations.
 */
227#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
228/*
229 * BN_UMULT_HIGH section.
230 *
231 * No, I'm not trying to overwhelm you when stating that the
232 * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
233 * you to be impressed when I say that if the compiler doesn't
234 * support 2*N integer type, then you have to replace every N*N
235 * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
236 * and additions which unavoidably results in severe performance
237 * penalties. Of course provided that the hardware is capable of
238 * producing 2*N result... That's when you normally start
239 * considering assembler implementation. However! It should be
240 * pointed out that some CPUs (most notably Alpha, PowerPC and
241 * upcoming IA-64 family:-) provide *separate* instruction
242 * calculating the upper half of the product placing the result
243 * into a general purpose register. Now *if* the compiler supports
244 * inline assembler, then it's not impossible to implement the
245 * "bignum" routines (and have the compiler optimize 'em)
246 * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
247 * macro is about:-)
248 *
249 * <appro@fy.chalmers.se>
250 */
/* Alpha, 64-bit word: one 'umulh' instruction; separate spellings for the
 * DEC C asm() intrinsic and for GNU C extended asm.  The GNU variants in this
 * section are statement expressions whose value is the local 'ret'. */
251# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
252# if defined(__DECC)
253# include <c_asm.h>
254# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
255# elif defined(__GNUC__)
256# define BN_UMULT_HIGH(a,b) ({ \
257 register BN_ULONG ret; \
258 asm ("umulh %1,%2,%0" \
259 : "=r"(ret) \
260 : "r"(a), "r"(b)); \
261 ret; })
262# endif /* compiler */
/* 64-bit PowerPC: 'mulhdu'; only a GNU C variant is provided. */
263# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
264# if defined(__GNUC__)
265# define BN_UMULT_HIGH(a,b) ({ \
266 register BN_ULONG ret; \
267 asm ("mulhdu %0,%1,%2" \
268 : "=r"(ret) \
269 : "r"(a), "r"(b)); \
270 ret; })
271# endif /* compiler */
/* x86_64: 'mulq' takes one operand through the "a" constraint and returns
 * its two result halves through the "a" and "d" output constraints; it also
 * clobbers the condition codes ("cc").  BN_UMULT_HIGH throws the "a" half
 * away in 'discard' and yields the "d" half. */
272# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
273# if defined(__GNUC__)
274# define BN_UMULT_HIGH(a,b) ({ \
275 register BN_ULONG ret,discard; \
276 asm ("mulq %3" \
277 : "=a"(discard),"=d"(ret) \
278 : "a"(a), "g"(b) \
279 : "cc"); \
280 ret; })
/* Same instruction, keeping both halves.  NOTE(review): unlike BN_UMULT_HIGH
 * this expands to a complete statement *including* the trailing ';', so a
 * call written as "BN_UMULT_LOHI(...);" produces an extra empty statement --
 * keep it out of unbraced if/else bodies. */
281# define BN_UMULT_LOHI(low,high,a,b) \
282 asm ("mulq %3" \
283 : "=a"(low),"=d"(high) \
284 : "a"(a),"g"(b) \
285 : "cc");
286# endif
287# endif /* cpu */
288#endif /* OPENSSL_NO_ASM */
289
/*************************************************************
 * Using the long long type
 *
 * Lw(t): 't' converted to BN_ULONG and masked with BN_MASK2.
 * Hw(t): 't' shifted right by BN_BITS2 bits, then converted and masked
 *        the same way.
 */
#define Lw(t)	(((BN_ULONG)(t)) & BN_MASK2)
#define Hw(t)	(((BN_ULONG)((t) >> BN_BITS2)) & BN_MASK2)
295
/*
 * Internal consistency check, compiled in only for BN_DEBUG builds:
 * asserts 0 <= top <= dmax for the given BIGNUM.  In normal builds it
 * expands to nothing.  Note that the debug expansion already carries its
 * own terminating ';'.
 */
#ifndef BN_DEBUG
# define bn_check_top(a)
#else
# include <assert.h>
# define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->dmax);
#endif
303
/*
 * Development-only helper used by bn_set_low()/bn_set_high(): in BN_DEBUG
 * builds it shrinks the recorded capacity (dmax) of 'r' to its current
 * 'top' and marks the data as static.  In normal builds it expands to
 * nothing.
 *
 * The capacity field of BIGNUM is called 'dmax' everywhere else in this
 * library; the previous spelling '(r)->max' named a member that does not
 * exist and therefore could not compile once the macro was expanded with
 * BN_DEBUG defined.
 */
#ifdef BN_DEBUG
#define bn_set_max(r) ((r)->dmax=(r)->top,BN_set_flags((r),BN_FLG_STATIC_DATA))
#else
#define bn_set_max(r)
#endif
310
/*
 * bn_set_low(r,a,n): make 'r' a read-only view of the low part of 'a'.
 * 'r' shares a's word array and sign, covers at most 'n' words (fewer if
 * 'a' is shorter), and is flagged BN_FLG_STATIC_DATA.
 */
#define bn_set_low(r,a,n) \
	{ \
	if ((a)->top > (n)) \
		(r)->top=(n); \
	else \
		(r)->top=(a)->top; \
	(r)->d=(a)->d; \
	(r)->neg=(a)->neg; \
	(r)->flags|=BN_FLG_STATIC_DATA; \
	bn_set_max(r); \
	}
320
/*
 * bn_set_high(r,a,n): make 'r' a read-only view of the words of 'a' from
 * index 'n' upwards.  If 'a' has more than 'n' words, 'r' points at a->d[n]
 * and has top = a->top - n; otherwise r->top is set to 0 and r->d is left
 * unchanged.  In both cases 'r' takes a's sign and is flagged
 * BN_FLG_STATIC_DATA.
 *
 * Every use of the argument 'n' is parenthesized so that an expression
 * argument such as "x+1" is subtracted and indexed as a whole.
 */
#define bn_set_high(r,a,n) \
	{ \
	if ((a)->top > (n)) \
		{ \
		(r)->top=(a)->top-(n); \
		(r)->d= &((a)->d[(n)]); \
		} \
	else \
		(r)->top=0; \
	(r)->neg=(a)->neg; \
	(r)->flags|=BN_FLG_STATIC_DATA; \
	bn_set_max(r); \
	}
334
335#ifdef BN_LLONG
/*
 * Word-multiply primitives for builds that have a double-width integer
 * type (BN_ULLONG).  Lw()/Hw() extract the two result words.
 *
 *   mul_add(r,a,w,c): t = w*a + r + c;  r = Lw(t);  c = Hw(t)
 *   mul(r,a,w,c):     t = w*a + c;      r = Lw(t);  c = Hw(t)
 *   sqr(r0,r1,a):     t = a*a;          r0 = Lw(t); r1 = Hw(t)
 *
 * 'r', 'c', 'r0' and 'r1' must be lvalues.  The widening cast is applied to
 * the parenthesized argument, (BN_ULLONG)(w), so that an expression passed
 * as 'w' is widened as a whole rather than only its first operand.
 */
#define mul_add(r,a,w,c) { \
	BN_ULLONG t; \
	t=(BN_ULLONG)(w) * (a) + (r) + (c); \
	(r)= Lw(t); \
	(c)= Hw(t); \
	}

#define mul(r,a,w,c) { \
	BN_ULLONG t; \
	t=(BN_ULLONG)(w) * (a) + (c); \
	(r)= Lw(t); \
	(c)= Hw(t); \
	}

#define sqr(r0,r1,a) { \
	BN_ULLONG t; \
	t=(BN_ULLONG)(a)*(a); \
	(r0)=Lw(t); \
	(r1)=Hw(t); \
	}
356
357#elif defined(BN_UMULT_HIGH)
/*
 * Word-multiply primitives for builds without a double-width type but with
 * BN_UMULT_HIGH available: the low word comes from an ordinary multiply,
 * the high word from BN_UMULT_HIGH, and carries are recovered by comparing
 * a sum against one of its addends (sum < addend means the add wrapped).
 *
 *   mul_add(r,a,w,c): (c,r) = w*a + r + c
 *   mul(r,a,w,c):     (c,r) = w*a + c
 *   sqr(r0,r1,a):     (r1,r0) = a*a
 */
#define mul_add(r,a,w,c) {		\
	BN_ULONG high,low,ret,tmp=(a);	\
	ret  = (r);			\
	high = BN_UMULT_HIGH(w,tmp);	\
	ret += (c);			\
	low  = (w) * tmp;		\
	(c)  = (ret < (c));		\
	(c) += high;			\
	ret += low;			\
	(c) += (ret < low);		\
	(r)  = ret;			\
	}

#define mul(r,a,w,c)	{		\
	BN_ULONG high,low,ret,ta=(a);	\
	low  = (w) * ta;		\
	high = BN_UMULT_HIGH(w,ta);	\
	ret  = low + (c);		\
	(c)  = high;			\
	(c) += (ret < low);		\
	(r)  = ret;			\
	}

#define sqr(r0,r1,a)	{		\
	BN_ULONG tmp=(a);		\
	(r0) = tmp * tmp;		\
	(r1) = BN_UMULT_HIGH(tmp,tmp);	\
	}
386
387#else
388/*************************************************************
389 * No long long type
390 */
391
/*
 * Half-word helpers.  LBITS/HBITS split a word into a low part (masked with
 * BN_MASK2l) and a high part (shifted right by BN_BITS4, same mask);
 * L2HBITS moves a low part up by BN_BITS4 bits.  The LL* variants do the
 * same with BN_BITS2/BN_MASKl on a wider value.
 * NOTE(review): BN_BITS4 is presumably half the width of BN_ULONG and
 * BN_MASK2l the matching low-half mask -- they are defined elsewhere.
 */
392#define LBITS(a) ((a)&BN_MASK2l)
393#define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
394#define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
395
396#define LLBITS(a) ((a)&BN_MASKl)
397#define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
398#define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
399
/*
 * mul64(l,h,bl,bh): schoolbook multiply from half-words.  On entry (l,h)
 * are the low/high halves of one operand and (bl,bh) those of the other
 * (see how mul_add() and mul() below prepare them).  The four partial
 * products are formed, the two middle ones are summed in 'm' (a wrap of
 * that sum is carried into the upper half of 'ht'), and the middle sum is
 * then folded into the low and high result words.  On exit 'l' holds the
 * low word and 'h' the high word of the product; both must be lvalues.
 */
400#define mul64(l,h,bl,bh) \
401 { \
402 BN_ULONG m,m1,lt,ht; \
403 \
404 lt=l; \
405 ht=h; \
406 m =(bh)*(lt); \
407 lt=(bl)*(lt); \
408 m1=(bl)*(ht); \
409 ht =(bh)*(ht); \
410 m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
411 ht+=HBITS(m); \
412 m1=L2HBITS(m); \
413 lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
414 (l)=lt; \
415 (h)=ht; \
416 }
417
/*
 * sqr64(lo,ho,in): (ho,lo) = in * in, again from half-words.  The cross
 * product low*high is needed twice, which is why it is folded in with
 * shifts of BN_BITS4-1 / BN_BITS4+1 rather than BN_BITS4.
 * The macro declares locals named l, h and m, so arguments must not use
 * those names.
 */
418#define sqr64(lo,ho,in) \
419 { \
420 BN_ULONG l,h,m; \
421 \
422 h=(in); \
423 l=LBITS(h); \
424 h=HBITS(h); \
425 m =(l)*(h); \
426 l*=l; \
427 h*=h; \
428 h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
429 m =(m&BN_MASK2l)<<(BN_BITS4+1); \
430 l=(l+m)&BN_MASK2; if (l < m) h++; \
431 (lo)=l; \
432 (ho)=h; \
433 }
434
/*
 * mul_add(r,a,bl,bh,c): (c,r) = a * b + r + c, where the multiplier b is
 * passed already split into its halves (bl,bh).  Note that this variant
 * takes five arguments, unlike the four-argument mul_add() of the other
 * build configurations.  'r' and 'c' must be lvalues; 'c' is also used as
 * scratch for the old value of 'r' before it receives the new carry word.
 */
435#define mul_add(r,a,bl,bh,c) { \
436 BN_ULONG l,h; \
437 \
438 h= (a); \
439 l=LBITS(h); \
440 h=HBITS(h); \
441 mul64(l,h,(bl),(bh)); \
442 \
443 /* non-multiply part */ \
444 l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
445 (c)=(r); \
446 l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
447 (c)=h&BN_MASK2; \
448 (r)=l; \
449 }
450
/*
 * mul(r,a,bl,bh,c): (c,r) = a * b + c, with b given as halves (bl,bh).
 * 'r' and 'c' must be lvalues.
 */
451#define mul(r,a,bl,bh,c) { \
452 BN_ULONG l,h; \
453 \
454 h= (a); \
455 l=LBITS(h); \
456 h=HBITS(h); \
457 mul64(l,h,(bl),(bh)); \
458 \
459 /* non-multiply part */ \
460 l+=(c); if ((l&BN_MASK2) < (c)) h++; \
461 (c)=h&BN_MASK2; \
462 (r)=l&BN_MASK2; \
463 }
464#endif /* !BN_LLONG */
465
466void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
467void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
468void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
469void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
470void bn_sqr_comba8(BN_ULONG *r,const BN_ULONG *a);
471void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a);
472int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n);
473int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
474 int cl, int dl);
475#ifdef BN_RECURSION
476void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
477 BN_ULONG *t);
478void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
479 int n, BN_ULONG *t);
480void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
481 BN_ULONG *t);
482void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2,
483 BN_ULONG *t);
484void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t);
485#endif
486void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
487
488#ifdef __cplusplus
489}
490#endif
491
492#endif
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
new file mode 100644
index 0000000000..e1660450bc
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_lib.c
@@ -0,0 +1,824 @@
1/* crypto/bn/bn_lib.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <assert.h>
65#include <limits.h>
66#include <stdio.h>
67#include "cryptlib.h"
68#include "bn_lcl.h"
69
70const char *BN_version="Big Number" OPENSSL_VERSION_PTEXT;
71
/*
 * Tunable size limits, stored as a bit count plus the matching word count
 * (1 << bits).
 *
 * For a 32 bit machine (bits - words == size in bits):
 * 2 -   4 ==  128
 * 3 -   8 ==  256
 * 4 -  16 ==  512
 * 5 -  32 == 1024
 * 6 -  64 == 2048
 * 7 - 128 == 4096
 * 8 - 256 == 8192
 */
static int bn_limit_bits=0;
static int bn_limit_num=8;        /* (1<<bn_limit_bits) */
static int bn_limit_bits_low=0;
static int bn_limit_num_low=8;    /* (1<<bn_limit_bits_low) */
static int bn_limit_bits_high=0;
static int bn_limit_num_high=8;   /* (1<<bn_limit_bits_high) */
static int bn_limit_bits_mont=0;
static int bn_limit_num_mont=8;   /* (1<<bn_limit_bits_mont) */

/* Clamp a non-negative bit count to sizeof(int)*8-1, the largest accepted value. */
static int bn_clamp_limit_bits(int bits)
{
    const int max_bits = (int)(sizeof(int) * 8) - 1;

    return (bits > max_bits) ? max_bits : bits;
}

/*
 * Return 1 << bits for an already clamped bit count.  The shift is done on
 * an unsigned value: shifting a signed 1 into the sign bit (bits ==
 * sizeof(int)*8-1) is undefined behaviour in C.
 */
static int bn_limit_num_for_bits(int bits)
{
    return (int)(1U << bits);
}

/*
 * Set the four size limits.  A negative argument leaves the corresponding
 * limit untouched; a value above sizeof(int)*8-1 is clamped to that maximum.
 *
 *   mult - general limit, reported by BN_get_params(0)
 *   high - reported by BN_get_params(1)
 *   low  - reported by BN_get_params(2)
 *   mont - reported by BN_get_params(3)
 */
void BN_set_params(int mult, int high, int low, int mont)
{
    if (mult >= 0) {
        bn_limit_bits = bn_clamp_limit_bits(mult);
        bn_limit_num = bn_limit_num_for_bits(bn_limit_bits);
    }
    if (high >= 0) {
        bn_limit_bits_high = bn_clamp_limit_bits(high);
        bn_limit_num_high = bn_limit_num_for_bits(bn_limit_bits_high);
    }
    if (low >= 0) {
        bn_limit_bits_low = bn_clamp_limit_bits(low);
        bn_limit_num_low = bn_limit_num_for_bits(bn_limit_bits_low);
    }
    if (mont >= 0) {
        bn_limit_bits_mont = bn_clamp_limit_bits(mont);
        bn_limit_num_mont = bn_limit_num_for_bits(bn_limit_bits_mont);
    }
}

/*
 * Return the bit count stored for limit 'which':
 * 0 = mult, 1 = high, 2 = low, 3 = mont; any other selector yields 0.
 */
int BN_get_params(int which)
{
    if (which == 0) return(bn_limit_bits);
    else if (which == 1) return(bn_limit_bits_high);
    else if (which == 2) return(bn_limit_bits_low);
    else if (which == 3) return(bn_limit_bits_mont);
    else return(0);
}
130
131const BIGNUM *BN_value_one(void)
132 {
133 static BN_ULONG data_one=1L;
134 static BIGNUM const_one={&data_one,1,1,0};
135
136 return(&const_one);
137 }
138
139char *BN_options(void)
140 {
141 static int init=0;
142 static char data[16];
143
144 if (!init)
145 {
146 init++;
147#ifdef BN_LLONG
148 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
149 (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8);
150#else
151 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
152 (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);
153#endif
154 }
155 return(data);
156 }
157
158int BN_num_bits_word(BN_ULONG l)
159 {
160 static const char bits[256]={
161 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
162 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
163 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
164 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
165 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
166 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
167 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
168 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
169 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
170 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
171 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
172 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
173 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
174 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
175 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
176 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
177 };
178
179#if defined(SIXTY_FOUR_BIT_LONG)
180 if (l & 0xffffffff00000000L)
181 {
182 if (l & 0xffff000000000000L)
183 {
184 if (l & 0xff00000000000000L)
185 {
186 return(bits[(int)(l>>56)]+56);
187 }
188 else return(bits[(int)(l>>48)]+48);
189 }
190 else
191 {
192 if (l & 0x0000ff0000000000L)
193 {
194 return(bits[(int)(l>>40)]+40);
195 }
196 else return(bits[(int)(l>>32)]+32);
197 }
198 }
199 else
200#else
201#ifdef SIXTY_FOUR_BIT
202 if (l & 0xffffffff00000000LL)
203 {
204 if (l & 0xffff000000000000LL)
205 {
206 if (l & 0xff00000000000000LL)
207 {
208 return(bits[(int)(l>>56)]+56);
209 }
210 else return(bits[(int)(l>>48)]+48);
211 }
212 else
213 {
214 if (l & 0x0000ff0000000000LL)
215 {
216 return(bits[(int)(l>>40)]+40);
217 }
218 else return(bits[(int)(l>>32)]+32);
219 }
220 }
221 else
222#endif
223#endif
224 {
225#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
226 if (l & 0xffff0000L)
227 {
228 if (l & 0xff000000L)
229 return(bits[(int)(l>>24L)]+24);
230 else return(bits[(int)(l>>16L)]+16);
231 }
232 else
233#endif
234 {
235#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
236 if (l & 0xff00L)
237 return(bits[(int)(l>>8)]+8);
238 else
239#endif
240 return(bits[(int)(l )] );
241 }
242 }
243 }
244
245int BN_num_bits(const BIGNUM *a)
246 {
247 BN_ULONG l;
248 int i;
249
250 bn_check_top(a);
251
252 if (a->top == 0) return(0);
253 l=a->d[a->top-1];
254 assert(l != 0);
255 i=(a->top-1)*BN_BITS2;
256 return(i+BN_num_bits_word(l));
257 }
258
259void BN_clear_free(BIGNUM *a)
260 {
261 int i;
262
263 if (a == NULL) return;
264 if (a->d != NULL)
265 {
266 OPENSSL_cleanse(a->d,a->dmax*sizeof(a->d[0]));
267 if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
268 OPENSSL_free(a->d);
269 }
270 i=BN_get_flags(a,BN_FLG_MALLOCED);
271 OPENSSL_cleanse(a,sizeof(BIGNUM));
272 if (i)
273 OPENSSL_free(a);
274 }
275
276void BN_free(BIGNUM *a)
277 {
278 if (a == NULL) return;
279 if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
280 OPENSSL_free(a->d);
281 a->flags|=BN_FLG_FREE; /* REMOVE? */
282 if (a->flags & BN_FLG_MALLOCED)
283 OPENSSL_free(a);
284 }
285
286void BN_init(BIGNUM *a)
287 {
288 memset(a,0,sizeof(BIGNUM));
289 }
290
291BIGNUM *BN_new(void)
292 {
293 BIGNUM *ret;
294
295 if ((ret=(BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL)
296 {
297 BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
298 return(NULL);
299 }
300 ret->flags=BN_FLG_MALLOCED;
301 ret->top=0;
302 ret->neg=0;
303 ret->dmax=0;
304 ret->d=NULL;
305 return(ret);
306 }
307
308/* This is used both by bn_expand2() and bn_dup_expand() */
309/* The caller MUST check that words > b->dmax before calling this */
310static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
311 {
312 BN_ULONG *A,*a = NULL;
313 const BN_ULONG *B;
314 int i;
315
316 if (words > (INT_MAX/(4*BN_BITS2)))
317 {
318 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_BIGNUM_TOO_LONG);
319 return NULL;
320 }
321
322 bn_check_top(b);
323 if (BN_get_flags(b,BN_FLG_STATIC_DATA))
324 {
325 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
326 return(NULL);
327 }
328 a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1));
329 if (A == NULL)
330 {
331 BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE);
332 return(NULL);
333 }
334#if 1
335 B=b->d;
336 /* Check if the previous number needs to be copied */
337 if (B != NULL)
338 {
339 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
340 {
341 /*
342 * The fact that the loop is unrolled
343 * 4-wise is a tribute to Intel. It's
344 * the one that doesn't have enough
345 * registers to accomodate more data.
346 * I'd unroll it 8-wise otherwise:-)
347 *
348 * <appro@fy.chalmers.se>
349 */
350 BN_ULONG a0,a1,a2,a3;
351 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
352 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
353 }
354 switch (b->top&3)
355 {
356 case 3: A[2]=B[2];
357 case 2: A[1]=B[1];
358 case 1: A[0]=B[0];
359 case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does
360 * the switch table by doing a=top&3; a--; goto jump_table[a];
361 * which fails for top== 0 */
362 ;
363 }
364 }
365
366 /* Now need to zero any data between b->top and b->max */
367 /* XXX Why? */
368
369 A= &(a[b->top]);
370 for (i=(words - b->top)>>3; i>0; i--,A+=8)
371 {
372 A[0]=0; A[1]=0; A[2]=0; A[3]=0;
373 A[4]=0; A[5]=0; A[6]=0; A[7]=0;
374 }
375 for (i=(words - b->top)&7; i>0; i--,A++)
376 A[0]=0;
377#else
378 memset(A,0,sizeof(BN_ULONG)*(words+1));
379 memcpy(A,b->d,sizeof(b->d[0])*b->top);
380#endif
381
382 return(a);
383 }
384
385/* This is an internal function that can be used instead of bn_expand2()
386 * when there is a need to copy BIGNUMs instead of only expanding the
387 * data part, while still expanding them.
388 * Especially useful when needing to expand BIGNUMs that are declared
389 * 'const' and should therefore not be changed.
390 * The reason to use this instead of a BN_dup() followed by a bn_expand2()
391 * is memory allocation overhead. A BN_dup() followed by a bn_expand2()
392 * will allocate new memory for the BIGNUM data twice, and free it once,
393 * while bn_dup_expand() makes sure allocation is made only once.
394 */
395
396BIGNUM *bn_dup_expand(const BIGNUM *b, int words)
397 {
398 BIGNUM *r = NULL;
399
400 /* This function does not work if
401 * words <= b->dmax && top < words
402 * because BN_dup() does not preserve 'dmax'!
403 * (But bn_dup_expand() is not used anywhere yet.)
404 */
405
406 if (words > b->dmax)
407 {
408 BN_ULONG *a = bn_expand_internal(b, words);
409
410 if (a)
411 {
412 r = BN_new();
413 if (r)
414 {
415 r->top = b->top;
416 r->dmax = words;
417 r->neg = b->neg;
418 r->d = a;
419 }
420 else
421 {
422 /* r == NULL, BN_new failure */
423 OPENSSL_free(a);
424 }
425 }
426 /* If a == NULL, there was an error in allocation in
427 bn_expand_internal(), and NULL should be returned */
428 }
429 else
430 {
431 r = BN_dup(b);
432 }
433
434 return r;
435 }
436
437/* This is an internal function that should not be used in applications.
438 * It ensures that 'b' has enough room for a 'words' word number number.
439 * It is mostly used by the various BIGNUM routines. If there is an error,
440 * NULL is returned. If not, 'b' is returned. */
441
442BIGNUM *bn_expand2(BIGNUM *b, int words)
443 {
444 if (words > b->dmax)
445 {
446 BN_ULONG *a = bn_expand_internal(b, words);
447
448 if (a)
449 {
450 if (b->d)
451 OPENSSL_free(b->d);
452 b->d=a;
453 b->dmax=words;
454 }
455 else
456 b = NULL;
457 }
458 return b;
459 }
460
461BIGNUM *BN_dup(const BIGNUM *a)
462 {
463 BIGNUM *r, *t;
464
465 if (a == NULL) return NULL;
466
467 bn_check_top(a);
468
469 t = BN_new();
470 if (t == NULL) return(NULL);
471 r = BN_copy(t, a);
472 /* now r == t || r == NULL */
473 if (r == NULL)
474 BN_free(t);
475 return r;
476 }
477
478BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
479 {
480 int i;
481 BN_ULONG *A;
482 const BN_ULONG *B;
483
484 bn_check_top(b);
485
486 if (a == b) return(a);
487 if (bn_wexpand(a,b->top) == NULL) return(NULL);
488
489#if 1
490 A=a->d;
491 B=b->d;
492 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
493 {
494 BN_ULONG a0,a1,a2,a3;
495 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
496 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
497 }
498 switch (b->top&3)
499 {
500 case 3: A[2]=B[2];
501 case 2: A[1]=B[1];
502 case 1: A[0]=B[0];
503 case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
504 }
505#else
506 memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
507#endif
508
509/* memset(&(a->d[b->top]),0,sizeof(a->d[0])*(a->max-b->top));*/
510 a->top=b->top;
511 if ((a->top == 0) && (a->d != NULL))
512 a->d[0]=0;
513 a->neg=b->neg;
514 return(a);
515 }
516
517void BN_swap(BIGNUM *a, BIGNUM *b)
518 {
519 int flags_old_a, flags_old_b;
520 BN_ULONG *tmp_d;
521 int tmp_top, tmp_dmax, tmp_neg;
522
523 flags_old_a = a->flags;
524 flags_old_b = b->flags;
525
526 tmp_d = a->d;
527 tmp_top = a->top;
528 tmp_dmax = a->dmax;
529 tmp_neg = a->neg;
530
531 a->d = b->d;
532 a->top = b->top;
533 a->dmax = b->dmax;
534 a->neg = b->neg;
535
536 b->d = tmp_d;
537 b->top = tmp_top;
538 b->dmax = tmp_dmax;
539 b->neg = tmp_neg;
540
541 a->flags = (flags_old_a & BN_FLG_MALLOCED) | (flags_old_b & BN_FLG_STATIC_DATA);
542 b->flags = (flags_old_b & BN_FLG_MALLOCED) | (flags_old_a & BN_FLG_STATIC_DATA);
543 }
544
545
546void BN_clear(BIGNUM *a)
547 {
548 if (a->d != NULL)
549 memset(a->d,0,a->dmax*sizeof(a->d[0]));
550 a->top=0;
551 a->neg=0;
552 }
553
554BN_ULONG BN_get_word(const BIGNUM *a)
555 {
556 int i,n;
557 BN_ULONG ret=0;
558
559 n=BN_num_bytes(a);
560 if (n > sizeof(BN_ULONG))
561 return(BN_MASK2);
562 for (i=a->top-1; i>=0; i--)
563 {
564#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
565 ret<<=BN_BITS4; /* stops the compiler complaining */
566 ret<<=BN_BITS4;
567#else
568 ret=0;
569#endif
570 ret|=a->d[i];
571 }
572 return(ret);
573 }
574
575int BN_set_word(BIGNUM *a, BN_ULONG w)
576 {
577 int i,n;
578 if (bn_expand(a,sizeof(BN_ULONG)*8) == NULL) return(0);
579
580 n=sizeof(BN_ULONG)/BN_BYTES;
581 a->neg=0;
582 a->top=0;
583 a->d[0]=(BN_ULONG)w&BN_MASK2;
584 if (a->d[0] != 0) a->top=1;
585 for (i=1; i<n; i++)
586 {
587 /* the following is done instead of
588 * w>>=BN_BITS2 so compilers don't complain
589 * on builds where sizeof(long) == BN_TYPES */
590#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
591 w>>=BN_BITS4;
592 w>>=BN_BITS4;
593#else
594 w=0;
595#endif
596 a->d[i]=(BN_ULONG)w&BN_MASK2;
597 if (a->d[i] != 0) a->top=i+1;
598 }
599 return(1);
600 }
601
602BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
603 {
604 unsigned int i,m;
605 unsigned int n;
606 BN_ULONG l;
607
608 if (ret == NULL) ret=BN_new();
609 if (ret == NULL) return(NULL);
610 l=0;
611 n=len;
612 if (n == 0)
613 {
614 ret->top=0;
615 return(ret);
616 }
617 if (bn_expand(ret,(int)(n+2)*8) == NULL)
618 return(NULL);
619 i=((n-1)/BN_BYTES)+1;
620 m=((n-1)%(BN_BYTES));
621 ret->top=i;
622 ret->neg=0;
623 while (n-- > 0)
624 {
625 l=(l<<8L)| *(s++);
626 if (m-- == 0)
627 {
628 ret->d[--i]=l;
629 l=0;
630 m=BN_BYTES-1;
631 }
632 }
633 /* need to call this due to clear byte at top if avoiding
634 * having the top bit set (-ve number) */
635 bn_fix_top(ret);
636 return(ret);
637 }
638
639/* ignore negative */
640int BN_bn2bin(const BIGNUM *a, unsigned char *to)
641 {
642 int n,i;
643 BN_ULONG l;
644
645 n=i=BN_num_bytes(a);
646 while (i-- > 0)
647 {
648 l=a->d[i/BN_BYTES];
649 *(to++)=(unsigned char)(l>>(8*(i%BN_BYTES)))&0xff;
650 }
651 return(n);
652 }
653
654int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
655 {
656 int i;
657 BN_ULONG t1,t2,*ap,*bp;
658
659 bn_check_top(a);
660 bn_check_top(b);
661
662 i=a->top-b->top;
663 if (i != 0) return(i);
664 ap=a->d;
665 bp=b->d;
666 for (i=a->top-1; i>=0; i--)
667 {
668 t1= ap[i];
669 t2= bp[i];
670 if (t1 != t2)
671 return(t1 > t2?1:-1);
672 }
673 return(0);
674 }
675
676int BN_cmp(const BIGNUM *a, const BIGNUM *b)
677 {
678 int i;
679 int gt,lt;
680 BN_ULONG t1,t2;
681
682 if ((a == NULL) || (b == NULL))
683 {
684 if (a != NULL)
685 return(-1);
686 else if (b != NULL)
687 return(1);
688 else
689 return(0);
690 }
691
692 bn_check_top(a);
693 bn_check_top(b);
694
695 if (a->neg != b->neg)
696 {
697 if (a->neg)
698 return(-1);
699 else return(1);
700 }
701 if (a->neg == 0)
702 { gt=1; lt= -1; }
703 else { gt= -1; lt=1; }
704
705 if (a->top > b->top) return(gt);
706 if (a->top < b->top) return(lt);
707 for (i=a->top-1; i>=0; i--)
708 {
709 t1=a->d[i];
710 t2=b->d[i];
711 if (t1 > t2) return(gt);
712 if (t1 < t2) return(lt);
713 }
714 return(0);
715 }
716
717int BN_set_bit(BIGNUM *a, int n)
718 {
719 int i,j,k;
720
721 i=n/BN_BITS2;
722 j=n%BN_BITS2;
723 if (a->top <= i)
724 {
725 if (bn_wexpand(a,i+1) == NULL) return(0);
726 for(k=a->top; k<i+1; k++)
727 a->d[k]=0;
728 a->top=i+1;
729 }
730
731 a->d[i]|=(((BN_ULONG)1)<<j);
732 return(1);
733 }
734
735int BN_clear_bit(BIGNUM *a, int n)
736 {
737 int i,j;
738
739 i=n/BN_BITS2;
740 j=n%BN_BITS2;
741 if (a->top <= i) return(0);
742
743 a->d[i]&=(~(((BN_ULONG)1)<<j));
744 bn_fix_top(a);
745 return(1);
746 }
747
748int BN_is_bit_set(const BIGNUM *a, int n)
749 {
750 int i,j;
751
752 if (n < 0) return(0);
753 i=n/BN_BITS2;
754 j=n%BN_BITS2;
755 if (a->top <= i) return(0);
756 return((a->d[i]&(((BN_ULONG)1)<<j))?1:0);
757 }
758
759int BN_mask_bits(BIGNUM *a, int n)
760 {
761 int b,w;
762
763 w=n/BN_BITS2;
764 b=n%BN_BITS2;
765 if (w >= a->top) return(0);
766 if (b == 0)
767 a->top=w;
768 else
769 {
770 a->top=w+1;
771 a->d[w]&= ~(BN_MASK2<<b);
772 }
773 bn_fix_top(a);
774 return(1);
775 }
776
777int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
778 {
779 int i;
780 BN_ULONG aa,bb;
781
782 aa=a[n-1];
783 bb=b[n-1];
784 if (aa != bb) return((aa > bb)?1:-1);
785 for (i=n-2; i>=0; i--)
786 {
787 aa=a[i];
788 bb=b[i];
789 if (aa != bb) return((aa > bb)?1:-1);
790 }
791 return(0);
792 }
793
794/* Here follows a specialised variant of bn_cmp_words(). It has the
795 property of performing the operation on arrays of different sizes.
796 The sizes of those arrays are expressed through cl, which is the
797 common length ( basically, min(len(a),len(b)) ), and dl, which is the
798 delta between the two lengths, calculated as len(a)-len(b).
799 All lengths are the number of BN_ULONGs... */
800
801int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
802 int cl, int dl)
803 {
804 int n,i;
805 n = cl-1;
806
807 if (dl < 0)
808 {
809 for (i=dl; i<0; i++)
810 {
811 if (b[n-i] != 0)
812 return -1; /* a < b */
813 }
814 }
815 if (dl > 0)
816 {
817 for (i=dl; i>0; i--)
818 {
819 if (a[n+i] != 0)
820 return 1; /* a > b */
821 }
822 }
823 return bn_cmp_words(a,b,cl);
824 }
diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c
new file mode 100644
index 0000000000..5cf82480d7
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_mod.c
@@ -0,0 +1,296 @@
1/* crypto/bn/bn_mod.c */
2/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
58 * All rights reserved.
59 *
60 * This package is an SSL implementation written
61 * by Eric Young (eay@cryptsoft.com).
62 * The implementation was written so as to conform with Netscapes SSL.
63 *
64 * This library is free for commercial and non-commercial use as long as
65 * the following conditions are aheared to. The following conditions
66 * apply to all code found in this distribution, be it the RC4, RSA,
67 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
68 * included with this distribution is covered by the same copyright terms
69 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
70 *
71 * Copyright remains Eric Young's, and as such any Copyright notices in
72 * the code are not to be removed.
73 * If this package is used in a product, Eric Young should be given attribution
74 * as the author of the parts of the library used.
75 * This can be in the form of a textual message at program startup or
76 * in documentation (online or textual) provided with the package.
77 *
78 * Redistribution and use in source and binary forms, with or without
79 * modification, are permitted provided that the following conditions
80 * are met:
81 * 1. Redistributions of source code must retain the copyright
82 * notice, this list of conditions and the following disclaimer.
83 * 2. Redistributions in binary form must reproduce the above copyright
84 * notice, this list of conditions and the following disclaimer in the
85 * documentation and/or other materials provided with the distribution.
86 * 3. All advertising materials mentioning features or use of this software
87 * must display the following acknowledgement:
88 * "This product includes cryptographic software written by
89 * Eric Young (eay@cryptsoft.com)"
90 * The word 'cryptographic' can be left out if the rouines from the library
91 * being used are not cryptographic related :-).
92 * 4. If you include any Windows specific code (or a derivative thereof) from
93 * the apps directory (application code) you must include an acknowledgement:
94 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
95 *
96 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
97 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
98 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
99 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
100 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
101 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
102 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
103 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
104 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
105 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
106 * SUCH DAMAGE.
107 *
108 * The licence and distribution terms for any publically available version or
109 * derivative of this code cannot be changed. i.e. this code cannot simply be
110 * copied and put under another distribution licence
111 * [including the GNU Public Licence.]
112 */
113
114#include "cryptlib.h"
115#include "bn_lcl.h"
116
117
118#if 0 /* now just a #define */
/* Function form of BN_mod, compiled out by the #if 0 above and kept only
 * for reference.  It obtained rem = m mod d by calling BN_div() with a
 * NULL quotient and returned BN_div()'s result. */
119int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
120 {
121 return(BN_div(NULL,rem,m,d,ctx));
122 /* note that rem->neg == m->neg (unless the remainder is zero) */
123 }
124#endif
125
126
127int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
128 {
129 /* like BN_mod, but returns non-negative remainder
130 * (i.e., 0 <= r < |d| always holds) */
131
132 if (!(BN_mod(r,m,d,ctx)))
133 return 0;
134 if (!r->neg)
135 return 1;
136 /* now -|d| < r < 0, so we have to set r := r + |d| */
137 return (d->neg ? BN_sub : BN_add)(r, r, d);
138}
139
140
141int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
142 {
143 if (!BN_add(r, a, b)) return 0;
144 return BN_nnmod(r, r, m, ctx);
145 }
146
147
148/* BN_mod_add variant that may be used if both a and b are non-negative
149 * and less than m */
150int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
151 {
152 if (!BN_add(r, a, b)) return 0;
153 if (BN_ucmp(r, m) >= 0)
154 return BN_usub(r, r, m);
155 return 1;
156 }
157
158
159int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
160 {
161 if (!BN_sub(r, a, b)) return 0;
162 return BN_nnmod(r, r, m, ctx);
163 }
164
165
166/* BN_mod_sub variant that may be used if both a and b are non-negative
167 * and less than m */
168int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
169 {
170 if (!BN_sub(r, a, b)) return 0;
171 if (r->neg)
172 return BN_add(r, r, m);
173 return 1;
174 }
175
176
177/* slow but works */
178int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
179 BN_CTX *ctx)
180 {
181 BIGNUM *t;
182 int ret=0;
183
184 bn_check_top(a);
185 bn_check_top(b);
186 bn_check_top(m);
187
188 BN_CTX_start(ctx);
189 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
190 if (a == b)
191 { if (!BN_sqr(t,a,ctx)) goto err; }
192 else
193 { if (!BN_mul(t,a,b,ctx)) goto err; }
194 if (!BN_nnmod(r,t,m,ctx)) goto err;
195 ret=1;
196err:
197 BN_CTX_end(ctx);
198 return(ret);
199 }
200
201
202int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
203 {
204 if (!BN_sqr(r, a, ctx)) return 0;
205 /* r->neg == 0, thus we don't need BN_nnmod */
206 return BN_mod(r, r, m, ctx);
207 }
208
209
210int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
211 {
212 if (!BN_lshift1(r, a)) return 0;
213 return BN_nnmod(r, r, m, ctx);
214 }
215
216
217/* BN_mod_lshift1 variant that may be used if a is non-negative
218 * and less than m */
219int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m)
220 {
221 if (!BN_lshift1(r, a)) return 0;
222 if (BN_cmp(r, m) >= 0)
223 return BN_sub(r, r, m);
224 return 1;
225 }
226
227
228int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx)
229 {
230 BIGNUM *abs_m = NULL;
231 int ret;
232
233 if (!BN_nnmod(r, a, m, ctx)) return 0;
234
235 if (m->neg)
236 {
237 abs_m = BN_dup(m);
238 if (abs_m == NULL) return 0;
239 abs_m->neg = 0;
240 }
241
242 ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m));
243
244 if (abs_m)
245 BN_free(abs_m);
246 return ret;
247 }
248
249
250/* BN_mod_lshift variant that may be used if a is non-negative
251 * and less than m */
252int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m)
253 {
254 if (r != a)
255 {
256 if (BN_copy(r, a) == NULL) return 0;
257 }
258
259 while (n > 0)
260 {
261 int max_shift;
262
263 /* 0 < r < m */
264 max_shift = BN_num_bits(m) - BN_num_bits(r);
265 /* max_shift >= 0 */
266
267 if (max_shift < 0)
268 {
269 BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED);
270 return 0;
271 }
272
273 if (max_shift > n)
274 max_shift = n;
275
276 if (max_shift)
277 {
278 if (!BN_lshift(r, r, max_shift)) return 0;
279 n -= max_shift;
280 }
281 else
282 {
283 if (!BN_lshift1(r, r)) return 0;
284 --n;
285 }
286
287 /* BN_num_bits(r) <= BN_num_bits(m) */
288
289 if (BN_cmp(r, m) >= 0)
290 {
291 if (!BN_sub(r, r, m)) return 0;
292 }
293 }
294
295 return 1;
296 }
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
new file mode 100644
index 0000000000..726d5f2b1b
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_mont.c
@@ -0,0 +1,421 @@
1/* crypto/bn/bn_mont.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59/*
60 * Details about Montgomery multiplication algorithms can be found at
61 * http://security.ece.orst.edu/publications.html, e.g.
62 * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and
63 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
64 */
65
66#include <stdio.h>
67#include "cryptlib.h"
68#include "bn_lcl.h"
69
70#define MONT_WORD /* use the faster word-based algorithm */
71
72int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
73 BN_MONT_CTX *mont, BN_CTX *ctx)
74 {
75 BIGNUM *tmp;
76 int ret=0;
77
78 BN_CTX_start(ctx);
79 tmp = BN_CTX_get(ctx);
80 if (tmp == NULL) goto err;
81
82 bn_check_top(tmp);
83 if (a == b)
84 {
85 if (!BN_sqr(tmp,a,ctx)) goto err;
86 }
87 else
88 {
89 if (!BN_mul(tmp,a,b,ctx)) goto err;
90 }
91 /* reduce from aRR to aR */
92 if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
93 ret=1;
94err:
95 BN_CTX_end(ctx);
96 return(ret);
97 }
98
99int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
100 BN_CTX *ctx)
101 {
102 int retn=0;
103
104#ifdef MONT_WORD
105 BIGNUM *n,*r;
106 BN_ULONG *ap,*np,*rp,n0,v,*nrp;
107 int al,nl,max,i,x,ri;
108
109 BN_CTX_start(ctx);
110 if ((r = BN_CTX_get(ctx)) == NULL) goto err;
111
112 if (!BN_copy(r,a)) goto err;
113 n= &(mont->N);
114
115 ap=a->d;
116 /* mont->ri is the size of mont->N in bits (rounded up
117 to the word size) */
118 al=ri=mont->ri/BN_BITS2;
119
120 nl=n->top;
121 if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
122
123 max=(nl+al+1); /* allow for overflow (no?) XXX */
124 if (bn_wexpand(r,max) == NULL) goto err;
125
126 r->neg=a->neg^n->neg;
127 np=n->d;
128 rp=r->d;
129 nrp= &(r->d[nl]);
130
131 /* clear the top words of T */
132#if 1
133 for (i=r->top; i<max; i++) /* memset? XXX */
134 r->d[i]=0;
135#else
136 memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
137#endif
138
139 r->top=max;
140 n0=mont->n0;
141
142#ifdef BN_COUNT
143 fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl);
144#endif
145 for (i=0; i<nl; i++)
146 {
147#ifdef __TANDEM
148 {
149 long long t1;
150 long long t2;
151 long long t3;
152 t1 = rp[0] * (n0 & 0177777);
153 t2 = 037777600000l;
154 t2 = n0 & t2;
155 t3 = rp[0] & 0177777;
156 t2 = (t3 * t2) & BN_MASK2;
157 t1 = t1 + t2;
158 v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
159 }
160#else
161 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
162#endif
163 nrp++;
164 rp++;
165 if (((nrp[-1]+=v)&BN_MASK2) >= v)
166 continue;
167 else
168 {
169 if (((++nrp[0])&BN_MASK2) != 0) continue;
170 if (((++nrp[1])&BN_MASK2) != 0) continue;
171 for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
172 }
173 }
174 bn_fix_top(r);
175
176 /* mont->ri will be a multiple of the word size and below code
177 * is kind of BN_rshift(ret,r,mont->ri) equivalent */
178 if (r->top <= ri)
179 {
180 ret->top=0;
181 retn=1;
182 goto err;
183 }
184 al=r->top-ri;
185
186# define BRANCH_FREE 1
187# if BRANCH_FREE
188 if (bn_wexpand(ret,ri) == NULL) goto err;
189 x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
190 ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
191 ret->neg=r->neg;
192
193 rp=ret->d;
194 ap=&(r->d[ri]);
195
196 {
197 size_t m1,m2;
198
199 v=bn_sub_words(rp,ap,np,ri);
200 /* this ----------------^^ works even in al<ri case
201 * thanks to zealous zeroing of top of the vector in the
202 * beginning. */
203
204 /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
205 /* in other words if subtraction result is real, then
206 * trick unconditional memcpy below to perform in-place
207 * "refresh" instead of actual copy. */
208 m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
209 m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
210 m1|=m2; /* (al!=ri) */
211 m1|=(0-(size_t)v); /* (al!=ri || v) */
212 m1&=~m2; /* (al!=ri || v) && !al>ri */
213 nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
214 }
215
216 /* 'i<ri' is chosen to eliminate dependency on input data, even
217 * though it results in redundant copy in al<ri case. */
218 for (i=0,ri-=4; i<ri; i+=4)
219 {
220 BN_ULONG t1,t2,t3,t4;
221
222 t1=nrp[i+0];
223 t2=nrp[i+1];
224 t3=nrp[i+2]; ap[i+0]=0;
225 t4=nrp[i+3]; ap[i+1]=0;
226 rp[i+0]=t1; ap[i+2]=0;
227 rp[i+1]=t2; ap[i+3]=0;
228 rp[i+2]=t3;
229 rp[i+3]=t4;
230 }
231 for (ri+=4; i<ri; i++)
232 rp[i]=nrp[i], ap[i]=0;
233# else
234 if (bn_wexpand(ret,al) == NULL) goto err;
235 ret->top=al;
236 ret->neg=r->neg;
237
238 rp=ret->d;
239 ap=&(r->d[ri]);
240 al-=4;
241 for (i=0; i<al; i+=4)
242 {
243 BN_ULONG t1,t2,t3,t4;
244
245 t1=ap[i+0];
246 t2=ap[i+1];
247 t3=ap[i+2];
248 t4=ap[i+3];
249 rp[i+0]=t1;
250 rp[i+1]=t2;
251 rp[i+2]=t3;
252 rp[i+3]=t4;
253 }
254 al+=4;
255 for (; i<al; i++)
256 rp[i]=ap[i];
257# endif
258#else /* !MONT_WORD */
259 BIGNUM *t1,*t2;
260
261 BN_CTX_start(ctx);
262 t1 = BN_CTX_get(ctx);
263 t2 = BN_CTX_get(ctx);
264 if (t1 == NULL || t2 == NULL) goto err;
265
266 if (!BN_copy(t1,a)) goto err;
267 BN_mask_bits(t1,mont->ri);
268
269 if (!BN_mul(t2,t1,&mont->Ni,ctx)) goto err;
270 BN_mask_bits(t2,mont->ri);
271
272 if (!BN_mul(t1,t2,&mont->N,ctx)) goto err;
273 if (!BN_add(t2,a,t1)) goto err;
274 if (!BN_rshift(ret,t2,mont->ri)) goto err;
275#endif /* MONT_WORD */
276
277#if !defined(BRANCH_FREE) || BRANCH_FREE==0
278 if (BN_ucmp(ret, &(mont->N)) >= 0)
279 {
280 if (!BN_usub(ret,ret,&(mont->N))) goto err;
281 }
282#endif
283 retn=1;
284 err:
285 BN_CTX_end(ctx);
286 return(retn);
287 }
288
289BN_MONT_CTX *BN_MONT_CTX_new(void)
290 {
291 BN_MONT_CTX *ret;
292
293 if ((ret=(BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL)
294 return(NULL);
295
296 BN_MONT_CTX_init(ret);
297 ret->flags=BN_FLG_MALLOCED;
298 return(ret);
299 }
300
301void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
302 {
303 ctx->ri=0;
304 BN_init(&(ctx->RR));
305 BN_init(&(ctx->N));
306 BN_init(&(ctx->Ni));
307 ctx->flags=0;
308 }
309
310void BN_MONT_CTX_free(BN_MONT_CTX *mont)
311 {
312 if(mont == NULL)
313 return;
314
315 BN_free(&(mont->RR));
316 BN_free(&(mont->N));
317 BN_free(&(mont->Ni));
318 if (mont->flags & BN_FLG_MALLOCED)
319 OPENSSL_free(mont);
320 }
321
322int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
323 {
324 BIGNUM Ri,*R;
325
326 BN_init(&Ri);
327 R= &(mont->RR); /* grab RR as a temp */
328 if (!BN_copy(&(mont->N),mod)) goto err; /* Set N */
329 mont->N.neg = 0;
330
331#ifdef MONT_WORD
332 {
333 BIGNUM tmod;
334 BN_ULONG buf[2];
335
336 mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
337 if (!(BN_zero(R))) goto err;
338 if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
339
340 buf[0]=mod->d[0]; /* tmod = N mod word size */
341 buf[1]=0;
342 tmod.d=buf;
343 tmod.top=1;
344 tmod.dmax=2;
345 tmod.neg=0;
346 /* Ri = R^-1 mod N*/
347 if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL)
348 goto err;
349 if (!BN_lshift(&Ri,&Ri,BN_BITS2)) goto err; /* R*Ri */
350 if (!BN_is_zero(&Ri))
351 {
352 if (!BN_sub_word(&Ri,1)) goto err;
353 }
354 else /* if N mod word size == 1 */
355 {
356 if (!BN_set_word(&Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */
357 }
358 if (!BN_div(&Ri,NULL,&Ri,&tmod,ctx)) goto err;
359 /* Ni = (R*Ri-1)/N,
360 * keep only least significant word: */
361 mont->n0 = (Ri.top > 0) ? Ri.d[0] : 0;
362 BN_free(&Ri);
363 }
364#else /* !MONT_WORD */
365 { /* bignum version */
366 mont->ri=BN_num_bits(&mont->N);
367 if (!BN_zero(R)) goto err;
368 if (!BN_set_bit(R,mont->ri)) goto err; /* R = 2^ri */
369 /* Ri = R^-1 mod N*/
370 if ((BN_mod_inverse(&Ri,R,&mont->N,ctx)) == NULL)
371 goto err;
372 if (!BN_lshift(&Ri,&Ri,mont->ri)) goto err; /* R*Ri */
373 if (!BN_sub_word(&Ri,1)) goto err;
374 /* Ni = (R*Ri-1) / N */
375 if (!BN_div(&(mont->Ni),NULL,&Ri,&mont->N,ctx)) goto err;
376 BN_free(&Ri);
377 }
378#endif
379
380 /* setup RR for conversions */
381 if (!BN_zero(&(mont->RR))) goto err;
382 if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err;
383 if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err;
384
385 return(1);
386err:
387 return(0);
388 }
389
390BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
391 {
392 if (to == from) return(to);
393
394 if (!BN_copy(&(to->RR),&(from->RR))) return NULL;
395 if (!BN_copy(&(to->N),&(from->N))) return NULL;
396 if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
397 to->ri=from->ri;
398 to->n0=from->n0;
399 return(to);
400 }
401
/* Create and set up the Montgomery context stored at *pmont on first use.
 * If *pmont is already non-NULL it is returned as-is without taking the
 * lock.  Otherwise the write lock identified by 'lock' is taken, *pmont is
 * checked again, and a new context is allocated and set up from mod.
 * Returns *pmont, which is NULL when allocation or BN_MONT_CTX_set()
 * failed. */
402BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
403 const BIGNUM *mod, BN_CTX *ctx)
404 {
405 if (*pmont) /* already set up: return it without locking */
406 return *pmont;
407 CRYPTO_w_lock(lock);
408 if (!*pmont) /* checked again now that the lock is held */
409 {
410 *pmont = BN_MONT_CTX_new();
411 if (*pmont && !BN_MONT_CTX_set(*pmont, mod, ctx))
412 {
 /* setup failed: free the new context and leave *pmont NULL */
413 BN_MONT_CTX_free(*pmont);
414 *pmont = NULL;
415 }
416 }
417 CRYPTO_w_unlock(lock);
418 return *pmont;
419 }
420
421
diff --git a/src/lib/libcrypto/bn/bn_mpi.c b/src/lib/libcrypto/bn/bn_mpi.c
new file mode 100644
index 0000000000..05fa9d1e9a
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_mpi.c
@@ -0,0 +1,129 @@
1/* crypto/bn/bn_mpi.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63int BN_bn2mpi(const BIGNUM *a, unsigned char *d)
64 {
65 int bits;
66 int num=0;
67 int ext=0;
68 long l;
69
70 bits=BN_num_bits(a);
71 num=(bits+7)/8;
72 if (bits > 0)
73 {
74 ext=((bits & 0x07) == 0);
75 }
76 if (d == NULL)
77 return(num+4+ext);
78
79 l=num+ext;
80 d[0]=(unsigned char)(l>>24)&0xff;
81 d[1]=(unsigned char)(l>>16)&0xff;
82 d[2]=(unsigned char)(l>> 8)&0xff;
83 d[3]=(unsigned char)(l )&0xff;
84 if (ext) d[4]=0;
85 num=BN_bn2bin(a,&(d[4+ext]));
86 if (a->neg)
87 d[4]|=0x80;
88 return(num+4+ext);
89 }
90
91BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *a)
92 {
93 long len;
94 int neg=0;
95
96 if (n < 4)
97 {
98 BNerr(BN_F_BN_MPI2BN,BN_R_INVALID_LENGTH);
99 return(NULL);
100 }
101 len=((long)d[0]<<24)|((long)d[1]<<16)|((int)d[2]<<8)|(int)d[3];
102 if ((len+4) != n)
103 {
104 BNerr(BN_F_BN_MPI2BN,BN_R_ENCODING_ERROR);
105 return(NULL);
106 }
107
108 if (a == NULL) a=BN_new();
109 if (a == NULL) return(NULL);
110
111 if (len == 0)
112 {
113 a->neg=0;
114 a->top=0;
115 return(a);
116 }
117 d+=4;
118 if ((*d) & 0x80)
119 neg=1;
120 if (BN_bin2bn(d,(int)len,a) == NULL)
121 return(NULL);
122 a->neg=neg;
123 if (neg)
124 {
125 BN_clear_bit(a,BN_num_bits(a)-1);
126 }
127 return(a);
128 }
129
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
new file mode 100644
index 0000000000..3ae3822bc2
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_mul.c
@@ -0,0 +1,802 @@
1/* crypto/bn/bn_mul.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63#ifdef BN_RECURSION
64/* Karatsuba recursive multiplication algorithm
65 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
66
67/* Multiply the n2-word numbers a and b into r with one Karatsuba step.
 * r receives 2*n2 words; a and b are both n2 words; n2 must be a power of 2.
 * t is scratch space.  The original note asked for 2*n2 words, but the
 * recursive branch below passes &t[2*n2] to the half-size calls as their
 * own scratch area, so more than 2*n2 words must be available whenever
 * that branch is taken (BN_mul in this file expands t to 4*al words).
 * Writing a[0]/b[0] for the low halves and a[1]/b[1] for the high halves,
 * we calculate
 * a[0]*b[0]
 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
 * a[1]*b[1]
 */
77void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
78 BN_ULONG *t)
79 {
80 int n=n2/2,c1,c2;
81 unsigned int neg,zero;
82 BN_ULONG ln,lo,*p;
83
84# ifdef BN_COUNT
85 printf(" bn_mul_recursive %d * %d\n",n2,n2);
86# endif
87# ifdef BN_MUL_COMBA
88# if 0
89 if (n2 == 4)
90 {
91 bn_mul_comba4(r,a,b);
92 return;
93 }
94# endif
95 if (n2 == 8)
96 {
97 bn_mul_comba8(r,a,b);
98 return;
99 }
100# endif /* BN_MUL_COMBA */
101 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
102 {
103 /* This should not happen */
104 bn_mul_normal(r,a,n2,b,n2);
105 return;
106 }
107 /* r=(a[0]-a[1])*(b[1]-b[0]) */
 /* c1 compares a[0] with a[1], c2 compares b[1] with b[0]; the case
  * labels below cover c1,c2 in {-1,0,1} (assumes bn_cmp_words is a
  * three-way compare -- TODO confirm against its definition). */
108 c1=bn_cmp_words(a,&(a[n]),n);
109 c2=bn_cmp_words(&(b[n]),b,n);
110 zero=neg=0;
 /* t[0..n) and t[n..n2) receive the two differences, each subtraction
  * ordered by the comparison result; neg records that the product of the
  * true differences is negative, zero that one difference is 0. */
111 switch (c1*3+c2)
112 {
113 case -4:
114 bn_sub_words(t, &(a[n]),a, n); /* - */
115 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
116 break;
117 case -3:
118 zero=1;
119 break;
120 case -2:
121 bn_sub_words(t, &(a[n]),a, n); /* - */
122 bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */
123 neg=1;
124 break;
125 case -1:
126 case 0:
127 case 1:
128 zero=1;
129 break;
130 case 2:
131 bn_sub_words(t, a, &(a[n]),n); /* + */
132 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
133 neg=1;
134 break;
135 case 3:
136 zero=1;
137 break;
138 case 4:
139 bn_sub_words(t, a, &(a[n]),n);
140 bn_sub_words(&(t[n]),&(b[n]),b, n);
141 break;
142 }
143
 /* Three half-size products: the middle term goes to t[n2..2*n2) (or is
  * cleared when zero is set), a[0]*b[0] to r[0..n2), a[1]*b[1] to
  * r[n2..2*n2). */
144# ifdef BN_MUL_COMBA
145 if (n == 4)
146 {
147 if (!zero)
148 bn_mul_comba4(&(t[n2]),t,&(t[n]));
149 else
150 memset(&(t[n2]),0,8*sizeof(BN_ULONG));
151
152 bn_mul_comba4(r,a,b);
153 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
154 }
155 else if (n == 8)
156 {
157 if (!zero)
158 bn_mul_comba8(&(t[n2]),t,&(t[n]));
159 else
160 memset(&(t[n2]),0,16*sizeof(BN_ULONG));
161
162 bn_mul_comba8(r,a,b);
163 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
164 }
165 else
166# endif /* BN_MUL_COMBA */
167 {
 /* scratch for the recursive calls starts right after the 2*n2
  * words of t used at this level */
168 p= &(t[n2*2]);
169 if (!zero)
170 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
171 else
172 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
173 bn_mul_recursive(r,a,b,n,p);
174 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p);
175 }
176
177 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), neg is the sign
 * r[10] holds (a[0]*b[0])
 * r[32] holds (a[1]*b[1])
 */
181
 /* t[0..n2) = a[0]*b[0] + a[1]*b[1], c1 collects the carry */
182 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
183
184 if (neg) /* if t[32] is negative */
185 {
186 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
187 }
188 else
189 {
190 /* Might have a carry */
191 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
192 }
193
194 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
 * r[10] holds (a[0]*b[0])
 * r[32] holds (a[1]*b[1])
 * c1 holds the carry bits
 */
 /* add the middle term into r, shifted up by n words */
199 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
200 if (c1)
201 {
 /* fold the accumulated carry into r[n+n2] and ripple it upwards */
202 p= &(r[n+n2]);
203 lo= *p;
204 ln=(lo+c1)&BN_MASK2;
205 *p=ln;
206
207 /* The overflow will stop before we over write
208 * words we should not overwrite */
209 if (ln < (BN_ULONG)c1)
210 {
211 do {
212 p++;
213 lo= *p;
214 ln=(lo+1)&BN_MASK2;
215 *p=ln;
216 } while (ln == 0);
217 }
218 }
219 }
220
221/* Karatsuba multiply for operands whose length is not a power of two.
 * n+tn is the word length: each operand is split into a low part of n
 * words and a high part of tn significant words.
 * t needs to be n*4 words in size, as does r.
 * Note that the compare/subtract step below reads n words starting at
 * a[n] and b[n], so the high parts must be readable (and zero padded) out
 * to n words; the disabled caller in BN_mul pads both operands that way. */
223void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
224 int n, BN_ULONG *t)
225 {
226 int i,j,n2=n*2;
227 int c1,c2,neg,zero;
228 BN_ULONG ln,lo,*p;
229
230# ifdef BN_COUNT
231 printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n);
232# endif
 /* too small to be worth splitting: plain schoolbook multiply */
233 if (n < 8)
234 {
235 i=tn+n;
236 bn_mul_normal(r,a,i,b,i);
237 return;
238 }
239
240 /* r=(a[0]-a[1])*(b[1]-b[0]) */
241 c1=bn_cmp_words(a,&(a[n]),n);
242 c2=bn_cmp_words(&(b[n]),b,n);
243 zero=neg=0;
 /* Unlike bn_mul_recursive, the "zero" cases have their break commented
  * out and fall through into the following subtraction case, so the
  * differences are always computed; zero is set but not read again in
  * this function. */
244 switch (c1*3+c2)
245 {
246 case -4:
247 bn_sub_words(t, &(a[n]),a, n); /* - */
248 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
249 break;
250 case -3:
251 zero=1;
252 /* break; */
253 case -2:
254 bn_sub_words(t, &(a[n]),a, n); /* - */
255 bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */
256 neg=1;
257 break;
258 case -1:
259 case 0:
260 case 1:
261 zero=1;
262 /* break; */
263 case 2:
264 bn_sub_words(t, a, &(a[n]),n); /* + */
265 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
266 neg=1;
267 break;
268 case 3:
269 zero=1;
270 /* break; */
271 case 4:
272 bn_sub_words(t, a, &(a[n]),n);
273 bn_sub_words(&(t[n]),&(b[n]),b, n);
274 break;
275 }
276 /* The zero case isn't yet implemented here. The speedup
277 would probably be negligible. */
278# if 0
279 if (n == 4)
280 {
281 bn_mul_comba4(&(t[n2]),t,&(t[n]));
282 bn_mul_comba4(r,a,b);
283 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
284 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
285 }
286 else
287# endif
288 if (n == 8)
289 {
 /* middle term and low product via comba; the tn-word high
  * parts are multiplied the plain way and the unused tail of
  * r[n2..] is cleared */
290 bn_mul_comba8(&(t[n2]),t,&(t[n]));
291 bn_mul_comba8(r,a,b);
292 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
293 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
294 }
295 else
296 {
 /* scratch for the nested calls follows the 2*n2 words used here */
297 p= &(t[n2*2]);
298 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
299 bn_mul_recursive(r,a,b,n,p);
 /* the high*high product is sized by comparing tn with n/2 */
300 i=n/2;
301 /* If there is only a bottom half to the number,
302 * just do it */
303 j=tn-i;
304 if (j == 0)
305 {
306 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p);
307 memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
308 }
309 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
310 {
311 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
312 j,i,p);
313 memset(&(r[n2+tn*2]),0,
314 sizeof(BN_ULONG)*(n2-tn*2));
315 }
316 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
317 {
318 memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
319 if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL)
320 {
321 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
322 }
323 else
324 {
 /* halve i until it no longer exceeds tn, then
  * multiply with that split size */
325 for (;;)
326 {
327 i/=2;
328 if (i < tn)
329 {
330 bn_mul_part_recursive(&(r[n2]),
331 &(a[n]),&(b[n]),
332 tn-i,i,p);
333 break;
334 }
335 else if (i == tn)
336 {
337 bn_mul_recursive(&(r[n2]),
338 &(a[n]),&(b[n]),
339 i,p);
340 break;
341 }
342 }
343 }
344 }
345 }
346
347 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), neg is the sign
 * r[10] holds (a[0]*b[0])
 * r[32] holds (a[1]*b[1])
 */
351
 /* t[0..n2) = a[0]*b[0] + a[1]*b[1], c1 collects the carry */
352 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
353
354 if (neg) /* if t[32] is negative */
355 {
356 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
357 }
358 else
359 {
360 /* Might have a carry */
361 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
362 }
363
364 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
 * r[10] holds (a[0]*b[0])
 * r[32] holds (a[1]*b[1])
 * c1 holds the carry bits
 */
 /* add the middle term into r, shifted up by n words */
369 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
370 if (c1)
371 {
 /* fold the accumulated carry into r[n+n2] and ripple it upwards */
372 p= &(r[n+n2]);
373 lo= *p;
374 ln=(lo+c1)&BN_MASK2;
375 *p=ln;
376
377 /* The overflow will stop before we over write
378 * words we should not overwrite */
379 if (ln < (BN_ULONG)c1)
380 {
381 do {
382 p++;
383 lo= *p;
384 ln=(lo+1)&BN_MASK2;
385 *p=ln;
386 } while (ln == 0);
387 }
388 }
389 }
390
391/* a and b must be the same size, which is n2.
392 * r needs to be n2 words and t needs to be n2*2
393 */
394void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
395 BN_ULONG *t)
396 {
397 int n=n2/2;
398
399# ifdef BN_COUNT
400 printf(" bn_mul_low_recursive %d * %d\n",n2,n2);
401# endif
402
403 bn_mul_recursive(r,a,b,n,&(t[0]));
404 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
405 {
406 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
407 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
408 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
409 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
410 }
411 else
412 {
413 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
414 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
415 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
416 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
417 }
418 }
419
420/* a and b must be the same size, which is n2.
421 * r needs to be n2 words and t needs to be n2*2
422 * l is the low words of the output.
423 * t needs to be n2*3
424 */
425void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
426 BN_ULONG *t)
427 {
428 int i,n;
429 int c1,c2;
430 int neg,oneg,zero;
431 BN_ULONG ll,lc,*lp,*mp;
432
433# ifdef BN_COUNT
434 printf(" bn_mul_high %d * %d\n",n2,n2);
435# endif
436 n=n2/2;
437
438 /* Calculate (al-ah)*(bh-bl) */
439 neg=zero=0;
440 c1=bn_cmp_words(&(a[0]),&(a[n]),n);
441 c2=bn_cmp_words(&(b[n]),&(b[0]),n);
442 switch (c1*3+c2)
443 {
444 case -4:
445 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
446 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
447 break;
448 case -3:
449 zero=1;
450 break;
451 case -2:
452 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
453 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
454 neg=1;
455 break;
456 case -1:
457 case 0:
458 case 1:
459 zero=1;
460 break;
461 case 2:
462 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
463 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
464 neg=1;
465 break;
466 case 3:
467 zero=1;
468 break;
469 case 4:
470 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
471 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
472 break;
473 }
474
475 oneg=neg;
476 /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
477 /* r[10] = (a[1]*b[1]) */
478# ifdef BN_MUL_COMBA
479 if (n == 8)
480 {
481 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
482 bn_mul_comba8(r,&(a[n]),&(b[n]));
483 }
484 else
485# endif
486 {
487 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2]));
488 bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2]));
489 }
490
491 /* s0 == low(al*bl)
492 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
493 * We know s0 and s1 so the only unknown is high(al*bl)
494 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
495 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
496 */
497 if (l != NULL)
498 {
499 lp= &(t[n2+n]);
500 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
501 }
502 else
503 {
504 c1=0;
505 lp= &(r[0]);
506 }
507
508 if (neg)
509 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
510 else
511 {
512 bn_add_words(&(t[n2]),lp,&(t[0]),n);
513 neg=0;
514 }
515
516 if (l != NULL)
517 {
518 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
519 }
520 else
521 {
522 lp= &(t[n2+n]);
523 mp= &(t[n2]);
524 for (i=0; i<n; i++)
525 lp[i]=((~mp[i])+1)&BN_MASK2;
526 }
527
528 /* s[0] = low(al*bl)
529 * t[3] = high(al*bl)
530 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
531 * r[10] = (a[1]*b[1])
532 */
533 /* R[10] = al*bl
534 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
535 * R[32] = ah*bh
536 */
537 /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
538 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
539 * R[3]=r[1]+(carry/borrow)
540 */
541 if (l != NULL)
542 {
543 lp= &(t[n2]);
544 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
545 }
546 else
547 {
548 lp= &(t[n2+n]);
549 c1=0;
550 }
551 c1+=(int)(bn_add_words(&(t[n2]),lp, &(r[0]),n));
552 if (oneg)
553 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
554 else
555 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
556
557 c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
558 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
559 if (oneg)
560 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
561 else
562 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
563
564 if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
565 {
566 i=0;
567 if (c1 > 0)
568 {
569 lc=c1;
570 do {
571 ll=(r[i]+lc)&BN_MASK2;
572 r[i++]=ll;
573 lc=(lc > ll);
574 } while (lc);
575 }
576 else
577 {
578 lc= -c1;
579 do {
580 ll=r[i];
581 r[i++]=(ll-lc)&BN_MASK2;
582 lc=(lc > ll);
583 } while (lc);
584 }
585 }
586 if (c2 != 0) /* Add starting at r[1] */
587 {
588 i=n;
589 if (c2 > 0)
590 {
591 lc=c2;
592 do {
593 ll=(r[i]+lc)&BN_MASK2;
594 r[i++]=ll;
595 lc=(lc > ll);
596 } while (lc);
597 }
598 else
599 {
600 lc= -c2;
601 do {
602 ll=r[i];
603 r[i++]=(ll-lc)&BN_MASK2;
604 lc=(lc > ll);
605 } while (lc);
606 }
607 }
608 }
609#endif /* BN_RECURSION */
610
611int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
612 {
613 int top,al,bl;
614 BIGNUM *rr;
615 int ret = 0;
616#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
617 int i;
618#endif
619#ifdef BN_RECURSION
620 BIGNUM *t;
621 int j,k;
622#endif
623
624#ifdef BN_COUNT
625 printf("BN_mul %d * %d\n",a->top,b->top);
626#endif
627
628 bn_check_top(a);
629 bn_check_top(b);
630 bn_check_top(r);
631
632 al=a->top;
633 bl=b->top;
634
635 if ((al == 0) || (bl == 0))
636 {
637 if (!BN_zero(r)) goto err;
638 return(1);
639 }
640 top=al+bl;
641
642 BN_CTX_start(ctx);
643 if ((r == a) || (r == b))
644 {
645 if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
646 }
647 else
648 rr = r;
649 rr->neg=a->neg^b->neg;
650
651#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
652 i = al-bl;
653#endif
654#ifdef BN_MUL_COMBA
655 if (i == 0)
656 {
657# if 0
658 if (al == 4)
659 {
660 if (bn_wexpand(rr,8) == NULL) goto err;
661 rr->top=8;
662 bn_mul_comba4(rr->d,a->d,b->d);
663 goto end;
664 }
665# endif
666 if (al == 8)
667 {
668 if (bn_wexpand(rr,16) == NULL) goto err;
669 rr->top=16;
670 bn_mul_comba8(rr->d,a->d,b->d);
671 goto end;
672 }
673 }
674#endif /* BN_MUL_COMBA */
675#ifdef BN_RECURSION
676 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
677 {
678 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA) && bl<b->dmax)
679 {
680#if 0 /* tribute to const-ification, bl<b->dmax above covers for this */
681 if (bn_wexpand(b,al) == NULL) goto err;
682#endif
683 b->d[bl]=0;
684 bl++;
685 i--;
686 }
687 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA) && al<a->dmax)
688 {
689#if 0 /* tribute to const-ification, al<a->dmax above covers for this */
690 if (bn_wexpand(a,bl) == NULL) goto err;
691#endif
692 a->d[al]=0;
693 al++;
694 i++;
695 }
696 if (i == 0)
697 {
698 /* symmetric and > 4 */
699 /* 16 or larger */
700 j=BN_num_bits_word((BN_ULONG)al);
701 j=1<<(j-1);
702 k=j+j;
703 t = BN_CTX_get(ctx);
704 if (al == j) /* exact multiple */
705 {
706 if (bn_wexpand(t,k*2) == NULL) goto err;
707 if (bn_wexpand(rr,k*2) == NULL) goto err;
708 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
709 rr->top=top;
710 goto end;
711 }
712#if 0 /* tribute to const-ification, rsa/dsa performance is not affected */
713 else
714 {
715 if (bn_wexpand(a,k) == NULL ) goto err;
716 if (bn_wexpand(b,k) == NULL ) goto err;
717 if (bn_wexpand(t,k*4) == NULL ) goto err;
718 if (bn_wexpand(rr,k*4) == NULL ) goto err;
719 for (i=a->top; i<k; i++)
720 a->d[i]=0;
721 for (i=b->top; i<k; i++)
722 b->d[i]=0;
723 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
724 }
725 rr->top=top;
726 goto end;
727#endif
728 }
729 }
730#endif /* BN_RECURSION */
731 if (bn_wexpand(rr,top) == NULL) goto err;
732 rr->top=top;
733 bn_mul_normal(rr->d,a->d,al,b->d,bl);
734
735#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
736end:
737#endif
738 bn_fix_top(rr);
739 if (r != rr) BN_copy(r,rr);
740 ret=1;
741err:
742 BN_CTX_end(ctx);
743 return(ret);
744 }
745
746void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
747 {
748 BN_ULONG *rr;
749
750#ifdef BN_COUNT
751 printf(" bn_mul_normal %d * %d\n",na,nb);
752#endif
753
754 if (na < nb)
755 {
756 int itmp;
757 BN_ULONG *ltmp;
758
759 itmp=na; na=nb; nb=itmp;
760 ltmp=a; a=b; b=ltmp;
761
762 }
763 rr= &(r[na]);
764 rr[0]=bn_mul_words(r,a,na,b[0]);
765
766 for (;;)
767 {
768 if (--nb <= 0) return;
769 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
770 if (--nb <= 0) return;
771 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
772 if (--nb <= 0) return;
773 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
774 if (--nb <= 0) return;
775 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
776 rr+=4;
777 r+=4;
778 b+=4;
779 }
780 }
781
782void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
783 {
784#ifdef BN_COUNT
785 printf(" bn_mul_low_normal %d * %d\n",n,n);
786#endif
787 bn_mul_words(r,a,n,b[0]);
788
789 for (;;)
790 {
791 if (--n <= 0) return;
792 bn_mul_add_words(&(r[1]),a,n,b[1]);
793 if (--n <= 0) return;
794 bn_mul_add_words(&(r[2]),a,n,b[2]);
795 if (--n <= 0) return;
796 bn_mul_add_words(&(r[3]),a,n,b[3]);
797 if (--n <= 0) return;
798 bn_mul_add_words(&(r[4]),a,n,b[4]);
799 r+=4;
800 b+=4;
801 }
802 }
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c
new file mode 100644
index 0000000000..f422172f16
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_prime.c
@@ -0,0 +1,468 @@
1/* crypto/bn/bn_prime.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include <time.h>
114#include "cryptlib.h"
115#include "bn_lcl.h"
116#include <openssl/rand.h>
117
118/* The quick sieve algorithm approach to weeding out primes is
119 * Philip Zimmermann's, as implemented in PGP. I have had a read of
120 * his comments and implemented my own version.
121 */
122#include "bn_prime.h"
123
124static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
125 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont);
126static int probable_prime(BIGNUM *rnd, int bits);
127static int probable_prime_dh(BIGNUM *rnd, int bits,
128 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
129static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
130 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
131
132BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe,
133 const BIGNUM *add, const BIGNUM *rem,
134 void (*callback)(int,int,void *), void *cb_arg)
135 {
136 BIGNUM *rnd=NULL;
137 BIGNUM t;
138 int found=0;
139 int i,j,c1=0;
140 BN_CTX *ctx;
141 int checks = BN_prime_checks_for_size(bits);
142
143 BN_init(&t);
144 ctx=BN_CTX_new();
145 if (ctx == NULL) goto err;
146 if (ret == NULL)
147 {
148 if ((rnd=BN_new()) == NULL) goto err;
149 }
150 else
151 rnd=ret;
152loop:
153 /* make a random number and set the top and bottom bits */
154 if (add == NULL)
155 {
156 if (!probable_prime(rnd,bits)) goto err;
157 }
158 else
159 {
160 if (safe)
161 {
162 if (!probable_prime_dh_safe(rnd,bits,add,rem,ctx))
163 goto err;
164 }
165 else
166 {
167 if (!probable_prime_dh(rnd,bits,add,rem,ctx))
168 goto err;
169 }
170 }
171 /* if (BN_mod_word(rnd,(BN_ULONG)3) == 1) goto loop; */
172 if (callback != NULL) callback(0,c1++,cb_arg);
173
174 if (!safe)
175 {
176 i=BN_is_prime_fasttest(rnd,checks,callback,ctx,cb_arg,0);
177 if (i == -1) goto err;
178 if (i == 0) goto loop;
179 }
180 else
181 {
182 /* for "safe prime" generation,
183 * check that (p-1)/2 is prime.
184 * Since a prime is odd, We just
185 * need to divide by 2 */
186 if (!BN_rshift1(&t,rnd)) goto err;
187
188 for (i=0; i<checks; i++)
189 {
190 j=BN_is_prime_fasttest(rnd,1,callback,ctx,cb_arg,0);
191 if (j == -1) goto err;
192 if (j == 0) goto loop;
193
194 j=BN_is_prime_fasttest(&t,1,callback,ctx,cb_arg,0);
195 if (j == -1) goto err;
196 if (j == 0) goto loop;
197
198 if (callback != NULL) callback(2,c1-1,cb_arg);
199 /* We have a safe prime test pass */
200 }
201 }
202 /* we have a prime :-) */
203 found = 1;
204err:
205 if (!found && (ret == NULL) && (rnd != NULL)) BN_free(rnd);
206 BN_free(&t);
207 if (ctx != NULL) BN_CTX_free(ctx);
208 return(found ? rnd : NULL);
209 }
210
/* Primality test without trial division: forwards all arguments to
 * BN_is_prime_fasttest() with do_trial_division set to 0 and returns its
 * result unchanged. */
211int BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int,int,void *),
212 BN_CTX *ctx_passed, void *cb_arg)
213 {
214 return BN_is_prime_fasttest(a, checks, callback, ctx_passed, cb_arg, 0);
215 }
216
217int BN_is_prime_fasttest(const BIGNUM *a, int checks,
218 void (*callback)(int,int,void *),
219 BN_CTX *ctx_passed, void *cb_arg,
220 int do_trial_division)
221 {
222 int i, j, ret = -1;
223 int k;
224 BN_CTX *ctx = NULL;
225 BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
226 BN_MONT_CTX *mont = NULL;
227 const BIGNUM *A = NULL;
228
229 if (BN_cmp(a, BN_value_one()) <= 0)
230 return 0;
231
232 if (checks == BN_prime_checks)
233 checks = BN_prime_checks_for_size(BN_num_bits(a));
234
235 /* first look for small factors */
236 if (!BN_is_odd(a))
237 /* a is even => a is prime if and only if a == 2 */
238 return BN_is_word(a, 2);
239
240 if (do_trial_division)
241 {
242 for (i = 1; i < NUMPRIMES; i++)
243 if (BN_mod_word(a, primes[i]) == 0)
244 return 0;
245 if (callback != NULL) callback(1, -1, cb_arg);
246 }
247
248 if (ctx_passed != NULL)
249 ctx = ctx_passed;
250 else
251 if ((ctx=BN_CTX_new()) == NULL)
252 goto err;
253 BN_CTX_start(ctx);
254
255 /* A := abs(a) */
256 if (a->neg)
257 {
258 BIGNUM *t;
259 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
260 BN_copy(t, a);
261 t->neg = 0;
262 A = t;
263 }
264 else
265 A = a;
266 A1 = BN_CTX_get(ctx);
267 A1_odd = BN_CTX_get(ctx);
268 check = BN_CTX_get(ctx);
269 if (check == NULL) goto err;
270
271 /* compute A1 := A - 1 */
272 if (!BN_copy(A1, A))
273 goto err;
274 if (!BN_sub_word(A1, 1))
275 goto err;
276 if (BN_is_zero(A1))
277 {
278 ret = 0;
279 goto err;
280 }
281
282 /* write A1 as A1_odd * 2^k */
283 k = 1;
284 while (!BN_is_bit_set(A1, k))
285 k++;
286 if (!BN_rshift(A1_odd, A1, k))
287 goto err;
288
289 /* Montgomery setup for computations mod A */
290 mont = BN_MONT_CTX_new();
291 if (mont == NULL)
292 goto err;
293 if (!BN_MONT_CTX_set(mont, A, ctx))
294 goto err;
295
296 for (i = 0; i < checks; i++)
297 {
298 if (!BN_pseudo_rand_range(check, A1))
299 goto err;
300 if (!BN_add_word(check, 1))
301 goto err;
302 /* now 1 <= check < A */
303
304 j = witness(check, A, A1, A1_odd, k, ctx, mont);
305 if (j == -1) goto err;
306 if (j)
307 {
308 ret=0;
309 goto err;
310 }
311 if (callback != NULL) callback(1,i,cb_arg);
312 }
313 ret=1;
314err:
315 if (ctx != NULL)
316 {
317 BN_CTX_end(ctx);
318 if (ctx_passed == NULL)
319 BN_CTX_free(ctx);
320 }
321 if (mont != NULL)
322 BN_MONT_CTX_free(mont);
323
324 return(ret);
325 }
326
327static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
328 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont)
329 {
330 if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) /* w := w^a1_odd mod a */
331 return -1;
332 if (BN_is_one(w))
333 return 0; /* probably prime */
334 if (BN_cmp(w, a1) == 0)
335 return 0; /* w == -1 (mod a), 'a' is probably prime */
336 while (--k)
337 {
338 if (!BN_mod_mul(w, w, w, a, ctx)) /* w := w^2 mod a */
339 return -1;
340 if (BN_is_one(w))
341 return 1; /* 'a' is composite, otherwise a previous 'w' would
342 * have been == -1 (mod 'a') */
343 if (BN_cmp(w, a1) == 0)
344 return 0; /* w == -1 (mod a), 'a' is probably prime */
345 }
346 /* If we get here, 'w' is the (a-1)/2-th power of the original 'w',
347 * and it is neither -1 nor +1 -- so 'a' cannot be prime */
348 return 1;
349 }
350
351static int probable_prime(BIGNUM *rnd, int bits)
352 {
353 int i;
354 BN_ULONG mods[NUMPRIMES];
355 BN_ULONG delta,d;
356
357again:
358 if (!BN_rand(rnd,bits,1,1)) return(0);
359 /* we now have a random number 'rand' to test. */
360 for (i=1; i<NUMPRIMES; i++)
361 mods[i]=BN_mod_word(rnd,(BN_ULONG)primes[i]);
362 delta=0;
363 loop: for (i=1; i<NUMPRIMES; i++)
364 {
365 /* check that rnd is not a prime and also
366 * that gcd(rnd-1,primes) == 1 (except for 2) */
367 if (((mods[i]+delta)%primes[i]) <= 1)
368 {
369 d=delta;
370 delta+=2;
371 /* perhaps need to check for overflow of
372 * delta (but delta can be up to 2^32)
373 * 21-May-98 eay - added overflow check */
374 if (delta < d) goto again;
375 goto loop;
376 }
377 }
378 if (!BN_add_word(rnd,delta)) return(0);
379 return(1);
380 }
381
382static int probable_prime_dh(BIGNUM *rnd, int bits,
383 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx)
384 {
385 int i,ret=0;
386 BIGNUM *t1;
387
388 BN_CTX_start(ctx);
389 if ((t1 = BN_CTX_get(ctx)) == NULL) goto err;
390
391 if (!BN_rand(rnd,bits,0,1)) goto err;
392
393 /* we need ((rnd-rem) % add) == 0 */
394
395 if (!BN_mod(t1,rnd,add,ctx)) goto err;
396 if (!BN_sub(rnd,rnd,t1)) goto err;
397 if (rem == NULL)
398 { if (!BN_add_word(rnd,1)) goto err; }
399 else
400 { if (!BN_add(rnd,rnd,rem)) goto err; }
401
402 /* we now have a random number 'rand' to test. */
403
404 loop: for (i=1; i<NUMPRIMES; i++)
405 {
406 /* check that rnd is a prime */
407 if (BN_mod_word(rnd,(BN_ULONG)primes[i]) <= 1)
408 {
409 if (!BN_add(rnd,rnd,add)) goto err;
410 goto loop;
411 }
412 }
413 ret=1;
414err:
415 BN_CTX_end(ctx);
416 return(ret);
417 }
418
419static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
420 const BIGNUM *rem, BN_CTX *ctx)
421 {
422 int i,ret=0;
423 BIGNUM *t1,*qadd,*q;
424
425 bits--;
426 BN_CTX_start(ctx);
427 t1 = BN_CTX_get(ctx);
428 q = BN_CTX_get(ctx);
429 qadd = BN_CTX_get(ctx);
430 if (qadd == NULL) goto err;
431
432 if (!BN_rshift1(qadd,padd)) goto err;
433
434 if (!BN_rand(q,bits,0,1)) goto err;
435
436 /* we need ((rnd-rem) % add) == 0 */
437 if (!BN_mod(t1,q,qadd,ctx)) goto err;
438 if (!BN_sub(q,q,t1)) goto err;
439 if (rem == NULL)
440 { if (!BN_add_word(q,1)) goto err; }
441 else
442 {
443 if (!BN_rshift1(t1,rem)) goto err;
444 if (!BN_add(q,q,t1)) goto err;
445 }
446
447 /* we now have a random number 'rand' to test. */
448 if (!BN_lshift1(p,q)) goto err;
449 if (!BN_add_word(p,1)) goto err;
450
451 loop: for (i=1; i<NUMPRIMES; i++)
452 {
453 /* check that p and q are prime */
454 /* check that for p and q
455 * gcd(p-1,primes) == 1 (except for 2) */
456 if ( (BN_mod_word(p,(BN_ULONG)primes[i]) == 0) ||
457 (BN_mod_word(q,(BN_ULONG)primes[i]) == 0))
458 {
459 if (!BN_add(p,p,padd)) goto err;
460 if (!BN_add(q,q,qadd)) goto err;
461 goto loop;
462 }
463 }
464 ret=1;
465err:
466 BN_CTX_end(ctx);
467 return(ret);
468 }
diff --git a/src/lib/libcrypto/bn/bn_prime.h b/src/lib/libcrypto/bn/bn_prime.h
new file mode 100644
index 0000000000..b7cf9a9bfe
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_prime.h
@@ -0,0 +1,325 @@
1/* Auto generated by bn_prime.pl */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef EIGHT_BIT
60#define NUMPRIMES 2048
61#else
62#define NUMPRIMES 54
63#endif
64static const unsigned int primes[NUMPRIMES]=
65 {
66 2, 3, 5, 7, 11, 13, 17, 19,
67 23, 29, 31, 37, 41, 43, 47, 53,
68 59, 61, 67, 71, 73, 79, 83, 89,
69 97, 101, 103, 107, 109, 113, 127, 131,
70 137, 139, 149, 151, 157, 163, 167, 173,
71 179, 181, 191, 193, 197, 199, 211, 223,
72 227, 229, 233, 239, 241, 251,
73#ifndef EIGHT_BIT
74 257, 263,
75 269, 271, 277, 281, 283, 293, 307, 311,
76 313, 317, 331, 337, 347, 349, 353, 359,
77 367, 373, 379, 383, 389, 397, 401, 409,
78 419, 421, 431, 433, 439, 443, 449, 457,
79 461, 463, 467, 479, 487, 491, 499, 503,
80 509, 521, 523, 541, 547, 557, 563, 569,
81 571, 577, 587, 593, 599, 601, 607, 613,
82 617, 619, 631, 641, 643, 647, 653, 659,
83 661, 673, 677, 683, 691, 701, 709, 719,
84 727, 733, 739, 743, 751, 757, 761, 769,
85 773, 787, 797, 809, 811, 821, 823, 827,
86 829, 839, 853, 857, 859, 863, 877, 881,
87 883, 887, 907, 911, 919, 929, 937, 941,
88 947, 953, 967, 971, 977, 983, 991, 997,
89 1009,1013,1019,1021,1031,1033,1039,1049,
90 1051,1061,1063,1069,1087,1091,1093,1097,
91 1103,1109,1117,1123,1129,1151,1153,1163,
92 1171,1181,1187,1193,1201,1213,1217,1223,
93 1229,1231,1237,1249,1259,1277,1279,1283,
94 1289,1291,1297,1301,1303,1307,1319,1321,
95 1327,1361,1367,1373,1381,1399,1409,1423,
96 1427,1429,1433,1439,1447,1451,1453,1459,
97 1471,1481,1483,1487,1489,1493,1499,1511,
98 1523,1531,1543,1549,1553,1559,1567,1571,
99 1579,1583,1597,1601,1607,1609,1613,1619,
100 1621,1627,1637,1657,1663,1667,1669,1693,
101 1697,1699,1709,1721,1723,1733,1741,1747,
102 1753,1759,1777,1783,1787,1789,1801,1811,
103 1823,1831,1847,1861,1867,1871,1873,1877,
104 1879,1889,1901,1907,1913,1931,1933,1949,
105 1951,1973,1979,1987,1993,1997,1999,2003,
106 2011,2017,2027,2029,2039,2053,2063,2069,
107 2081,2083,2087,2089,2099,2111,2113,2129,
108 2131,2137,2141,2143,2153,2161,2179,2203,
109 2207,2213,2221,2237,2239,2243,2251,2267,
110 2269,2273,2281,2287,2293,2297,2309,2311,
111 2333,2339,2341,2347,2351,2357,2371,2377,
112 2381,2383,2389,2393,2399,2411,2417,2423,
113 2437,2441,2447,2459,2467,2473,2477,2503,
114 2521,2531,2539,2543,2549,2551,2557,2579,
115 2591,2593,2609,2617,2621,2633,2647,2657,
116 2659,2663,2671,2677,2683,2687,2689,2693,
117 2699,2707,2711,2713,2719,2729,2731,2741,
118 2749,2753,2767,2777,2789,2791,2797,2801,
119 2803,2819,2833,2837,2843,2851,2857,2861,
120 2879,2887,2897,2903,2909,2917,2927,2939,
121 2953,2957,2963,2969,2971,2999,3001,3011,
122 3019,3023,3037,3041,3049,3061,3067,3079,
123 3083,3089,3109,3119,3121,3137,3163,3167,
124 3169,3181,3187,3191,3203,3209,3217,3221,
125 3229,3251,3253,3257,3259,3271,3299,3301,
126 3307,3313,3319,3323,3329,3331,3343,3347,
127 3359,3361,3371,3373,3389,3391,3407,3413,
128 3433,3449,3457,3461,3463,3467,3469,3491,
129 3499,3511,3517,3527,3529,3533,3539,3541,
130 3547,3557,3559,3571,3581,3583,3593,3607,
131 3613,3617,3623,3631,3637,3643,3659,3671,
132 3673,3677,3691,3697,3701,3709,3719,3727,
133 3733,3739,3761,3767,3769,3779,3793,3797,
134 3803,3821,3823,3833,3847,3851,3853,3863,
135 3877,3881,3889,3907,3911,3917,3919,3923,
136 3929,3931,3943,3947,3967,3989,4001,4003,
137 4007,4013,4019,4021,4027,4049,4051,4057,
138 4073,4079,4091,4093,4099,4111,4127,4129,
139 4133,4139,4153,4157,4159,4177,4201,4211,
140 4217,4219,4229,4231,4241,4243,4253,4259,
141 4261,4271,4273,4283,4289,4297,4327,4337,
142 4339,4349,4357,4363,4373,4391,4397,4409,
143 4421,4423,4441,4447,4451,4457,4463,4481,
144 4483,4493,4507,4513,4517,4519,4523,4547,
145 4549,4561,4567,4583,4591,4597,4603,4621,
146 4637,4639,4643,4649,4651,4657,4663,4673,
147 4679,4691,4703,4721,4723,4729,4733,4751,
148 4759,4783,4787,4789,4793,4799,4801,4813,
149 4817,4831,4861,4871,4877,4889,4903,4909,
150 4919,4931,4933,4937,4943,4951,4957,4967,
151 4969,4973,4987,4993,4999,5003,5009,5011,
152 5021,5023,5039,5051,5059,5077,5081,5087,
153 5099,5101,5107,5113,5119,5147,5153,5167,
154 5171,5179,5189,5197,5209,5227,5231,5233,
155 5237,5261,5273,5279,5281,5297,5303,5309,
156 5323,5333,5347,5351,5381,5387,5393,5399,
157 5407,5413,5417,5419,5431,5437,5441,5443,
158 5449,5471,5477,5479,5483,5501,5503,5507,
159 5519,5521,5527,5531,5557,5563,5569,5573,
160 5581,5591,5623,5639,5641,5647,5651,5653,
161 5657,5659,5669,5683,5689,5693,5701,5711,
162 5717,5737,5741,5743,5749,5779,5783,5791,
163 5801,5807,5813,5821,5827,5839,5843,5849,
164 5851,5857,5861,5867,5869,5879,5881,5897,
165 5903,5923,5927,5939,5953,5981,5987,6007,
166 6011,6029,6037,6043,6047,6053,6067,6073,
167 6079,6089,6091,6101,6113,6121,6131,6133,
168 6143,6151,6163,6173,6197,6199,6203,6211,
169 6217,6221,6229,6247,6257,6263,6269,6271,
170 6277,6287,6299,6301,6311,6317,6323,6329,
171 6337,6343,6353,6359,6361,6367,6373,6379,
172 6389,6397,6421,6427,6449,6451,6469,6473,
173 6481,6491,6521,6529,6547,6551,6553,6563,
174 6569,6571,6577,6581,6599,6607,6619,6637,
175 6653,6659,6661,6673,6679,6689,6691,6701,
176 6703,6709,6719,6733,6737,6761,6763,6779,
177 6781,6791,6793,6803,6823,6827,6829,6833,
178 6841,6857,6863,6869,6871,6883,6899,6907,
179 6911,6917,6947,6949,6959,6961,6967,6971,
180 6977,6983,6991,6997,7001,7013,7019,7027,
181 7039,7043,7057,7069,7079,7103,7109,7121,
182 7127,7129,7151,7159,7177,7187,7193,7207,
183 7211,7213,7219,7229,7237,7243,7247,7253,
184 7283,7297,7307,7309,7321,7331,7333,7349,
185 7351,7369,7393,7411,7417,7433,7451,7457,
186 7459,7477,7481,7487,7489,7499,7507,7517,
187 7523,7529,7537,7541,7547,7549,7559,7561,
188 7573,7577,7583,7589,7591,7603,7607,7621,
189 7639,7643,7649,7669,7673,7681,7687,7691,
190 7699,7703,7717,7723,7727,7741,7753,7757,
191 7759,7789,7793,7817,7823,7829,7841,7853,
192 7867,7873,7877,7879,7883,7901,7907,7919,
193 7927,7933,7937,7949,7951,7963,7993,8009,
194 8011,8017,8039,8053,8059,8069,8081,8087,
195 8089,8093,8101,8111,8117,8123,8147,8161,
196 8167,8171,8179,8191,8209,8219,8221,8231,
197 8233,8237,8243,8263,8269,8273,8287,8291,
198 8293,8297,8311,8317,8329,8353,8363,8369,
199 8377,8387,8389,8419,8423,8429,8431,8443,
200 8447,8461,8467,8501,8513,8521,8527,8537,
201 8539,8543,8563,8573,8581,8597,8599,8609,
202 8623,8627,8629,8641,8647,8663,8669,8677,
203 8681,8689,8693,8699,8707,8713,8719,8731,
204 8737,8741,8747,8753,8761,8779,8783,8803,
205 8807,8819,8821,8831,8837,8839,8849,8861,
206 8863,8867,8887,8893,8923,8929,8933,8941,
207 8951,8963,8969,8971,8999,9001,9007,9011,
208 9013,9029,9041,9043,9049,9059,9067,9091,
209 9103,9109,9127,9133,9137,9151,9157,9161,
210 9173,9181,9187,9199,9203,9209,9221,9227,
211 9239,9241,9257,9277,9281,9283,9293,9311,
212 9319,9323,9337,9341,9343,9349,9371,9377,
213 9391,9397,9403,9413,9419,9421,9431,9433,
214 9437,9439,9461,9463,9467,9473,9479,9491,
215 9497,9511,9521,9533,9539,9547,9551,9587,
216 9601,9613,9619,9623,9629,9631,9643,9649,
217 9661,9677,9679,9689,9697,9719,9721,9733,
218 9739,9743,9749,9767,9769,9781,9787,9791,
219 9803,9811,9817,9829,9833,9839,9851,9857,
220 9859,9871,9883,9887,9901,9907,9923,9929,
221 9931,9941,9949,9967,9973,10007,10009,10037,
222 10039,10061,10067,10069,10079,10091,10093,10099,
223 10103,10111,10133,10139,10141,10151,10159,10163,
224 10169,10177,10181,10193,10211,10223,10243,10247,
225 10253,10259,10267,10271,10273,10289,10301,10303,
226 10313,10321,10331,10333,10337,10343,10357,10369,
227 10391,10399,10427,10429,10433,10453,10457,10459,
228 10463,10477,10487,10499,10501,10513,10529,10531,
229 10559,10567,10589,10597,10601,10607,10613,10627,
230 10631,10639,10651,10657,10663,10667,10687,10691,
231 10709,10711,10723,10729,10733,10739,10753,10771,
232 10781,10789,10799,10831,10837,10847,10853,10859,
233 10861,10867,10883,10889,10891,10903,10909,10937,
234 10939,10949,10957,10973,10979,10987,10993,11003,
235 11027,11047,11057,11059,11069,11071,11083,11087,
236 11093,11113,11117,11119,11131,11149,11159,11161,
237 11171,11173,11177,11197,11213,11239,11243,11251,
238 11257,11261,11273,11279,11287,11299,11311,11317,
239 11321,11329,11351,11353,11369,11383,11393,11399,
240 11411,11423,11437,11443,11447,11467,11471,11483,
241 11489,11491,11497,11503,11519,11527,11549,11551,
242 11579,11587,11593,11597,11617,11621,11633,11657,
243 11677,11681,11689,11699,11701,11717,11719,11731,
244 11743,11777,11779,11783,11789,11801,11807,11813,
245 11821,11827,11831,11833,11839,11863,11867,11887,
246 11897,11903,11909,11923,11927,11933,11939,11941,
247 11953,11959,11969,11971,11981,11987,12007,12011,
248 12037,12041,12043,12049,12071,12073,12097,12101,
249 12107,12109,12113,12119,12143,12149,12157,12161,
250 12163,12197,12203,12211,12227,12239,12241,12251,
251 12253,12263,12269,12277,12281,12289,12301,12323,
252 12329,12343,12347,12373,12377,12379,12391,12401,
253 12409,12413,12421,12433,12437,12451,12457,12473,
254 12479,12487,12491,12497,12503,12511,12517,12527,
255 12539,12541,12547,12553,12569,12577,12583,12589,
256 12601,12611,12613,12619,12637,12641,12647,12653,
257 12659,12671,12689,12697,12703,12713,12721,12739,
258 12743,12757,12763,12781,12791,12799,12809,12821,
259 12823,12829,12841,12853,12889,12893,12899,12907,
260 12911,12917,12919,12923,12941,12953,12959,12967,
261 12973,12979,12983,13001,13003,13007,13009,13033,
262 13037,13043,13049,13063,13093,13099,13103,13109,
263 13121,13127,13147,13151,13159,13163,13171,13177,
264 13183,13187,13217,13219,13229,13241,13249,13259,
265 13267,13291,13297,13309,13313,13327,13331,13337,
266 13339,13367,13381,13397,13399,13411,13417,13421,
267 13441,13451,13457,13463,13469,13477,13487,13499,
268 13513,13523,13537,13553,13567,13577,13591,13597,
269 13613,13619,13627,13633,13649,13669,13679,13681,
270 13687,13691,13693,13697,13709,13711,13721,13723,
271 13729,13751,13757,13759,13763,13781,13789,13799,
272 13807,13829,13831,13841,13859,13873,13877,13879,
273 13883,13901,13903,13907,13913,13921,13931,13933,
274 13963,13967,13997,13999,14009,14011,14029,14033,
275 14051,14057,14071,14081,14083,14087,14107,14143,
276 14149,14153,14159,14173,14177,14197,14207,14221,
277 14243,14249,14251,14281,14293,14303,14321,14323,
278 14327,14341,14347,14369,14387,14389,14401,14407,
279 14411,14419,14423,14431,14437,14447,14449,14461,
280 14479,14489,14503,14519,14533,14537,14543,14549,
281 14551,14557,14561,14563,14591,14593,14621,14627,
282 14629,14633,14639,14653,14657,14669,14683,14699,
283 14713,14717,14723,14731,14737,14741,14747,14753,
284 14759,14767,14771,14779,14783,14797,14813,14821,
285 14827,14831,14843,14851,14867,14869,14879,14887,
286 14891,14897,14923,14929,14939,14947,14951,14957,
287 14969,14983,15013,15017,15031,15053,15061,15073,
288 15077,15083,15091,15101,15107,15121,15131,15137,
289 15139,15149,15161,15173,15187,15193,15199,15217,
290 15227,15233,15241,15259,15263,15269,15271,15277,
291 15287,15289,15299,15307,15313,15319,15329,15331,
292 15349,15359,15361,15373,15377,15383,15391,15401,
293 15413,15427,15439,15443,15451,15461,15467,15473,
294 15493,15497,15511,15527,15541,15551,15559,15569,
295 15581,15583,15601,15607,15619,15629,15641,15643,
296 15647,15649,15661,15667,15671,15679,15683,15727,
297 15731,15733,15737,15739,15749,15761,15767,15773,
298 15787,15791,15797,15803,15809,15817,15823,15859,
299 15877,15881,15887,15889,15901,15907,15913,15919,
300 15923,15937,15959,15971,15973,15991,16001,16007,
301 16033,16057,16061,16063,16067,16069,16073,16087,
302 16091,16097,16103,16111,16127,16139,16141,16183,
303 16187,16189,16193,16217,16223,16229,16231,16249,
304 16253,16267,16273,16301,16319,16333,16339,16349,
305 16361,16363,16369,16381,16411,16417,16421,16427,
306 16433,16447,16451,16453,16477,16481,16487,16493,
307 16519,16529,16547,16553,16561,16567,16573,16603,
308 16607,16619,16631,16633,16649,16651,16657,16661,
309 16673,16691,16693,16699,16703,16729,16741,16747,
310 16759,16763,16787,16811,16823,16829,16831,16843,
311 16871,16879,16883,16889,16901,16903,16921,16927,
312 16931,16937,16943,16963,16979,16981,16987,16993,
313 17011,17021,17027,17029,17033,17041,17047,17053,
314 17077,17093,17099,17107,17117,17123,17137,17159,
315 17167,17183,17189,17191,17203,17207,17209,17231,
316 17239,17257,17291,17293,17299,17317,17321,17327,
317 17333,17341,17351,17359,17377,17383,17387,17389,
318 17393,17401,17417,17419,17431,17443,17449,17467,
319 17471,17477,17483,17489,17491,17497,17509,17519,
320 17539,17551,17569,17573,17579,17581,17597,17599,
321 17609,17623,17627,17657,17659,17669,17681,17683,
322 17707,17713,17729,17737,17747,17749,17761,17783,
323 17789,17791,17807,17827,17837,17839,17851,17863,
324#endif
325 };
diff --git a/src/lib/libcrypto/bn/bn_prime.pl b/src/lib/libcrypto/bn/bn_prime.pl
new file mode 100644
index 0000000000..9fc3765486
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_prime.pl
@@ -0,0 +1,117 @@
1#!/usr/local/bin/perl
2# bn_prime.pl
3
# How many primes to emit: the first command-line argument, or 2048.
$num = ($#ARGV >= 0) ? $ARGV[0] : 2048;

# Build @primes by trial division.  2 is seeded by hand so that only odd
# candidates need to be examined afterwards.
@primes = (2);
$p = 1;
CANDIDATE:
until (@primes >= $num)
	{
	$p += 2;
	$s = int(sqrt($p));

	# A composite $p must have a prime factor no larger than sqrt($p).
	foreach $d (@primes)
		{
		last if ($d > $s);
		next CANDIDATE unless ($p % $d);
		}
	push(@primes, $p);
	}
20
21# print <<"EOF";
22# /* Auto generated by bn_prime.pl */
23# /* Copyright (C) 1995-1997 Eric Young (eay\@mincom.oz.au).
24# * All rights reserved.
25# * Copyright remains Eric Young's, and as such any Copyright notices in
26# * the code are not to be removed.
27# * See the COPYRIGHT file in the SSLeay distribution for more details.
28# */
29#
30# EOF
31
32print <<\EOF;
33/* Auto generated by bn_prime.pl */
34/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
35 * All rights reserved.
36 *
37 * This package is an SSL implementation written
38 * by Eric Young (eay@cryptsoft.com).
39 * The implementation was written so as to conform with Netscapes SSL.
40 *
41 * This library is free for commercial and non-commercial use as long as
42 * the following conditions are aheared to. The following conditions
43 * apply to all code found in this distribution, be it the RC4, RSA,
44 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
45 * included with this distribution is covered by the same copyright terms
46 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
47 *
48 * Copyright remains Eric Young's, and as such any Copyright notices in
49 * the code are not to be removed.
50 * If this package is used in a product, Eric Young should be given attribution
51 * as the author of the parts of the library used.
52 * This can be in the form of a textual message at program startup or
53 * in documentation (online or textual) provided with the package.
54 *
55 * Redistribution and use in source and binary forms, with or without
56 * modification, are permitted provided that the following conditions
57 * are met:
58 * 1. Redistributions of source code must retain the copyright
59 * notice, this list of conditions and the following disclaimer.
60 * 2. Redistributions in binary form must reproduce the above copyright
61 * notice, this list of conditions and the following disclaimer in the
62 * documentation and/or other materials provided with the distribution.
63 * 3. All advertising materials mentioning features or use of this software
64 * must display the following acknowledgement:
65 * "This product includes cryptographic software written by
66 * Eric Young (eay@cryptsoft.com)"
67 * The word 'cryptographic' can be left out if the rouines from the library
68 * being used are not cryptographic related :-).
69 * 4. If you include any Windows specific code (or a derivative thereof) from
70 * the apps directory (application code) you must include an acknowledgement:
71 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
72 *
73 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83 * SUCH DAMAGE.
84 *
85 * The licence and distribution terms for any publically available version or
86 * derivative of this code cannot be changed. i.e. this code cannot simply be
87 * copied and put under another distribution licence
88 * [including the GNU Public Licence.]
89 */
90
91EOF
92
# $eight is the number of leading primes that are <= 256, i.e. the table
# size used when EIGHT_BIT is defined.  Default to the whole list so the
# value is still defined when no generated prime exceeds 256.
$eight=scalar(@primes);
for ($i=0; $i <= $#primes; $i++)
	{
	if ($primes[$i] > 256)
		{
		$eight=$i;
		last;
		}
	}

# NUMPRIMES must equal the number of initialisers printed below, so use
# the real length of @primes rather than the requested count.
print "#ifndef EIGHT_BIT\n";
printf "#define NUMPRIMES %d\n",scalar(@primes);
print "#else\n";
printf "#define NUMPRIMES %d\n",$eight;
print "#endif\n";
print "static const unsigned int primes[NUMPRIMES]=\n\t{\n\t";

# Eight values per row.  The first prime above 256 opens an
# '#ifndef EIGHT_BIT' section that hides the rest of the table from
# EIGHT_BIT builds; $init records whether that section was opened.
$init=0;
for ($i=0; $i <= $#primes; $i++)
	{
	if (($primes[$i] > 256) && !$init)
		{
		print "\n#ifndef EIGHT_BIT\n\t";
		$init=1;
		}
	print "\n\t" if (($i%8) == 0) && ($i != 0);
	printf("%4d,",$primes[$i]);
	}
# Close the conditional section only if it was actually opened, otherwise
# the generated header would contain an unmatched #endif.
print "\n#endif" if ($init);
print "\n\t};\n";
116
117
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
new file mode 100644
index 0000000000..acba7ed7ee
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_print.c
@@ -0,0 +1,333 @@
1/* crypto/bn/bn_print.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
#include <stdio.h>
#include <ctype.h>
#include <limits.h>
#include "cryptlib.h"
#include <openssl/buffer.h>
#include "bn_lcl.h"
64
65static const char *Hex="0123456789ABCDEF";
66
67/* Must 'OPENSSL_free' the returned data */
68char *BN_bn2hex(const BIGNUM *a)
69 {
70 int i,j,v,z=0;
71 char *buf;
72 char *p;
73
74 buf=(char *)OPENSSL_malloc(a->top*BN_BYTES*2+2);
75 if (buf == NULL)
76 {
77 BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE);
78 goto err;
79 }
80 p=buf;
81 if (a->neg) *(p++)='-';
82 if (BN_is_zero(a)) *(p++)='0';
83 for (i=a->top-1; i >=0; i--)
84 {
85 for (j=BN_BITS2-8; j >= 0; j-=8)
86 {
87 /* strip leading zeros */
88 v=((int)(a->d[i]>>(long)j))&0xff;
89 if (z || (v != 0))
90 {
91 *(p++)=Hex[v>>4];
92 *(p++)=Hex[v&0x0f];
93 z=1;
94 }
95 }
96 }
97 *p='\0';
98err:
99 return(buf);
100 }
101
102/* Must 'OPENSSL_free' the returned data */
103char *BN_bn2dec(const BIGNUM *a)
104 {
105 int i=0,num;
106 char *buf=NULL;
107 char *p;
108 BIGNUM *t=NULL;
109 BN_ULONG *bn_data=NULL,*lp;
110
111 i=BN_num_bits(a)*3;
112 num=(i/10+i/1000+3)+1;
113 bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG));
114 buf=(char *)OPENSSL_malloc(num+3);
115 if ((buf == NULL) || (bn_data == NULL))
116 {
117 BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE);
118 goto err;
119 }
120 if ((t=BN_dup(a)) == NULL) goto err;
121
122#define BUF_REMAIN (num+3 - (size_t)(p - buf))
123 p=buf;
124 lp=bn_data;
125 if (t->neg) *(p++)='-';
126 if (BN_is_zero(t))
127 {
128 *(p++)='0';
129 *(p++)='\0';
130 }
131 else
132 {
133 i=0;
134 while (!BN_is_zero(t))
135 {
136 *lp=BN_div_word(t,BN_DEC_CONV);
137 lp++;
138 }
139 lp--;
140 /* We now have a series of blocks, BN_DEC_NUM chars
141 * in length, where the last one needs truncation.
142 * The blocks need to be reversed in order. */
143 BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT1,*lp);
144 while (*p) p++;
145 while (lp != bn_data)
146 {
147 lp--;
148 BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT2,*lp);
149 while (*p) p++;
150 }
151 }
152err:
153 if (bn_data != NULL) OPENSSL_free(bn_data);
154 if (t != NULL) BN_free(t);
155 return(buf);
156 }
157
158int BN_hex2bn(BIGNUM **bn, const char *a)
159 {
160 BIGNUM *ret=NULL;
161 BN_ULONG l=0;
162 int neg=0,h,m,i,j,k,c;
163 int num;
164
165 if ((a == NULL) || (*a == '\0')) return(0);
166
167 if (*a == '-') { neg=1; a++; }
168
169 for (i=0; isxdigit((unsigned char) a[i]); i++)
170 ;
171
172 num=i+neg;
173 if (bn == NULL) return(num);
174
175 /* a is the start of the hex digits, and it is 'i' long */
176 if (*bn == NULL)
177 {
178 if ((ret=BN_new()) == NULL) return(0);
179 }
180 else
181 {
182 ret= *bn;
183 BN_zero(ret);
184 }
185
186 /* i is the number of hex digests; */
187 if (bn_expand(ret,i*4) == NULL) goto err;
188
189 j=i; /* least significant 'hex' */
190 m=0;
191 h=0;
192 while (j > 0)
193 {
194 m=((BN_BYTES*2) <= j)?(BN_BYTES*2):j;
195 l=0;
196 for (;;)
197 {
198 c=a[j-m];
199 if ((c >= '0') && (c <= '9')) k=c-'0';
200 else if ((c >= 'a') && (c <= 'f')) k=c-'a'+10;
201 else if ((c >= 'A') && (c <= 'F')) k=c-'A'+10;
202 else k=0; /* paranoia */
203 l=(l<<4)|k;
204
205 if (--m <= 0)
206 {
207 ret->d[h++]=l;
208 break;
209 }
210 }
211 j-=(BN_BYTES*2);
212 }
213 ret->top=h;
214 bn_fix_top(ret);
215 ret->neg=neg;
216
217 *bn=ret;
218 return(num);
219err:
220 if (*bn == NULL) BN_free(ret);
221 return(0);
222 }
223
224int BN_dec2bn(BIGNUM **bn, const char *a)
225 {
226 BIGNUM *ret=NULL;
227 BN_ULONG l=0;
228 int neg=0,i,j;
229 int num;
230
231 if ((a == NULL) || (*a == '\0')) return(0);
232 if (*a == '-') { neg=1; a++; }
233
234 for (i=0; isdigit((unsigned char) a[i]); i++)
235 ;
236
237 num=i+neg;
238 if (bn == NULL) return(num);
239
240 /* a is the start of the digits, and it is 'i' long.
241 * We chop it into BN_DEC_NUM digits at a time */
242 if (*bn == NULL)
243 {
244 if ((ret=BN_new()) == NULL) return(0);
245 }
246 else
247 {
248 ret= *bn;
249 BN_zero(ret);
250 }
251
252 /* i is the number of digests, a bit of an over expand; */
253 if (bn_expand(ret,i*4) == NULL) goto err;
254
255 j=BN_DEC_NUM-(i%BN_DEC_NUM);
256 if (j == BN_DEC_NUM) j=0;
257 l=0;
258 while (*a)
259 {
260 l*=10;
261 l+= *a-'0';
262 a++;
263 if (++j == BN_DEC_NUM)
264 {
265 BN_mul_word(ret,BN_DEC_CONV);
266 BN_add_word(ret,l);
267 l=0;
268 j=0;
269 }
270 }
271 ret->neg=neg;
272
273 bn_fix_top(ret);
274 *bn=ret;
275 return(num);
276err:
277 if (*bn == NULL) BN_free(ret);
278 return(0);
279 }
280
281#ifndef OPENSSL_NO_BIO
282#ifndef OPENSSL_NO_FP_API
283int BN_print_fp(FILE *fp, const BIGNUM *a)
284 {
285 BIO *b;
286 int ret;
287
288 if ((b=BIO_new(BIO_s_file())) == NULL)
289 return(0);
290 BIO_set_fp(b,fp,BIO_NOCLOSE);
291 ret=BN_print(b,a);
292 BIO_free(b);
293 return(ret);
294 }
295#endif
296
297int BN_print(BIO *bp, const BIGNUM *a)
298 {
299 int i,j,v,z=0;
300 int ret=0;
301
302 if ((a->neg) && (BIO_write(bp,"-",1) != 1)) goto end;
303 if ((BN_is_zero(a)) && (BIO_write(bp,"0",1) != 1)) goto end;
304 for (i=a->top-1; i >=0; i--)
305 {
306 for (j=BN_BITS2-4; j >= 0; j-=4)
307 {
308 /* strip leading zeros */
309 v=((int)(a->d[i]>>(long)j))&0x0f;
310 if (z || (v != 0))
311 {
312 if (BIO_write(bp,&(Hex[v]),1) != 1)
313 goto end;
314 z=1;
315 }
316 }
317 }
318 ret=1;
319end:
320 return(ret);
321 }
322#endif
323
324#ifdef BN_DEBUG
325void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n)
326 {
327 int i;
328 fprintf(o, "%s=", a);
329 for (i=n-1;i>=0;i--)
330 fprintf(o, "%08lX", b[i]); /* assumes 32-bit BN_ULONG */
331 fprintf(o, "\n");
332 }
333#endif
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c
new file mode 100644
index 0000000000..893c9d2af9
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_rand.c
@@ -0,0 +1,291 @@
1/* crypto/bn/bn_rand.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include <time.h>
114#include "cryptlib.h"
115#include "bn_lcl.h"
116#include <openssl/rand.h>
117
118static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
119 {
120 unsigned char *buf=NULL;
121 int ret=0,bit,bytes,mask;
122 time_t tim;
123
124 if (bits == 0)
125 {
126 BN_zero(rnd);
127 return 1;
128 }
129
130 bytes=(bits+7)/8;
131 bit=(bits-1)%8;
132 mask=0xff<<(bit+1);
133
134 buf=(unsigned char *)OPENSSL_malloc(bytes);
135 if (buf == NULL)
136 {
137 BNerr(BN_F_BN_RAND,ERR_R_MALLOC_FAILURE);
138 goto err;
139 }
140
141 /* make a random number and set the top and bottom bits */
142 time(&tim);
143 RAND_add(&tim,sizeof(tim),0);
144
145 if (pseudorand)
146 {
147 if (RAND_pseudo_bytes(buf, bytes) == -1)
148 goto err;
149 }
150 else
151 {
152 if (RAND_bytes(buf, bytes) <= 0)
153 goto err;
154 }
155
156#if 1
157 if (pseudorand == 2)
158 {
159 /* generate patterns that are more likely to trigger BN
160 library bugs */
161 int i;
162 unsigned char c;
163
164 for (i = 0; i < bytes; i++)
165 {
166 RAND_pseudo_bytes(&c, 1);
167 if (c >= 128 && i > 0)
168 buf[i] = buf[i-1];
169 else if (c < 42)
170 buf[i] = 0;
171 else if (c < 84)
172 buf[i] = 255;
173 }
174 }
175#endif
176
177 if (top != -1)
178 {
179 if (top)
180 {
181 if (bit == 0)
182 {
183 buf[0]=1;
184 buf[1]|=0x80;
185 }
186 else
187 {
188 buf[0]|=(3<<(bit-1));
189 }
190 }
191 else
192 {
193 buf[0]|=(1<<bit);
194 }
195 }
196 buf[0] &= ~mask;
197 if (bottom) /* set bottom bit if requested */
198 buf[bytes-1]|=1;
199 if (!BN_bin2bn(buf,bytes,rnd)) goto err;
200 ret=1;
201err:
202 if (buf != NULL)
203 {
204 OPENSSL_cleanse(buf,bytes);
205 OPENSSL_free(buf);
206 }
207 return(ret);
208 }
209
210int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
211 {
212 return bnrand(0, rnd, bits, top, bottom);
213 }
214
215int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
216 {
217 return bnrand(1, rnd, bits, top, bottom);
218 }
219
220#if 1
221int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
222 {
223 return bnrand(2, rnd, bits, top, bottom);
224 }
225#endif
226
227
228/* random number r: 0 <= r < range */
229static int bn_rand_range(int pseudo, BIGNUM *r, BIGNUM *range)
230 {
231 int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand;
232 int n;
233
234 if (range->neg || BN_is_zero(range))
235 {
236 BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE);
237 return 0;
238 }
239
240 n = BN_num_bits(range); /* n > 0 */
241
242 /* BN_is_bit_set(range, n - 1) always holds */
243
244 if (n == 1)
245 {
246 if (!BN_zero(r)) return 0;
247 }
248 else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3))
249 {
250 /* range = 100..._2,
251 * so 3*range (= 11..._2) is exactly one bit longer than range */
252 do
253 {
254 if (!bn_rand(r, n + 1, -1, 0)) return 0;
255 /* If r < 3*range, use r := r MOD range
256 * (which is either r, r - range, or r - 2*range).
257 * Otherwise, iterate once more.
258 * Since 3*range = 11..._2, each iteration succeeds with
259 * probability >= .75. */
260 if (BN_cmp(r ,range) >= 0)
261 {
262 if (!BN_sub(r, r, range)) return 0;
263 if (BN_cmp(r, range) >= 0)
264 if (!BN_sub(r, r, range)) return 0;
265 }
266 }
267 while (BN_cmp(r, range) >= 0);
268 }
269 else
270 {
271 do
272 {
273 /* range = 11..._2 or range = 101..._2 */
274 if (!bn_rand(r, n, -1, 0)) return 0;
275 }
276 while (BN_cmp(r, range) >= 0);
277 }
278
279 return 1;
280 }
281
282
283int BN_rand_range(BIGNUM *r, BIGNUM *range)
284 {
285 return bn_rand_range(0, r, range);
286 }
287
288int BN_pseudo_rand_range(BIGNUM *r, BIGNUM *range)
289 {
290 return bn_rand_range(1, r, range);
291 }
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c
new file mode 100644
index 0000000000..ef5fdd4708
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_recp.c
@@ -0,0 +1,230 @@
1/* crypto/bn/bn_recp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63void BN_RECP_CTX_init(BN_RECP_CTX *recp)
64 {
65 BN_init(&(recp->N));
66 BN_init(&(recp->Nr));
67 recp->num_bits=0;
68 recp->flags=0;
69 }
70
71BN_RECP_CTX *BN_RECP_CTX_new(void)
72 {
73 BN_RECP_CTX *ret;
74
75 if ((ret=(BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL)
76 return(NULL);
77
78 BN_RECP_CTX_init(ret);
79 ret->flags=BN_FLG_MALLOCED;
80 return(ret);
81 }
82
83void BN_RECP_CTX_free(BN_RECP_CTX *recp)
84 {
85 if(recp == NULL)
86 return;
87
88 BN_free(&(recp->N));
89 BN_free(&(recp->Nr));
90 if (recp->flags & BN_FLG_MALLOCED)
91 OPENSSL_free(recp);
92 }
93
94int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
95 {
96 if (!BN_copy(&(recp->N),d)) return 0;
97 if (!BN_zero(&(recp->Nr))) return 0;
98 recp->num_bits=BN_num_bits(d);
99 recp->shift=0;
100 return(1);
101 }
102
103int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
104 BN_RECP_CTX *recp, BN_CTX *ctx)
105 {
106 int ret=0;
107 BIGNUM *a;
108 const BIGNUM *ca;
109
110 BN_CTX_start(ctx);
111 if ((a = BN_CTX_get(ctx)) == NULL) goto err;
112 if (y != NULL)
113 {
114 if (x == y)
115 { if (!BN_sqr(a,x,ctx)) goto err; }
116 else
117 { if (!BN_mul(a,x,y,ctx)) goto err; }
118 ca = a;
119 }
120 else
121 ca=x; /* Just do the mod */
122
123 ret = BN_div_recp(NULL,r,ca,recp,ctx);
124err:
125 BN_CTX_end(ctx);
126 return(ret);
127 }
128
129int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
130 BN_RECP_CTX *recp, BN_CTX *ctx)
131 {
132 int i,j,ret=0;
133 BIGNUM *a,*b,*d,*r;
134
135 BN_CTX_start(ctx);
136 a=BN_CTX_get(ctx);
137 b=BN_CTX_get(ctx);
138 if (dv != NULL)
139 d=dv;
140 else
141 d=BN_CTX_get(ctx);
142 if (rem != NULL)
143 r=rem;
144 else
145 r=BN_CTX_get(ctx);
146 if (a == NULL || b == NULL || d == NULL || r == NULL) goto err;
147
148 if (BN_ucmp(m,&(recp->N)) < 0)
149 {
150 if (!BN_zero(d)) return 0;
151 if (!BN_copy(r,m)) return 0;
152 BN_CTX_end(ctx);
153 return(1);
154 }
155
156 /* We want the remainder
157 * Given input of ABCDEF / ab
158 * we need multiply ABCDEF by 3 digests of the reciprocal of ab
159 *
160 */
161
162 /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
163 i=BN_num_bits(m);
164 j=recp->num_bits<<1;
165 if (j>i) i=j;
166
167 /* Nr := round(2^i / N) */
168 if (i != recp->shift)
169 recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N),
170 i,ctx); /* BN_reciprocal returns i, or -1 for an error */
171 if (recp->shift == -1) goto err;
172
173 /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))|
174 * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))|
175 * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
176 * = |m/N|
177 */
178 if (!BN_rshift(a,m,recp->num_bits)) goto err;
179 if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err;
180 if (!BN_rshift(d,b,i-recp->num_bits)) goto err;
181 d->neg=0;
182
183 if (!BN_mul(b,&(recp->N),d,ctx)) goto err;
184 if (!BN_usub(r,m,b)) goto err;
185 r->neg=0;
186
187#if 1
188 j=0;
189 while (BN_ucmp(r,&(recp->N)) >= 0)
190 {
191 if (j++ > 2)
192 {
193 BNerr(BN_F_BN_MOD_MUL_RECIPROCAL,BN_R_BAD_RECIPROCAL);
194 goto err;
195 }
196 if (!BN_usub(r,r,&(recp->N))) goto err;
197 if (!BN_add_word(d,1)) goto err;
198 }
199#endif
200
201 r->neg=BN_is_zero(r)?0:m->neg;
202 d->neg=m->neg^recp->N.neg;
203 ret=1;
204err:
205 BN_CTX_end(ctx);
206 return(ret);
207 }
208
209/* len is the expected size of the result
210 * We actually calculate with an extra word of precision, so
211 * we can do faster division if the remainder is not required.
212 */
213/* r := 2^len / m */
214int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
215 {
216 int ret= -1;
217 BIGNUM t;
218
219 BN_init(&t);
220
221 if (!BN_zero(&t)) goto err;
222 if (!BN_set_bit(&t,len)) goto err;
223
224 if (!BN_div(r,NULL,&t,m,ctx)) goto err;
225
226 ret=len;
227err:
228 BN_free(&t);
229 return(ret);
230 }
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
new file mode 100644
index 0000000000..70f785ea18
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_shift.c
@@ -0,0 +1,205 @@
1/* crypto/bn/bn_shift.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63int BN_lshift1(BIGNUM *r, const BIGNUM *a)
64 {
65 register BN_ULONG *ap,*rp,t,c;
66 int i;
67
68 if (r != a)
69 {
70 r->neg=a->neg;
71 if (bn_wexpand(r,a->top+1) == NULL) return(0);
72 r->top=a->top;
73 }
74 else
75 {
76 if (bn_wexpand(r,a->top+1) == NULL) return(0);
77 }
78 ap=a->d;
79 rp=r->d;
80 c=0;
81 for (i=0; i<a->top; i++)
82 {
83 t= *(ap++);
84 *(rp++)=((t<<1)|c)&BN_MASK2;
85 c=(t & BN_TBIT)?1:0;
86 }
87 if (c)
88 {
89 *rp=1;
90 r->top++;
91 }
92 return(1);
93 }
94
95int BN_rshift1(BIGNUM *r, const BIGNUM *a)
96 {
97 BN_ULONG *ap,*rp,t,c;
98 int i;
99
100 if (BN_is_zero(a))
101 {
102 BN_zero(r);
103 return(1);
104 }
105 if (a != r)
106 {
107 if (bn_wexpand(r,a->top) == NULL) return(0);
108 r->top=a->top;
109 r->neg=a->neg;
110 }
111 ap=a->d;
112 rp=r->d;
113 c=0;
114 for (i=a->top-1; i>=0; i--)
115 {
116 t=ap[i];
117 rp[i]=((t>>1)&BN_MASK2)|c;
118 c=(t&1)?BN_TBIT:0;
119 }
120 bn_fix_top(r);
121 return(1);
122 }
123
124int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
125 {
126 int i,nw,lb,rb;
127 BN_ULONG *t,*f;
128 BN_ULONG l;
129
130 r->neg=a->neg;
131 nw=n/BN_BITS2;
132 if (bn_wexpand(r,a->top+nw+1) == NULL) return(0);
133 lb=n%BN_BITS2;
134 rb=BN_BITS2-lb;
135 f=a->d;
136 t=r->d;
137 t[a->top+nw]=0;
138 if (lb == 0)
139 for (i=a->top-1; i>=0; i--)
140 t[nw+i]=f[i];
141 else
142 for (i=a->top-1; i>=0; i--)
143 {
144 l=f[i];
145 t[nw+i+1]|=(l>>rb)&BN_MASK2;
146 t[nw+i]=(l<<lb)&BN_MASK2;
147 }
148 memset(t,0,nw*sizeof(t[0]));
149/* for (i=0; i<nw; i++)
150 t[i]=0;*/
151 r->top=a->top+nw+1;
152 bn_fix_top(r);
153 return(1);
154 }
155
156int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
157 {
158 int i,j,nw,lb,rb;
159 BN_ULONG *t,*f;
160 BN_ULONG l,tmp;
161
162 nw=n/BN_BITS2;
163 rb=n%BN_BITS2;
164 lb=BN_BITS2-rb;
165 if (nw > a->top || a->top == 0)
166 {
167 BN_zero(r);
168 return(1);
169 }
170 if (r != a)
171 {
172 r->neg=a->neg;
173 if (bn_wexpand(r,a->top-nw+1) == NULL) return(0);
174 }
175 else
176 {
177 if (n == 0)
178 return 1; /* or the copying loop will go berserk */
179 }
180
181 f= &(a->d[nw]);
182 t=r->d;
183 j=a->top-nw;
184 r->top=j;
185
186 if (rb == 0)
187 {
188 for (i=j+1; i > 0; i--)
189 *(t++)= *(f++);
190 }
191 else
192 {
193 l= *(f++);
194 for (i=1; i<j; i++)
195 {
196 tmp =(l>>rb)&BN_MASK2;
197 l= *(f++);
198 *(t++) =(tmp|(l<<lb))&BN_MASK2;
199 }
200 *(t++) =(l>>rb)&BN_MASK2;
201 }
202 *t=0;
203 bn_fix_top(r);
204 return(1);
205 }
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
new file mode 100644
index 0000000000..c1d0cca438
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_sqr.c
@@ -0,0 +1,288 @@
1/* crypto/bn/bn_sqr.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* r must not be a */
64/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
65int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
66 {
67 int max,al;
68 int ret = 0;
69 BIGNUM *tmp,*rr;
70
71#ifdef BN_COUNT
72 fprintf(stderr,"BN_sqr %d * %d\n",a->top,a->top);
73#endif
74 bn_check_top(a);
75
76 al=a->top;
77 if (al <= 0)
78 {
79 r->top=0;
80 return(1);
81 }
82
83 BN_CTX_start(ctx);
84 rr=(a != r) ? r : BN_CTX_get(ctx);
85 tmp=BN_CTX_get(ctx);
86 if (tmp == NULL) goto err;
87
88 max=(al+al);
89 if (bn_wexpand(rr,max+1) == NULL) goto err;
90
91 if (al == 4)
92 {
93#ifndef BN_SQR_COMBA
94 BN_ULONG t[8];
95 bn_sqr_normal(rr->d,a->d,4,t);
96#else
97 bn_sqr_comba4(rr->d,a->d);
98#endif
99 }
100 else if (al == 8)
101 {
102#ifndef BN_SQR_COMBA
103 BN_ULONG t[16];
104 bn_sqr_normal(rr->d,a->d,8,t);
105#else
106 bn_sqr_comba8(rr->d,a->d);
107#endif
108 }
109 else
110 {
111#if defined(BN_RECURSION)
112 if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
113 {
114 BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
115 bn_sqr_normal(rr->d,a->d,al,t);
116 }
117 else
118 {
119 int j,k;
120
121 j=BN_num_bits_word((BN_ULONG)al);
122 j=1<<(j-1);
123 k=j+j;
124 if (al == j)
125 {
126 if (bn_wexpand(tmp,k*2) == NULL) goto err;
127 bn_sqr_recursive(rr->d,a->d,al,tmp->d);
128 }
129 else
130 {
131 if (bn_wexpand(tmp,max) == NULL) goto err;
132 bn_sqr_normal(rr->d,a->d,al,tmp->d);
133 }
134 }
135#else
136 if (bn_wexpand(tmp,max) == NULL) goto err;
137 bn_sqr_normal(rr->d,a->d,al,tmp->d);
138#endif
139 }
140
141 rr->top=max;
142 rr->neg=0;
143 if ((max > 0) && (rr->d[max-1] == 0)) rr->top--;
144 if (rr != r) BN_copy(r,rr);
145 ret = 1;
146 err:
147 BN_CTX_end(ctx);
148 return(ret);
149 }
150
151/* tmp must have 2*n words */
152void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
153 {
154 int i,j,max;
155 const BN_ULONG *ap;
156 BN_ULONG *rp;
157
158 max=n*2;
159 ap=a;
160 rp=r;
161 rp[0]=rp[max-1]=0;
162 rp++;
163 j=n;
164
165 if (--j > 0)
166 {
167 ap++;
168 rp[j]=bn_mul_words(rp,ap,j,ap[-1]);
169 rp+=2;
170 }
171
172 for (i=n-2; i>0; i--)
173 {
174 j--;
175 ap++;
176 rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]);
177 rp+=2;
178 }
179
180 bn_add_words(r,r,r,max);
181
182 /* There will not be a carry */
183
184 bn_sqr_words(tmp,a,n);
185
186 bn_add_words(r,r,tmp,max);
187 }
188
189#ifdef BN_RECURSION
190/* r is 2*n words in size,
191 * a and b are both n words in size. (There's not actually a 'b' here ...)
192 * n must be a power of 2.
193 * We multiply and return the result.
194 * t must be 2*n words in size
195 * We calculate
196 * a[0]*b[0]
197 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
198 * a[1]*b[1]
199 */
200void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
201 {
202 int n=n2/2;
203 int zero,c1;
204 BN_ULONG ln,lo,*p;
205
206#ifdef BN_COUNT
207 fprintf(stderr," bn_sqr_recursive %d * %d\n",n2,n2);
208#endif
209 if (n2 == 4)
210 {
211#ifndef BN_SQR_COMBA
212 bn_sqr_normal(r,a,4,t);
213#else
214 bn_sqr_comba4(r,a);
215#endif
216 return;
217 }
218 else if (n2 == 8)
219 {
220#ifndef BN_SQR_COMBA
221 bn_sqr_normal(r,a,8,t);
222#else
223 bn_sqr_comba8(r,a);
224#endif
225 return;
226 }
227 if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
228 {
229 bn_sqr_normal(r,a,n2,t);
230 return;
231 }
232 /* r=(a[0]-a[1])*(a[1]-a[0]) */
233 c1=bn_cmp_words(a,&(a[n]),n);
234 zero=0;
235 if (c1 > 0)
236 bn_sub_words(t,a,&(a[n]),n);
237 else if (c1 < 0)
238 bn_sub_words(t,&(a[n]),a,n);
239 else
240 zero=1;
241
242 /* The result will always be negative unless it is zero */
243 p= &(t[n2*2]);
244
245 if (!zero)
246 bn_sqr_recursive(&(t[n2]),t,n,p);
247 else
248 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
249 bn_sqr_recursive(r,a,n,p);
250 bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
251
252 /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
253 * r[10] holds (a[0]*b[0])
254 * r[32] holds (b[1]*b[1])
255 */
256
257 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
258
259 /* t[32] is negative */
260 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
261
262 /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
263 * r[10] holds (a[0]*a[0])
264 * r[32] holds (a[1]*a[1])
265 * c1 holds the carry bits
266 */
267 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
268 if (c1)
269 {
270 p= &(r[n+n2]);
271 lo= *p;
272 ln=(lo+c1)&BN_MASK2;
273 *p=ln;
274
275 /* The overflow will stop before we over write
276 * words we should not overwrite */
277 if (ln < (BN_ULONG)c1)
278 {
279 do {
280 p++;
281 lo= *p;
282 ln=(lo+1)&BN_MASK2;
283 *p=ln;
284 } while (ln == 0);
285 }
286 }
287 }
288#endif
diff --git a/src/lib/libcrypto/bn/bn_sqrt.c b/src/lib/libcrypto/bn/bn_sqrt.c
new file mode 100644
index 0000000000..e2a1105dc8
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_sqrt.c
@@ -0,0 +1,387 @@
1/* crypto/bn/bn_sqrt.c */
2/* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * and Bodo Moeller for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57
58#include "cryptlib.h"
59#include "bn_lcl.h"
60
61
62BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
63/* Returns 'ret' such that
64 * ret^2 == a (mod p),
65 * using the Tonelli/Shanks algorithm (cf. Henri Cohen, "A Course
66 * in Algebraic Computational Number Theory", algorithm 1.5.1).
67 * 'p' must be prime!
68 * If 'a' is not a square, this is not necessarily detected by
69 * the algorithms; a bogus result must be expected in this case.
70 */
71 {
72 BIGNUM *ret = in;
73 int err = 1;
74 int r;
75 BIGNUM *b, *q, *t, *x, *y;
76 int e, i, j;
77
78 if (!BN_is_odd(p) || BN_abs_is_word(p, 1))
79 {
80 if (BN_abs_is_word(p, 2))
81 {
82 if (ret == NULL)
83 ret = BN_new();
84 if (ret == NULL)
85 goto end;
86 if (!BN_set_word(ret, BN_is_bit_set(a, 0)))
87 {
88 BN_free(ret);
89 return NULL;
90 }
91 return ret;
92 }
93
94 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
95 return(NULL);
96 }
97
98 if (BN_is_zero(a) || BN_is_one(a))
99 {
100 if (ret == NULL)
101 ret = BN_new();
102 if (ret == NULL)
103 goto end;
104 if (!BN_set_word(ret, BN_is_one(a)))
105 {
106 BN_free(ret);
107 return NULL;
108 }
109 return ret;
110 }
111
112#if 0 /* if BN_mod_sqrt is used with correct input, this just wastes time */
113 r = BN_kronecker(a, p, ctx);
114 if (r < -1) return NULL;
115 if (r == -1)
116 {
117 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
118 return(NULL);
119 }
120#endif
121
122 BN_CTX_start(ctx);
123 b = BN_CTX_get(ctx);
124 q = BN_CTX_get(ctx);
125 t = BN_CTX_get(ctx);
126 x = BN_CTX_get(ctx);
127 y = BN_CTX_get(ctx);
128 if (y == NULL) goto end;
129
130 if (ret == NULL)
131 ret = BN_new();
132 if (ret == NULL) goto end;
133
134 /* now write |p| - 1 as 2^e*q where q is odd */
135 e = 1;
136 while (!BN_is_bit_set(p, e))
137 e++;
138 /* we'll set q later (if needed) */
139
140 if (e == 1)
141 {
142 /* The easy case: (|p|-1)/2 is odd, so 2 has an inverse
143 * modulo (|p|-1)/2, and square roots can be computed
144 * directly by modular exponentiation.
145 * We have
146 * 2 * (|p|+1)/4 == 1 (mod (|p|-1)/2),
147 * so we can use exponent (|p|+1)/4, i.e. (|p|-3)/4 + 1.
148 */
149 if (!BN_rshift(q, p, 2)) goto end;
150 q->neg = 0;
151 if (!BN_add_word(q, 1)) goto end;
152 if (!BN_mod_exp(ret, a, q, p, ctx)) goto end;
153 err = 0;
154 goto end;
155 }
156
157 if (e == 2)
158 {
159 /* |p| == 5 (mod 8)
160 *
161 * In this case 2 is always a non-square since
162 * Legendre(2,p) = (-1)^((p^2-1)/8) for any odd prime.
163 * So if a really is a square, then 2*a is a non-square.
164 * Thus for
165 * b := (2*a)^((|p|-5)/8),
166 * i := (2*a)*b^2
167 * we have
168 * i^2 = (2*a)^((1 + (|p|-5)/4)*2)
169 * = (2*a)^((p-1)/2)
170 * = -1;
171 * so if we set
172 * x := a*b*(i-1),
173 * then
174 * x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
175 * = a^2 * b^2 * (-2*i)
176 * = a*(-i)*(2*a*b^2)
177 * = a*(-i)*i
178 * = a.
179 *
180 * (This is due to A.O.L. Atkin,
181 * <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
182 * November 1992.)
183 */
184
185 /* make sure that a is reduced modulo p */
186 if (a->neg || BN_ucmp(a, p) >= 0)
187 {
188 if (!BN_nnmod(x, a, p, ctx)) goto end;
189 a = x; /* use x as temporary variable */
190 }
191
192 /* t := 2*a */
193 if (!BN_mod_lshift1_quick(t, a, p)) goto end;
194
195 /* b := (2*a)^((|p|-5)/8) */
196 if (!BN_rshift(q, p, 3)) goto end;
197 q->neg = 0;
198 if (!BN_mod_exp(b, t, q, p, ctx)) goto end;
199
200 /* y := b^2 */
201 if (!BN_mod_sqr(y, b, p, ctx)) goto end;
202
203 /* t := (2*a)*b^2 - 1*/
204 if (!BN_mod_mul(t, t, y, p, ctx)) goto end;
205 if (!BN_sub_word(t, 1)) goto end;
206
207 /* x = a*b*t */
208 if (!BN_mod_mul(x, a, b, p, ctx)) goto end;
209 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
210
211 if (!BN_copy(ret, x)) goto end;
212 err = 0;
213 goto end;
214 }
215
216 /* e > 2, so we really have to use the Tonelli/Shanks algorithm.
217 * First, find some y that is not a square. */
218 if (!BN_copy(q, p)) goto end; /* use 'q' as temp */
219 q->neg = 0;
220 i = 2;
221 do
222 {
223 /* For efficiency, try small numbers first;
224 * if this fails, try random numbers.
225 */
226 if (i < 22)
227 {
228 if (!BN_set_word(y, i)) goto end;
229 }
230 else
231 {
232 if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) goto end;
233 if (BN_ucmp(y, p) >= 0)
234 {
235 if (!(p->neg ? BN_add : BN_sub)(y, y, p)) goto end;
236 }
237 /* now 0 <= y < |p| */
238 if (BN_is_zero(y))
239 if (!BN_set_word(y, i)) goto end;
240 }
241
242 r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
243 if (r < -1) goto end;
244 if (r == 0)
245 {
246 /* m divides p */
247 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
248 goto end;
249 }
250 }
251 while (r == 1 && ++i < 82);
252
253 if (r != -1)
254 {
255 /* Many rounds and still no non-square -- this is more likely
256 * a bug than just bad luck.
257 * Even if p is not prime, we should have found some y
258 * such that r == -1.
259 */
260 BNerr(BN_F_BN_MOD_SQRT, BN_R_TOO_MANY_ITERATIONS);
261 goto end;
262 }
263
264 /* Here's our actual 'q': */
265 if (!BN_rshift(q, q, e)) goto end;
266
267 /* Now that we have some non-square, we can find an element
268 * of order 2^e by computing its q'th power. */
269 if (!BN_mod_exp(y, y, q, p, ctx)) goto end;
270 if (BN_is_one(y))
271 {
272 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
273 goto end;
274 }
275
276 /* Now we know that (if p is indeed prime) there is an integer
277 * k, 0 <= k < 2^e, such that
278 *
279 * a^q * y^k == 1 (mod p).
280 *
281 * As a^q is a square and y is not, k must be even.
282 * q+1 is even, too, so there is an element
283 *
284 * X := a^((q+1)/2) * y^(k/2),
285 *
286 * and it satisfies
287 *
288 * X^2 = a^q * a * y^k
289 * = a,
290 *
291 * so it is the square root that we are looking for.
292 */
293
294 /* t := (q-1)/2 (note that q is odd) */
295 if (!BN_rshift1(t, q)) goto end;
296
297 /* x := a^((q-1)/2) */
298 if (BN_is_zero(t)) /* special case: p = 2^e + 1 */
299 {
300 if (!BN_nnmod(t, a, p, ctx)) goto end;
301 if (BN_is_zero(t))
302 {
303 /* special case: a == 0 (mod p) */
304 if (!BN_zero(ret)) goto end;
305 err = 0;
306 goto end;
307 }
308 else
309 if (!BN_one(x)) goto end;
310 }
311 else
312 {
313 if (!BN_mod_exp(x, a, t, p, ctx)) goto end;
314 if (BN_is_zero(x))
315 {
316 /* special case: a == 0 (mod p) */
317 if (!BN_zero(ret)) goto end;
318 err = 0;
319 goto end;
320 }
321 }
322
323 /* b := a*x^2 (= a^q) */
324 if (!BN_mod_sqr(b, x, p, ctx)) goto end;
325 if (!BN_mod_mul(b, b, a, p, ctx)) goto end;
326
327 /* x := a*x (= a^((q+1)/2)) */
328 if (!BN_mod_mul(x, x, a, p, ctx)) goto end;
329
330 while (1)
331 {
332 /* Now b is a^q * y^k for some even k (0 <= k < 2^E
333 * where E refers to the original value of e, which we
334 * don't keep in a variable), and x is a^((q+1)/2) * y^(k/2).
335 *
336 * We have a*b = x^2,
337 * y^2^(e-1) = -1,
338 * b^2^(e-1) = 1.
339 */
340
341 if (BN_is_one(b))
342 {
343 if (!BN_copy(ret, x)) goto end;
344 err = 0;
345 goto end;
346 }
347
348
349 /* find smallest i such that b^(2^i) = 1 */
350 i = 1;
351 if (!BN_mod_sqr(t, b, p, ctx)) goto end;
352 while (!BN_is_one(t))
353 {
354 i++;
355 if (i == e)
356 {
357 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
358 goto end;
359 }
360 if (!BN_mod_mul(t, t, t, p, ctx)) goto end;
361 }
362
363
364 /* t := y^2^(e - i - 1) */
365 if (!BN_copy(t, y)) goto end;
366 for (j = e - i - 1; j > 0; j--)
367 {
368 if (!BN_mod_sqr(t, t, p, ctx)) goto end;
369 }
370 if (!BN_mod_mul(y, t, t, p, ctx)) goto end;
371 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
372 if (!BN_mod_mul(b, b, y, p, ctx)) goto end;
373 e = i;
374 }
375
376 end:
377 if (err)
378 {
379 if (ret != NULL && ret != in)
380 {
381 BN_clear_free(ret);
382 }
383 ret = NULL;
384 }
385 BN_CTX_end(ctx);
386 return ret;
387 }
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c
new file mode 100644
index 0000000000..de610ce54c
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_word.c
@@ -0,0 +1,208 @@
1/* crypto/bn/bn_word.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w)
64 {
65#ifndef BN_LLONG
66 BN_ULONG ret=0;
67#else
68 BN_ULLONG ret=0;
69#endif
70 int i;
71
72 w&=BN_MASK2;
73 for (i=a->top-1; i>=0; i--)
74 {
75#ifndef BN_LLONG
76 ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%w;
77 ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%w;
78#else
79 ret=(BN_ULLONG)(((ret<<(BN_ULLONG)BN_BITS2)|a->d[i])%
80 (BN_ULLONG)w);
81#endif
82 }
83 return((BN_ULONG)ret);
84 }
85
86BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
87 {
88 BN_ULONG ret;
89 int i;
90
91 if (a->top == 0) return(0);
92 ret=0;
93 w&=BN_MASK2;
94 for (i=a->top-1; i>=0; i--)
95 {
96 BN_ULONG l,d;
97
98 l=a->d[i];
99 d=bn_div_words(ret,l,w);
100 ret=(l-((d*w)&BN_MASK2))&BN_MASK2;
101 a->d[i]=d;
102 }
103 if ((a->top > 0) && (a->d[a->top-1] == 0))
104 a->top--;
105 return(ret);
106 }
107
108int BN_add_word(BIGNUM *a, BN_ULONG w)
109 {
110 BN_ULONG l;
111 int i;
112
113 if ((w & BN_MASK2) == 0)
114 return(1);
115
116 if (a->neg)
117 {
118 a->neg=0;
119 i=BN_sub_word(a,w);
120 if (!BN_is_zero(a))
121 a->neg=!(a->neg);
122 return(i);
123 }
124 w&=BN_MASK2;
125 if (bn_wexpand(a,a->top+1) == NULL) return(0);
126 i=0;
127 for (;;)
128 {
129 if (i >= a->top)
130 l=w;
131 else
132 l=(a->d[i]+(BN_ULONG)w)&BN_MASK2;
133 a->d[i]=l;
134 if (w > l)
135 w=1;
136 else
137 break;
138 i++;
139 }
140 if (i >= a->top)
141 a->top++;
142 return(1);
143 }
144
145int BN_sub_word(BIGNUM *a, BN_ULONG w)
146 {
147 int i;
148
149 if ((w & BN_MASK2) == 0)
150 return(1);
151
152 if (BN_is_zero(a) || a->neg)
153 {
154 a->neg=0;
155 i=BN_add_word(a,w);
156 a->neg=1;
157 return(i);
158 }
159
160 w&=BN_MASK2;
161 if ((a->top == 1) && (a->d[0] < w))
162 {
163 a->d[0]=w-a->d[0];
164 a->neg=1;
165 return(1);
166 }
167 i=0;
168 for (;;)
169 {
170 if (a->d[i] >= w)
171 {
172 a->d[i]-=w;
173 break;
174 }
175 else
176 {
177 a->d[i]=(a->d[i]-w)&BN_MASK2;
178 i++;
179 w=1;
180 }
181 }
182 if ((a->d[i] == 0) && (i == (a->top-1)))
183 a->top--;
184 return(1);
185 }
186
187int BN_mul_word(BIGNUM *a, BN_ULONG w)
188 {
189 BN_ULONG ll;
190
191 w&=BN_MASK2;
192 if (a->top)
193 {
194 if (w == 0)
195 BN_zero(a);
196 else
197 {
198 ll=bn_mul_words(a->d,a->d,a->top,w);
199 if (ll)
200 {
201 if (bn_wexpand(a,a->top+1) == NULL) return(0);
202 a->d[a->top++]=ll;
203 }
204 }
205 }
206 return(1);
207 }
208
diff --git a/src/lib/libcrypto/bn/bn_x931p.c b/src/lib/libcrypto/bn/bn_x931p.c
new file mode 100644
index 0000000000..c64410dd3a
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_x931p.c
@@ -0,0 +1,282 @@
1/* bn_x931p.c */
2/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
3 * project 2005.
4 */
5/* ====================================================================
6 * Copyright (c) 2005 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * licensing@OpenSSL.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59#include <stdio.h>
60#include <openssl/bn.h>
61
62#ifdef OPENSSL_FIPS
63
64/* X9.31 routines for prime derivation */
65
66
67/* X9.31 prime derivation. This is used to generate the primes pi
68 * (p1, p2, q1, q2) from a parameter Xpi by checking successive odd
69 * integers.
70 */
71
72static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx,
73 void (*cb)(int, int, void *), void *cb_arg)
74 {
75 int i = 0;
76 if (!BN_copy(pi, Xpi))
77 return 0;
78 if (!BN_is_odd(pi) && !BN_add_word(pi, 1))
79 return 0;
80 for(;;)
81 {
82 i++;
83 if (cb)
84 cb(0, i, cb_arg);
85 /* NB 27 MR is specificed in X9.31 */
86 if (BN_is_prime_fasttest(pi, 27, cb, ctx, cb_arg, 1))
87 break;
88 if (!BN_add_word(pi, 2))
89 return 0;
90 }
91 if (cb)
92 cb(2, i, cb_arg);
93 return 1;
94 }
95
96/* This is the main X9.31 prime derivation function. From parameters
97 * Xp1, Xp2 and Xp derive the prime p. If the parameters p1 or p2 are
98 * not NULL they will be returned too: this is needed for testing.
99 */
100
101int BN_X931_derive_prime(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
102 void (*cb)(int, int, void *), void *cb_arg,
103 const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
104 const BIGNUM *e, BN_CTX *ctx)
105 {
106 int ret = 0;
107
108 BIGNUM *t, *p1p2, *pm1;
109
110 /* Only even e supported */
111 if (!BN_is_odd(e))
112 return 0;
113
114 BN_CTX_start(ctx);
115 if (!p1)
116 p1 = BN_CTX_get(ctx);
117
118 if (!p2)
119 p2 = BN_CTX_get(ctx);
120
121 t = BN_CTX_get(ctx);
122
123 p1p2 = BN_CTX_get(ctx);
124
125 pm1 = BN_CTX_get(ctx);
126
127 if (!bn_x931_derive_pi(p1, Xp1, ctx, cb, cb_arg))
128 goto err;
129
130 if (!bn_x931_derive_pi(p2, Xp2, ctx, cb, cb_arg))
131 goto err;
132
133 if (!BN_mul(p1p2, p1, p2, ctx))
134 goto err;
135
136 /* First set p to value of Rp */
137
138 if (!BN_mod_inverse(p, p2, p1, ctx))
139 goto err;
140
141 if (!BN_mul(p, p, p2, ctx))
142 goto err;
143
144 if (!BN_mod_inverse(t, p1, p2, ctx))
145 goto err;
146
147 if (!BN_mul(t, t, p1, ctx))
148 goto err;
149
150 if (!BN_sub(p, p, t))
151 goto err;
152
153 if (p->neg && !BN_add(p, p, p1p2))
154 goto err;
155
156 /* p now equals Rp */
157
158 if (!BN_mod_sub(p, p, Xp, p1p2, ctx))
159 goto err;
160
161 if (!BN_add(p, p, Xp))
162 goto err;
163
164 /* p now equals Yp0 */
165
166 for (;;)
167 {
168 int i = 1;
169 if (cb)
170 cb(0, i++, cb_arg);
171 if (!BN_copy(pm1, p))
172 goto err;
173 if (!BN_sub_word(pm1, 1))
174 goto err;
175 if (!BN_gcd(t, pm1, e, ctx))
176 goto err;
177 if (BN_is_one(t)
178 /* X9.31 specifies 8 MR and 1 Lucas test or any prime test
179 * offering similar or better guarantees 50 MR is considerably
180 * better.
181 */
182 && BN_is_prime_fasttest(p, 50, cb, ctx, cb_arg, 1))
183 break;
184 if (!BN_add(p, p, p1p2))
185 goto err;
186 }
187
188 if (cb)
189 cb(3, 0, cb_arg);
190
191 ret = 1;
192
193 err:
194
195 BN_CTX_end(ctx);
196
197 return ret;
198 }
199
200/* Generate pair of parameters Xp, Xq for X9.31 prime generation.
201 * Note: nbits parameter is sum of number of bits in both.
202 */
203
204int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
205 {
206 BIGNUM *t;
207 int i;
208 /* Number of bits for each prime is of the form
209 * 512+128s for s = 0, 1, ...
210 */
211 if ((nbits < 1024) || (nbits & 0xff))
212 return 0;
213 nbits >>= 1;
214 /* The random value Xp must be between sqrt(2) * 2^(nbits-1) and
215 * 2^nbits - 1. By setting the top two bits we ensure that the lower
216 * bound is exceeded.
217 */
218 if (!BN_rand(Xp, nbits, 1, 0))
219 return 0;
220
221 BN_CTX_start(ctx);
222 t = BN_CTX_get(ctx);
223
224 for (i = 0; i < 1000; i++)
225 {
226 if (!BN_rand(Xq, nbits, 1, 0))
227 return 0;
228 /* Check that |Xp - Xq| > 2^(nbits - 100) */
229 BN_sub(t, Xp, Xq);
230 if (BN_num_bits(t) > (nbits - 100))
231 break;
232 }
233
234 BN_CTX_end(ctx);
235
236 if (i < 1000)
237 return 1;
238
239 return 0;
240
241 }
242
243/* Generate primes using X9.31 algorithm. Of the values p, p1, p2, Xp1
244 * and Xp2 only 'p' needs to be non-NULL. If any of the others are not NULL
245 * the relevant parameter will be stored in it.
246 *
247 * Due to the fact that |Xp - Xq| > 2^(nbits - 100) must be satisfied Xp and Xq
248 * are generated using the previous function and supplied as input.
249 */
250
251int BN_X931_generate_prime(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
252 BIGNUM *Xp1, BIGNUM *Xp2,
253 const BIGNUM *Xp,
254 const BIGNUM *e, BN_CTX *ctx,
255 void (*cb)(int, int, void *), void *cb_arg)
256 {
257 int ret = 0;
258
259 BN_CTX_start(ctx);
260 if (!Xp1)
261 Xp1 = BN_CTX_get(ctx);
262 if (!Xp2)
263 Xp2 = BN_CTX_get(ctx);
264
265 if (!BN_rand(Xp1, 101, 0, 0))
266 goto error;
267 if (!BN_rand(Xp2, 101, 0, 0))
268 goto error;
269 if (!BN_X931_derive_prime(p, p1, p2, cb, cb_arg,
270 Xp, Xp1, Xp2, e, ctx))
271 goto error;
272
273 ret = 1;
274
275 error:
276 BN_CTX_end(ctx);
277
278 return ret;
279
280 }
281
282#endif
diff --git a/src/lib/libcrypto/bn/bnspeed.c b/src/lib/libcrypto/bn/bnspeed.c
new file mode 100644
index 0000000000..b554ac8cf8
--- /dev/null
+++ b/src/lib/libcrypto/bn/bnspeed.c
@@ -0,0 +1,233 @@
1/* unused */
2
3/* crypto/bn/bnspeed.c */
4/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
5 * All rights reserved.
6 *
7 * This package is an SSL implementation written
8 * by Eric Young (eay@cryptsoft.com).
9 * The implementation was written so as to conform with Netscapes SSL.
10 *
11 * This library is free for commercial and non-commercial use as long as
12 * the following conditions are aheared to. The following conditions
13 * apply to all code found in this distribution, be it the RC4, RSA,
14 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
15 * included with this distribution is covered by the same copyright terms
16 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
17 *
18 * Copyright remains Eric Young's, and as such any Copyright notices in
19 * the code are not to be removed.
20 * If this package is used in a product, Eric Young should be given attribution
21 * as the author of the parts of the library used.
22 * This can be in the form of a textual message at program startup or
23 * in documentation (online or textual) provided with the package.
24 *
25 * Redistribution and use in source and binary forms, with or without
26 * modification, are permitted provided that the following conditions
27 * are met:
28 * 1. Redistributions of source code must retain the copyright
29 * notice, this list of conditions and the following disclaimer.
30 * 2. Redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution.
33 * 3. All advertising materials mentioning features or use of this software
34 * must display the following acknowledgement:
35 * "This product includes cryptographic software written by
36 * Eric Young (eay@cryptsoft.com)"
37 * The word 'cryptographic' can be left out if the rouines from the library
38 * being used are not cryptographic related :-).
39 * 4. If you include any Windows specific code (or a derivative thereof) from
40 * the apps directory (application code) you must include an acknowledgement:
41 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
42 *
43 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * The licence and distribution terms for any publically available version or
56 * derivative of this code cannot be changed. i.e. this code cannot simply be
57 * copied and put under another distribution licence
58 * [including the GNU Public Licence.]
59 */
60
61/* most of this code has been pilfered from my libdes speed.c program */
62
63#define BASENUM 1000000
64#undef PROG
65#define PROG bnspeed_main
66
67#include <stdio.h>
68#include <stdlib.h>
69#include <signal.h>
70#include <string.h>
71#include <openssl/crypto.h>
72#include <openssl/err.h>
73
74#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
75#define TIMES
76#endif
77
78#ifndef _IRIX
79#include <time.h>
80#endif
81#ifdef TIMES
82#include <sys/types.h>
83#include <sys/times.h>
84#endif
85
86/* Depending on the VMS version, the tms structure is perhaps defined.
87 The __TMS macro will show if it was. If it wasn't defined, we should
88 undefine TIMES, since that tells the rest of the program how things
89 should be handled. -- Richard Levitte */
90#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
91#undef TIMES
92#endif
93
94#ifndef TIMES
95#include <sys/timeb.h>
96#endif
97
98#if defined(sun) || defined(__ultrix)
99#define _POSIX_SOURCE
100#include <limits.h>
101#include <sys/param.h>
102#endif
103
104#include <openssl/bn.h>
105#include <openssl/x509.h>
106
107/* The following is from times(3) man page. It may need to be changed */
108#ifndef HZ
109# ifndef CLK_TCK
110# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
111# define HZ 100.0
112# else /* _BSD_CLK_TCK_ */
113# define HZ ((double)_BSD_CLK_TCK_)
114# endif
115# else /* CLK_TCK */
116# define HZ ((double)CLK_TCK)
117# endif
118#endif
119
120#undef BUFSIZE
121#define BUFSIZE ((long)1024*8)
122int run=0;
123
124static double Time_F(int s);
125#define START 0
126#define STOP 1
127
128static double Time_F(int s)
129 {
130 double ret;
131#ifdef TIMES
132 static struct tms tstart,tend;
133
134 if (s == START)
135 {
136 times(&tstart);
137 return(0);
138 }
139 else
140 {
141 times(&tend);
142 ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ;
143 return((ret < 1e-3)?1e-3:ret);
144 }
145#else /* !times() */
146 static struct timeb tstart,tend;
147 long i;
148
149 if (s == START)
150 {
151 ftime(&tstart);
152 return(0);
153 }
154 else
155 {
156 ftime(&tend);
157 i=(long)tend.millitm-(long)tstart.millitm;
158 ret=((double)(tend.time-tstart.time))+((double)i)/1000.0;
159 return((ret < 0.001)?0.001:ret);
160 }
161#endif
162 }
163
164#define NUM_SIZES 5
165static int sizes[NUM_SIZES]={128,256,512,1024,2048};
166/*static int sizes[NUM_SIZES]={59,179,299,419,539}; */
167
168void do_mul(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_CTX *ctx);
169
170int main(int argc, char **argv)
171 {
172 BN_CTX *ctx;
173 BIGNUM a,b,c;
174
175 ctx=BN_CTX_new();
176 BN_init(&a);
177 BN_init(&b);
178 BN_init(&c);
179
180 do_mul(&a,&b,&c,ctx);
181 }
182
183void do_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
184 {
185 int i,j,k;
186 double tm;
187 long num;
188
189 for (i=0; i<NUM_SIZES; i++)
190 {
191 num=BASENUM;
192 if (i) num/=(i*3);
193 BN_rand(a,sizes[i],1,0);
194 for (j=i; j<NUM_SIZES; j++)
195 {
196 BN_rand(b,sizes[j],1,0);
197 Time_F(START);
198 for (k=0; k<num; k++)
199 BN_mul(r,b,a,ctx);
200 tm=Time_F(STOP);
201 printf("mul %4d x %4d -> %8.3fms\n",sizes[i],sizes[j],tm*1000.0/num);
202 }
203 }
204
205 for (i=0; i<NUM_SIZES; i++)
206 {
207 num=BASENUM;
208 if (i) num/=(i*3);
209 BN_rand(a,sizes[i],1,0);
210 Time_F(START);
211 for (k=0; k<num; k++)
212 BN_sqr(r,a,ctx);
213 tm=Time_F(STOP);
214 printf("sqr %4d x %4d -> %8.3fms\n",sizes[i],sizes[i],tm*1000.0/num);
215 }
216
217 for (i=0; i<NUM_SIZES; i++)
218 {
219 num=BASENUM/10;
220 if (i) num/=(i*3);
221 BN_rand(a,sizes[i]-1,1,0);
222 for (j=i; j<NUM_SIZES; j++)
223 {
224 BN_rand(b,sizes[j],1,0);
225 Time_F(START);
226 for (k=0; k<100000; k++)
227 BN_div(r, NULL, b, a,ctx);
228 tm=Time_F(STOP);
229 printf("div %4d / %4d -> %8.3fms\n",sizes[j],sizes[i]-1,tm*1000.0/num);
230 }
231 }
232 }
233
diff --git a/src/lib/libcrypto/bn/bntest.c b/src/lib/libcrypto/bn/bntest.c
new file mode 100644
index 0000000000..792a75ff4f
--- /dev/null
+++ b/src/lib/libcrypto/bn/bntest.c
@@ -0,0 +1,1290 @@
1/* crypto/bn/bntest.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <stdlib.h>
61#include <string.h>
62
63#include "e_os.h"
64
65#include <openssl/bio.h>
66#include <openssl/bn.h>
67#include <openssl/rand.h>
68#include <openssl/x509.h>
69#include <openssl/err.h>
70
/* Iteration counts shared by the test_* functions below. */
const int num0 = 100; /* number of tests */
const int num1 = 50; /* additional tests for some functions */
const int num2 = 5; /* number of tests for slow functions */

/* Each test_* function returns 1 on success and 0 on failure. */
int test_add(BIO *bp);
int test_sub(BIO *bp);
int test_lshift1(BIO *bp);
int test_lshift(BIO *bp,BN_CTX *ctx,BIGNUM *a_);
int test_rshift1(BIO *bp);
int test_rshift(BIO *bp,BN_CTX *ctx);
int test_div(BIO *bp,BN_CTX *ctx);
int test_div_recp(BIO *bp,BN_CTX *ctx);
int test_mul(BIO *bp);
int test_sqr(BIO *bp,BN_CTX *ctx);
int test_mont(BIO *bp,BN_CTX *ctx);
int test_mod(BIO *bp,BN_CTX *ctx);
int test_mod_mul(BIO *bp,BN_CTX *ctx);
int test_mod_exp(BIO *bp,BN_CTX *ctx);
int test_mod_exp_mont_consttime(BIO *bp,BN_CTX *ctx);
int test_exp(BIO *bp,BN_CTX *ctx);
int test_kron(BIO *bp,BN_CTX *ctx);
int test_sqrt(BIO *bp,BN_CTX *ctx);
int rand_neg(void);
/* Set to 1 by the "-results" option: the tests then print only the computed
 * result of each operation instead of the full "operands - result" line. */
static int results=0;

/* Fixed big-endian operand used by main() for the "BN_lshift (fixed)" test. */
static unsigned char lst[]="\xC6\x4F\x43\x04\x2A\xEA\xCA\x6E\x58\x36\x80\x5B\xE8\xC9"
"\x9B\x04\x5D\x48\x36\xC2\xFD\x16\xC9\x64\xF0";

/* Constant seed fed to RAND_seed() in main(); makes the run reproducible
 * rather than unpredictable. */
static const char rnd_seed[] = "string to make the random number generator think it has entropy";
100
101static void message(BIO *out, char *m)
102 {
103 fprintf(stderr, "test %s\n", m);
104 BIO_puts(out, "print \"test ");
105 BIO_puts(out, m);
106 BIO_puts(out, "\\n\"\n");
107 }
108
/* Test driver for the BN library.
 *
 * Options:
 *   -results     print only the computed results (sets 'results')
 *   -out <file>  write the generated output to <file> instead of stdout
 *
 * Runs every test_* function in a fixed order, flushing the output BIO
 * after each one.  Exits with status 0 when all tests pass; on the first
 * failure writes "1\n" to the output, prints the OpenSSL error queue to
 * stderr and exits with status 1. */
int main(int argc, char *argv[])
	{
	BN_CTX *ctx;
	BIO *out;
	char *outfile=NULL;

	results = 0;

	RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_generate_prime may fail */

	/* skip the program name, then scan the remaining arguments;
	 * unrecognised arguments are silently ignored */
	argc--;
	argv++;
	while (argc >= 1)
		{
		if (strcmp(*argv,"-results") == 0)
			results=1;
		else if (strcmp(*argv,"-out") == 0)
			{
			/* "-out" with no following file name ends option parsing */
			if (--argc < 1) break;
			outfile= *(++argv);
			}
		argc--;
		argv++;
		}


	ctx=BN_CTX_new();
	if (ctx == NULL) EXIT(1);

	/* output goes to stdout unless -out named a file */
	out=BIO_new(BIO_s_file());
	if (out == NULL) EXIT(1);
	if (outfile == NULL)
		{
		BIO_set_fp(out,stdout,BIO_NOCLOSE);
		}
	else
		{
		if (!BIO_write_filename(out,outfile))
			{
			perror(outfile);
			EXIT(1);
			}
		}

	/* in full-expression mode, tell the consumer that numbers are hex */
	if (!results)
		BIO_puts(out,"obase=16\nibase=16\n");

	message(out,"BN_add");
	if (!test_add(out)) goto err;
	BIO_flush(out);

	message(out,"BN_sub");
	if (!test_sub(out)) goto err;
	BIO_flush(out);

	message(out,"BN_lshift1");
	if (!test_lshift1(out)) goto err;
	BIO_flush(out);

	/* test_lshift() frees the BIGNUM it is handed */
	message(out,"BN_lshift (fixed)");
	if (!test_lshift(out,ctx,BN_bin2bn(lst,sizeof(lst)-1,NULL)))
		goto err;
	BIO_flush(out);

	/* NULL makes test_lshift() generate its own random operand */
	message(out,"BN_lshift");
	if (!test_lshift(out,ctx,NULL)) goto err;
	BIO_flush(out);

	message(out,"BN_rshift1");
	if (!test_rshift1(out)) goto err;
	BIO_flush(out);

	message(out,"BN_rshift");
	if (!test_rshift(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_sqr");
	if (!test_sqr(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_mul");
	if (!test_mul(out)) goto err;
	BIO_flush(out);

	message(out,"BN_div");
	if (!test_div(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_div_recp");
	if (!test_div_recp(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_mod");
	if (!test_mod(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_mod_mul");
	if (!test_mod_mul(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_mont");
	if (!test_mont(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_mod_exp");
	if (!test_mod_exp(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_mod_exp_mont_consttime");
	if (!test_mod_exp_mont_consttime(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_exp");
	if (!test_exp(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_kronecker");
	if (!test_kron(out,ctx)) goto err;
	BIO_flush(out);

	message(out,"BN_mod_sqrt");
	if (!test_sqrt(out,ctx)) goto err;
	BIO_flush(out);

	BN_CTX_free(ctx);
	BIO_free(out);

/**/
	EXIT(0);
err:
	BIO_puts(out,"1\n"); /* make sure the Perl script fed by bc notices
	                      * the failure, see test_bn in test/Makefile */
	BIO_flush(out);
	ERR_load_crypto_strings();
	ERR_print_errors_fp(stderr);
	EXIT(1);
	/* not reached if EXIT() terminates the process; keeps compilers
	 * that cannot see that from warning about a missing return */
	return(1);
	}
247
248int test_add(BIO *bp)
249 {
250 BIGNUM a,b,c;
251 int i;
252
253 BN_init(&a);
254 BN_init(&b);
255 BN_init(&c);
256
257 BN_bntest_rand(&a,512,0,0);
258 for (i=0; i<num0; i++)
259 {
260 BN_bntest_rand(&b,450+i,0,0);
261 a.neg=rand_neg();
262 b.neg=rand_neg();
263 BN_add(&c,&a,&b);
264 if (bp != NULL)
265 {
266 if (!results)
267 {
268 BN_print(bp,&a);
269 BIO_puts(bp," + ");
270 BN_print(bp,&b);
271 BIO_puts(bp," - ");
272 }
273 BN_print(bp,&c);
274 BIO_puts(bp,"\n");
275 }
276 a.neg=!a.neg;
277 b.neg=!b.neg;
278 BN_add(&c,&c,&b);
279 BN_add(&c,&c,&a);
280 if(!BN_is_zero(&c))
281 {
282 fprintf(stderr,"Add test failed!\n");
283 return 0;
284 }
285 }
286 BN_free(&a);
287 BN_free(&b);
288 BN_free(&c);
289 return(1);
290 }
291
292int test_sub(BIO *bp)
293 {
294 BIGNUM a,b,c;
295 int i;
296
297 BN_init(&a);
298 BN_init(&b);
299 BN_init(&c);
300
301 for (i=0; i<num0+num1; i++)
302 {
303 if (i < num1)
304 {
305 BN_bntest_rand(&a,512,0,0);
306 BN_copy(&b,&a);
307 if (BN_set_bit(&a,i)==0) return(0);
308 BN_add_word(&b,i);
309 }
310 else
311 {
312 BN_bntest_rand(&b,400+i-num1,0,0);
313 a.neg=rand_neg();
314 b.neg=rand_neg();
315 }
316 BN_sub(&c,&a,&b);
317 if (bp != NULL)
318 {
319 if (!results)
320 {
321 BN_print(bp,&a);
322 BIO_puts(bp," - ");
323 BN_print(bp,&b);
324 BIO_puts(bp," - ");
325 }
326 BN_print(bp,&c);
327 BIO_puts(bp,"\n");
328 }
329 BN_add(&c,&c,&b);
330 BN_sub(&c,&c,&a);
331 if(!BN_is_zero(&c))
332 {
333 fprintf(stderr,"Subtract test failed!\n");
334 return 0;
335 }
336 }
337 BN_free(&a);
338 BN_free(&b);
339 BN_free(&c);
340 return(1);
341 }
342
343int test_div(BIO *bp, BN_CTX *ctx)
344 {
345 BIGNUM a,b,c,d,e;
346 int i;
347
348 BN_init(&a);
349 BN_init(&b);
350 BN_init(&c);
351 BN_init(&d);
352 BN_init(&e);
353
354 for (i=0; i<num0+num1; i++)
355 {
356 if (i < num1)
357 {
358 BN_bntest_rand(&a,400,0,0);
359 BN_copy(&b,&a);
360 BN_lshift(&a,&a,i);
361 BN_add_word(&a,i);
362 }
363 else
364 BN_bntest_rand(&b,50+3*(i-num1),0,0);
365 a.neg=rand_neg();
366 b.neg=rand_neg();
367 BN_div(&d,&c,&a,&b,ctx);
368 if (bp != NULL)
369 {
370 if (!results)
371 {
372 BN_print(bp,&a);
373 BIO_puts(bp," / ");
374 BN_print(bp,&b);
375 BIO_puts(bp," - ");
376 }
377 BN_print(bp,&d);
378 BIO_puts(bp,"\n");
379
380 if (!results)
381 {
382 BN_print(bp,&a);
383 BIO_puts(bp," % ");
384 BN_print(bp,&b);
385 BIO_puts(bp," - ");
386 }
387 BN_print(bp,&c);
388 BIO_puts(bp,"\n");
389 }
390 BN_mul(&e,&d,&b,ctx);
391 BN_add(&d,&e,&c);
392 BN_sub(&d,&d,&a);
393 if(!BN_is_zero(&d))
394 {
395 fprintf(stderr,"Division test failed!\n");
396 return 0;
397 }
398 }
399 BN_free(&a);
400 BN_free(&b);
401 BN_free(&c);
402 BN_free(&d);
403 BN_free(&e);
404 return(1);
405 }
406
407int test_div_recp(BIO *bp, BN_CTX *ctx)
408 {
409 BIGNUM a,b,c,d,e;
410 BN_RECP_CTX recp;
411 int i;
412
413 BN_RECP_CTX_init(&recp);
414 BN_init(&a);
415 BN_init(&b);
416 BN_init(&c);
417 BN_init(&d);
418 BN_init(&e);
419
420 for (i=0; i<num0+num1; i++)
421 {
422 if (i < num1)
423 {
424 BN_bntest_rand(&a,400,0,0);
425 BN_copy(&b,&a);
426 BN_lshift(&a,&a,i);
427 BN_add_word(&a,i);
428 }
429 else
430 BN_bntest_rand(&b,50+3*(i-num1),0,0);
431 a.neg=rand_neg();
432 b.neg=rand_neg();
433 BN_RECP_CTX_set(&recp,&b,ctx);
434 BN_div_recp(&d,&c,&a,&recp,ctx);
435 if (bp != NULL)
436 {
437 if (!results)
438 {
439 BN_print(bp,&a);
440 BIO_puts(bp," / ");
441 BN_print(bp,&b);
442 BIO_puts(bp," - ");
443 }
444 BN_print(bp,&d);
445 BIO_puts(bp,"\n");
446
447 if (!results)
448 {
449 BN_print(bp,&a);
450 BIO_puts(bp," % ");
451 BN_print(bp,&b);
452 BIO_puts(bp," - ");
453 }
454 BN_print(bp,&c);
455 BIO_puts(bp,"\n");
456 }
457 BN_mul(&e,&d,&b,ctx);
458 BN_add(&d,&e,&c);
459 BN_sub(&d,&d,&a);
460 if(!BN_is_zero(&d))
461 {
462 fprintf(stderr,"Reciprocal division test failed!\n");
463 fprintf(stderr,"a=");
464 BN_print_fp(stderr,&a);
465 fprintf(stderr,"\nb=");
466 BN_print_fp(stderr,&b);
467 fprintf(stderr,"\n");
468 return 0;
469 }
470 }
471 BN_free(&a);
472 BN_free(&b);
473 BN_free(&c);
474 BN_free(&d);
475 BN_free(&e);
476 BN_RECP_CTX_free(&recp);
477 return(1);
478 }
479
480int test_mul(BIO *bp)
481 {
482 BIGNUM a,b,c,d,e;
483 int i;
484 BN_CTX *ctx;
485
486 ctx = BN_CTX_new();
487 if (ctx == NULL) EXIT(1);
488
489 BN_init(&a);
490 BN_init(&b);
491 BN_init(&c);
492 BN_init(&d);
493 BN_init(&e);
494
495 for (i=0; i<num0+num1; i++)
496 {
497 if (i <= num1)
498 {
499 BN_bntest_rand(&a,100,0,0);
500 BN_bntest_rand(&b,100,0,0);
501 }
502 else
503 BN_bntest_rand(&b,i-num1,0,0);
504 a.neg=rand_neg();
505 b.neg=rand_neg();
506 BN_mul(&c,&a,&b,ctx);
507 if (bp != NULL)
508 {
509 if (!results)
510 {
511 BN_print(bp,&a);
512 BIO_puts(bp," * ");
513 BN_print(bp,&b);
514 BIO_puts(bp," - ");
515 }
516 BN_print(bp,&c);
517 BIO_puts(bp,"\n");
518 }
519 BN_div(&d,&e,&c,&a,ctx);
520 BN_sub(&d,&d,&b);
521 if(!BN_is_zero(&d) || !BN_is_zero(&e))
522 {
523 fprintf(stderr,"Multiplication test failed!\n");
524 return 0;
525 }
526 }
527 BN_free(&a);
528 BN_free(&b);
529 BN_free(&c);
530 BN_free(&d);
531 BN_free(&e);
532 BN_CTX_free(ctx);
533 return(1);
534 }
535
536int test_sqr(BIO *bp, BN_CTX *ctx)
537 {
538 BIGNUM a,c,d,e;
539 int i;
540
541 BN_init(&a);
542 BN_init(&c);
543 BN_init(&d);
544 BN_init(&e);
545
546 for (i=0; i<num0; i++)
547 {
548 BN_bntest_rand(&a,40+i*10,0,0);
549 a.neg=rand_neg();
550 BN_sqr(&c,&a,ctx);
551 if (bp != NULL)
552 {
553 if (!results)
554 {
555 BN_print(bp,&a);
556 BIO_puts(bp," * ");
557 BN_print(bp,&a);
558 BIO_puts(bp," - ");
559 }
560 BN_print(bp,&c);
561 BIO_puts(bp,"\n");
562 }
563 BN_div(&d,&e,&c,&a,ctx);
564 BN_sub(&d,&d,&a);
565 if(!BN_is_zero(&d) || !BN_is_zero(&e))
566 {
567 fprintf(stderr,"Square test failed!\n");
568 return 0;
569 }
570 }
571 BN_free(&a);
572 BN_free(&c);
573 BN_free(&d);
574 BN_free(&e);
575 return(1);
576 }
577
578int test_mont(BIO *bp, BN_CTX *ctx)
579 {
580 BIGNUM a,b,c,d,A,B;
581 BIGNUM n;
582 int i;
583 BN_MONT_CTX *mont;
584
585 BN_init(&a);
586 BN_init(&b);
587 BN_init(&c);
588 BN_init(&d);
589 BN_init(&A);
590 BN_init(&B);
591 BN_init(&n);
592
593 mont=BN_MONT_CTX_new();
594
595 BN_bntest_rand(&a,100,0,0); /**/
596 BN_bntest_rand(&b,100,0,0); /**/
597 for (i=0; i<num2; i++)
598 {
599 int bits = (200*(i+1))/num2;
600
601 if (bits == 0)
602 continue;
603 BN_bntest_rand(&n,bits,0,1);
604 BN_MONT_CTX_set(mont,&n,ctx);
605
606 BN_nnmod(&a,&a,&n,ctx);
607 BN_nnmod(&b,&b,&n,ctx);
608
609 BN_to_montgomery(&A,&a,mont,ctx);
610 BN_to_montgomery(&B,&b,mont,ctx);
611
612 BN_mod_mul_montgomery(&c,&A,&B,mont,ctx);/**/
613 BN_from_montgomery(&A,&c,mont,ctx);/**/
614 if (bp != NULL)
615 {
616 if (!results)
617 {
618#ifdef undef
619fprintf(stderr,"%d * %d %% %d\n",
620BN_num_bits(&a),
621BN_num_bits(&b),
622BN_num_bits(mont->N));
623#endif
624 BN_print(bp,&a);
625 BIO_puts(bp," * ");
626 BN_print(bp,&b);
627 BIO_puts(bp," % ");
628 BN_print(bp,&(mont->N));
629 BIO_puts(bp," - ");
630 }
631 BN_print(bp,&A);
632 BIO_puts(bp,"\n");
633 }
634 BN_mod_mul(&d,&a,&b,&n,ctx);
635 BN_sub(&d,&d,&A);
636 if(!BN_is_zero(&d))
637 {
638 fprintf(stderr,"Montgomery multiplication test failed!\n");
639 return 0;
640 }
641 }
642 BN_MONT_CTX_free(mont);
643 BN_free(&a);
644 BN_free(&b);
645 BN_free(&c);
646 BN_free(&d);
647 BN_free(&A);
648 BN_free(&B);
649 BN_free(&n);
650 return(1);
651 }
652
653int test_mod(BIO *bp, BN_CTX *ctx)
654 {
655 BIGNUM *a,*b,*c,*d,*e;
656 int i;
657
658 a=BN_new();
659 b=BN_new();
660 c=BN_new();
661 d=BN_new();
662 e=BN_new();
663
664 BN_bntest_rand(a,1024,0,0); /**/
665 for (i=0; i<num0; i++)
666 {
667 BN_bntest_rand(b,450+i*10,0,0); /**/
668 a->neg=rand_neg();
669 b->neg=rand_neg();
670 BN_mod(c,a,b,ctx);/**/
671 if (bp != NULL)
672 {
673 if (!results)
674 {
675 BN_print(bp,a);
676 BIO_puts(bp," % ");
677 BN_print(bp,b);
678 BIO_puts(bp," - ");
679 }
680 BN_print(bp,c);
681 BIO_puts(bp,"\n");
682 }
683 BN_div(d,e,a,b,ctx);
684 BN_sub(e,e,c);
685 if(!BN_is_zero(e))
686 {
687 fprintf(stderr,"Modulo test failed!\n");
688 return 0;
689 }
690 }
691 BN_free(a);
692 BN_free(b);
693 BN_free(c);
694 BN_free(d);
695 BN_free(e);
696 return(1);
697 }
698
699int test_mod_mul(BIO *bp, BN_CTX *ctx)
700 {
701 BIGNUM *a,*b,*c,*d,*e;
702 int i,j;
703
704 a=BN_new();
705 b=BN_new();
706 c=BN_new();
707 d=BN_new();
708 e=BN_new();
709
710 for (j=0; j<3; j++) {
711 BN_bntest_rand(c,1024,0,0); /**/
712 for (i=0; i<num0; i++)
713 {
714 BN_bntest_rand(a,475+i*10,0,0); /**/
715 BN_bntest_rand(b,425+i*11,0,0); /**/
716 a->neg=rand_neg();
717 b->neg=rand_neg();
718 if (!BN_mod_mul(e,a,b,c,ctx))
719 {
720 unsigned long l;
721
722 while ((l=ERR_get_error()))
723 fprintf(stderr,"ERROR:%s\n",
724 ERR_error_string(l,NULL));
725 EXIT(1);
726 }
727 if (bp != NULL)
728 {
729 if (!results)
730 {
731 BN_print(bp,a);
732 BIO_puts(bp," * ");
733 BN_print(bp,b);
734 BIO_puts(bp," % ");
735 BN_print(bp,c);
736 if ((a->neg ^ b->neg) && !BN_is_zero(e))
737 {
738 /* If (a*b) % c is negative, c must be added
739 * in order to obtain the normalized remainder
740 * (new with OpenSSL 0.9.7, previous versions of
741 * BN_mod_mul could generate negative results)
742 */
743 BIO_puts(bp," + ");
744 BN_print(bp,c);
745 }
746 BIO_puts(bp," - ");
747 }
748 BN_print(bp,e);
749 BIO_puts(bp,"\n");
750 }
751 BN_mul(d,a,b,ctx);
752 BN_sub(d,d,e);
753 BN_div(a,b,d,c,ctx);
754 if(!BN_is_zero(b))
755 {
756 fprintf(stderr,"Modulo multiply test failed!\n");
757 ERR_print_errors_fp(stderr);
758 return 0;
759 }
760 }
761 }
762 BN_free(a);
763 BN_free(b);
764 BN_free(c);
765 BN_free(d);
766 BN_free(e);
767 return(1);
768 }
769
770int test_mod_exp(BIO *bp, BN_CTX *ctx)
771 {
772 BIGNUM *a,*b,*c,*d,*e;
773 int i;
774
775 a=BN_new();
776 b=BN_new();
777 c=BN_new();
778 d=BN_new();
779 e=BN_new();
780
781 BN_bntest_rand(c,30,0,1); /* must be odd for montgomery */
782 for (i=0; i<num2; i++)
783 {
784 BN_bntest_rand(a,20+i*5,0,0); /**/
785 BN_bntest_rand(b,2+i,0,0); /**/
786
787 if (!BN_mod_exp(d,a,b,c,ctx))
788 return(0);
789
790 if (bp != NULL)
791 {
792 if (!results)
793 {
794 BN_print(bp,a);
795 BIO_puts(bp," ^ ");
796 BN_print(bp,b);
797 BIO_puts(bp," % ");
798 BN_print(bp,c);
799 BIO_puts(bp," - ");
800 }
801 BN_print(bp,d);
802 BIO_puts(bp,"\n");
803 }
804 BN_exp(e,a,b,ctx);
805 BN_sub(e,e,d);
806 BN_div(a,b,e,c,ctx);
807 if(!BN_is_zero(b))
808 {
809 fprintf(stderr,"Modulo exponentiation test failed!\n");
810 return 0;
811 }
812 }
813 BN_free(a);
814 BN_free(b);
815 BN_free(c);
816 BN_free(d);
817 BN_free(e);
818 return(1);
819 }
820
821int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx)
822 {
823 BIGNUM *a,*b,*c,*d,*e;
824 int i;
825
826 a=BN_new();
827 b=BN_new();
828 c=BN_new();
829 d=BN_new();
830 e=BN_new();
831
832 BN_bntest_rand(c,30,0,1); /* must be odd for montgomery */
833 for (i=0; i<num2; i++)
834 {
835 BN_bntest_rand(a,20+i*5,0,0); /**/
836 BN_bntest_rand(b,2+i,0,0); /**/
837
838 if (!BN_mod_exp_mont_consttime(d,a,b,c,ctx,NULL))
839 return(00);
840
841 if (bp != NULL)
842 {
843 if (!results)
844 {
845 BN_print(bp,a);
846 BIO_puts(bp," ^ ");
847 BN_print(bp,b);
848 BIO_puts(bp," % ");
849 BN_print(bp,c);
850 BIO_puts(bp," - ");
851 }
852 BN_print(bp,d);
853 BIO_puts(bp,"\n");
854 }
855 BN_exp(e,a,b,ctx);
856 BN_sub(e,e,d);
857 BN_div(a,b,e,c,ctx);
858 if(!BN_is_zero(b))
859 {
860 fprintf(stderr,"Modulo exponentiation test failed!\n");
861 return 0;
862 }
863 }
864 BN_free(a);
865 BN_free(b);
866 BN_free(c);
867 BN_free(d);
868 BN_free(e);
869 return(1);
870 }
871
872int test_exp(BIO *bp, BN_CTX *ctx)
873 {
874 BIGNUM *a,*b,*d,*e,*one;
875 int i;
876
877 a=BN_new();
878 b=BN_new();
879 d=BN_new();
880 e=BN_new();
881 one=BN_new();
882 BN_one(one);
883
884 for (i=0; i<num2; i++)
885 {
886 BN_bntest_rand(a,20+i*5,0,0); /**/
887 BN_bntest_rand(b,2+i,0,0); /**/
888
889 if (!BN_exp(d,a,b,ctx))
890 return(0);
891
892 if (bp != NULL)
893 {
894 if (!results)
895 {
896 BN_print(bp,a);
897 BIO_puts(bp," ^ ");
898 BN_print(bp,b);
899 BIO_puts(bp," - ");
900 }
901 BN_print(bp,d);
902 BIO_puts(bp,"\n");
903 }
904 BN_one(e);
905 for( ; !BN_is_zero(b) ; BN_sub(b,b,one))
906 BN_mul(e,e,a,ctx);
907 BN_sub(e,e,d);
908 if(!BN_is_zero(e))
909 {
910 fprintf(stderr,"Exponentiation test failed!\n");
911 return 0;
912 }
913 }
914 BN_free(a);
915 BN_free(b);
916 BN_free(d);
917 BN_free(e);
918 BN_free(one);
919 return(1);
920 }
921
/* Progress callback handed to BN_generate_prime(): writes one character
 * to stderr per call, chosen by the stage code p
 * (0 -> '.', 1 -> '+', 3 -> newline, anything else -> '*').
 * n and arg are ignored. */
static void genprime_cb(int p, int n, void *arg)
	{
	char mark;

	(void)n;
	(void)arg;

	switch (p)
		{
	case 0:
		mark='.';
		break;
	case 1:
		mark='+';
		break;
	case 3:
		mark='\n';
		break;
	default:	/* includes p == 2 */
		mark='*';
		break;
		}
	putc(mark, stderr);
	fflush(stderr);
	}
935
/* Exercise BN_kronecker() against a Legendre symbol computed by modular
 * exponentiation, for a random 512-bit prime b and num0 random a's.
 *
 * bp is not used by this test; progress dots and diagnostics go to stderr.
 * Returns 1 on success, 0 on any failure (allocation, a BN routine
 * reporting an error, or a mismatch); temporaries are freed on both paths. */
int test_kron(BIO *bp, BN_CTX *ctx)
	{
	BIGNUM *a,*b,*r,*t;
	int i;
	int legendre, kronecker;
	int ret = 0;

	a = BN_new();
	b = BN_new();
	r = BN_new();
	t = BN_new();
	if (a == NULL || b == NULL || r == NULL || t == NULL) goto err;

	/* We test BN_kronecker(a, b, ctx) just for  b  odd (Jacobi symbol).
	 * In this case we know that if  b  is prime, then BN_kronecker(a, b, ctx)
	 * is congruent to $a^{(b-1)/2}$, modulo $b$ (Legendre symbol).
	 * So we generate a random prime  b  and compare these values
	 * for a number of random  a's.  (That is, we run the Solovay-Strassen
	 * primality test to confirm that  b  is prime, except that we
	 * don't want to test whether  b  is prime but whether BN_kronecker
	 * works.) */

	if (!BN_generate_prime(b, 512, 0, NULL, NULL, genprime_cb, NULL)) goto err;
	b->neg = rand_neg();
	putc('\n', stderr);

	for (i = 0; i < num0; i++)
		{
		if (!BN_bntest_rand(a, 512, 0, 0)) goto err;
		a->neg = rand_neg();

		/* t := (|b|-1)/2  (note that b is odd) */
		if (!BN_copy(t, b)) goto err;
		t->neg = 0;
		if (!BN_sub_word(t, 1)) goto err;
		if (!BN_rshift1(t, t)) goto err;
		/* r := a^t mod b */
		/* the exponentiation is done with a positive modulus */
		b->neg=0;

		if (!BN_mod_exp_recp(r, a, t, b, ctx)) goto err;
		/* NOTE(review): this forces b negative rather than restoring
		 * the sign picked by rand_neg() above, so from this point on
		 * every BN_kronecker() call sees a negative b -- confirm
		 * whether that is intended */
		b->neg=1;

		/* map r to a symbol value: 1 -> 1, 0 -> 0, |b|-1 -> -1 */
		if (BN_is_word(r, 1))
			legendre = 1;
		else if (BN_is_zero(r))
			legendre = 0;
		else
			{
			if (!BN_add_word(r, 1)) goto err;
			if (0 != BN_ucmp(r, b))
				{
				fprintf(stderr, "Legendre symbol computation failed\n");
				goto err;
				}
			legendre = -1;
			}

		kronecker = BN_kronecker(a, b, ctx);
		/* any value below -1 is treated as an error return */
		if (kronecker < -1) goto err;
		/* we actually need BN_kronecker(a, |b|) */
		if (a->neg && b->neg)
			kronecker = -kronecker;

		if (legendre != kronecker)
			{
			fprintf(stderr, "legendre != kronecker; a = ");
			BN_print_fp(stderr, a);
			fprintf(stderr, ", b = ");
			BN_print_fp(stderr, b);
			fprintf(stderr, "\n");
			goto err;
			}

		putc('.', stderr);
		fflush(stderr);
		}

	putc('\n', stderr);
	fflush(stderr);
	ret = 1;
 err:
	if (a != NULL) BN_free(a);
	if (b != NULL) BN_free(b);
	if (r != NULL) BN_free(r);
	if (t != NULL) BN_free(t);
	return ret;
	}
1023
/* Exercise BN_mod_sqrt() for 16 prime moduli: the eight small primes in
 * primes[] followed by eight generated 256-bit primes.  For each modulus,
 * num2 values that are squares modulo p are constructed, a root is
 * requested, and the root squared must equal the value modulo p.
 *
 * bp is not used by this test; progress dots and diagnostics go to stderr.
 * Returns 1 on success, 0 on any failure; temporaries are freed on both
 * paths. */
int test_sqrt(BIO *bp, BN_CTX *ctx)
	{
	BIGNUM *a,*p,*r;
	int i, j;
	int ret = 0;

	a = BN_new();
	p = BN_new();
	r = BN_new();
	if (a == NULL || p == NULL || r == NULL) goto err;

	for (i = 0; i < 16; i++)
		{
		if (i < 8)
			{
			unsigned primes[8] = { 2, 3, 5, 7, 11, 13, 17, 19 };

			if (!BN_set_word(p, primes[i])) goto err;
			}
		else
			{
			/* a and r serve as the 'add' and 'rem' arguments of
			 * BN_generate_prime() here: 32 and 2*i + 1 */
			if (!BN_set_word(a, 32)) goto err;
			if (!BN_set_word(r, 2*i + 1)) goto err;

			if (!BN_generate_prime(p, 256, 0, a, r, genprime_cb, NULL)) goto err;
			putc('\n', stderr);
			}
		/* the modulus is also tried with a negative sign */
		p->neg = rand_neg();

		for (j = 0; j < num2; j++)
			{
			/* construct 'a' such that it is a square modulo p,
			 * but in general not a proper square and not reduced modulo p */
			if (!BN_bntest_rand(r, 256, 0, 3)) goto err;
			if (!BN_nnmod(r, r, p, ctx)) goto err;
			if (!BN_mod_sqr(r, r, p, ctx)) goto err;
			if (!BN_bntest_rand(a, 256, 0, 3)) goto err;
			if (!BN_nnmod(a, a, p, ctx)) goto err;
			if (!BN_mod_sqr(a, a, p, ctx)) goto err;
			if (!BN_mul(a, a, r, ctx)) goto err;
			/* sometimes shift a down by p (it may then be negative) */
			if (rand_neg())
				if (!BN_sub(a, a, p)) goto err;

			/* r := sqrt(a) mod p, then square it again */
			if (!BN_mod_sqrt(r, a, p, ctx)) goto err;
			if (!BN_mod_sqr(r, r, p, ctx)) goto err;

			/* reduce a so it can be compared with r */
			if (!BN_nnmod(a, a, p, ctx)) goto err;

			if (BN_cmp(a, r) != 0)
				{
				fprintf(stderr, "BN_mod_sqrt failed: a = ");
				BN_print_fp(stderr, a);
				fprintf(stderr, ", r = ");
				BN_print_fp(stderr, r);
				fprintf(stderr, ", p = ");
				BN_print_fp(stderr, p);
				fprintf(stderr, "\n");
				goto err;
				}

			putc('.', stderr);
			fflush(stderr);
			}

		putc('\n', stderr);
		fflush(stderr);
		}
	ret = 1;
 err:
	if (a != NULL) BN_free(a);
	if (p != NULL) BN_free(p);
	if (r != NULL) BN_free(r);
	return ret;
	}
1098
1099int test_lshift(BIO *bp,BN_CTX *ctx,BIGNUM *a_)
1100 {
1101 BIGNUM *a,*b,*c,*d;
1102 int i;
1103
1104 b=BN_new();
1105 c=BN_new();
1106 d=BN_new();
1107 BN_one(c);
1108
1109 if(a_)
1110 a=a_;
1111 else
1112 {
1113 a=BN_new();
1114 BN_bntest_rand(a,200,0,0); /**/
1115 a->neg=rand_neg();
1116 }
1117 for (i=0; i<num0; i++)
1118 {
1119 BN_lshift(b,a,i+1);
1120 BN_add(c,c,c);
1121 if (bp != NULL)
1122 {
1123 if (!results)
1124 {
1125 BN_print(bp,a);
1126 BIO_puts(bp," * ");
1127 BN_print(bp,c);
1128 BIO_puts(bp," - ");
1129 }
1130 BN_print(bp,b);
1131 BIO_puts(bp,"\n");
1132 }
1133 BN_mul(d,a,c,ctx);
1134 BN_sub(d,d,b);
1135 if(!BN_is_zero(d))
1136 {
1137 fprintf(stderr,"Left shift test failed!\n");
1138 fprintf(stderr,"a=");
1139 BN_print_fp(stderr,a);
1140 fprintf(stderr,"\nb=");
1141 BN_print_fp(stderr,b);
1142 fprintf(stderr,"\nc=");
1143 BN_print_fp(stderr,c);
1144 fprintf(stderr,"\nd=");
1145 BN_print_fp(stderr,d);
1146 fprintf(stderr,"\n");
1147 return 0;
1148 }
1149 }
1150 BN_free(a);
1151 BN_free(b);
1152 BN_free(c);
1153 BN_free(d);
1154 return(1);
1155 }
1156
1157int test_lshift1(BIO *bp)
1158 {
1159 BIGNUM *a,*b,*c;
1160 int i;
1161
1162 a=BN_new();
1163 b=BN_new();
1164 c=BN_new();
1165
1166 BN_bntest_rand(a,200,0,0); /**/
1167 a->neg=rand_neg();
1168 for (i=0; i<num0; i++)
1169 {
1170 BN_lshift1(b,a);
1171 if (bp != NULL)
1172 {
1173 if (!results)
1174 {
1175 BN_print(bp,a);
1176 BIO_puts(bp," * 2");
1177 BIO_puts(bp," - ");
1178 }
1179 BN_print(bp,b);
1180 BIO_puts(bp,"\n");
1181 }
1182 BN_add(c,a,a);
1183 BN_sub(a,b,c);
1184 if(!BN_is_zero(a))
1185 {
1186 fprintf(stderr,"Left shift one test failed!\n");
1187 return 0;
1188 }
1189
1190 BN_copy(a,b);
1191 }
1192 BN_free(a);
1193 BN_free(b);
1194 BN_free(c);
1195 return(1);
1196 }
1197
1198int test_rshift(BIO *bp,BN_CTX *ctx)
1199 {
1200 BIGNUM *a,*b,*c,*d,*e;
1201 int i;
1202
1203 a=BN_new();
1204 b=BN_new();
1205 c=BN_new();
1206 d=BN_new();
1207 e=BN_new();
1208 BN_one(c);
1209
1210 BN_bntest_rand(a,200,0,0); /**/
1211 a->neg=rand_neg();
1212 for (i=0; i<num0; i++)
1213 {
1214 BN_rshift(b,a,i+1);
1215 BN_add(c,c,c);
1216 if (bp != NULL)
1217 {
1218 if (!results)
1219 {
1220 BN_print(bp,a);
1221 BIO_puts(bp," / ");
1222 BN_print(bp,c);
1223 BIO_puts(bp," - ");
1224 }
1225 BN_print(bp,b);
1226 BIO_puts(bp,"\n");
1227 }
1228 BN_div(d,e,a,c,ctx);
1229 BN_sub(d,d,b);
1230 if(!BN_is_zero(d))
1231 {
1232 fprintf(stderr,"Right shift test failed!\n");
1233 return 0;
1234 }
1235 }
1236 BN_free(a);
1237 BN_free(b);
1238 BN_free(c);
1239 BN_free(d);
1240 BN_free(e);
1241 return(1);
1242 }
1243
1244int test_rshift1(BIO *bp)
1245 {
1246 BIGNUM *a,*b,*c;
1247 int i;
1248
1249 a=BN_new();
1250 b=BN_new();
1251 c=BN_new();
1252
1253 BN_bntest_rand(a,200,0,0); /**/
1254 a->neg=rand_neg();
1255 for (i=0; i<num0; i++)
1256 {
1257 BN_rshift1(b,a);
1258 if (bp != NULL)
1259 {
1260 if (!results)
1261 {
1262 BN_print(bp,a);
1263 BIO_puts(bp," / 2");
1264 BIO_puts(bp," - ");
1265 }
1266 BN_print(bp,b);
1267 BIO_puts(bp,"\n");
1268 }
1269 BN_sub(c,a,b);
1270 BN_sub(c,c,b);
1271 if(!BN_is_zero(c) && !BN_abs_is_word(c, 1))
1272 {
1273 fprintf(stderr,"Right shift one test failed!\n");
1274 return 0;
1275 }
1276 BN_copy(a,b);
1277 }
1278 BN_free(a);
1279 BN_free(b);
1280 BN_free(c);
1281 return(1);
1282 }
1283
/* Deterministic "random" sign source for the tests: each call returns the
 * next entry of a fixed 8-element 0/1 pattern, wrapping around at the end.
 * The call counter lives in a function-local static. */
int rand_neg(void)
	{
	static unsigned int calls=0;
	static int pattern[8]={0,0,0,1,1,0,1,1};
	unsigned int slot;

	slot=calls&7u;	/* same as calls % 8 for an unsigned counter */
	calls++;
	return(pattern[slot]);
	}
diff --git a/src/lib/libcrypto/bn/divtest.c b/src/lib/libcrypto/bn/divtest.c
new file mode 100644
index 0000000000..d3fc688f33
--- /dev/null
+++ b/src/lib/libcrypto/bn/divtest.c
@@ -0,0 +1,41 @@
1#include <openssl/bn.h>
2#include <openssl/rand.h>
3
/* Return a small pseudo-random non-negative integer (x[0] + 2*x[1] for
 * two pseudo-random bytes, i.e. at most 765); main() uses it as the bit
 * length of its random operands.
 *
 * Declared with (void): the previous K&R definition named a parameter
 * that was never used and that no caller supplied. */
static int Rand(void)
{
	unsigned char x[2];
	RAND_pseudo_bytes(x,2);
	return (x[0] + 2*x[1]);
}
10
11static void bug(char *m, BIGNUM *a, BIGNUM *b)
12{
13 printf("%s!\na=",m);
14 BN_print_fp(stdout, a);
15 printf("\nb=");
16 BN_print_fp(stdout, b);
17 printf("\n");
18 fflush(stdout);
19}
20
21main()
22{
23 BIGNUM *a=BN_new(), *b=BN_new(), *c=BN_new(), *d=BN_new(),
24 *C=BN_new(), *D=BN_new();
25 BN_RECP_CTX *recp=BN_RECP_CTX_new();
26 BN_CTX *ctx=BN_CTX_new();
27
28 for(;;) {
29 BN_pseudo_rand(a,Rand(),0,0);
30 BN_pseudo_rand(b,Rand(),0,0);
31 if (BN_is_zero(b)) continue;
32
33 BN_RECP_CTX_set(recp,b,ctx);
34 if (BN_div(C,D,a,b,ctx) != 1)
35 bug("BN_div failed",a,b);
36 if (BN_div_recp(c,d,a,recp,ctx) != 1)
37 bug("BN_div_recp failed",a,b);
38 else if (BN_cmp(c,C) != 0 || BN_cmp(c,C) != 0)
39 bug("mismatch",a,b);
40 }
41}
diff --git a/src/lib/libcrypto/bn/exp.c b/src/lib/libcrypto/bn/exp.c
new file mode 100644
index 0000000000..4865b0ef74
--- /dev/null
+++ b/src/lib/libcrypto/bn/exp.c
@@ -0,0 +1,62 @@
1/* unused */
2
3#include <stdio.h>
4#include <openssl/tmdiff.h>
5#include "bn_lcl.h"
6
/* Starting values for the benchmark loop in main(): */
/* initial operand size passed to BN_rand(); doubled after every round */
#define SIZE 256
/* initial iteration count; divided by 8 after every round (minimum 1) */
#define NUM (8*8*8)
/* initial scale factor for the normalised-time column; divided by 8 per round */
#define MOD (8*8*8*8*8)
10
11main(argc,argv)
12int argc;
13char *argv[];
14 {
15 BN_CTX ctx;
16 BIGNUM a,b,c,r,rr,t,l;
17 int j,i,size=SIZE,num=NUM,mod=MOD;
18 char *start,*end;
19 BN_MONT_CTX mont;
20 double d,md;
21
22 BN_MONT_CTX_init(&mont);
23 BN_CTX_init(&ctx);
24 BN_init(&a);
25 BN_init(&b);
26 BN_init(&c);
27 BN_init(&r);
28
29 start=ms_time_new();
30 end=ms_time_new();
31 while (size <= 1024*8)
32 {
33 BN_rand(&a,size,0,0);
34 BN_rand(&b,size,1,0);
35 BN_rand(&c,size,0,1);
36
37 BN_mod(&a,&a,&c,&ctx);
38
39 ms_time_get(start);
40 for (i=0; i<10; i++)
41 BN_MONT_CTX_set(&mont,&c,&ctx);
42 ms_time_get(end);
43 md=ms_time_diff(start,end);
44
45 ms_time_get(start);
46 for (i=0; i<num; i++)
47 {
48 /* bn_mull(&r,&a,&b,&ctx); */
49 /* BN_sqr(&r,&a,&ctx); */
50 BN_mod_exp_mont(&r,&a,&b,&c,&ctx,&mont);
51 }
52 ms_time_get(end);
53 d=ms_time_diff(start,end)/* *50/33 */;
54 printf("%5d bit:%6.2f %6d %6.4f %4d m_set(%5.4f)\n",size,
55 d,num,d/num,(int)((d/num)*mod),md/10.0);
56 num/=8;
57 mod/=8;
58 if (num <= 0) num=1;
59 size*=2;
60 }
61
62 }
diff --git a/src/lib/libcrypto/bn/expspeed.c b/src/lib/libcrypto/bn/expspeed.c
new file mode 100644
index 0000000000..4d5f221f33
--- /dev/null
+++ b/src/lib/libcrypto/bn/expspeed.c
@@ -0,0 +1,353 @@
1/* unused */
2
3/* crypto/bn/expspeed.c */
4/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
5 * All rights reserved.
6 *
7 * This package is an SSL implementation written
8 * by Eric Young (eay@cryptsoft.com).
9 * The implementation was written so as to conform with Netscapes SSL.
10 *
11 * This library is free for commercial and non-commercial use as long as
12 * the following conditions are aheared to. The following conditions
13 * apply to all code found in this distribution, be it the RC4, RSA,
14 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
15 * included with this distribution is covered by the same copyright terms
16 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
17 *
18 * Copyright remains Eric Young's, and as such any Copyright notices in
19 * the code are not to be removed.
20 * If this package is used in a product, Eric Young should be given attribution
21 * as the author of the parts of the library used.
22 * This can be in the form of a textual message at program startup or
23 * in documentation (online or textual) provided with the package.
24 *
25 * Redistribution and use in source and binary forms, with or without
26 * modification, are permitted provided that the following conditions
27 * are met:
28 * 1. Redistributions of source code must retain the copyright
29 * notice, this list of conditions and the following disclaimer.
30 * 2. Redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution.
33 * 3. All advertising materials mentioning features or use of this software
34 * must display the following acknowledgement:
35 * "This product includes cryptographic software written by
36 * Eric Young (eay@cryptsoft.com)"
37 * The word 'cryptographic' can be left out if the rouines from the library
38 * being used are not cryptographic related :-).
39 * 4. If you include any Windows specific code (or a derivative thereof) from
40 * the apps directory (application code) you must include an acknowledgement:
41 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
42 *
43 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * The licence and distribution terms for any publically available version or
56 * derivative of this code cannot be changed. i.e. this code cannot simply be
57 * copied and put under another distribution licence
58 * [including the GNU Public Licence.]
59 */
60
61/* most of this code has been pilfered from my libdes speed.c program */
62
63#define BASENUM 5000
64#define NUM_START 0
65
66
67/* determine timings for modexp, modmul, modsqr, gcd, Kronecker symbol,
68 * modular inverse, or modular square roots */
69#define TEST_EXP
70#undef TEST_MUL
71#undef TEST_SQR
72#undef TEST_GCD
73#undef TEST_KRON
74#undef TEST_INV
75#undef TEST_SQRT
76#define P_MOD_64 9 /* least significant 6 bits for prime to be used for BN_sqrt timings */
77
78#if defined(TEST_EXP) + defined(TEST_MUL) + defined(TEST_SQR) + defined(TEST_GCD) + defined(TEST_KRON) + defined(TEST_INV) +defined(TEST_SQRT) != 1
79# error "choose one test"
80#endif
81
82#if defined(TEST_INV) || defined(TEST_SQRT)
83# define C_PRIME
84static void genprime_cb(int p, int n, void *arg);
85#endif
86
87
88
89#undef PROG
90#define PROG bnspeed_main
91
92#include <stdio.h>
93#include <stdlib.h>
94#include <signal.h>
95#include <string.h>
96#include <openssl/crypto.h>
97#include <openssl/err.h>
98#include <openssl/rand.h>
99
100#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
101#define TIMES
102#endif
103
104#ifndef _IRIX
105#include <time.h>
106#endif
107#ifdef TIMES
108#include <sys/types.h>
109#include <sys/times.h>
110#endif
111
112/* Depending on the VMS version, the tms structure is perhaps defined.
113 The __TMS macro will show if it was. If it wasn't defined, we should
114 undefine TIMES, since that tells the rest of the program how things
115 should be handled. -- Richard Levitte */
116#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
117#undef TIMES
118#endif
119
120#ifndef TIMES
121#include <sys/timeb.h>
122#endif
123
124#if defined(sun) || defined(__ultrix)
125#define _POSIX_SOURCE
126#include <limits.h>
127#include <sys/param.h>
128#endif
129
130#include <openssl/bn.h>
131#include <openssl/x509.h>
132
133/* The following is from the times(3) man page. It may need to be changed */
134#ifndef HZ
135# ifndef CLK_TCK
136# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
137# define HZ 100.0
138# else /* _BSD_CLK_TCK_ */
139# define HZ ((double)_BSD_CLK_TCK_)
140# endif
141# else /* CLK_TCK */
142# define HZ ((double)CLK_TCK)
143# endif
144#endif
145
146#undef BUFSIZE
147#define BUFSIZE ((long)1024*8)
148int run=0;
149
#define START	0
#define STOP	1
static double Time_F(int s);

/*
 * Simple two-state stopwatch.
 *
 * Time_F(START) records the current time in a function-local static and
 * returns 0.  Any other argument (normally STOP) records the end time and
 * returns the number of seconds elapsed since the last START, never less
 * than 0.001.
 *
 * With TIMES defined the measurement is the tms_utime delta from times()
 * divided by HZ; otherwise it is the wall-clock delta from ftime().
 */
static double Time_F(int s)
	{
#ifdef TIMES
	static struct tms t_begin,t_end;
	double secs;

	if (s == START)
		{
		times(&t_begin);
		return(0);
		}

	times(&t_end);
	secs=(double)(t_end.tms_utime-t_begin.tms_utime);
	secs=secs/HZ;
	if (secs < 1e-3)
		return(1e-3);
	return(secs);
#else /* !times() */
	static struct timeb t_begin,t_end;
	long ms_delta;
	double secs;

	if (s == START)
		{
		ftime(&t_begin);
		return(0);
		}

	ftime(&t_end);
	/* the millisecond difference may be negative; the seconds term compensates */
	ms_delta=(long)t_end.millitm-(long)t_begin.millitm;
	secs=((double)(t_end.time-t_begin.time))+((double)ms_delta)/1000.0;
	if (secs < 0.001)
		return(0.001);
	return(secs);
#endif
	}
189
190#define NUM_SIZES 7
191#if NUM_START > NUM_SIZES
192# error "NUM_START > NUM_SIZES"
193#endif
194static int sizes[NUM_SIZES]={128,256,512,1024,2048,4096,8192};
195static int mul_c[NUM_SIZES]={8*8*8*8*8*8,8*8*8*8*8,8*8*8*8,8*8*8,8*8,8,1};
196/*static int sizes[NUM_SIZES]={59,179,299,419,539}; */
197
198#define RAND_SEED(string) { const char str[] = string; RAND_seed(string, sizeof str); }
199
200void do_mul_exp(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *c,BN_CTX *ctx);
201
202int main(int argc, char **argv)
203 {
204 BN_CTX *ctx;
205 BIGNUM *a,*b,*c,*r;
206
207#if 1
208 if (!CRYPTO_set_mem_debug_functions(0,0,0,0,0))
209 abort();
210#endif
211
212 ctx=BN_CTX_new();
213 a=BN_new();
214 b=BN_new();
215 c=BN_new();
216 r=BN_new();
217
218 while (!RAND_status())
219 /* not enough bits */
220 RAND_SEED("I demand a manual recount!");
221
222 do_mul_exp(r,a,b,c,ctx);
223 return 0;
224 }
225
226void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
227 {
228 int i,k;
229 double tm;
230 long num;
231
232 num=BASENUM;
233 for (i=NUM_START; i<NUM_SIZES; i++)
234 {
235#ifdef C_PRIME
236# ifdef TEST_SQRT
237 if (!BN_set_word(a, 64)) goto err;
238 if (!BN_set_word(b, P_MOD_64)) goto err;
239# define ADD a
240# define REM b
241# else
242# define ADD NULL
243# define REM NULL
244# endif
245 if (!BN_generate_prime(c,sizes[i],0,ADD,REM,genprime_cb,NULL)) goto err;
246 putc('\n', stderr);
247 fflush(stderr);
248#endif
249
250 for (k=0; k<num; k++)
251 {
252 if (k%50 == 0) /* Average over num/50 different choices of random numbers. */
253 {
254 if (!BN_pseudo_rand(a,sizes[i],1,0)) goto err;
255
256 if (!BN_pseudo_rand(b,sizes[i],1,0)) goto err;
257
258#ifndef C_PRIME
259 if (!BN_pseudo_rand(c,sizes[i],1,1)) goto err;
260#endif
261
262#ifdef TEST_SQRT
263 if (!BN_mod_sqr(a,a,c,ctx)) goto err;
264 if (!BN_mod_sqr(b,b,c,ctx)) goto err;
265#else
266 if (!BN_nnmod(a,a,c,ctx)) goto err;
267 if (!BN_nnmod(b,b,c,ctx)) goto err;
268#endif
269
270 if (k == 0)
271 Time_F(START);
272 }
273
274#if defined(TEST_EXP)
275 if (!BN_mod_exp(r,a,b,c,ctx)) goto err;
276#elif defined(TEST_MUL)
277 {
278 int i = 0;
279 for (i = 0; i < 50; i++)
280 if (!BN_mod_mul(r,a,b,c,ctx)) goto err;
281 }
282#elif defined(TEST_SQR)
283 {
284 int i = 0;
285 for (i = 0; i < 50; i++)
286 {
287 if (!BN_mod_sqr(r,a,c,ctx)) goto err;
288 if (!BN_mod_sqr(r,b,c,ctx)) goto err;
289 }
290 }
291#elif defined(TEST_GCD)
292 if (!BN_gcd(r,a,b,ctx)) goto err;
293 if (!BN_gcd(r,b,c,ctx)) goto err;
294 if (!BN_gcd(r,c,a,ctx)) goto err;
295#elif defined(TEST_KRON)
296 if (-2 == BN_kronecker(a,b,ctx)) goto err;
297 if (-2 == BN_kronecker(b,c,ctx)) goto err;
298 if (-2 == BN_kronecker(c,a,ctx)) goto err;
299#elif defined(TEST_INV)
300 if (!BN_mod_inverse(r,a,c,ctx)) goto err;
301 if (!BN_mod_inverse(r,b,c,ctx)) goto err;
302#else /* TEST_SQRT */
303 if (!BN_mod_sqrt(r,a,c,ctx)) goto err;
304 if (!BN_mod_sqrt(r,b,c,ctx)) goto err;
305#endif
306 }
307 tm=Time_F(STOP);
308 printf(
309#if defined(TEST_EXP)
310 "modexp %4d ^ %4d %% %4d"
311#elif defined(TEST_MUL)
312 "50*modmul %4d %4d %4d"
313#elif defined(TEST_SQR)
314 "100*modsqr %4d %4d %4d"
315#elif defined(TEST_GCD)
316 "3*gcd %4d %4d %4d"
317#elif defined(TEST_KRON)
318 "3*kronecker %4d %4d %4d"
319#elif defined(TEST_INV)
320 "2*inv %4d %4d mod %4d"
321#else /* TEST_SQRT */
322 "2*sqrt [prime == %d (mod 64)] %4d %4d mod %4d"
323#endif
324 " -> %8.6fms %5.1f (%ld)\n",
325#ifdef TEST_SQRT
326 P_MOD_64,
327#endif
328 sizes[i],sizes[i],sizes[i],tm*1000.0/num,tm*mul_c[i]/num, num);
329 num/=7;
330 if (num <= 0) num=1;
331 }
332 return;
333
334 err:
335 ERR_print_errors_fp(stderr);
336 }
337
338
#ifdef C_PRIME
/*
 * Progress callback passed to BN_generate_prime(): writes one character to
 * stderr per call, chosen from the stage code p ('.' for 0, '+' for 1,
 * newline for 3, '*' for 2 and for every other value).  n and arg are
 * ignored.
 */
static void genprime_cb(int p, int n, void *arg)
	{
	char mark;

	(void)n;
	(void)arg;

	switch (p)
		{
	case 0:
		mark='.';
		break;
	case 1:
		mark='+';
		break;
	case 3:
		mark='\n';
		break;
	case 2:
	default:
		mark='*';
		break;
		}

	putc(mark, stderr);
	fflush(stderr);
	}
#endif
diff --git a/src/lib/libcrypto/bn/exptest.c b/src/lib/libcrypto/bn/exptest.c
new file mode 100644
index 0000000000..28aaac2ac1
--- /dev/null
+++ b/src/lib/libcrypto/bn/exptest.c
@@ -0,0 +1,201 @@
1/* crypto/bn/exptest.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <stdlib.h>
61#include <string.h>
62
63#include "../e_os.h"
64
65#include <openssl/bio.h>
66#include <openssl/bn.h>
67#include <openssl/rand.h>
68#include <openssl/err.h>
69
70#define NUM_BITS (BN_BITS*2)
71
72static const char rnd_seed[] = "string to make the random number generator think it has entropy";
73
74int main(int argc, char *argv[])
75 {
76 BN_CTX *ctx;
77 BIO *out=NULL;
78 int i,ret;
79 unsigned char c;
80 BIGNUM *r_mont,*r_mont_const,*r_recp,*r_simple,*a,*b,*m;
81
82 RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_rand may fail, and we don't
83 * even check its return value
84 * (which we should) */
85
86 ERR_load_BN_strings();
87
88 ctx=BN_CTX_new();
89 if (ctx == NULL) EXIT(1);
90 r_mont=BN_new();
91 r_mont_const=BN_new();
92 r_recp=BN_new();
93 r_simple=BN_new();
94 a=BN_new();
95 b=BN_new();
96 m=BN_new();
97 if ( (r_mont == NULL) || (r_recp == NULL) ||
98 (a == NULL) || (b == NULL))
99 goto err;
100
101 out=BIO_new(BIO_s_file());
102
103 if (out == NULL) EXIT(1);
104 BIO_set_fp(out,stdout,BIO_NOCLOSE);
105
106 for (i=0; i<200; i++)
107 {
108 RAND_bytes(&c,1);
109 c=(c%BN_BITS)-BN_BITS2;
110 BN_rand(a,NUM_BITS+c,0,0);
111
112 RAND_bytes(&c,1);
113 c=(c%BN_BITS)-BN_BITS2;
114 BN_rand(b,NUM_BITS+c,0,0);
115
116 RAND_bytes(&c,1);
117 c=(c%BN_BITS)-BN_BITS2;
118 BN_rand(m,NUM_BITS+c,0,1);
119
120 BN_mod(a,a,m,ctx);
121 BN_mod(b,b,m,ctx);
122
123 ret=BN_mod_exp_mont(r_mont,a,b,m,ctx,NULL);
124 if (ret <= 0)
125 {
126 printf("BN_mod_exp_mont() problems\n");
127 ERR_print_errors(out);
128 EXIT(1);
129 }
130
131 ret=BN_mod_exp_recp(r_recp,a,b,m,ctx);
132 if (ret <= 0)
133 {
134 printf("BN_mod_exp_recp() problems\n");
135 ERR_print_errors(out);
136 EXIT(1);
137 }
138
139 ret=BN_mod_exp_simple(r_simple,a,b,m,ctx);
140 if (ret <= 0)
141 {
142 printf("BN_mod_exp_simple() problems\n");
143 ERR_print_errors(out);
144 EXIT(1);
145 }
146
147 ret=BN_mod_exp_mont_consttime(r_mont_const,a,b,m,ctx,NULL);
148 if (ret <= 0)
149 {
150 printf("BN_mod_exp_mont_consttime() problems\n");
151 ERR_print_errors(out);
152 EXIT(1);
153 }
154
155 if (BN_cmp(r_simple, r_mont) == 0
156 && BN_cmp(r_simple,r_recp) == 0
157 && BN_cmp(r_simple,r_mont_const) == 0)
158 {
159 printf(".");
160 fflush(stdout);
161 }
162 else
163 {
164 if (BN_cmp(r_simple,r_mont) != 0)
165 printf("\nsimple and mont results differ\n");
166 if (BN_cmp(r_simple,r_mont) != 0)
167 printf("\nsimple and mont const time results differ\n");
168 if (BN_cmp(r_simple,r_recp) != 0)
169 printf("\nsimple and recp results differ\n");
170
171 printf("a (%3d) = ",BN_num_bits(a)); BN_print(out,a);
172 printf("\nb (%3d) = ",BN_num_bits(b)); BN_print(out,b);
173 printf("\nm (%3d) = ",BN_num_bits(m)); BN_print(out,m);
174 printf("\nsimple ="); BN_print(out,r_simple);
175 printf("\nrecp ="); BN_print(out,r_recp);
176 printf("\nmont ="); BN_print(out,r_mont);
177 printf("\nmont_ct ="); BN_print(out,r_mont_const);
178 printf("\n");
179 EXIT(1);
180 }
181 }
182 BN_free(r_mont);
183 BN_free(r_mont_const);
184 BN_free(r_recp);
185 BN_free(r_simple);
186 BN_free(a);
187 BN_free(b);
188 BN_free(m);
189 BN_CTX_free(ctx);
190 ERR_remove_state(0);
191 CRYPTO_mem_leaks(out);
192 BIO_free(out);
193 printf(" done\n");
194 EXIT(0);
195err:
196 ERR_load_crypto_strings();
197 ERR_print_errors(out);
198 EXIT(1);
199 return(1);
200 }
201
diff --git a/src/lib/libcrypto/bn/todo b/src/lib/libcrypto/bn/todo
new file mode 100644
index 0000000000..e47e381aea
--- /dev/null
+++ b/src/lib/libcrypto/bn/todo
@@ -0,0 +1,3 @@
1Cache RECP_CTX values
2make the result argument independent of the inputs.
3split up the _exp_ functions
diff --git a/src/lib/libcrypto/bn/vms-helper.c b/src/lib/libcrypto/bn/vms-helper.c
new file mode 100644
index 0000000000..4b63149bf3
--- /dev/null
+++ b/src/lib/libcrypto/bn/vms-helper.c
@@ -0,0 +1,68 @@
1/* vms-helper.c */
2/* ====================================================================
3 * Copyright (c) 1999 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@OpenSSL.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include <stdio.h>
57#include "cryptlib.h"
58#include "bn_lcl.h"
59
/*
 * Report a word division that would overflow.
 *
 * When BN_DEBUG is defined, prints "Division would overflow (<i>)" to stderr
 * (unless stdio is disabled or the target is WIN16) and calls abort().
 * Without BN_DEBUG the function does nothing.
 *
 * NOTE(review): presumably called from the VMS assembler division routine;
 * the caller is not visible in this file -- confirm.
 *
 * i is the value echoed in the diagnostic.  Returns 0 when it returns at
 * all; the return type was previously an implicit int with no return
 * statement.
 */
int bn_div_words_abort(int i)
{
#ifdef BN_DEBUG
#if !defined(OPENSSL_NO_STDIO) && !defined(OPENSSL_SYS_WIN16)
	fprintf(stderr,"Division would overflow (%d)\n",i);
#endif
	abort();
#endif
	(void)i;	/* unused when BN_DEBUG (or stdio) is compiled out */
	return 0;
}