summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r--src/lib/libcrypto/bn/Makefile347
-rw-r--r--src/lib/libcrypto/bn/asm/README27
-rw-r--r--src/lib/libcrypto/bn/asm/alpha-mont.pl321
-rw-r--r--src/lib/libcrypto/bn/asm/armv4-mont.pl201
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl774
-rw-r--r--src/lib/libcrypto/bn/asm/co-586.pl287
-rw-r--r--src/lib/libcrypto/bn/asm/ia64.S1555
-rw-r--r--src/lib/libcrypto/bn/asm/mips3-mont.pl327
-rw-r--r--src/lib/libcrypto/bn/asm/mips3.s2201
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2.s1618
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2W.s1605
-rw-r--r--src/lib/libcrypto/bn/asm/ppc-mont.pl323
-rw-r--r--src/lib/libcrypto/bn/asm/ppc.pl1981
-rw-r--r--src/lib/libcrypto/bn/asm/ppc64-mont.pl918
-rw-r--r--src/lib/libcrypto/bn/asm/s390x-mont.pl225
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/s390x.S678
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8.S1458
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8plus.S1558
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv9-mont.pl606
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/sparcv9a-mont.pl882
-rw-r--r--src/lib/libcrypto/bn/asm/via-mont.pl242
-rw-r--r--src/lib/libcrypto/bn/asm/vms.mar6440
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/x86-mont.pl593
-rw-r--r--src/lib/libcrypto/bn/asm/x86.pl28
-rw-r--r--src/lib/libcrypto/bn/asm/x86/add.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86/comba.pl277
-rw-r--r--src/lib/libcrypto/bn/asm/x86/div.pl15
-rw-r--r--src/lib/libcrypto/bn/asm/x86/f3
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul.pl77
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul_add.pl87
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sqr.pl60
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sub.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86_64-gcc.c606
-rwxr-xr-xsrc/lib/libcrypto/bn/asm/x86_64-mont.pl330
-rw-r--r--src/lib/libcrypto/bn/bn.h876
-rw-r--r--src/lib/libcrypto/bn/bn.mul19
-rw-r--r--src/lib/libcrypto/bn/bn_add.c313
-rw-r--r--src/lib/libcrypto/bn/bn_asm.c1030
-rw-r--r--src/lib/libcrypto/bn/bn_blind.c385
-rw-r--r--src/lib/libcrypto/bn/bn_const.c402
-rw-r--r--src/lib/libcrypto/bn/bn_ctx.c454
-rw-r--r--src/lib/libcrypto/bn/bn_depr.c112
-rw-r--r--src/lib/libcrypto/bn/bn_div.c650
-rw-r--r--src/lib/libcrypto/bn/bn_err.c150
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c991
-rw-r--r--src/lib/libcrypto/bn/bn_exp2.c312
-rw-r--r--src/lib/libcrypto/bn/bn_gcd.c654
-rw-r--r--src/lib/libcrypto/bn/bn_gf2m.c1035
-rw-r--r--src/lib/libcrypto/bn/bn_kron.c184
-rw-r--r--src/lib/libcrypto/bn/bn_lcl.h491
-rw-r--r--src/lib/libcrypto/bn/bn_lib.c845
-rw-r--r--src/lib/libcrypto/bn/bn_mod.c301
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c567
-rw-r--r--src/lib/libcrypto/bn/bn_mpi.c130
-rw-r--r--src/lib/libcrypto/bn/bn_mul.c1166
-rw-r--r--src/lib/libcrypto/bn/bn_nist.c844
-rw-r--r--src/lib/libcrypto/bn/bn_prime.c494
-rw-r--r--src/lib/libcrypto/bn/bn_prime.h327
-rw-r--r--src/lib/libcrypto/bn/bn_prime.pl119
-rw-r--r--src/lib/libcrypto/bn/bn_print.c359
-rw-r--r--src/lib/libcrypto/bn/bn_rand.c305
-rw-r--r--src/lib/libcrypto/bn/bn_recp.c234
-rw-r--r--src/lib/libcrypto/bn/bn_shift.c220
-rw-r--r--src/lib/libcrypto/bn/bn_sqr.c294
-rw-r--r--src/lib/libcrypto/bn/bn_sqrt.c393
-rw-r--r--src/lib/libcrypto/bn/bn_word.c247
-rw-r--r--src/lib/libcrypto/bn/bnspeed.c233
-rw-r--r--src/lib/libcrypto/bn/bntest.c2013
-rw-r--r--src/lib/libcrypto/bn/divtest.c41
-rw-r--r--src/lib/libcrypto/bn/exp.c62
-rw-r--r--src/lib/libcrypto/bn/expspeed.c353
-rw-r--r--src/lib/libcrypto/bn/exptest.c204
-rw-r--r--src/lib/libcrypto/bn/todo3
-rw-r--r--src/lib/libcrypto/bn/vms-helper.c68
74 files changed, 0 insertions, 44682 deletions
diff --git a/src/lib/libcrypto/bn/Makefile b/src/lib/libcrypto/bn/Makefile
deleted file mode 100644
index aabc4f56b8..0000000000
--- a/src/lib/libcrypto/bn/Makefile
+++ /dev/null
@@ -1,347 +0,0 @@
1#
2# OpenSSL/crypto/bn/Makefile
3#
4
5DIR= bn
6TOP= ../..
7CC= cc
8CPP= $(CC) -E
9INCLUDES= -I.. -I$(TOP) -I../../include
10CFLAG=-g
11MAKEFILE= Makefile
12AR= ar r
13
14BN_ASM= bn_asm.o
15
16CFLAGS= $(INCLUDES) $(CFLAG)
17ASFLAGS= $(INCLUDES) $(ASFLAG)
18AFLAGS= $(ASFLAGS)
19
20GENERAL=Makefile
21TEST=bntest.c exptest.c
22APPS=
23
24LIB=$(TOP)/libcrypto.a
25LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
26 bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
27 bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \
28 bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
29 bn_depr.c bn_const.c
30
31LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \
32 bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
33 bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \
34 bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \
35 bn_depr.o bn_const.o
36
37SRC= $(LIBSRC)
38
39EXHEADER= bn.h
40HEADER= bn_lcl.h bn_prime.h $(EXHEADER)
41
42ALL= $(GENERAL) $(SRC) $(HEADER)
43
44top:
45 (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
46
47all: lib
48
49bn_prime.h: bn_prime.pl
50 $(PERL) bn_prime.pl >bn_prime.h
51
52divtest: divtest.c ../../libcrypto.a
53 cc -I../../include divtest.c -o divtest ../../libcrypto.a
54
55bnbug: bnbug.c ../../libcrypto.a top
56 cc -g -I../../include bnbug.c -o bnbug ../../libcrypto.a
57
58lib: $(LIBOBJ)
59 $(AR) $(LIB) $(LIBOBJ)
60 $(RANLIB) $(LIB) || echo Never mind.
61 @touch lib
62
63bn-586.s: asm/bn-586.pl ../perlasm/x86asm.pl
64 $(PERL) asm/bn-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
65co-586.s: asm/co-586.pl ../perlasm/x86asm.pl
66 $(PERL) asm/co-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
67x86-mont.s: asm/x86-mont.pl ../perlasm/x86asm.pl
68 $(PERL) asm/x86-mont.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
69
70sparcv8.o: asm/sparcv8.S
71 $(CC) $(CFLAGS) -c asm/sparcv8.S
72bn-sparcv9.o: asm/sparcv8plus.S
73 $(CC) $(CFLAGS) -c -o $@ asm/sparcv8plus.S
74sparcv9a-mont.s: asm/sparcv9a-mont.pl
75 $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@
76sparcv9-mont.s: asm/sparcv9-mont.pl
77 $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
78
79bn-mips3.o: asm/mips3.s
80 @if [ "$(CC)" = "gcc" ]; then \
81 ABI=`expr "$(CFLAGS)" : ".*-mabi=\([n3264]*\)"` && \
82 as -$$ABI -O -o $@ asm/mips3.s; \
83 else $(CC) -c $(CFLAGS) -o $@ asm/mips3.s; fi
84
85bn-s390x.o: asm/s390x.S
86 $(CC) $(CFLAGS) -c -o $@ asm/s390x.S
87
88x86_64-gcc.o: asm/x86_64-gcc.c
89 $(CC) $(CFLAGS) -c -o $@ asm/x86_64-gcc.c
90x86_64-mont.s: asm/x86_64-mont.pl
91 $(PERL) asm/x86_64-mont.pl $(PERLASM_SCHEME) > $@
92
93bn-ia64.s: asm/ia64.S
94 $(CC) $(CFLAGS) -E asm/ia64.S > $@
95
96# GNU assembler fails to compile PA-RISC2 modules, insist on calling
97# vendor assembler...
98pa-risc2W.o: asm/pa-risc2W.s
99 /usr/ccs/bin/as -o pa-risc2W.o asm/pa-risc2W.s
100pa-risc2.o: asm/pa-risc2.s
101 /usr/ccs/bin/as -o pa-risc2.o asm/pa-risc2.s
102
103# ppc - AIX, Linux, MacOS X...
104bn-ppc.s: asm/ppc.pl; $(PERL) asm/ppc.pl $(PERLASM_SCHEME) $@
105ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@
106
107alpha-mont.s: asm/alpha-mont.pl
108 $(PERL) $< | $(CC) -E - | tee $@ > /dev/null
109
110# GNU make "catch all"
111%-mont.s: asm/%-mont.pl; $(PERL) $< $(CFLAGS) > $@
112
113files:
114 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
115
116links:
117 @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
118 @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
119 @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
120
121install:
122 @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
123 @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
124 do \
125 (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
126 chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
127 done;
128
129exptest:
130 rm -f exptest
131 gcc -I../../include -g2 -ggdb -o exptest exptest.c ../../libcrypto.a
132
133div:
134 rm -f a.out
135 gcc -I.. -g div.c ../../libcrypto.a
136
137tags:
138 ctags $(SRC)
139
140tests:
141
142lint:
143 lint -DLINT $(INCLUDES) $(SRC)>fluff
144
145depend:
146 @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
147 $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
148
149dclean:
150 $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
151 mv -f Makefile.new $(MAKEFILE)
152
153clean:
154 rm -f *.s *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
155
156# DO NOT DELETE THIS LINE -- make depend depends on it.
157
158bn_add.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
159bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
160bn_add.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
161bn_add.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
162bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
163bn_add.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
164bn_add.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_add.c bn_lcl.h
165bn_asm.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
166bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
167bn_asm.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
168bn_asm.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
169bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
170bn_asm.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
171bn_asm.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_asm.c bn_lcl.h
172bn_blind.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
173bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
174bn_blind.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
175bn_blind.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
176bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
177bn_blind.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
178bn_blind.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_blind.c bn_lcl.h
179bn_const.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
180bn_const.o: ../../include/openssl/opensslconf.h
181bn_const.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
182bn_const.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
183bn_const.o: ../../include/openssl/symhacks.h bn.h bn_const.c
184bn_ctx.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
185bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
186bn_ctx.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
187bn_ctx.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
188bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
189bn_ctx.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
190bn_ctx.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_ctx.c bn_lcl.h
191bn_depr.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
192bn_depr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
193bn_depr.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
194bn_depr.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
195bn_depr.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
196bn_depr.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h
197bn_depr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
198bn_depr.o: ../cryptlib.h bn_depr.c bn_lcl.h
199bn_div.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
200bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
201bn_div.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
202bn_div.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
203bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
204bn_div.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
205bn_div.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_div.c bn_lcl.h
206bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
207bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
208bn_err.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
209bn_err.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
210bn_err.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
211bn_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
212bn_err.o: bn_err.c
213bn_exp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
214bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
215bn_exp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
216bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
217bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
218bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
219bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_exp.c bn_lcl.h
220bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
221bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
222bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
223bn_exp2.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
224bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
225bn_exp2.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
226bn_exp2.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_exp2.c bn_lcl.h
227bn_gcd.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
228bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
229bn_gcd.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
230bn_gcd.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
231bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
232bn_gcd.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
233bn_gcd.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_gcd.c bn_lcl.h
234bn_gf2m.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
235bn_gf2m.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
236bn_gf2m.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
237bn_gf2m.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
238bn_gf2m.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
239bn_gf2m.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
240bn_gf2m.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_gf2m.c bn_lcl.h
241bn_kron.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
242bn_kron.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
243bn_kron.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
244bn_kron.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
245bn_kron.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
246bn_kron.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
247bn_kron.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_kron.c bn_lcl.h
248bn_lib.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
249bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
250bn_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
251bn_lib.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
252bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
253bn_lib.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
254bn_lib.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_lib.c
255bn_mod.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
256bn_mod.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
257bn_mod.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
258bn_mod.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
259bn_mod.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
260bn_mod.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
261bn_mod.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mod.c
262bn_mont.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
263bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
264bn_mont.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
265bn_mont.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
266bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
267bn_mont.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
268bn_mont.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mont.c
269bn_mpi.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
270bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
271bn_mpi.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
272bn_mpi.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
273bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
274bn_mpi.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
275bn_mpi.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mpi.c
276bn_mul.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
277bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
278bn_mul.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
279bn_mul.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
280bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
281bn_mul.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
282bn_mul.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_mul.c
283bn_nist.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
284bn_nist.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
285bn_nist.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
286bn_nist.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
287bn_nist.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
288bn_nist.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
289bn_nist.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_nist.c
290bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
291bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
292bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
293bn_prime.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
294bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
295bn_prime.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h
296bn_prime.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
297bn_prime.o: ../cryptlib.h bn_lcl.h bn_prime.c bn_prime.h
298bn_print.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
299bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
300bn_print.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
301bn_print.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
302bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
303bn_print.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
304bn_print.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_print.c
305bn_rand.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
306bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
307bn_rand.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
308bn_rand.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
309bn_rand.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
310bn_rand.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h
311bn_rand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
312bn_rand.o: ../cryptlib.h bn_lcl.h bn_rand.c
313bn_recp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
314bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
315bn_recp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
316bn_recp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
317bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
318bn_recp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
319bn_recp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_recp.c
320bn_shift.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
321bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
322bn_shift.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
323bn_shift.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
324bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
325bn_shift.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
326bn_shift.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_shift.c
327bn_sqr.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
328bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
329bn_sqr.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
330bn_sqr.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
331bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
332bn_sqr.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
333bn_sqr.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_sqr.c
334bn_sqrt.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
335bn_sqrt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
336bn_sqrt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
337bn_sqrt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
338bn_sqrt.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
339bn_sqrt.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
340bn_sqrt.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_sqrt.c
341bn_word.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
342bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
343bn_word.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
344bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
345bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
346bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
347bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c
diff --git a/src/lib/libcrypto/bn/asm/README b/src/lib/libcrypto/bn/asm/README
deleted file mode 100644
index b0f3a68a06..0000000000
--- a/src/lib/libcrypto/bn/asm/README
+++ /dev/null
@@ -1,27 +0,0 @@
1<OBSOLETE>
2
3All assember in this directory are just version of the file
4crypto/bn/bn_asm.c.
5
6Quite a few of these files are just the assember output from gcc since on
7quite a few machines they are 2 times faster than the system compiler.
8
9For the x86, I have hand written assember because of the bad job all
10compilers seem to do on it. This normally gives a 2 time speed up in the RSA
11routines.
12
13For the DEC alpha, I also hand wrote the assember (except the division which
14is just the output from the C compiler pasted on the end of the file).
15On the 2 alpha C compilers I had access to, it was not possible to do
1664b x 64b -> 128b calculations (both long and the long long data types
17were 64 bits). So the hand assember gives access to the 128 bit result and
18a 2 times speedup :-).
19
20There are 3 versions of assember for the HP PA-RISC.
21
22pa-risc.s is the origional one which works fine and generated using gcc :-)
23
24pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations
25by Chris Ruemmler from HP (with some help from the HP C compiler).
26
27</OBSOLETE>
diff --git a/src/lib/libcrypto/bn/asm/alpha-mont.pl b/src/lib/libcrypto/bn/asm/alpha-mont.pl
deleted file mode 100644
index 03596e2014..0000000000
--- a/src/lib/libcrypto/bn/asm/alpha-mont.pl
+++ /dev/null
@@ -1,321 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# On 21264 RSA sign performance improves by 70/35/20/15 percent for
11# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
12# instructed to '-tune host' code with in-line assembler. Other
13# benchmarks improve by 15-20%. To anchor it to something else, the
14# code provides approximately the same performance per GHz as AMD64.
15# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
16# difference.
17
18# int bn_mul_mont(
19$rp="a0"; # BN_ULONG *rp,
20$ap="a1"; # const BN_ULONG *ap,
21$bp="a2"; # const BN_ULONG *bp,
22$np="a3"; # const BN_ULONG *np,
23$n0="a4"; # const BN_ULONG *n0,
24$num="a5"; # int num);
25
26$lo0="t0";
27$hi0="t1";
28$lo1="t2";
29$hi1="t3";
30$aj="t4";
31$bi="t5";
32$nj="t6";
33$tp="t7";
34$alo="t8";
35$ahi="t9";
36$nlo="t10";
37$nhi="t11";
38$tj="t12";
39$i="s3";
40$j="s4";
41$m1="s5";
42
43$code=<<___;
44#ifdef __linux__
45#include <asm/regdef.h>
46#else
47#include <asm.h>
48#include <regdef.h>
49#endif
50
51.text
52
53.set noat
54.set noreorder
55
56.globl bn_mul_mont
57.align 5
58.ent bn_mul_mont
59bn_mul_mont:
60 lda sp,-48(sp)
61 stq ra,0(sp)
62 stq s3,8(sp)
63 stq s4,16(sp)
64 stq s5,24(sp)
65 stq fp,32(sp)
66 mov sp,fp
67 .mask 0x0400f000,-48
68 .frame fp,48,ra
69 .prologue 0
70
71 .align 4
72 .set reorder
73 sextl $num,$num
74 mov 0,v0
75 cmplt $num,4,AT
76 bne AT,.Lexit
77
78 ldq $hi0,0($ap) # ap[0]
79 s8addq $num,16,AT
80 ldq $aj,8($ap)
81 subq sp,AT,sp
82 ldq $bi,0($bp) # bp[0]
83 lda AT,-4096(zero) # mov -4096,AT
84 ldq $n0,0($n0)
85 and sp,AT,sp
86
87 mulq $hi0,$bi,$lo0
88 ldq $hi1,0($np) # np[0]
89 umulh $hi0,$bi,$hi0
90 ldq $nj,8($np)
91
92 mulq $lo0,$n0,$m1
93
94 mulq $hi1,$m1,$lo1
95 umulh $hi1,$m1,$hi1
96
97 addq $lo1,$lo0,$lo1
98 cmpult $lo1,$lo0,AT
99 addq $hi1,AT,$hi1
100
101 mulq $aj,$bi,$alo
102 mov 2,$j
103 umulh $aj,$bi,$ahi
104 mov sp,$tp
105
106 mulq $nj,$m1,$nlo
107 s8addq $j,$ap,$aj
108 umulh $nj,$m1,$nhi
109 s8addq $j,$np,$nj
110.align 4
111.L1st:
112 .set noreorder
113 ldq $aj,0($aj)
114 addl $j,1,$j
115 ldq $nj,0($nj)
116 lda $tp,8($tp)
117
118 addq $alo,$hi0,$lo0
119 mulq $aj,$bi,$alo
120 cmpult $lo0,$hi0,AT
121 addq $nlo,$hi1,$lo1
122
123 mulq $nj,$m1,$nlo
124 addq $ahi,AT,$hi0
125 cmpult $lo1,$hi1,v0
126 cmplt $j,$num,$tj
127
128 umulh $aj,$bi,$ahi
129 addq $nhi,v0,$hi1
130 addq $lo1,$lo0,$lo1
131 s8addq $j,$ap,$aj
132
133 umulh $nj,$m1,$nhi
134 cmpult $lo1,$lo0,v0
135 addq $hi1,v0,$hi1
136 s8addq $j,$np,$nj
137
138 stq $lo1,-8($tp)
139 nop
140 unop
141 bne $tj,.L1st
142 .set reorder
143
144 addq $alo,$hi0,$lo0
145 addq $nlo,$hi1,$lo1
146 cmpult $lo0,$hi0,AT
147 cmpult $lo1,$hi1,v0
148 addq $ahi,AT,$hi0
149 addq $nhi,v0,$hi1
150
151 addq $lo1,$lo0,$lo1
152 cmpult $lo1,$lo0,v0
153 addq $hi1,v0,$hi1
154
155 stq $lo1,0($tp)
156
157 addq $hi1,$hi0,$hi1
158 cmpult $hi1,$hi0,AT
159 stq $hi1,8($tp)
160 stq AT,16($tp)
161
162 mov 1,$i
163.align 4
164.Louter:
165 s8addq $i,$bp,$bi
166 ldq $hi0,0($ap)
167 ldq $aj,8($ap)
168 ldq $bi,0($bi)
169 ldq $hi1,0($np)
170 ldq $nj,8($np)
171 ldq $tj,0(sp)
172
173 mulq $hi0,$bi,$lo0
174 umulh $hi0,$bi,$hi0
175
176 addq $lo0,$tj,$lo0
177 cmpult $lo0,$tj,AT
178 addq $hi0,AT,$hi0
179
180 mulq $lo0,$n0,$m1
181
182 mulq $hi1,$m1,$lo1
183 umulh $hi1,$m1,$hi1
184
185 addq $lo1,$lo0,$lo1
186 cmpult $lo1,$lo0,AT
187 mov 2,$j
188 addq $hi1,AT,$hi1
189
190 mulq $aj,$bi,$alo
191 mov sp,$tp
192 umulh $aj,$bi,$ahi
193
194 mulq $nj,$m1,$nlo
195 s8addq $j,$ap,$aj
196 umulh $nj,$m1,$nhi
197.align 4
198.Linner:
199 .set noreorder
200 ldq $tj,8($tp) #L0
201 nop #U1
202 ldq $aj,0($aj) #L1
203 s8addq $j,$np,$nj #U0
204
205 ldq $nj,0($nj) #L0
206 nop #U1
207 addq $alo,$hi0,$lo0 #L1
208 lda $tp,8($tp)
209
210 mulq $aj,$bi,$alo #U1
211 cmpult $lo0,$hi0,AT #L0
212 addq $nlo,$hi1,$lo1 #L1
213 addl $j,1,$j
214
215 mulq $nj,$m1,$nlo #U1
216 addq $ahi,AT,$hi0 #L0
217 addq $lo0,$tj,$lo0 #L1
218 cmpult $lo1,$hi1,v0 #U0
219
220 umulh $aj,$bi,$ahi #U1
221 cmpult $lo0,$tj,AT #L0
222 addq $lo1,$lo0,$lo1 #L1
223 addq $nhi,v0,$hi1 #U0
224
225 umulh $nj,$m1,$nhi #U1
226 s8addq $j,$ap,$aj #L0
227 cmpult $lo1,$lo0,v0 #L1
228 cmplt $j,$num,$tj #U0 # borrow $tj
229
230 addq $hi0,AT,$hi0 #L0
231 addq $hi1,v0,$hi1 #U1
232 stq $lo1,-8($tp) #L1
233 bne $tj,.Linner #U0
234 .set reorder
235
236 ldq $tj,8($tp)
237 addq $alo,$hi0,$lo0
238 addq $nlo,$hi1,$lo1
239 cmpult $lo0,$hi0,AT
240 cmpult $lo1,$hi1,v0
241 addq $ahi,AT,$hi0
242 addq $nhi,v0,$hi1
243
244 addq $lo0,$tj,$lo0
245 cmpult $lo0,$tj,AT
246 addq $hi0,AT,$hi0
247
248 ldq $tj,16($tp)
249 addq $lo1,$lo0,$j
250 cmpult $j,$lo0,v0
251 addq $hi1,v0,$hi1
252
253 addq $hi1,$hi0,$lo1
254 stq $j,0($tp)
255 cmpult $lo1,$hi0,$hi1
256 addq $lo1,$tj,$lo1
257 cmpult $lo1,$tj,AT
258 addl $i,1,$i
259 addq $hi1,AT,$hi1
260 stq $lo1,8($tp)
261 cmplt $i,$num,$tj # borrow $tj
262 stq $hi1,16($tp)
263 bne $tj,.Louter
264
265 s8addq $num,sp,$tj # &tp[num]
266 mov $rp,$bp # put rp aside
267 mov sp,$tp
268 mov sp,$ap
269 mov 0,$hi0 # clear borrow bit
270
271.align 4
272.Lsub: ldq $lo0,0($tp)
273 ldq $lo1,0($np)
274 lda $tp,8($tp)
275 lda $np,8($np)
276 subq $lo0,$lo1,$lo1 # tp[i]-np[i]
277 cmpult $lo0,$lo1,AT
278 subq $lo1,$hi0,$lo0
279 cmpult $lo1,$lo0,$hi0
280 or $hi0,AT,$hi0
281 stq $lo0,0($rp)
282 cmpult $tp,$tj,v0
283 lda $rp,8($rp)
284 bne v0,.Lsub
285
286 subq $hi1,$hi0,$hi0 # handle upmost overflow bit
287 mov sp,$tp
288 mov $bp,$rp # restore rp
289
290 and sp,$hi0,$ap
291 bic $bp,$hi0,$bp
292 bis $bp,$ap,$ap # ap=borrow?tp:rp
293
294.align 4
295.Lcopy: ldq $aj,0($ap) # copy or in-place refresh
296 lda $tp,8($tp)
297 lda $rp,8($rp)
298 lda $ap,8($ap)
299 stq zero,-8($tp) # zap tp
300 cmpult $tp,$tj,AT
301 stq $aj,-8($rp)
302 bne AT,.Lcopy
303 mov 1,v0
304
305.Lexit:
306 .set noreorder
307 mov fp,sp
308 /*ldq ra,0(sp)*/
309 ldq s3,8(sp)
310 ldq s4,16(sp)
311 ldq s5,24(sp)
312 ldq fp,32(sp)
313 lda sp,48(sp)
314 ret (ra)
315.end bn_mul_mont
316.ascii "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
317.align 2
318___
319
320print $code;
321close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/armv4-mont.pl b/src/lib/libcrypto/bn/asm/armv4-mont.pl
deleted file mode 100644
index 14e0d2d1dd..0000000000
--- a/src/lib/libcrypto/bn/asm/armv4-mont.pl
+++ /dev/null
@@ -1,201 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# January 2007.
11
12# Montgomery multiplication for ARMv4.
13#
14# Performance improvement naturally varies among CPU implementations
15# and compilers. The code was observed to provide +65-35% improvement
16# [depending on key length, less for longer keys] on ARM920T, and
17# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
18# base and compiler generated code with in-lined umull and even umlal
19# instructions. The latter means that this code didn't really have an
20# "advantage" of utilizing some "secret" instruction.
21#
22# The code is interoperable with Thumb ISA and is rather compact, less
23# than 1/2KB. Windows CE port would be trivial, as it's exclusively
24# about decorations, ABI and instruction syntax are identical.
25
26$num="r0"; # starts as num argument, but holds &tp[num-1]
27$ap="r1";
28$bp="r2"; $bi="r2"; $rp="r2";
29$np="r3";
30$tp="r4";
31$aj="r5";
32$nj="r6";
33$tj="r7";
34$n0="r8";
35########### # r9 is reserved by ELF as platform specific, e.g. TLS pointer
36$alo="r10"; # sl, gcc uses it to keep @GOT
37$ahi="r11"; # fp
38$nlo="r12"; # ip
39########### # r13 is stack pointer
40$nhi="r14"; # lr
41########### # r15 is program counter
42
43#### argument block layout relative to &tp[num-1], a.k.a. $num
44$_rp="$num,#12*4";
45# ap permanently resides in r1
46$_bp="$num,#13*4";
47# np permanently resides in r3
48$_n0="$num,#14*4";
49$_num="$num,#15*4"; $_bpend=$_num;
50
51$code=<<___;
52.text
53
54.global bn_mul_mont
55.type bn_mul_mont,%function
56
57.align 2
58bn_mul_mont:
59 stmdb sp!,{r0,r2} @ sp points at argument block
60 ldr $num,[sp,#3*4] @ load num
61 cmp $num,#2
62 movlt r0,#0
63 addlt sp,sp,#2*4
64 blt .Labrt
65
66 stmdb sp!,{r4-r12,lr} @ save 10 registers
67
68 mov $num,$num,lsl#2 @ rescale $num for byte count
69 sub sp,sp,$num @ alloca(4*num)
70 sub sp,sp,#4 @ +extra dword
71 sub $num,$num,#4 @ "num=num-1"
72 add $tp,$bp,$num @ &bp[num-1]
73
74 add $num,sp,$num @ $num to point at &tp[num-1]
75 ldr $n0,[$_n0] @ &n0
76 ldr $bi,[$bp] @ bp[0]
77 ldr $aj,[$ap],#4 @ ap[0],ap++
78 ldr $nj,[$np],#4 @ np[0],np++
79 ldr $n0,[$n0] @ *n0
80 str $tp,[$_bpend] @ save &bp[num]
81
82 umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0]
83 str $n0,[$_n0] @ save n0 value
84 mul $n0,$alo,$n0 @ "tp[0]"*n0
85 mov $nlo,#0
86 umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]"
87 mov $tp,sp
88
89.L1st:
90 ldr $aj,[$ap],#4 @ ap[j],ap++
91 mov $alo,$ahi
92 mov $ahi,#0
93 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0]
94 ldr $nj,[$np],#4 @ np[j],np++
95 mov $nhi,#0
96 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
97 adds $nlo,$nlo,$alo
98 str $nlo,[$tp],#4 @ tp[j-1]=,tp++
99 adc $nlo,$nhi,#0
100 cmp $tp,$num
101 bne .L1st
102
103 adds $nlo,$nlo,$ahi
104 mov $nhi,#0
105 adc $nhi,$nhi,#0
106 ldr $tp,[$_bp] @ restore bp
107 str $nlo,[$num] @ tp[num-1]=
108 ldr $n0,[$_n0] @ restore n0
109 str $nhi,[$num,#4] @ tp[num]=
110
111.Louter:
112 sub $tj,$num,sp @ "original" $num-1 value
113 sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
114 sub $np,$np,$tj @ "rewind" np to &np[1]
115 ldr $bi,[$tp,#4]! @ *(++bp)
116 ldr $aj,[$ap,#-4] @ ap[0]
117 ldr $nj,[$np,#-4] @ np[0]
118 ldr $alo,[sp] @ tp[0]
119 ldr $tj,[sp,#4] @ tp[1]
120
121 mov $ahi,#0
122 umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0]
123 str $tp,[$_bp] @ save bp
124 mul $n0,$alo,$n0
125 mov $nlo,#0
126 umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]"
127 mov $tp,sp
128
129.Linner:
130 ldr $aj,[$ap],#4 @ ap[j],ap++
131 adds $alo,$ahi,$tj @ +=tp[j]
132 mov $ahi,#0
133 umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i]
134 ldr $nj,[$np],#4 @ np[j],np++
135 mov $nhi,#0
136 umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
137 ldr $tj,[$tp,#8] @ tp[j+1]
138 adc $ahi,$ahi,#0
139 adds $nlo,$nlo,$alo
140 str $nlo,[$tp],#4 @ tp[j-1]=,tp++
141 adc $nlo,$nhi,#0
142 cmp $tp,$num
143 bne .Linner
144
145 adds $nlo,$nlo,$ahi
146 mov $nhi,#0
147 adc $nhi,$nhi,#0
148 adds $nlo,$nlo,$tj
149 adc $nhi,$nhi,#0
150 ldr $tp,[$_bp] @ restore bp
151 ldr $tj,[$_bpend] @ restore &bp[num]
152 str $nlo,[$num] @ tp[num-1]=
153 ldr $n0,[$_n0] @ restore n0
154 str $nhi,[$num,#4] @ tp[num]=
155
156 cmp $tp,$tj
157 bne .Louter
158
159 ldr $rp,[$_rp] @ pull rp
160 add $num,$num,#4 @ $num to point at &tp[num]
161 sub $aj,$num,sp @ "original" num value
162 mov $tp,sp @ "rewind" $tp
163 mov $ap,$tp @ "borrow" $ap
164 sub $np,$np,$aj @ "rewind" $np to &np[0]
165
166 subs $tj,$tj,$tj @ "clear" carry flag
167.Lsub: ldr $tj,[$tp],#4
168 ldr $nj,[$np],#4
169 sbcs $tj,$tj,$nj @ tp[j]-np[j]
170 str $tj,[$rp],#4 @ rp[j]=
171 teq $tp,$num @ preserve carry
172 bne .Lsub
173 sbcs $nhi,$nhi,#0 @ upmost carry
174 mov $tp,sp @ "rewind" $tp
175 sub $rp,$rp,$aj @ "rewind" $rp
176
177 and $ap,$tp,$nhi
178 bic $np,$rp,$nhi
179 orr $ap,$ap,$np @ ap=borrow?tp:rp
180
181.Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh
182 str sp,[$tp],#4 @ zap tp
183 str $tj,[$rp],#4
184 cmp $tp,$num
185 bne .Lcopy
186
187 add sp,$num,#4 @ skip over tp[num+1]
188 ldmia sp!,{r4-r12,lr} @ restore registers
189 add sp,sp,#2*4 @ skip over {r0,r2}
190 mov r0,#1
191.Labrt: tst lr,#1
192 moveq pc,lr @ be binary compatible with V4, yet
193 bx lr @ interoperable with Thumb ISA:-)
194.size bn_mul_mont,.-bn_mul_mont
195.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
196.align 2
197___
198
199$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
200print $code;
201close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
deleted file mode 100644
index 332ef3e91d..0000000000
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ /dev/null
@@ -1,774 +0,0 @@
1#!/usr/local/bin/perl
2
3$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
4push(@INC,"${dir}","${dir}../../perlasm");
5require "x86asm.pl";
6
7&asm_init($ARGV[0],$0);
8
9$sse2=0;
10for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
11
12&external_label("OPENSSL_ia32cap_P") if ($sse2);
13
14&bn_mul_add_words("bn_mul_add_words");
15&bn_mul_words("bn_mul_words");
16&bn_sqr_words("bn_sqr_words");
17&bn_div_words("bn_div_words");
18&bn_add_words("bn_add_words");
19&bn_sub_words("bn_sub_words");
20&bn_sub_part_words("bn_sub_part_words");
21
22&asm_finish();
23
24sub bn_mul_add_words
25 {
26 local($name)=@_;
27
28 &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
29
30 $r="eax";
31 $a="edx";
32 $c="ecx";
33
34 if ($sse2) {
35 &picmeup("eax","OPENSSL_ia32cap_P");
36 &bt(&DWP(0,"eax"),26);
37 &jnc(&label("maw_non_sse2"));
38
39 &mov($r,&wparam(0));
40 &mov($a,&wparam(1));
41 &mov($c,&wparam(2));
42 &movd("mm0",&wparam(3)); # mm0 = w
43 &pxor("mm1","mm1"); # mm1 = carry_in
44 &jmp(&label("maw_sse2_entry"));
45
46 &set_label("maw_sse2_unrolled",16);
47 &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
48 &paddq("mm1","mm3"); # mm1 = carry_in + r[0]
49 &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
50 &pmuludq("mm2","mm0"); # mm2 = w*a[0]
51 &movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1]
52 &pmuludq("mm4","mm0"); # mm4 = w*a[1]
53 &movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2]
54 &pmuludq("mm6","mm0"); # mm6 = w*a[2]
55 &movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3]
56 &pmuludq("mm7","mm0"); # mm7 = w*a[3]
57 &paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0]
58 &movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1]
59 &paddq("mm3","mm4"); # mm3 = r[1] + w*a[1]
60 &movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2]
61 &paddq("mm5","mm6"); # mm5 = r[2] + w*a[2]
62 &movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3]
63 &paddq("mm7","mm4"); # mm7 = r[3] + w*a[3]
64 &movd(&DWP(0,$r,"",0),"mm1");
65 &movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4]
66 &pmuludq("mm2","mm0"); # mm2 = w*a[4]
67 &psrlq("mm1",32); # mm1 = carry0
68 &movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5]
69 &pmuludq("mm4","mm0"); # mm4 = w*a[5]
70 &paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1]
71 &movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6]
72 &pmuludq("mm6","mm0"); # mm6 = w*a[6]
73 &movd(&DWP(4,$r,"",0),"mm1");
74 &psrlq("mm1",32); # mm1 = carry1
75 &movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7]
76 &add($a,32);
77 &pmuludq("mm3","mm0"); # mm3 = w*a[7]
78 &paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2]
79 &movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4]
80 &paddq("mm2","mm5"); # mm2 = r[4] + w*a[4]
81 &movd(&DWP(8,$r,"",0),"mm1");
82 &psrlq("mm1",32); # mm1 = carry2
83 &paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3]
84 &movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5]
85 &paddq("mm4","mm5"); # mm4 = r[5] + w*a[5]
86 &movd(&DWP(12,$r,"",0),"mm1");
87 &psrlq("mm1",32); # mm1 = carry3
88 &paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4]
89 &movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6]
90 &paddq("mm6","mm5"); # mm6 = r[6] + w*a[6]
91 &movd(&DWP(16,$r,"",0),"mm1");
92 &psrlq("mm1",32); # mm1 = carry4
93 &paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5]
94 &movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7]
95 &paddq("mm3","mm5"); # mm3 = r[7] + w*a[7]
96 &movd(&DWP(20,$r,"",0),"mm1");
97 &psrlq("mm1",32); # mm1 = carry5
98 &paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6]
99 &movd(&DWP(24,$r,"",0),"mm1");
100 &psrlq("mm1",32); # mm1 = carry6
101 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
102 &movd(&DWP(28,$r,"",0),"mm1");
103 &lea($r,&DWP(32,$r));
104 &psrlq("mm1",32); # mm1 = carry_out
105
106 &sub($c,8);
107 &jz(&label("maw_sse2_exit"));
108 &set_label("maw_sse2_entry");
109 &test($c,0xfffffff8);
110 &jnz(&label("maw_sse2_unrolled"));
111
112 &set_label("maw_sse2_loop",4);
113 &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
114 &movd("mm3",&DWP(0,$r)); # mm3 = r[i]
115 &pmuludq("mm2","mm0"); # a[i] *= w
116 &lea($a,&DWP(4,$a));
117 &paddq("mm1","mm3"); # carry += r[i]
118 &paddq("mm1","mm2"); # carry += a[i]*w
119 &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
120 &sub($c,1);
121 &psrlq("mm1",32); # carry = carry_high
122 &lea($r,&DWP(4,$r));
123 &jnz(&label("maw_sse2_loop"));
124 &set_label("maw_sse2_exit");
125 &movd("eax","mm1"); # c = carry_out
126 &emms();
127 &ret();
128
129 &set_label("maw_non_sse2",16);
130 }
131
132 # function_begin prologue
133 &push("ebp");
134 &push("ebx");
135 &push("esi");
136 &push("edi");
137
138 &comment("");
139 $Low="eax";
140 $High="edx";
141 $a="ebx";
142 $w="ebp";
143 $r="edi";
144 $c="esi";
145
146 &xor($c,$c); # clear carry
147 &mov($r,&wparam(0)); #
148
149 &mov("ecx",&wparam(2)); #
150 &mov($a,&wparam(1)); #
151
152 &and("ecx",0xfffffff8); # num / 8
153 &mov($w,&wparam(3)); #
154
155 &push("ecx"); # Up the stack for a tmp variable
156
157 &jz(&label("maw_finish"));
158
159 &set_label("maw_loop",16);
160
161 for ($i=0; $i<32; $i+=4)
162 {
163 &comment("Round $i");
164
165 &mov("eax",&DWP($i,$a)); # *a
166 &mul($w); # *a * w
167 &add("eax",$c); # L(t)+= c
168 &adc("edx",0); # H(t)+=carry
169 &add("eax",&DWP($i,$r)); # L(t)+= *r
170 &adc("edx",0); # H(t)+=carry
171 &mov(&DWP($i,$r),"eax"); # *r= L(t);
172 &mov($c,"edx"); # c= H(t);
173 }
174
175 &comment("");
176 &sub("ecx",8);
177 &lea($a,&DWP(32,$a));
178 &lea($r,&DWP(32,$r));
179 &jnz(&label("maw_loop"));
180
181 &set_label("maw_finish",0);
182 &mov("ecx",&wparam(2)); # get num
183 &and("ecx",7);
184 &jnz(&label("maw_finish2")); # helps branch prediction
185 &jmp(&label("maw_end"));
186
187 &set_label("maw_finish2",1);
188 for ($i=0; $i<7; $i++)
189 {
190 &comment("Tail Round $i");
191 &mov("eax",&DWP($i*4,$a)); # *a
192 &mul($w); # *a * w
193 &add("eax",$c); # L(t)+=c
194 &adc("edx",0); # H(t)+=carry
195 &add("eax",&DWP($i*4,$r)); # L(t)+= *r
196 &adc("edx",0); # H(t)+=carry
197 &dec("ecx") if ($i != 7-1);
198 &mov(&DWP($i*4,$r),"eax"); # *r= L(t);
199 &mov($c,"edx"); # c= H(t);
200 &jz(&label("maw_end")) if ($i != 7-1);
201 }
202 &set_label("maw_end",0);
203 &mov("eax",$c);
204
205 &pop("ecx"); # clear variable from
206
207 &function_end($name);
208 }
209
210sub bn_mul_words
211 {
212 local($name)=@_;
213
214 &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
215
216 $r="eax";
217 $a="edx";
218 $c="ecx";
219
220 if ($sse2) {
221 &picmeup("eax","OPENSSL_ia32cap_P");
222 &bt(&DWP(0,"eax"),26);
223 &jnc(&label("mw_non_sse2"));
224
225 &mov($r,&wparam(0));
226 &mov($a,&wparam(1));
227 &mov($c,&wparam(2));
228 &movd("mm0",&wparam(3)); # mm0 = w
229 &pxor("mm1","mm1"); # mm1 = carry = 0
230
231 &set_label("mw_sse2_loop",16);
232 &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
233 &pmuludq("mm2","mm0"); # a[i] *= w
234 &lea($a,&DWP(4,$a));
235 &paddq("mm1","mm2"); # carry += a[i]*w
236 &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
237 &sub($c,1);
238 &psrlq("mm1",32); # carry = carry_high
239 &lea($r,&DWP(4,$r));
240 &jnz(&label("mw_sse2_loop"));
241
242 &movd("eax","mm1"); # return carry
243 &emms();
244 &ret();
245 &set_label("mw_non_sse2",16);
246 }
247
248 # function_begin prologue
249 &push("ebp");
250 &push("ebx");
251 &push("esi");
252 &push("edi");
253
254 &comment("");
255 $Low="eax";
256 $High="edx";
257 $a="ebx";
258 $w="ecx";
259 $r="edi";
260 $c="esi";
261 $num="ebp";
262
263 &xor($c,$c); # clear carry
264 &mov($r,&wparam(0)); #
265 &mov($a,&wparam(1)); #
266 &mov($num,&wparam(2)); #
267 &mov($w,&wparam(3)); #
268
269 &and($num,0xfffffff8); # num / 8
270 &jz(&label("mw_finish"));
271
272 &set_label("mw_loop",0);
273 for ($i=0; $i<32; $i+=4)
274 {
275 &comment("Round $i");
276
277 &mov("eax",&DWP($i,$a,"",0)); # *a
278 &mul($w); # *a * w
279 &add("eax",$c); # L(t)+=c
280 # XXX
281
282 &adc("edx",0); # H(t)+=carry
283 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
284
285 &mov($c,"edx"); # c= H(t);
286 }
287
288 &comment("");
289 &add($a,32);
290 &add($r,32);
291 &sub($num,8);
292 &jz(&label("mw_finish"));
293 &jmp(&label("mw_loop"));
294
295 &set_label("mw_finish",0);
296 &mov($num,&wparam(2)); # get num
297 &and($num,7);
298 &jnz(&label("mw_finish2"));
299 &jmp(&label("mw_end"));
300
301 &set_label("mw_finish2",1);
302 for ($i=0; $i<7; $i++)
303 {
304 &comment("Tail Round $i");
305 &mov("eax",&DWP($i*4,$a,"",0));# *a
306 &mul($w); # *a * w
307 &add("eax",$c); # L(t)+=c
308 # XXX
309 &adc("edx",0); # H(t)+=carry
310 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
311 &mov($c,"edx"); # c= H(t);
312 &dec($num) if ($i != 7-1);
313 &jz(&label("mw_end")) if ($i != 7-1);
314 }
315 &set_label("mw_end",0);
316 &mov("eax",$c);
317
318 &function_end($name);
319 }
320
321sub bn_sqr_words
322 {
323 local($name)=@_;
324
325 &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
326
327 $r="eax";
328 $a="edx";
329 $c="ecx";
330
331 if ($sse2) {
332 &picmeup("eax","OPENSSL_ia32cap_P");
333 &bt(&DWP(0,"eax"),26);
334 &jnc(&label("sqr_non_sse2"));
335
336 &mov($r,&wparam(0));
337 &mov($a,&wparam(1));
338 &mov($c,&wparam(2));
339
340 &set_label("sqr_sse2_loop",16);
341 &movd("mm0",&DWP(0,$a)); # mm0 = a[i]
342 &pmuludq("mm0","mm0"); # a[i] *= a[i]
343 &lea($a,&DWP(4,$a)); # a++
344 &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i]
345 &sub($c,1);
346 &lea($r,&DWP(8,$r)); # r += 2
347 &jnz(&label("sqr_sse2_loop"));
348
349 &emms();
350 &ret();
351 &set_label("sqr_non_sse2",16);
352 }
353
354 # function_begin prologue
355 &push("ebp");
356 &push("ebx");
357 &push("esi");
358 &push("edi");
359
360 &comment("");
361 $r="esi";
362 $a="edi";
363 $num="ebx";
364
365 &mov($r,&wparam(0)); #
366 &mov($a,&wparam(1)); #
367 &mov($num,&wparam(2)); #
368
369 &and($num,0xfffffff8); # num / 8
370 &jz(&label("sw_finish"));
371
372 &set_label("sw_loop",0);
373 for ($i=0; $i<32; $i+=4)
374 {
375 &comment("Round $i");
376 &mov("eax",&DWP($i,$a,"",0)); # *a
377 # XXX
378 &mul("eax"); # *a * *a
379 &mov(&DWP($i*2,$r,"",0),"eax"); #
380 &mov(&DWP($i*2+4,$r,"",0),"edx");#
381 }
382
383 &comment("");
384 &add($a,32);
385 &add($r,64);
386 &sub($num,8);
387 &jnz(&label("sw_loop"));
388
389 &set_label("sw_finish",0);
390 &mov($num,&wparam(2)); # get num
391 &and($num,7);
392 &jz(&label("sw_end"));
393
394 for ($i=0; $i<7; $i++)
395 {
396 &comment("Tail Round $i");
397 &mov("eax",&DWP($i*4,$a,"",0)); # *a
398 # XXX
399 &mul("eax"); # *a * *a
400 &mov(&DWP($i*8,$r,"",0),"eax"); #
401 &dec($num) if ($i != 7-1);
402 &mov(&DWP($i*8+4,$r,"",0),"edx");
403 &jz(&label("sw_end")) if ($i != 7-1);
404 }
405 &set_label("sw_end",0);
406
407 &function_end($name);
408 }
409
410sub bn_div_words
411 {
412 local($name)=@_;
413
414 &function_begin_B($name,"");
415 &mov("edx",&wparam(0)); #
416 &mov("eax",&wparam(1)); #
417 &mov("ecx",&wparam(2)); #
418 &div("ecx");
419 &ret();
420 &function_end_B($name);
421 }
422
423sub bn_add_words
424 {
425 local($name)=@_;
426
427 &function_begin($name,"");
428
429 &comment("");
430 $a="esi";
431 $b="edi";
432 $c="eax";
433 $r="ebx";
434 $tmp1="ecx";
435 $tmp2="edx";
436 $num="ebp";
437
438 &mov($r,&wparam(0)); # get r
439 &mov($a,&wparam(1)); # get a
440 &mov($b,&wparam(2)); # get b
441 &mov($num,&wparam(3)); # get num
442 &xor($c,$c); # clear carry
443 &and($num,0xfffffff8); # num / 8
444
445 &jz(&label("aw_finish"));
446
447 &set_label("aw_loop",0);
448 for ($i=0; $i<8; $i++)
449 {
450 &comment("Round $i");
451
452 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
453 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
454 &add($tmp1,$c);
455 &mov($c,0);
456 &adc($c,$c);
457 &add($tmp1,$tmp2);
458 &adc($c,0);
459 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
460 }
461
462 &comment("");
463 &add($a,32);
464 &add($b,32);
465 &add($r,32);
466 &sub($num,8);
467 &jnz(&label("aw_loop"));
468
469 &set_label("aw_finish",0);
470 &mov($num,&wparam(3)); # get num
471 &and($num,7);
472 &jz(&label("aw_end"));
473
474 for ($i=0; $i<7; $i++)
475 {
476 &comment("Tail Round $i");
477 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
478 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
479 &add($tmp1,$c);
480 &mov($c,0);
481 &adc($c,$c);
482 &add($tmp1,$tmp2);
483 &adc($c,0);
484 &dec($num) if ($i != 6);
485 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
486 &jz(&label("aw_end")) if ($i != 6);
487 }
488 &set_label("aw_end",0);
489
490# &mov("eax",$c); # $c is "eax"
491
492 &function_end($name);
493 }
494
495sub bn_sub_words
496 {
497 local($name)=@_;
498
499 &function_begin($name,"");
500
501 &comment("");
502 $a="esi";
503 $b="edi";
504 $c="eax";
505 $r="ebx";
506 $tmp1="ecx";
507 $tmp2="edx";
508 $num="ebp";
509
510 &mov($r,&wparam(0)); # get r
511 &mov($a,&wparam(1)); # get a
512 &mov($b,&wparam(2)); # get b
513 &mov($num,&wparam(3)); # get num
514 &xor($c,$c); # clear carry
515 &and($num,0xfffffff8); # num / 8
516
517 &jz(&label("aw_finish"));
518
519 &set_label("aw_loop",0);
520 for ($i=0; $i<8; $i++)
521 {
522 &comment("Round $i");
523
524 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
525 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
526 &sub($tmp1,$c);
527 &mov($c,0);
528 &adc($c,$c);
529 &sub($tmp1,$tmp2);
530 &adc($c,0);
531 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
532 }
533
534 &comment("");
535 &add($a,32);
536 &add($b,32);
537 &add($r,32);
538 &sub($num,8);
539 &jnz(&label("aw_loop"));
540
541 &set_label("aw_finish",0);
542 &mov($num,&wparam(3)); # get num
543 &and($num,7);
544 &jz(&label("aw_end"));
545
546 for ($i=0; $i<7; $i++)
547 {
548 &comment("Tail Round $i");
549 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
550 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
551 &sub($tmp1,$c);
552 &mov($c,0);
553 &adc($c,$c);
554 &sub($tmp1,$tmp2);
555 &adc($c,0);
556 &dec($num) if ($i != 6);
557 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
558 &jz(&label("aw_end")) if ($i != 6);
559 }
560 &set_label("aw_end",0);
561
562# &mov("eax",$c); # $c is "eax"
563
564 &function_end($name);
565 }
566
567sub bn_sub_part_words
568 {
569 local($name)=@_;
570
571 &function_begin($name,"");
572
573 &comment("");
574 $a="esi";
575 $b="edi";
576 $c="eax";
577 $r="ebx";
578 $tmp1="ecx";
579 $tmp2="edx";
580 $num="ebp";
581
582 &mov($r,&wparam(0)); # get r
583 &mov($a,&wparam(1)); # get a
584 &mov($b,&wparam(2)); # get b
585 &mov($num,&wparam(3)); # get num
586 &xor($c,$c); # clear carry
587 &and($num,0xfffffff8); # num / 8
588
589 &jz(&label("aw_finish"));
590
591 &set_label("aw_loop",0);
592 for ($i=0; $i<8; $i++)
593 {
594 &comment("Round $i");
595
596 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
597 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
598 &sub($tmp1,$c);
599 &mov($c,0);
600 &adc($c,$c);
601 &sub($tmp1,$tmp2);
602 &adc($c,0);
603 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
604 }
605
606 &comment("");
607 &add($a,32);
608 &add($b,32);
609 &add($r,32);
610 &sub($num,8);
611 &jnz(&label("aw_loop"));
612
613 &set_label("aw_finish",0);
614 &mov($num,&wparam(3)); # get num
615 &and($num,7);
616 &jz(&label("aw_end"));
617
618 for ($i=0; $i<7; $i++)
619 {
620 &comment("Tail Round $i");
621 &mov($tmp1,&DWP(0,$a,"",0)); # *a
622 &mov($tmp2,&DWP(0,$b,"",0));# *b
623 &sub($tmp1,$c);
624 &mov($c,0);
625 &adc($c,$c);
626 &sub($tmp1,$tmp2);
627 &adc($c,0);
628 &mov(&DWP(0,$r,"",0),$tmp1); # *r
629 &add($a, 4);
630 &add($b, 4);
631 &add($r, 4);
632 &dec($num) if ($i != 6);
633 &jz(&label("aw_end")) if ($i != 6);
634 }
635 &set_label("aw_end",0);
636
637 &cmp(&wparam(4),0);
638 &je(&label("pw_end"));
639
640 &mov($num,&wparam(4)); # get dl
641 &cmp($num,0);
642 &je(&label("pw_end"));
643 &jge(&label("pw_pos"));
644
645 &comment("pw_neg");
646 &mov($tmp2,0);
647 &sub($tmp2,$num);
648 &mov($num,$tmp2);
649 &and($num,0xfffffff8); # num / 8
650 &jz(&label("pw_neg_finish"));
651
652 &set_label("pw_neg_loop",0);
653 for ($i=0; $i<8; $i++)
654 {
655 &comment("dl<0 Round $i");
656
657 &mov($tmp1,0);
658 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
659 &sub($tmp1,$c);
660 &mov($c,0);
661 &adc($c,$c);
662 &sub($tmp1,$tmp2);
663 &adc($c,0);
664 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
665 }
666
667 &comment("");
668 &add($b,32);
669 &add($r,32);
670 &sub($num,8);
671 &jnz(&label("pw_neg_loop"));
672
673 &set_label("pw_neg_finish",0);
674 &mov($tmp2,&wparam(4)); # get dl
675 &mov($num,0);
676 &sub($num,$tmp2);
677 &and($num,7);
678 &jz(&label("pw_end"));
679
680 for ($i=0; $i<7; $i++)
681 {
682 &comment("dl<0 Tail Round $i");
683 &mov($tmp1,0);
684 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
685 &sub($tmp1,$c);
686 &mov($c,0);
687 &adc($c,$c);
688 &sub($tmp1,$tmp2);
689 &adc($c,0);
690 &dec($num) if ($i != 6);
691 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
692 &jz(&label("pw_end")) if ($i != 6);
693 }
694
695 &jmp(&label("pw_end"));
696
697 &set_label("pw_pos",0);
698
699 &and($num,0xfffffff8); # num / 8
700 &jz(&label("pw_pos_finish"));
701
702 &set_label("pw_pos_loop",0);
703
704 for ($i=0; $i<8; $i++)
705 {
706 &comment("dl>0 Round $i");
707
708 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
709 &sub($tmp1,$c);
710 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
711 &jnc(&label("pw_nc".$i));
712 }
713
714 &comment("");
715 &add($a,32);
716 &add($r,32);
717 &sub($num,8);
718 &jnz(&label("pw_pos_loop"));
719
720 &set_label("pw_pos_finish",0);
721 &mov($num,&wparam(4)); # get dl
722 &and($num,7);
723 &jz(&label("pw_end"));
724
725 for ($i=0; $i<7; $i++)
726 {
727 &comment("dl>0 Tail Round $i");
728 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
729 &sub($tmp1,$c);
730 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
731 &jnc(&label("pw_tail_nc".$i));
732 &dec($num) if ($i != 6);
733 &jz(&label("pw_end")) if ($i != 6);
734 }
735 &mov($c,1);
736 &jmp(&label("pw_end"));
737
738 &set_label("pw_nc_loop",0);
739 for ($i=0; $i<8; $i++)
740 {
741 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
742 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
743 &set_label("pw_nc".$i,0);
744 }
745
746 &comment("");
747 &add($a,32);
748 &add($r,32);
749 &sub($num,8);
750 &jnz(&label("pw_nc_loop"));
751
752 &mov($num,&wparam(4)); # get dl
753 &and($num,7);
754 &jz(&label("pw_nc_end"));
755
756 for ($i=0; $i<7; $i++)
757 {
758 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
759 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
760 &set_label("pw_tail_nc".$i,0);
761 &dec($num) if ($i != 6);
762 &jz(&label("pw_nc_end")) if ($i != 6);
763 }
764
765 &set_label("pw_nc_end",0);
766 &mov($c,0);
767
768 &set_label("pw_end",0);
769
770# &mov("eax",$c); # $c is "eax"
771
772 &function_end($name);
773 }
774
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
deleted file mode 100644
index 57101a6bd7..0000000000
--- a/src/lib/libcrypto/bn/asm/co-586.pl
+++ /dev/null
@@ -1,287 +0,0 @@
1#!/usr/local/bin/perl
2
3$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
4push(@INC,"${dir}","${dir}../../perlasm");
5require "x86asm.pl";
6
7&asm_init($ARGV[0],$0);
8
9&bn_mul_comba("bn_mul_comba8",8);
10&bn_mul_comba("bn_mul_comba4",4);
11&bn_sqr_comba("bn_sqr_comba8",8);
12&bn_sqr_comba("bn_sqr_comba4",4);
13
14&asm_finish();
15
16sub mul_add_c
17 {
18 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
19
20 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
21 # words, and 1 if load return value
22
23 &comment("mul a[$ai]*b[$bi]");
24
25 # "eax" and "edx" will always be pre-loaded.
26 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
27 # &mov("edx",&DWP($bi*4,$b,"",0));
28
29 &mul("edx");
30 &add($c0,"eax");
31 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a
32 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
33 ###
34 &adc($c1,"edx");
35 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b
36 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
37 ###
38 &adc($c2,0);
39 # is pos > 1, it means it is the last loop
40 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
41 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
42 }
43
44sub sqr_add_c
45 {
46 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
47
48 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
49 # words, and 1 if load return value
50
51 &comment("sqr a[$ai]*a[$bi]");
52
53 # "eax" and "edx" will always be pre-loaded.
54 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
55 # &mov("edx",&DWP($bi*4,$b,"",0));
56
57 if ($ai == $bi)
58 { &mul("eax");}
59 else
60 { &mul("edx");}
61 &add($c0,"eax");
62 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
63 ###
64 &adc($c1,"edx");
65 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
66 ###
67 &adc($c2,0);
68 # is pos > 1, it means it is the last loop
69 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
70 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
71 }
72
73sub sqr_add_c2
74 {
75 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
76
77 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
78 # words, and 1 if load return value
79
80 &comment("sqr a[$ai]*a[$bi]");
81
82 # "eax" and "edx" will always be pre-loaded.
83 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
84 # &mov("edx",&DWP($bi*4,$a,"",0));
85
86 if ($ai == $bi)
87 { &mul("eax");}
88 else
89 { &mul("edx");}
90 &add("eax","eax");
91 ###
92 &adc("edx","edx");
93 ###
94 &adc($c2,0);
95 &add($c0,"eax");
96 &adc($c1,"edx");
97 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
98 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
99 &adc($c2,0);
100 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
101 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
102 ###
103 }
104
105sub bn_mul_comba
106 {
107 local($name,$num)=@_;
108 local($a,$b,$c0,$c1,$c2);
109 local($i,$as,$ae,$bs,$be,$ai,$bi);
110 local($tot,$end);
111
112 &function_begin_B($name,"");
113
114 $c0="ebx";
115 $c1="ecx";
116 $c2="ebp";
117 $a="esi";
118 $b="edi";
119
120 $as=0;
121 $ae=0;
122 $bs=0;
123 $be=0;
124 $tot=$num+$num-1;
125
126 &push("esi");
127 &mov($a,&wparam(1));
128 &push("edi");
129 &mov($b,&wparam(2));
130 &push("ebp");
131 &push("ebx");
132
133 &xor($c0,$c0);
134 &mov("eax",&DWP(0,$a,"",0)); # load the first word
135 &xor($c1,$c1);
136 &mov("edx",&DWP(0,$b,"",0)); # load the first second
137
138 for ($i=0; $i<$tot; $i++)
139 {
140 $ai=$as;
141 $bi=$bs;
142 $end=$be+1;
143
144 &comment("################## Calculate word $i");
145
146 for ($j=$bs; $j<$end; $j++)
147 {
148 &xor($c2,$c2) if ($j == $bs);
149 if (($j+1) == $end)
150 {
151 $v=1;
152 $v=2 if (($i+1) == $tot);
153 }
154 else
155 { $v=0; }
156 if (($j+1) != $end)
157 {
158 $na=($ai-1);
159 $nb=($bi+1);
160 }
161 else
162 {
163 $na=$as+($i < ($num-1));
164 $nb=$bs+($i >= ($num-1));
165 }
166#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
167 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
168 if ($v)
169 {
170 &comment("saved r[$i]");
171 # &mov("eax",&wparam(0));
172 # &mov(&DWP($i*4,"eax","",0),$c0);
173 ($c0,$c1,$c2)=($c1,$c2,$c0);
174 }
175 $ai--;
176 $bi++;
177 }
178 $as++ if ($i < ($num-1));
179 $ae++ if ($i >= ($num-1));
180
181 $bs++ if ($i >= ($num-1));
182 $be++ if ($i < ($num-1));
183 }
184 &comment("save r[$i]");
185 # &mov("eax",&wparam(0));
186 &mov(&DWP($i*4,"eax","",0),$c0);
187
188 &pop("ebx");
189 &pop("ebp");
190 &pop("edi");
191 &pop("esi");
192 &ret();
193 &function_end_B($name);
194 }
195
196sub bn_sqr_comba
197 {
198 local($name,$num)=@_;
199 local($r,$a,$c0,$c1,$c2)=@_;
200 local($i,$as,$ae,$bs,$be,$ai,$bi);
201 local($b,$tot,$end,$half);
202
203 &function_begin_B($name,"");
204
205 $c0="ebx";
206 $c1="ecx";
207 $c2="ebp";
208 $a="esi";
209 $r="edi";
210
211 &push("esi");
212 &push("edi");
213 &push("ebp");
214 &push("ebx");
215 &mov($r,&wparam(0));
216 &mov($a,&wparam(1));
217 &xor($c0,$c0);
218 &xor($c1,$c1);
219 &mov("eax",&DWP(0,$a,"",0)); # load the first word
220
221 $as=0;
222 $ae=0;
223 $bs=0;
224 $be=0;
225 $tot=$num+$num-1;
226
227 for ($i=0; $i<$tot; $i++)
228 {
229 $ai=$as;
230 $bi=$bs;
231 $end=$be+1;
232
233 &comment("############### Calculate word $i");
234 for ($j=$bs; $j<$end; $j++)
235 {
236 &xor($c2,$c2) if ($j == $bs);
237 if (($ai-1) < ($bi+1))
238 {
239 $v=1;
240 $v=2 if ($i+1) == $tot;
241 }
242 else
243 { $v=0; }
244 if (!$v)
245 {
246 $na=$ai-1;
247 $nb=$bi+1;
248 }
249 else
250 {
251 $na=$as+($i < ($num-1));
252 $nb=$bs+($i >= ($num-1));
253 }
254 if ($ai == $bi)
255 {
256 &sqr_add_c($r,$a,$ai,$bi,
257 $c0,$c1,$c2,$v,$i,$na,$nb);
258 }
259 else
260 {
261 &sqr_add_c2($r,$a,$ai,$bi,
262 $c0,$c1,$c2,$v,$i,$na,$nb);
263 }
264 if ($v)
265 {
266 &comment("saved r[$i]");
267 #&mov(&DWP($i*4,$r,"",0),$c0);
268 ($c0,$c1,$c2)=($c1,$c2,$c0);
269 last;
270 }
271 $ai--;
272 $bi++;
273 }
274 $as++ if ($i < ($num-1));
275 $ae++ if ($i >= ($num-1));
276
277 $bs++ if ($i >= ($num-1));
278 $be++ if ($i < ($num-1));
279 }
280 &mov(&DWP($i*4,$r,"",0),$c0);
281 &pop("ebx");
282 &pop("ebp");
283 &pop("edi");
284 &pop("esi");
285 &ret();
286 &function_end_B($name);
287 }
diff --git a/src/lib/libcrypto/bn/asm/ia64.S b/src/lib/libcrypto/bn/asm/ia64.S
deleted file mode 100644
index 951abc53ea..0000000000
--- a/src/lib/libcrypto/bn/asm/ia64.S
+++ /dev/null
@@ -1,1555 +0,0 @@
1.explicit
2.text
3.ident "ia64.S, Version 2.1"
4.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
5
6//
7// ====================================================================
8// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
9// project.
10//
11// Rights for redistribution and usage in source and binary forms are
12// granted according to the OpenSSL license. Warranty of any kind is
13// disclaimed.
14// ====================================================================
15//
16// Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is
17// different from Itanium to this module viewpoint. Most notably, is it
18// "wider" than Itanium? Can you experience loop scalability as
19// discussed in commentary sections? Not really:-( Itanium2 has 6
20// integer ALU ports, i.e. it's 2 ports wider, but it's not enough to
21// spin twice as fast, as I need 8 IALU ports. Amount of floating point
22// ports is the same, i.e. 2, while I need 4. In other words, to this
23// module Itanium2 remains effectively as "wide" as Itanium. Yet it's
24// essentially different in respect to this module, and a re-tune was
25// required. Well, because some intruction latencies has changed. Most
26// noticeably those intensively used:
27//
28// Itanium Itanium2
29// ldf8 9 6 L2 hit
30// ld8 2 1 L1 hit
31// getf 2 5
32// xma[->getf] 7[+1] 4[+0]
33// add[->st8] 1[+1] 1[+0]
34//
35// What does it mean? You might ratiocinate that the original code
36// should run just faster... Because sum of latencies is smaller...
37// Wrong! Note that getf latency increased. This means that if a loop is
38// scheduled for lower latency (as they were), then it will suffer from
39// stall condition and the code will therefore turn anti-scalable, e.g.
40// original bn_mul_words spun at 5*n or 2.5 times slower than expected
41// on Itanium2! What to do? Reschedule loops for Itanium2? But then
42// Itanium would exhibit anti-scalability. So I've chosen to reschedule
43// for worst latency for every instruction aiming for best *all-round*
44// performance.
45
46// Q. How much faster does it get?
47// A. Here is the output from 'openssl speed rsa dsa' for vanilla
48// 0.9.6a compiled with gcc version 2.96 20000731 (Red Hat
49// Linux 7.1 2.96-81):
50//
51// sign verify sign/s verify/s
52// rsa 512 bits 0.0036s 0.0003s 275.3 2999.2
53// rsa 1024 bits 0.0203s 0.0011s 49.3 894.1
54// rsa 2048 bits 0.1331s 0.0040s 7.5 250.9
55// rsa 4096 bits 0.9270s 0.0147s 1.1 68.1
56// sign verify sign/s verify/s
57// dsa 512 bits 0.0035s 0.0043s 288.3 234.8
58// dsa 1024 bits 0.0111s 0.0135s 90.0 74.2
59//
60// And here is similar output but for this assembler
61// implementation:-)
62//
63// sign verify sign/s verify/s
64// rsa 512 bits 0.0021s 0.0001s 549.4 9638.5
65// rsa 1024 bits 0.0055s 0.0002s 183.8 4481.1
66// rsa 2048 bits 0.0244s 0.0006s 41.4 1726.3
67// rsa 4096 bits 0.1295s 0.0018s 7.7 561.5
68// sign verify sign/s verify/s
69// dsa 512 bits 0.0012s 0.0013s 891.9 756.6
70// dsa 1024 bits 0.0023s 0.0028s 440.4 376.2
71//
72// Yes, you may argue that it's not fair comparison as it's
73// possible to craft the C implementation with BN_UMULT_HIGH
74// inline assembler macro. But of course! Here is the output
75// with the macro:
76//
77// sign verify sign/s verify/s
78// rsa 512 bits 0.0020s 0.0002s 495.0 6561.0
79// rsa 1024 bits 0.0086s 0.0004s 116.2 2235.7
80// rsa 2048 bits 0.0519s 0.0015s 19.3 667.3
81// rsa 4096 bits 0.3464s 0.0053s 2.9 187.7
82// sign verify sign/s verify/s
83// dsa 512 bits 0.0016s 0.0020s 613.1 510.5
84// dsa 1024 bits 0.0045s 0.0054s 221.0 183.9
85//
86// My code is still way faster, huh:-) And I believe that even
87// higher performance can be achieved. Note that as keys get
88// longer, performance gain is larger. Why? According to the
89// profiler there is another player in the field, namely
90// BN_from_montgomery consuming larger and larger portion of CPU
91// time as keysize decreases. I therefore consider putting effort
92// to assembler implementation of the following routine:
93//
94// void bn_mul_add_mont (BN_ULONG *rp,BN_ULONG *np,int nl,BN_ULONG n0)
95// {
96// int i,j;
97// BN_ULONG v;
98//
99// for (i=0; i<nl; i++)
100// {
101// v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
102// nrp++;
103// rp++;
104// if (((nrp[-1]+=v)&BN_MASK2) < v)
105// for (j=0; ((++nrp[j])&BN_MASK2) == 0; j++) ;
106// }
107// }
108//
109// It might as well be beneficial to implement even combaX
110// variants, as it appears as it can literally unleash the
111// performance (see comment section to bn_mul_comba8 below).
112//
113// And finally for your reference the output for 0.9.6a compiled
114// with SGIcc version 0.01.0-12 (keep in mind that for the moment
115// of this writing it's not possible to convince SGIcc to use
116// BN_UMULT_HIGH inline assembler macro, yet the code is fast,
117// i.e. for a compiler generated one:-):
118//
119// sign verify sign/s verify/s
120// rsa 512 bits 0.0022s 0.0002s 452.7 5894.3
121// rsa 1024 bits 0.0097s 0.0005s 102.7 2002.9
122// rsa 2048 bits 0.0578s 0.0017s 17.3 600.2
123// rsa 4096 bits 0.3838s 0.0061s 2.6 164.5
124// sign verify sign/s verify/s
125// dsa 512 bits 0.0018s 0.0022s 547.3 459.6
126// dsa 1024 bits 0.0051s 0.0062s 196.6 161.3
127//
128// Oh! Benchmarks were performed on 733MHz Lion-class Itanium
129// system running Redhat Linux 7.1 (very special thanks to Ray
130// McCaffity of Williams Communications for providing an account).
131//
132// Q. What's the heck with 'rum 1<<5' at the end of every function?
133// A. Well, by clearing the "upper FP registers written" bit of the
134// User Mask I want to excuse the kernel from preserving upper
135// (f32-f128) FP register bank over process context switch, thus
136// minimizing bus bandwidth consumption during the switch (i.e.
137// after PKI opration completes and the program is off doing
138// something else like bulk symmetric encryption). Having said
139// this, I also want to point out that it might be good idea
140// to compile the whole toolkit (as well as majority of the
141// programs for that matter) with -mfixed-range=f32-f127 command
142// line option. No, it doesn't prevent the compiler from writing
143// to upper bank, but at least discourages to do so. If you don't
144// like the idea you have the option to compile the module with
145// -Drum=nop.m in command line.
146//
147
148#if defined(_HPUX_SOURCE) && !defined(_LP64)
149#define ADDP addp4
150#else
151#define ADDP add
152#endif
153
154#if 1
155//
156// bn_[add|sub]_words routines.
157//
158// Loops are spinning in 2*(n+5) ticks on Itanuim (provided that the
159// data reside in L1 cache, i.e. 2 ticks away). It's possible to
160// compress the epilogue and get down to 2*n+6, but at the cost of
161// scalability (the neat feature of this implementation is that it
162// shall automagically spin in n+5 on "wider" IA-64 implementations:-)
163// I consider that the epilogue is short enough as it is to trade tiny
164// performance loss on Itanium for scalability.
165//
166// BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
167//
168.global bn_add_words#
169.proc bn_add_words#
170.align 64
171.skip 32 // makes the loop body aligned at 64-byte boundary
172bn_add_words:
173 .prologue
174 .save ar.pfs,r2
175{ .mii; alloc r2=ar.pfs,4,12,0,16
176 cmp4.le p6,p0=r35,r0 };;
177{ .mfb; mov r8=r0 // return value
178(p6) br.ret.spnt.many b0 };;
179
180{ .mib; sub r10=r35,r0,1
181 .save ar.lc,r3
182 mov r3=ar.lc
183 brp.loop.imp .L_bn_add_words_ctop,.L_bn_add_words_cend-16
184 }
185{ .mib; ADDP r14=0,r32 // rp
186 .save pr,r9
187 mov r9=pr };;
188 .body
189{ .mii; ADDP r15=0,r33 // ap
190 mov ar.lc=r10
191 mov ar.ec=6 }
192{ .mib; ADDP r16=0,r34 // bp
193 mov pr.rot=1<<16 };;
194
195.L_bn_add_words_ctop:
196{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
197 (p18) add r39=r37,r34
198 (p19) cmp.ltu.unc p56,p0=r40,r38 }
199{ .mfb; (p0) nop.m 0x0
200 (p0) nop.f 0x0
201 (p0) nop.b 0x0 }
202{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
203 (p58) cmp.eq.or p57,p0=-1,r41 // (p20)
204 (p58) add r41=1,r41 } // (p20)
205{ .mfb; (p21) st8 [r14]=r42,8 // *(rp++)=r
206 (p0) nop.f 0x0
207 br.ctop.sptk .L_bn_add_words_ctop };;
208.L_bn_add_words_cend:
209
210{ .mii;
211(p59) add r8=1,r8 // return value
212 mov pr=r9,0x1ffff
213 mov ar.lc=r3 }
214{ .mbb; nop.b 0x0
215 br.ret.sptk.many b0 };;
216.endp bn_add_words#
217
218//
219// BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
220//
221.global bn_sub_words#
222.proc bn_sub_words#
223.align 64
224.skip 32 // makes the loop body aligned at 64-byte boundary
225bn_sub_words:
226 .prologue
227 .save ar.pfs,r2
228{ .mii; alloc r2=ar.pfs,4,12,0,16
229 cmp4.le p6,p0=r35,r0 };;
230{ .mfb; mov r8=r0 // return value
231(p6) br.ret.spnt.many b0 };;
232
233{ .mib; sub r10=r35,r0,1
234 .save ar.lc,r3
235 mov r3=ar.lc
236 brp.loop.imp .L_bn_sub_words_ctop,.L_bn_sub_words_cend-16
237 }
238{ .mib; ADDP r14=0,r32 // rp
239 .save pr,r9
240 mov r9=pr };;
241 .body
242{ .mii; ADDP r15=0,r33 // ap
243 mov ar.lc=r10
244 mov ar.ec=6 }
245{ .mib; ADDP r16=0,r34 // bp
246 mov pr.rot=1<<16 };;
247
248.L_bn_sub_words_ctop:
249{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
250 (p18) sub r39=r37,r34
251 (p19) cmp.gtu.unc p56,p0=r40,r38 }
252{ .mfb; (p0) nop.m 0x0
253 (p0) nop.f 0x0
254 (p0) nop.b 0x0 }
255{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
256 (p58) cmp.eq.or p57,p0=0,r41 // (p20)
257 (p58) add r41=-1,r41 } // (p20)
258{ .mbb; (p21) st8 [r14]=r42,8 // *(rp++)=r
259 (p0) nop.b 0x0
260 br.ctop.sptk .L_bn_sub_words_ctop };;
261.L_bn_sub_words_cend:
262
263{ .mii;
264(p59) add r8=1,r8 // return value
265 mov pr=r9,0x1ffff
266 mov ar.lc=r3 }
267{ .mbb; nop.b 0x0
268 br.ret.sptk.many b0 };;
269.endp bn_sub_words#
270#endif
271
272#if 0
273#define XMA_TEMPTATION
274#endif
275
276#if 1
277//
278// BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
279//
280.global bn_mul_words#
281.proc bn_mul_words#
282.align 64
283.skip 32 // makes the loop body aligned at 64-byte boundary
284bn_mul_words:
285 .prologue
286 .save ar.pfs,r2
287#ifdef XMA_TEMPTATION
288{ .mfi; alloc r2=ar.pfs,4,0,0,0 };;
289#else
290{ .mfi; alloc r2=ar.pfs,4,12,0,16 };;
291#endif
292{ .mib; mov r8=r0 // return value
293 cmp4.le p6,p0=r34,r0
294(p6) br.ret.spnt.many b0 };;
295
296{ .mii; sub r10=r34,r0,1
297 .save ar.lc,r3
298 mov r3=ar.lc
299 .save pr,r9
300 mov r9=pr };;
301
302 .body
303{ .mib; setf.sig f8=r35 // w
304 mov pr.rot=0x800001<<16
305 // ------^----- serves as (p50) at first (p27)
306 brp.loop.imp .L_bn_mul_words_ctop,.L_bn_mul_words_cend-16
307 }
308
309#ifndef XMA_TEMPTATION
310
311{ .mmi; ADDP r14=0,r32 // rp
312 ADDP r15=0,r33 // ap
313 mov ar.lc=r10 }
314{ .mmi; mov r40=0 // serves as r35 at first (p27)
315 mov ar.ec=13 };;
316
317// This loop spins in 2*(n+12) ticks. It's scheduled for data in Itanium
318// L2 cache (i.e. 9 ticks away) as floating point load/store instructions
319// bypass L1 cache and L2 latency is actually best-case scenario for
320// ldf8. The loop is not scalable and shall run in 2*(n+12) even on
321// "wider" IA-64 implementations. It's a trade-off here. n+24 loop
322// would give us ~5% in *overall* performance improvement on "wider"
323// IA-64, but would hurt Itanium for about same because of longer
324// epilogue. As it's a matter of few percents in either case I've
325// chosen to trade the scalability for development time (you can see
326// this very instruction sequence in bn_mul_add_words loop which in
327// turn is scalable).
328.L_bn_mul_words_ctop:
329{ .mfi; (p25) getf.sig r36=f52 // low
330 (p21) xmpy.lu f48=f37,f8
331 (p28) cmp.ltu p54,p50=r41,r39 }
332{ .mfi; (p16) ldf8 f32=[r15],8
333 (p21) xmpy.hu f40=f37,f8
334 (p0) nop.i 0x0 };;
335{ .mii; (p25) getf.sig r32=f44 // high
336 .pred.rel "mutex",p50,p54
337 (p50) add r40=r38,r35 // (p27)
338 (p54) add r40=r38,r35,1 } // (p27)
339{ .mfb; (p28) st8 [r14]=r41,8
340 (p0) nop.f 0x0
341 br.ctop.sptk .L_bn_mul_words_ctop };;
342.L_bn_mul_words_cend:
343
344{ .mii; nop.m 0x0
345.pred.rel "mutex",p51,p55
346(p51) add r8=r36,r0
347(p55) add r8=r36,r0,1 }
348{ .mfb; nop.m 0x0
349 nop.f 0x0
350 nop.b 0x0 }
351
352#else // XMA_TEMPTATION
353
354 setf.sig f37=r0 // serves as carry at (p18) tick
355 mov ar.lc=r10
356 mov ar.ec=5;;
357
358// Most of you examining this code very likely wonder why in the name
359// of Intel the following loop is commented out? Indeed, it looks so
360// neat that you find it hard to believe that it's something wrong
361// with it, right? The catch is that every iteration depends on the
362// result from previous one and the latter isn't available instantly.
363// The loop therefore spins at the latency of xma minus 1, or in other
364// words at 6*(n+4) ticks:-( Compare to the "production" loop above
365// that runs in 2*(n+11) where the low latency problem is worked around
366// by moving the dependency to one-tick latent interger ALU. Note that
367// "distance" between ldf8 and xma is not latency of ldf8, but the
368// *difference* between xma and ldf8 latencies.
369.L_bn_mul_words_ctop:
370{ .mfi; (p16) ldf8 f32=[r33],8
371 (p18) xma.hu f38=f34,f8,f39 }
372{ .mfb; (p20) stf8 [r32]=f37,8
373 (p18) xma.lu f35=f34,f8,f39
374 br.ctop.sptk .L_bn_mul_words_ctop };;
375.L_bn_mul_words_cend:
376
377 getf.sig r8=f41 // the return value
378
379#endif // XMA_TEMPTATION
380
381{ .mii; nop.m 0x0
382 mov pr=r9,0x1ffff
383 mov ar.lc=r3 }
384{ .mfb; rum 1<<5 // clear um.mfh
385 nop.f 0x0
386 br.ret.sptk.many b0 };;
387.endp bn_mul_words#
388#endif
389
390#if 1
391//
392// BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
393//
394.global bn_mul_add_words#
395.proc bn_mul_add_words#
396.align 64
397.skip 48 // makes the loop body aligned at 64-byte boundary
398bn_mul_add_words:
399 .prologue
400 .save ar.pfs,r2
401{ .mmi; alloc r2=ar.pfs,4,4,0,8
402 cmp4.le p6,p0=r34,r0
403 .save ar.lc,r3
404 mov r3=ar.lc };;
405{ .mib; mov r8=r0 // return value
406 sub r10=r34,r0,1
407(p6) br.ret.spnt.many b0 };;
408
409{ .mib; setf.sig f8=r35 // w
410 .save pr,r9
411 mov r9=pr
412 brp.loop.imp .L_bn_mul_add_words_ctop,.L_bn_mul_add_words_cend-16
413 }
414 .body
415{ .mmi; ADDP r14=0,r32 // rp
416 ADDP r15=0,r33 // ap
417 mov ar.lc=r10 }
418{ .mii; ADDP r16=0,r32 // rp copy
419 mov pr.rot=0x2001<<16
420 // ------^----- serves as (p40) at first (p27)
421 mov ar.ec=11 };;
422
423// This loop spins in 3*(n+10) ticks on Itanium and in 2*(n+10) on
424// Itanium 2. Yes, unlike previous versions it scales:-) Previous
425// version was peforming *all* additions in IALU and was starving
426// for those even on Itanium 2. In this version one addition is
427// moved to FPU and is folded with multiplication. This is at cost
428// of propogating the result from previous call to this subroutine
429// to L2 cache... In other words negligible even for shorter keys.
430// *Overall* performance improvement [over previous version] varies
431// from 11 to 22 percent depending on key length.
432.L_bn_mul_add_words_ctop:
433.pred.rel "mutex",p40,p42
434{ .mfi; (p23) getf.sig r36=f45 // low
435 (p20) xma.lu f42=f36,f8,f50 // low
436 (p40) add r39=r39,r35 } // (p27)
437{ .mfi; (p16) ldf8 f32=[r15],8 // *(ap++)
438 (p20) xma.hu f36=f36,f8,f50 // high
439 (p42) add r39=r39,r35,1 };; // (p27)
440{ .mmi; (p24) getf.sig r32=f40 // high
441 (p16) ldf8 f46=[r16],8 // *(rp1++)
442 (p40) cmp.ltu p41,p39=r39,r35 } // (p27)
443{ .mib; (p26) st8 [r14]=r39,8 // *(rp2++)
444 (p42) cmp.leu p41,p39=r39,r35 // (p27)
445 br.ctop.sptk .L_bn_mul_add_words_ctop};;
446.L_bn_mul_add_words_cend:
447
448{ .mmi; .pred.rel "mutex",p40,p42
449(p40) add r8=r35,r0
450(p42) add r8=r35,r0,1
451 mov pr=r9,0x1ffff }
452{ .mib; rum 1<<5 // clear um.mfh
453 mov ar.lc=r3
454 br.ret.sptk.many b0 };;
455.endp bn_mul_add_words#
456#endif
457
458#if 1
459//
460// void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
461//
462.global bn_sqr_words#
463.proc bn_sqr_words#
464.align 64
465.skip 32 // makes the loop body aligned at 64-byte boundary
466bn_sqr_words:
467 .prologue
468 .save ar.pfs,r2
469{ .mii; alloc r2=ar.pfs,3,0,0,0
470 sxt4 r34=r34 };;
471{ .mii; cmp.le p6,p0=r34,r0
472 mov r8=r0 } // return value
473{ .mfb; ADDP r32=0,r32
474 nop.f 0x0
475(p6) br.ret.spnt.many b0 };;
476
477{ .mii; sub r10=r34,r0,1
478 .save ar.lc,r3
479 mov r3=ar.lc
480 .save pr,r9
481 mov r9=pr };;
482
483 .body
484{ .mib; ADDP r33=0,r33
485 mov pr.rot=1<<16
486 brp.loop.imp .L_bn_sqr_words_ctop,.L_bn_sqr_words_cend-16
487 }
488{ .mii; add r34=8,r32
489 mov ar.lc=r10
490 mov ar.ec=18 };;
491
492// 2*(n+17) on Itanium, (n+17) on "wider" IA-64 implementations. It's
493// possible to compress the epilogue (I'm getting tired to write this
494// comment over and over) and get down to 2*n+16 at the cost of
495// scalability. The decision will very likely be reconsidered after the
496// benchmark program is profiled. I.e. if perfomance gain on Itanium
497// will appear larger than loss on "wider" IA-64, then the loop should
498// be explicitely split and the epilogue compressed.
499.L_bn_sqr_words_ctop:
500{ .mfi; (p16) ldf8 f32=[r33],8
501 (p25) xmpy.lu f42=f41,f41
502 (p0) nop.i 0x0 }
503{ .mib; (p33) stf8 [r32]=f50,16
504 (p0) nop.i 0x0
505 (p0) nop.b 0x0 }
506{ .mfi; (p0) nop.m 0x0
507 (p25) xmpy.hu f52=f41,f41
508 (p0) nop.i 0x0 }
509{ .mib; (p33) stf8 [r34]=f60,16
510 (p0) nop.i 0x0
511 br.ctop.sptk .L_bn_sqr_words_ctop };;
512.L_bn_sqr_words_cend:
513
514{ .mii; nop.m 0x0
515 mov pr=r9,0x1ffff
516 mov ar.lc=r3 }
517{ .mfb; rum 1<<5 // clear um.mfh
518 nop.f 0x0
519 br.ret.sptk.many b0 };;
520.endp bn_sqr_words#
521#endif
522
523#if 1
524// Apparently we win nothing by implementing special bn_sqr_comba8.
525// Yes, it is possible to reduce the number of multiplications by
526// almost factor of two, but then the amount of additions would
527// increase by factor of two (as we would have to perform those
528// otherwise performed by xma ourselves). Normally we would trade
529// anyway as multiplications are way more expensive, but not this
530// time... Multiplication kernel is fully pipelined and as we drain
531// one 128-bit multiplication result per clock cycle multiplications
532// are effectively as inexpensive as additions. Special implementation
533// might become of interest for "wider" IA-64 implementation as you'll
534// be able to get through the multiplication phase faster (there won't
535// be any stall issues as discussed in the commentary section below and
536// you therefore will be able to employ all 4 FP units)... But these
537// Itanium days it's simply too hard to justify the effort so I just
538// drop down to bn_mul_comba8 code:-)
539//
540// void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
541//
542.global bn_sqr_comba8#
543.proc bn_sqr_comba8#
544.align 64
545bn_sqr_comba8:
546 .prologue
547 .save ar.pfs,r2
548#if defined(_HPUX_SOURCE) && !defined(_LP64)
549{ .mii; alloc r2=ar.pfs,2,1,0,0
550 addp4 r33=0,r33
551 addp4 r32=0,r32 };;
552{ .mii;
553#else
554{ .mii; alloc r2=ar.pfs,2,1,0,0
555#endif
556 mov r34=r33
557 add r14=8,r33 };;
558 .body
559{ .mii; add r17=8,r34
560 add r15=16,r33
561 add r18=16,r34 }
562{ .mfb; add r16=24,r33
563 br .L_cheat_entry_point8 };;
564.endp bn_sqr_comba8#
565#endif
566
567#if 1
568// I've estimated this routine to run in ~120 ticks, but in reality
569// (i.e. according to ar.itc) it takes ~160 ticks. Are those extra
570// cycles consumed for instructions fetch? Or did I misinterpret some
571// clause in Itanium µ-architecture manual? Comments are welcomed and
572// highly appreciated.
573//
574// On Itanium 2 it takes ~190 ticks. This is because of stalls on
575// result from getf.sig. I do nothing about it at this point for
576// reasons depicted below.
577//
578// However! It should be noted that even 160 ticks is darn good result
579// as it's over 10 (yes, ten, spelled as t-e-n) times faster than the
580// C version (compiled with gcc with inline assembler). I really
581// kicked compiler's butt here, didn't I? Yeah! This brings us to the
582// following statement. It's damn shame that this routine isn't called
583// very often nowadays! According to the profiler most CPU time is
584// consumed by bn_mul_add_words called from BN_from_montgomery. In
585// order to estimate what we're missing, I've compared the performance
586// of this routine against "traditional" implementation, i.e. against
587// following routine:
588//
589// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
590// { r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
591// r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
592// r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
593// r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
594// r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
595// r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
596// r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
597// r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
598// }
599//
600// The one below is over 8 times faster than the one above:-( Even
601// more reasons to "combafy" bn_mul_add_mont...
602//
603// And yes, this routine really made me wish there were an optimizing
604// assembler! It also feels like it deserves a dedication.
605//
606// To my wife for being there and to my kids...
607//
608// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
609//
610#define carry1 r14
611#define carry2 r15
612#define carry3 r34
613.global bn_mul_comba8#
614.proc bn_mul_comba8#
615.align 64
616bn_mul_comba8:
617 .prologue
618 .save ar.pfs,r2
619#if defined(_HPUX_SOURCE) && !defined(_LP64)
620{ .mii; alloc r2=ar.pfs,3,0,0,0
621 addp4 r33=0,r33
622 addp4 r34=0,r34 };;
623{ .mii; addp4 r32=0,r32
624#else
625{ .mii; alloc r2=ar.pfs,3,0,0,0
626#endif
627 add r14=8,r33
628 add r17=8,r34 }
629 .body
630{ .mii; add r15=16,r33
631 add r18=16,r34
632 add r16=24,r33 }
633.L_cheat_entry_point8:
634{ .mmi; add r19=24,r34
635
636 ldf8 f32=[r33],32 };;
637
638{ .mmi; ldf8 f120=[r34],32
639 ldf8 f121=[r17],32 }
640{ .mmi; ldf8 f122=[r18],32
641 ldf8 f123=[r19],32 };;
642{ .mmi; ldf8 f124=[r34]
643 ldf8 f125=[r17] }
644{ .mmi; ldf8 f126=[r18]
645 ldf8 f127=[r19] }
646
647{ .mmi; ldf8 f33=[r14],32
648 ldf8 f34=[r15],32 }
649{ .mmi; ldf8 f35=[r16],32;;
650 ldf8 f36=[r33] }
651{ .mmi; ldf8 f37=[r14]
652 ldf8 f38=[r15] }
653{ .mfi; ldf8 f39=[r16]
654// -------\ Entering multiplier's heaven /-------
655// ------------\ /------------
656// -----------------\ /-----------------
657// ----------------------\/----------------------
658 xma.hu f41=f32,f120,f0 }
659{ .mfi; xma.lu f40=f32,f120,f0 };; // (*)
660{ .mfi; xma.hu f51=f32,f121,f0 }
661{ .mfi; xma.lu f50=f32,f121,f0 };;
662{ .mfi; xma.hu f61=f32,f122,f0 }
663{ .mfi; xma.lu f60=f32,f122,f0 };;
664{ .mfi; xma.hu f71=f32,f123,f0 }
665{ .mfi; xma.lu f70=f32,f123,f0 };;
666{ .mfi; xma.hu f81=f32,f124,f0 }
667{ .mfi; xma.lu f80=f32,f124,f0 };;
668{ .mfi; xma.hu f91=f32,f125,f0 }
669{ .mfi; xma.lu f90=f32,f125,f0 };;
670{ .mfi; xma.hu f101=f32,f126,f0 }
671{ .mfi; xma.lu f100=f32,f126,f0 };;
672{ .mfi; xma.hu f111=f32,f127,f0 }
673{ .mfi; xma.lu f110=f32,f127,f0 };;//
674// (*) You can argue that splitting at every second bundle would
675// prevent "wider" IA-64 implementations from achieving the peak
676// performance. Well, not really... The catch is that if you
677// intend to keep 4 FP units busy by splitting at every fourth
678// bundle and thus perform these 16 multiplications in 4 ticks,
679// the first bundle *below* would stall because the result from
680// the first xma bundle *above* won't be available for another 3
681// ticks (if not more, being an optimist, I assume that "wider"
682// implementation will have same latency:-). This stall will hold
683// you back and the performance would be as if every second bundle
684// were split *anyway*...
685{ .mfi; getf.sig r16=f40
686 xma.hu f42=f33,f120,f41
687 add r33=8,r32 }
688{ .mfi; xma.lu f41=f33,f120,f41 };;
689{ .mfi; getf.sig r24=f50
690 xma.hu f52=f33,f121,f51 }
691{ .mfi; xma.lu f51=f33,f121,f51 };;
692{ .mfi; st8 [r32]=r16,16
693 xma.hu f62=f33,f122,f61 }
694{ .mfi; xma.lu f61=f33,f122,f61 };;
695{ .mfi; xma.hu f72=f33,f123,f71 }
696{ .mfi; xma.lu f71=f33,f123,f71 };;
697{ .mfi; xma.hu f82=f33,f124,f81 }
698{ .mfi; xma.lu f81=f33,f124,f81 };;
699{ .mfi; xma.hu f92=f33,f125,f91 }
700{ .mfi; xma.lu f91=f33,f125,f91 };;
701{ .mfi; xma.hu f102=f33,f126,f101 }
702{ .mfi; xma.lu f101=f33,f126,f101 };;
703{ .mfi; xma.hu f112=f33,f127,f111 }
704{ .mfi; xma.lu f111=f33,f127,f111 };;//
705//-------------------------------------------------//
706{ .mfi; getf.sig r25=f41
707 xma.hu f43=f34,f120,f42 }
708{ .mfi; xma.lu f42=f34,f120,f42 };;
709{ .mfi; getf.sig r16=f60
710 xma.hu f53=f34,f121,f52 }
711{ .mfi; xma.lu f52=f34,f121,f52 };;
712{ .mfi; getf.sig r17=f51
713 xma.hu f63=f34,f122,f62
714 add r25=r25,r24 }
715{ .mfi; xma.lu f62=f34,f122,f62
716 mov carry1=0 };;
717{ .mfi; cmp.ltu p6,p0=r25,r24
718 xma.hu f73=f34,f123,f72 }
719{ .mfi; xma.lu f72=f34,f123,f72 };;
720{ .mfi; st8 [r33]=r25,16
721 xma.hu f83=f34,f124,f82
722(p6) add carry1=1,carry1 }
723{ .mfi; xma.lu f82=f34,f124,f82 };;
724{ .mfi; xma.hu f93=f34,f125,f92 }
725{ .mfi; xma.lu f92=f34,f125,f92 };;
726{ .mfi; xma.hu f103=f34,f126,f102 }
727{ .mfi; xma.lu f102=f34,f126,f102 };;
728{ .mfi; xma.hu f113=f34,f127,f112 }
729{ .mfi; xma.lu f112=f34,f127,f112 };;//
730//-------------------------------------------------//
731{ .mfi; getf.sig r18=f42
732 xma.hu f44=f35,f120,f43
733 add r17=r17,r16 }
734{ .mfi; xma.lu f43=f35,f120,f43 };;
735{ .mfi; getf.sig r24=f70
736 xma.hu f54=f35,f121,f53 }
737{ .mfi; mov carry2=0
738 xma.lu f53=f35,f121,f53 };;
739{ .mfi; getf.sig r25=f61
740 xma.hu f64=f35,f122,f63
741 cmp.ltu p7,p0=r17,r16 }
742{ .mfi; add r18=r18,r17
743 xma.lu f63=f35,f122,f63 };;
744{ .mfi; getf.sig r26=f52
745 xma.hu f74=f35,f123,f73
746(p7) add carry2=1,carry2 }
747{ .mfi; cmp.ltu p7,p0=r18,r17
748 xma.lu f73=f35,f123,f73
749 add r18=r18,carry1 };;
750{ .mfi;
751 xma.hu f84=f35,f124,f83
752(p7) add carry2=1,carry2 }
753{ .mfi; cmp.ltu p7,p0=r18,carry1
754 xma.lu f83=f35,f124,f83 };;
755{ .mfi; st8 [r32]=r18,16
756 xma.hu f94=f35,f125,f93
757(p7) add carry2=1,carry2 }
758{ .mfi; xma.lu f93=f35,f125,f93 };;
759{ .mfi; xma.hu f104=f35,f126,f103 }
760{ .mfi; xma.lu f103=f35,f126,f103 };;
761{ .mfi; xma.hu f114=f35,f127,f113 }
762{ .mfi; mov carry1=0
763 xma.lu f113=f35,f127,f113
764 add r25=r25,r24 };;//
765//-------------------------------------------------//
766{ .mfi; getf.sig r27=f43
767 xma.hu f45=f36,f120,f44
768 cmp.ltu p6,p0=r25,r24 }
769{ .mfi; xma.lu f44=f36,f120,f44
770 add r26=r26,r25 };;
771{ .mfi; getf.sig r16=f80
772 xma.hu f55=f36,f121,f54
773(p6) add carry1=1,carry1 }
774{ .mfi; xma.lu f54=f36,f121,f54 };;
775{ .mfi; getf.sig r17=f71
776 xma.hu f65=f36,f122,f64
777 cmp.ltu p6,p0=r26,r25 }
778{ .mfi; xma.lu f64=f36,f122,f64
779 add r27=r27,r26 };;
780{ .mfi; getf.sig r18=f62
781 xma.hu f75=f36,f123,f74
782(p6) add carry1=1,carry1 }
783{ .mfi; cmp.ltu p6,p0=r27,r26
784 xma.lu f74=f36,f123,f74
785 add r27=r27,carry2 };;
786{ .mfi; getf.sig r19=f53
787 xma.hu f85=f36,f124,f84
788(p6) add carry1=1,carry1 }
789{ .mfi; xma.lu f84=f36,f124,f84
790 cmp.ltu p6,p0=r27,carry2 };;
791{ .mfi; st8 [r33]=r27,16
792 xma.hu f95=f36,f125,f94
793(p6) add carry1=1,carry1 }
794{ .mfi; xma.lu f94=f36,f125,f94 };;
795{ .mfi; xma.hu f105=f36,f126,f104 }
796{ .mfi; mov carry2=0
797 xma.lu f104=f36,f126,f104
798 add r17=r17,r16 };;
799{ .mfi; xma.hu f115=f36,f127,f114
800 cmp.ltu p7,p0=r17,r16 }
801{ .mfi; xma.lu f114=f36,f127,f114
802 add r18=r18,r17 };;//
803//-------------------------------------------------//
804{ .mfi; getf.sig r20=f44
805 xma.hu f46=f37,f120,f45
806(p7) add carry2=1,carry2 }
807{ .mfi; cmp.ltu p7,p0=r18,r17
808 xma.lu f45=f37,f120,f45
809 add r19=r19,r18 };;
810{ .mfi; getf.sig r24=f90
811 xma.hu f56=f37,f121,f55 }
812{ .mfi; xma.lu f55=f37,f121,f55 };;
813{ .mfi; getf.sig r25=f81
814 xma.hu f66=f37,f122,f65
815(p7) add carry2=1,carry2 }
816{ .mfi; cmp.ltu p7,p0=r19,r18
817 xma.lu f65=f37,f122,f65
818 add r20=r20,r19 };;
819{ .mfi; getf.sig r26=f72
820 xma.hu f76=f37,f123,f75
821(p7) add carry2=1,carry2 }
822{ .mfi; cmp.ltu p7,p0=r20,r19
823 xma.lu f75=f37,f123,f75
824 add r20=r20,carry1 };;
825{ .mfi; getf.sig r27=f63
826 xma.hu f86=f37,f124,f85
827(p7) add carry2=1,carry2 }
828{ .mfi; xma.lu f85=f37,f124,f85
829 cmp.ltu p7,p0=r20,carry1 };;
830{ .mfi; getf.sig r28=f54
831 xma.hu f96=f37,f125,f95
832(p7) add carry2=1,carry2 }
833{ .mfi; st8 [r32]=r20,16
834 xma.lu f95=f37,f125,f95 };;
835{ .mfi; xma.hu f106=f37,f126,f105 }
836{ .mfi; mov carry1=0
837 xma.lu f105=f37,f126,f105
838 add r25=r25,r24 };;
839{ .mfi; xma.hu f116=f37,f127,f115
840 cmp.ltu p6,p0=r25,r24 }
841{ .mfi; xma.lu f115=f37,f127,f115
842 add r26=r26,r25 };;//
843//-------------------------------------------------//
844{ .mfi; getf.sig r29=f45
845 xma.hu f47=f38,f120,f46
846(p6) add carry1=1,carry1 }
847{ .mfi; cmp.ltu p6,p0=r26,r25
848 xma.lu f46=f38,f120,f46
849 add r27=r27,r26 };;
850{ .mfi; getf.sig r16=f100
851 xma.hu f57=f38,f121,f56
852(p6) add carry1=1,carry1 }
853{ .mfi; cmp.ltu p6,p0=r27,r26
854 xma.lu f56=f38,f121,f56
855 add r28=r28,r27 };;
856{ .mfi; getf.sig r17=f91
857 xma.hu f67=f38,f122,f66
858(p6) add carry1=1,carry1 }
859{ .mfi; cmp.ltu p6,p0=r28,r27
860 xma.lu f66=f38,f122,f66
861 add r29=r29,r28 };;
862{ .mfi; getf.sig r18=f82
863 xma.hu f77=f38,f123,f76
864(p6) add carry1=1,carry1 }
865{ .mfi; cmp.ltu p6,p0=r29,r28
866 xma.lu f76=f38,f123,f76
867 add r29=r29,carry2 };;
868{ .mfi; getf.sig r19=f73
869 xma.hu f87=f38,f124,f86
870(p6) add carry1=1,carry1 }
871{ .mfi; xma.lu f86=f38,f124,f86
872 cmp.ltu p6,p0=r29,carry2 };;
873{ .mfi; getf.sig r20=f64
874 xma.hu f97=f38,f125,f96
875(p6) add carry1=1,carry1 }
876{ .mfi; st8 [r33]=r29,16
877 xma.lu f96=f38,f125,f96 };;
878{ .mfi; getf.sig r21=f55
879 xma.hu f107=f38,f126,f106 }
880{ .mfi; mov carry2=0
881 xma.lu f106=f38,f126,f106
882 add r17=r17,r16 };;
883{ .mfi; xma.hu f117=f38,f127,f116
884 cmp.ltu p7,p0=r17,r16 }
885{ .mfi; xma.lu f116=f38,f127,f116
886 add r18=r18,r17 };;//
887//-------------------------------------------------//
888{ .mfi; getf.sig r22=f46
889 xma.hu f48=f39,f120,f47
890(p7) add carry2=1,carry2 }
891{ .mfi; cmp.ltu p7,p0=r18,r17
892 xma.lu f47=f39,f120,f47
893 add r19=r19,r18 };;
894{ .mfi; getf.sig r24=f110
895 xma.hu f58=f39,f121,f57
896(p7) add carry2=1,carry2 }
897{ .mfi; cmp.ltu p7,p0=r19,r18
898 xma.lu f57=f39,f121,f57
899 add r20=r20,r19 };;
900{ .mfi; getf.sig r25=f101
901 xma.hu f68=f39,f122,f67
902(p7) add carry2=1,carry2 }
903{ .mfi; cmp.ltu p7,p0=r20,r19
904 xma.lu f67=f39,f122,f67
905 add r21=r21,r20 };;
906{ .mfi; getf.sig r26=f92
907 xma.hu f78=f39,f123,f77
908(p7) add carry2=1,carry2 }
909{ .mfi; cmp.ltu p7,p0=r21,r20
910 xma.lu f77=f39,f123,f77
911 add r22=r22,r21 };;
912{ .mfi; getf.sig r27=f83
913 xma.hu f88=f39,f124,f87
914(p7) add carry2=1,carry2 }
915{ .mfi; cmp.ltu p7,p0=r22,r21
916 xma.lu f87=f39,f124,f87
917 add r22=r22,carry1 };;
918{ .mfi; getf.sig r28=f74
919 xma.hu f98=f39,f125,f97
920(p7) add carry2=1,carry2 }
921{ .mfi; xma.lu f97=f39,f125,f97
922 cmp.ltu p7,p0=r22,carry1 };;
923{ .mfi; getf.sig r29=f65
924 xma.hu f108=f39,f126,f107
925(p7) add carry2=1,carry2 }
926{ .mfi; st8 [r32]=r22,16
927 xma.lu f107=f39,f126,f107 };;
928{ .mfi; getf.sig r30=f56
929 xma.hu f118=f39,f127,f117 }
930{ .mfi; xma.lu f117=f39,f127,f117 };;//
931//-------------------------------------------------//
932// Leaving muliplier's heaven... Quite a ride, huh?
933
934{ .mii; getf.sig r31=f47
935 add r25=r25,r24
936 mov carry1=0 };;
937{ .mii; getf.sig r16=f111
938 cmp.ltu p6,p0=r25,r24
939 add r26=r26,r25 };;
940{ .mfb; getf.sig r17=f102 }
941{ .mii;
942(p6) add carry1=1,carry1
943 cmp.ltu p6,p0=r26,r25
944 add r27=r27,r26 };;
945{ .mfb; nop.m 0x0 }
946{ .mii;
947(p6) add carry1=1,carry1
948 cmp.ltu p6,p0=r27,r26
949 add r28=r28,r27 };;
950{ .mii; getf.sig r18=f93
951 add r17=r17,r16
952 mov carry3=0 }
953{ .mii;
954(p6) add carry1=1,carry1
955 cmp.ltu p6,p0=r28,r27
956 add r29=r29,r28 };;
957{ .mii; getf.sig r19=f84
958 cmp.ltu p7,p0=r17,r16 }
959{ .mii;
960(p6) add carry1=1,carry1
961 cmp.ltu p6,p0=r29,r28
962 add r30=r30,r29 };;
963{ .mii; getf.sig r20=f75
964 add r18=r18,r17 }
965{ .mii;
966(p6) add carry1=1,carry1
967 cmp.ltu p6,p0=r30,r29
968 add r31=r31,r30 };;
969{ .mfb; getf.sig r21=f66 }
970{ .mii; (p7) add carry3=1,carry3
971 cmp.ltu p7,p0=r18,r17
972 add r19=r19,r18 }
973{ .mfb; nop.m 0x0 }
974{ .mii;
975(p6) add carry1=1,carry1
976 cmp.ltu p6,p0=r31,r30
977 add r31=r31,carry2 };;
978{ .mfb; getf.sig r22=f57 }
979{ .mii; (p7) add carry3=1,carry3
980 cmp.ltu p7,p0=r19,r18
981 add r20=r20,r19 }
982{ .mfb; nop.m 0x0 }
983{ .mii;
984(p6) add carry1=1,carry1
985 cmp.ltu p6,p0=r31,carry2 };;
986{ .mfb; getf.sig r23=f48 }
987{ .mii; (p7) add carry3=1,carry3
988 cmp.ltu p7,p0=r20,r19
989 add r21=r21,r20 }
990{ .mii;
991(p6) add carry1=1,carry1 }
992{ .mfb; st8 [r33]=r31,16 };;
993
994{ .mfb; getf.sig r24=f112 }
995{ .mii; (p7) add carry3=1,carry3
996 cmp.ltu p7,p0=r21,r20
997 add r22=r22,r21 };;
998{ .mfb; getf.sig r25=f103 }
999{ .mii; (p7) add carry3=1,carry3
1000 cmp.ltu p7,p0=r22,r21
1001 add r23=r23,r22 };;
1002{ .mfb; getf.sig r26=f94 }
1003{ .mii; (p7) add carry3=1,carry3
1004 cmp.ltu p7,p0=r23,r22
1005 add r23=r23,carry1 };;
1006{ .mfb; getf.sig r27=f85 }
1007{ .mii; (p7) add carry3=1,carry3
1008 cmp.ltu p7,p8=r23,carry1};;
1009{ .mii; getf.sig r28=f76
1010 add r25=r25,r24
1011 mov carry1=0 }
1012{ .mii; st8 [r32]=r23,16
1013 (p7) add carry2=1,carry3
1014 (p8) add carry2=0,carry3 };;
1015
1016{ .mfb; nop.m 0x0 }
1017{ .mii; getf.sig r29=f67
1018 cmp.ltu p6,p0=r25,r24
1019 add r26=r26,r25 };;
1020{ .mfb; getf.sig r30=f58 }
1021{ .mii;
1022(p6) add carry1=1,carry1
1023 cmp.ltu p6,p0=r26,r25
1024 add r27=r27,r26 };;
1025{ .mfb; getf.sig r16=f113 }
1026{ .mii;
1027(p6) add carry1=1,carry1
1028 cmp.ltu p6,p0=r27,r26
1029 add r28=r28,r27 };;
1030{ .mfb; getf.sig r17=f104 }
1031{ .mii;
1032(p6) add carry1=1,carry1
1033 cmp.ltu p6,p0=r28,r27
1034 add r29=r29,r28 };;
1035{ .mfb; getf.sig r18=f95 }
1036{ .mii;
1037(p6) add carry1=1,carry1
1038 cmp.ltu p6,p0=r29,r28
1039 add r30=r30,r29 };;
1040{ .mii; getf.sig r19=f86
1041 add r17=r17,r16
1042 mov carry3=0 }
1043{ .mii;
1044(p6) add carry1=1,carry1
1045 cmp.ltu p6,p0=r30,r29
1046 add r30=r30,carry2 };;
1047{ .mii; getf.sig r20=f77
1048 cmp.ltu p7,p0=r17,r16
1049 add r18=r18,r17 }
1050{ .mii;
1051(p6) add carry1=1,carry1
1052 cmp.ltu p6,p0=r30,carry2 };;
1053{ .mfb; getf.sig r21=f68 }
1054{ .mii; st8 [r33]=r30,16
1055(p6) add carry1=1,carry1 };;
1056
1057{ .mfb; getf.sig r24=f114 }
1058{ .mii; (p7) add carry3=1,carry3
1059 cmp.ltu p7,p0=r18,r17
1060 add r19=r19,r18 };;
1061{ .mfb; getf.sig r25=f105 }
1062{ .mii; (p7) add carry3=1,carry3
1063 cmp.ltu p7,p0=r19,r18
1064 add r20=r20,r19 };;
1065{ .mfb; getf.sig r26=f96 }
1066{ .mii; (p7) add carry3=1,carry3
1067 cmp.ltu p7,p0=r20,r19
1068 add r21=r21,r20 };;
1069{ .mfb; getf.sig r27=f87 }
1070{ .mii; (p7) add carry3=1,carry3
1071 cmp.ltu p7,p0=r21,r20
1072 add r21=r21,carry1 };;
1073{ .mib; getf.sig r28=f78
1074 add r25=r25,r24 }
1075{ .mib; (p7) add carry3=1,carry3
1076 cmp.ltu p7,p8=r21,carry1};;
1077{ .mii; st8 [r32]=r21,16
1078 (p7) add carry2=1,carry3
1079 (p8) add carry2=0,carry3 }
1080
1081{ .mii; mov carry1=0
1082 cmp.ltu p6,p0=r25,r24
1083 add r26=r26,r25 };;
1084{ .mfb; getf.sig r16=f115 }
1085{ .mii;
1086(p6) add carry1=1,carry1
1087 cmp.ltu p6,p0=r26,r25
1088 add r27=r27,r26 };;
1089{ .mfb; getf.sig r17=f106 }
1090{ .mii;
1091(p6) add carry1=1,carry1
1092 cmp.ltu p6,p0=r27,r26
1093 add r28=r28,r27 };;
1094{ .mfb; getf.sig r18=f97 }
1095{ .mii;
1096(p6) add carry1=1,carry1
1097 cmp.ltu p6,p0=r28,r27
1098 add r28=r28,carry2 };;
1099{ .mib; getf.sig r19=f88
1100 add r17=r17,r16 }
1101{ .mib;
1102(p6) add carry1=1,carry1
1103 cmp.ltu p6,p0=r28,carry2 };;
1104{ .mii; st8 [r33]=r28,16
1105(p6) add carry1=1,carry1 }
1106
1107{ .mii; mov carry2=0
1108 cmp.ltu p7,p0=r17,r16
1109 add r18=r18,r17 };;
1110{ .mfb; getf.sig r24=f116 }
1111{ .mii; (p7) add carry2=1,carry2
1112 cmp.ltu p7,p0=r18,r17
1113 add r19=r19,r18 };;
1114{ .mfb; getf.sig r25=f107 }
1115{ .mii; (p7) add carry2=1,carry2
1116 cmp.ltu p7,p0=r19,r18
1117 add r19=r19,carry1 };;
1118{ .mfb; getf.sig r26=f98 }
1119{ .mii; (p7) add carry2=1,carry2
1120 cmp.ltu p7,p0=r19,carry1};;
1121{ .mii; st8 [r32]=r19,16
1122 (p7) add carry2=1,carry2 }
1123
1124{ .mfb; add r25=r25,r24 };;
1125
1126{ .mfb; getf.sig r16=f117 }
1127{ .mii; mov carry1=0
1128 cmp.ltu p6,p0=r25,r24
1129 add r26=r26,r25 };;
1130{ .mfb; getf.sig r17=f108 }
1131{ .mii;
1132(p6) add carry1=1,carry1
1133 cmp.ltu p6,p0=r26,r25
1134 add r26=r26,carry2 };;
1135{ .mfb; nop.m 0x0 }
1136{ .mii;
1137(p6) add carry1=1,carry1
1138 cmp.ltu p6,p0=r26,carry2 };;
1139{ .mii; st8 [r33]=r26,16
1140(p6) add carry1=1,carry1 }
1141
1142{ .mfb; add r17=r17,r16 };;
1143{ .mfb; getf.sig r24=f118 }
1144{ .mii; mov carry2=0
1145 cmp.ltu p7,p0=r17,r16
1146 add r17=r17,carry1 };;
1147{ .mii; (p7) add carry2=1,carry2
1148 cmp.ltu p7,p0=r17,carry1};;
1149{ .mii; st8 [r32]=r17
1150 (p7) add carry2=1,carry2 };;
1151{ .mfb; add r24=r24,carry2 };;
1152{ .mib; st8 [r33]=r24 }
1153
1154{ .mib; rum 1<<5 // clear um.mfh
1155 br.ret.sptk.many b0 };;
1156.endp bn_mul_comba8#
1157#undef carry3
1158#undef carry2
1159#undef carry1
1160#endif
1161
1162#if 1
1163// It's possible to make it faster (see comment to bn_sqr_comba8), but
1164// I reckon it doesn't worth the effort. Basically because the routine
1165// (actually both of them) practically never called... So I just play
1166// same trick as with bn_sqr_comba8.
1167//
1168// void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1169//
1170.global bn_sqr_comba4#
1171.proc bn_sqr_comba4#
1172.align 64
1173bn_sqr_comba4:
1174 .prologue
1175 .save ar.pfs,r2
1176#if defined(_HPUX_SOURCE) && !defined(_LP64)
1177{ .mii; alloc r2=ar.pfs,2,1,0,0
1178 addp4 r32=0,r32
1179 addp4 r33=0,r33 };;
1180{ .mii;
1181#else
1182{ .mii; alloc r2=ar.pfs,2,1,0,0
1183#endif
1184 mov r34=r33
1185 add r14=8,r33 };;
1186 .body
1187{ .mii; add r17=8,r34
1188 add r15=16,r33
1189 add r18=16,r34 }
1190{ .mfb; add r16=24,r33
1191 br .L_cheat_entry_point4 };;
1192.endp bn_sqr_comba4#
1193#endif
1194
1195#if 1
1196// Runs in ~115 cycles and ~4.5 times faster than C. Well, whatever...
1197//
1198// void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1199//
1200#define carry1 r14
1201#define carry2 r15
1202.global bn_mul_comba4#
1203.proc bn_mul_comba4#
1204.align 64
1205bn_mul_comba4:
1206 .prologue
1207 .save ar.pfs,r2
1208#if defined(_HPUX_SOURCE) && !defined(_LP64)
1209{ .mii; alloc r2=ar.pfs,3,0,0,0
1210 addp4 r33=0,r33
1211 addp4 r34=0,r34 };;
1212{ .mii; addp4 r32=0,r32
1213#else
1214{ .mii; alloc r2=ar.pfs,3,0,0,0
1215#endif
1216 add r14=8,r33
1217 add r17=8,r34 }
1218 .body
1219{ .mii; add r15=16,r33
1220 add r18=16,r34
1221 add r16=24,r33 };;
1222.L_cheat_entry_point4:
1223{ .mmi; add r19=24,r34
1224
1225 ldf8 f32=[r33] }
1226
1227{ .mmi; ldf8 f120=[r34]
1228 ldf8 f121=[r17] };;
1229{ .mmi; ldf8 f122=[r18]
1230 ldf8 f123=[r19] }
1231
1232{ .mmi; ldf8 f33=[r14]
1233 ldf8 f34=[r15] }
1234{ .mfi; ldf8 f35=[r16]
1235
1236 xma.hu f41=f32,f120,f0 }
1237{ .mfi; xma.lu f40=f32,f120,f0 };;
1238{ .mfi; xma.hu f51=f32,f121,f0 }
1239{ .mfi; xma.lu f50=f32,f121,f0 };;
1240{ .mfi; xma.hu f61=f32,f122,f0 }
1241{ .mfi; xma.lu f60=f32,f122,f0 };;
1242{ .mfi; xma.hu f71=f32,f123,f0 }
1243{ .mfi; xma.lu f70=f32,f123,f0 };;//
1244// Major stall takes place here, and 3 more places below. Result from
1245// first xma is not available for another 3 ticks.
1246{ .mfi; getf.sig r16=f40
1247 xma.hu f42=f33,f120,f41
1248 add r33=8,r32 }
1249{ .mfi; xma.lu f41=f33,f120,f41 };;
1250{ .mfi; getf.sig r24=f50
1251 xma.hu f52=f33,f121,f51 }
1252{ .mfi; xma.lu f51=f33,f121,f51 };;
1253{ .mfi; st8 [r32]=r16,16
1254 xma.hu f62=f33,f122,f61 }
1255{ .mfi; xma.lu f61=f33,f122,f61 };;
1256{ .mfi; xma.hu f72=f33,f123,f71 }
1257{ .mfi; xma.lu f71=f33,f123,f71 };;//
1258//-------------------------------------------------//
1259{ .mfi; getf.sig r25=f41
1260 xma.hu f43=f34,f120,f42 }
1261{ .mfi; xma.lu f42=f34,f120,f42 };;
1262{ .mfi; getf.sig r16=f60
1263 xma.hu f53=f34,f121,f52 }
1264{ .mfi; xma.lu f52=f34,f121,f52 };;
1265{ .mfi; getf.sig r17=f51
1266 xma.hu f63=f34,f122,f62
1267 add r25=r25,r24 }
1268{ .mfi; mov carry1=0
1269 xma.lu f62=f34,f122,f62 };;
1270{ .mfi; st8 [r33]=r25,16
1271 xma.hu f73=f34,f123,f72
1272 cmp.ltu p6,p0=r25,r24 }
1273{ .mfi; xma.lu f72=f34,f123,f72 };;//
1274//-------------------------------------------------//
1275{ .mfi; getf.sig r18=f42
1276 xma.hu f44=f35,f120,f43
1277(p6) add carry1=1,carry1 }
1278{ .mfi; add r17=r17,r16
1279 xma.lu f43=f35,f120,f43
1280 mov carry2=0 };;
1281{ .mfi; getf.sig r24=f70
1282 xma.hu f54=f35,f121,f53
1283 cmp.ltu p7,p0=r17,r16 }
1284{ .mfi; xma.lu f53=f35,f121,f53 };;
1285{ .mfi; getf.sig r25=f61
1286 xma.hu f64=f35,f122,f63
1287 add r18=r18,r17 }
1288{ .mfi; xma.lu f63=f35,f122,f63
1289(p7) add carry2=1,carry2 };;
1290{ .mfi; getf.sig r26=f52
1291 xma.hu f74=f35,f123,f73
1292 cmp.ltu p7,p0=r18,r17 }
1293{ .mfi; xma.lu f73=f35,f123,f73
1294 add r18=r18,carry1 };;
1295//-------------------------------------------------//
1296{ .mii; st8 [r32]=r18,16
1297(p7) add carry2=1,carry2
1298 cmp.ltu p7,p0=r18,carry1 };;
1299
1300{ .mfi; getf.sig r27=f43 // last major stall
1301(p7) add carry2=1,carry2 };;
1302{ .mii; getf.sig r16=f71
1303 add r25=r25,r24
1304 mov carry1=0 };;
1305{ .mii; getf.sig r17=f62
1306 cmp.ltu p6,p0=r25,r24
1307 add r26=r26,r25 };;
1308{ .mii;
1309(p6) add carry1=1,carry1
1310 cmp.ltu p6,p0=r26,r25
1311 add r27=r27,r26 };;
1312{ .mii;
1313(p6) add carry1=1,carry1
1314 cmp.ltu p6,p0=r27,r26
1315 add r27=r27,carry2 };;
1316{ .mii; getf.sig r18=f53
1317(p6) add carry1=1,carry1
1318 cmp.ltu p6,p0=r27,carry2 };;
1319{ .mfi; st8 [r33]=r27,16
1320(p6) add carry1=1,carry1 }
1321
1322{ .mii; getf.sig r19=f44
1323 add r17=r17,r16
1324 mov carry2=0 };;
1325{ .mii; getf.sig r24=f72
1326 cmp.ltu p7,p0=r17,r16
1327 add r18=r18,r17 };;
1328{ .mii; (p7) add carry2=1,carry2
1329 cmp.ltu p7,p0=r18,r17
1330 add r19=r19,r18 };;
1331{ .mii; (p7) add carry2=1,carry2
1332 cmp.ltu p7,p0=r19,r18
1333 add r19=r19,carry1 };;
1334{ .mii; getf.sig r25=f63
1335 (p7) add carry2=1,carry2
1336 cmp.ltu p7,p0=r19,carry1};;
1337{ .mii; st8 [r32]=r19,16
1338 (p7) add carry2=1,carry2 }
1339
1340{ .mii; getf.sig r26=f54
1341 add r25=r25,r24
1342 mov carry1=0 };;
1343{ .mii; getf.sig r16=f73
1344 cmp.ltu p6,p0=r25,r24
1345 add r26=r26,r25 };;
1346{ .mii;
1347(p6) add carry1=1,carry1
1348 cmp.ltu p6,p0=r26,r25
1349 add r26=r26,carry2 };;
1350{ .mii; getf.sig r17=f64
1351(p6) add carry1=1,carry1
1352 cmp.ltu p6,p0=r26,carry2 };;
1353{ .mii; st8 [r33]=r26,16
1354(p6) add carry1=1,carry1 }
1355
1356{ .mii; getf.sig r24=f74
1357 add r17=r17,r16
1358 mov carry2=0 };;
1359{ .mii; cmp.ltu p7,p0=r17,r16
1360 add r17=r17,carry1 };;
1361
1362{ .mii; (p7) add carry2=1,carry2
1363 cmp.ltu p7,p0=r17,carry1};;
1364{ .mii; st8 [r32]=r17,16
1365 (p7) add carry2=1,carry2 };;
1366
1367{ .mii; add r24=r24,carry2 };;
1368{ .mii; st8 [r33]=r24 }
1369
1370{ .mib; rum 1<<5 // clear um.mfh
1371 br.ret.sptk.many b0 };;
1372.endp bn_mul_comba4#
1373#undef carry2
1374#undef carry1
1375#endif
1376
1377#if 1
1378//
1379// BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
1380//
1381// In the nutshell it's a port of my MIPS III/IV implementation.
1382//
1383#define AT r14
1384#define H r16
1385#define HH r20
1386#define L r17
1387#define D r18
1388#define DH r22
1389#define I r21
1390
1391#if 0
1392// Some preprocessors (most notably HP-UX) appear to be allergic to
1393// macros enclosed to parenthesis [as these three were].
1394#define cont p16
1395#define break p0 // p20
1396#define equ p24
1397#else
1398cont=p16
1399break=p0
1400equ=p24
1401#endif
1402
1403.global abort#
1404.global bn_div_words#
1405.proc bn_div_words#
1406.align 64
1407bn_div_words:
1408 .prologue
1409 .save ar.pfs,r2
1410{ .mii; alloc r2=ar.pfs,3,5,0,8
1411 .save b0,r3
1412 mov r3=b0
1413 .save pr,r10
1414 mov r10=pr };;
1415{ .mmb; cmp.eq p6,p0=r34,r0
1416 mov r8=-1
1417(p6) br.ret.spnt.many b0 };;
1418
1419 .body
1420{ .mii; mov H=r32 // save h
1421 mov ar.ec=0 // don't rotate at exit
1422 mov pr.rot=0 }
1423{ .mii; mov L=r33 // save l
1424 mov r36=r0 };;
1425
1426.L_divw_shift: // -vv- note signed comparison
1427{ .mfi; (p0) cmp.lt p16,p0=r0,r34 // d
1428 (p0) shladd r33=r34,1,r0 }
1429{ .mfb; (p0) add r35=1,r36
1430 (p0) nop.f 0x0
1431(p16) br.wtop.dpnt .L_divw_shift };;
1432
1433{ .mii; mov D=r34
1434 shr.u DH=r34,32
1435 sub r35=64,r36 };;
1436{ .mii; setf.sig f7=DH
1437 shr.u AT=H,r35
1438 mov I=r36 };;
1439{ .mib; cmp.ne p6,p0=r0,AT
1440 shl H=H,r36
1441(p6) br.call.spnt.clr b0=abort };; // overflow, die...
1442
1443{ .mfi; fcvt.xuf.s1 f7=f7
1444 shr.u AT=L,r35 };;
1445{ .mii; shl L=L,r36
1446 or H=H,AT };;
1447
1448{ .mii; nop.m 0x0
1449 cmp.leu p6,p0=D,H;;
1450(p6) sub H=H,D }
1451
1452{ .mlx; setf.sig f14=D
1453 movl AT=0xffffffff };;
1454///////////////////////////////////////////////////////////
1455{ .mii; setf.sig f6=H
1456 shr.u HH=H,32;;
1457 cmp.eq p6,p7=HH,DH };;
1458{ .mfb;
1459(p6) setf.sig f8=AT
1460(p7) fcvt.xuf.s1 f6=f6
1461(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1462
1463{ .mfi; getf.sig r33=f8 // q
1464 xmpy.lu f9=f8,f14 }
1465{ .mfi; xmpy.hu f10=f8,f14
1466 shrp H=H,L,32 };;
1467
1468{ .mmi; getf.sig r35=f9 // tl
1469 getf.sig r31=f10 };; // th
1470
1471.L_divw_1st_iter:
1472{ .mii; (p0) add r32=-1,r33
1473 (p0) cmp.eq equ,cont=HH,r31 };;
1474{ .mii; (p0) cmp.ltu p8,p0=r35,D
1475 (p0) sub r34=r35,D
1476 (equ) cmp.leu break,cont=r35,H };;
1477{ .mib; (cont) cmp.leu cont,break=HH,r31
1478 (p8) add r31=-1,r31
1479(cont) br.wtop.spnt .L_divw_1st_iter };;
1480///////////////////////////////////////////////////////////
1481{ .mii; sub H=H,r35
1482 shl r8=r33,32
1483 shl L=L,32 };;
1484///////////////////////////////////////////////////////////
1485{ .mii; setf.sig f6=H
1486 shr.u HH=H,32;;
1487 cmp.eq p6,p7=HH,DH };;
1488{ .mfb;
1489(p6) setf.sig f8=AT
1490(p7) fcvt.xuf.s1 f6=f6
1491(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1492
1493{ .mfi; getf.sig r33=f8 // q
1494 xmpy.lu f9=f8,f14 }
1495{ .mfi; xmpy.hu f10=f8,f14
1496 shrp H=H,L,32 };;
1497
1498{ .mmi; getf.sig r35=f9 // tl
1499 getf.sig r31=f10 };; // th
1500
1501.L_divw_2nd_iter:
1502{ .mii; (p0) add r32=-1,r33
1503 (p0) cmp.eq equ,cont=HH,r31 };;
1504{ .mii; (p0) cmp.ltu p8,p0=r35,D
1505 (p0) sub r34=r35,D
1506 (equ) cmp.leu break,cont=r35,H };;
1507{ .mib; (cont) cmp.leu cont,break=HH,r31
1508 (p8) add r31=-1,r31
1509(cont) br.wtop.spnt .L_divw_2nd_iter };;
1510///////////////////////////////////////////////////////////
1511{ .mii; sub H=H,r35
1512 or r8=r8,r33
1513 mov ar.pfs=r2 };;
1514{ .mii; shr.u r9=H,I // remainder if anybody wants it
1515 mov pr=r10,0x1ffff }
1516{ .mfb; br.ret.sptk.many b0 };;
1517
1518// Unsigned 64 by 32 (well, by 64 for the moment) bit integer division
1519// procedure.
1520//
1521// inputs: f6 = (double)a, f7 = (double)b
1522// output: f8 = (int)(a/b)
1523// clobbered: f8,f9,f10,f11,pred
1524pred=p15
1525// One can argue that this snippet is copyrighted to Intel
1526// Corporation, as it's essentially identical to one of those
1527// found in "Divide, Square Root and Remainder" section at
1528// http://www.intel.com/software/products/opensource/libraries/num.htm.
1529// Yes, I admit that the referred code was used as template,
1530// but after I realized that there hardly is any other instruction
1531// sequence which would perform this operation. I mean I figure that
1532// any independent attempt to implement high-performance division
1533// will result in code virtually identical to the Intel code. It
1534// should be noted though that below division kernel is 1 cycle
1535// faster than Intel one (note commented splits:-), not to mention
1536// original prologue (rather lack of one) and epilogue.
1537.align 32
1538.skip 16
1539.L_udiv64_32_b6:
1540 frcpa.s1 f8,pred=f6,f7;; // [0] y0 = 1 / b
1541
1542(pred) fnma.s1 f9=f7,f8,f1 // [5] e0 = 1 - b * y0
1543(pred) fmpy.s1 f10=f6,f8;; // [5] q0 = a * y0
1544(pred) fmpy.s1 f11=f9,f9 // [10] e1 = e0 * e0
1545(pred) fma.s1 f10=f9,f10,f10;; // [10] q1 = q0 + e0 * q0
1546(pred) fma.s1 f8=f9,f8,f8 //;; // [15] y1 = y0 + e0 * y0
1547(pred) fma.s1 f9=f11,f10,f10;; // [15] q2 = q1 + e1 * q1
1548(pred) fma.s1 f8=f11,f8,f8 //;; // [20] y2 = y1 + e1 * y1
1549(pred) fnma.s1 f10=f7,f9,f6;; // [20] r2 = a - b * q2
1550(pred) fma.s1 f8=f10,f8,f9;; // [25] q3 = q2 + r2 * y2
1551
1552 fcvt.fxu.trunc.s1 f8=f8 // [30] q = trunc(q3)
1553 br.ret.sptk.many b6;;
1554.endp bn_div_words#
1555#endif
diff --git a/src/lib/libcrypto/bn/asm/mips3-mont.pl b/src/lib/libcrypto/bn/asm/mips3-mont.pl
deleted file mode 100644
index 8f9156e02a..0000000000
--- a/src/lib/libcrypto/bn/asm/mips3-mont.pl
+++ /dev/null
@@ -1,327 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# This module doesn't present direct interest for OpenSSL, because it
11# doesn't provide better performance for longer keys. While 512-bit
12# RSA private key operations are 40% faster, 1024-bit ones are hardly
13# faster at all, while longer key operations are slower by up to 20%.
14# It might be of interest to embedded system developers though, as
15# it's smaller than 1KB, yet offers ~3x improvement over compiler
16# generated code.
17#
18# The module targets N32 and N64 MIPS ABIs and currently is a bit
19# IRIX-centric, i.e. is likely to require adaptation for other OSes.
20
21# int bn_mul_mont(
22$rp="a0"; # BN_ULONG *rp,
23$ap="a1"; # const BN_ULONG *ap,
24$bp="a2"; # const BN_ULONG *bp,
25$np="a3"; # const BN_ULONG *np,
26$n0="a4"; # const BN_ULONG *n0,
27$num="a5"; # int num);
28
29$lo0="a6";
30$hi0="a7";
31$lo1="v0";
32$hi1="v1";
33$aj="t0";
34$bi="t1";
35$nj="t2";
36$tp="t3";
37$alo="s0";
38$ahi="s1";
39$nlo="s2";
40$nhi="s3";
41$tj="s4";
42$i="s5";
43$j="s6";
44$fp="t8";
45$m1="t9";
46
47$FRAME=8*(2+8);
48
49$code=<<___;
50#include <asm.h>
51#include <regdef.h>
52
53.text
54
55.set noat
56.set reorder
57
58.align 5
59.globl bn_mul_mont
60.ent bn_mul_mont
61bn_mul_mont:
62 .set noreorder
63 PTR_SUB sp,64
64 move $fp,sp
65 .frame $fp,64,ra
66 slt AT,$num,4
67 li v0,0
68 beqzl AT,.Lproceed
69 nop
70 jr ra
71 PTR_ADD sp,$fp,64
72 .set reorder
73.align 5
74.Lproceed:
75 ld $n0,0($n0)
76 ld $bi,0($bp) # bp[0]
77 ld $aj,0($ap) # ap[0]
78 ld $nj,0($np) # np[0]
79 PTR_SUB sp,16 # place for two extra words
80 sll $num,3
81 li AT,-4096
82 PTR_SUB sp,$num
83 and sp,AT
84
85 sd s0,0($fp)
86 sd s1,8($fp)
87 sd s2,16($fp)
88 sd s3,24($fp)
89 sd s4,32($fp)
90 sd s5,40($fp)
91 sd s6,48($fp)
92 sd s7,56($fp)
93
94 dmultu $aj,$bi
95 ld $alo,8($ap)
96 ld $nlo,8($np)
97 mflo $lo0
98 mfhi $hi0
99 dmultu $lo0,$n0
100 mflo $m1
101
102 dmultu $alo,$bi
103 mflo $alo
104 mfhi $ahi
105
106 dmultu $nj,$m1
107 mflo $lo1
108 mfhi $hi1
109 dmultu $nlo,$m1
110 daddu $lo1,$lo0
111 sltu AT,$lo1,$lo0
112 daddu $hi1,AT
113 mflo $nlo
114 mfhi $nhi
115
116 move $tp,sp
117 li $j,16
118.align 4
119.L1st:
120 .set noreorder
121 PTR_ADD $aj,$ap,$j
122 ld $aj,($aj)
123 PTR_ADD $nj,$np,$j
124 ld $nj,($nj)
125
126 dmultu $aj,$bi
127 daddu $lo0,$alo,$hi0
128 daddu $lo1,$nlo,$hi1
129 sltu AT,$lo0,$hi0
130 sltu s7,$lo1,$hi1
131 daddu $hi0,$ahi,AT
132 daddu $hi1,$nhi,s7
133 mflo $alo
134 mfhi $ahi
135
136 daddu $lo1,$lo0
137 sltu AT,$lo1,$lo0
138 dmultu $nj,$m1
139 daddu $hi1,AT
140 addu $j,8
141 sd $lo1,($tp)
142 sltu s7,$j,$num
143 mflo $nlo
144 mfhi $nhi
145
146 bnez s7,.L1st
147 PTR_ADD $tp,8
148 .set reorder
149
150 daddu $lo0,$alo,$hi0
151 sltu AT,$lo0,$hi0
152 daddu $hi0,$ahi,AT
153
154 daddu $lo1,$nlo,$hi1
155 sltu s7,$lo1,$hi1
156 daddu $hi1,$nhi,s7
157 daddu $lo1,$lo0
158 sltu AT,$lo1,$lo0
159 daddu $hi1,AT
160
161 sd $lo1,($tp)
162
163 daddu $hi1,$hi0
164 sltu AT,$hi1,$hi0
165 sd $hi1,8($tp)
166 sd AT,16($tp)
167
168 li $i,8
169.align 4
170.Louter:
171 PTR_ADD $bi,$bp,$i
172 ld $bi,($bi)
173 ld $aj,($ap)
174 ld $alo,8($ap)
175 ld $tj,(sp)
176
177 dmultu $aj,$bi
178 ld $nj,($np)
179 ld $nlo,8($np)
180 mflo $lo0
181 mfhi $hi0
182 daddu $lo0,$tj
183 dmultu $lo0,$n0
184 sltu AT,$lo0,$tj
185 daddu $hi0,AT
186 mflo $m1
187
188 dmultu $alo,$bi
189 mflo $alo
190 mfhi $ahi
191
192 dmultu $nj,$m1
193 mflo $lo1
194 mfhi $hi1
195
196 dmultu $nlo,$m1
197 daddu $lo1,$lo0
198 sltu AT,$lo1,$lo0
199 daddu $hi1,AT
200 mflo $nlo
201 mfhi $nhi
202
203 move $tp,sp
204 li $j,16
205 ld $tj,8($tp)
206.align 4
207.Linner:
208 .set noreorder
209 PTR_ADD $aj,$ap,$j
210 ld $aj,($aj)
211 PTR_ADD $nj,$np,$j
212 ld $nj,($nj)
213
214 dmultu $aj,$bi
215 daddu $lo0,$alo,$hi0
216 daddu $lo1,$nlo,$hi1
217 sltu AT,$lo0,$hi0
218 sltu s7,$lo1,$hi1
219 daddu $hi0,$ahi,AT
220 daddu $hi1,$nhi,s7
221 mflo $alo
222 mfhi $ahi
223
224 daddu $lo0,$tj
225 addu $j,8
226 dmultu $nj,$m1
227 sltu AT,$lo0,$tj
228 daddu $lo1,$lo0
229 daddu $hi0,AT
230 sltu s7,$lo1,$lo0
231 ld $tj,16($tp)
232 daddu $hi1,s7
233 sltu AT,$j,$num
234 mflo $nlo
235 mfhi $nhi
236 sd $lo1,($tp)
237 bnez AT,.Linner
238 PTR_ADD $tp,8
239 .set reorder
240
241 daddu $lo0,$alo,$hi0
242 sltu AT,$lo0,$hi0
243 daddu $hi0,$ahi,AT
244 daddu $lo0,$tj
245 sltu s7,$lo0,$tj
246 daddu $hi0,s7
247
248 ld $tj,16($tp)
249 daddu $lo1,$nlo,$hi1
250 sltu AT,$lo1,$hi1
251 daddu $hi1,$nhi,AT
252 daddu $lo1,$lo0
253 sltu s7,$lo1,$lo0
254 daddu $hi1,s7
255 sd $lo1,($tp)
256
257 daddu $lo1,$hi1,$hi0
258 sltu $hi1,$lo1,$hi0
259 daddu $lo1,$tj
260 sltu AT,$lo1,$tj
261 daddu $hi1,AT
262 sd $lo1,8($tp)
263 sd $hi1,16($tp)
264
265 addu $i,8
266 sltu s7,$i,$num
267 bnez s7,.Louter
268
269 .set noreorder
270 PTR_ADD $tj,sp,$num # &tp[num]
271 move $tp,sp
272 move $ap,sp
273 li $hi0,0 # clear borrow bit
274
275.align 4
276.Lsub: ld $lo0,($tp)
277 ld $lo1,($np)
278 PTR_ADD $tp,8
279 PTR_ADD $np,8
280 dsubu $lo1,$lo0,$lo1 # tp[i]-np[i]
281 sgtu AT,$lo1,$lo0
282 dsubu $lo0,$lo1,$hi0
283 sgtu $hi0,$lo0,$lo1
284 sd $lo0,($rp)
285 or $hi0,AT
286 sltu AT,$tp,$tj
287 bnez AT,.Lsub
288 PTR_ADD $rp,8
289
290 dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit
291 move $tp,sp
292 PTR_SUB $rp,$num # restore rp
293 not $hi1,$hi0
294
295 and $ap,$hi0,sp
296 and $bp,$hi1,$rp
297 or $ap,$ap,$bp # ap=borrow?tp:rp
298
299.align 4
300.Lcopy: ld $aj,($ap)
301 PTR_ADD $ap,8
302 PTR_ADD $tp,8
303 sd zero,-8($tp)
304 sltu AT,$tp,$tj
305 sd $aj,($rp)
306 bnez AT,.Lcopy
307 PTR_ADD $rp,8
308
309 ld s0,0($fp)
310 ld s1,8($fp)
311 ld s2,16($fp)
312 ld s3,24($fp)
313 ld s4,32($fp)
314 ld s5,40($fp)
315 ld s6,48($fp)
316 ld s7,56($fp)
317 li v0,1
318 jr ra
319 PTR_ADD sp,$fp,64
320 .set reorder
321END(bn_mul_mont)
322.rdata
323.asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>"
324___
325
326print $code;
327close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/mips3.s b/src/lib/libcrypto/bn/asm/mips3.s
deleted file mode 100644
index dca4105c7d..0000000000
--- a/src/lib/libcrypto/bn/asm/mips3.s
+++ /dev/null
@@ -1,2201 +0,0 @@
1.rdata
2.asciiz "mips3.s, Version 1.1"
3.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
4
5/*
6 * ====================================================================
7 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
8 * project.
9 *
10 * Rights for redistribution and usage in source and binary forms are
11 * granted according to the OpenSSL license. Warranty of any kind is
12 * disclaimed.
13 * ====================================================================
14 */
15
16/*
17 * This is my modest contributon to the OpenSSL project (see
18 * http://www.openssl.org/ for more information about it) and is
19 * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
20 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
21 *
22 * The module is designed to work with either of the "new" MIPS ABI(5),
23 * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
24 * IRIX 5.x not only because it doesn't support new ABIs but also
25 * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
26 * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
27 * cause illegal instruction exception:-(
28 *
29 * In addition the code depends on preprocessor flags set up by MIPSpro
30 * compiler driver (either as or cc) and therefore (probably?) can't be
31 * compiled by the GNU assembler. GNU C driver manages fine though...
32 * I mean as long as -mmips-as is specified or is the default option,
33 * because then it simply invokes /usr/bin/as which in turn takes
34 * perfect care of the preprocessor definitions. Another neat feature
35 * offered by the MIPSpro assembler is an optimization pass. This gave
36 * me the opportunity to have the code looking more regular as all those
37 * architecture dependent instruction rescheduling details were left to
38 * the assembler. Cool, huh?
39 *
40 * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
41 * goes way over 3 times faster!
42 *
43 * <appro@fy.chalmers.se>
44 */
45#include <asm.h>
46#include <regdef.h>
47
48#if _MIPS_ISA>=4
49#define MOVNZ(cond,dst,src) \
50 movn dst,src,cond
51#else
52#define MOVNZ(cond,dst,src) \
53 .set noreorder; \
54 bnezl cond,.+8; \
55 move dst,src; \
56 .set reorder
57#endif
58
59.text
60
61.set noat
62.set reorder
63
64#define MINUS4 v1
65
66.align 5
67LEAF(bn_mul_add_words)
68 .set noreorder
69 bgtzl a2,.L_bn_mul_add_words_proceed
70 ld t0,0(a1)
71 jr ra
72 move v0,zero
73 .set reorder
74
75.L_bn_mul_add_words_proceed:
76 li MINUS4,-4
77 and ta0,a2,MINUS4
78 move v0,zero
79 beqz ta0,.L_bn_mul_add_words_tail
80
81.L_bn_mul_add_words_loop:
82 dmultu t0,a3
83 ld t1,0(a0)
84 ld t2,8(a1)
85 ld t3,8(a0)
86 ld ta0,16(a1)
87 ld ta1,16(a0)
88 daddu t1,v0
89 sltu v0,t1,v0 /* All manuals say it "compares 32-bit
90 * values", but it seems to work fine
91 * even on 64-bit registers. */
92 mflo AT
93 mfhi t0
94 daddu t1,AT
95 daddu v0,t0
96 sltu AT,t1,AT
97 sd t1,0(a0)
98 daddu v0,AT
99
100 dmultu t2,a3
101 ld ta2,24(a1)
102 ld ta3,24(a0)
103 daddu t3,v0
104 sltu v0,t3,v0
105 mflo AT
106 mfhi t2
107 daddu t3,AT
108 daddu v0,t2
109 sltu AT,t3,AT
110 sd t3,8(a0)
111 daddu v0,AT
112
113 dmultu ta0,a3
114 subu a2,4
115 PTR_ADD a0,32
116 PTR_ADD a1,32
117 daddu ta1,v0
118 sltu v0,ta1,v0
119 mflo AT
120 mfhi ta0
121 daddu ta1,AT
122 daddu v0,ta0
123 sltu AT,ta1,AT
124 sd ta1,-16(a0)
125 daddu v0,AT
126
127
128 dmultu ta2,a3
129 and ta0,a2,MINUS4
130 daddu ta3,v0
131 sltu v0,ta3,v0
132 mflo AT
133 mfhi ta2
134 daddu ta3,AT
135 daddu v0,ta2
136 sltu AT,ta3,AT
137 sd ta3,-8(a0)
138 daddu v0,AT
139 .set noreorder
140 bgtzl ta0,.L_bn_mul_add_words_loop
141 ld t0,0(a1)
142
143 bnezl a2,.L_bn_mul_add_words_tail
144 ld t0,0(a1)
145 .set reorder
146
147.L_bn_mul_add_words_return:
148 jr ra
149
150.L_bn_mul_add_words_tail:
151 dmultu t0,a3
152 ld t1,0(a0)
153 subu a2,1
154 daddu t1,v0
155 sltu v0,t1,v0
156 mflo AT
157 mfhi t0
158 daddu t1,AT
159 daddu v0,t0
160 sltu AT,t1,AT
161 sd t1,0(a0)
162 daddu v0,AT
163 beqz a2,.L_bn_mul_add_words_return
164
165 ld t0,8(a1)
166 dmultu t0,a3
167 ld t1,8(a0)
168 subu a2,1
169 daddu t1,v0
170 sltu v0,t1,v0
171 mflo AT
172 mfhi t0
173 daddu t1,AT
174 daddu v0,t0
175 sltu AT,t1,AT
176 sd t1,8(a0)
177 daddu v0,AT
178 beqz a2,.L_bn_mul_add_words_return
179
180 ld t0,16(a1)
181 dmultu t0,a3
182 ld t1,16(a0)
183 daddu t1,v0
184 sltu v0,t1,v0
185 mflo AT
186 mfhi t0
187 daddu t1,AT
188 daddu v0,t0
189 sltu AT,t1,AT
190 sd t1,16(a0)
191 daddu v0,AT
192 jr ra
193END(bn_mul_add_words)
194
195.align 5
196LEAF(bn_mul_words)
197 .set noreorder
198 bgtzl a2,.L_bn_mul_words_proceed
199 ld t0,0(a1)
200 jr ra
201 move v0,zero
202 .set reorder
203
204.L_bn_mul_words_proceed:
205 li MINUS4,-4
206 and ta0,a2,MINUS4
207 move v0,zero
208 beqz ta0,.L_bn_mul_words_tail
209
210.L_bn_mul_words_loop:
211 dmultu t0,a3
212 ld t2,8(a1)
213 ld ta0,16(a1)
214 ld ta2,24(a1)
215 mflo AT
216 mfhi t0
217 daddu v0,AT
218 sltu t1,v0,AT
219 sd v0,0(a0)
220 daddu v0,t1,t0
221
222 dmultu t2,a3
223 subu a2,4
224 PTR_ADD a0,32
225 PTR_ADD a1,32
226 mflo AT
227 mfhi t2
228 daddu v0,AT
229 sltu t3,v0,AT
230 sd v0,-24(a0)
231 daddu v0,t3,t2
232
233 dmultu ta0,a3
234 mflo AT
235 mfhi ta0
236 daddu v0,AT
237 sltu ta1,v0,AT
238 sd v0,-16(a0)
239 daddu v0,ta1,ta0
240
241
242 dmultu ta2,a3
243 and ta0,a2,MINUS4
244 mflo AT
245 mfhi ta2
246 daddu v0,AT
247 sltu ta3,v0,AT
248 sd v0,-8(a0)
249 daddu v0,ta3,ta2
250 .set noreorder
251 bgtzl ta0,.L_bn_mul_words_loop
252 ld t0,0(a1)
253
254 bnezl a2,.L_bn_mul_words_tail
255 ld t0,0(a1)
256 .set reorder
257
258.L_bn_mul_words_return:
259 jr ra
260
261.L_bn_mul_words_tail:
262 dmultu t0,a3
263 subu a2,1
264 mflo AT
265 mfhi t0
266 daddu v0,AT
267 sltu t1,v0,AT
268 sd v0,0(a0)
269 daddu v0,t1,t0
270 beqz a2,.L_bn_mul_words_return
271
272 ld t0,8(a1)
273 dmultu t0,a3
274 subu a2,1
275 mflo AT
276 mfhi t0
277 daddu v0,AT
278 sltu t1,v0,AT
279 sd v0,8(a0)
280 daddu v0,t1,t0
281 beqz a2,.L_bn_mul_words_return
282
283 ld t0,16(a1)
284 dmultu t0,a3
285 mflo AT
286 mfhi t0
287 daddu v0,AT
288 sltu t1,v0,AT
289 sd v0,16(a0)
290 daddu v0,t1,t0
291 jr ra
292END(bn_mul_words)
293
294.align 5
295LEAF(bn_sqr_words)
296 .set noreorder
297 bgtzl a2,.L_bn_sqr_words_proceed
298 ld t0,0(a1)
299 jr ra
300 move v0,zero
301 .set reorder
302
303.L_bn_sqr_words_proceed:
304 li MINUS4,-4
305 and ta0,a2,MINUS4
306 move v0,zero
307 beqz ta0,.L_bn_sqr_words_tail
308
309.L_bn_sqr_words_loop:
310 dmultu t0,t0
311 ld t2,8(a1)
312 ld ta0,16(a1)
313 ld ta2,24(a1)
314 mflo t1
315 mfhi t0
316 sd t1,0(a0)
317 sd t0,8(a0)
318
319 dmultu t2,t2
320 subu a2,4
321 PTR_ADD a0,64
322 PTR_ADD a1,32
323 mflo t3
324 mfhi t2
325 sd t3,-48(a0)
326 sd t2,-40(a0)
327
328 dmultu ta0,ta0
329 mflo ta1
330 mfhi ta0
331 sd ta1,-32(a0)
332 sd ta0,-24(a0)
333
334
335 dmultu ta2,ta2
336 and ta0,a2,MINUS4
337 mflo ta3
338 mfhi ta2
339 sd ta3,-16(a0)
340 sd ta2,-8(a0)
341
342 .set noreorder
343 bgtzl ta0,.L_bn_sqr_words_loop
344 ld t0,0(a1)
345
346 bnezl a2,.L_bn_sqr_words_tail
347 ld t0,0(a1)
348 .set reorder
349
350.L_bn_sqr_words_return:
351 move v0,zero
352 jr ra
353
354.L_bn_sqr_words_tail:
355 dmultu t0,t0
356 subu a2,1
357 mflo t1
358 mfhi t0
359 sd t1,0(a0)
360 sd t0,8(a0)
361 beqz a2,.L_bn_sqr_words_return
362
363 ld t0,8(a1)
364 dmultu t0,t0
365 subu a2,1
366 mflo t1
367 mfhi t0
368 sd t1,16(a0)
369 sd t0,24(a0)
370 beqz a2,.L_bn_sqr_words_return
371
372 ld t0,16(a1)
373 dmultu t0,t0
374 mflo t1
375 mfhi t0
376 sd t1,32(a0)
377 sd t0,40(a0)
378 jr ra
379END(bn_sqr_words)
380
381.align 5
382LEAF(bn_add_words)
383 .set noreorder
384 bgtzl a3,.L_bn_add_words_proceed
385 ld t0,0(a1)
386 jr ra
387 move v0,zero
388 .set reorder
389
390.L_bn_add_words_proceed:
391 li MINUS4,-4
392 and AT,a3,MINUS4
393 move v0,zero
394 beqz AT,.L_bn_add_words_tail
395
396.L_bn_add_words_loop:
397 ld ta0,0(a2)
398 subu a3,4
399 ld t1,8(a1)
400 and AT,a3,MINUS4
401 ld t2,16(a1)
402 PTR_ADD a2,32
403 ld t3,24(a1)
404 PTR_ADD a0,32
405 ld ta1,-24(a2)
406 PTR_ADD a1,32
407 ld ta2,-16(a2)
408 ld ta3,-8(a2)
409 daddu ta0,t0
410 sltu t8,ta0,t0
411 daddu t0,ta0,v0
412 sltu v0,t0,ta0
413 sd t0,-32(a0)
414 daddu v0,t8
415
416 daddu ta1,t1
417 sltu t9,ta1,t1
418 daddu t1,ta1,v0
419 sltu v0,t1,ta1
420 sd t1,-24(a0)
421 daddu v0,t9
422
423 daddu ta2,t2
424 sltu t8,ta2,t2
425 daddu t2,ta2,v0
426 sltu v0,t2,ta2
427 sd t2,-16(a0)
428 daddu v0,t8
429
430 daddu ta3,t3
431 sltu t9,ta3,t3
432 daddu t3,ta3,v0
433 sltu v0,t3,ta3
434 sd t3,-8(a0)
435 daddu v0,t9
436
437 .set noreorder
438 bgtzl AT,.L_bn_add_words_loop
439 ld t0,0(a1)
440
441 bnezl a3,.L_bn_add_words_tail
442 ld t0,0(a1)
443 .set reorder
444
445.L_bn_add_words_return:
446 jr ra
447
448.L_bn_add_words_tail:
449 ld ta0,0(a2)
450 daddu ta0,t0
451 subu a3,1
452 sltu t8,ta0,t0
453 daddu t0,ta0,v0
454 sltu v0,t0,ta0
455 sd t0,0(a0)
456 daddu v0,t8
457 beqz a3,.L_bn_add_words_return
458
459 ld t1,8(a1)
460 ld ta1,8(a2)
461 daddu ta1,t1
462 subu a3,1
463 sltu t9,ta1,t1
464 daddu t1,ta1,v0
465 sltu v0,t1,ta1
466 sd t1,8(a0)
467 daddu v0,t9
468 beqz a3,.L_bn_add_words_return
469
470 ld t2,16(a1)
471 ld ta2,16(a2)
472 daddu ta2,t2
473 sltu t8,ta2,t2
474 daddu t2,ta2,v0
475 sltu v0,t2,ta2
476 sd t2,16(a0)
477 daddu v0,t8
478 jr ra
479END(bn_add_words)
480
481.align 5
482LEAF(bn_sub_words)
483 .set noreorder
484 bgtzl a3,.L_bn_sub_words_proceed
485 ld t0,0(a1)
486 jr ra
487 move v0,zero
488 .set reorder
489
490.L_bn_sub_words_proceed:
491 li MINUS4,-4
492 and AT,a3,MINUS4
493 move v0,zero
494 beqz AT,.L_bn_sub_words_tail
495
496.L_bn_sub_words_loop:
497 ld ta0,0(a2)
498 subu a3,4
499 ld t1,8(a1)
500 and AT,a3,MINUS4
501 ld t2,16(a1)
502 PTR_ADD a2,32
503 ld t3,24(a1)
504 PTR_ADD a0,32
505 ld ta1,-24(a2)
506 PTR_ADD a1,32
507 ld ta2,-16(a2)
508 ld ta3,-8(a2)
509 sltu t8,t0,ta0
510 dsubu t0,ta0
511 dsubu ta0,t0,v0
512 sd ta0,-32(a0)
513 MOVNZ (t0,v0,t8)
514
515 sltu t9,t1,ta1
516 dsubu t1,ta1
517 dsubu ta1,t1,v0
518 sd ta1,-24(a0)
519 MOVNZ (t1,v0,t9)
520
521
522 sltu t8,t2,ta2
523 dsubu t2,ta2
524 dsubu ta2,t2,v0
525 sd ta2,-16(a0)
526 MOVNZ (t2,v0,t8)
527
528 sltu t9,t3,ta3
529 dsubu t3,ta3
530 dsubu ta3,t3,v0
531 sd ta3,-8(a0)
532 MOVNZ (t3,v0,t9)
533
534 .set noreorder
535 bgtzl AT,.L_bn_sub_words_loop
536 ld t0,0(a1)
537
538 bnezl a3,.L_bn_sub_words_tail
539 ld t0,0(a1)
540 .set reorder
541
542.L_bn_sub_words_return:
543 jr ra
544
545.L_bn_sub_words_tail:
546 ld ta0,0(a2)
547 subu a3,1
548 sltu t8,t0,ta0
549 dsubu t0,ta0
550 dsubu ta0,t0,v0
551 MOVNZ (t0,v0,t8)
552 sd ta0,0(a0)
553 beqz a3,.L_bn_sub_words_return
554
555 ld t1,8(a1)
556 subu a3,1
557 ld ta1,8(a2)
558 sltu t9,t1,ta1
559 dsubu t1,ta1
560 dsubu ta1,t1,v0
561 MOVNZ (t1,v0,t9)
562 sd ta1,8(a0)
563 beqz a3,.L_bn_sub_words_return
564
565 ld t2,16(a1)
566 ld ta2,16(a2)
567 sltu t8,t2,ta2
568 dsubu t2,ta2
569 dsubu ta2,t2,v0
570 MOVNZ (t2,v0,t8)
571 sd ta2,16(a0)
572 jr ra
573END(bn_sub_words)
574
575#undef MINUS4
576
577.align 5
578LEAF(bn_div_3_words)
579 .set reorder
580 move a3,a0 /* we know that bn_div_words doesn't
581 * touch a3, ta2, ta3 and preserves a2
582 * so that we can save two arguments
583 * and return address in registers
584 * instead of stack:-)
585 */
586 ld a0,(a3)
587 move ta2,a1
588 ld a1,-8(a3)
589 bne a0,a2,.L_bn_div_3_words_proceed
590 li v0,-1
591 jr ra
592.L_bn_div_3_words_proceed:
593 move ta3,ra
594 bal bn_div_words
595 move ra,ta3
596 dmultu ta2,v0
597 ld t2,-16(a3)
598 move ta0,zero
599 mfhi t1
600 mflo t0
601 sltu t8,t1,v1
602.L_bn_div_3_words_inner_loop:
603 bnez t8,.L_bn_div_3_words_inner_loop_done
604 sgeu AT,t2,t0
605 seq t9,t1,v1
606 and AT,t9
607 sltu t3,t0,ta2
608 daddu v1,a2
609 dsubu t1,t3
610 dsubu t0,ta2
611 sltu t8,t1,v1
612 sltu ta0,v1,a2
613 or t8,ta0
614 .set noreorder
615 beqzl AT,.L_bn_div_3_words_inner_loop
616 dsubu v0,1
617 .set reorder
618.L_bn_div_3_words_inner_loop_done:
619 jr ra
620END(bn_div_3_words)
621
622.align 5
623LEAF(bn_div_words)
624 .set noreorder
625 bnezl a2,.L_bn_div_words_proceed
626 move v1,zero
627 jr ra
628 li v0,-1 /* I'd rather signal div-by-zero
629 * which can be done with 'break 7' */
630
631.L_bn_div_words_proceed:
632 bltz a2,.L_bn_div_words_body
633 move t9,v1
634 dsll a2,1
635 bgtz a2,.-4
636 addu t9,1
637
638 .set reorder
639 negu t1,t9
640 li t2,-1
641 dsll t2,t1
642 and t2,a0
643 dsrl AT,a1,t1
644 .set noreorder
645 bnezl t2,.+8
646 break 6 /* signal overflow */
647 .set reorder
648 dsll a0,t9
649 dsll a1,t9
650 or a0,AT
651
652#define QT ta0
653#define HH ta1
654#define DH v1
655.L_bn_div_words_body:
656 dsrl DH,a2,32
657 sgeu AT,a0,a2
658 .set noreorder
659 bnezl AT,.+8
660 dsubu a0,a2
661 .set reorder
662
663 li QT,-1
664 dsrl HH,a0,32
665 dsrl QT,32 /* q=0xffffffff */
666 beq DH,HH,.L_bn_div_words_skip_div1
667 ddivu zero,a0,DH
668 mflo QT
669.L_bn_div_words_skip_div1:
670 dmultu a2,QT
671 dsll t3,a0,32
672 dsrl AT,a1,32
673 or t3,AT
674 mflo t0
675 mfhi t1
676.L_bn_div_words_inner_loop1:
677 sltu t2,t3,t0
678 seq t8,HH,t1
679 sltu AT,HH,t1
680 and t2,t8
681 sltu v0,t0,a2
682 or AT,t2
683 .set noreorder
684 beqz AT,.L_bn_div_words_inner_loop1_done
685 dsubu t1,v0
686 dsubu t0,a2
687 b .L_bn_div_words_inner_loop1
688 dsubu QT,1
689 .set reorder
690.L_bn_div_words_inner_loop1_done:
691
692 dsll a1,32
693 dsubu a0,t3,t0
694 dsll v0,QT,32
695
696 li QT,-1
697 dsrl HH,a0,32
698 dsrl QT,32 /* q=0xffffffff */
699 beq DH,HH,.L_bn_div_words_skip_div2
700 ddivu zero,a0,DH
701 mflo QT
702.L_bn_div_words_skip_div2:
703#undef DH
704 dmultu a2,QT
705 dsll t3,a0,32
706 dsrl AT,a1,32
707 or t3,AT
708 mflo t0
709 mfhi t1
710.L_bn_div_words_inner_loop2:
711 sltu t2,t3,t0
712 seq t8,HH,t1
713 sltu AT,HH,t1
714 and t2,t8
715 sltu v1,t0,a2
716 or AT,t2
717 .set noreorder
718 beqz AT,.L_bn_div_words_inner_loop2_done
719 dsubu t1,v1
720 dsubu t0,a2
721 b .L_bn_div_words_inner_loop2
722 dsubu QT,1
723 .set reorder
724.L_bn_div_words_inner_loop2_done:
725#undef HH
726
727 dsubu a0,t3,t0
728 or v0,QT
729 dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
730 dsrl a2,t9 /* restore a2 */
731 jr ra
732#undef QT
733END(bn_div_words)
734
735#define a_0 t0
736#define a_1 t1
737#define a_2 t2
738#define a_3 t3
739#define b_0 ta0
740#define b_1 ta1
741#define b_2 ta2
742#define b_3 ta3
743
744#define a_4 s0
745#define a_5 s2
746#define a_6 s4
747#define a_7 a1 /* once we load a[7] we don't need a anymore */
748#define b_4 s1
749#define b_5 s3
750#define b_6 s5
751#define b_7 a2 /* once we load b[7] we don't need b anymore */
752
753#define t_1 t8
754#define t_2 t9
755
756#define c_1 v0
757#define c_2 v1
758#define c_3 a3
759
760#define FRAME_SIZE 48
761
762.align 5
763LEAF(bn_mul_comba8)
764 .set noreorder
765 PTR_SUB sp,FRAME_SIZE
766 .frame sp,64,ra
767 .set reorder
768 ld a_0,0(a1) /* If compiled with -mips3 option on
769 * R5000 box assembler barks on this
770 * line with "shouldn't have mult/div
771 * as last instruction in bb (R10K
772 * bug)" warning. If anybody out there
773 * has a clue about how to circumvent
774 * this do send me a note.
775 * <appro@fy.chalmers.se>
776 */
777 ld b_0,0(a2)
778 ld a_1,8(a1)
779 ld a_2,16(a1)
780 ld a_3,24(a1)
781 ld b_1,8(a2)
782 ld b_2,16(a2)
783 ld b_3,24(a2)
784 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
785 sd s0,0(sp)
786 sd s1,8(sp)
787 sd s2,16(sp)
788 sd s3,24(sp)
789 sd s4,32(sp)
790 sd s5,40(sp)
791 mflo c_1
792 mfhi c_2
793
794 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
795 ld a_4,32(a1)
796 ld a_5,40(a1)
797 ld a_6,48(a1)
798 ld a_7,56(a1)
799 ld b_4,32(a2)
800 ld b_5,40(a2)
801 mflo t_1
802 mfhi t_2
803 daddu c_2,t_1
804 sltu AT,c_2,t_1
805 daddu c_3,t_2,AT
806 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
807 ld b_6,48(a2)
808 ld b_7,56(a2)
809 sd c_1,0(a0) /* r[0]=c1; */
810 mflo t_1
811 mfhi t_2
812 daddu c_2,t_1
813 sltu AT,c_2,t_1
814 daddu t_2,AT
815 daddu c_3,t_2
816 sltu c_1,c_3,t_2
817 sd c_2,8(a0) /* r[1]=c2; */
818
819 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
820 mflo t_1
821 mfhi t_2
822 daddu c_3,t_1
823 sltu AT,c_3,t_1
824 daddu t_2,AT
825 daddu c_1,t_2
826 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
827 mflo t_1
828 mfhi t_2
829 daddu c_3,t_1
830 sltu AT,c_3,t_1
831 daddu t_2,AT
832 daddu c_1,t_2
833 sltu c_2,c_1,t_2
834 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
835 mflo t_1
836 mfhi t_2
837 daddu c_3,t_1
838 sltu AT,c_3,t_1
839 daddu t_2,AT
840 daddu c_1,t_2
841 sltu AT,c_1,t_2
842 daddu c_2,AT
843 sd c_3,16(a0) /* r[2]=c3; */
844
845 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
846 mflo t_1
847 mfhi t_2
848 daddu c_1,t_1
849 sltu AT,c_1,t_1
850 daddu t_2,AT
851 daddu c_2,t_2
852 sltu c_3,c_2,t_2
853 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
854 mflo t_1
855 mfhi t_2
856 daddu c_1,t_1
857 sltu AT,c_1,t_1
858 daddu t_2,AT
859 daddu c_2,t_2
860 sltu AT,c_2,t_2
861 daddu c_3,AT
862 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
863 mflo t_1
864 mfhi t_2
865 daddu c_1,t_1
866 sltu AT,c_1,t_1
867 daddu t_2,AT
868 daddu c_2,t_2
869 sltu AT,c_2,t_2
870 daddu c_3,AT
871 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
872 mflo t_1
873 mfhi t_2
874 daddu c_1,t_1
875 sltu AT,c_1,t_1
876 daddu t_2,AT
877 daddu c_2,t_2
878 sltu AT,c_2,t_2
879 daddu c_3,AT
880 sd c_1,24(a0) /* r[3]=c1; */
881
882 dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */
883 mflo t_1
884 mfhi t_2
885 daddu c_2,t_1
886 sltu AT,c_2,t_1
887 daddu t_2,AT
888 daddu c_3,t_2
889 sltu c_1,c_3,t_2
890 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
891 mflo t_1
892 mfhi t_2
893 daddu c_2,t_1
894 sltu AT,c_2,t_1
895 daddu t_2,AT
896 daddu c_3,t_2
897 sltu AT,c_3,t_2
898 daddu c_1,AT
899 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
900 mflo t_1
901 mfhi t_2
902 daddu c_2,t_1
903 sltu AT,c_2,t_1
904 daddu t_2,AT
905 daddu c_3,t_2
906 sltu AT,c_3,t_2
907 daddu c_1,AT
908 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
909 mflo t_1
910 mfhi t_2
911 daddu c_2,t_1
912 sltu AT,c_2,t_1
913 daddu t_2,AT
914 daddu c_3,t_2
915 sltu AT,c_3,t_2
916 daddu c_1,AT
917 dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */
918 mflo t_1
919 mfhi t_2
920 daddu c_2,t_1
921 sltu AT,c_2,t_1
922 daddu t_2,AT
923 daddu c_3,t_2
924 sltu AT,c_3,t_2
925 daddu c_1,AT
926 sd c_2,32(a0) /* r[4]=c2; */
927
928 dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */
929 mflo t_1
930 mfhi t_2
931 daddu c_3,t_1
932 sltu AT,c_3,t_1
933 daddu t_2,AT
934 daddu c_1,t_2
935 sltu c_2,c_1,t_2
936 dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */
937 mflo t_1
938 mfhi t_2
939 daddu c_3,t_1
940 sltu AT,c_3,t_1
941 daddu t_2,AT
942 daddu c_1,t_2
943 sltu AT,c_1,t_2
944 daddu c_2,AT
945 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
946 mflo t_1
947 mfhi t_2
948 daddu c_3,t_1
949 sltu AT,c_3,t_1
950 daddu t_2,AT
951 daddu c_1,t_2
952 sltu AT,c_1,t_2
953 daddu c_2,AT
954 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
955 mflo t_1
956 mfhi t_2
957 daddu c_3,t_1
958 sltu AT,c_3,t_1
959 daddu t_2,AT
960 daddu c_1,t_2
961 sltu AT,c_1,t_2
962 daddu c_2,AT
963 dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */
964 mflo t_1
965 mfhi t_2
966 daddu c_3,t_1
967 sltu AT,c_3,t_1
968 daddu t_2,AT
969 daddu c_1,t_2
970 sltu AT,c_1,t_2
971 daddu c_2,AT
972 dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */
973 mflo t_1
974 mfhi t_2
975 daddu c_3,t_1
976 sltu AT,c_3,t_1
977 daddu t_2,AT
978 daddu c_1,t_2
979 sltu AT,c_1,t_2
980 daddu c_2,AT
981 sd c_3,40(a0) /* r[5]=c3; */
982
983 dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */
984 mflo t_1
985 mfhi t_2
986 daddu c_1,t_1
987 sltu AT,c_1,t_1
988 daddu t_2,AT
989 daddu c_2,t_2
990 sltu c_3,c_2,t_2
991 dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */
992 mflo t_1
993 mfhi t_2
994 daddu c_1,t_1
995 sltu AT,c_1,t_1
996 daddu t_2,AT
997 daddu c_2,t_2
998 sltu AT,c_2,t_2
999 daddu c_3,AT
1000 dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */
1001 mflo t_1
1002 mfhi t_2
1003 daddu c_1,t_1
1004 sltu AT,c_1,t_1
1005 daddu t_2,AT
1006 daddu c_2,t_2
1007 sltu AT,c_2,t_2
1008 daddu c_3,AT
1009 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1010 mflo t_1
1011 mfhi t_2
1012 daddu c_1,t_1
1013 sltu AT,c_1,t_1
1014 daddu t_2,AT
1015 daddu c_2,t_2
1016 sltu AT,c_2,t_2
1017 daddu c_3,AT
1018 dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */
1019 mflo t_1
1020 mfhi t_2
1021 daddu c_1,t_1
1022 sltu AT,c_1,t_1
1023 daddu t_2,AT
1024 daddu c_2,t_2
1025 sltu AT,c_2,t_2
1026 daddu c_3,AT
1027 dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */
1028 mflo t_1
1029 mfhi t_2
1030 daddu c_1,t_1
1031 sltu AT,c_1,t_1
1032 daddu t_2,AT
1033 daddu c_2,t_2
1034 sltu AT,c_2,t_2
1035 daddu c_3,AT
1036 dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */
1037 mflo t_1
1038 mfhi t_2
1039 daddu c_1,t_1
1040 sltu AT,c_1,t_1
1041 daddu t_2,AT
1042 daddu c_2,t_2
1043 sltu AT,c_2,t_2
1044 daddu c_3,AT
1045 sd c_1,48(a0) /* r[6]=c1; */
1046
1047 dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */
1048 mflo t_1
1049 mfhi t_2
1050 daddu c_2,t_1
1051 sltu AT,c_2,t_1
1052 daddu t_2,AT
1053 daddu c_3,t_2
1054 sltu c_1,c_3,t_2
1055 dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */
1056 mflo t_1
1057 mfhi t_2
1058 daddu c_2,t_1
1059 sltu AT,c_2,t_1
1060 daddu t_2,AT
1061 daddu c_3,t_2
1062 sltu AT,c_3,t_2
1063 daddu c_1,AT
1064 dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */
1065 mflo t_1
1066 mfhi t_2
1067 daddu c_2,t_1
1068 sltu AT,c_2,t_1
1069 daddu t_2,AT
1070 daddu c_3,t_2
1071 sltu AT,c_3,t_2
1072 daddu c_1,AT
1073 dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */
1074 mflo t_1
1075 mfhi t_2
1076 daddu c_2,t_1
1077 sltu AT,c_2,t_1
1078 daddu t_2,AT
1079 daddu c_3,t_2
1080 sltu AT,c_3,t_2
1081 daddu c_1,AT
1082 dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */
1083 mflo t_1
1084 mfhi t_2
1085 daddu c_2,t_1
1086 sltu AT,c_2,t_1
1087 daddu t_2,AT
1088 daddu c_3,t_2
1089 sltu AT,c_3,t_2
1090 daddu c_1,AT
1091 dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */
1092 mflo t_1
1093 mfhi t_2
1094 daddu c_2,t_1
1095 sltu AT,c_2,t_1
1096 daddu t_2,AT
1097 daddu c_3,t_2
1098 sltu AT,c_3,t_2
1099 daddu c_1,AT
1100 dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */
1101 mflo t_1
1102 mfhi t_2
1103 daddu c_2,t_1
1104 sltu AT,c_2,t_1
1105 daddu t_2,AT
1106 daddu c_3,t_2
1107 sltu AT,c_3,t_2
1108 daddu c_1,AT
1109 dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */
1110 mflo t_1
1111 mfhi t_2
1112 daddu c_2,t_1
1113 sltu AT,c_2,t_1
1114 daddu t_2,AT
1115 daddu c_3,t_2
1116 sltu AT,c_3,t_2
1117 daddu c_1,AT
1118 sd c_2,56(a0) /* r[7]=c2; */
1119
1120 dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */
1121 mflo t_1
1122 mfhi t_2
1123 daddu c_3,t_1
1124 sltu AT,c_3,t_1
1125 daddu t_2,AT
1126 daddu c_1,t_2
1127 sltu c_2,c_1,t_2
1128 dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */
1129 mflo t_1
1130 mfhi t_2
1131 daddu c_3,t_1
1132 sltu AT,c_3,t_1
1133 daddu t_2,AT
1134 daddu c_1,t_2
1135 sltu AT,c_1,t_2
1136 daddu c_2,AT
1137 dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */
1138 mflo t_1
1139 mfhi t_2
1140 daddu c_3,t_1
1141 sltu AT,c_3,t_1
1142 daddu t_2,AT
1143 daddu c_1,t_2
1144 sltu AT,c_1,t_2
1145 daddu c_2,AT
1146 dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1147 mflo t_1
1148 mfhi t_2
1149 daddu c_3,t_1
1150 sltu AT,c_3,t_1
1151 daddu t_2,AT
1152 daddu c_1,t_2
1153 sltu AT,c_1,t_2
1154 daddu c_2,AT
1155 dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */
1156 mflo t_1
1157 mfhi t_2
1158 daddu c_3,t_1
1159 sltu AT,c_3,t_1
1160 daddu t_2,AT
1161 daddu c_1,t_2
1162 sltu AT,c_1,t_2
1163 daddu c_2,AT
1164 dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */
1165 mflo t_1
1166 mfhi t_2
1167 daddu c_3,t_1
1168 sltu AT,c_3,t_1
1169 daddu t_2,AT
1170 daddu c_1,t_2
1171 sltu AT,c_1,t_2
1172 daddu c_2,AT
1173 dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */
1174 mflo t_1
1175 mfhi t_2
1176 daddu c_3,t_1
1177 sltu AT,c_3,t_1
1178 daddu t_2,AT
1179 daddu c_1,t_2
1180 sltu AT,c_1,t_2
1181 daddu c_2,AT
1182 sd c_3,64(a0) /* r[8]=c3; */
1183
1184 dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */
1185 mflo t_1
1186 mfhi t_2
1187 daddu c_1,t_1
1188 sltu AT,c_1,t_1
1189 daddu t_2,AT
1190 daddu c_2,t_2
1191 sltu c_3,c_2,t_2
1192 dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */
1193 mflo t_1
1194 mfhi t_2
1195 daddu c_1,t_1
1196 sltu AT,c_1,t_1
1197 daddu t_2,AT
1198 daddu c_2,t_2
1199 sltu AT,c_2,t_2
1200 daddu c_3,AT
1201 dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */
1202 mflo t_1
1203 mfhi t_2
1204 daddu c_1,t_1
1205 sltu AT,c_1,t_1
1206 daddu t_2,AT
1207 daddu c_2,t_2
1208 sltu AT,c_2,t_2
1209 daddu c_3,AT
1210 dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */
1211 mflo t_1
1212 mfhi t_2
1213 daddu c_1,t_1
1214 sltu AT,c_1,t_1
1215 daddu t_2,AT
1216 daddu c_2,t_2
1217 sltu AT,c_2,t_2
1218 daddu c_3,AT
1219 dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */
1220 mflo t_1
1221 mfhi t_2
1222 daddu c_1,t_1
1223 sltu AT,c_1,t_1
1224 daddu t_2,AT
1225 daddu c_2,t_2
1226 sltu AT,c_2,t_2
1227 daddu c_3,AT
1228 dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */
1229 mflo t_1
1230 mfhi t_2
1231 daddu c_1,t_1
1232 sltu AT,c_1,t_1
1233 daddu t_2,AT
1234 daddu c_2,t_2
1235 sltu AT,c_2,t_2
1236 daddu c_3,AT
1237 sd c_1,72(a0) /* r[9]=c1; */
1238
1239 dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */
1240 mflo t_1
1241 mfhi t_2
1242 daddu c_2,t_1
1243 sltu AT,c_2,t_1
1244 daddu t_2,AT
1245 daddu c_3,t_2
1246 sltu c_1,c_3,t_2
1247 dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */
1248 mflo t_1
1249 mfhi t_2
1250 daddu c_2,t_1
1251 sltu AT,c_2,t_1
1252 daddu t_2,AT
1253 daddu c_3,t_2
1254 sltu AT,c_3,t_2
1255 daddu c_1,AT
1256 dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1257 mflo t_1
1258 mfhi t_2
1259 daddu c_2,t_1
1260 sltu AT,c_2,t_1
1261 daddu t_2,AT
1262 daddu c_3,t_2
1263 sltu AT,c_3,t_2
1264 daddu c_1,AT
1265 dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */
1266 mflo t_1
1267 mfhi t_2
1268 daddu c_2,t_1
1269 sltu AT,c_2,t_1
1270 daddu t_2,AT
1271 daddu c_3,t_2
1272 sltu AT,c_3,t_2
1273 daddu c_1,AT
1274 dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */
1275 mflo t_1
1276 mfhi t_2
1277 daddu c_2,t_1
1278 sltu AT,c_2,t_1
1279 daddu t_2,AT
1280 daddu c_3,t_2
1281 sltu AT,c_3,t_2
1282 daddu c_1,AT
1283 sd c_2,80(a0) /* r[10]=c2; */
1284
1285 dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */
1286 mflo t_1
1287 mfhi t_2
1288 daddu c_3,t_1
1289 sltu AT,c_3,t_1
1290 daddu t_2,AT
1291 daddu c_1,t_2
1292 sltu c_2,c_1,t_2
1293 dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */
1294 mflo t_1
1295 mfhi t_2
1296 daddu c_3,t_1
1297 sltu AT,c_3,t_1
1298 daddu t_2,AT
1299 daddu c_1,t_2
1300 sltu AT,c_1,t_2
1301 daddu c_2,AT
1302 dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */
1303 mflo t_1
1304 mfhi t_2
1305 daddu c_3,t_1
1306 sltu AT,c_3,t_1
1307 daddu t_2,AT
1308 daddu c_1,t_2
1309 sltu AT,c_1,t_2
1310 daddu c_2,AT
1311 dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */
1312 mflo t_1
1313 mfhi t_2
1314 daddu c_3,t_1
1315 sltu AT,c_3,t_1
1316 daddu t_2,AT
1317 daddu c_1,t_2
1318 sltu AT,c_1,t_2
1319 daddu c_2,AT
1320 sd c_3,88(a0) /* r[11]=c3; */
1321
1322 dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */
1323 mflo t_1
1324 mfhi t_2
1325 daddu c_1,t_1
1326 sltu AT,c_1,t_1
1327 daddu t_2,AT
1328 daddu c_2,t_2
1329 sltu c_3,c_2,t_2
1330 dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
1331 mflo t_1
1332 mfhi t_2
1333 daddu c_1,t_1
1334 sltu AT,c_1,t_1
1335 daddu t_2,AT
1336 daddu c_2,t_2
1337 sltu AT,c_2,t_2
1338 daddu c_3,AT
1339 dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */
1340 mflo t_1
1341 mfhi t_2
1342 daddu c_1,t_1
1343 sltu AT,c_1,t_1
1344 daddu t_2,AT
1345 daddu c_2,t_2
1346 sltu AT,c_2,t_2
1347 daddu c_3,AT
1348 sd c_1,96(a0) /* r[12]=c1; */
1349
1350 dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */
1351 mflo t_1
1352 mfhi t_2
1353 daddu c_2,t_1
1354 sltu AT,c_2,t_1
1355 daddu t_2,AT
1356 daddu c_3,t_2
1357 sltu c_1,c_3,t_2
1358 dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */
1359 mflo t_1
1360 mfhi t_2
1361 daddu c_2,t_1
1362 sltu AT,c_2,t_1
1363 daddu t_2,AT
1364 daddu c_3,t_2
1365 sltu AT,c_3,t_2
1366 daddu c_1,AT
1367 sd c_2,104(a0) /* r[13]=c2; */
1368
1369 dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
1370 ld s0,0(sp)
1371 ld s1,8(sp)
1372 ld s2,16(sp)
1373 ld s3,24(sp)
1374 ld s4,32(sp)
1375 ld s5,40(sp)
1376 mflo t_1
1377 mfhi t_2
1378 daddu c_3,t_1
1379 sltu AT,c_3,t_1
1380 daddu t_2,AT
1381 daddu c_1,t_2
1382 sd c_3,112(a0) /* r[14]=c3; */
1383 sd c_1,120(a0) /* r[15]=c1; */
1384
1385 PTR_ADD sp,FRAME_SIZE
1386
1387 jr ra
1388END(bn_mul_comba8)
1389
1390.align 5
1391LEAF(bn_mul_comba4)
1392 .set reorder
1393 ld a_0,0(a1)
1394 ld b_0,0(a2)
1395 ld a_1,8(a1)
1396 ld a_2,16(a1)
1397 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1398 ld a_3,24(a1)
1399 ld b_1,8(a2)
1400 ld b_2,16(a2)
1401 ld b_3,24(a2)
1402 mflo c_1
1403 mfhi c_2
1404 sd c_1,0(a0)
1405
1406 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
1407 mflo t_1
1408 mfhi t_2
1409 daddu c_2,t_1
1410 sltu AT,c_2,t_1
1411 daddu c_3,t_2,AT
1412 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
1413 mflo t_1
1414 mfhi t_2
1415 daddu c_2,t_1
1416 sltu AT,c_2,t_1
1417 daddu t_2,AT
1418 daddu c_3,t_2
1419 sltu c_1,c_3,t_2
1420 sd c_2,8(a0)
1421
1422 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
1423 mflo t_1
1424 mfhi t_2
1425 daddu c_3,t_1
1426 sltu AT,c_3,t_1
1427 daddu t_2,AT
1428 daddu c_1,t_2
1429 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1430 mflo t_1
1431 mfhi t_2
1432 daddu c_3,t_1
1433 sltu AT,c_3,t_1
1434 daddu t_2,AT
1435 daddu c_1,t_2
1436 sltu c_2,c_1,t_2
1437 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
1438 mflo t_1
1439 mfhi t_2
1440 daddu c_3,t_1
1441 sltu AT,c_3,t_1
1442 daddu t_2,AT
1443 daddu c_1,t_2
1444 sltu AT,c_1,t_2
1445 daddu c_2,AT
1446 sd c_3,16(a0)
1447
1448 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
1449 mflo t_1
1450 mfhi t_2
1451 daddu c_1,t_1
1452 sltu AT,c_1,t_1
1453 daddu t_2,AT
1454 daddu c_2,t_2
1455 sltu c_3,c_2,t_2
1456 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
1457 mflo t_1
1458 mfhi t_2
1459 daddu c_1,t_1
1460 sltu AT,c_1,t_1
1461 daddu t_2,AT
1462 daddu c_2,t_2
1463 sltu AT,c_2,t_2
1464 daddu c_3,AT
1465 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
1466 mflo t_1
1467 mfhi t_2
1468 daddu c_1,t_1
1469 sltu AT,c_1,t_1
1470 daddu t_2,AT
1471 daddu c_2,t_2
1472 sltu AT,c_2,t_2
1473 daddu c_3,AT
1474 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
1475 mflo t_1
1476 mfhi t_2
1477 daddu c_1,t_1
1478 sltu AT,c_1,t_1
1479 daddu t_2,AT
1480 daddu c_2,t_2
1481 sltu AT,c_2,t_2
1482 daddu c_3,AT
1483 sd c_1,24(a0)
1484
1485 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
1486 mflo t_1
1487 mfhi t_2
1488 daddu c_2,t_1
1489 sltu AT,c_2,t_1
1490 daddu t_2,AT
1491 daddu c_3,t_2
1492 sltu c_1,c_3,t_2
1493 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1494 mflo t_1
1495 mfhi t_2
1496 daddu c_2,t_1
1497 sltu AT,c_2,t_1
1498 daddu t_2,AT
1499 daddu c_3,t_2
1500 sltu AT,c_3,t_2
1501 daddu c_1,AT
1502 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
1503 mflo t_1
1504 mfhi t_2
1505 daddu c_2,t_1
1506 sltu AT,c_2,t_1
1507 daddu t_2,AT
1508 daddu c_3,t_2
1509 sltu AT,c_3,t_2
1510 daddu c_1,AT
1511 sd c_2,32(a0)
1512
1513 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
1514 mflo t_1
1515 mfhi t_2
1516 daddu c_3,t_1
1517 sltu AT,c_3,t_1
1518 daddu t_2,AT
1519 daddu c_1,t_2
1520 sltu c_2,c_1,t_2
1521 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
1522 mflo t_1
1523 mfhi t_2
1524 daddu c_3,t_1
1525 sltu AT,c_3,t_1
1526 daddu t_2,AT
1527 daddu c_1,t_2
1528 sltu AT,c_1,t_2
1529 daddu c_2,AT
1530 sd c_3,40(a0)
1531
1532 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1533 mflo t_1
1534 mfhi t_2
1535 daddu c_1,t_1
1536 sltu AT,c_1,t_1
1537 daddu t_2,AT
1538 daddu c_2,t_2
1539 sd c_1,48(a0)
1540 sd c_2,56(a0)
1541
1542 jr ra
1543END(bn_mul_comba4)
1544
1545#undef a_4
1546#undef a_5
1547#undef a_6
1548#undef a_7
1549#define a_4 b_0
1550#define a_5 b_1
1551#define a_6 b_2
1552#define a_7 b_3
1553
1554.align 5
1555LEAF(bn_sqr_comba8)
1556 .set reorder
1557 ld a_0,0(a1)
1558 ld a_1,8(a1)
1559 ld a_2,16(a1)
1560 ld a_3,24(a1)
1561
1562 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1563 ld a_4,32(a1)
1564 ld a_5,40(a1)
1565 ld a_6,48(a1)
1566 ld a_7,56(a1)
1567 mflo c_1
1568 mfhi c_2
1569 sd c_1,0(a0)
1570
1571 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
1572 mflo t_1
1573 mfhi t_2
1574 slt c_1,t_2,zero
1575 dsll t_2,1
1576 slt a2,t_1,zero
1577 daddu t_2,a2
1578 dsll t_1,1
1579 daddu c_2,t_1
1580 sltu AT,c_2,t_1
1581 daddu c_3,t_2,AT
1582 sd c_2,8(a0)
1583
1584 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
1585 mflo t_1
1586 mfhi t_2
1587 slt c_2,t_2,zero
1588 dsll t_2,1
1589 slt a2,t_1,zero
1590 daddu t_2,a2
1591 dsll t_1,1
1592 daddu c_3,t_1
1593 sltu AT,c_3,t_1
1594 daddu t_2,AT
1595 daddu c_1,t_2
1596 sltu AT,c_1,t_2
1597 daddu c_2,AT
1598 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1599 mflo t_1
1600 mfhi t_2
1601 daddu c_3,t_1
1602 sltu AT,c_3,t_1
1603 daddu t_2,AT
1604 daddu c_1,t_2
1605 sltu AT,c_1,t_2
1606 daddu c_2,AT
1607 sd c_3,16(a0)
1608
1609 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
1610 mflo t_1
1611 mfhi t_2
1612 slt c_3,t_2,zero
1613 dsll t_2,1
1614 slt a2,t_1,zero
1615 daddu t_2,a2
1616 dsll t_1,1
1617 daddu c_1,t_1
1618 sltu AT,c_1,t_1
1619 daddu t_2,AT
1620 daddu c_2,t_2
1621 sltu AT,c_2,t_2
1622 daddu c_3,AT
1623 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
1624 mflo t_1
1625 mfhi t_2
1626 slt AT,t_2,zero
1627 daddu c_3,AT
1628 dsll t_2,1
1629 slt a2,t_1,zero
1630 daddu t_2,a2
1631 dsll t_1,1
1632 daddu c_1,t_1
1633 sltu AT,c_1,t_1
1634 daddu t_2,AT
1635 daddu c_2,t_2
1636 sltu AT,c_2,t_2
1637 daddu c_3,AT
1638 sd c_1,24(a0)
1639
1640 dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */
1641 mflo t_1
1642 mfhi t_2
1643 slt c_1,t_2,zero
1644 dsll t_2,1
1645 slt a2,t_1,zero
1646 daddu t_2,a2
1647 dsll t_1,1
1648 daddu c_2,t_1
1649 sltu AT,c_2,t_1
1650 daddu t_2,AT
1651 daddu c_3,t_2
1652 sltu AT,c_3,t_2
1653 daddu c_1,AT
1654 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
1655 mflo t_1
1656 mfhi t_2
1657 slt AT,t_2,zero
1658 daddu c_1,AT
1659 dsll t_2,1
1660 slt a2,t_1,zero
1661 daddu t_2,a2
1662 dsll t_1,1
1663 daddu c_2,t_1
1664 sltu AT,c_2,t_1
1665 daddu t_2,AT
1666 daddu c_3,t_2
1667 sltu AT,c_3,t_2
1668 daddu c_1,AT
1669 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1670 mflo t_1
1671 mfhi t_2
1672 daddu c_2,t_1
1673 sltu AT,c_2,t_1
1674 daddu t_2,AT
1675 daddu c_3,t_2
1676 sltu AT,c_3,t_2
1677 daddu c_1,AT
1678 sd c_2,32(a0)
1679
1680 dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */
1681 mflo t_1
1682 mfhi t_2
1683 slt c_2,t_2,zero
1684 dsll t_2,1
1685 slt a2,t_1,zero
1686 daddu t_2,a2
1687 dsll t_1,1
1688 daddu c_3,t_1
1689 sltu AT,c_3,t_1
1690 daddu t_2,AT
1691 daddu c_1,t_2
1692 sltu AT,c_1,t_2
1693 daddu c_2,AT
1694 dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */
1695 mflo t_1
1696 mfhi t_2
1697 slt AT,t_2,zero
1698 daddu c_2,AT
1699 dsll t_2,1
1700 slt a2,t_1,zero
1701 daddu t_2,a2
1702 dsll t_1,1
1703 daddu c_3,t_1
1704 sltu AT,c_3,t_1
1705 daddu t_2,AT
1706 daddu c_1,t_2
1707 sltu AT,c_1,t_2
1708 daddu c_2,AT
1709 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
1710 mflo t_1
1711 mfhi t_2
1712 slt AT,t_2,zero
1713 daddu c_2,AT
1714 dsll t_2,1
1715 slt a2,t_1,zero
1716 daddu t_2,a2
1717 dsll t_1,1
1718 daddu c_3,t_1
1719 sltu AT,c_3,t_1
1720 daddu t_2,AT
1721 daddu c_1,t_2
1722 sltu AT,c_1,t_2
1723 daddu c_2,AT
1724 sd c_3,40(a0)
1725
1726 dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */
1727 mflo t_1
1728 mfhi t_2
1729 slt c_3,t_2,zero
1730 dsll t_2,1
1731 slt a2,t_1,zero
1732 daddu t_2,a2
1733 dsll t_1,1
1734 daddu c_1,t_1
1735 sltu AT,c_1,t_1
1736 daddu t_2,AT
1737 daddu c_2,t_2
1738 sltu AT,c_2,t_2
1739 daddu c_3,AT
1740 dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */
1741 mflo t_1
1742 mfhi t_2
1743 slt AT,t_2,zero
1744 daddu c_3,AT
1745 dsll t_2,1
1746 slt a2,t_1,zero
1747 daddu t_2,a2
1748 dsll t_1,1
1749 daddu c_1,t_1
1750 sltu AT,c_1,t_1
1751 daddu t_2,AT
1752 daddu c_2,t_2
1753 sltu AT,c_2,t_2
1754 daddu c_3,AT
1755 dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */
1756 mflo t_1
1757 mfhi t_2
1758 slt AT,t_2,zero
1759 daddu c_3,AT
1760 dsll t_2,1
1761 slt a2,t_1,zero
1762 daddu t_2,a2
1763 dsll t_1,1
1764 daddu c_1,t_1
1765 sltu AT,c_1,t_1
1766 daddu t_2,AT
1767 daddu c_2,t_2
1768 sltu AT,c_2,t_2
1769 daddu c_3,AT
1770 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1771 mflo t_1
1772 mfhi t_2
1773 daddu c_1,t_1
1774 sltu AT,c_1,t_1
1775 daddu t_2,AT
1776 daddu c_2,t_2
1777 sltu AT,c_2,t_2
1778 daddu c_3,AT
1779 sd c_1,48(a0)
1780
1781 dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */
1782 mflo t_1
1783 mfhi t_2
1784 slt c_1,t_2,zero
1785 dsll t_2,1
1786 slt a2,t_1,zero
1787 daddu t_2,a2
1788 dsll t_1,1
1789 daddu c_2,t_1
1790 sltu AT,c_2,t_1
1791 daddu t_2,AT
1792 daddu c_3,t_2
1793 sltu AT,c_3,t_2
1794 daddu c_1,AT
1795 dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */
1796 mflo t_1
1797 mfhi t_2
1798 slt AT,t_2,zero
1799 daddu c_1,AT
1800 dsll t_2,1
1801 slt a2,t_1,zero
1802 daddu t_2,a2
1803 dsll t_1,1
1804 daddu c_2,t_1
1805 sltu AT,c_2,t_1
1806 daddu t_2,AT
1807 daddu c_3,t_2
1808 sltu AT,c_3,t_2
1809 daddu c_1,AT
1810 dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */
1811 mflo t_1
1812 mfhi t_2
1813 slt AT,t_2,zero
1814 daddu c_1,AT
1815 dsll t_2,1
1816 slt a2,t_1,zero
1817 daddu t_2,a2
1818 dsll t_1,1
1819 daddu c_2,t_1
1820 sltu AT,c_2,t_1
1821 daddu t_2,AT
1822 daddu c_3,t_2
1823 sltu AT,c_3,t_2
1824 daddu c_1,AT
1825 dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */
1826 mflo t_1
1827 mfhi t_2
1828 slt AT,t_2,zero
1829 daddu c_1,AT
1830 dsll t_2,1
1831 slt a2,t_1,zero
1832 daddu t_2,a2
1833 dsll t_1,1
1834 daddu c_2,t_1
1835 sltu AT,c_2,t_1
1836 daddu t_2,AT
1837 daddu c_3,t_2
1838 sltu AT,c_3,t_2
1839 daddu c_1,AT
1840 sd c_2,56(a0)
1841
1842 dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */
1843 mflo t_1
1844 mfhi t_2
1845 slt c_2,t_2,zero
1846 dsll t_2,1
1847 slt a2,t_1,zero
1848 daddu t_2,a2
1849 dsll t_1,1
1850 daddu c_3,t_1
1851 sltu AT,c_3,t_1
1852 daddu t_2,AT
1853 daddu c_1,t_2
1854 sltu AT,c_1,t_2
1855 daddu c_2,AT
1856 dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */
1857 mflo t_1
1858 mfhi t_2
1859 slt AT,t_2,zero
1860 daddu c_2,AT
1861 dsll t_2,1
1862 slt a2,t_1,zero
1863 daddu t_2,a2
1864 dsll t_1,1
1865 daddu c_3,t_1
1866 sltu AT,c_3,t_1
1867 daddu t_2,AT
1868 daddu c_1,t_2
1869 sltu AT,c_1,t_2
1870 daddu c_2,AT
1871 dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */
1872 mflo t_1
1873 mfhi t_2
1874 slt AT,t_2,zero
1875 daddu c_2,AT
1876 dsll t_2,1
1877 slt a2,t_1,zero
1878 daddu t_2,a2
1879 dsll t_1,1
1880 daddu c_3,t_1
1881 sltu AT,c_3,t_1
1882 daddu t_2,AT
1883 daddu c_1,t_2
1884 sltu AT,c_1,t_2
1885 daddu c_2,AT
1886 dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1887 mflo t_1
1888 mfhi t_2
1889 daddu c_3,t_1
1890 sltu AT,c_3,t_1
1891 daddu t_2,AT
1892 daddu c_1,t_2
1893 sltu AT,c_1,t_2
1894 daddu c_2,AT
1895 sd c_3,64(a0)
1896
1897 dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */
1898 mflo t_1
1899 mfhi t_2
1900 slt c_3,t_2,zero
1901 dsll t_2,1
1902 slt a2,t_1,zero
1903 daddu t_2,a2
1904 dsll t_1,1
1905 daddu c_1,t_1
1906 sltu AT,c_1,t_1
1907 daddu t_2,AT
1908 daddu c_2,t_2
1909 sltu AT,c_2,t_2
1910 daddu c_3,AT
1911 dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */
1912 mflo t_1
1913 mfhi t_2
1914 slt AT,t_2,zero
1915 daddu c_3,AT
1916 dsll t_2,1
1917 slt a2,t_1,zero
1918 daddu t_2,a2
1919 dsll t_1,1
1920 daddu c_1,t_1
1921 sltu AT,c_1,t_1
1922 daddu t_2,AT
1923 daddu c_2,t_2
1924 sltu AT,c_2,t_2
1925 daddu c_3,AT
1926 dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */
1927 mflo t_1
1928 mfhi t_2
1929 slt AT,t_2,zero
1930 daddu c_3,AT
1931 dsll t_2,1
1932 slt a2,t_1,zero
1933 daddu t_2,a2
1934 dsll t_1,1
1935 daddu c_1,t_1
1936 sltu AT,c_1,t_1
1937 daddu t_2,AT
1938 daddu c_2,t_2
1939 sltu AT,c_2,t_2
1940 daddu c_3,AT
1941 sd c_1,72(a0)
1942
1943 dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */
1944 mflo t_1
1945 mfhi t_2
1946 slt c_1,t_2,zero
1947 dsll t_2,1
1948 slt a2,t_1,zero
1949 daddu t_2,a2
1950 dsll t_1,1
1951 daddu c_2,t_1
1952 sltu AT,c_2,t_1
1953 daddu t_2,AT
1954 daddu c_3,t_2
1955 sltu AT,c_3,t_2
1956 daddu c_1,AT
1957 dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */
1958 mflo t_1
1959 mfhi t_2
1960 slt AT,t_2,zero
1961 daddu c_1,AT
1962 dsll t_2,1
1963 slt a2,t_1,zero
1964 daddu t_2,a2
1965 dsll t_1,1
1966 daddu c_2,t_1
1967 sltu AT,c_2,t_1
1968 daddu t_2,AT
1969 daddu c_3,t_2
1970 sltu AT,c_3,t_2
1971 daddu c_1,AT
1972 dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1973 mflo t_1
1974 mfhi t_2
1975 daddu c_2,t_1
1976 sltu AT,c_2,t_1
1977 daddu t_2,AT
1978 daddu c_3,t_2
1979 sltu AT,c_3,t_2
1980 daddu c_1,AT
1981 sd c_2,80(a0)
1982
1983 dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */
1984 mflo t_1
1985 mfhi t_2
1986 slt c_2,t_2,zero
1987 dsll t_2,1
1988 slt a2,t_1,zero
1989 daddu t_2,a2
1990 dsll t_1,1
1991 daddu c_3,t_1
1992 sltu AT,c_3,t_1
1993 daddu t_2,AT
1994 daddu c_1,t_2
1995 sltu AT,c_1,t_2
1996 daddu c_2,AT
1997 dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */
1998 mflo t_1
1999 mfhi t_2
2000 slt AT,t_2,zero
2001 daddu c_2,AT
2002 dsll t_2,1
2003 slt a2,t_1,zero
2004 daddu t_2,a2
2005 dsll t_1,1
2006 daddu c_3,t_1
2007 sltu AT,c_3,t_1
2008 daddu t_2,AT
2009 daddu c_1,t_2
2010 sltu AT,c_1,t_2
2011 daddu c_2,AT
2012 sd c_3,88(a0)
2013
2014 dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */
2015 mflo t_1
2016 mfhi t_2
2017 slt c_3,t_2,zero
2018 dsll t_2,1
2019 slt a2,t_1,zero
2020 daddu t_2,a2
2021 dsll t_1,1
2022 daddu c_1,t_1
2023 sltu AT,c_1,t_1
2024 daddu t_2,AT
2025 daddu c_2,t_2
2026 sltu AT,c_2,t_2
2027 daddu c_3,AT
2028 dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
2029 mflo t_1
2030 mfhi t_2
2031 daddu c_1,t_1
2032 sltu AT,c_1,t_1
2033 daddu t_2,AT
2034 daddu c_2,t_2
2035 sltu AT,c_2,t_2
2036 daddu c_3,AT
2037 sd c_1,96(a0)
2038
2039 dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */
2040 mflo t_1
2041 mfhi t_2
2042 slt c_1,t_2,zero
2043 dsll t_2,1
2044 slt a2,t_1,zero
2045 daddu t_2,a2
2046 dsll t_1,1
2047 daddu c_2,t_1
2048 sltu AT,c_2,t_1
2049 daddu t_2,AT
2050 daddu c_3,t_2
2051 sltu AT,c_3,t_2
2052 daddu c_1,AT
2053 sd c_2,104(a0)
2054
2055 dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
2056 mflo t_1
2057 mfhi t_2
2058 daddu c_3,t_1
2059 sltu AT,c_3,t_1
2060 daddu t_2,AT
2061 daddu c_1,t_2
2062 sd c_3,112(a0)
2063 sd c_1,120(a0)
2064
2065 jr ra
2066END(bn_sqr_comba8)
2067
2068.align 5
2069LEAF(bn_sqr_comba4)
2070 .set reorder
2071 ld a_0,0(a1)
2072 ld a_1,8(a1)
2073 ld a_2,16(a1)
2074 ld a_3,24(a1)
2075 dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
2076 mflo c_1
2077 mfhi c_2
2078 sd c_1,0(a0)
2079
2080 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
2081 mflo t_1
2082 mfhi t_2
2083 slt c_1,t_2,zero
2084 dsll t_2,1
2085 slt a2,t_1,zero
2086 daddu t_2,a2
2087 dsll t_1,1
2088 daddu c_2,t_1
2089 sltu AT,c_2,t_1
2090 daddu c_3,t_2,AT
2091 sd c_2,8(a0)
2092
2093 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
2094 mflo t_1
2095 mfhi t_2
2096 slt c_2,t_2,zero
2097 dsll t_2,1
2098 slt a2,t_1,zero
2099 daddu t_2,a2
2100 dsll t_1,1
2101 daddu c_3,t_1
2102 sltu AT,c_3,t_1
2103 daddu t_2,AT
2104 daddu c_1,t_2
2105 sltu AT,c_1,t_2
2106 daddu c_2,AT
2107 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
2108 mflo t_1
2109 mfhi t_2
2110 daddu c_3,t_1
2111 sltu AT,c_3,t_1
2112 daddu t_2,AT
2113 daddu c_1,t_2
2114 sltu AT,c_1,t_2
2115 daddu c_2,AT
2116 sd c_3,16(a0)
2117
2118 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
2119 mflo t_1
2120 mfhi t_2
2121 slt c_3,t_2,zero
2122 dsll t_2,1
2123 slt a2,t_1,zero
2124 daddu t_2,a2
2125 dsll t_1,1
2126 daddu c_1,t_1
2127 sltu AT,c_1,t_1
2128 daddu t_2,AT
2129 daddu c_2,t_2
2130 sltu AT,c_2,t_2
2131 daddu c_3,AT
2132 dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */
2133 mflo t_1
2134 mfhi t_2
2135 slt AT,t_2,zero
2136 daddu c_3,AT
2137 dsll t_2,1
2138 slt a2,t_1,zero
2139 daddu t_2,a2
2140 dsll t_1,1
2141 daddu c_1,t_1
2142 sltu AT,c_1,t_1
2143 daddu t_2,AT
2144 daddu c_2,t_2
2145 sltu AT,c_2,t_2
2146 daddu c_3,AT
2147 sd c_1,24(a0)
2148
2149 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
2150 mflo t_1
2151 mfhi t_2
2152 slt c_1,t_2,zero
2153 dsll t_2,1
2154 slt a2,t_1,zero
2155 daddu t_2,a2
2156 dsll t_1,1
2157 daddu c_2,t_1
2158 sltu AT,c_2,t_1
2159 daddu t_2,AT
2160 daddu c_3,t_2
2161 sltu AT,c_3,t_2
2162 daddu c_1,AT
2163 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
2164 mflo t_1
2165 mfhi t_2
2166 daddu c_2,t_1
2167 sltu AT,c_2,t_1
2168 daddu t_2,AT
2169 daddu c_3,t_2
2170 sltu AT,c_3,t_2
2171 daddu c_1,AT
2172 sd c_2,32(a0)
2173
2174 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
2175 mflo t_1
2176 mfhi t_2
2177 slt c_2,t_2,zero
2178 dsll t_2,1
2179 slt a2,t_1,zero
2180 daddu t_2,a2
2181 dsll t_1,1
2182 daddu c_3,t_1
2183 sltu AT,c_3,t_1
2184 daddu t_2,AT
2185 daddu c_1,t_2
2186 sltu AT,c_1,t_2
2187 daddu c_2,AT
2188 sd c_3,40(a0)
2189
2190 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
2191 mflo t_1
2192 mfhi t_2
2193 daddu c_1,t_1
2194 sltu AT,c_1,t_1
2195 daddu t_2,AT
2196 daddu c_2,t_2
2197 sd c_1,48(a0)
2198 sd c_2,56(a0)
2199
2200 jr ra
2201END(bn_sqr_comba4)
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s
deleted file mode 100644
index f3b16290eb..0000000000
--- a/src/lib/libcrypto/bn/asm/pa-risc2.s
+++ /dev/null
@@ -1,1618 +0,0 @@
1;
2; PA-RISC 2.0 implementation of bn_asm code, based on the
3; 64-bit version of the code. This code is effectively the
4; same as the 64-bit version except the register model is
5; slightly different given all values must be 32-bit between
6; function calls. Thus the 64-bit return values are returned
7; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit
8;
9;
10; This code is approximately 2x faster than the C version
11; for RSA/DSA.
12;
13; See http://devresource.hp.com/ for more details on the PA-RISC
14; architecture. Also see the book "PA-RISC 2.0 Architecture"
15; by Gerry Kane for information on the instruction set architecture.
16;
17; Code written by Chris Ruemmler (with some help from the HP C
18; compiler).
19;
20; The code compiles with HP's assembler
21;
22
23 .level 2.0N
24 .space $TEXT$
25 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
26
27;
28; Global Register definitions used for the routines.
29;
30; Some information about HP's runtime architecture for 32-bits.
31;
32; "Caller save" means the calling function must save the register
33; if it wants the register to be preserved.
34; "Callee save" means if a function uses the register, it must save
35; the value before using it.
36;
37; For the floating point registers
38;
39; "caller save" registers: fr4-fr11, fr22-fr31
40; "callee save" registers: fr12-fr21
41; "special" registers: fr0-fr3 (status and exception registers)
42;
43; For the integer registers
44; value zero : r0
45; "caller save" registers: r1,r19-r26
46; "callee save" registers: r3-r18
47; return register : r2 (rp)
48; return values ; r28,r29 (ret0,ret1)
49; Stack pointer ; r30 (sp)
50; millicode return ptr ; r31 (also a caller save register)
51
52
53;
54; Arguments to the routines
55;
56r_ptr .reg %r26
57a_ptr .reg %r25
58b_ptr .reg %r24
59num .reg %r24
60n .reg %r23
61
62;
63; Note that the "w" argument for bn_mul_add_words and bn_mul_words
64; is passed on the stack at a delta of -56 from the top of stack
65; as the routine is entered.
66;
67
68;
69; Globals used in some routines
70;
71
72top_overflow .reg %r23
73high_mask .reg %r22 ; value 0xffffffff80000000L
74
75
76;------------------------------------------------------------------------------
77;
78; bn_mul_add_words
79;
80;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
81; int num, BN_ULONG w)
82;
83; arg0 = r_ptr
84; arg1 = a_ptr
85; arg3 = num
86; -56(sp) = w
87;
88; Local register definitions
89;
90
91fm1 .reg %fr22
92fm .reg %fr23
93ht_temp .reg %fr24
94ht_temp_1 .reg %fr25
95lt_temp .reg %fr26
96lt_temp_1 .reg %fr27
97fm1_1 .reg %fr28
98fm_1 .reg %fr29
99
100fw_h .reg %fr7L
101fw_l .reg %fr7R
102fw .reg %fr7
103
104fht_0 .reg %fr8L
105flt_0 .reg %fr8R
106t_float_0 .reg %fr8
107
108fht_1 .reg %fr9L
109flt_1 .reg %fr9R
110t_float_1 .reg %fr9
111
112tmp_0 .reg %r31
113tmp_1 .reg %r21
114m_0 .reg %r20
115m_1 .reg %r19
116ht_0 .reg %r1
117ht_1 .reg %r3
118lt_0 .reg %r4
119lt_1 .reg %r5
120m1_0 .reg %r6
121m1_1 .reg %r7
122rp_val .reg %r8
123rp_val_1 .reg %r9
124
125bn_mul_add_words
126 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
127 .proc
128 .callinfo frame=128
129 .entry
130 .align 64
131
132 STD %r3,0(%sp) ; save r3
133 STD %r4,8(%sp) ; save r4
134 NOP ; Needed to make the loop 16-byte aligned
135 NOP ; needed to make the loop 16-byte aligned
136
137 STD %r5,16(%sp) ; save r5
138 NOP
139 STD %r6,24(%sp) ; save r6
140 STD %r7,32(%sp) ; save r7
141
142 STD %r8,40(%sp) ; save r8
143 STD %r9,48(%sp) ; save r9
144 COPY %r0,%ret1 ; return 0 by default
145 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
146
147 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
148 LDO 128(%sp),%sp ; bump stack
149
150 ;
151 ; The loop is unrolled twice, so if there is only 1 number
152 ; then go straight to the cleanup code.
153 ;
154 CMPIB,= 1,num,bn_mul_add_words_single_top
155 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
156
157 ;
158 ; This loop is unrolled 2 times (64-byte aligned as well)
159 ;
160 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
161 ; two 32-bit mutiplies can be issued per cycle.
162 ;
163bn_mul_add_words_unroll2
164
165 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
166 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
167 LDD 0(r_ptr),rp_val ; rp[0]
168 LDD 8(r_ptr),rp_val_1 ; rp[1]
169
170 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
171 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
172 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
173 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
174
175 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
176 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
177 FSTD fm,-8(%sp) ; -8(sp) = m[0]
178 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
179
180 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
181 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
182 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
183 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
184
185 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
186 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
187 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
188 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
189
190 LDD -8(%sp),m_0 ; m[0]
191 LDD -40(%sp),m_1 ; m[1]
192 LDD -16(%sp),m1_0 ; m1[0]
193 LDD -48(%sp),m1_1 ; m1[1]
194
195 LDD -24(%sp),ht_0 ; ht[0]
196 LDD -56(%sp),ht_1 ; ht[1]
197 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
198 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
199
200 LDD -32(%sp),lt_0
201 LDD -64(%sp),lt_1
202 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0])
203 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
204
205 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1])
206 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
207 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
208 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
209
210 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
211 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
212 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
213 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
214
215 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
216 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
217 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
218 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
219
220 ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c;
221 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
222 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
223 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
224
225 LDO -2(num),num ; num = num - 2;
226 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
227 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
228 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
229
230 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
231 ADD,DC ht_1,%r0,%ret1 ; ht[1]++
232 LDO 16(a_ptr),a_ptr ; a_ptr += 2
233
234 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
235 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
236 LDO 16(r_ptr),r_ptr ; r_ptr += 2
237
238 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
239
240 ;
241 ; Top of loop aligned on 64-byte boundary
242 ;
243bn_mul_add_words_single_top
244 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
245 LDD 0(r_ptr),rp_val ; rp[0]
246 LDO 8(a_ptr),a_ptr ; a_ptr++
247 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
248 FSTD fm1,-16(%sp) ; -16(sp) = m1
249 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
250 FSTD fm,-8(%sp) ; -8(sp) = m
251 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
252 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
253 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
254 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
255
256 LDD -8(%sp),m_0
257 LDD -16(%sp),m1_0 ; m1 = temp1
258 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
259 LDD -24(%sp),ht_0
260 LDD -32(%sp),lt_0
261
262 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
263 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
264
265 EXTRD,U tmp_0,31,32,m_0 ; m>>32
266 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
267
268 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
269 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
270 ADD,DC ht_0,%r0,ht_0 ; ht++
271 ADD %ret1,tmp_0,lt_0 ; lt = lt + c;
272 ADD,DC ht_0,%r0,ht_0 ; ht++
273 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
274 ADD,DC ht_0,%r0,%ret1 ; ht++
275 STD lt_0,0(r_ptr) ; rp[0] = lt
276
277bn_mul_add_words_exit
278 .EXIT
279
280 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
281 LDD -80(%sp),%r9 ; restore r9
282 LDD -88(%sp),%r8 ; restore r8
283 LDD -96(%sp),%r7 ; restore r7
284 LDD -104(%sp),%r6 ; restore r6
285 LDD -112(%sp),%r5 ; restore r5
286 LDD -120(%sp),%r4 ; restore r4
287 BVE (%rp)
288 LDD,MB -128(%sp),%r3 ; restore r3
289 .PROCEND ;in=23,24,25,26,29;out=28;
290
291;----------------------------------------------------------------------------
292;
293;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
294;
295; arg0 = rp
296; arg1 = ap
297; arg3 = num
298; w on stack at -56(sp)
299
300bn_mul_words
301 .proc
302 .callinfo frame=128
303 .entry
304 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
305 .align 64
306
307 STD %r3,0(%sp) ; save r3
308 STD %r4,8(%sp) ; save r4
309 NOP
310 STD %r5,16(%sp) ; save r5
311
312 STD %r6,24(%sp) ; save r6
313 STD %r7,32(%sp) ; save r7
314 COPY %r0,%ret1 ; return 0 by default
315 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
316
317 CMPIB,>= 0,num,bn_mul_words_exit
318 LDO 128(%sp),%sp ; bump stack
319
320 ;
321 ; See if only 1 word to do, thus just do cleanup
322 ;
323 CMPIB,= 1,num,bn_mul_words_single_top
324 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
325
326 ;
327 ; This loop is unrolled 2 times (64-byte aligned as well)
328 ;
329 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
330 ; two 32-bit mutiplies can be issued per cycle.
331 ;
332bn_mul_words_unroll2
333
334 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
335 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
336 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
337 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l
338
339 FSTD fm1,-16(%sp) ; -16(sp) = m1
340 FSTD fm1_1,-48(%sp) ; -48(sp) = m1
341 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
342 XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h
343
344 FSTD fm,-8(%sp) ; -8(sp) = m
345 FSTD fm_1,-40(%sp) ; -40(sp) = m
346 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
347 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h
348
349 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
350 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht
351 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
352 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
353
354 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
355 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt
356 LDD -8(%sp),m_0
357 LDD -40(%sp),m_1
358
359 LDD -16(%sp),m1_0
360 LDD -48(%sp),m1_1
361 LDD -24(%sp),ht_0
362 LDD -56(%sp),ht_1
363
364 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1;
365 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1;
366 LDD -32(%sp),lt_0
367 LDD -64(%sp),lt_1
368
369 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1)
370 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
371 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1)
372 ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32)
373
374 EXTRD,U tmp_0,31,32,m_0 ; m>>32
375 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
376 EXTRD,U tmp_1,31,32,m_1 ; m>>32
377 DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32
378
379 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
380 ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32)
381 ADD lt_0,m1_0,lt_0 ; lt = lt+m1;
382 ADD,DC ht_0,%r0,ht_0 ; ht++
383
384 ADD lt_1,m1_1,lt_1 ; lt = lt+m1;
385 ADD,DC ht_1,%r0,ht_1 ; ht++
386 ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1);
387 ADD,DC ht_0,%r0,ht_0 ; ht++
388
389 ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0)
390 ADD,DC ht_1,%r0,ht_1 ; ht++
391 STD lt_0,0(r_ptr) ; rp[0] = lt
392 STD lt_1,8(r_ptr) ; rp[1] = lt
393
394 COPY ht_1,%ret1 ; carry = ht
395 LDO -2(num),num ; num = num - 2;
396 LDO 16(a_ptr),a_ptr ; ap += 2
397 CMPIB,<= 2,num,bn_mul_words_unroll2
398 LDO 16(r_ptr),r_ptr ; rp++
399
400 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
401
402 ;
403 ; Top of loop aligned on 64-byte boundary
404 ;
405bn_mul_words_single_top
406 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
407
408 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
409 FSTD fm1,-16(%sp) ; -16(sp) = m1
410 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
411 FSTD fm,-8(%sp) ; -8(sp) = m
412 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
413 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
414 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
415 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
416
417 LDD -8(%sp),m_0
418 LDD -16(%sp),m1_0
419 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
420 LDD -24(%sp),ht_0
421 LDD -32(%sp),lt_0
422
423 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
424 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
425
426 EXTRD,U tmp_0,31,32,m_0 ; m>>32
427 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
428
429 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
430 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
431 ADD,DC ht_0,%r0,ht_0 ; ht++
432
433 ADD %ret1,lt_0,lt_0 ; lt = lt + c;
434 ADD,DC ht_0,%r0,ht_0 ; ht++
435
436 COPY ht_0,%ret1 ; copy carry
437 STD lt_0,0(r_ptr) ; rp[0] = lt
438
439bn_mul_words_exit
440 .EXIT
441 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
442 LDD -96(%sp),%r7 ; restore r7
443 LDD -104(%sp),%r6 ; restore r6
444 LDD -112(%sp),%r5 ; restore r5
445 LDD -120(%sp),%r4 ; restore r4
446 BVE (%rp)
447 LDD,MB -128(%sp),%r3 ; restore r3
448 .PROCEND
449
450;----------------------------------------------------------------------------
451;
452;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
453;
454; arg0 = rp
455; arg1 = ap
456; arg2 = num
457;
458
459bn_sqr_words
460 .proc
461 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
462 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
463 .entry
464 .align 64
465
466 STD %r3,0(%sp) ; save r3
467 STD %r4,8(%sp) ; save r4
468 NOP
469 STD %r5,16(%sp) ; save r5
470
471 CMPIB,>= 0,num,bn_sqr_words_exit
472 LDO 128(%sp),%sp ; bump stack
473
474 ;
475 ; If only 1, the goto straight to cleanup
476 ;
477 CMPIB,= 1,num,bn_sqr_words_single_top
478 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
479
480 ;
481 ; This loop is unrolled 2 times (64-byte aligned as well)
482 ;
483
484bn_sqr_words_unroll2
485 FLDD 0(a_ptr),t_float_0 ; a[0]
486 FLDD 8(a_ptr),t_float_1 ; a[1]
487 XMPYU fht_0,flt_0,fm ; m[0]
488 XMPYU fht_1,flt_1,fm_1 ; m[1]
489
490 FSTD fm,-24(%sp) ; store m[0]
491 FSTD fm_1,-56(%sp) ; store m[1]
492 XMPYU flt_0,flt_0,lt_temp ; lt[0]
493 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
494
495 FSTD lt_temp,-16(%sp) ; store lt[0]
496 FSTD lt_temp_1,-48(%sp) ; store lt[1]
497 XMPYU fht_0,fht_0,ht_temp ; ht[0]
498 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
499
500 FSTD ht_temp,-8(%sp) ; store ht[0]
501 FSTD ht_temp_1,-40(%sp) ; store ht[1]
502 LDD -24(%sp),m_0
503 LDD -56(%sp),m_1
504
505 AND m_0,high_mask,tmp_0 ; m[0] & Mask
506 AND m_1,high_mask,tmp_1 ; m[1] & Mask
507 DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
508 DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
509
510 LDD -16(%sp),lt_0
511 LDD -48(%sp),lt_1
512 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
513 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
514
515 LDD -8(%sp),ht_0
516 LDD -40(%sp),ht_1
517 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
518 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
519
520 ADD lt_0,m_0,lt_0 ; lt = lt+m
521 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
522 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
523 STD ht_0,8(r_ptr) ; rp[1] = ht[1]
524
525 ADD lt_1,m_1,lt_1 ; lt = lt+m
526 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
527 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
528 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
529
530 LDO -2(num),num ; num = num - 2;
531 LDO 16(a_ptr),a_ptr ; ap += 2
532 CMPIB,<= 2,num,bn_sqr_words_unroll2
533 LDO 32(r_ptr),r_ptr ; rp += 4
534
535 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
536
537 ;
538 ; Top of loop aligned on 64-byte boundary
539 ;
540bn_sqr_words_single_top
541 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
542
543 XMPYU fht_0,flt_0,fm ; m
544 FSTD fm,-24(%sp) ; store m
545
546 XMPYU flt_0,flt_0,lt_temp ; lt
547 FSTD lt_temp,-16(%sp) ; store lt
548
549 XMPYU fht_0,fht_0,ht_temp ; ht
550 FSTD ht_temp,-8(%sp) ; store ht
551
552 LDD -24(%sp),m_0 ; load m
553 AND m_0,high_mask,tmp_0 ; m & Mask
554 DEPD,Z m_0,30,31,m_0 ; m << 32+1
555 LDD -16(%sp),lt_0 ; lt
556
557 LDD -8(%sp),ht_0 ; ht
558 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
559 ADD m_0,lt_0,lt_0 ; lt = lt+m
560 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0
561 ADD,DC ht_0,%r0,ht_0 ; ht++
562
563 STD lt_0,0(r_ptr) ; rp[0] = lt
564 STD ht_0,8(r_ptr) ; rp[1] = ht
565
566bn_sqr_words_exit
567 .EXIT
568 LDD -112(%sp),%r5 ; restore r5
569 LDD -120(%sp),%r4 ; restore r4
570 BVE (%rp)
571 LDD,MB -128(%sp),%r3
572 .PROCEND ;in=23,24,25,26,29;out=28;
573
574
575;----------------------------------------------------------------------------
576;
577;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
578;
579; arg0 = rp
580; arg1 = ap
581; arg2 = bp
582; arg3 = n
583
584t .reg %r22
585b .reg %r21
586l .reg %r20
587
588bn_add_words
589 .proc
590 .entry
591 .callinfo
592 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
593 .align 64
594
595 CMPIB,>= 0,n,bn_add_words_exit
596 COPY %r0,%ret1 ; return 0 by default
597
598 ;
599 ; If 2 or more numbers do the loop
600 ;
601 CMPIB,= 1,n,bn_add_words_single_top
602 NOP
603
604 ;
605 ; This loop is unrolled 2 times (64-byte aligned as well)
606 ;
607bn_add_words_unroll2
608 LDD 0(a_ptr),t
609 LDD 0(b_ptr),b
610 ADD t,%ret1,t ; t = t+c;
611 ADD,DC %r0,%r0,%ret1 ; set c to carry
612 ADD t,b,l ; l = t + b[0]
613 ADD,DC %ret1,%r0,%ret1 ; c+= carry
614 STD l,0(r_ptr)
615
616 LDD 8(a_ptr),t
617 LDD 8(b_ptr),b
618 ADD t,%ret1,t ; t = t+c;
619 ADD,DC %r0,%r0,%ret1 ; set c to carry
620 ADD t,b,l ; l = t + b[0]
621 ADD,DC %ret1,%r0,%ret1 ; c+= carry
622 STD l,8(r_ptr)
623
624 LDO -2(n),n
625 LDO 16(a_ptr),a_ptr
626 LDO 16(b_ptr),b_ptr
627
628 CMPIB,<= 2,n,bn_add_words_unroll2
629 LDO 16(r_ptr),r_ptr
630
631 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
632
633bn_add_words_single_top
634 LDD 0(a_ptr),t
635 LDD 0(b_ptr),b
636
637 ADD t,%ret1,t ; t = t+c;
638 ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??)
639 ADD t,b,l ; l = t + b[0]
640 ADD,DC %ret1,%r0,%ret1 ; c+= carry
641 STD l,0(r_ptr)
642
643bn_add_words_exit
644 .EXIT
645 BVE (%rp)
646 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
647 .PROCEND ;in=23,24,25,26,29;out=28;
648
649;----------------------------------------------------------------------------
650;
651;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
652;
653; arg0 = rp
654; arg1 = ap
655; arg2 = bp
656; arg3 = n
657
658t1 .reg %r22
659t2 .reg %r21
660sub_tmp1 .reg %r20
661sub_tmp2 .reg %r19
662
663
664bn_sub_words
665 .proc
666 .callinfo
667 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
668 .entry
669 .align 64
670
671 CMPIB,>= 0,n,bn_sub_words_exit
672 COPY %r0,%ret1 ; return 0 by default
673
674 ;
675 ; If 2 or more numbers do the loop
676 ;
677 CMPIB,= 1,n,bn_sub_words_single_top
678 NOP
679
680 ;
681 ; This loop is unrolled 2 times (64-byte aligned as well)
682 ;
683bn_sub_words_unroll2
684 LDD 0(a_ptr),t1
685 LDD 0(b_ptr),t2
686 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
687 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
688
689 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
690 LDO 1(%r0),sub_tmp2
691
692 CMPCLR,*= t1,t2,%r0
693 COPY sub_tmp2,%ret1
694 STD sub_tmp1,0(r_ptr)
695
696 LDD 8(a_ptr),t1
697 LDD 8(b_ptr),t2
698 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
699 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
700 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
701 LDO 1(%r0),sub_tmp2
702
703 CMPCLR,*= t1,t2,%r0
704 COPY sub_tmp2,%ret1
705 STD sub_tmp1,8(r_ptr)
706
707 LDO -2(n),n
708 LDO 16(a_ptr),a_ptr
709 LDO 16(b_ptr),b_ptr
710
711 CMPIB,<= 2,n,bn_sub_words_unroll2
712 LDO 16(r_ptr),r_ptr
713
714 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
715
716bn_sub_words_single_top
717 LDD 0(a_ptr),t1
718 LDD 0(b_ptr),t2
719 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
720 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
721 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
722 LDO 1(%r0),sub_tmp2
723
724 CMPCLR,*= t1,t2,%r0
725 COPY sub_tmp2,%ret1
726
727 STD sub_tmp1,0(r_ptr)
728
729bn_sub_words_exit
730 .EXIT
731 BVE (%rp)
732 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
733 .PROCEND ;in=23,24,25,26,29;out=28;
734
735;------------------------------------------------------------------------------
736;
737; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
738;
739; arg0 = h
740; arg1 = l
741; arg2 = d
742;
743; This is mainly just output from the HP C compiler.
744;
745;------------------------------------------------------------------------------
746bn_div_words
747 .PROC
748 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN
749 .IMPORT BN_num_bits_word,CODE
750 ;--- not PIC .IMPORT __iob,DATA
751 ;--- not PIC .IMPORT fprintf,CODE
752 .IMPORT abort,CODE
753 .IMPORT $$div2U,MILLICODE
754 .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
755 .ENTRY
756 STW %r2,-20(%r30) ;offset 0x8ec
757 STW,MA %r3,192(%r30) ;offset 0x8f0
758 STW %r4,-188(%r30) ;offset 0x8f4
759 DEPD %r5,31,32,%r6 ;offset 0x8f8
760 STD %r6,-184(%r30) ;offset 0x8fc
761 DEPD %r7,31,32,%r8 ;offset 0x900
762 STD %r8,-176(%r30) ;offset 0x904
763 STW %r9,-168(%r30) ;offset 0x908
764 LDD -248(%r30),%r3 ;offset 0x90c
765 COPY %r26,%r4 ;offset 0x910
766 COPY %r24,%r5 ;offset 0x914
767 DEPD %r25,31,32,%r4 ;offset 0x918
768 CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c
769 DEPD %r23,31,32,%r5 ;offset 0x920
770 MOVIB,TR -1,%r29,$00060002 ;offset 0x924
771 EXTRD,U %r29,31,32,%r28 ;offset 0x928
772$0006002A
773 LDO -1(%r29),%r29 ;offset 0x92c
774 SUB %r23,%r7,%r23 ;offset 0x930
775$00060024
776 SUB %r4,%r31,%r25 ;offset 0x934
777 AND %r25,%r19,%r26 ;offset 0x938
778 CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c
779 DEPD,Z %r25,31,32,%r20 ;offset 0x940
780 OR %r20,%r24,%r21 ;offset 0x944
781 CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948
782 SUB %r31,%r2,%r31 ;offset 0x94c
783$00060046
784$0006002E
785 DEPD,Z %r23,31,32,%r25 ;offset 0x950
786 EXTRD,U %r23,31,32,%r26 ;offset 0x954
787 AND %r25,%r19,%r24 ;offset 0x958
788 ADD,L %r31,%r26,%r31 ;offset 0x95c
789 CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960
790 LDO 1(%r31),%r31 ;offset 0x964
791$00060032
792 CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968
793 LDO -1(%r29),%r29 ;offset 0x96c
794 ADD,L %r4,%r3,%r4 ;offset 0x970
795$00060036
796 ADDIB,=,N -1,%r8,$D0 ;offset 0x974
797 SUB %r5,%r24,%r28 ;offset 0x978
798$0006003A
799 SUB %r4,%r31,%r24 ;offset 0x97c
800 SHRPD %r24,%r28,32,%r4 ;offset 0x980
801 DEPD,Z %r29,31,32,%r9 ;offset 0x984
802 DEPD,Z %r28,31,32,%r5 ;offset 0x988
803$0006001C
804 EXTRD,U %r4,31,32,%r31 ;offset 0x98c
805 CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990
806 MOVB,TR %r6,%r29,$D1 ;offset 0x994
807 STD %r29,-152(%r30) ;offset 0x998
808$0006000C
809 EXTRD,U %r3,31,32,%r25 ;offset 0x99c
810 COPY %r3,%r26 ;offset 0x9a0
811 EXTRD,U %r3,31,32,%r9 ;offset 0x9a4
812 EXTRD,U %r4,31,32,%r8 ;offset 0x9a8
813 .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28;
814 B,L BN_num_bits_word,%r2 ;offset 0x9ac
815 EXTRD,U %r5,31,32,%r7 ;offset 0x9b0
816 LDI 64,%r20 ;offset 0x9b4
817 DEPD %r7,31,32,%r5 ;offset 0x9b8
818 DEPD %r8,31,32,%r4 ;offset 0x9bc
819 DEPD %r9,31,32,%r3 ;offset 0x9c0
820 CMPB,= %r28,%r20,$00060012 ;offset 0x9c4
821 COPY %r28,%r24 ;offset 0x9c8
822 MTSARCM %r24 ;offset 0x9cc
823 DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0
824 CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4
825$00060012
826 SUBI 64,%r24,%r31 ;offset 0x9d8
827 CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc
828 SUB %r4,%r3,%r4 ;offset 0x9e0
829$00060016
830 CMPB,= %r31,%r0,$0006001A ;offset 0x9e4
831 COPY %r0,%r9 ;offset 0x9e8
832 MTSARCM %r31 ;offset 0x9ec
833 DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0
834 SUBI 64,%r31,%r26 ;offset 0x9f4
835 MTSAR %r26 ;offset 0x9f8
836 SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc
837 MTSARCM %r31 ;offset 0xa00
838 DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04
839$0006001A
840 DEPDI,Z -1,31,32,%r19 ;offset 0xa08
841 AND %r3,%r19,%r29 ;offset 0xa0c
842 EXTRD,U %r29,31,32,%r2 ;offset 0xa10
843 DEPDI,Z -1,63,32,%r6 ;offset 0xa14
844 MOVIB,TR 2,%r8,$0006001C ;offset 0xa18
845 EXTRD,U %r3,63,32,%r7 ;offset 0xa1c
846$D2
847 ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20
848 ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24
849 ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28
850 ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28;
851 ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c
852 ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30
853 .CALL ;
854 B,L abort,%r2 ;offset 0xa34
855 NOP ;offset 0xa38
856 B $D3 ;offset 0xa3c
857 LDW -212(%r30),%r2 ;offset 0xa40
858$00060020
859 COPY %r4,%r26 ;offset 0xa44
860 EXTRD,U %r4,31,32,%r25 ;offset 0xa48
861 COPY %r2,%r24 ;offset 0xa4c
862 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
863 B,L $$div2U,%r31 ;offset 0xa50
864 EXTRD,U %r2,31,32,%r23 ;offset 0xa54
865 DEPD %r28,31,32,%r29 ;offset 0xa58
866$00060022
867 STD %r29,-152(%r30) ;offset 0xa5c
868$D1
869 AND %r5,%r19,%r24 ;offset 0xa60
870 EXTRD,U %r24,31,32,%r24 ;offset 0xa64
871 STW %r2,-160(%r30) ;offset 0xa68
872 STW %r7,-128(%r30) ;offset 0xa6c
873 FLDD -152(%r30),%fr4 ;offset 0xa70
874 FLDD -152(%r30),%fr7 ;offset 0xa74
875 FLDW -160(%r30),%fr8L ;offset 0xa78
876 FLDW -128(%r30),%fr5L ;offset 0xa7c
877 XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80
878 FSTD %fr10,-136(%r30) ;offset 0xa84
879 XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88
880 FSTD %fr22,-144(%r30) ;offset 0xa8c
881 XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90
882 XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94
883 FSTD %fr11,-112(%r30) ;offset 0xa98
884 FSTD %fr23,-120(%r30) ;offset 0xa9c
885 LDD -136(%r30),%r28 ;offset 0xaa0
886 DEPD,Z %r28,31,32,%r31 ;offset 0xaa4
887 LDD -144(%r30),%r20 ;offset 0xaa8
888 ADD,L %r20,%r31,%r31 ;offset 0xaac
889 LDD -112(%r30),%r22 ;offset 0xab0
890 DEPD,Z %r22,31,32,%r22 ;offset 0xab4
891 LDD -120(%r30),%r21 ;offset 0xab8
892 B $00060024 ;offset 0xabc
893 ADD,L %r21,%r22,%r23 ;offset 0xac0
894$D0
895 OR %r9,%r29,%r29 ;offset 0xac4
896$00060040
897 EXTRD,U %r29,31,32,%r28 ;offset 0xac8
898$00060002
899$L2
900 LDW -212(%r30),%r2 ;offset 0xacc
901$D3
902 LDW -168(%r30),%r9 ;offset 0xad0
903 LDD -176(%r30),%r8 ;offset 0xad4
904 EXTRD,U %r8,31,32,%r7 ;offset 0xad8
905 LDD -184(%r30),%r6 ;offset 0xadc
906 EXTRD,U %r6,31,32,%r5 ;offset 0xae0
907 LDW -188(%r30),%r4 ;offset 0xae4
908 BVE (%r2) ;offset 0xae8
909 .EXIT
910 LDW,MB -192(%r30),%r3 ;offset 0xaec
911 .PROCEND ;in=23,25;out=28,29;fpin=105,107;
912
913
914
915
916;----------------------------------------------------------------------------
917;
918; Registers to hold 64-bit values to manipulate. The "L" part
919; of the register corresponds to the upper 32-bits, while the "R"
920; part corresponds to the lower 32-bits
921;
922; Note, that when using b6 and b7, the code must save these before
923; using them because they are callee save registers
924;
925;
926; Floating point registers to use to save values that
927; are manipulated. These don't collide with ftemp1-6 and
928; are all caller save registers
929;
930a0 .reg %fr22
931a0L .reg %fr22L
932a0R .reg %fr22R
933
934a1 .reg %fr23
935a1L .reg %fr23L
936a1R .reg %fr23R
937
938a2 .reg %fr24
939a2L .reg %fr24L
940a2R .reg %fr24R
941
942a3 .reg %fr25
943a3L .reg %fr25L
944a3R .reg %fr25R
945
946a4 .reg %fr26
947a4L .reg %fr26L
948a4R .reg %fr26R
949
950a5 .reg %fr27
951a5L .reg %fr27L
952a5R .reg %fr27R
953
954a6 .reg %fr28
955a6L .reg %fr28L
956a6R .reg %fr28R
957
958a7 .reg %fr29
959a7L .reg %fr29L
960a7R .reg %fr29R
961
962b0 .reg %fr30
963b0L .reg %fr30L
964b0R .reg %fr30R
965
966b1 .reg %fr31
967b1L .reg %fr31L
968b1R .reg %fr31R
969
970;
971; Temporary floating point variables, these are all caller save
972; registers
973;
974ftemp1 .reg %fr4
975ftemp2 .reg %fr5
976ftemp3 .reg %fr6
977ftemp4 .reg %fr7
978
979;
980; The B set of registers when used.
981;
982
983b2 .reg %fr8
984b2L .reg %fr8L
985b2R .reg %fr8R
986
987b3 .reg %fr9
988b3L .reg %fr9L
989b3R .reg %fr9R
990
991b4 .reg %fr10
992b4L .reg %fr10L
993b4R .reg %fr10R
994
995b5 .reg %fr11
996b5L .reg %fr11L
997b5R .reg %fr11R
998
999b6 .reg %fr12
1000b6L .reg %fr12L
1001b6R .reg %fr12R
1002
1003b7 .reg %fr13
1004b7L .reg %fr13L
1005b7R .reg %fr13R
1006
1007c1 .reg %r21 ; only reg
1008temp1 .reg %r20 ; only reg
1009temp2 .reg %r19 ; only reg
1010temp3 .reg %r31 ; only reg
1011
1012m1 .reg %r28
1013c2 .reg %r23
1014high_one .reg %r1
1015ht .reg %r6
1016lt .reg %r5
1017m .reg %r4
1018c3 .reg %r3
1019
1020SQR_ADD_C .macro A0L,A0R,C1,C2,C3
1021 XMPYU A0L,A0R,ftemp1 ; m
1022 FSTD ftemp1,-24(%sp) ; store m
1023
1024 XMPYU A0R,A0R,ftemp2 ; lt
1025 FSTD ftemp2,-16(%sp) ; store lt
1026
1027 XMPYU A0L,A0L,ftemp3 ; ht
1028 FSTD ftemp3,-8(%sp) ; store ht
1029
1030 LDD -24(%sp),m ; load m
1031 AND m,high_mask,temp2 ; m & Mask
1032 DEPD,Z m,30,31,temp3 ; m << 32+1
1033 LDD -16(%sp),lt ; lt
1034
1035 LDD -8(%sp),ht ; ht
1036 EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
1037 ADD temp3,lt,lt ; lt = lt+m
1038 ADD,L ht,temp1,ht ; ht += temp1
1039 ADD,DC ht,%r0,ht ; ht++
1040
1041 ADD C1,lt,C1 ; c1=c1+lt
1042 ADD,DC ht,%r0,ht ; ht++
1043
1044 ADD C2,ht,C2 ; c2=c2+ht
1045 ADD,DC C3,%r0,C3 ; c3++
1046.endm
1047
1048SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
1049 XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht
1050 FSTD ftemp1,-16(%sp) ;
1051 XMPYU A0R,A1L,ftemp2 ; m = bh*lt
1052 FSTD ftemp2,-8(%sp) ;
1053 XMPYU A0R,A1R,ftemp3 ; lt = bl*lt
1054 FSTD ftemp3,-32(%sp)
1055 XMPYU A0L,A1L,ftemp4 ; ht = bh*ht
1056 FSTD ftemp4,-24(%sp) ;
1057
1058 LDD -8(%sp),m ; r21 = m
1059 LDD -16(%sp),m1 ; r19 = m1
1060 ADD,L m,m1,m ; m+m1
1061
1062 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1063 LDD -24(%sp),ht ; r24 = ht
1064
1065 CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
1066 ADD,L ht,high_one,ht ; ht+=high_one
1067
1068 EXTRD,U m,31,32,temp1 ; m >> 32
1069 LDD -32(%sp),lt ; lt
1070 ADD,L ht,temp1,ht ; ht+= m>>32
1071 ADD lt,temp3,lt ; lt = lt+m1
1072 ADD,DC ht,%r0,ht ; ht++
1073
1074 ADD ht,ht,ht ; ht=ht+ht;
1075 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1076
1077 ADD lt,lt,lt ; lt=lt+lt;
1078 ADD,DC ht,%r0,ht ; add in carry (ht++)
1079
1080 ADD C1,lt,C1 ; c1=c1+lt
1081 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1082 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1083
1084 ADD C2,ht,C2 ; c2 = c2 + ht
1085 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1086.endm
1087
1088;
1089;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1090; arg0 = r_ptr
1091; arg1 = a_ptr
1092;
1093
1094bn_sqr_comba8
1095 .PROC
1096 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1097 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1098 .ENTRY
1099 .align 64
1100
1101 STD %r3,0(%sp) ; save r3
1102 STD %r4,8(%sp) ; save r4
1103 STD %r5,16(%sp) ; save r5
1104 STD %r6,24(%sp) ; save r6
1105
1106 ;
1107 ; Zero out carries
1108 ;
1109 COPY %r0,c1
1110 COPY %r0,c2
1111 COPY %r0,c3
1112
1113 LDO 128(%sp),%sp ; bump stack
1114 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1115 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1116
1117 ;
1118 ; Load up all of the values we are going to use
1119 ;
1120 FLDD 0(a_ptr),a0
1121 FLDD 8(a_ptr),a1
1122 FLDD 16(a_ptr),a2
1123 FLDD 24(a_ptr),a3
1124 FLDD 32(a_ptr),a4
1125 FLDD 40(a_ptr),a5
1126 FLDD 48(a_ptr),a6
1127 FLDD 56(a_ptr),a7
1128
1129 SQR_ADD_C a0L,a0R,c1,c2,c3
1130 STD c1,0(r_ptr) ; r[0] = c1;
1131 COPY %r0,c1
1132
1133 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1134 STD c2,8(r_ptr) ; r[1] = c2;
1135 COPY %r0,c2
1136
1137 SQR_ADD_C a1L,a1R,c3,c1,c2
1138 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1139 STD c3,16(r_ptr) ; r[2] = c3;
1140 COPY %r0,c3
1141
1142 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1143 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1144 STD c1,24(r_ptr) ; r[3] = c1;
1145 COPY %r0,c1
1146
1147 SQR_ADD_C a2L,a2R,c2,c3,c1
1148 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1149 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
1150 STD c2,32(r_ptr) ; r[4] = c2;
1151 COPY %r0,c2
1152
1153 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
1154 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
1155 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1156 STD c3,40(r_ptr) ; r[5] = c3;
1157 COPY %r0,c3
1158
1159 SQR_ADD_C a3L,a3R,c1,c2,c3
1160 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
1161 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
1162 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
1163 STD c1,48(r_ptr) ; r[6] = c1;
1164 COPY %r0,c1
1165
1166 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
1167 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
1168 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
1169 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
1170 STD c2,56(r_ptr) ; r[7] = c2;
1171 COPY %r0,c2
1172
1173 SQR_ADD_C a4L,a4R,c3,c1,c2
1174 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
1175 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
1176 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
1177 STD c3,64(r_ptr) ; r[8] = c3;
1178 COPY %r0,c3
1179
1180 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
1181 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
1182 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
1183 STD c1,72(r_ptr) ; r[9] = c1;
1184 COPY %r0,c1
1185
1186 SQR_ADD_C a5L,a5R,c2,c3,c1
1187 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
1188 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
1189 STD c2,80(r_ptr) ; r[10] = c2;
1190 COPY %r0,c2
1191
1192 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
1193 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
1194 STD c3,88(r_ptr) ; r[11] = c3;
1195 COPY %r0,c3
1196
1197 SQR_ADD_C a6L,a6R,c1,c2,c3
1198 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
1199 STD c1,96(r_ptr) ; r[12] = c1;
1200 COPY %r0,c1
1201
1202 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
1203 STD c2,104(r_ptr) ; r[13] = c2;
1204 COPY %r0,c2
1205
1206 SQR_ADD_C a7L,a7R,c3,c1,c2
1207 STD c3, 112(r_ptr) ; r[14] = c3
1208 STD c1, 120(r_ptr) ; r[15] = c1
1209
1210 .EXIT
1211 LDD -104(%sp),%r6 ; restore r6
1212 LDD -112(%sp),%r5 ; restore r5
1213 LDD -120(%sp),%r4 ; restore r4
1214 BVE (%rp)
1215 LDD,MB -128(%sp),%r3
1216
1217 .PROCEND
1218
1219;-----------------------------------------------------------------------------
1220;
1221;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1222; arg0 = r_ptr
1223; arg1 = a_ptr
1224;
1225
1226bn_sqr_comba4
1227 .proc
1228 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1229 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1230 .entry
1231 .align 64
1232 STD %r3,0(%sp) ; save r3
1233 STD %r4,8(%sp) ; save r4
1234 STD %r5,16(%sp) ; save r5
1235 STD %r6,24(%sp) ; save r6
1236
1237 ;
1238 ; Zero out carries
1239 ;
1240 COPY %r0,c1
1241 COPY %r0,c2
1242 COPY %r0,c3
1243
1244 LDO 128(%sp),%sp ; bump stack
1245 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1246 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1247
1248 ;
1249 ; Load up all of the values we are going to use
1250 ;
1251 FLDD 0(a_ptr),a0
1252 FLDD 8(a_ptr),a1
1253 FLDD 16(a_ptr),a2
1254 FLDD 24(a_ptr),a3
1255 FLDD 32(a_ptr),a4
1256 FLDD 40(a_ptr),a5
1257 FLDD 48(a_ptr),a6
1258 FLDD 56(a_ptr),a7
1259
1260 SQR_ADD_C a0L,a0R,c1,c2,c3
1261
1262 STD c1,0(r_ptr) ; r[0] = c1;
1263 COPY %r0,c1
1264
1265 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1266
1267 STD c2,8(r_ptr) ; r[1] = c2;
1268 COPY %r0,c2
1269
1270 SQR_ADD_C a1L,a1R,c3,c1,c2
1271 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1272
1273 STD c3,16(r_ptr) ; r[2] = c3;
1274 COPY %r0,c3
1275
1276 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1277 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1278
1279 STD c1,24(r_ptr) ; r[3] = c1;
1280 COPY %r0,c1
1281
1282 SQR_ADD_C a2L,a2R,c2,c3,c1
1283 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1284
1285 STD c2,32(r_ptr) ; r[4] = c2;
1286 COPY %r0,c2
1287
1288 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1289 STD c3,40(r_ptr) ; r[5] = c3;
1290 COPY %r0,c3
1291
1292 SQR_ADD_C a3L,a3R,c1,c2,c3
1293 STD c1,48(r_ptr) ; r[6] = c1;
1294 STD c2,56(r_ptr) ; r[7] = c2;
1295
1296 .EXIT
1297 LDD -104(%sp),%r6 ; restore r6
1298 LDD -112(%sp),%r5 ; restore r5
1299 LDD -120(%sp),%r4 ; restore r4
1300 BVE (%rp)
1301 LDD,MB -128(%sp),%r3
1302
1303 .PROCEND
1304
1305
1306;---------------------------------------------------------------------------
1307
1308MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
1309 XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht
1310 FSTD ftemp1,-16(%sp) ;
1311 XMPYU A0R,B0L,ftemp2 ; m = bh*lt
1312 FSTD ftemp2,-8(%sp) ;
1313 XMPYU A0R,B0R,ftemp3 ; lt = bl*lt
1314 FSTD ftemp3,-32(%sp)
1315 XMPYU A0L,B0L,ftemp4 ; ht = bh*ht
1316 FSTD ftemp4,-24(%sp) ;
1317
1318 LDD -8(%sp),m ; r21 = m
1319 LDD -16(%sp),m1 ; r19 = m1
1320 ADD,L m,m1,m ; m+m1
1321
1322 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1323 LDD -24(%sp),ht ; r24 = ht
1324
1325 CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
1326 ADD,L ht,high_one,ht ; ht+=high_one
1327
1328 EXTRD,U m,31,32,temp1 ; m >> 32
1329 LDD -32(%sp),lt ; lt
1330 ADD,L ht,temp1,ht ; ht+= m>>32
1331 ADD lt,temp3,lt ; lt = lt+m1
1332 ADD,DC ht,%r0,ht ; ht++
1333
1334 ADD C1,lt,C1 ; c1=c1+lt
1335 ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise
1336
1337 ADD C2,ht,C2 ; c2 = c2 + ht
1338 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1339.endm
1340
1341
1342;
1343;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1344; arg0 = r_ptr
1345; arg1 = a_ptr
1346; arg2 = b_ptr
1347;
1348
1349bn_mul_comba8
1350 .proc
1351 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1352 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1353 .entry
1354 .align 64
1355
1356 STD %r3,0(%sp) ; save r3
1357 STD %r4,8(%sp) ; save r4
1358 STD %r5,16(%sp) ; save r5
1359 STD %r6,24(%sp) ; save r6
1360 FSTD %fr12,32(%sp) ; save r6
1361 FSTD %fr13,40(%sp) ; save r7
1362
1363 ;
1364 ; Zero out carries
1365 ;
1366 COPY %r0,c1
1367 COPY %r0,c2
1368 COPY %r0,c3
1369
1370 LDO 128(%sp),%sp ; bump stack
1371 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1372
1373 ;
1374 ; Load up all of the values we are going to use
1375 ;
1376 FLDD 0(a_ptr),a0
1377 FLDD 8(a_ptr),a1
1378 FLDD 16(a_ptr),a2
1379 FLDD 24(a_ptr),a3
1380 FLDD 32(a_ptr),a4
1381 FLDD 40(a_ptr),a5
1382 FLDD 48(a_ptr),a6
1383 FLDD 56(a_ptr),a7
1384
1385 FLDD 0(b_ptr),b0
1386 FLDD 8(b_ptr),b1
1387 FLDD 16(b_ptr),b2
1388 FLDD 24(b_ptr),b3
1389 FLDD 32(b_ptr),b4
1390 FLDD 40(b_ptr),b5
1391 FLDD 48(b_ptr),b6
1392 FLDD 56(b_ptr),b7
1393
1394 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1395 STD c1,0(r_ptr)
1396 COPY %r0,c1
1397
1398 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1399 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1400 STD c2,8(r_ptr)
1401 COPY %r0,c2
1402
1403 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1404 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1405 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1406 STD c3,16(r_ptr)
1407 COPY %r0,c3
1408
1409 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1410 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1411 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1412 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1413 STD c1,24(r_ptr)
1414 COPY %r0,c1
1415
1416 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1417 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1418 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1419 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1420 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1421 STD c2,32(r_ptr)
1422 COPY %r0,c2
1423
1424 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1425 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1426 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1427 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1428 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1429 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1430 STD c3,40(r_ptr)
1431 COPY %r0,c3
1432
1433 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1434 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1435 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1436 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1437 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1438 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1439 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1440 STD c1,48(r_ptr)
1441 COPY %r0,c1
1442
1443 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1444 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1445 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1446 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1447 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1448 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1449 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1450 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1451 STD c2,56(r_ptr)
1452 COPY %r0,c2
1453
1454 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1455 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1456 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1457 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1458 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1459 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1460 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1461 STD c3,64(r_ptr)
1462 COPY %r0,c3
1463
1464 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1465 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1466 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1467 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1468 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1469 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1470 STD c1,72(r_ptr)
1471 COPY %r0,c1
1472
1473 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1474 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1475 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1476 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1477 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1478 STD c2,80(r_ptr)
1479 COPY %r0,c2
1480
1481 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1482 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1483 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1484 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1485 STD c3,88(r_ptr)
1486 COPY %r0,c3
1487
1488 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1489 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1490 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1491 STD c1,96(r_ptr)
1492 COPY %r0,c1
1493
1494 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1495 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1496 STD c2,104(r_ptr)
1497 COPY %r0,c2
1498
1499 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1500 STD c3,112(r_ptr)
1501 STD c1,120(r_ptr)
1502
1503 .EXIT
1504 FLDD -88(%sp),%fr13
1505 FLDD -96(%sp),%fr12
1506 LDD -104(%sp),%r6 ; restore r6
1507 LDD -112(%sp),%r5 ; restore r5
1508 LDD -120(%sp),%r4 ; restore r4
1509 BVE (%rp)
1510 LDD,MB -128(%sp),%r3
1511
1512 .PROCEND
1513
1514;-----------------------------------------------------------------------------
1515;
1516;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1517; arg0 = r_ptr
1518; arg1 = a_ptr
1519; arg2 = b_ptr
1520;
1521
1522bn_mul_comba4
1523 .proc
1524 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1525 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1526 .entry
1527 .align 64
1528
1529 STD %r3,0(%sp) ; save r3
1530 STD %r4,8(%sp) ; save r4
1531 STD %r5,16(%sp) ; save r5
1532 STD %r6,24(%sp) ; save r6
1533 FSTD %fr12,32(%sp) ; save r6
1534 FSTD %fr13,40(%sp) ; save r7
1535
1536 ;
1537 ; Zero out carries
1538 ;
1539 COPY %r0,c1
1540 COPY %r0,c2
1541 COPY %r0,c3
1542
1543 LDO 128(%sp),%sp ; bump stack
1544 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1545
1546 ;
1547 ; Load up all of the values we are going to use
1548 ;
1549 FLDD 0(a_ptr),a0
1550 FLDD 8(a_ptr),a1
1551 FLDD 16(a_ptr),a2
1552 FLDD 24(a_ptr),a3
1553
1554 FLDD 0(b_ptr),b0
1555 FLDD 8(b_ptr),b1
1556 FLDD 16(b_ptr),b2
1557 FLDD 24(b_ptr),b3
1558
1559 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1560 STD c1,0(r_ptr)
1561 COPY %r0,c1
1562
1563 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1564 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1565 STD c2,8(r_ptr)
1566 COPY %r0,c2
1567
1568 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1569 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1570 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1571 STD c3,16(r_ptr)
1572 COPY %r0,c3
1573
1574 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1575 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1576 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1577 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1578 STD c1,24(r_ptr)
1579 COPY %r0,c1
1580
1581 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1582 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1583 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1584 STD c2,32(r_ptr)
1585 COPY %r0,c2
1586
1587 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1588 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1589 STD c3,40(r_ptr)
1590 COPY %r0,c3
1591
1592 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1593 STD c1,48(r_ptr)
1594 STD c2,56(r_ptr)
1595
1596 .EXIT
1597 FLDD -88(%sp),%fr13
1598 FLDD -96(%sp),%fr12
1599 LDD -104(%sp),%r6 ; restore r6
1600 LDD -112(%sp),%r5 ; restore r5
1601 LDD -120(%sp),%r4 ; restore r4
1602 BVE (%rp)
1603 LDD,MB -128(%sp),%r3
1604
1605 .PROCEND
1606
1607
1608;--- not PIC .SPACE $TEXT$
1609;--- not PIC .SUBSPA $CODE$
1610;--- not PIC .SPACE $PRIVATE$,SORT=16
1611;--- not PIC .IMPORT $global$,DATA
1612;--- not PIC .SPACE $TEXT$
1613;--- not PIC .SUBSPA $CODE$
1614;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c
1615;--- not PIC C$7
1616;--- not PIC .ALIGN 8
1617;--- not PIC .STRINGZ "Division would overflow (%d)\n"
1618 .END
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s
deleted file mode 100644
index a99545754d..0000000000
--- a/src/lib/libcrypto/bn/asm/pa-risc2W.s
+++ /dev/null
@@ -1,1605 +0,0 @@
1;
2; PA-RISC 64-bit implementation of bn_asm code
3;
4; This code is approximately 2x faster than the C version
5; for RSA/DSA.
6;
7; See http://devresource.hp.com/ for more details on the PA-RISC
8; architecture. Also see the book "PA-RISC 2.0 Architecture"
9; by Gerry Kane for information on the instruction set architecture.
10;
11; Code written by Chris Ruemmler (with some help from the HP C
12; compiler).
13;
14; The code compiles with HP's assembler
15;
16
17 .level 2.0W
18 .space $TEXT$
19 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
20
21;
22; Global Register definitions used for the routines.
23;
24; Some information about HP's runtime architecture for 64-bits.
25;
26; "Caller save" means the calling function must save the register
27; if it wants the register to be preserved.
28; "Callee save" means if a function uses the register, it must save
29; the value before using it.
30;
31; For the floating point registers
32;
33; "caller save" registers: fr4-fr11, fr22-fr31
34; "callee save" registers: fr12-fr21
35; "special" registers: fr0-fr3 (status and exception registers)
36;
37; For the integer registers
38; value zero : r0
39; "caller save" registers: r1,r19-r26
40; "callee save" registers: r3-r18
41; return register : r2 (rp)
42; return values ; r28 (ret0,ret1)
43; Stack pointer ; r30 (sp)
44; global data pointer ; r27 (dp)
45; argument pointer ; r29 (ap)
46; millicode return ptr ; r31 (also a caller save register)
47
48
49;
50; Arguments to the routines
51;
52r_ptr .reg %r26
53a_ptr .reg %r25
54b_ptr .reg %r24
55num .reg %r24
56w .reg %r23
57n .reg %r23
58
59
60;
61; Globals used in some routines
62;
63
64top_overflow .reg %r29
65high_mask .reg %r22 ; value 0xffffffff80000000L
66
67
68;------------------------------------------------------------------------------
69;
70; bn_mul_add_words
71;
72;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
73; int num, BN_ULONG w)
74;
75; arg0 = r_ptr
76; arg1 = a_ptr
77; arg2 = num
78; arg3 = w
79;
80; Local register definitions
81;
82
83fm1 .reg %fr22
84fm .reg %fr23
85ht_temp .reg %fr24
86ht_temp_1 .reg %fr25
87lt_temp .reg %fr26
88lt_temp_1 .reg %fr27
89fm1_1 .reg %fr28
90fm_1 .reg %fr29
91
92fw_h .reg %fr7L
93fw_l .reg %fr7R
94fw .reg %fr7
95
96fht_0 .reg %fr8L
97flt_0 .reg %fr8R
98t_float_0 .reg %fr8
99
100fht_1 .reg %fr9L
101flt_1 .reg %fr9R
102t_float_1 .reg %fr9
103
104tmp_0 .reg %r31
105tmp_1 .reg %r21
106m_0 .reg %r20
107m_1 .reg %r19
108ht_0 .reg %r1
109ht_1 .reg %r3
110lt_0 .reg %r4
111lt_1 .reg %r5
112m1_0 .reg %r6
113m1_1 .reg %r7
114rp_val .reg %r8
115rp_val_1 .reg %r9
116
117bn_mul_add_words
118 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
119 .proc
120 .callinfo frame=128
121 .entry
122 .align 64
123
124 STD %r3,0(%sp) ; save r3
125 STD %r4,8(%sp) ; save r4
126 NOP ; Needed to make the loop 16-byte aligned
127 NOP ; Needed to make the loop 16-byte aligned
128
129 STD %r5,16(%sp) ; save r5
130 STD %r6,24(%sp) ; save r6
131 STD %r7,32(%sp) ; save r7
132 STD %r8,40(%sp) ; save r8
133
134 STD %r9,48(%sp) ; save r9
135 COPY %r0,%ret0 ; return 0 by default
136 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
137 STD w,56(%sp) ; store w on stack
138
139 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
140 LDO 128(%sp),%sp ; bump stack
141
142 ;
143 ; The loop is unrolled twice, so if there is only 1 number
144 ; then go straight to the cleanup code.
145 ;
146 CMPIB,= 1,num,bn_mul_add_words_single_top
147 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
148
149 ;
150 ; This loop is unrolled 2 times (64-byte aligned as well)
151 ;
152 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
153 ; two 32-bit mutiplies can be issued per cycle.
154 ;
155bn_mul_add_words_unroll2
156
157 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
158 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
159 LDD 0(r_ptr),rp_val ; rp[0]
160 LDD 8(r_ptr),rp_val_1 ; rp[1]
161
162 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
163 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
164 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
165 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
166
167 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
168 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
169 FSTD fm,-8(%sp) ; -8(sp) = m[0]
170 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
171
172 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
173 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
174 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
175 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
176
177 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
178 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
179 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
180 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
181
182 LDD -8(%sp),m_0 ; m[0]
183 LDD -40(%sp),m_1 ; m[1]
184 LDD -16(%sp),m1_0 ; m1[0]
185 LDD -48(%sp),m1_1 ; m1[1]
186
187 LDD -24(%sp),ht_0 ; ht[0]
188 LDD -56(%sp),ht_1 ; ht[1]
189 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
190 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
191
192 LDD -32(%sp),lt_0
193 LDD -64(%sp),lt_1
194 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0])
195 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
196
197 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1])
198 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
199 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
200 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
201
202 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
203 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
204 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
205 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
206
207 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
208 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
209 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
210 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
211
212 ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c;
213 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
214 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
215 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
216
217 LDO -2(num),num ; num = num - 2;
218 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
219 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
220 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
221
222 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
223 ADD,DC ht_1,%r0,%ret0 ; ht[1]++
224 LDO 16(a_ptr),a_ptr ; a_ptr += 2
225
226 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
227 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
228 LDO 16(r_ptr),r_ptr ; r_ptr += 2
229
230 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
231
232 ;
233 ; Top of loop aligned on 64-byte boundary
234 ;
235bn_mul_add_words_single_top
236 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
237 LDD 0(r_ptr),rp_val ; rp[0]
238 LDO 8(a_ptr),a_ptr ; a_ptr++
239 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
240 FSTD fm1,-16(%sp) ; -16(sp) = m1
241 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
242 FSTD fm,-8(%sp) ; -8(sp) = m
243 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
244 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
245 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
246 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
247
248 LDD -8(%sp),m_0
249 LDD -16(%sp),m1_0 ; m1 = temp1
250 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
251 LDD -24(%sp),ht_0
252 LDD -32(%sp),lt_0
253
254 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
255 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
256
257 EXTRD,U tmp_0,31,32,m_0 ; m>>32
258 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
259
260 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
261 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
262 ADD,DC ht_0,%r0,ht_0 ; ht++
263 ADD %ret0,tmp_0,lt_0 ; lt = lt + c;
264 ADD,DC ht_0,%r0,ht_0 ; ht++
265 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
266 ADD,DC ht_0,%r0,%ret0 ; ht++
267 STD lt_0,0(r_ptr) ; rp[0] = lt
268
269bn_mul_add_words_exit
270 .EXIT
271 LDD -80(%sp),%r9 ; restore r9
272 LDD -88(%sp),%r8 ; restore r8
273 LDD -96(%sp),%r7 ; restore r7
274 LDD -104(%sp),%r6 ; restore r6
275 LDD -112(%sp),%r5 ; restore r5
276 LDD -120(%sp),%r4 ; restore r4
277 BVE (%rp)
278 LDD,MB -128(%sp),%r3 ; restore r3
279 .PROCEND ;in=23,24,25,26,29;out=28;
280
281;----------------------------------------------------------------------------
282;
283;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
284;
285; arg0 = rp
286; arg1 = ap
287; arg2 = num
288; arg3 = w
289
290bn_mul_words
291 .proc
292 .callinfo frame=128
293 .entry
294 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
295 .align 64
296
297 STD %r3,0(%sp) ; save r3
298 STD %r4,8(%sp) ; save r4
299 STD %r5,16(%sp) ; save r5
300 STD %r6,24(%sp) ; save r6
301
302 STD %r7,32(%sp) ; save r7
303 COPY %r0,%ret0 ; return 0 by default
304 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
305 STD w,56(%sp) ; w on stack
306
307 CMPIB,>= 0,num,bn_mul_words_exit
308 LDO 128(%sp),%sp ; bump stack
309
310 ;
311 ; See if only 1 word to do, thus just do cleanup
312 ;
313 CMPIB,= 1,num,bn_mul_words_single_top
314 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
315
316 ;
317 ; This loop is unrolled 2 times (64-byte aligned as well)
318 ;
319 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
320 ; two 32-bit mutiplies can be issued per cycle.
321 ;
322bn_mul_words_unroll2
323
324 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
325 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
326 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
327 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l
328
329 FSTD fm1,-16(%sp) ; -16(sp) = m1
330 FSTD fm1_1,-48(%sp) ; -48(sp) = m1
331 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
332 XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h
333
334 FSTD fm,-8(%sp) ; -8(sp) = m
335 FSTD fm_1,-40(%sp) ; -40(sp) = m
336 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
337 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h
338
339 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
340 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht
341 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
342 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
343
344 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
345 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt
346 LDD -8(%sp),m_0
347 LDD -40(%sp),m_1
348
349 LDD -16(%sp),m1_0
350 LDD -48(%sp),m1_1
351 LDD -24(%sp),ht_0
352 LDD -56(%sp),ht_1
353
354 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1;
355 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1;
356 LDD -32(%sp),lt_0
357 LDD -64(%sp),lt_1
358
359 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1)
360 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
361 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1)
362 ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32)
363
364 EXTRD,U tmp_0,31,32,m_0 ; m>>32
365 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
366 EXTRD,U tmp_1,31,32,m_1 ; m>>32
367 DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32
368
369 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
370 ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32)
371 ADD lt_0,m1_0,lt_0 ; lt = lt+m1;
372 ADD,DC ht_0,%r0,ht_0 ; ht++
373
374 ADD lt_1,m1_1,lt_1 ; lt = lt+m1;
375 ADD,DC ht_1,%r0,ht_1 ; ht++
376 ADD %ret0,lt_0,lt_0 ; lt = lt + c (ret0);
377 ADD,DC ht_0,%r0,ht_0 ; ht++
378
379 ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0)
380 ADD,DC ht_1,%r0,ht_1 ; ht++
381 STD lt_0,0(r_ptr) ; rp[0] = lt
382 STD lt_1,8(r_ptr) ; rp[1] = lt
383
384 COPY ht_1,%ret0 ; carry = ht
385 LDO -2(num),num ; num = num - 2;
386 LDO 16(a_ptr),a_ptr ; ap += 2
387 CMPIB,<= 2,num,bn_mul_words_unroll2
388 LDO 16(r_ptr),r_ptr ; rp++
389
390 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
391
392 ;
393 ; Top of loop aligned on 64-byte boundary
394 ;
395bn_mul_words_single_top
396 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
397
398 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
399 FSTD fm1,-16(%sp) ; -16(sp) = m1
400 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
401 FSTD fm,-8(%sp) ; -8(sp) = m
402 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
403 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
404 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
405 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
406
407 LDD -8(%sp),m_0
408 LDD -16(%sp),m1_0
409 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
410 LDD -24(%sp),ht_0
411 LDD -32(%sp),lt_0
412
413 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
414 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
415
416 EXTRD,U tmp_0,31,32,m_0 ; m>>32
417 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
418
419 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
420 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
421 ADD,DC ht_0,%r0,ht_0 ; ht++
422
423 ADD %ret0,lt_0,lt_0 ; lt = lt + c;
424 ADD,DC ht_0,%r0,ht_0 ; ht++
425
426 COPY ht_0,%ret0 ; copy carry
427 STD lt_0,0(r_ptr) ; rp[0] = lt
428
429bn_mul_words_exit
430 .EXIT
431 LDD -96(%sp),%r7 ; restore r7
432 LDD -104(%sp),%r6 ; restore r6
433 LDD -112(%sp),%r5 ; restore r5
434 LDD -120(%sp),%r4 ; restore r4
435 BVE (%rp)
436 LDD,MB -128(%sp),%r3 ; restore r3
437 .PROCEND ;in=23,24,25,26,29;out=28;
438
439;----------------------------------------------------------------------------
440;
441;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
442;
443; arg0 = rp
444; arg1 = ap
445; arg2 = num
446;
447
448bn_sqr_words
449 .proc
450 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
451 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
452 .entry
453 .align 64
454
455 STD %r3,0(%sp) ; save r3
456 STD %r4,8(%sp) ; save r4
457 NOP
458 STD %r5,16(%sp) ; save r5
459
460 CMPIB,>= 0,num,bn_sqr_words_exit
461 LDO 128(%sp),%sp ; bump stack
462
463 ;
464 ; If only 1, the goto straight to cleanup
465 ;
466 CMPIB,= 1,num,bn_sqr_words_single_top
467 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
468
469 ;
470 ; This loop is unrolled 2 times (64-byte aligned as well)
471 ;
472
473bn_sqr_words_unroll2
474 FLDD 0(a_ptr),t_float_0 ; a[0]
475 FLDD 8(a_ptr),t_float_1 ; a[1]
476 XMPYU fht_0,flt_0,fm ; m[0]
477 XMPYU fht_1,flt_1,fm_1 ; m[1]
478
479 FSTD fm,-24(%sp) ; store m[0]
480 FSTD fm_1,-56(%sp) ; store m[1]
481 XMPYU flt_0,flt_0,lt_temp ; lt[0]
482 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
483
484 FSTD lt_temp,-16(%sp) ; store lt[0]
485 FSTD lt_temp_1,-48(%sp) ; store lt[1]
486 XMPYU fht_0,fht_0,ht_temp ; ht[0]
487 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
488
489 FSTD ht_temp,-8(%sp) ; store ht[0]
490 FSTD ht_temp_1,-40(%sp) ; store ht[1]
491 LDD -24(%sp),m_0
492 LDD -56(%sp),m_1
493
494 AND m_0,high_mask,tmp_0 ; m[0] & Mask
495 AND m_1,high_mask,tmp_1 ; m[1] & Mask
496 DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
497 DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
498
499 LDD -16(%sp),lt_0
500 LDD -48(%sp),lt_1
501 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
502 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
503
504 LDD -8(%sp),ht_0
505 LDD -40(%sp),ht_1
506 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
507 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
508
509 ADD lt_0,m_0,lt_0 ; lt = lt+m
510 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
511 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
512 STD ht_0,8(r_ptr) ; rp[1] = ht[1]
513
514 ADD lt_1,m_1,lt_1 ; lt = lt+m
515 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
516 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
517 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
518
519 LDO -2(num),num ; num = num - 2;
520 LDO 16(a_ptr),a_ptr ; ap += 2
521 CMPIB,<= 2,num,bn_sqr_words_unroll2
522 LDO 32(r_ptr),r_ptr ; rp += 4
523
524 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
525
526 ;
527 ; Top of loop aligned on 64-byte boundary
528 ;
529bn_sqr_words_single_top
530 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
531
532 XMPYU fht_0,flt_0,fm ; m
533 FSTD fm,-24(%sp) ; store m
534
535 XMPYU flt_0,flt_0,lt_temp ; lt
536 FSTD lt_temp,-16(%sp) ; store lt
537
538 XMPYU fht_0,fht_0,ht_temp ; ht
539 FSTD ht_temp,-8(%sp) ; store ht
540
541 LDD -24(%sp),m_0 ; load m
542 AND m_0,high_mask,tmp_0 ; m & Mask
543 DEPD,Z m_0,30,31,m_0 ; m << 32+1
544 LDD -16(%sp),lt_0 ; lt
545
546 LDD -8(%sp),ht_0 ; ht
547 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
548 ADD m_0,lt_0,lt_0 ; lt = lt+m
549 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0
550 ADD,DC ht_0,%r0,ht_0 ; ht++
551
552 STD lt_0,0(r_ptr) ; rp[0] = lt
553 STD ht_0,8(r_ptr) ; rp[1] = ht
554
555bn_sqr_words_exit
556 .EXIT
557 LDD -112(%sp),%r5 ; restore r5
558 LDD -120(%sp),%r4 ; restore r4
559 BVE (%rp)
560 LDD,MB -128(%sp),%r3
561 .PROCEND ;in=23,24,25,26,29;out=28;
562
563
564;----------------------------------------------------------------------------
565;
566;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
567;
568; arg0 = rp
569; arg1 = ap
570; arg2 = bp
571; arg3 = n
572
573t .reg %r22
574b .reg %r21
575l .reg %r20
576
577bn_add_words
578 .proc
579 .entry
580 .callinfo
581 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
582 .align 64
583
584 CMPIB,>= 0,n,bn_add_words_exit
585 COPY %r0,%ret0 ; return 0 by default
586
587 ;
588 ; If 2 or more numbers do the loop
589 ;
590 CMPIB,= 1,n,bn_add_words_single_top
591 NOP
592
593 ;
594 ; This loop is unrolled 2 times (64-byte aligned as well)
595 ;
596bn_add_words_unroll2
597 LDD 0(a_ptr),t
598 LDD 0(b_ptr),b
599 ADD t,%ret0,t ; t = t+c;
600 ADD,DC %r0,%r0,%ret0 ; set c to carry
601 ADD t,b,l ; l = t + b[0]
602 ADD,DC %ret0,%r0,%ret0 ; c+= carry
603 STD l,0(r_ptr)
604
605 LDD 8(a_ptr),t
606 LDD 8(b_ptr),b
607 ADD t,%ret0,t ; t = t+c;
608 ADD,DC %r0,%r0,%ret0 ; set c to carry
609 ADD t,b,l ; l = t + b[0]
610 ADD,DC %ret0,%r0,%ret0 ; c+= carry
611 STD l,8(r_ptr)
612
613 LDO -2(n),n
614 LDO 16(a_ptr),a_ptr
615 LDO 16(b_ptr),b_ptr
616
617 CMPIB,<= 2,n,bn_add_words_unroll2
618 LDO 16(r_ptr),r_ptr
619
620 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
621
622bn_add_words_single_top
623 LDD 0(a_ptr),t
624 LDD 0(b_ptr),b
625
626 ADD t,%ret0,t ; t = t+c;
627 ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??)
628 ADD t,b,l ; l = t + b[0]
629 ADD,DC %ret0,%r0,%ret0 ; c+= carry
630 STD l,0(r_ptr)
631
632bn_add_words_exit
633 .EXIT
634 BVE (%rp)
635 NOP
636 .PROCEND ;in=23,24,25,26,29;out=28;
637
638;----------------------------------------------------------------------------
639;
640;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
641;
642; arg0 = rp
643; arg1 = ap
644; arg2 = bp
645; arg3 = n
646
647t1 .reg %r22
648t2 .reg %r21
649sub_tmp1 .reg %r20
650sub_tmp2 .reg %r19
651
652
653bn_sub_words
654 .proc
655 .callinfo
656 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
657 .entry
658 .align 64
659
660 CMPIB,>= 0,n,bn_sub_words_exit
661 COPY %r0,%ret0 ; return 0 by default
662
663 ;
664 ; If 2 or more numbers do the loop
665 ;
666 CMPIB,= 1,n,bn_sub_words_single_top
667 NOP
668
669 ;
670 ; This loop is unrolled 2 times (64-byte aligned as well)
671 ;
672bn_sub_words_unroll2
673 LDD 0(a_ptr),t1
674 LDD 0(b_ptr),t2
675 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
676 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
677
678 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
679 LDO 1(%r0),sub_tmp2
680
681 CMPCLR,*= t1,t2,%r0
682 COPY sub_tmp2,%ret0
683 STD sub_tmp1,0(r_ptr)
684
685 LDD 8(a_ptr),t1
686 LDD 8(b_ptr),t2
687 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
688 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
689 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
690 LDO 1(%r0),sub_tmp2
691
692 CMPCLR,*= t1,t2,%r0
693 COPY sub_tmp2,%ret0
694 STD sub_tmp1,8(r_ptr)
695
696 LDO -2(n),n
697 LDO 16(a_ptr),a_ptr
698 LDO 16(b_ptr),b_ptr
699
700 CMPIB,<= 2,n,bn_sub_words_unroll2
701 LDO 16(r_ptr),r_ptr
702
703 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
704
705bn_sub_words_single_top
706 LDD 0(a_ptr),t1
707 LDD 0(b_ptr),t2
708 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
709 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
710 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
711 LDO 1(%r0),sub_tmp2
712
713 CMPCLR,*= t1,t2,%r0
714 COPY sub_tmp2,%ret0
715
716 STD sub_tmp1,0(r_ptr)
717
718bn_sub_words_exit
719 .EXIT
720 BVE (%rp)
721 NOP
722 .PROCEND ;in=23,24,25,26,29;out=28;
723
724;------------------------------------------------------------------------------
725;
726; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
727;
728; arg0 = h
729; arg1 = l
730; arg2 = d
731;
732; This is mainly just modified assembly from the compiler, thus the
733; lack of variable names.
734;
735;------------------------------------------------------------------------------
736bn_div_words
737 .proc
738 .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
739 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
740 .IMPORT BN_num_bits_word,CODE,NO_RELOCATION
741 .IMPORT __iob,DATA
742 .IMPORT fprintf,CODE,NO_RELOCATION
743 .IMPORT abort,CODE,NO_RELOCATION
744 .IMPORT $$div2U,MILLICODE
745 .entry
746 STD %r2,-16(%r30)
747 STD,MA %r3,352(%r30)
748 STD %r4,-344(%r30)
749 STD %r5,-336(%r30)
750 STD %r6,-328(%r30)
751 STD %r7,-320(%r30)
752 STD %r8,-312(%r30)
753 STD %r9,-304(%r30)
754 STD %r10,-296(%r30)
755
756 STD %r27,-288(%r30) ; save gp
757
758 COPY %r24,%r3 ; save d
759 COPY %r26,%r4 ; save h (high 64-bits)
760 LDO -1(%r0),%ret0 ; return -1 by default
761
762 CMPB,*= %r0,%arg2,$D3 ; if (d == 0)
763 COPY %r25,%r5 ; save l (low 64-bits)
764
765 LDO -48(%r30),%r29 ; create ap
766 .CALL ;in=26,29;out=28;
767 B,L BN_num_bits_word,%r2
768 COPY %r3,%r26
769 LDD -288(%r30),%r27 ; restore gp
770 LDI 64,%r21
771
772 CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward)
773 COPY %ret0,%r24 ; i
774 MTSARCM %r24
775 DEPDI,Z -1,%sar,1,%r29
776 CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward)
777
778$00000012
779 SUBI 64,%r24,%r31 ; i = 64 - i;
780 CMPCLR,*<< %r4,%r3,%r0 ; if (h >= d)
781 SUB %r4,%r3,%r4 ; h -= d
782 CMPB,= %r31,%r0,$0000001A ; if (i)
783 COPY %r0,%r10 ; ret = 0
784 MTSARCM %r31 ; i to shift
785 DEPD,Z %r3,%sar,64,%r3 ; d <<= i;
786 SUBI 64,%r31,%r19 ; 64 - i; redundent
787 MTSAR %r19 ; (64 -i) to shift
788 SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i)
789 MTSARCM %r31 ; i to shift
790 DEPD,Z %r5,%sar,64,%r5 ; l <<= i;
791
792$0000001A
793 DEPDI,Z -1,31,32,%r19
794 EXTRD,U %r3,31,32,%r6 ; dh=(d&0xfff)>>32
795 EXTRD,U %r3,63,32,%r8 ; dl = d&0xffffff
796 LDO 2(%r0),%r9
797 STD %r3,-280(%r30) ; "d" to stack
798
799$0000001C
800 DEPDI,Z -1,63,32,%r29 ;
801 EXTRD,U %r4,31,32,%r31 ; h >> 32
802 CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div
803 COPY %r4,%r26
804 EXTRD,U %r4,31,32,%r25
805 COPY %r6,%r24
806 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
807 B,L $$div2U,%r2
808 EXTRD,U %r6,31,32,%r23
809 DEPD %r28,31,32,%r29
810$D2
811 STD %r29,-272(%r30) ; q
812 AND %r5,%r19,%r24 ; t & 0xffffffff00000000;
813 EXTRD,U %r24,31,32,%r24 ; ???
814 FLDD -272(%r30),%fr7 ; q
815 FLDD -280(%r30),%fr8 ; d
816 XMPYU %fr8L,%fr7L,%fr10
817 FSTD %fr10,-256(%r30)
818 XMPYU %fr8L,%fr7R,%fr22
819 FSTD %fr22,-264(%r30)
820 XMPYU %fr8R,%fr7L,%fr11
821 XMPYU %fr8R,%fr7R,%fr23
822 FSTD %fr11,-232(%r30)
823 FSTD %fr23,-240(%r30)
824 LDD -256(%r30),%r28
825 DEPD,Z %r28,31,32,%r2
826 LDD -264(%r30),%r20
827 ADD,L %r20,%r2,%r31
828 LDD -232(%r30),%r22
829 DEPD,Z %r22,31,32,%r22
830 LDD -240(%r30),%r21
831 B $00000024 ; enter loop
832 ADD,L %r21,%r22,%r23
833
834$0000002A
835 LDO -1(%r29),%r29
836 SUB %r23,%r8,%r23
837$00000024
838 SUB %r4,%r31,%r25
839 AND %r25,%r19,%r26
840 CMPB,*<>,N %r0,%r26,$00000046 ; (forward)
841 DEPD,Z %r25,31,32,%r20
842 OR %r20,%r24,%r21
843 CMPB,*<<,N %r21,%r23,$0000002A ;(backward)
844 SUB %r31,%r6,%r31
845;-------------Break path---------------------
846
847$00000046
848 DEPD,Z %r23,31,32,%r25 ;tl
849 EXTRD,U %r23,31,32,%r26 ;t
850 AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L
851 ADD,L %r31,%r26,%r31 ;th += t;
852 CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl)
853 LDO 1(%r31),%r31 ; th++;
854 CMPB,*<<=,N %r31,%r4,$00000036 ;if (n < th) (forward)
855 LDO -1(%r29),%r29 ;q--;
856 ADD,L %r4,%r3,%r4 ;h += d;
857$00000036
858 ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward)
859 SUB %r5,%r24,%r28 ; l -= tl;
860 SUB %r4,%r31,%r24 ; h -= th;
861 SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32));
862 DEPD,Z %r29,31,32,%r10 ; ret = q<<32
863 b $0000001C
864 DEPD,Z %r28,31,32,%r5 ; l = l << 32
865
866$D1
867 OR %r10,%r29,%r28 ; ret |= q
868$D3
869 LDD -368(%r30),%r2
870$D0
871 LDD -296(%r30),%r10
872 LDD -304(%r30),%r9
873 LDD -312(%r30),%r8
874 LDD -320(%r30),%r7
875 LDD -328(%r30),%r6
876 LDD -336(%r30),%r5
877 LDD -344(%r30),%r4
878 BVE (%r2)
879 .EXIT
880 LDD,MB -352(%r30),%r3
881
882bn_div_err_case
883 MFIA %r6
884 ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1
885 LDO R'bn_div_words-bn_div_err_case(%r1),%r6
886 ADDIL LT'__iob,%r27,%r1
887 LDD RT'__iob(%r1),%r26
888 ADDIL L'C$4-bn_div_words,%r6,%r1
889 LDO R'C$4-bn_div_words(%r1),%r25
890 LDO 64(%r26),%r26
891 .CALL ;in=24,25,26,29;out=28;
892 B,L fprintf,%r2
893 LDO -48(%r30),%r29
894 LDD -288(%r30),%r27
895 .CALL ;in=29;
896 B,L abort,%r2
897 LDO -48(%r30),%r29
898 LDD -288(%r30),%r27
899 B $D0
900 LDD -368(%r30),%r2
901 .PROCEND ;in=24,25,26,29;out=28;
902
903;----------------------------------------------------------------------------
904;
905; Registers to hold 64-bit values to manipulate. The "L" part
906; of the register corresponds to the upper 32-bits, while the "R"
907; part corresponds to the lower 32-bits
908;
909; Note, that when using b6 and b7, the code must save these before
910; using them because they are callee save registers
911;
912;
913; Floating point registers to use to save values that
914; are manipulated. These don't collide with ftemp1-6 and
915; are all caller save registers
916;
917a0 .reg %fr22
918a0L .reg %fr22L
919a0R .reg %fr22R
920
921a1 .reg %fr23
922a1L .reg %fr23L
923a1R .reg %fr23R
924
925a2 .reg %fr24
926a2L .reg %fr24L
927a2R .reg %fr24R
928
929a3 .reg %fr25
930a3L .reg %fr25L
931a3R .reg %fr25R
932
933a4 .reg %fr26
934a4L .reg %fr26L
935a4R .reg %fr26R
936
937a5 .reg %fr27
938a5L .reg %fr27L
939a5R .reg %fr27R
940
941a6 .reg %fr28
942a6L .reg %fr28L
943a6R .reg %fr28R
944
945a7 .reg %fr29
946a7L .reg %fr29L
947a7R .reg %fr29R
948
949b0 .reg %fr30
950b0L .reg %fr30L
951b0R .reg %fr30R
952
953b1 .reg %fr31
954b1L .reg %fr31L
955b1R .reg %fr31R
956
957;
958; Temporary floating point variables, these are all caller save
959; registers
960;
961ftemp1 .reg %fr4
962ftemp2 .reg %fr5
963ftemp3 .reg %fr6
964ftemp4 .reg %fr7
965
966;
967; The B set of registers when used.
968;
969
970b2 .reg %fr8
971b2L .reg %fr8L
972b2R .reg %fr8R
973
974b3 .reg %fr9
975b3L .reg %fr9L
976b3R .reg %fr9R
977
978b4 .reg %fr10
979b4L .reg %fr10L
980b4R .reg %fr10R
981
982b5 .reg %fr11
983b5L .reg %fr11L
984b5R .reg %fr11R
985
986b6 .reg %fr12
987b6L .reg %fr12L
988b6R .reg %fr12R
989
990b7 .reg %fr13
991b7L .reg %fr13L
992b7R .reg %fr13R
993
994c1 .reg %r21 ; only reg
995temp1 .reg %r20 ; only reg
996temp2 .reg %r19 ; only reg
997temp3 .reg %r31 ; only reg
998
999m1 .reg %r28
1000c2 .reg %r23
1001high_one .reg %r1
1002ht .reg %r6
1003lt .reg %r5
1004m .reg %r4
1005c3 .reg %r3
1006
1007SQR_ADD_C .macro A0L,A0R,C1,C2,C3
1008 XMPYU A0L,A0R,ftemp1 ; m
1009 FSTD ftemp1,-24(%sp) ; store m
1010
1011 XMPYU A0R,A0R,ftemp2 ; lt
1012 FSTD ftemp2,-16(%sp) ; store lt
1013
1014 XMPYU A0L,A0L,ftemp3 ; ht
1015 FSTD ftemp3,-8(%sp) ; store ht
1016
1017 LDD -24(%sp),m ; load m
1018 AND m,high_mask,temp2 ; m & Mask
1019 DEPD,Z m,30,31,temp3 ; m << 32+1
1020 LDD -16(%sp),lt ; lt
1021
1022 LDD -8(%sp),ht ; ht
1023 EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
1024 ADD temp3,lt,lt ; lt = lt+m
1025 ADD,L ht,temp1,ht ; ht += temp1
1026 ADD,DC ht,%r0,ht ; ht++
1027
1028 ADD C1,lt,C1 ; c1=c1+lt
1029 ADD,DC ht,%r0,ht ; ht++
1030
1031 ADD C2,ht,C2 ; c2=c2+ht
1032 ADD,DC C3,%r0,C3 ; c3++
1033.endm
1034
1035SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
1036 XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht
1037 FSTD ftemp1,-16(%sp) ;
1038 XMPYU A0R,A1L,ftemp2 ; m = bh*lt
1039 FSTD ftemp2,-8(%sp) ;
1040 XMPYU A0R,A1R,ftemp3 ; lt = bl*lt
1041 FSTD ftemp3,-32(%sp)
1042 XMPYU A0L,A1L,ftemp4 ; ht = bh*ht
1043 FSTD ftemp4,-24(%sp) ;
1044
1045 LDD -8(%sp),m ; r21 = m
1046 LDD -16(%sp),m1 ; r19 = m1
1047 ADD,L m,m1,m ; m+m1
1048
1049 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1050 LDD -24(%sp),ht ; r24 = ht
1051
1052 CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
1053 ADD,L ht,high_one,ht ; ht+=high_one
1054
1055 EXTRD,U m,31,32,temp1 ; m >> 32
1056 LDD -32(%sp),lt ; lt
1057 ADD,L ht,temp1,ht ; ht+= m>>32
1058 ADD lt,temp3,lt ; lt = lt+m1
1059 ADD,DC ht,%r0,ht ; ht++
1060
1061 ADD ht,ht,ht ; ht=ht+ht;
1062 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1063
1064 ADD lt,lt,lt ; lt=lt+lt;
1065 ADD,DC ht,%r0,ht ; add in carry (ht++)
1066
1067 ADD C1,lt,C1 ; c1=c1+lt
1068 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1069 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1070
1071 ADD C2,ht,C2 ; c2 = c2 + ht
1072 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1073.endm
1074
1075;
1076;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1077; arg0 = r_ptr
1078; arg1 = a_ptr
1079;
1080
1081bn_sqr_comba8
1082 .PROC
1083 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1084 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1085 .ENTRY
1086 .align 64
1087
1088 STD %r3,0(%sp) ; save r3
1089 STD %r4,8(%sp) ; save r4
1090 STD %r5,16(%sp) ; save r5
1091 STD %r6,24(%sp) ; save r6
1092
1093 ;
1094 ; Zero out carries
1095 ;
1096 COPY %r0,c1
1097 COPY %r0,c2
1098 COPY %r0,c3
1099
1100 LDO 128(%sp),%sp ; bump stack
1101 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1102 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1103
1104 ;
1105 ; Load up all of the values we are going to use
1106 ;
1107 FLDD 0(a_ptr),a0
1108 FLDD 8(a_ptr),a1
1109 FLDD 16(a_ptr),a2
1110 FLDD 24(a_ptr),a3
1111 FLDD 32(a_ptr),a4
1112 FLDD 40(a_ptr),a5
1113 FLDD 48(a_ptr),a6
1114 FLDD 56(a_ptr),a7
1115
1116 SQR_ADD_C a0L,a0R,c1,c2,c3
1117 STD c1,0(r_ptr) ; r[0] = c1;
1118 COPY %r0,c1
1119
1120 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1121 STD c2,8(r_ptr) ; r[1] = c2;
1122 COPY %r0,c2
1123
1124 SQR_ADD_C a1L,a1R,c3,c1,c2
1125 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1126 STD c3,16(r_ptr) ; r[2] = c3;
1127 COPY %r0,c3
1128
1129 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1130 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1131 STD c1,24(r_ptr) ; r[3] = c1;
1132 COPY %r0,c1
1133
1134 SQR_ADD_C a2L,a2R,c2,c3,c1
1135 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1136 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
1137 STD c2,32(r_ptr) ; r[4] = c2;
1138 COPY %r0,c2
1139
1140 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
1141 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
1142 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1143 STD c3,40(r_ptr) ; r[5] = c3;
1144 COPY %r0,c3
1145
1146 SQR_ADD_C a3L,a3R,c1,c2,c3
1147 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
1148 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
1149 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
1150 STD c1,48(r_ptr) ; r[6] = c1;
1151 COPY %r0,c1
1152
1153 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
1154 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
1155 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
1156 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
1157 STD c2,56(r_ptr) ; r[7] = c2;
1158 COPY %r0,c2
1159
1160 SQR_ADD_C a4L,a4R,c3,c1,c2
1161 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
1162 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
1163 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
1164 STD c3,64(r_ptr) ; r[8] = c3;
1165 COPY %r0,c3
1166
1167 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
1168 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
1169 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
1170 STD c1,72(r_ptr) ; r[9] = c1;
1171 COPY %r0,c1
1172
1173 SQR_ADD_C a5L,a5R,c2,c3,c1
1174 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
1175 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
1176 STD c2,80(r_ptr) ; r[10] = c2;
1177 COPY %r0,c2
1178
1179 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
1180 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
1181 STD c3,88(r_ptr) ; r[11] = c3;
1182 COPY %r0,c3
1183
1184 SQR_ADD_C a6L,a6R,c1,c2,c3
1185 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
1186 STD c1,96(r_ptr) ; r[12] = c1;
1187 COPY %r0,c1
1188
1189 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
1190 STD c2,104(r_ptr) ; r[13] = c2;
1191 COPY %r0,c2
1192
1193 SQR_ADD_C a7L,a7R,c3,c1,c2
1194 STD c3, 112(r_ptr) ; r[14] = c3
1195 STD c1, 120(r_ptr) ; r[15] = c1
1196
1197 .EXIT
1198 LDD -104(%sp),%r6 ; restore r6
1199 LDD -112(%sp),%r5 ; restore r5
1200 LDD -120(%sp),%r4 ; restore r4
1201 BVE (%rp)
1202 LDD,MB -128(%sp),%r3
1203
1204 .PROCEND
1205
1206;-----------------------------------------------------------------------------
1207;
1208;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1209; arg0 = r_ptr
1210; arg1 = a_ptr
1211;
1212
1213bn_sqr_comba4
1214 .proc
1215 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1216 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1217 .entry
1218 .align 64
1219 STD %r3,0(%sp) ; save r3
1220 STD %r4,8(%sp) ; save r4
1221 STD %r5,16(%sp) ; save r5
1222 STD %r6,24(%sp) ; save r6
1223
1224 ;
1225 ; Zero out carries
1226 ;
1227 COPY %r0,c1
1228 COPY %r0,c2
1229 COPY %r0,c3
1230
1231 LDO 128(%sp),%sp ; bump stack
1232 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1233 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1234
1235 ;
1236 ; Load up all of the values we are going to use
1237 ;
1238 FLDD 0(a_ptr),a0
1239 FLDD 8(a_ptr),a1
1240 FLDD 16(a_ptr),a2
1241 FLDD 24(a_ptr),a3
1242 FLDD 32(a_ptr),a4
1243 FLDD 40(a_ptr),a5
1244 FLDD 48(a_ptr),a6
1245 FLDD 56(a_ptr),a7
1246
1247 SQR_ADD_C a0L,a0R,c1,c2,c3
1248
1249 STD c1,0(r_ptr) ; r[0] = c1;
1250 COPY %r0,c1
1251
1252 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1253
1254 STD c2,8(r_ptr) ; r[1] = c2;
1255 COPY %r0,c2
1256
1257 SQR_ADD_C a1L,a1R,c3,c1,c2
1258 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1259
1260 STD c3,16(r_ptr) ; r[2] = c3;
1261 COPY %r0,c3
1262
1263 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1264 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1265
1266 STD c1,24(r_ptr) ; r[3] = c1;
1267 COPY %r0,c1
1268
1269 SQR_ADD_C a2L,a2R,c2,c3,c1
1270 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1271
1272 STD c2,32(r_ptr) ; r[4] = c2;
1273 COPY %r0,c2
1274
1275 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1276 STD c3,40(r_ptr) ; r[5] = c3;
1277 COPY %r0,c3
1278
1279 SQR_ADD_C a3L,a3R,c1,c2,c3
1280 STD c1,48(r_ptr) ; r[6] = c1;
1281 STD c2,56(r_ptr) ; r[7] = c2;
1282
1283 .EXIT
1284 LDD -104(%sp),%r6 ; restore r6
1285 LDD -112(%sp),%r5 ; restore r5
1286 LDD -120(%sp),%r4 ; restore r4
1287 BVE (%rp)
1288 LDD,MB -128(%sp),%r3
1289
1290 .PROCEND
1291
1292
1293;---------------------------------------------------------------------------
1294
1295MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
1296 XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht
1297 FSTD ftemp1,-16(%sp) ;
1298 XMPYU A0R,B0L,ftemp2 ; m = bh*lt
1299 FSTD ftemp2,-8(%sp) ;
1300 XMPYU A0R,B0R,ftemp3 ; lt = bl*lt
1301 FSTD ftemp3,-32(%sp)
1302 XMPYU A0L,B0L,ftemp4 ; ht = bh*ht
1303 FSTD ftemp4,-24(%sp) ;
1304
1305 LDD -8(%sp),m ; r21 = m
1306 LDD -16(%sp),m1 ; r19 = m1
1307 ADD,L m,m1,m ; m+m1
1308
1309 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1310 LDD -24(%sp),ht ; r24 = ht
1311
1312 CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
1313 ADD,L ht,high_one,ht ; ht+=high_one
1314
1315 EXTRD,U m,31,32,temp1 ; m >> 32
1316 LDD -32(%sp),lt ; lt
1317 ADD,L ht,temp1,ht ; ht+= m>>32
1318 ADD lt,temp3,lt ; lt = lt+m1
1319 ADD,DC ht,%r0,ht ; ht++
1320
1321 ADD C1,lt,C1 ; c1=c1+lt
1322 ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise
1323
1324 ADD C2,ht,C2 ; c2 = c2 + ht
1325 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1326.endm
1327
1328
1329;
1330;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1331; arg0 = r_ptr
1332; arg1 = a_ptr
1333; arg2 = b_ptr
1334;
1335
1336bn_mul_comba8
1337 .proc
1338 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1339 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1340 .entry
1341 .align 64
1342
1343 STD %r3,0(%sp) ; save r3
1344 STD %r4,8(%sp) ; save r4
1345 STD %r5,16(%sp) ; save r5
1346 STD %r6,24(%sp) ; save r6
1347 FSTD %fr12,32(%sp) ; save r6
1348 FSTD %fr13,40(%sp) ; save r7
1349
1350 ;
1351 ; Zero out carries
1352 ;
1353 COPY %r0,c1
1354 COPY %r0,c2
1355 COPY %r0,c3
1356
1357 LDO 128(%sp),%sp ; bump stack
1358 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1359
1360 ;
1361 ; Load up all of the values we are going to use
1362 ;
1363 FLDD 0(a_ptr),a0
1364 FLDD 8(a_ptr),a1
1365 FLDD 16(a_ptr),a2
1366 FLDD 24(a_ptr),a3
1367 FLDD 32(a_ptr),a4
1368 FLDD 40(a_ptr),a5
1369 FLDD 48(a_ptr),a6
1370 FLDD 56(a_ptr),a7
1371
1372 FLDD 0(b_ptr),b0
1373 FLDD 8(b_ptr),b1
1374 FLDD 16(b_ptr),b2
1375 FLDD 24(b_ptr),b3
1376 FLDD 32(b_ptr),b4
1377 FLDD 40(b_ptr),b5
1378 FLDD 48(b_ptr),b6
1379 FLDD 56(b_ptr),b7
1380
1381 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1382 STD c1,0(r_ptr)
1383 COPY %r0,c1
1384
1385 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1386 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1387 STD c2,8(r_ptr)
1388 COPY %r0,c2
1389
1390 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1391 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1392 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1393 STD c3,16(r_ptr)
1394 COPY %r0,c3
1395
1396 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1397 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1398 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1399 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1400 STD c1,24(r_ptr)
1401 COPY %r0,c1
1402
1403 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1404 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1405 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1406 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1407 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1408 STD c2,32(r_ptr)
1409 COPY %r0,c2
1410
1411 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1412 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1413 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1414 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1415 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1416 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1417 STD c3,40(r_ptr)
1418 COPY %r0,c3
1419
1420 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1421 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1422 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1423 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1424 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1425 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1426 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1427 STD c1,48(r_ptr)
1428 COPY %r0,c1
1429
1430 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1431 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1432 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1433 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1434 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1435 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1436 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1437 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1438 STD c2,56(r_ptr)
1439 COPY %r0,c2
1440
1441 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1442 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1443 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1444 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1445 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1446 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1447 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1448 STD c3,64(r_ptr)
1449 COPY %r0,c3
1450
1451 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1452 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1453 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1454 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1455 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1456 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1457 STD c1,72(r_ptr)
1458 COPY %r0,c1
1459
1460 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1461 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1462 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1463 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1464 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1465 STD c2,80(r_ptr)
1466 COPY %r0,c2
1467
1468 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1469 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1470 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1471 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1472 STD c3,88(r_ptr)
1473 COPY %r0,c3
1474
1475 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1476 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1477 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1478 STD c1,96(r_ptr)
1479 COPY %r0,c1
1480
1481 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1482 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1483 STD c2,104(r_ptr)
1484 COPY %r0,c2
1485
1486 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1487 STD c3,112(r_ptr)
1488 STD c1,120(r_ptr)
1489
1490 .EXIT
1491 FLDD -88(%sp),%fr13
1492 FLDD -96(%sp),%fr12
1493 LDD -104(%sp),%r6 ; restore r6
1494 LDD -112(%sp),%r5 ; restore r5
1495 LDD -120(%sp),%r4 ; restore r4
1496 BVE (%rp)
1497 LDD,MB -128(%sp),%r3
1498
1499 .PROCEND
1500
1501;-----------------------------------------------------------------------------
1502;
1503;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1504; arg0 = r_ptr
1505; arg1 = a_ptr
1506; arg2 = b_ptr
1507;
1508
1509bn_mul_comba4
1510 .proc
1511 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1512 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1513 .entry
1514 .align 64
1515
1516 STD %r3,0(%sp) ; save r3
1517 STD %r4,8(%sp) ; save r4
1518 STD %r5,16(%sp) ; save r5
1519 STD %r6,24(%sp) ; save r6
1520 FSTD %fr12,32(%sp) ; save r6
1521 FSTD %fr13,40(%sp) ; save r7
1522
1523 ;
1524 ; Zero out carries
1525 ;
1526 COPY %r0,c1
1527 COPY %r0,c2
1528 COPY %r0,c3
1529
1530 LDO 128(%sp),%sp ; bump stack
1531 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1532
1533 ;
1534 ; Load up all of the values we are going to use
1535 ;
1536 FLDD 0(a_ptr),a0
1537 FLDD 8(a_ptr),a1
1538 FLDD 16(a_ptr),a2
1539 FLDD 24(a_ptr),a3
1540
1541 FLDD 0(b_ptr),b0
1542 FLDD 8(b_ptr),b1
1543 FLDD 16(b_ptr),b2
1544 FLDD 24(b_ptr),b3
1545
1546 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1547 STD c1,0(r_ptr)
1548 COPY %r0,c1
1549
1550 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1551 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1552 STD c2,8(r_ptr)
1553 COPY %r0,c2
1554
1555 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1556 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1557 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1558 STD c3,16(r_ptr)
1559 COPY %r0,c3
1560
1561 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1562 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1563 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1564 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1565 STD c1,24(r_ptr)
1566 COPY %r0,c1
1567
1568 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1569 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1570 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1571 STD c2,32(r_ptr)
1572 COPY %r0,c2
1573
1574 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1575 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1576 STD c3,40(r_ptr)
1577 COPY %r0,c3
1578
1579 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1580 STD c1,48(r_ptr)
1581 STD c2,56(r_ptr)
1582
1583 .EXIT
1584 FLDD -88(%sp),%fr13
1585 FLDD -96(%sp),%fr12
1586 LDD -104(%sp),%r6 ; restore r6
1587 LDD -112(%sp),%r5 ; restore r5
1588 LDD -120(%sp),%r4 ; restore r4
1589 BVE (%rp)
1590 LDD,MB -128(%sp),%r3
1591
1592 .PROCEND
1593
1594
1595 .SPACE $TEXT$
1596 .SUBSPA $CODE$
1597 .SPACE $PRIVATE$,SORT=16
1598 .IMPORT $global$,DATA
1599 .SPACE $TEXT$
1600 .SUBSPA $CODE$
1601 .SUBSPA $LIT$,ACCESS=0x2c
1602C$4
1603 .ALIGN 8
1604 .STRINGZ "Division would overflow (%d)\n"
1605 .END
diff --git a/src/lib/libcrypto/bn/asm/ppc-mont.pl b/src/lib/libcrypto/bn/asm/ppc-mont.pl
deleted file mode 100644
index 7849eae959..0000000000
--- a/src/lib/libcrypto/bn/asm/ppc-mont.pl
+++ /dev/null
@@ -1,323 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# April 2006
11
12# "Teaser" Montgomery multiplication module for PowerPC. It's possible
13# to gain a bit more by modulo-scheduling outer loop, then dedicated
14# squaring procedure should give further 20% and code can be adapted
15# for 32-bit application running on 64-bit CPU. As for the latter.
16# It won't be able to achieve "native" 64-bit performance, because in
17# 32-bit application context every addc instruction will have to be
18# expanded as addc, twice right shift by 32 and finally adde, etc.
19# So far RSA *sign* performance improvement over pre-bn_mul_mont asm
20# for 64-bit application running on PPC970/G5 is:
21#
22# 512-bit +65%
23# 1024-bit +35%
24# 2048-bit +18%
25# 4096-bit +4%
26
27$flavour = shift;
28
29if ($flavour =~ /32/) {
30 $BITS= 32;
31 $BNSZ= $BITS/8;
32 $SIZE_T=4;
33 $RZONE= 224;
34 $FRAME= $SIZE_T*16;
35
36 $LD= "lwz"; # load
37 $LDU= "lwzu"; # load and update
38 $LDX= "lwzx"; # load indexed
39 $ST= "stw"; # store
40 $STU= "stwu"; # store and update
41 $STX= "stwx"; # store indexed
42 $STUX= "stwux"; # store indexed and update
43 $UMULL= "mullw"; # unsigned multiply low
44 $UMULH= "mulhwu"; # unsigned multiply high
45 $UCMP= "cmplw"; # unsigned compare
46 $SHRI= "srwi"; # unsigned shift right by immediate
47 $PUSH= $ST;
48 $POP= $LD;
49} elsif ($flavour =~ /64/) {
50 $BITS= 64;
51 $BNSZ= $BITS/8;
52 $SIZE_T=8;
53 $RZONE= 288;
54 $FRAME= $SIZE_T*16;
55
56 # same as above, but 64-bit mnemonics...
57 $LD= "ld"; # load
58 $LDU= "ldu"; # load and update
59 $LDX= "ldx"; # load indexed
60 $ST= "std"; # store
61 $STU= "stdu"; # store and update
62 $STX= "stdx"; # store indexed
63 $STUX= "stdux"; # store indexed and update
64 $UMULL= "mulld"; # unsigned multiply low
65 $UMULH= "mulhdu"; # unsigned multiply high
66 $UCMP= "cmpld"; # unsigned compare
67 $SHRI= "srdi"; # unsigned shift right by immediate
68 $PUSH= $ST;
69 $POP= $LD;
70} else { die "nonsense $flavour"; }
71
72$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
73( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
74( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
75die "can't locate ppc-xlate.pl";
76
77open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
78
79$sp="r1";
80$toc="r2";
81$rp="r3"; $ovf="r3";
82$ap="r4";
83$bp="r5";
84$np="r6";
85$n0="r7";
86$num="r8";
87$rp="r9"; # $rp is reassigned
88$aj="r10";
89$nj="r11";
90$tj="r12";
91# non-volatile registers
92$i="r14";
93$j="r15";
94$tp="r16";
95$m0="r17";
96$m1="r18";
97$lo0="r19";
98$hi0="r20";
99$lo1="r21";
100$hi1="r22";
101$alo="r23";
102$ahi="r24";
103$nlo="r25";
104#
105$nhi="r0";
106
107$code=<<___;
108.machine "any"
109.text
110
111.globl .bn_mul_mont
112.align 4
113.bn_mul_mont:
114 cmpwi $num,4
115 mr $rp,r3 ; $rp is reassigned
116 li r3,0
117 bltlr
118
119 slwi $num,$num,`log($BNSZ)/log(2)`
120 li $tj,-4096
121 addi $ovf,$num,`$FRAME+$RZONE`
122 subf $ovf,$ovf,$sp ; $sp-$ovf
123 and $ovf,$ovf,$tj ; minimize TLB usage
124 subf $ovf,$sp,$ovf ; $ovf-$sp
125 srwi $num,$num,`log($BNSZ)/log(2)`
126 $STUX $sp,$sp,$ovf
127
128 $PUSH r14,`4*$SIZE_T`($sp)
129 $PUSH r15,`5*$SIZE_T`($sp)
130 $PUSH r16,`6*$SIZE_T`($sp)
131 $PUSH r17,`7*$SIZE_T`($sp)
132 $PUSH r18,`8*$SIZE_T`($sp)
133 $PUSH r19,`9*$SIZE_T`($sp)
134 $PUSH r20,`10*$SIZE_T`($sp)
135 $PUSH r21,`11*$SIZE_T`($sp)
136 $PUSH r22,`12*$SIZE_T`($sp)
137 $PUSH r23,`13*$SIZE_T`($sp)
138 $PUSH r24,`14*$SIZE_T`($sp)
139 $PUSH r25,`15*$SIZE_T`($sp)
140
141 $LD $n0,0($n0) ; pull n0[0] value
142 addi $num,$num,-2 ; adjust $num for counter register
143
144 $LD $m0,0($bp) ; m0=bp[0]
145 $LD $aj,0($ap) ; ap[0]
146 addi $tp,$sp,$FRAME
147 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0]
148 $UMULH $hi0,$aj,$m0
149
150 $LD $aj,$BNSZ($ap) ; ap[1]
151 $LD $nj,0($np) ; np[0]
152
153 $UMULL $m1,$lo0,$n0 ; "tp[0]"*n0
154
155 $UMULL $alo,$aj,$m0 ; ap[1]*bp[0]
156 $UMULH $ahi,$aj,$m0
157
158 $UMULL $lo1,$nj,$m1 ; np[0]*m1
159 $UMULH $hi1,$nj,$m1
160 $LD $nj,$BNSZ($np) ; np[1]
161 addc $lo1,$lo1,$lo0
162 addze $hi1,$hi1
163
164 $UMULL $nlo,$nj,$m1 ; np[1]*m1
165 $UMULH $nhi,$nj,$m1
166
167 mtctr $num
168 li $j,`2*$BNSZ`
169.align 4
170L1st:
171 $LDX $aj,$ap,$j ; ap[j]
172 addc $lo0,$alo,$hi0
173 $LDX $nj,$np,$j ; np[j]
174 addze $hi0,$ahi
175 $UMULL $alo,$aj,$m0 ; ap[j]*bp[0]
176 addc $lo1,$nlo,$hi1
177 $UMULH $ahi,$aj,$m0
178 addze $hi1,$nhi
179 $UMULL $nlo,$nj,$m1 ; np[j]*m1
180 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
181 $UMULH $nhi,$nj,$m1
182 addze $hi1,$hi1
183 $ST $lo1,0($tp) ; tp[j-1]
184
185 addi $j,$j,$BNSZ ; j++
186 addi $tp,$tp,$BNSZ ; tp++
187 bdnz- L1st
188;L1st
189 addc $lo0,$alo,$hi0
190 addze $hi0,$ahi
191
192 addc $lo1,$nlo,$hi1
193 addze $hi1,$nhi
194 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
195 addze $hi1,$hi1
196 $ST $lo1,0($tp) ; tp[j-1]
197
198 li $ovf,0
199 addc $hi1,$hi1,$hi0
200 addze $ovf,$ovf ; upmost overflow bit
201 $ST $hi1,$BNSZ($tp)
202
203 li $i,$BNSZ
204.align 4
205Louter:
206 $LDX $m0,$bp,$i ; m0=bp[i]
207 $LD $aj,0($ap) ; ap[0]
208 addi $tp,$sp,$FRAME
209 $LD $tj,$FRAME($sp) ; tp[0]
210 $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i]
211 $UMULH $hi0,$aj,$m0
212 $LD $aj,$BNSZ($ap) ; ap[1]
213 $LD $nj,0($np) ; np[0]
214 addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0]
215 $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
216 addze $hi0,$hi0
217 $UMULL $m1,$lo0,$n0 ; tp[0]*n0
218 $UMULH $ahi,$aj,$m0
219 $UMULL $lo1,$nj,$m1 ; np[0]*m1
220 $UMULH $hi1,$nj,$m1
221 $LD $nj,$BNSZ($np) ; np[1]
222 addc $lo1,$lo1,$lo0
223 $UMULL $nlo,$nj,$m1 ; np[1]*m1
224 addze $hi1,$hi1
225 $UMULH $nhi,$nj,$m1
226
227 mtctr $num
228 li $j,`2*$BNSZ`
229.align 4
230Linner:
231 $LDX $aj,$ap,$j ; ap[j]
232 addc $lo0,$alo,$hi0
233 $LD $tj,$BNSZ($tp) ; tp[j]
234 addze $hi0,$ahi
235 $LDX $nj,$np,$j ; np[j]
236 addc $lo1,$nlo,$hi1
237 $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
238 addze $hi1,$nhi
239 $UMULH $ahi,$aj,$m0
240 addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
241 $UMULL $nlo,$nj,$m1 ; np[j]*m1
242 addze $hi0,$hi0
243 $UMULH $nhi,$nj,$m1
244 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
245 addi $j,$j,$BNSZ ; j++
246 addze $hi1,$hi1
247 $ST $lo1,0($tp) ; tp[j-1]
248 addi $tp,$tp,$BNSZ ; tp++
249 bdnz- Linner
250;Linner
251 $LD $tj,$BNSZ($tp) ; tp[j]
252 addc $lo0,$alo,$hi0
253 addze $hi0,$ahi
254 addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
255 addze $hi0,$hi0
256
257 addc $lo1,$nlo,$hi1
258 addze $hi1,$nhi
259 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
260 addze $hi1,$hi1
261 $ST $lo1,0($tp) ; tp[j-1]
262
263 addic $ovf,$ovf,-1 ; move upmost overflow to XER[CA]
264 li $ovf,0
265 adde $hi1,$hi1,$hi0
266 addze $ovf,$ovf
267 $ST $hi1,$BNSZ($tp)
268;
269 slwi $tj,$num,`log($BNSZ)/log(2)`
270 $UCMP $i,$tj
271 addi $i,$i,$BNSZ
272 ble- Louter
273
274 addi $num,$num,2 ; restore $num
275 subfc $j,$j,$j ; j=0 and "clear" XER[CA]
276 addi $tp,$sp,$FRAME
277 mtctr $num
278
279.align 4
280Lsub: $LDX $tj,$tp,$j
281 $LDX $nj,$np,$j
282 subfe $aj,$nj,$tj ; tp[j]-np[j]
283 $STX $aj,$rp,$j
284 addi $j,$j,$BNSZ
285 bdnz- Lsub
286
287 li $j,0
288 mtctr $num
289 subfe $ovf,$j,$ovf ; handle upmost overflow bit
290 and $ap,$tp,$ovf
291 andc $np,$rp,$ovf
292 or $ap,$ap,$np ; ap=borrow?tp:rp
293
294.align 4
295Lcopy: ; copy or in-place refresh
296 $LDX $tj,$ap,$j
297 $STX $tj,$rp,$j
298 $STX $j,$tp,$j ; zap at once
299 addi $j,$j,$BNSZ
300 bdnz- Lcopy
301
302 $POP r14,`4*$SIZE_T`($sp)
303 $POP r15,`5*$SIZE_T`($sp)
304 $POP r16,`6*$SIZE_T`($sp)
305 $POP r17,`7*$SIZE_T`($sp)
306 $POP r18,`8*$SIZE_T`($sp)
307 $POP r19,`9*$SIZE_T`($sp)
308 $POP r20,`10*$SIZE_T`($sp)
309 $POP r21,`11*$SIZE_T`($sp)
310 $POP r22,`12*$SIZE_T`($sp)
311 $POP r23,`13*$SIZE_T`($sp)
312 $POP r24,`14*$SIZE_T`($sp)
313 $POP r25,`15*$SIZE_T`($sp)
314 $POP $sp,0($sp)
315 li r3,1
316 blr
317 .long 0
318.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
319___
320
321$code =~ s/\`([^\`]*)\`/eval $1/gem;
322print $code;
323close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl
deleted file mode 100644
index f4093177e6..0000000000
--- a/src/lib/libcrypto/bn/asm/ppc.pl
+++ /dev/null
@@ -1,1981 +0,0 @@
1#!/usr/bin/env perl
2#
3# Implemented as a Perl wrapper as we want to support several different
4# architectures with single file. We pick up the target based on the
5# file name we are asked to generate.
6#
7# It should be noted though that this perl code is nothing like
8# <openssl>/crypto/perlasm/x86*. In this case perl is used pretty much
9# as pre-processor to cover for platform differences in name decoration,
10# linker tables, 32-/64-bit instruction sets...
11#
12# As you might know there're several PowerPC ABI in use. Most notably
13# Linux and AIX use different 32-bit ABIs. Good news are that these ABIs
14# are similar enough to implement leaf(!) functions, which would be ABI
15# neutral. And that's what you find here: ABI neutral leaf functions.
16# In case you wonder what that is...
17#
18# AIX performance
19#
20# MEASUREMENTS WITH cc ON a 200 MhZ PowerPC 604e.
21#
22# The following is the performance of 32-bit compiler
23# generated code:
24#
25# OpenSSL 0.9.6c 21 dec 2001
26# built on: Tue Jun 11 11:06:51 EDT 2002
27# options:bn(64,32) ...
28#compiler: cc -DTHREADS -DAIX -DB_ENDIAN -DBN_LLONG -O3
29# sign verify sign/s verify/s
30#rsa 512 bits 0.0098s 0.0009s 102.0 1170.6
31#rsa 1024 bits 0.0507s 0.0026s 19.7 387.5
32#rsa 2048 bits 0.3036s 0.0085s 3.3 117.1
33#rsa 4096 bits 2.0040s 0.0299s 0.5 33.4
34#dsa 512 bits 0.0087s 0.0106s 114.3 94.5
35#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0
36#
37# Same bechmark with this assembler code:
38#
39#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2
40#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1
41#rsa 2048 bits 0.1744s 0.0050s 5.7 201.2
42#rsa 4096 bits 1.1644s 0.0179s 0.9 55.7
43#dsa 512 bits 0.0052s 0.0062s 191.6 162.0
44#dsa 1024 bits 0.0149s 0.0180s 67.0 55.5
45#
46# Number of operations increases by at almost 75%
47#
48# Here are performance numbers for 64-bit compiler
49# generated code:
50#
51# OpenSSL 0.9.6g [engine] 9 Aug 2002
52# built on: Fri Apr 18 16:59:20 EDT 2003
53# options:bn(64,64) ...
54# compiler: cc -DTHREADS -D_REENTRANT -q64 -DB_ENDIAN -O3
55# sign verify sign/s verify/s
56#rsa 512 bits 0.0028s 0.0003s 357.1 3844.4
57#rsa 1024 bits 0.0148s 0.0008s 67.5 1239.7
58#rsa 2048 bits 0.0963s 0.0028s 10.4 353.0
59#rsa 4096 bits 0.6538s 0.0102s 1.5 98.1
60#dsa 512 bits 0.0026s 0.0032s 382.5 313.7
61#dsa 1024 bits 0.0081s 0.0099s 122.8 100.6
62#
63# Same benchmark with this assembler code:
64#
65#rsa 512 bits 0.0020s 0.0002s 510.4 6273.7
66#rsa 1024 bits 0.0088s 0.0005s 114.1 2128.3
67#rsa 2048 bits 0.0540s 0.0016s 18.5 622.5
68#rsa 4096 bits 0.3700s 0.0058s 2.7 171.0
69#dsa 512 bits 0.0016s 0.0020s 610.7 507.1
70#dsa 1024 bits 0.0047s 0.0058s 212.5 173.2
71#
72# Again, performance increases by at about 75%
73#
74# Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code)
75# OpenSSL 0.9.7c 30 Sep 2003
76#
77# Original code.
78#
79#rsa 512 bits 0.0011s 0.0001s 906.1 11012.5
80#rsa 1024 bits 0.0060s 0.0003s 166.6 3363.1
81#rsa 2048 bits 0.0370s 0.0010s 27.1 982.4
82#rsa 4096 bits 0.2426s 0.0036s 4.1 280.4
83#dsa 512 bits 0.0010s 0.0012s 1038.1 841.5
84#dsa 1024 bits 0.0030s 0.0037s 329.6 269.7
85#dsa 2048 bits 0.0101s 0.0127s 98.9 78.6
86#
87# Same benchmark with this assembler code:
88#
89#rsa 512 bits 0.0007s 0.0001s 1416.2 16645.9
90#rsa 1024 bits 0.0036s 0.0002s 274.4 5380.6
91#rsa 2048 bits 0.0222s 0.0006s 45.1 1589.5
92#rsa 4096 bits 0.1469s 0.0022s 6.8 449.6
93#dsa 512 bits 0.0006s 0.0007s 1664.2 1376.2
94#dsa 1024 bits 0.0018s 0.0023s 545.0 442.2
95#dsa 2048 bits 0.0061s 0.0075s 163.5 132.8
96#
97# Performance increase of ~60%
98#
99# If you have comments or suggestions to improve code send
100# me a note at schari@us.ibm.com
101#
102
103$flavour = shift;
104
105if ($flavour =~ /32/) {
106 $BITS= 32;
107 $BNSZ= $BITS/8;
108 $ISA= "\"ppc\"";
109
110 $LD= "lwz"; # load
111 $LDU= "lwzu"; # load and update
112 $ST= "stw"; # store
113 $STU= "stwu"; # store and update
114 $UMULL= "mullw"; # unsigned multiply low
115 $UMULH= "mulhwu"; # unsigned multiply high
116 $UDIV= "divwu"; # unsigned divide
117 $UCMPI= "cmplwi"; # unsigned compare with immediate
118 $UCMP= "cmplw"; # unsigned compare
119 $CNTLZ= "cntlzw"; # count leading zeros
120 $SHL= "slw"; # shift left
121 $SHR= "srw"; # unsigned shift right
122 $SHRI= "srwi"; # unsigned shift right by immediate
123 $SHLI= "slwi"; # shift left by immediate
124 $CLRU= "clrlwi"; # clear upper bits
125 $INSR= "insrwi"; # insert right
126 $ROTL= "rotlwi"; # rotate left by immediate
127 $TR= "tw"; # conditional trap
128} elsif ($flavour =~ /64/) {
129 $BITS= 64;
130 $BNSZ= $BITS/8;
131 $ISA= "\"ppc64\"";
132
133 # same as above, but 64-bit mnemonics...
134 $LD= "ld"; # load
135 $LDU= "ldu"; # load and update
136 $ST= "std"; # store
137 $STU= "stdu"; # store and update
138 $UMULL= "mulld"; # unsigned multiply low
139 $UMULH= "mulhdu"; # unsigned multiply high
140 $UDIV= "divdu"; # unsigned divide
141 $UCMPI= "cmpldi"; # unsigned compare with immediate
142 $UCMP= "cmpld"; # unsigned compare
143 $CNTLZ= "cntlzd"; # count leading zeros
144 $SHL= "sld"; # shift left
145 $SHR= "srd"; # unsigned shift right
146 $SHRI= "srdi"; # unsigned shift right by immediate
147 $SHLI= "sldi"; # shift left by immediate
148 $CLRU= "clrldi"; # clear upper bits
149 $INSR= "insrdi"; # insert right
150 $ROTL= "rotldi"; # rotate left by immediate
151 $TR= "td"; # conditional trap
152} else { die "nonsense $flavour"; }
153
154$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
155( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
156( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
157die "can't locate ppc-xlate.pl";
158
159open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
160
161$data=<<EOF;
162#--------------------------------------------------------------------
163#
164#
165#
166#
167# File: ppc32.s
168#
169# Created by: Suresh Chari
170# IBM Thomas J. Watson Research Library
171# Hawthorne, NY
172#
173#
174# Description: Optimized assembly routines for OpenSSL crypto
175# on the 32 bitPowerPC platform.
176#
177#
178# Version History
179#
180# 2. Fixed bn_add,bn_sub and bn_div_words, added comments,
181# cleaned up code. Also made a single version which can
182# be used for both the AIX and Linux compilers. See NOTE
183# below.
184# 12/05/03 Suresh Chari
185# (with lots of help from) Andy Polyakov
186##
187# 1. Initial version 10/20/02 Suresh Chari
188#
189#
190# The following file works for the xlc,cc
191# and gcc compilers.
192#
193# NOTE: To get the file to link correctly with the gcc compiler
194# you have to change the names of the routines and remove
195# the first .(dot) character. This should automatically
196# be done in the build process.
197#
198# Hand optimized assembly code for the following routines
199#
200# bn_sqr_comba4
201# bn_sqr_comba8
202# bn_mul_comba4
203# bn_mul_comba8
204# bn_sub_words
205# bn_add_words
206# bn_div_words
207# bn_sqr_words
208# bn_mul_words
209# bn_mul_add_words
210#
211# NOTE: It is possible to optimize this code more for
212# specific PowerPC or Power architectures. On the Northstar
213# architecture the optimizations in this file do
214# NOT provide much improvement.
215#
216# If you have comments or suggestions to improve code send
217# me a note at schari\@us.ibm.com
218#
219#--------------------------------------------------------------------------
220#
221# Defines to be used in the assembly code.
222#
223#.set r0,0 # we use it as storage for value of 0
224#.set SP,1 # preserved
225#.set RTOC,2 # preserved
226#.set r3,3 # 1st argument/return value
227#.set r4,4 # 2nd argument/volatile register
228#.set r5,5 # 3rd argument/volatile register
229#.set r6,6 # ...
230#.set r7,7
231#.set r8,8
232#.set r9,9
233#.set r10,10
234#.set r11,11
235#.set r12,12
236#.set r13,13 # not used, nor any other "below" it...
237
238# Declare function names to be global
239# NOTE: For gcc these names MUST be changed to remove
240# the first . i.e. for example change ".bn_sqr_comba4"
241# to "bn_sqr_comba4". This should be automatically done
242# in the build.
243
244 .globl .bn_sqr_comba4
245 .globl .bn_sqr_comba8
246 .globl .bn_mul_comba4
247 .globl .bn_mul_comba8
248 .globl .bn_sub_words
249 .globl .bn_add_words
250 .globl .bn_div_words
251 .globl .bn_sqr_words
252 .globl .bn_mul_words
253 .globl .bn_mul_add_words
254
255# .text section
256
257 .machine "any"
258
259#
260# NOTE: The following label name should be changed to
261# "bn_sqr_comba4" i.e. remove the first dot
262# for the gcc compiler. This should be automatically
263# done in the build
264#
265
266.align 4
267.bn_sqr_comba4:
268#
269# Optimized version of bn_sqr_comba4.
270#
271# void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
272# r3 contains r
273# r4 contains a
274#
275# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
276#
277# r5,r6 are the two BN_ULONGs being multiplied.
278# r7,r8 are the results of the 32x32 giving 64 bit multiply.
279# r9,r10, r11 are the equivalents of c1,c2, c3.
280# Here's the assembly
281#
282#
283 xor r0,r0,r0 # set r0 = 0. Used in the addze
284 # instructions below
285
286 #sqr_add_c(a,0,c1,c2,c3)
287 $LD r5,`0*$BNSZ`(r4)
288 $UMULL r9,r5,r5
289 $UMULH r10,r5,r5 #in first iteration. No need
290 #to add since c1=c2=c3=0.
291 # Note c3(r11) is NOT set to 0
292 # but will be.
293
294 $ST r9,`0*$BNSZ`(r3) # r[0]=c1;
295 # sqr_add_c2(a,1,0,c2,c3,c1);
296 $LD r6,`1*$BNSZ`(r4)
297 $UMULL r7,r5,r6
298 $UMULH r8,r5,r6
299
300 addc r7,r7,r7 # compute (r7,r8)=2*(r7,r8)
301 adde r8,r8,r8
302 addze r9,r0 # catch carry if any.
303 # r9= r0(=0) and carry
304
305 addc r10,r7,r10 # now add to temp result.
306 addze r11,r8 # r8 added to r11 which is 0
307 addze r9,r9
308
309 $ST r10,`1*$BNSZ`(r3) #r[1]=c2;
310 #sqr_add_c(a,1,c3,c1,c2)
311 $UMULL r7,r6,r6
312 $UMULH r8,r6,r6
313 addc r11,r7,r11
314 adde r9,r8,r9
315 addze r10,r0
316 #sqr_add_c2(a,2,0,c3,c1,c2)
317 $LD r6,`2*$BNSZ`(r4)
318 $UMULL r7,r5,r6
319 $UMULH r8,r5,r6
320
321 addc r7,r7,r7
322 adde r8,r8,r8
323 addze r10,r10
324
325 addc r11,r7,r11
326 adde r9,r8,r9
327 addze r10,r10
328 $ST r11,`2*$BNSZ`(r3) #r[2]=c3
329 #sqr_add_c2(a,3,0,c1,c2,c3);
330 $LD r6,`3*$BNSZ`(r4)
331 $UMULL r7,r5,r6
332 $UMULH r8,r5,r6
333 addc r7,r7,r7
334 adde r8,r8,r8
335 addze r11,r0
336
337 addc r9,r7,r9
338 adde r10,r8,r10
339 addze r11,r11
340 #sqr_add_c2(a,2,1,c1,c2,c3);
341 $LD r5,`1*$BNSZ`(r4)
342 $LD r6,`2*$BNSZ`(r4)
343 $UMULL r7,r5,r6
344 $UMULH r8,r5,r6
345
346 addc r7,r7,r7
347 adde r8,r8,r8
348 addze r11,r11
349 addc r9,r7,r9
350 adde r10,r8,r10
351 addze r11,r11
352 $ST r9,`3*$BNSZ`(r3) #r[3]=c1
353 #sqr_add_c(a,2,c2,c3,c1);
354 $UMULL r7,r6,r6
355 $UMULH r8,r6,r6
356 addc r10,r7,r10
357 adde r11,r8,r11
358 addze r9,r0
359 #sqr_add_c2(a,3,1,c2,c3,c1);
360 $LD r6,`3*$BNSZ`(r4)
361 $UMULL r7,r5,r6
362 $UMULH r8,r5,r6
363 addc r7,r7,r7
364 adde r8,r8,r8
365 addze r9,r9
366
367 addc r10,r7,r10
368 adde r11,r8,r11
369 addze r9,r9
370 $ST r10,`4*$BNSZ`(r3) #r[4]=c2
371 #sqr_add_c2(a,3,2,c3,c1,c2);
372 $LD r5,`2*$BNSZ`(r4)
373 $UMULL r7,r5,r6
374 $UMULH r8,r5,r6
375 addc r7,r7,r7
376 adde r8,r8,r8
377 addze r10,r0
378
379 addc r11,r7,r11
380 adde r9,r8,r9
381 addze r10,r10
382 $ST r11,`5*$BNSZ`(r3) #r[5] = c3
383 #sqr_add_c(a,3,c1,c2,c3);
384 $UMULL r7,r6,r6
385 $UMULH r8,r6,r6
386 addc r9,r7,r9
387 adde r10,r8,r10
388
389 $ST r9,`6*$BNSZ`(r3) #r[6]=c1
390 $ST r10,`7*$BNSZ`(r3) #r[7]=c2
391 blr
392 .long 0x00000000
393
394#
395# NOTE: The following label name should be changed to
396# "bn_sqr_comba8" i.e. remove the first dot
397# for the gcc compiler. This should be automatically
398# done in the build
399#
400
401.align 4
402.bn_sqr_comba8:
403#
404# This is an optimized version of the bn_sqr_comba8 routine.
405# Tightly uses the adde instruction
406#
407#
408# void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
409# r3 contains r
410# r4 contains a
411#
412# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
413#
414# r5,r6 are the two BN_ULONGs being multiplied.
415# r7,r8 are the results of the 32x32 giving 64 bit multiply.
416# r9,r10, r11 are the equivalents of c1,c2, c3.
417#
418# Possible optimization of loading all 8 longs of a into registers
419# doesnt provide any speedup
420#
421
422 xor r0,r0,r0 #set r0 = 0.Used in addze
423 #instructions below.
424
425 #sqr_add_c(a,0,c1,c2,c3);
426 $LD r5,`0*$BNSZ`(r4)
427 $UMULL r9,r5,r5 #1st iteration: no carries.
428 $UMULH r10,r5,r5
429 $ST r9,`0*$BNSZ`(r3) # r[0]=c1;
430 #sqr_add_c2(a,1,0,c2,c3,c1);
431 $LD r6,`1*$BNSZ`(r4)
432 $UMULL r7,r5,r6
433 $UMULH r8,r5,r6
434
435 addc r10,r7,r10 #add the two register number
436 adde r11,r8,r0 # (r8,r7) to the three register
437 addze r9,r0 # number (r9,r11,r10).NOTE:r0=0
438
439 addc r10,r7,r10 #add the two register number
440 adde r11,r8,r11 # (r8,r7) to the three register
441 addze r9,r9 # number (r9,r11,r10).
442
443 $ST r10,`1*$BNSZ`(r3) # r[1]=c2
444
445 #sqr_add_c(a,1,c3,c1,c2);
446 $UMULL r7,r6,r6
447 $UMULH r8,r6,r6
448 addc r11,r7,r11
449 adde r9,r8,r9
450 addze r10,r0
451 #sqr_add_c2(a,2,0,c3,c1,c2);
452 $LD r6,`2*$BNSZ`(r4)
453 $UMULL r7,r5,r6
454 $UMULH r8,r5,r6
455
456 addc r11,r7,r11
457 adde r9,r8,r9
458 addze r10,r10
459
460 addc r11,r7,r11
461 adde r9,r8,r9
462 addze r10,r10
463
464 $ST r11,`2*$BNSZ`(r3) #r[2]=c3
465 #sqr_add_c2(a,3,0,c1,c2,c3);
466 $LD r6,`3*$BNSZ`(r4) #r6 = a[3]. r5 is already a[0].
467 $UMULL r7,r5,r6
468 $UMULH r8,r5,r6
469
470 addc r9,r7,r9
471 adde r10,r8,r10
472 addze r11,r0
473
474 addc r9,r7,r9
475 adde r10,r8,r10
476 addze r11,r11
477 #sqr_add_c2(a,2,1,c1,c2,c3);
478 $LD r5,`1*$BNSZ`(r4)
479 $LD r6,`2*$BNSZ`(r4)
480 $UMULL r7,r5,r6
481 $UMULH r8,r5,r6
482
483 addc r9,r7,r9
484 adde r10,r8,r10
485 addze r11,r11
486
487 addc r9,r7,r9
488 adde r10,r8,r10
489 addze r11,r11
490
491 $ST r9,`3*$BNSZ`(r3) #r[3]=c1;
492 #sqr_add_c(a,2,c2,c3,c1);
493 $UMULL r7,r6,r6
494 $UMULH r8,r6,r6
495
496 addc r10,r7,r10
497 adde r11,r8,r11
498 addze r9,r0
499 #sqr_add_c2(a,3,1,c2,c3,c1);
500 $LD r6,`3*$BNSZ`(r4)
501 $UMULL r7,r5,r6
502 $UMULH r8,r5,r6
503
504 addc r10,r7,r10
505 adde r11,r8,r11
506 addze r9,r9
507
508 addc r10,r7,r10
509 adde r11,r8,r11
510 addze r9,r9
511 #sqr_add_c2(a,4,0,c2,c3,c1);
512 $LD r5,`0*$BNSZ`(r4)
513 $LD r6,`4*$BNSZ`(r4)
514 $UMULL r7,r5,r6
515 $UMULH r8,r5,r6
516
517 addc r10,r7,r10
518 adde r11,r8,r11
519 addze r9,r9
520
521 addc r10,r7,r10
522 adde r11,r8,r11
523 addze r9,r9
524 $ST r10,`4*$BNSZ`(r3) #r[4]=c2;
525 #sqr_add_c2(a,5,0,c3,c1,c2);
526 $LD r6,`5*$BNSZ`(r4)
527 $UMULL r7,r5,r6
528 $UMULH r8,r5,r6
529
530 addc r11,r7,r11
531 adde r9,r8,r9
532 addze r10,r0
533
534 addc r11,r7,r11
535 adde r9,r8,r9
536 addze r10,r10
537 #sqr_add_c2(a,4,1,c3,c1,c2);
538 $LD r5,`1*$BNSZ`(r4)
539 $LD r6,`4*$BNSZ`(r4)
540 $UMULL r7,r5,r6
541 $UMULH r8,r5,r6
542
543 addc r11,r7,r11
544 adde r9,r8,r9
545 addze r10,r10
546
547 addc r11,r7,r11
548 adde r9,r8,r9
549 addze r10,r10
550 #sqr_add_c2(a,3,2,c3,c1,c2);
551 $LD r5,`2*$BNSZ`(r4)
552 $LD r6,`3*$BNSZ`(r4)
553 $UMULL r7,r5,r6
554 $UMULH r8,r5,r6
555
556 addc r11,r7,r11
557 adde r9,r8,r9
558 addze r10,r10
559
560 addc r11,r7,r11
561 adde r9,r8,r9
562 addze r10,r10
563 $ST r11,`5*$BNSZ`(r3) #r[5]=c3;
564 #sqr_add_c(a,3,c1,c2,c3);
565 $UMULL r7,r6,r6
566 $UMULH r8,r6,r6
567 addc r9,r7,r9
568 adde r10,r8,r10
569 addze r11,r0
570 #sqr_add_c2(a,4,2,c1,c2,c3);
571 $LD r6,`4*$BNSZ`(r4)
572 $UMULL r7,r5,r6
573 $UMULH r8,r5,r6
574
575 addc r9,r7,r9
576 adde r10,r8,r10
577 addze r11,r11
578
579 addc r9,r7,r9
580 adde r10,r8,r10
581 addze r11,r11
582 #sqr_add_c2(a,5,1,c1,c2,c3);
583 $LD r5,`1*$BNSZ`(r4)
584 $LD r6,`5*$BNSZ`(r4)
585 $UMULL r7,r5,r6
586 $UMULH r8,r5,r6
587
588 addc r9,r7,r9
589 adde r10,r8,r10
590 addze r11,r11
591
592 addc r9,r7,r9
593 adde r10,r8,r10
594 addze r11,r11
595 #sqr_add_c2(a,6,0,c1,c2,c3);
596 $LD r5,`0*$BNSZ`(r4)
597 $LD r6,`6*$BNSZ`(r4)
598 $UMULL r7,r5,r6
599 $UMULH r8,r5,r6
600 addc r9,r7,r9
601 adde r10,r8,r10
602 addze r11,r11
603 addc r9,r7,r9
604 adde r10,r8,r10
605 addze r11,r11
606 $ST r9,`6*$BNSZ`(r3) #r[6]=c1;
607 #sqr_add_c2(a,7,0,c2,c3,c1);
608 $LD r6,`7*$BNSZ`(r4)
609 $UMULL r7,r5,r6
610 $UMULH r8,r5,r6
611
612 addc r10,r7,r10
613 adde r11,r8,r11
614 addze r9,r0
615 addc r10,r7,r10
616 adde r11,r8,r11
617 addze r9,r9
618 #sqr_add_c2(a,6,1,c2,c3,c1);
619 $LD r5,`1*$BNSZ`(r4)
620 $LD r6,`6*$BNSZ`(r4)
621 $UMULL r7,r5,r6
622 $UMULH r8,r5,r6
623
624 addc r10,r7,r10
625 adde r11,r8,r11
626 addze r9,r9
627 addc r10,r7,r10
628 adde r11,r8,r11
629 addze r9,r9
630 #sqr_add_c2(a,5,2,c2,c3,c1);
631 $LD r5,`2*$BNSZ`(r4)
632 $LD r6,`5*$BNSZ`(r4)
633 $UMULL r7,r5,r6
634 $UMULH r8,r5,r6
635 addc r10,r7,r10
636 adde r11,r8,r11
637 addze r9,r9
638 addc r10,r7,r10
639 adde r11,r8,r11
640 addze r9,r9
641 #sqr_add_c2(a,4,3,c2,c3,c1);
642 $LD r5,`3*$BNSZ`(r4)
643 $LD r6,`4*$BNSZ`(r4)
644 $UMULL r7,r5,r6
645 $UMULH r8,r5,r6
646
647 addc r10,r7,r10
648 adde r11,r8,r11
649 addze r9,r9
650 addc r10,r7,r10
651 adde r11,r8,r11
652 addze r9,r9
653 $ST r10,`7*$BNSZ`(r3) #r[7]=c2;
654 #sqr_add_c(a,4,c3,c1,c2);
655 $UMULL r7,r6,r6
656 $UMULH r8,r6,r6
657 addc r11,r7,r11
658 adde r9,r8,r9
659 addze r10,r0
660 #sqr_add_c2(a,5,3,c3,c1,c2);
661 $LD r6,`5*$BNSZ`(r4)
662 $UMULL r7,r5,r6
663 $UMULH r8,r5,r6
664 addc r11,r7,r11
665 adde r9,r8,r9
666 addze r10,r10
667 addc r11,r7,r11
668 adde r9,r8,r9
669 addze r10,r10
670 #sqr_add_c2(a,6,2,c3,c1,c2);
671 $LD r5,`2*$BNSZ`(r4)
672 $LD r6,`6*$BNSZ`(r4)
673 $UMULL r7,r5,r6
674 $UMULH r8,r5,r6
675 addc r11,r7,r11
676 adde r9,r8,r9
677 addze r10,r10
678
679 addc r11,r7,r11
680 adde r9,r8,r9
681 addze r10,r10
682 #sqr_add_c2(a,7,1,c3,c1,c2);
683 $LD r5,`1*$BNSZ`(r4)
684 $LD r6,`7*$BNSZ`(r4)
685 $UMULL r7,r5,r6
686 $UMULH r8,r5,r6
687 addc r11,r7,r11
688 adde r9,r8,r9
689 addze r10,r10
690 addc r11,r7,r11
691 adde r9,r8,r9
692 addze r10,r10
693 $ST r11,`8*$BNSZ`(r3) #r[8]=c3;
694 #sqr_add_c2(a,7,2,c1,c2,c3);
695 $LD r5,`2*$BNSZ`(r4)
696 $UMULL r7,r5,r6
697 $UMULH r8,r5,r6
698
699 addc r9,r7,r9
700 adde r10,r8,r10
701 addze r11,r0
702 addc r9,r7,r9
703 adde r10,r8,r10
704 addze r11,r11
705 #sqr_add_c2(a,6,3,c1,c2,c3);
706 $LD r5,`3*$BNSZ`(r4)
707 $LD r6,`6*$BNSZ`(r4)
708 $UMULL r7,r5,r6
709 $UMULH r8,r5,r6
710 addc r9,r7,r9
711 adde r10,r8,r10
712 addze r11,r11
713 addc r9,r7,r9
714 adde r10,r8,r10
715 addze r11,r11
716 #sqr_add_c2(a,5,4,c1,c2,c3);
717 $LD r5,`4*$BNSZ`(r4)
718 $LD r6,`5*$BNSZ`(r4)
719 $UMULL r7,r5,r6
720 $UMULH r8,r5,r6
721 addc r9,r7,r9
722 adde r10,r8,r10
723 addze r11,r11
724 addc r9,r7,r9
725 adde r10,r8,r10
726 addze r11,r11
727 $ST r9,`9*$BNSZ`(r3) #r[9]=c1;
728 #sqr_add_c(a,5,c2,c3,c1);
729 $UMULL r7,r6,r6
730 $UMULH r8,r6,r6
731 addc r10,r7,r10
732 adde r11,r8,r11
733 addze r9,r0
734 #sqr_add_c2(a,6,4,c2,c3,c1);
735 $LD r6,`6*$BNSZ`(r4)
736 $UMULL r7,r5,r6
737 $UMULH r8,r5,r6
738 addc r10,r7,r10
739 adde r11,r8,r11
740 addze r9,r9
741 addc r10,r7,r10
742 adde r11,r8,r11
743 addze r9,r9
744 #sqr_add_c2(a,7,3,c2,c3,c1);
745 $LD r5,`3*$BNSZ`(r4)
746 $LD r6,`7*$BNSZ`(r4)
747 $UMULL r7,r5,r6
748 $UMULH r8,r5,r6
749 addc r10,r7,r10
750 adde r11,r8,r11
751 addze r9,r9
752 addc r10,r7,r10
753 adde r11,r8,r11
754 addze r9,r9
755 $ST r10,`10*$BNSZ`(r3) #r[10]=c2;
756 #sqr_add_c2(a,7,4,c3,c1,c2);
757 $LD r5,`4*$BNSZ`(r4)
758 $UMULL r7,r5,r6
759 $UMULH r8,r5,r6
760 addc r11,r7,r11
761 adde r9,r8,r9
762 addze r10,r0
763 addc r11,r7,r11
764 adde r9,r8,r9
765 addze r10,r10
766 #sqr_add_c2(a,6,5,c3,c1,c2);
767 $LD r5,`5*$BNSZ`(r4)
768 $LD r6,`6*$BNSZ`(r4)
769 $UMULL r7,r5,r6
770 $UMULH r8,r5,r6
771 addc r11,r7,r11
772 adde r9,r8,r9
773 addze r10,r10
774 addc r11,r7,r11
775 adde r9,r8,r9
776 addze r10,r10
777 $ST r11,`11*$BNSZ`(r3) #r[11]=c3;
778 #sqr_add_c(a,6,c1,c2,c3);
779 $UMULL r7,r6,r6
780 $UMULH r8,r6,r6
781 addc r9,r7,r9
782 adde r10,r8,r10
783 addze r11,r0
784 #sqr_add_c2(a,7,5,c1,c2,c3)
785 $LD r6,`7*$BNSZ`(r4)
786 $UMULL r7,r5,r6
787 $UMULH r8,r5,r6
788 addc r9,r7,r9
789 adde r10,r8,r10
790 addze r11,r11
791 addc r9,r7,r9
792 adde r10,r8,r10
793 addze r11,r11
794 $ST r9,`12*$BNSZ`(r3) #r[12]=c1;
795
796 #sqr_add_c2(a,7,6,c2,c3,c1)
797 $LD r5,`6*$BNSZ`(r4)
798 $UMULL r7,r5,r6
799 $UMULH r8,r5,r6
800 addc r10,r7,r10
801 adde r11,r8,r11
802 addze r9,r0
803 addc r10,r7,r10
804 adde r11,r8,r11
805 addze r9,r9
806 $ST r10,`13*$BNSZ`(r3) #r[13]=c2;
807 #sqr_add_c(a,7,c3,c1,c2);
808 $UMULL r7,r6,r6
809 $UMULH r8,r6,r6
810 addc r11,r7,r11
811 adde r9,r8,r9
812 $ST r11,`14*$BNSZ`(r3) #r[14]=c3;
813 $ST r9, `15*$BNSZ`(r3) #r[15]=c1;
814
815
816 blr
817
818 .long 0x00000000
819
820#
821# NOTE: The following label name should be changed to
822# "bn_mul_comba4" i.e. remove the first dot
823# for the gcc compiler. This should be automatically
824# done in the build
825#
826
827.align 4
828.bn_mul_comba4:
829#
830# This is an optimized version of the bn_mul_comba4 routine.
831#
832# void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
833# r3 contains r
834# r4 contains a
835# r5 contains b
836# r6, r7 are the 2 BN_ULONGs being multiplied.
837# r8, r9 are the results of the 32x32 giving 64 multiply.
838# r10, r11, r12 are the equivalents of c1, c2, and c3.
839#
840 xor r0,r0,r0 #r0=0. Used in addze below.
841 #mul_add_c(a[0],b[0],c1,c2,c3);
842 $LD r6,`0*$BNSZ`(r4)
843 $LD r7,`0*$BNSZ`(r5)
844 $UMULL r10,r6,r7
845 $UMULH r11,r6,r7
846 $ST r10,`0*$BNSZ`(r3) #r[0]=c1
847 #mul_add_c(a[0],b[1],c2,c3,c1);
848 $LD r7,`1*$BNSZ`(r5)
849 $UMULL r8,r6,r7
850 $UMULH r9,r6,r7
851 addc r11,r8,r11
852 adde r12,r9,r0
853 addze r10,r0
854 #mul_add_c(a[1],b[0],c2,c3,c1);
855 $LD r6, `1*$BNSZ`(r4)
856 $LD r7, `0*$BNSZ`(r5)
857 $UMULL r8,r6,r7
858 $UMULH r9,r6,r7
859 addc r11,r8,r11
860 adde r12,r9,r12
861 addze r10,r10
862 $ST r11,`1*$BNSZ`(r3) #r[1]=c2
863 #mul_add_c(a[2],b[0],c3,c1,c2);
864 $LD r6,`2*$BNSZ`(r4)
865 $UMULL r8,r6,r7
866 $UMULH r9,r6,r7
867 addc r12,r8,r12
868 adde r10,r9,r10
869 addze r11,r0
870 #mul_add_c(a[1],b[1],c3,c1,c2);
871 $LD r6,`1*$BNSZ`(r4)
872 $LD r7,`1*$BNSZ`(r5)
873 $UMULL r8,r6,r7
874 $UMULH r9,r6,r7
875 addc r12,r8,r12
876 adde r10,r9,r10
877 addze r11,r11
878 #mul_add_c(a[0],b[2],c3,c1,c2);
879 $LD r6,`0*$BNSZ`(r4)
880 $LD r7,`2*$BNSZ`(r5)
881 $UMULL r8,r6,r7
882 $UMULH r9,r6,r7
883 addc r12,r8,r12
884 adde r10,r9,r10
885 addze r11,r11
886 $ST r12,`2*$BNSZ`(r3) #r[2]=c3
887 #mul_add_c(a[0],b[3],c1,c2,c3);
888 $LD r7,`3*$BNSZ`(r5)
889 $UMULL r8,r6,r7
890 $UMULH r9,r6,r7
891 addc r10,r8,r10
892 adde r11,r9,r11
893 addze r12,r0
894 #mul_add_c(a[1],b[2],c1,c2,c3);
895 $LD r6,`1*$BNSZ`(r4)
896 $LD r7,`2*$BNSZ`(r5)
897 $UMULL r8,r6,r7
898 $UMULH r9,r6,r7
899 addc r10,r8,r10
900 adde r11,r9,r11
901 addze r12,r12
902 #mul_add_c(a[2],b[1],c1,c2,c3);
903 $LD r6,`2*$BNSZ`(r4)
904 $LD r7,`1*$BNSZ`(r5)
905 $UMULL r8,r6,r7
906 $UMULH r9,r6,r7
907 addc r10,r8,r10
908 adde r11,r9,r11
909 addze r12,r12
910 #mul_add_c(a[3],b[0],c1,c2,c3);
911 $LD r6,`3*$BNSZ`(r4)
912 $LD r7,`0*$BNSZ`(r5)
913 $UMULL r8,r6,r7
914 $UMULH r9,r6,r7
915 addc r10,r8,r10
916 adde r11,r9,r11
917 addze r12,r12
918 $ST r10,`3*$BNSZ`(r3) #r[3]=c1
919 #mul_add_c(a[3],b[1],c2,c3,c1);
920 $LD r7,`1*$BNSZ`(r5)
921 $UMULL r8,r6,r7
922 $UMULH r9,r6,r7
923 addc r11,r8,r11
924 adde r12,r9,r12
925 addze r10,r0
926 #mul_add_c(a[2],b[2],c2,c3,c1);
927 $LD r6,`2*$BNSZ`(r4)
928 $LD r7,`2*$BNSZ`(r5)
929 $UMULL r8,r6,r7
930 $UMULH r9,r6,r7
931 addc r11,r8,r11
932 adde r12,r9,r12
933 addze r10,r10
934 #mul_add_c(a[1],b[3],c2,c3,c1);
935 $LD r6,`1*$BNSZ`(r4)
936 $LD r7,`3*$BNSZ`(r5)
937 $UMULL r8,r6,r7
938 $UMULH r9,r6,r7
939 addc r11,r8,r11
940 adde r12,r9,r12
941 addze r10,r10
942 $ST r11,`4*$BNSZ`(r3) #r[4]=c2
943 #mul_add_c(a[2],b[3],c3,c1,c2);
944 $LD r6,`2*$BNSZ`(r4)
945 $UMULL r8,r6,r7
946 $UMULH r9,r6,r7
947 addc r12,r8,r12
948 adde r10,r9,r10
949 addze r11,r0
950 #mul_add_c(a[3],b[2],c3,c1,c2);
951 $LD r6,`3*$BNSZ`(r4)
952 $LD r7,`2*$BNSZ`(r5)
953 $UMULL r8,r6,r7
954 $UMULH r9,r6,r7
955 addc r12,r8,r12
956 adde r10,r9,r10
957 addze r11,r11
958 $ST r12,`5*$BNSZ`(r3) #r[5]=c3
959 #mul_add_c(a[3],b[3],c1,c2,c3);
960 $LD r7,`3*$BNSZ`(r5)
961 $UMULL r8,r6,r7
962 $UMULH r9,r6,r7
963 addc r10,r8,r10
964 adde r11,r9,r11
965
966 $ST r10,`6*$BNSZ`(r3) #r[6]=c1
967 $ST r11,`7*$BNSZ`(r3) #r[7]=c2
968 blr
969 .long 0x00000000
970
971#
972# NOTE: The following label name should be changed to
973# "bn_mul_comba8" i.e. remove the first dot
974# for the gcc compiler. This should be automatically
975# done in the build
976#
977
978.align 4
979.bn_mul_comba8:
980#
981# Optimized version of the bn_mul_comba8 routine.
982#
983# void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
984# r3 contains r
985# r4 contains a
986# r5 contains b
987# r6, r7 are the 2 BN_ULONGs being multiplied.
988# r8, r9 are the results of the 32x32 giving 64 multiply.
989# r10, r11, r12 are the equivalents of c1, c2, and c3.
990#
991 xor r0,r0,r0 #r0=0. Used in addze below.
992
993 #mul_add_c(a[0],b[0],c1,c2,c3);
994 $LD r6,`0*$BNSZ`(r4) #a[0]
995 $LD r7,`0*$BNSZ`(r5) #b[0]
996 $UMULL r10,r6,r7
997 $UMULH r11,r6,r7
998 $ST r10,`0*$BNSZ`(r3) #r[0]=c1;
999 #mul_add_c(a[0],b[1],c2,c3,c1);
1000 $LD r7,`1*$BNSZ`(r5)
1001 $UMULL r8,r6,r7
1002 $UMULH r9,r6,r7
1003 addc r11,r11,r8
1004 addze r12,r9 # since we didnt set r12 to zero before.
1005 addze r10,r0
1006 #mul_add_c(a[1],b[0],c2,c3,c1);
1007 $LD r6,`1*$BNSZ`(r4)
1008 $LD r7,`0*$BNSZ`(r5)
1009 $UMULL r8,r6,r7
1010 $UMULH r9,r6,r7
1011 addc r11,r11,r8
1012 adde r12,r12,r9
1013 addze r10,r10
1014 $ST r11,`1*$BNSZ`(r3) #r[1]=c2;
1015 #mul_add_c(a[2],b[0],c3,c1,c2);
1016 $LD r6,`2*$BNSZ`(r4)
1017 $UMULL r8,r6,r7
1018 $UMULH r9,r6,r7
1019 addc r12,r12,r8
1020 adde r10,r10,r9
1021 addze r11,r0
1022 #mul_add_c(a[1],b[1],c3,c1,c2);
1023 $LD r6,`1*$BNSZ`(r4)
1024 $LD r7,`1*$BNSZ`(r5)
1025 $UMULL r8,r6,r7
1026 $UMULH r9,r6,r7
1027 addc r12,r12,r8
1028 adde r10,r10,r9
1029 addze r11,r11
1030 #mul_add_c(a[0],b[2],c3,c1,c2);
1031 $LD r6,`0*$BNSZ`(r4)
1032 $LD r7,`2*$BNSZ`(r5)
1033 $UMULL r8,r6,r7
1034 $UMULH r9,r6,r7
1035 addc r12,r12,r8
1036 adde r10,r10,r9
1037 addze r11,r11
1038 $ST r12,`2*$BNSZ`(r3) #r[2]=c3;
1039 #mul_add_c(a[0],b[3],c1,c2,c3);
1040 $LD r7,`3*$BNSZ`(r5)
1041 $UMULL r8,r6,r7
1042 $UMULH r9,r6,r7
1043 addc r10,r10,r8
1044 adde r11,r11,r9
1045 addze r12,r0
1046 #mul_add_c(a[1],b[2],c1,c2,c3);
1047 $LD r6,`1*$BNSZ`(r4)
1048 $LD r7,`2*$BNSZ`(r5)
1049 $UMULL r8,r6,r7
1050 $UMULH r9,r6,r7
1051 addc r10,r10,r8
1052 adde r11,r11,r9
1053 addze r12,r12
1054
1055 #mul_add_c(a[2],b[1],c1,c2,c3);
1056 $LD r6,`2*$BNSZ`(r4)
1057 $LD r7,`1*$BNSZ`(r5)
1058 $UMULL r8,r6,r7
1059 $UMULH r9,r6,r7
1060 addc r10,r10,r8
1061 adde r11,r11,r9
1062 addze r12,r12
1063 #mul_add_c(a[3],b[0],c1,c2,c3);
1064 $LD r6,`3*$BNSZ`(r4)
1065 $LD r7,`0*$BNSZ`(r5)
1066 $UMULL r8,r6,r7
1067 $UMULH r9,r6,r7
1068 addc r10,r10,r8
1069 adde r11,r11,r9
1070 addze r12,r12
1071 $ST r10,`3*$BNSZ`(r3) #r[3]=c1;
1072 #mul_add_c(a[4],b[0],c2,c3,c1);
1073 $LD r6,`4*$BNSZ`(r4)
1074 $UMULL r8,r6,r7
1075 $UMULH r9,r6,r7
1076 addc r11,r11,r8
1077 adde r12,r12,r9
1078 addze r10,r0
1079 #mul_add_c(a[3],b[1],c2,c3,c1);
1080 $LD r6,`3*$BNSZ`(r4)
1081 $LD r7,`1*$BNSZ`(r5)
1082 $UMULL r8,r6,r7
1083 $UMULH r9,r6,r7
1084 addc r11,r11,r8
1085 adde r12,r12,r9
1086 addze r10,r10
1087 #mul_add_c(a[2],b[2],c2,c3,c1);
1088 $LD r6,`2*$BNSZ`(r4)
1089 $LD r7,`2*$BNSZ`(r5)
1090 $UMULL r8,r6,r7
1091 $UMULH r9,r6,r7
1092 addc r11,r11,r8
1093 adde r12,r12,r9
1094 addze r10,r10
1095 #mul_add_c(a[1],b[3],c2,c3,c1);
1096 $LD r6,`1*$BNSZ`(r4)
1097 $LD r7,`3*$BNSZ`(r5)
1098 $UMULL r8,r6,r7
1099 $UMULH r9,r6,r7
1100 addc r11,r11,r8
1101 adde r12,r12,r9
1102 addze r10,r10
1103 #mul_add_c(a[0],b[4],c2,c3,c1);
1104 $LD r6,`0*$BNSZ`(r4)
1105 $LD r7,`4*$BNSZ`(r5)
1106 $UMULL r8,r6,r7
1107 $UMULH r9,r6,r7
1108 addc r11,r11,r8
1109 adde r12,r12,r9
1110 addze r10,r10
1111 $ST r11,`4*$BNSZ`(r3) #r[4]=c2;
1112 #mul_add_c(a[0],b[5],c3,c1,c2);
1113 $LD r7,`5*$BNSZ`(r5)
1114 $UMULL r8,r6,r7
1115 $UMULH r9,r6,r7
1116 addc r12,r12,r8
1117 adde r10,r10,r9
1118 addze r11,r0
1119 #mul_add_c(a[1],b[4],c3,c1,c2);
1120 $LD r6,`1*$BNSZ`(r4)
1121 $LD r7,`4*$BNSZ`(r5)
1122 $UMULL r8,r6,r7
1123 $UMULH r9,r6,r7
1124 addc r12,r12,r8
1125 adde r10,r10,r9
1126 addze r11,r11
1127 #mul_add_c(a[2],b[3],c3,c1,c2);
1128 $LD r6,`2*$BNSZ`(r4)
1129 $LD r7,`3*$BNSZ`(r5)
1130 $UMULL r8,r6,r7
1131 $UMULH r9,r6,r7
1132 addc r12,r12,r8
1133 adde r10,r10,r9
1134 addze r11,r11
1135 #mul_add_c(a[3],b[2],c3,c1,c2);
1136 $LD r6,`3*$BNSZ`(r4)
1137 $LD r7,`2*$BNSZ`(r5)
1138 $UMULL r8,r6,r7
1139 $UMULH r9,r6,r7
1140 addc r12,r12,r8
1141 adde r10,r10,r9
1142 addze r11,r11
1143 #mul_add_c(a[4],b[1],c3,c1,c2);
1144 $LD r6,`4*$BNSZ`(r4)
1145 $LD r7,`1*$BNSZ`(r5)
1146 $UMULL r8,r6,r7
1147 $UMULH r9,r6,r7
1148 addc r12,r12,r8
1149 adde r10,r10,r9
1150 addze r11,r11
1151 #mul_add_c(a[5],b[0],c3,c1,c2);
1152 $LD r6,`5*$BNSZ`(r4)
1153 $LD r7,`0*$BNSZ`(r5)
1154 $UMULL r8,r6,r7
1155 $UMULH r9,r6,r7
1156 addc r12,r12,r8
1157 adde r10,r10,r9
1158 addze r11,r11
1159 $ST r12,`5*$BNSZ`(r3) #r[5]=c3;
1160 #mul_add_c(a[6],b[0],c1,c2,c3);
1161 $LD r6,`6*$BNSZ`(r4)
1162 $UMULL r8,r6,r7
1163 $UMULH r9,r6,r7
1164 addc r10,r10,r8
1165 adde r11,r11,r9
1166 addze r12,r0
1167 #mul_add_c(a[5],b[1],c1,c2,c3);
1168 $LD r6,`5*$BNSZ`(r4)
1169 $LD r7,`1*$BNSZ`(r5)
1170 $UMULL r8,r6,r7
1171 $UMULH r9,r6,r7
1172 addc r10,r10,r8
1173 adde r11,r11,r9
1174 addze r12,r12
1175 #mul_add_c(a[4],b[2],c1,c2,c3);
1176 $LD r6,`4*$BNSZ`(r4)
1177 $LD r7,`2*$BNSZ`(r5)
1178 $UMULL r8,r6,r7
1179 $UMULH r9,r6,r7
1180 addc r10,r10,r8
1181 adde r11,r11,r9
1182 addze r12,r12
1183 #mul_add_c(a[3],b[3],c1,c2,c3);
1184 $LD r6,`3*$BNSZ`(r4)
1185 $LD r7,`3*$BNSZ`(r5)
1186 $UMULL r8,r6,r7
1187 $UMULH r9,r6,r7
1188 addc r10,r10,r8
1189 adde r11,r11,r9
1190 addze r12,r12
1191 #mul_add_c(a[2],b[4],c1,c2,c3);
1192 $LD r6,`2*$BNSZ`(r4)
1193 $LD r7,`4*$BNSZ`(r5)
1194 $UMULL r8,r6,r7
1195 $UMULH r9,r6,r7
1196 addc r10,r10,r8
1197 adde r11,r11,r9
1198 addze r12,r12
1199 #mul_add_c(a[1],b[5],c1,c2,c3);
1200 $LD r6,`1*$BNSZ`(r4)
1201 $LD r7,`5*$BNSZ`(r5)
1202 $UMULL r8,r6,r7
1203 $UMULH r9,r6,r7
1204 addc r10,r10,r8
1205 adde r11,r11,r9
1206 addze r12,r12
1207 #mul_add_c(a[0],b[6],c1,c2,c3);
1208 $LD r6,`0*$BNSZ`(r4)
1209 $LD r7,`6*$BNSZ`(r5)
1210 $UMULL r8,r6,r7
1211 $UMULH r9,r6,r7
1212 addc r10,r10,r8
1213 adde r11,r11,r9
1214 addze r12,r12
1215 $ST r10,`6*$BNSZ`(r3) #r[6]=c1;
1216 #mul_add_c(a[0],b[7],c2,c3,c1);
1217 $LD r7,`7*$BNSZ`(r5)
1218 $UMULL r8,r6,r7
1219 $UMULH r9,r6,r7
1220 addc r11,r11,r8
1221 adde r12,r12,r9
1222 addze r10,r0
1223 #mul_add_c(a[1],b[6],c2,c3,c1);
1224 $LD r6,`1*$BNSZ`(r4)
1225 $LD r7,`6*$BNSZ`(r5)
1226 $UMULL r8,r6,r7
1227 $UMULH r9,r6,r7
1228 addc r11,r11,r8
1229 adde r12,r12,r9
1230 addze r10,r10
1231 #mul_add_c(a[2],b[5],c2,c3,c1);
1232 $LD r6,`2*$BNSZ`(r4)
1233 $LD r7,`5*$BNSZ`(r5)
1234 $UMULL r8,r6,r7
1235 $UMULH r9,r6,r7
1236 addc r11,r11,r8
1237 adde r12,r12,r9
1238 addze r10,r10
1239 #mul_add_c(a[3],b[4],c2,c3,c1);
1240 $LD r6,`3*$BNSZ`(r4)
1241 $LD r7,`4*$BNSZ`(r5)
1242 $UMULL r8,r6,r7
1243 $UMULH r9,r6,r7
1244 addc r11,r11,r8
1245 adde r12,r12,r9
1246 addze r10,r10
1247 #mul_add_c(a[4],b[3],c2,c3,c1);
1248 $LD r6,`4*$BNSZ`(r4)
1249 $LD r7,`3*$BNSZ`(r5)
1250 $UMULL r8,r6,r7
1251 $UMULH r9,r6,r7
1252 addc r11,r11,r8
1253 adde r12,r12,r9
1254 addze r10,r10
1255 #mul_add_c(a[5],b[2],c2,c3,c1);
1256 $LD r6,`5*$BNSZ`(r4)
1257 $LD r7,`2*$BNSZ`(r5)
1258 $UMULL r8,r6,r7
1259 $UMULH r9,r6,r7
1260 addc r11,r11,r8
1261 adde r12,r12,r9
1262 addze r10,r10
1263 #mul_add_c(a[6],b[1],c2,c3,c1);
1264 $LD r6,`6*$BNSZ`(r4)
1265 $LD r7,`1*$BNSZ`(r5)
1266 $UMULL r8,r6,r7
1267 $UMULH r9,r6,r7
1268 addc r11,r11,r8
1269 adde r12,r12,r9
1270 addze r10,r10
1271 #mul_add_c(a[7],b[0],c2,c3,c1);
1272 $LD r6,`7*$BNSZ`(r4)
1273 $LD r7,`0*$BNSZ`(r5)
1274 $UMULL r8,r6,r7
1275 $UMULH r9,r6,r7
1276 addc r11,r11,r8
1277 adde r12,r12,r9
1278 addze r10,r10
1279 $ST r11,`7*$BNSZ`(r3) #r[7]=c2;
1280 #mul_add_c(a[7],b[1],c3,c1,c2);
1281 $LD r7,`1*$BNSZ`(r5)
1282 $UMULL r8,r6,r7
1283 $UMULH r9,r6,r7
1284 addc r12,r12,r8
1285 adde r10,r10,r9
1286 addze r11,r0
1287 #mul_add_c(a[6],b[2],c3,c1,c2);
1288 $LD r6,`6*$BNSZ`(r4)
1289 $LD r7,`2*$BNSZ`(r5)
1290 $UMULL r8,r6,r7
1291 $UMULH r9,r6,r7
1292 addc r12,r12,r8
1293 adde r10,r10,r9
1294 addze r11,r11
1295 #mul_add_c(a[5],b[3],c3,c1,c2);
1296 $LD r6,`5*$BNSZ`(r4)
1297 $LD r7,`3*$BNSZ`(r5)
1298 $UMULL r8,r6,r7
1299 $UMULH r9,r6,r7
1300 addc r12,r12,r8
1301 adde r10,r10,r9
1302 addze r11,r11
1303 #mul_add_c(a[4],b[4],c3,c1,c2);
1304 $LD r6,`4*$BNSZ`(r4)
1305 $LD r7,`4*$BNSZ`(r5)
1306 $UMULL r8,r6,r7
1307 $UMULH r9,r6,r7
1308 addc r12,r12,r8
1309 adde r10,r10,r9
1310 addze r11,r11
1311 #mul_add_c(a[3],b[5],c3,c1,c2);
1312 $LD r6,`3*$BNSZ`(r4)
1313 $LD r7,`5*$BNSZ`(r5)
1314 $UMULL r8,r6,r7
1315 $UMULH r9,r6,r7
1316 addc r12,r12,r8
1317 adde r10,r10,r9
1318 addze r11,r11
1319 #mul_add_c(a[2],b[6],c3,c1,c2);
1320 $LD r6,`2*$BNSZ`(r4)
1321 $LD r7,`6*$BNSZ`(r5)
1322 $UMULL r8,r6,r7
1323 $UMULH r9,r6,r7
1324 addc r12,r12,r8
1325 adde r10,r10,r9
1326 addze r11,r11
1327 #mul_add_c(a[1],b[7],c3,c1,c2);
1328 $LD r6,`1*$BNSZ`(r4)
1329 $LD r7,`7*$BNSZ`(r5)
1330 $UMULL r8,r6,r7
1331 $UMULH r9,r6,r7
1332 addc r12,r12,r8
1333 adde r10,r10,r9
1334 addze r11,r11
1335 $ST r12,`8*$BNSZ`(r3) #r[8]=c3;
1336 #mul_add_c(a[2],b[7],c1,c2,c3);
1337 $LD r6,`2*$BNSZ`(r4)
1338 $UMULL r8,r6,r7
1339 $UMULH r9,r6,r7
1340 addc r10,r10,r8
1341 adde r11,r11,r9
1342 addze r12,r0
1343 #mul_add_c(a[3],b[6],c1,c2,c3);
1344 $LD r6,`3*$BNSZ`(r4)
1345 $LD r7,`6*$BNSZ`(r5)
1346 $UMULL r8,r6,r7
1347 $UMULH r9,r6,r7
1348 addc r10,r10,r8
1349 adde r11,r11,r9
1350 addze r12,r12
1351 #mul_add_c(a[4],b[5],c1,c2,c3);
1352 $LD r6,`4*$BNSZ`(r4)
1353 $LD r7,`5*$BNSZ`(r5)
1354 $UMULL r8,r6,r7
1355 $UMULH r9,r6,r7
1356 addc r10,r10,r8
1357 adde r11,r11,r9
1358 addze r12,r12
1359 #mul_add_c(a[5],b[4],c1,c2,c3);
1360 $LD r6,`5*$BNSZ`(r4)
1361 $LD r7,`4*$BNSZ`(r5)
1362 $UMULL r8,r6,r7
1363 $UMULH r9,r6,r7
1364 addc r10,r10,r8
1365 adde r11,r11,r9
1366 addze r12,r12
1367 #mul_add_c(a[6],b[3],c1,c2,c3);
1368 $LD r6,`6*$BNSZ`(r4)
1369 $LD r7,`3*$BNSZ`(r5)
1370 $UMULL r8,r6,r7
1371 $UMULH r9,r6,r7
1372 addc r10,r10,r8
1373 adde r11,r11,r9
1374 addze r12,r12
1375 #mul_add_c(a[7],b[2],c1,c2,c3);
1376 $LD r6,`7*$BNSZ`(r4)
1377 $LD r7,`2*$BNSZ`(r5)
1378 $UMULL r8,r6,r7
1379 $UMULH r9,r6,r7
1380 addc r10,r10,r8
1381 adde r11,r11,r9
1382 addze r12,r12
1383 $ST r10,`9*$BNSZ`(r3) #r[9]=c1;
1384 #mul_add_c(a[7],b[3],c2,c3,c1);
1385 $LD r7,`3*$BNSZ`(r5)
1386 $UMULL r8,r6,r7
1387 $UMULH r9,r6,r7
1388 addc r11,r11,r8
1389 adde r12,r12,r9
1390 addze r10,r0
1391 #mul_add_c(a[6],b[4],c2,c3,c1);
1392 $LD r6,`6*$BNSZ`(r4)
1393 $LD r7,`4*$BNSZ`(r5)
1394 $UMULL r8,r6,r7
1395 $UMULH r9,r6,r7
1396 addc r11,r11,r8
1397 adde r12,r12,r9
1398 addze r10,r10
1399 #mul_add_c(a[5],b[5],c2,c3,c1);
1400 $LD r6,`5*$BNSZ`(r4)
1401 $LD r7,`5*$BNSZ`(r5)
1402 $UMULL r8,r6,r7
1403 $UMULH r9,r6,r7
1404 addc r11,r11,r8
1405 adde r12,r12,r9
1406 addze r10,r10
1407 #mul_add_c(a[4],b[6],c2,c3,c1);
1408 $LD r6,`4*$BNSZ`(r4)
1409 $LD r7,`6*$BNSZ`(r5)
1410 $UMULL r8,r6,r7
1411 $UMULH r9,r6,r7
1412 addc r11,r11,r8
1413 adde r12,r12,r9
1414 addze r10,r10
1415 #mul_add_c(a[3],b[7],c2,c3,c1);
1416 $LD r6,`3*$BNSZ`(r4)
1417 $LD r7,`7*$BNSZ`(r5)
1418 $UMULL r8,r6,r7
1419 $UMULH r9,r6,r7
1420 addc r11,r11,r8
1421 adde r12,r12,r9
1422 addze r10,r10
1423 $ST r11,`10*$BNSZ`(r3) #r[10]=c2;
1424 #mul_add_c(a[4],b[7],c3,c1,c2);
1425 $LD r6,`4*$BNSZ`(r4)
1426 $UMULL r8,r6,r7
1427 $UMULH r9,r6,r7
1428 addc r12,r12,r8
1429 adde r10,r10,r9
1430 addze r11,r0
1431 #mul_add_c(a[5],b[6],c3,c1,c2);
1432 $LD r6,`5*$BNSZ`(r4)
1433 $LD r7,`6*$BNSZ`(r5)
1434 $UMULL r8,r6,r7
1435 $UMULH r9,r6,r7
1436 addc r12,r12,r8
1437 adde r10,r10,r9
1438 addze r11,r11
1439 #mul_add_c(a[6],b[5],c3,c1,c2);
1440 $LD r6,`6*$BNSZ`(r4)
1441 $LD r7,`5*$BNSZ`(r5)
1442 $UMULL r8,r6,r7
1443 $UMULH r9,r6,r7
1444 addc r12,r12,r8
1445 adde r10,r10,r9
1446 addze r11,r11
1447 #mul_add_c(a[7],b[4],c3,c1,c2);
1448 $LD r6,`7*$BNSZ`(r4)
1449 $LD r7,`4*$BNSZ`(r5)
1450 $UMULL r8,r6,r7
1451 $UMULH r9,r6,r7
1452 addc r12,r12,r8
1453 adde r10,r10,r9
1454 addze r11,r11
1455 $ST r12,`11*$BNSZ`(r3) #r[11]=c3;
1456 #mul_add_c(a[7],b[5],c1,c2,c3);
1457 $LD r7,`5*$BNSZ`(r5)
1458 $UMULL r8,r6,r7
1459 $UMULH r9,r6,r7
1460 addc r10,r10,r8
1461 adde r11,r11,r9
1462 addze r12,r0
1463 #mul_add_c(a[6],b[6],c1,c2,c3);
1464 $LD r6,`6*$BNSZ`(r4)
1465 $LD r7,`6*$BNSZ`(r5)
1466 $UMULL r8,r6,r7
1467 $UMULH r9,r6,r7
1468 addc r10,r10,r8
1469 adde r11,r11,r9
1470 addze r12,r12
1471 #mul_add_c(a[5],b[7],c1,c2,c3);
1472 $LD r6,`5*$BNSZ`(r4)
1473 $LD r7,`7*$BNSZ`(r5)
1474 $UMULL r8,r6,r7
1475 $UMULH r9,r6,r7
1476 addc r10,r10,r8
1477 adde r11,r11,r9
1478 addze r12,r12
1479 $ST r10,`12*$BNSZ`(r3) #r[12]=c1;
1480 #mul_add_c(a[6],b[7],c2,c3,c1);
1481 $LD r6,`6*$BNSZ`(r4)
1482 $UMULL r8,r6,r7
1483 $UMULH r9,r6,r7
1484 addc r11,r11,r8
1485 adde r12,r12,r9
1486 addze r10,r0
1487 #mul_add_c(a[7],b[6],c2,c3,c1);
1488 $LD r6,`7*$BNSZ`(r4)
1489 $LD r7,`6*$BNSZ`(r5)
1490 $UMULL r8,r6,r7
1491 $UMULH r9,r6,r7
1492 addc r11,r11,r8
1493 adde r12,r12,r9
1494 addze r10,r10
1495 $ST r11,`13*$BNSZ`(r3) #r[13]=c2;
1496 #mul_add_c(a[7],b[7],c3,c1,c2);
1497 $LD r7,`7*$BNSZ`(r5)
1498 $UMULL r8,r6,r7
1499 $UMULH r9,r6,r7
1500 addc r12,r12,r8
1501 adde r10,r10,r9
1502 $ST r12,`14*$BNSZ`(r3) #r[14]=c3;
1503 $ST r10,`15*$BNSZ`(r3) #r[15]=c1;
1504 blr
1505 .long 0x00000000
1506
1507#
1508# NOTE: The following label name should be changed to
1509# "bn_sub_words" i.e. remove the first dot
1510# for the gcc compiler. This should be automatically
1511# done in the build
1512#
1513#
1514.align 4
1515.bn_sub_words:
1516#
1517# Handcoded version of bn_sub_words
1518#
1519#BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1520#
1521# r3 = r
1522# r4 = a
1523# r5 = b
1524# r6 = n
1525#
1526# Note: No loop unrolling done since this is not a performance
1527# critical loop.
1528
1529 xor r0,r0,r0 #set r0 = 0
1530#
1531# check for r6 = 0 AND set carry bit.
1532#
1533 subfc. r7,r0,r6 # If r6 is 0 then result is 0.
1534 # if r6 > 0 then result !=0
1535 # In either case carry bit is set.
1536 beq Lppcasm_sub_adios
1537 addi r4,r4,-$BNSZ
1538 addi r3,r3,-$BNSZ
1539 addi r5,r5,-$BNSZ
1540 mtctr r6
1541Lppcasm_sub_mainloop:
1542 $LDU r7,$BNSZ(r4)
1543 $LDU r8,$BNSZ(r5)
1544 subfe r6,r8,r7 # r6 = r7+carry bit + onescomplement(r8)
1545 # if carry = 1 this is r7-r8. Else it
1546 # is r7-r8 -1 as we need.
1547 $STU r6,$BNSZ(r3)
1548 bdnz- Lppcasm_sub_mainloop
1549Lppcasm_sub_adios:
1550 subfze r3,r0 # if carry bit is set then r3 = 0 else -1
1551 andi. r3,r3,1 # keep only last bit.
1552 blr
1553 .long 0x00000000
1554
1555
1556#
1557# NOTE: The following label name should be changed to
1558# "bn_add_words" i.e. remove the first dot
1559# for the gcc compiler. This should be automatically
1560# done in the build
1561#
1562
1563.align 4
1564.bn_add_words:
1565#
1566# Handcoded version of bn_add_words
1567#
1568#BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1569#
1570# r3 = r
1571# r4 = a
1572# r5 = b
1573# r6 = n
1574#
1575# Note: No loop unrolling done since this is not a performance
1576# critical loop.
1577
1578 xor r0,r0,r0
1579#
1580# check for r6 = 0. Is this needed?
1581#
1582 addic. r6,r6,0 #test r6 and clear carry bit.
1583 beq Lppcasm_add_adios
1584 addi r4,r4,-$BNSZ
1585 addi r3,r3,-$BNSZ
1586 addi r5,r5,-$BNSZ
1587 mtctr r6
1588Lppcasm_add_mainloop:
1589 $LDU r7,$BNSZ(r4)
1590 $LDU r8,$BNSZ(r5)
1591 adde r8,r7,r8
1592 $STU r8,$BNSZ(r3)
1593 bdnz- Lppcasm_add_mainloop
1594Lppcasm_add_adios:
1595 addze r3,r0 #return carry bit.
1596 blr
1597 .long 0x00000000
1598
1599#
1600# NOTE: The following label name should be changed to
1601# "bn_div_words" i.e. remove the first dot
1602# for the gcc compiler. This should be automatically
1603# done in the build
1604#
1605
1606.align 4
1607.bn_div_words:
1608#
1609# This is a cleaned up version of code generated by
1610# the AIX compiler. The only optimization is to use
1611# the PPC instruction to count leading zeros instead
1612# of call to num_bits_word. Since this was compiled
1613# only at level -O2 we can possibly squeeze it more?
1614#
1615# r3 = h
1616# r4 = l
1617# r5 = d
1618
1619 $UCMPI 0,r5,0 # compare r5 and 0
1620 bne Lppcasm_div1 # proceed if d!=0
1621 li r3,-1 # d=0 return -1
1622 blr
1623Lppcasm_div1:
1624 xor r0,r0,r0 #r0=0
1625 li r8,$BITS
1626 $CNTLZ. r7,r5 #r7 = num leading 0s in d.
1627 beq Lppcasm_div2 #proceed if no leading zeros
1628 subf r8,r7,r8 #r8 = BN_num_bits_word(d)
1629 $SHR. r9,r3,r8 #are there any bits above r8'th?
1630 $TR 16,r9,r0 #if there're, signal to dump core...
1631Lppcasm_div2:
1632 $UCMP 0,r3,r5 #h>=d?
1633 blt Lppcasm_div3 #goto Lppcasm_div3 if not
1634 subf r3,r5,r3 #h-=d ;
1635Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i
1636 cmpi 0,0,r7,0 # is (i == 0)?
1637 beq Lppcasm_div4
1638 $SHL r3,r3,r7 # h = (h<< i)
1639 $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i)
1640 $SHL r5,r5,r7 # d<<=i
1641 or r3,r3,r8 # h = (h<<i)|(l>>(BN_BITS2-i))
1642 $SHL r4,r4,r7 # l <<=i
1643Lppcasm_div4:
1644 $SHRI r9,r5,`$BITS/2` # r9 = dh
1645 # dl will be computed when needed
1646 # as it saves registers.
1647 li r6,2 #r6=2
1648 mtctr r6 #counter will be in count.
1649Lppcasm_divouterloop:
1650 $SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4)
1651 $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4
1652 # compute here for innerloop.
1653 $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh
1654 bne Lppcasm_div5 # goto Lppcasm_div5 if not
1655
1656 li r8,-1
1657 $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
1658 b Lppcasm_div6
1659Lppcasm_div5:
1660 $UDIV r8,r3,r9 #q = h/dh
1661Lppcasm_div6:
1662 $UMULL r12,r9,r8 #th = q*dh
1663 $CLRU r10,r5,`$BITS/2` #r10=dl
1664 $UMULL r6,r8,r10 #tl = q*dl
1665
1666Lppcasm_divinnerloop:
1667 subf r10,r12,r3 #t = h -th
1668 $SHRI r7,r10,`$BITS/2` #r7= (t &BN_MASK2H), sort of...
1669 addic. r7,r7,0 #test if r7 == 0. used below.
1670 # now want to compute
1671 # r7 = (t<<BN_BITS4)|((l&BN_MASK2h)>>BN_BITS4)
1672 # the following 2 instructions do that
1673 $SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4)
1674 or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4)
1675 $UCMP cr1,r6,r7 # compare (tl <= r7)
1676 bne Lppcasm_divinnerexit
1677 ble cr1,Lppcasm_divinnerexit
1678 addi r8,r8,-1 #q--
1679 subf r12,r9,r12 #th -=dh
1680 $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop.
1681 subf r6,r10,r6 #tl -=dl
1682 b Lppcasm_divinnerloop
1683Lppcasm_divinnerexit:
1684 $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4)
1685 $SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h;
1686 $UCMP cr1,r4,r11 # compare l and tl
1687 add r12,r12,r10 # th+=t
1688 bge cr1,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
1689 addi r12,r12,1 # th++
1690Lppcasm_div7:
1691 subf r11,r11,r4 #r11=l-tl
1692 $UCMP cr1,r3,r12 #compare h and th
1693 bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
1694 addi r8,r8,-1 # q--
1695 add r3,r5,r3 # h+=d
1696Lppcasm_div8:
1697 subf r12,r12,r3 #r12 = h-th
1698 $SHLI r4,r11,`$BITS/2` #l=(l&BN_MASK2l)<<BN_BITS4
1699 # want to compute
1700 # h = ((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2
1701 # the following 2 instructions will do this.
1702 $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2.
1703 $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3
1704 bdz Lppcasm_div9 #if (count==0) break ;
1705 $SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4
1706 b Lppcasm_divouterloop
1707Lppcasm_div9:
1708 or r3,r8,r0
1709 blr
1710 .long 0x00000000
1711
1712#
1713# NOTE: The following label name should be changed to
1714# "bn_sqr_words" i.e. remove the first dot
1715# for the gcc compiler. This should be automatically
1716# done in the build
1717#
1718.align 4
1719.bn_sqr_words:
1720#
1721# Optimized version of bn_sqr_words
1722#
1723# void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
1724#
1725# r3 = r
1726# r4 = a
1727# r5 = n
1728#
1729# r6 = a[i].
1730# r7,r8 = product.
1731#
1732# No unrolling done here. Not performance critical.
1733
1734 addic. r5,r5,0 #test r5.
1735 beq Lppcasm_sqr_adios
1736 addi r4,r4,-$BNSZ
1737 addi r3,r3,-$BNSZ
1738 mtctr r5
1739Lppcasm_sqr_mainloop:
1740 #sqr(r[0],r[1],a[0]);
1741 $LDU r6,$BNSZ(r4)
1742 $UMULL r7,r6,r6
1743 $UMULH r8,r6,r6
1744 $STU r7,$BNSZ(r3)
1745 $STU r8,$BNSZ(r3)
1746 bdnz- Lppcasm_sqr_mainloop
1747Lppcasm_sqr_adios:
1748 blr
1749 .long 0x00000000
1750
1751
1752#
1753# NOTE: The following label name should be changed to
1754# "bn_mul_words" i.e. remove the first dot
1755# for the gcc compiler. This should be automatically
1756# done in the build
1757#
1758
1759.align 4
1760.bn_mul_words:
1761#
1762# BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1763#
1764# r3 = rp
1765# r4 = ap
1766# r5 = num
1767# r6 = w
1768 xor r0,r0,r0
1769 xor r12,r12,r12 # used for carry
1770 rlwinm. r7,r5,30,2,31 # num >> 2
1771 beq Lppcasm_mw_REM
1772 mtctr r7
1773Lppcasm_mw_LOOP:
1774 #mul(rp[0],ap[0],w,c1);
1775 $LD r8,`0*$BNSZ`(r4)
1776 $UMULL r9,r6,r8
1777 $UMULH r10,r6,r8
1778 addc r9,r9,r12
1779 #addze r10,r10 #carry is NOT ignored.
1780 #will be taken care of
1781 #in second spin below
1782 #using adde.
1783 $ST r9,`0*$BNSZ`(r3)
1784 #mul(rp[1],ap[1],w,c1);
1785 $LD r8,`1*$BNSZ`(r4)
1786 $UMULL r11,r6,r8
1787 $UMULH r12,r6,r8
1788 adde r11,r11,r10
1789 #addze r12,r12
1790 $ST r11,`1*$BNSZ`(r3)
1791 #mul(rp[2],ap[2],w,c1);
1792 $LD r8,`2*$BNSZ`(r4)
1793 $UMULL r9,r6,r8
1794 $UMULH r10,r6,r8
1795 adde r9,r9,r12
1796 #addze r10,r10
1797 $ST r9,`2*$BNSZ`(r3)
1798 #mul_add(rp[3],ap[3],w,c1);
1799 $LD r8,`3*$BNSZ`(r4)
1800 $UMULL r11,r6,r8
1801 $UMULH r12,r6,r8
1802 adde r11,r11,r10
1803 addze r12,r12 #this spin we collect carry into
1804 #r12
1805 $ST r11,`3*$BNSZ`(r3)
1806
1807 addi r3,r3,`4*$BNSZ`
1808 addi r4,r4,`4*$BNSZ`
1809 bdnz- Lppcasm_mw_LOOP
1810
1811Lppcasm_mw_REM:
1812 andi. r5,r5,0x3
1813 beq Lppcasm_mw_OVER
1814 #mul(rp[0],ap[0],w,c1);
1815 $LD r8,`0*$BNSZ`(r4)
1816 $UMULL r9,r6,r8
1817 $UMULH r10,r6,r8
1818 addc r9,r9,r12
1819 addze r10,r10
1820 $ST r9,`0*$BNSZ`(r3)
1821 addi r12,r10,0
1822
1823 addi r5,r5,-1
1824 cmpli 0,0,r5,0
1825 beq Lppcasm_mw_OVER
1826
1827
1828 #mul(rp[1],ap[1],w,c1);
1829 $LD r8,`1*$BNSZ`(r4)
1830 $UMULL r9,r6,r8
1831 $UMULH r10,r6,r8
1832 addc r9,r9,r12
1833 addze r10,r10
1834 $ST r9,`1*$BNSZ`(r3)
1835 addi r12,r10,0
1836
1837 addi r5,r5,-1
1838 cmpli 0,0,r5,0
1839 beq Lppcasm_mw_OVER
1840
1841 #mul_add(rp[2],ap[2],w,c1);
1842 $LD r8,`2*$BNSZ`(r4)
1843 $UMULL r9,r6,r8
1844 $UMULH r10,r6,r8
1845 addc r9,r9,r12
1846 addze r10,r10
1847 $ST r9,`2*$BNSZ`(r3)
1848 addi r12,r10,0
1849
1850Lppcasm_mw_OVER:
1851 addi r3,r12,0
1852 blr
1853 .long 0x00000000
1854
1855#
1856# NOTE: The following label name should be changed to
1857# "bn_mul_add_words" i.e. remove the first dot
1858# for the gcc compiler. This should be automatically
1859# done in the build
1860#
1861
1862.align 4
1863.bn_mul_add_words:
1864#
1865# BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
1866#
1867# r3 = rp
1868# r4 = ap
1869# r5 = num
1870# r6 = w
1871#
1872# empirical evidence suggests that unrolled version performs best!!
1873#
1874 xor r0,r0,r0 #r0 = 0
1875 xor r12,r12,r12 #r12 = 0 . used for carry
1876 rlwinm. r7,r5,30,2,31 # num >> 2
1877 beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
1878 mtctr r7
1879Lppcasm_maw_mainloop:
1880 #mul_add(rp[0],ap[0],w,c1);
1881 $LD r8,`0*$BNSZ`(r4)
1882 $LD r11,`0*$BNSZ`(r3)
1883 $UMULL r9,r6,r8
1884 $UMULH r10,r6,r8
1885 addc r9,r9,r12 #r12 is carry.
1886 addze r10,r10
1887 addc r9,r9,r11
1888 #addze r10,r10
1889 #the above instruction addze
1890 #is NOT needed. Carry will NOT
1891 #be ignored. It's not affected
1892 #by multiply and will be collected
1893 #in the next spin
1894 $ST r9,`0*$BNSZ`(r3)
1895
1896 #mul_add(rp[1],ap[1],w,c1);
1897 $LD r8,`1*$BNSZ`(r4)
1898 $LD r9,`1*$BNSZ`(r3)
1899 $UMULL r11,r6,r8
1900 $UMULH r12,r6,r8
1901 adde r11,r11,r10 #r10 is carry.
1902 addze r12,r12
1903 addc r11,r11,r9
1904 #addze r12,r12
1905 $ST r11,`1*$BNSZ`(r3)
1906
1907 #mul_add(rp[2],ap[2],w,c1);
1908 $LD r8,`2*$BNSZ`(r4)
1909 $UMULL r9,r6,r8
1910 $LD r11,`2*$BNSZ`(r3)
1911 $UMULH r10,r6,r8
1912 adde r9,r9,r12
1913 addze r10,r10
1914 addc r9,r9,r11
1915 #addze r10,r10
1916 $ST r9,`2*$BNSZ`(r3)
1917
1918 #mul_add(rp[3],ap[3],w,c1);
1919 $LD r8,`3*$BNSZ`(r4)
1920 $UMULL r11,r6,r8
1921 $LD r9,`3*$BNSZ`(r3)
1922 $UMULH r12,r6,r8
1923 adde r11,r11,r10
1924 addze r12,r12
1925 addc r11,r11,r9
1926 addze r12,r12
1927 $ST r11,`3*$BNSZ`(r3)
1928 addi r3,r3,`4*$BNSZ`
1929 addi r4,r4,`4*$BNSZ`
1930 bdnz- Lppcasm_maw_mainloop
1931
1932Lppcasm_maw_leftover:
1933 andi. r5,r5,0x3
1934 beq Lppcasm_maw_adios
1935 addi r3,r3,-$BNSZ
1936 addi r4,r4,-$BNSZ
1937 #mul_add(rp[0],ap[0],w,c1);
1938 mtctr r5
1939 $LDU r8,$BNSZ(r4)
1940 $UMULL r9,r6,r8
1941 $UMULH r10,r6,r8
1942 $LDU r11,$BNSZ(r3)
1943 addc r9,r9,r11
1944 addze r10,r10
1945 addc r9,r9,r12
1946 addze r12,r10
1947 $ST r9,0(r3)
1948
1949 bdz Lppcasm_maw_adios
1950 #mul_add(rp[1],ap[1],w,c1);
1951 $LDU r8,$BNSZ(r4)
1952 $UMULL r9,r6,r8
1953 $UMULH r10,r6,r8
1954 $LDU r11,$BNSZ(r3)
1955 addc r9,r9,r11
1956 addze r10,r10
1957 addc r9,r9,r12
1958 addze r12,r10
1959 $ST r9,0(r3)
1960
1961 bdz Lppcasm_maw_adios
1962 #mul_add(rp[2],ap[2],w,c1);
1963 $LDU r8,$BNSZ(r4)
1964 $UMULL r9,r6,r8
1965 $UMULH r10,r6,r8
1966 $LDU r11,$BNSZ(r3)
1967 addc r9,r9,r11
1968 addze r10,r10
1969 addc r9,r9,r12
1970 addze r12,r10
1971 $ST r9,0(r3)
1972
1973Lppcasm_maw_adios:
1974 addi r3,r12,0
1975 blr
1976 .long 0x00000000
1977 .align 4
1978EOF
1979$data =~ s/\`([^\`]*)\`/eval $1/gem;
1980print $data;
1981close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/ppc64-mont.pl b/src/lib/libcrypto/bn/asm/ppc64-mont.pl
deleted file mode 100644
index 3449b35855..0000000000
--- a/src/lib/libcrypto/bn/asm/ppc64-mont.pl
+++ /dev/null
@@ -1,918 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# December 2007
11
12# The reason for undertaken effort is basically following. Even though
13# Power 6 CPU operates at incredible 4.7GHz clock frequency, its PKI
14# performance was observed to be less than impressive, essentially as
15# fast as 1.8GHz PPC970, or 2.6 times(!) slower than one would hope.
16# Well, it's not surprising that IBM had to make some sacrifices to
17# boost the clock frequency that much, but no overall improvement?
18# Having observed how much difference did switching to FPU make on
19# UltraSPARC, playing same stunt on Power 6 appeared appropriate...
20# Unfortunately the resulting performance improvement is not as
21# impressive, ~30%, and in absolute terms is still very far from what
22# one would expect from 4.7GHz CPU. There is a chance that I'm doing
23# something wrong, but in the lack of assembler level micro-profiling
24# data or at least decent platform guide I can't tell... Or better
25# results might be achieved with VMX... Anyway, this module provides
26# *worse* performance on other PowerPC implementations, ~40-15% slower
27# on PPC970 depending on key length and ~40% slower on Power 5 for all
28# key lengths. As it's obviously inappropriate as "best all-round"
29# alternative, it has to be complemented with run-time CPU family
30# detection. Oh! It should also be noted that unlike other PowerPC
31# implementation IALU ppc-mont.pl module performs *suboptimaly* on
32# >=1024-bit key lengths on Power 6. It should also be noted that
33# *everything* said so far applies to 64-bit builds! As far as 32-bit
34# application executed on 64-bit CPU goes, this module is likely to
35# become preferred choice, because it's easy to adapt it for such
36# case and *is* faster than 32-bit ppc-mont.pl on *all* processors.
37
38# February 2008
39
40# Micro-profiling assisted optimization results in ~15% improvement
41# over original ppc64-mont.pl version, or overall ~50% improvement
42# over ppc.pl module on Power 6. If compared to ppc-mont.pl on same
43# Power 6 CPU, this module is 5-150% faster depending on key length,
44# [hereafter] more for longer keys. But if compared to ppc-mont.pl
45# on 1.8GHz PPC970, it's only 5-55% faster. Still far from impressive
46# in absolute terms, but it's apparently the way Power 6 is...
47
48$flavour = shift;
49
50if ($flavour =~ /32/) {
51 $SIZE_T=4;
52 $RZONE= 224;
53 $FRAME= $SIZE_T*12+8*12;
54 $fname= "bn_mul_mont_ppc64";
55
56 $STUX= "stwux"; # store indexed and update
57 $PUSH= "stw";
58 $POP= "lwz";
59 die "not implemented yet";
60} elsif ($flavour =~ /64/) {
61 $SIZE_T=8;
62 $RZONE= 288;
63 $FRAME= $SIZE_T*12+8*12;
64 $fname= "bn_mul_mont";
65
66 # same as above, but 64-bit mnemonics...
67 $STUX= "stdux"; # store indexed and update
68 $PUSH= "std";
69 $POP= "ld";
70} else { die "nonsense $flavour"; }
71
72$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
73( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
74( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
75die "can't locate ppc-xlate.pl";
76
77open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
78
79$FRAME=($FRAME+63)&~63;
80$TRANSFER=16*8;
81
82$carry="r0";
83$sp="r1";
84$toc="r2";
85$rp="r3"; $ovf="r3";
86$ap="r4";
87$bp="r5";
88$np="r6";
89$n0="r7";
90$num="r8";
91$rp="r9"; # $rp is reassigned
92$tp="r10";
93$j="r11";
94$i="r12";
95# non-volatile registers
96$nap_d="r14"; # interleaved ap and np in double format
97$a0="r15"; # ap[0]
98$t0="r16"; # temporary registers
99$t1="r17";
100$t2="r18";
101$t3="r19";
102$t4="r20";
103$t5="r21";
104$t6="r22";
105$t7="r23";
106
107# PPC offers enough register bank capacity to unroll inner loops twice
108#
109# ..A3A2A1A0
110# dcba
111# -----------
112# A0a
113# A0b
114# A0c
115# A0d
116# A1a
117# A1b
118# A1c
119# A1d
120# A2a
121# A2b
122# A2c
123# A2d
124# A3a
125# A3b
126# A3c
127# A3d
128# ..a
129# ..b
130#
131$ba="f0"; $bb="f1"; $bc="f2"; $bd="f3";
132$na="f4"; $nb="f5"; $nc="f6"; $nd="f7";
133$dota="f8"; $dotb="f9";
134$A0="f10"; $A1="f11"; $A2="f12"; $A3="f13";
135$N0="f14"; $N1="f15"; $N2="f16"; $N3="f17";
136$T0a="f18"; $T0b="f19";
137$T1a="f20"; $T1b="f21";
138$T2a="f22"; $T2b="f23";
139$T3a="f24"; $T3b="f25";
140
141# sp----------->+-------------------------------+
142# | saved sp |
143# +-------------------------------+
144# | |
145# +-------------------------------+
146# | 10 saved gpr, r14-r23 |
147# . .
148# . .
149# +12*size_t +-------------------------------+
150# | 12 saved fpr, f14-f25 |
151# . .
152# . .
153# +12*8 +-------------------------------+
154# | padding to 64 byte boundary |
155# . .
156# +X +-------------------------------+
157# | 16 gpr<->fpr transfer zone |
158# . .
159# . .
160# +16*8 +-------------------------------+
161# | __int64 tmp[-1] |
162# +-------------------------------+
163# | __int64 tmp[num] |
164# . .
165# . .
166# . .
167# +(num+1)*8 +-------------------------------+
168# | padding to 64 byte boundary |
169# . .
170# +X +-------------------------------+
171# | double nap_d[4*num] |
172# . .
173# . .
174# . .
175# +-------------------------------+
176
177$code=<<___;
178.machine "any"
179.text
180
181.globl .$fname
182.align 5
183.$fname:
184 cmpwi $num,4
185 mr $rp,r3 ; $rp is reassigned
186 li r3,0 ; possible "not handled" return code
187 bltlr-
188 andi. r0,$num,1 ; $num has to be even
189 bnelr-
190
191 slwi $num,$num,3 ; num*=8
192 li $i,-4096
193 slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num
194 add $tp,$tp,$num ; place for tp[num+1]
195 addi $tp,$tp,`$FRAME+$TRANSFER+8+64+$RZONE`
196 subf $tp,$tp,$sp ; $sp-$tp
197 and $tp,$tp,$i ; minimize TLB usage
198 subf $tp,$sp,$tp ; $tp-$sp
199 $STUX $sp,$sp,$tp ; alloca
200
201 $PUSH r14,`2*$SIZE_T`($sp)
202 $PUSH r15,`3*$SIZE_T`($sp)
203 $PUSH r16,`4*$SIZE_T`($sp)
204 $PUSH r17,`5*$SIZE_T`($sp)
205 $PUSH r18,`6*$SIZE_T`($sp)
206 $PUSH r19,`7*$SIZE_T`($sp)
207 $PUSH r20,`8*$SIZE_T`($sp)
208 $PUSH r21,`9*$SIZE_T`($sp)
209 $PUSH r22,`10*$SIZE_T`($sp)
210 $PUSH r23,`11*$SIZE_T`($sp)
211 stfd f14,`12*$SIZE_T+0`($sp)
212 stfd f15,`12*$SIZE_T+8`($sp)
213 stfd f16,`12*$SIZE_T+16`($sp)
214 stfd f17,`12*$SIZE_T+24`($sp)
215 stfd f18,`12*$SIZE_T+32`($sp)
216 stfd f19,`12*$SIZE_T+40`($sp)
217 stfd f20,`12*$SIZE_T+48`($sp)
218 stfd f21,`12*$SIZE_T+56`($sp)
219 stfd f22,`12*$SIZE_T+64`($sp)
220 stfd f23,`12*$SIZE_T+72`($sp)
221 stfd f24,`12*$SIZE_T+80`($sp)
222 stfd f25,`12*$SIZE_T+88`($sp)
223
224 ld $a0,0($ap) ; pull ap[0] value
225 ld $n0,0($n0) ; pull n0[0] value
226 ld $t3,0($bp) ; bp[0]
227
228 addi $tp,$sp,`$FRAME+$TRANSFER+8+64`
229 li $i,-64
230 add $nap_d,$tp,$num
231 and $nap_d,$nap_d,$i ; align to 64 bytes
232
233 mulld $t7,$a0,$t3 ; ap[0]*bp[0]
234 ; nap_d is off by 1, because it's used with stfdu/lfdu
235 addi $nap_d,$nap_d,-8
236 srwi $j,$num,`3+1` ; counter register, num/2
237 mulld $t7,$t7,$n0 ; tp[0]*n0
238 addi $j,$j,-1
239 addi $tp,$sp,`$FRAME+$TRANSFER-8`
240 li $carry,0
241 mtctr $j
242
243 ; transfer bp[0] to FPU as 4x16-bit values
244 extrdi $t0,$t3,16,48
245 extrdi $t1,$t3,16,32
246 extrdi $t2,$t3,16,16
247 extrdi $t3,$t3,16,0
248 std $t0,`$FRAME+0`($sp)
249 std $t1,`$FRAME+8`($sp)
250 std $t2,`$FRAME+16`($sp)
251 std $t3,`$FRAME+24`($sp)
252 ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
253 extrdi $t4,$t7,16,48
254 extrdi $t5,$t7,16,32
255 extrdi $t6,$t7,16,16
256 extrdi $t7,$t7,16,0
257 std $t4,`$FRAME+32`($sp)
258 std $t5,`$FRAME+40`($sp)
259 std $t6,`$FRAME+48`($sp)
260 std $t7,`$FRAME+56`($sp)
261 lwz $t0,4($ap) ; load a[j] as 32-bit word pair
262 lwz $t1,0($ap)
263 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
264 lwz $t3,8($ap)
265 lwz $t4,4($np) ; load n[j] as 32-bit word pair
266 lwz $t5,0($np)
267 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
268 lwz $t7,8($np)
269 lfd $ba,`$FRAME+0`($sp)
270 lfd $bb,`$FRAME+8`($sp)
271 lfd $bc,`$FRAME+16`($sp)
272 lfd $bd,`$FRAME+24`($sp)
273 lfd $na,`$FRAME+32`($sp)
274 lfd $nb,`$FRAME+40`($sp)
275 lfd $nc,`$FRAME+48`($sp)
276 lfd $nd,`$FRAME+56`($sp)
277 std $t0,`$FRAME+64`($sp)
278 std $t1,`$FRAME+72`($sp)
279 std $t2,`$FRAME+80`($sp)
280 std $t3,`$FRAME+88`($sp)
281 std $t4,`$FRAME+96`($sp)
282 std $t5,`$FRAME+104`($sp)
283 std $t6,`$FRAME+112`($sp)
284 std $t7,`$FRAME+120`($sp)
285 fcfid $ba,$ba
286 fcfid $bb,$bb
287 fcfid $bc,$bc
288 fcfid $bd,$bd
289 fcfid $na,$na
290 fcfid $nb,$nb
291 fcfid $nc,$nc
292 fcfid $nd,$nd
293
294 lfd $A0,`$FRAME+64`($sp)
295 lfd $A1,`$FRAME+72`($sp)
296 lfd $A2,`$FRAME+80`($sp)
297 lfd $A3,`$FRAME+88`($sp)
298 lfd $N0,`$FRAME+96`($sp)
299 lfd $N1,`$FRAME+104`($sp)
300 lfd $N2,`$FRAME+112`($sp)
301 lfd $N3,`$FRAME+120`($sp)
302 fcfid $A0,$A0
303 fcfid $A1,$A1
304 fcfid $A2,$A2
305 fcfid $A3,$A3
306 fcfid $N0,$N0
307 fcfid $N1,$N1
308 fcfid $N2,$N2
309 fcfid $N3,$N3
310 addi $ap,$ap,16
311 addi $np,$np,16
312
313 fmul $T1a,$A1,$ba
314 fmul $T1b,$A1,$bb
315 stfd $A0,8($nap_d) ; save a[j] in double format
316 stfd $A1,16($nap_d)
317 fmul $T2a,$A2,$ba
318 fmul $T2b,$A2,$bb
319 stfd $A2,24($nap_d) ; save a[j+1] in double format
320 stfd $A3,32($nap_d)
321 fmul $T3a,$A3,$ba
322 fmul $T3b,$A3,$bb
323 stfd $N0,40($nap_d) ; save n[j] in double format
324 stfd $N1,48($nap_d)
325 fmul $T0a,$A0,$ba
326 fmul $T0b,$A0,$bb
327 stfd $N2,56($nap_d) ; save n[j+1] in double format
328 stfdu $N3,64($nap_d)
329
330 fmadd $T1a,$A0,$bc,$T1a
331 fmadd $T1b,$A0,$bd,$T1b
332 fmadd $T2a,$A1,$bc,$T2a
333 fmadd $T2b,$A1,$bd,$T2b
334 fmadd $T3a,$A2,$bc,$T3a
335 fmadd $T3b,$A2,$bd,$T3b
336 fmul $dota,$A3,$bc
337 fmul $dotb,$A3,$bd
338
339 fmadd $T1a,$N1,$na,$T1a
340 fmadd $T1b,$N1,$nb,$T1b
341 fmadd $T2a,$N2,$na,$T2a
342 fmadd $T2b,$N2,$nb,$T2b
343 fmadd $T3a,$N3,$na,$T3a
344 fmadd $T3b,$N3,$nb,$T3b
345 fmadd $T0a,$N0,$na,$T0a
346 fmadd $T0b,$N0,$nb,$T0b
347
348 fmadd $T1a,$N0,$nc,$T1a
349 fmadd $T1b,$N0,$nd,$T1b
350 fmadd $T2a,$N1,$nc,$T2a
351 fmadd $T2b,$N1,$nd,$T2b
352 fmadd $T3a,$N2,$nc,$T3a
353 fmadd $T3b,$N2,$nd,$T3b
354 fmadd $dota,$N3,$nc,$dota
355 fmadd $dotb,$N3,$nd,$dotb
356
357 fctid $T0a,$T0a
358 fctid $T0b,$T0b
359 fctid $T1a,$T1a
360 fctid $T1b,$T1b
361 fctid $T2a,$T2a
362 fctid $T2b,$T2b
363 fctid $T3a,$T3a
364 fctid $T3b,$T3b
365
366 stfd $T0a,`$FRAME+0`($sp)
367 stfd $T0b,`$FRAME+8`($sp)
368 stfd $T1a,`$FRAME+16`($sp)
369 stfd $T1b,`$FRAME+24`($sp)
370 stfd $T2a,`$FRAME+32`($sp)
371 stfd $T2b,`$FRAME+40`($sp)
372 stfd $T3a,`$FRAME+48`($sp)
373 stfd $T3b,`$FRAME+56`($sp)
374
375.align 5
376L1st:
377 lwz $t0,4($ap) ; load a[j] as 32-bit word pair
378 lwz $t1,0($ap)
379 lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
380 lwz $t3,8($ap)
381 lwz $t4,4($np) ; load n[j] as 32-bit word pair
382 lwz $t5,0($np)
383 lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
384 lwz $t7,8($np)
385 std $t0,`$FRAME+64`($sp)
386 std $t1,`$FRAME+72`($sp)
387 std $t2,`$FRAME+80`($sp)
388 std $t3,`$FRAME+88`($sp)
389 std $t4,`$FRAME+96`($sp)
390 std $t5,`$FRAME+104`($sp)
391 std $t6,`$FRAME+112`($sp)
392 std $t7,`$FRAME+120`($sp)
393 ld $t0,`$FRAME+0`($sp)
394 ld $t1,`$FRAME+8`($sp)
395 ld $t2,`$FRAME+16`($sp)
396 ld $t3,`$FRAME+24`($sp)
397 ld $t4,`$FRAME+32`($sp)
398 ld $t5,`$FRAME+40`($sp)
399 ld $t6,`$FRAME+48`($sp)
400 ld $t7,`$FRAME+56`($sp)
401 lfd $A0,`$FRAME+64`($sp)
402 lfd $A1,`$FRAME+72`($sp)
403 lfd $A2,`$FRAME+80`($sp)
404 lfd $A3,`$FRAME+88`($sp)
405 lfd $N0,`$FRAME+96`($sp)
406 lfd $N1,`$FRAME+104`($sp)
407 lfd $N2,`$FRAME+112`($sp)
408 lfd $N3,`$FRAME+120`($sp)
409 fcfid $A0,$A0
410 fcfid $A1,$A1
411 fcfid $A2,$A2
412 fcfid $A3,$A3
413 fcfid $N0,$N0
414 fcfid $N1,$N1
415 fcfid $N2,$N2
416 fcfid $N3,$N3
417 addi $ap,$ap,16
418 addi $np,$np,16
419
420 fmul $T1a,$A1,$ba
421 fmul $T1b,$A1,$bb
422 fmul $T2a,$A2,$ba
423 fmul $T2b,$A2,$bb
424 stfd $A0,8($nap_d) ; save a[j] in double format
425 stfd $A1,16($nap_d)
426 fmul $T3a,$A3,$ba
427 fmul $T3b,$A3,$bb
428 fmadd $T0a,$A0,$ba,$dota
429 fmadd $T0b,$A0,$bb,$dotb
430 stfd $A2,24($nap_d) ; save a[j+1] in double format
431 stfd $A3,32($nap_d)
432
433 fmadd $T1a,$A0,$bc,$T1a
434 fmadd $T1b,$A0,$bd,$T1b
435 fmadd $T2a,$A1,$bc,$T2a
436 fmadd $T2b,$A1,$bd,$T2b
437 stfd $N0,40($nap_d) ; save n[j] in double format
438 stfd $N1,48($nap_d)
439 fmadd $T3a,$A2,$bc,$T3a
440 fmadd $T3b,$A2,$bd,$T3b
441 add $t0,$t0,$carry ; can not overflow
442 fmul $dota,$A3,$bc
443 fmul $dotb,$A3,$bd
444 stfd $N2,56($nap_d) ; save n[j+1] in double format
445 stfdu $N3,64($nap_d)
446 srdi $carry,$t0,16
447 add $t1,$t1,$carry
448 srdi $carry,$t1,16
449
450 fmadd $T1a,$N1,$na,$T1a
451 fmadd $T1b,$N1,$nb,$T1b
452 insrdi $t0,$t1,16,32
453 fmadd $T2a,$N2,$na,$T2a
454 fmadd $T2b,$N2,$nb,$T2b
455 add $t2,$t2,$carry
456 fmadd $T3a,$N3,$na,$T3a
457 fmadd $T3b,$N3,$nb,$T3b
458 srdi $carry,$t2,16
459 fmadd $T0a,$N0,$na,$T0a
460 fmadd $T0b,$N0,$nb,$T0b
461 insrdi $t0,$t2,16,16
462 add $t3,$t3,$carry
463 srdi $carry,$t3,16
464
465 fmadd $T1a,$N0,$nc,$T1a
466 fmadd $T1b,$N0,$nd,$T1b
467 insrdi $t0,$t3,16,0 ; 0..63 bits
468 fmadd $T2a,$N1,$nc,$T2a
469 fmadd $T2b,$N1,$nd,$T2b
470 add $t4,$t4,$carry
471 fmadd $T3a,$N2,$nc,$T3a
472 fmadd $T3b,$N2,$nd,$T3b
473 srdi $carry,$t4,16
474 fmadd $dota,$N3,$nc,$dota
475 fmadd $dotb,$N3,$nd,$dotb
476 add $t5,$t5,$carry
477 srdi $carry,$t5,16
478 insrdi $t4,$t5,16,32
479
480 fctid $T0a,$T0a
481 fctid $T0b,$T0b
482 add $t6,$t6,$carry
483 fctid $T1a,$T1a
484 fctid $T1b,$T1b
485 srdi $carry,$t6,16
486 fctid $T2a,$T2a
487 fctid $T2b,$T2b
488 insrdi $t4,$t6,16,16
489 fctid $T3a,$T3a
490 fctid $T3b,$T3b
491 add $t7,$t7,$carry
492 insrdi $t4,$t7,16,0 ; 64..127 bits
493 srdi $carry,$t7,16 ; upper 33 bits
494
495 stfd $T0a,`$FRAME+0`($sp)
496 stfd $T0b,`$FRAME+8`($sp)
497 stfd $T1a,`$FRAME+16`($sp)
498 stfd $T1b,`$FRAME+24`($sp)
499 stfd $T2a,`$FRAME+32`($sp)
500 stfd $T2b,`$FRAME+40`($sp)
501 stfd $T3a,`$FRAME+48`($sp)
502 stfd $T3b,`$FRAME+56`($sp)
503 std $t0,8($tp) ; tp[j-1]
504 stdu $t4,16($tp) ; tp[j]
505 bdnz- L1st
506
507 fctid $dota,$dota
508 fctid $dotb,$dotb
509
510 ld $t0,`$FRAME+0`($sp)
511 ld $t1,`$FRAME+8`($sp)
512 ld $t2,`$FRAME+16`($sp)
513 ld $t3,`$FRAME+24`($sp)
514 ld $t4,`$FRAME+32`($sp)
515 ld $t5,`$FRAME+40`($sp)
516 ld $t6,`$FRAME+48`($sp)
517 ld $t7,`$FRAME+56`($sp)
518 stfd $dota,`$FRAME+64`($sp)
519 stfd $dotb,`$FRAME+72`($sp)
520
521 add $t0,$t0,$carry ; can not overflow
522 srdi $carry,$t0,16
523 add $t1,$t1,$carry
524 srdi $carry,$t1,16
525 insrdi $t0,$t1,16,32
526 add $t2,$t2,$carry
527 srdi $carry,$t2,16
528 insrdi $t0,$t2,16,16
529 add $t3,$t3,$carry
530 srdi $carry,$t3,16
531 insrdi $t0,$t3,16,0 ; 0..63 bits
532 add $t4,$t4,$carry
533 srdi $carry,$t4,16
534 add $t5,$t5,$carry
535 srdi $carry,$t5,16
536 insrdi $t4,$t5,16,32
537 add $t6,$t6,$carry
538 srdi $carry,$t6,16
539 insrdi $t4,$t6,16,16
540 add $t7,$t7,$carry
541 insrdi $t4,$t7,16,0 ; 64..127 bits
542 srdi $carry,$t7,16 ; upper 33 bits
543 ld $t6,`$FRAME+64`($sp)
544 ld $t7,`$FRAME+72`($sp)
545
546 std $t0,8($tp) ; tp[j-1]
547 stdu $t4,16($tp) ; tp[j]
548
549 add $t6,$t6,$carry ; can not overflow
550 srdi $carry,$t6,16
551 add $t7,$t7,$carry
552 insrdi $t6,$t7,48,0
553 srdi $ovf,$t7,48
554 std $t6,8($tp) ; tp[num-1]
555
556 slwi $t7,$num,2
557 subf $nap_d,$t7,$nap_d ; rewind pointer
558
559 li $i,8 ; i=1
560.align 5
561Louter:
562 ldx $t3,$bp,$i ; bp[i]
563 ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
564 mulld $t7,$a0,$t3 ; ap[0]*bp[i]
565
566 addi $tp,$sp,`$FRAME+$TRANSFER`
567 add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0]
568 li $carry,0
569 mulld $t7,$t7,$n0 ; tp[0]*n0
570 mtctr $j
571
572 ; transfer bp[i] to FPU as 4x16-bit values
573 extrdi $t0,$t3,16,48
574 extrdi $t1,$t3,16,32
575 extrdi $t2,$t3,16,16
576 extrdi $t3,$t3,16,0
577 std $t0,`$FRAME+0`($sp)
578 std $t1,`$FRAME+8`($sp)
579 std $t2,`$FRAME+16`($sp)
580 std $t3,`$FRAME+24`($sp)
581 ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
582 extrdi $t4,$t7,16,48
583 extrdi $t5,$t7,16,32
584 extrdi $t6,$t7,16,16
585 extrdi $t7,$t7,16,0
586 std $t4,`$FRAME+32`($sp)
587 std $t5,`$FRAME+40`($sp)
588 std $t6,`$FRAME+48`($sp)
589 std $t7,`$FRAME+56`($sp)
590
591 lfd $A0,8($nap_d) ; load a[j] in double format
592 lfd $A1,16($nap_d)
593 lfd $A2,24($nap_d) ; load a[j+1] in double format
594 lfd $A3,32($nap_d)
595 lfd $N0,40($nap_d) ; load n[j] in double format
596 lfd $N1,48($nap_d)
597 lfd $N2,56($nap_d) ; load n[j+1] in double format
598 lfdu $N3,64($nap_d)
599
600 lfd $ba,`$FRAME+0`($sp)
601 lfd $bb,`$FRAME+8`($sp)
602 lfd $bc,`$FRAME+16`($sp)
603 lfd $bd,`$FRAME+24`($sp)
604 lfd $na,`$FRAME+32`($sp)
605 lfd $nb,`$FRAME+40`($sp)
606 lfd $nc,`$FRAME+48`($sp)
607 lfd $nd,`$FRAME+56`($sp)
608
609 fcfid $ba,$ba
610 fcfid $bb,$bb
611 fcfid $bc,$bc
612 fcfid $bd,$bd
613 fcfid $na,$na
614 fcfid $nb,$nb
615 fcfid $nc,$nc
616 fcfid $nd,$nd
617
618 fmul $T1a,$A1,$ba
619 fmul $T1b,$A1,$bb
620 fmul $T2a,$A2,$ba
621 fmul $T2b,$A2,$bb
622 fmul $T3a,$A3,$ba
623 fmul $T3b,$A3,$bb
624 fmul $T0a,$A0,$ba
625 fmul $T0b,$A0,$bb
626
627 fmadd $T1a,$A0,$bc,$T1a
628 fmadd $T1b,$A0,$bd,$T1b
629 fmadd $T2a,$A1,$bc,$T2a
630 fmadd $T2b,$A1,$bd,$T2b
631 fmadd $T3a,$A2,$bc,$T3a
632 fmadd $T3b,$A2,$bd,$T3b
633 fmul $dota,$A3,$bc
634 fmul $dotb,$A3,$bd
635
636 fmadd $T1a,$N1,$na,$T1a
637 fmadd $T1b,$N1,$nb,$T1b
638 lfd $A0,8($nap_d) ; load a[j] in double format
639 lfd $A1,16($nap_d)
640 fmadd $T2a,$N2,$na,$T2a
641 fmadd $T2b,$N2,$nb,$T2b
642 lfd $A2,24($nap_d) ; load a[j+1] in double format
643 lfd $A3,32($nap_d)
644 fmadd $T3a,$N3,$na,$T3a
645 fmadd $T3b,$N3,$nb,$T3b
646 fmadd $T0a,$N0,$na,$T0a
647 fmadd $T0b,$N0,$nb,$T0b
648
649 fmadd $T1a,$N0,$nc,$T1a
650 fmadd $T1b,$N0,$nd,$T1b
651 fmadd $T2a,$N1,$nc,$T2a
652 fmadd $T2b,$N1,$nd,$T2b
653 fmadd $T3a,$N2,$nc,$T3a
654 fmadd $T3b,$N2,$nd,$T3b
655 fmadd $dota,$N3,$nc,$dota
656 fmadd $dotb,$N3,$nd,$dotb
657
658 fctid $T0a,$T0a
659 fctid $T0b,$T0b
660 fctid $T1a,$T1a
661 fctid $T1b,$T1b
662 fctid $T2a,$T2a
663 fctid $T2b,$T2b
664 fctid $T3a,$T3a
665 fctid $T3b,$T3b
666
667 stfd $T0a,`$FRAME+0`($sp)
668 stfd $T0b,`$FRAME+8`($sp)
669 stfd $T1a,`$FRAME+16`($sp)
670 stfd $T1b,`$FRAME+24`($sp)
671 stfd $T2a,`$FRAME+32`($sp)
672 stfd $T2b,`$FRAME+40`($sp)
673 stfd $T3a,`$FRAME+48`($sp)
674 stfd $T3b,`$FRAME+56`($sp)
675
676.align 5
677Linner:
678 fmul $T1a,$A1,$ba
679 fmul $T1b,$A1,$bb
680 fmul $T2a,$A2,$ba
681 fmul $T2b,$A2,$bb
682 lfd $N0,40($nap_d) ; load n[j] in double format
683 lfd $N1,48($nap_d)
684 fmul $T3a,$A3,$ba
685 fmul $T3b,$A3,$bb
686 fmadd $T0a,$A0,$ba,$dota
687 fmadd $T0b,$A0,$bb,$dotb
688 lfd $N2,56($nap_d) ; load n[j+1] in double format
689 lfdu $N3,64($nap_d)
690
691 fmadd $T1a,$A0,$bc,$T1a
692 fmadd $T1b,$A0,$bd,$T1b
693 fmadd $T2a,$A1,$bc,$T2a
694 fmadd $T2b,$A1,$bd,$T2b
695 lfd $A0,8($nap_d) ; load a[j] in double format
696 lfd $A1,16($nap_d)
697 fmadd $T3a,$A2,$bc,$T3a
698 fmadd $T3b,$A2,$bd,$T3b
699 fmul $dota,$A3,$bc
700 fmul $dotb,$A3,$bd
701 lfd $A2,24($nap_d) ; load a[j+1] in double format
702 lfd $A3,32($nap_d)
703
704 fmadd $T1a,$N1,$na,$T1a
705 fmadd $T1b,$N1,$nb,$T1b
706 ld $t0,`$FRAME+0`($sp)
707 ld $t1,`$FRAME+8`($sp)
708 fmadd $T2a,$N2,$na,$T2a
709 fmadd $T2b,$N2,$nb,$T2b
710 ld $t2,`$FRAME+16`($sp)
711 ld $t3,`$FRAME+24`($sp)
712 fmadd $T3a,$N3,$na,$T3a
713 fmadd $T3b,$N3,$nb,$T3b
714 add $t0,$t0,$carry ; can not overflow
715 ld $t4,`$FRAME+32`($sp)
716 ld $t5,`$FRAME+40`($sp)
717 fmadd $T0a,$N0,$na,$T0a
718 fmadd $T0b,$N0,$nb,$T0b
719 srdi $carry,$t0,16
720 add $t1,$t1,$carry
721 srdi $carry,$t1,16
722 ld $t6,`$FRAME+48`($sp)
723 ld $t7,`$FRAME+56`($sp)
724
725 fmadd $T1a,$N0,$nc,$T1a
726 fmadd $T1b,$N0,$nd,$T1b
727 insrdi $t0,$t1,16,32
728 ld $t1,8($tp) ; tp[j]
729 fmadd $T2a,$N1,$nc,$T2a
730 fmadd $T2b,$N1,$nd,$T2b
731 add $t2,$t2,$carry
732 fmadd $T3a,$N2,$nc,$T3a
733 fmadd $T3b,$N2,$nd,$T3b
734 srdi $carry,$t2,16
735 insrdi $t0,$t2,16,16
736 fmadd $dota,$N3,$nc,$dota
737 fmadd $dotb,$N3,$nd,$dotb
738 add $t3,$t3,$carry
739 ldu $t2,16($tp) ; tp[j+1]
740 srdi $carry,$t3,16
741 insrdi $t0,$t3,16,0 ; 0..63 bits
742 add $t4,$t4,$carry
743
744 fctid $T0a,$T0a
745 fctid $T0b,$T0b
746 srdi $carry,$t4,16
747 fctid $T1a,$T1a
748 fctid $T1b,$T1b
749 add $t5,$t5,$carry
750 fctid $T2a,$T2a
751 fctid $T2b,$T2b
752 srdi $carry,$t5,16
753 insrdi $t4,$t5,16,32
754 fctid $T3a,$T3a
755 fctid $T3b,$T3b
756 add $t6,$t6,$carry
757 srdi $carry,$t6,16
758 insrdi $t4,$t6,16,16
759
760 stfd $T0a,`$FRAME+0`($sp)
761 stfd $T0b,`$FRAME+8`($sp)
762 add $t7,$t7,$carry
763 addc $t3,$t0,$t1
764 stfd $T1a,`$FRAME+16`($sp)
765 stfd $T1b,`$FRAME+24`($sp)
766 insrdi $t4,$t7,16,0 ; 64..127 bits
767 srdi $carry,$t7,16 ; upper 33 bits
768 stfd $T2a,`$FRAME+32`($sp)
769 stfd $T2b,`$FRAME+40`($sp)
770 adde $t5,$t4,$t2
771 stfd $T3a,`$FRAME+48`($sp)
772 stfd $T3b,`$FRAME+56`($sp)
773 addze $carry,$carry
774 std $t3,-16($tp) ; tp[j-1]
775 std $t5,-8($tp) ; tp[j]
776 bdnz- Linner
777
778 fctid $dota,$dota
779 fctid $dotb,$dotb
780 ld $t0,`$FRAME+0`($sp)
781 ld $t1,`$FRAME+8`($sp)
782 ld $t2,`$FRAME+16`($sp)
783 ld $t3,`$FRAME+24`($sp)
784 ld $t4,`$FRAME+32`($sp)
785 ld $t5,`$FRAME+40`($sp)
786 ld $t6,`$FRAME+48`($sp)
787 ld $t7,`$FRAME+56`($sp)
788 stfd $dota,`$FRAME+64`($sp)
789 stfd $dotb,`$FRAME+72`($sp)
790
791 add $t0,$t0,$carry ; can not overflow
792 srdi $carry,$t0,16
793 add $t1,$t1,$carry
794 srdi $carry,$t1,16
795 insrdi $t0,$t1,16,32
796 add $t2,$t2,$carry
797 ld $t1,8($tp) ; tp[j]
798 srdi $carry,$t2,16
799 insrdi $t0,$t2,16,16
800 add $t3,$t3,$carry
801 ldu $t2,16($tp) ; tp[j+1]
802 srdi $carry,$t3,16
803 insrdi $t0,$t3,16,0 ; 0..63 bits
804 add $t4,$t4,$carry
805 srdi $carry,$t4,16
806 add $t5,$t5,$carry
807 srdi $carry,$t5,16
808 insrdi $t4,$t5,16,32
809 add $t6,$t6,$carry
810 srdi $carry,$t6,16
811 insrdi $t4,$t6,16,16
812 add $t7,$t7,$carry
813 insrdi $t4,$t7,16,0 ; 64..127 bits
814 srdi $carry,$t7,16 ; upper 33 bits
815 ld $t6,`$FRAME+64`($sp)
816 ld $t7,`$FRAME+72`($sp)
817
818 addc $t3,$t0,$t1
819 adde $t5,$t4,$t2
820 addze $carry,$carry
821
822 std $t3,-16($tp) ; tp[j-1]
823 std $t5,-8($tp) ; tp[j]
824
825 add $carry,$carry,$ovf ; comsume upmost overflow
826 add $t6,$t6,$carry ; can not overflow
827 srdi $carry,$t6,16
828 add $t7,$t7,$carry
829 insrdi $t6,$t7,48,0
830 srdi $ovf,$t7,48
831 std $t6,0($tp) ; tp[num-1]
832
833 slwi $t7,$num,2
834 addi $i,$i,8
835 subf $nap_d,$t7,$nap_d ; rewind pointer
836 cmpw $i,$num
837 blt- Louter
838
839 subf $np,$num,$np ; rewind np
840 addi $j,$j,1 ; restore counter
841 subfc $i,$i,$i ; j=0 and "clear" XER[CA]
842 addi $tp,$sp,`$FRAME+$TRANSFER+8`
843 addi $t4,$sp,`$FRAME+$TRANSFER+16`
844 addi $t5,$np,8
845 addi $t6,$rp,8
846 mtctr $j
847
848.align 4
849Lsub: ldx $t0,$tp,$i
850 ldx $t1,$np,$i
851 ldx $t2,$t4,$i
852 ldx $t3,$t5,$i
853 subfe $t0,$t1,$t0 ; tp[j]-np[j]
854 subfe $t2,$t3,$t2 ; tp[j+1]-np[j+1]
855 stdx $t0,$rp,$i
856 stdx $t2,$t6,$i
857 addi $i,$i,16
858 bdnz- Lsub
859
860 li $i,0
861 subfe $ovf,$i,$ovf ; handle upmost overflow bit
862 and $ap,$tp,$ovf
863 andc $np,$rp,$ovf
864 or $ap,$ap,$np ; ap=borrow?tp:rp
865 addi $t7,$ap,8
866 mtctr $j
867
868.align 4
869Lcopy: ; copy or in-place refresh
870 ldx $t0,$ap,$i
871 ldx $t1,$t7,$i
872 std $i,8($nap_d) ; zap nap_d
873 std $i,16($nap_d)
874 std $i,24($nap_d)
875 std $i,32($nap_d)
876 std $i,40($nap_d)
877 std $i,48($nap_d)
878 std $i,56($nap_d)
879 stdu $i,64($nap_d)
880 stdx $t0,$rp,$i
881 stdx $t1,$t6,$i
882 stdx $i,$tp,$i ; zap tp at once
883 stdx $i,$t4,$i
884 addi $i,$i,16
885 bdnz- Lcopy
886
887 $POP r14,`2*$SIZE_T`($sp)
888 $POP r15,`3*$SIZE_T`($sp)
889 $POP r16,`4*$SIZE_T`($sp)
890 $POP r17,`5*$SIZE_T`($sp)
891 $POP r18,`6*$SIZE_T`($sp)
892 $POP r19,`7*$SIZE_T`($sp)
893 $POP r20,`8*$SIZE_T`($sp)
894 $POP r21,`9*$SIZE_T`($sp)
895 $POP r22,`10*$SIZE_T`($sp)
896 $POP r23,`11*$SIZE_T`($sp)
897 lfd f14,`12*$SIZE_T+0`($sp)
898 lfd f15,`12*$SIZE_T+8`($sp)
899 lfd f16,`12*$SIZE_T+16`($sp)
900 lfd f17,`12*$SIZE_T+24`($sp)
901 lfd f18,`12*$SIZE_T+32`($sp)
902 lfd f19,`12*$SIZE_T+40`($sp)
903 lfd f20,`12*$SIZE_T+48`($sp)
904 lfd f21,`12*$SIZE_T+56`($sp)
905 lfd f22,`12*$SIZE_T+64`($sp)
906 lfd f23,`12*$SIZE_T+72`($sp)
907 lfd f24,`12*$SIZE_T+80`($sp)
908 lfd f25,`12*$SIZE_T+88`($sp)
909 $POP $sp,0($sp)
910 li r3,1 ; signal "handled"
911 blr
912 .long 0
913.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@fy.chalmers.se>"
914___
915
916$code =~ s/\`([^\`]*)\`/eval $1/gem;
917print $code;
918close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/s390x-mont.pl b/src/lib/libcrypto/bn/asm/s390x-mont.pl
deleted file mode 100644
index f61246f5b6..0000000000
--- a/src/lib/libcrypto/bn/asm/s390x-mont.pl
+++ /dev/null
@@ -1,225 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# April 2007.
11#
12# Performance improvement over vanilla C code varies from 85% to 45%
13# depending on key length and benchmark. Unfortunately in this context
14# these are not very impressive results [for code that utilizes "wide"
15# 64x64=128-bit multiplication, which is not commonly available to C
16# programmers], at least hand-coded bn_asm.c replacement is known to
17# provide 30-40% better results for longest keys. Well, on a second
18# thought it's not very surprising, because z-CPUs are single-issue
19# and _strictly_ in-order execution, while bn_mul_mont is more or less
20# dependent on CPU ability to pipe-line instructions and have several
21# of them "in-flight" at the same time. I mean while other methods,
22# for example Karatsuba, aim to minimize amount of multiplications at
23# the cost of other operations increase, bn_mul_mont aim to neatly
24# "overlap" multiplications and the other operations [and on most
25# platforms even minimize the amount of the other operations, in
26# particular references to memory]. But it's possible to improve this
27# module performance by implementing dedicated squaring code-path and
28# possibly by unrolling loops...
29
30# January 2009.
31#
32# Reschedule to minimize/avoid Address Generation Interlock hazard,
33# make inner loops counter-based.
34
35$mn0="%r0";
36$num="%r1";
37
38# int bn_mul_mont(
39$rp="%r2"; # BN_ULONG *rp,
40$ap="%r3"; # const BN_ULONG *ap,
41$bp="%r4"; # const BN_ULONG *bp,
42$np="%r5"; # const BN_ULONG *np,
43$n0="%r6"; # const BN_ULONG *n0,
44#$num="160(%r15)" # int num);
45
46$bi="%r2"; # zaps rp
47$j="%r7";
48
49$ahi="%r8";
50$alo="%r9";
51$nhi="%r10";
52$nlo="%r11";
53$AHI="%r12";
54$NHI="%r13";
55$count="%r14";
56$sp="%r15";
57
58$code.=<<___;
59.text
60.globl bn_mul_mont
61.type bn_mul_mont,\@function
62bn_mul_mont:
63 lgf $num,164($sp) # pull $num
64 sla $num,3 # $num to enumerate bytes
65 la $bp,0($num,$bp)
66
67 stg %r2,16($sp)
68
69 cghi $num,16 #
70 lghi %r2,0 #
71 blr %r14 # if($num<16) return 0;
72 cghi $num,96 #
73 bhr %r14 # if($num>96) return 0;
74
75 stmg %r3,%r15,24($sp)
76
77 lghi $rp,-160-8 # leave room for carry bit
78 lcgr $j,$num # -$num
79 lgr %r0,$sp
80 la $rp,0($rp,$sp)
81 la $sp,0($j,$rp) # alloca
82 stg %r0,0($sp) # back chain
83
84 sra $num,3 # restore $num
85 la $bp,0($j,$bp) # restore $bp
86 ahi $num,-1 # adjust $num for inner loop
87 lg $n0,0($n0) # pull n0
88
89 lg $bi,0($bp)
90 lg $alo,0($ap)
91 mlgr $ahi,$bi # ap[0]*bp[0]
92 lgr $AHI,$ahi
93
94 lgr $mn0,$alo # "tp[0]"*n0
95 msgr $mn0,$n0
96
97 lg $nlo,0($np) #
98 mlgr $nhi,$mn0 # np[0]*m1
99 algr $nlo,$alo # +="tp[0]"
100 lghi $NHI,0
101 alcgr $NHI,$nhi
102
103 la $j,8(%r0) # j=1
104 lr $count,$num
105
106.align 16
107.L1st:
108 lg $alo,0($j,$ap)
109 mlgr $ahi,$bi # ap[j]*bp[0]
110 algr $alo,$AHI
111 lghi $AHI,0
112 alcgr $AHI,$ahi
113
114 lg $nlo,0($j,$np)
115 mlgr $nhi,$mn0 # np[j]*m1
116 algr $nlo,$NHI
117 lghi $NHI,0
118 alcgr $nhi,$NHI # +="tp[j]"
119 algr $nlo,$alo
120 alcgr $NHI,$nhi
121
122 stg $nlo,160-8($j,$sp) # tp[j-1]=
123 la $j,8($j) # j++
124 brct $count,.L1st
125
126 algr $NHI,$AHI
127 lghi $AHI,0
128 alcgr $AHI,$AHI # upmost overflow bit
129 stg $NHI,160-8($j,$sp)
130 stg $AHI,160($j,$sp)
131 la $bp,8($bp) # bp++
132
133.Louter:
134 lg $bi,0($bp) # bp[i]
135 lg $alo,0($ap)
136 mlgr $ahi,$bi # ap[0]*bp[i]
137 alg $alo,160($sp) # +=tp[0]
138 lghi $AHI,0
139 alcgr $AHI,$ahi
140
141 lgr $mn0,$alo
142 msgr $mn0,$n0 # tp[0]*n0
143
144 lg $nlo,0($np) # np[0]
145 mlgr $nhi,$mn0 # np[0]*m1
146 algr $nlo,$alo # +="tp[0]"
147 lghi $NHI,0
148 alcgr $NHI,$nhi
149
150 la $j,8(%r0) # j=1
151 lr $count,$num
152
153.align 16
154.Linner:
155 lg $alo,0($j,$ap)
156 mlgr $ahi,$bi # ap[j]*bp[i]
157 algr $alo,$AHI
158 lghi $AHI,0
159 alcgr $ahi,$AHI
160 alg $alo,160($j,$sp)# +=tp[j]
161 alcgr $AHI,$ahi
162
163 lg $nlo,0($j,$np)
164 mlgr $nhi,$mn0 # np[j]*m1
165 algr $nlo,$NHI
166 lghi $NHI,0
167 alcgr $nhi,$NHI
168 algr $nlo,$alo # +="tp[j]"
169 alcgr $NHI,$nhi
170
171 stg $nlo,160-8($j,$sp) # tp[j-1]=
172 la $j,8($j) # j++
173 brct $count,.Linner
174
175 algr $NHI,$AHI
176 lghi $AHI,0
177 alcgr $AHI,$AHI
178 alg $NHI,160($j,$sp)# accumulate previous upmost overflow bit
179 lghi $ahi,0
180 alcgr $AHI,$ahi # new upmost overflow bit
181 stg $NHI,160-8($j,$sp)
182 stg $AHI,160($j,$sp)
183
184 la $bp,8($bp) # bp++
185 clg $bp,160+8+32($j,$sp) # compare to &bp[num]
186 jne .Louter
187
188 lg $rp,160+8+16($j,$sp) # reincarnate rp
189 la $ap,160($sp)
190 ahi $num,1 # restore $num, incidentally clears "borrow"
191
192 la $j,0(%r0)
193 lr $count,$num
194.Lsub: lg $alo,0($j,$ap)
195 slbg $alo,0($j,$np)
196 stg $alo,0($j,$rp)
197 la $j,8($j)
198 brct $count,.Lsub
199 lghi $ahi,0
200 slbgr $AHI,$ahi # handle upmost carry
201
202 ngr $ap,$AHI
203 lghi $np,-1
204 xgr $np,$AHI
205 ngr $np,$rp
206 ogr $ap,$np # ap=borrow?tp:rp
207
208 la $j,0(%r0)
209 lgr $count,$num
210.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh
211 stg $j,160($j,$sp) # zap tp
212 stg $alo,0($j,$rp)
213 la $j,8($j)
214 brct $count,.Lcopy
215
216 la %r1,160+8+48($j,$sp)
217 lmg %r6,%r15,0(%r1)
218 lghi %r2,1 # signal "processed"
219 br %r14
220.size bn_mul_mont,.-bn_mul_mont
221.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
222___
223
224print $code;
225close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/s390x.S b/src/lib/libcrypto/bn/asm/s390x.S
deleted file mode 100755
index 43fcb79bc0..0000000000
--- a/src/lib/libcrypto/bn/asm/s390x.S
+++ /dev/null
@@ -1,678 +0,0 @@
1.ident "s390x.S, version 1.1"
2// ====================================================================
3// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4// project.
5//
6// Rights for redistribution and usage in source and binary forms are
7// granted according to the OpenSSL license. Warranty of any kind is
8// disclaimed.
9// ====================================================================
10
11.text
12
13#define zero %r0
14
15// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
16.globl bn_mul_add_words
17.type bn_mul_add_words,@function
18.align 4
19bn_mul_add_words:
20 lghi zero,0 // zero = 0
21 la %r1,0(%r2) // put rp aside
22 lghi %r2,0 // i=0;
23 ltgfr %r4,%r4
24 bler %r14 // if (len<=0) return 0;
25
26 stmg %r6,%r10,48(%r15)
27 lghi %r10,3
28 lghi %r8,0 // carry = 0
29 nr %r10,%r4 // len%4
30 sra %r4,2 // cnt=len/4
31 jz .Loop1_madd // carry is incidentally cleared if branch taken
32 algr zero,zero // clear carry
33
34.Loop4_madd:
35 lg %r7,0(%r2,%r3) // ap[i]
36 mlgr %r6,%r5 // *=w
37 alcgr %r7,%r8 // +=carry
38 alcgr %r6,zero
39 alg %r7,0(%r2,%r1) // +=rp[i]
40 stg %r7,0(%r2,%r1) // rp[i]=
41
42 lg %r9,8(%r2,%r3)
43 mlgr %r8,%r5
44 alcgr %r9,%r6
45 alcgr %r8,zero
46 alg %r9,8(%r2,%r1)
47 stg %r9,8(%r2,%r1)
48
49 lg %r7,16(%r2,%r3)
50 mlgr %r6,%r5
51 alcgr %r7,%r8
52 alcgr %r6,zero
53 alg %r7,16(%r2,%r1)
54 stg %r7,16(%r2,%r1)
55
56 lg %r9,24(%r2,%r3)
57 mlgr %r8,%r5
58 alcgr %r9,%r6
59 alcgr %r8,zero
60 alg %r9,24(%r2,%r1)
61 stg %r9,24(%r2,%r1)
62
63 la %r2,32(%r2) // i+=4
64 brct %r4,.Loop4_madd
65
66 la %r10,1(%r10) // see if len%4 is zero ...
67 brct %r10,.Loop1_madd // without touching condition code:-)
68
69.Lend_madd:
70 alcgr %r8,zero // collect carry bit
71 lgr %r2,%r8
72 lmg %r6,%r10,48(%r15)
73 br %r14
74
75.Loop1_madd:
76 lg %r7,0(%r2,%r3) // ap[i]
77 mlgr %r6,%r5 // *=w
78 alcgr %r7,%r8 // +=carry
79 alcgr %r6,zero
80 alg %r7,0(%r2,%r1) // +=rp[i]
81 stg %r7,0(%r2,%r1) // rp[i]=
82
83 lgr %r8,%r6
84 la %r2,8(%r2) // i++
85 brct %r10,.Loop1_madd
86
87 j .Lend_madd
88.size bn_mul_add_words,.-bn_mul_add_words
89
90// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
91.globl bn_mul_words
92.type bn_mul_words,@function
93.align 4
94bn_mul_words:
95 lghi zero,0 // zero = 0
96 la %r1,0(%r2) // put rp aside
97 lghi %r2,0 // i=0;
98 ltgfr %r4,%r4
99 bler %r14 // if (len<=0) return 0;
100
101 stmg %r6,%r10,48(%r15)
102 lghi %r10,3
103 lghi %r8,0 // carry = 0
104 nr %r10,%r4 // len%4
105 sra %r4,2 // cnt=len/4
106 jz .Loop1_mul // carry is incidentally cleared if branch taken
107 algr zero,zero // clear carry
108
109.Loop4_mul:
110 lg %r7,0(%r2,%r3) // ap[i]
111 mlgr %r6,%r5 // *=w
112 alcgr %r7,%r8 // +=carry
113 stg %r7,0(%r2,%r1) // rp[i]=
114
115 lg %r9,8(%r2,%r3)
116 mlgr %r8,%r5
117 alcgr %r9,%r6
118 stg %r9,8(%r2,%r1)
119
120 lg %r7,16(%r2,%r3)
121 mlgr %r6,%r5
122 alcgr %r7,%r8
123 stg %r7,16(%r2,%r1)
124
125 lg %r9,24(%r2,%r3)
126 mlgr %r8,%r5
127 alcgr %r9,%r6
128 stg %r9,24(%r2,%r1)
129
130 la %r2,32(%r2) // i+=4
131 brct %r4,.Loop4_mul
132
133 la %r10,1(%r10) // see if len%4 is zero ...
134 brct %r10,.Loop1_mul // without touching condition code:-)
135
136.Lend_mul:
137 alcgr %r8,zero // collect carry bit
138 lgr %r2,%r8
139 lmg %r6,%r10,48(%r15)
140 br %r14
141
142.Loop1_mul:
143 lg %r7,0(%r2,%r3) // ap[i]
144 mlgr %r6,%r5 // *=w
145 alcgr %r7,%r8 // +=carry
146 stg %r7,0(%r2,%r1) // rp[i]=
147
148 lgr %r8,%r6
149 la %r2,8(%r2) // i++
150 brct %r10,.Loop1_mul
151
152 j .Lend_mul
153.size bn_mul_words,.-bn_mul_words
154
155// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4)
156.globl bn_sqr_words
157.type bn_sqr_words,@function
158.align 4
159bn_sqr_words:
160 ltgfr %r4,%r4
161 bler %r14
162
163 stmg %r6,%r7,48(%r15)
164 srag %r1,%r4,2 // cnt=len/4
165 jz .Loop1_sqr
166
167.Loop4_sqr:
168 lg %r7,0(%r3)
169 mlgr %r6,%r7
170 stg %r7,0(%r2)
171 stg %r6,8(%r2)
172
173 lg %r7,8(%r3)
174 mlgr %r6,%r7
175 stg %r7,16(%r2)
176 stg %r6,24(%r2)
177
178 lg %r7,16(%r3)
179 mlgr %r6,%r7
180 stg %r7,32(%r2)
181 stg %r6,40(%r2)
182
183 lg %r7,24(%r3)
184 mlgr %r6,%r7
185 stg %r7,48(%r2)
186 stg %r6,56(%r2)
187
188 la %r3,32(%r3)
189 la %r2,64(%r2)
190 brct %r1,.Loop4_sqr
191
192 lghi %r1,3
193 nr %r4,%r1 // cnt=len%4
194 jz .Lend_sqr
195
196.Loop1_sqr:
197 lg %r7,0(%r3)
198 mlgr %r6,%r7
199 stg %r7,0(%r2)
200 stg %r6,8(%r2)
201
202 la %r3,8(%r3)
203 la %r2,16(%r2)
204 brct %r4,.Loop1_sqr
205
206.Lend_sqr:
207 lmg %r6,%r7,48(%r15)
208 br %r14
209.size bn_sqr_words,.-bn_sqr_words
210
211// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
212.globl bn_div_words
213.type bn_div_words,@function
214.align 4
215bn_div_words:
216 dlgr %r2,%r4
217 lgr %r2,%r3
218 br %r14
219.size bn_div_words,.-bn_div_words
220
221// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
222.globl bn_add_words
223.type bn_add_words,@function
224.align 4
225bn_add_words:
226 la %r1,0(%r2) // put rp aside
227 lghi %r2,0 // i=0
228 ltgfr %r5,%r5
229 bler %r14 // if (len<=0) return 0;
230
231 stg %r6,48(%r15)
232 lghi %r6,3
233 nr %r6,%r5 // len%4
234 sra %r5,2 // len/4, use sra because it sets condition code
235 jz .Loop1_add // carry is incidentally cleared if branch taken
236 algr %r2,%r2 // clear carry
237
238.Loop4_add:
239 lg %r0,0(%r2,%r3)
240 alcg %r0,0(%r2,%r4)
241 stg %r0,0(%r2,%r1)
242 lg %r0,8(%r2,%r3)
243 alcg %r0,8(%r2,%r4)
244 stg %r0,8(%r2,%r1)
245 lg %r0,16(%r2,%r3)
246 alcg %r0,16(%r2,%r4)
247 stg %r0,16(%r2,%r1)
248 lg %r0,24(%r2,%r3)
249 alcg %r0,24(%r2,%r4)
250 stg %r0,24(%r2,%r1)
251
252 la %r2,32(%r2) // i+=4
253 brct %r5,.Loop4_add
254
255 la %r6,1(%r6) // see if len%4 is zero ...
256 brct %r6,.Loop1_add // without touching condition code:-)
257
258.Lexit_add:
259 lghi %r2,0
260 alcgr %r2,%r2
261 lg %r6,48(%r15)
262 br %r14
263
264.Loop1_add:
265 lg %r0,0(%r2,%r3)
266 alcg %r0,0(%r2,%r4)
267 stg %r0,0(%r2,%r1)
268
269 la %r2,8(%r2) // i++
270 brct %r6,.Loop1_add
271
272 j .Lexit_add
273.size bn_add_words,.-bn_add_words
274
275// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
276.globl bn_sub_words
277.type bn_sub_words,@function
278.align 4
279bn_sub_words:
280 la %r1,0(%r2) // put rp aside
281 lghi %r2,0 // i=0
282 ltgfr %r5,%r5
283 bler %r14 // if (len<=0) return 0;
284
285 stg %r6,48(%r15)
286 lghi %r6,3
287 nr %r6,%r5 // len%4
288 sra %r5,2 // len/4, use sra because it sets condition code
289 jnz .Loop4_sub // borrow is incidentally cleared if branch taken
290 slgr %r2,%r2 // clear borrow
291
292.Loop1_sub:
293 lg %r0,0(%r2,%r3)
294 slbg %r0,0(%r2,%r4)
295 stg %r0,0(%r2,%r1)
296
297 la %r2,8(%r2) // i++
298 brct %r6,.Loop1_sub
299 j .Lexit_sub
300
301.Loop4_sub:
302 lg %r0,0(%r2,%r3)
303 slbg %r0,0(%r2,%r4)
304 stg %r0,0(%r2,%r1)
305 lg %r0,8(%r2,%r3)
306 slbg %r0,8(%r2,%r4)
307 stg %r0,8(%r2,%r1)
308 lg %r0,16(%r2,%r3)
309 slbg %r0,16(%r2,%r4)
310 stg %r0,16(%r2,%r1)
311 lg %r0,24(%r2,%r3)
312 slbg %r0,24(%r2,%r4)
313 stg %r0,24(%r2,%r1)
314
315 la %r2,32(%r2) // i+=4
316 brct %r5,.Loop4_sub
317
318 la %r6,1(%r6) // see if len%4 is zero ...
319 brct %r6,.Loop1_sub // without touching condition code:-)
320
321.Lexit_sub:
322 lghi %r2,0
323 slbgr %r2,%r2
324 lcgr %r2,%r2
325 lg %r6,48(%r15)
326 br %r14
327.size bn_sub_words,.-bn_sub_words
328
329#define c1 %r1
330#define c2 %r5
331#define c3 %r8
332
333#define mul_add_c(ai,bi,c1,c2,c3) \
334 lg %r7,ai*8(%r3); \
335 mlg %r6,bi*8(%r4); \
336 algr c1,%r7; \
337 alcgr c2,%r6; \
338 alcgr c3,zero
339
340// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
341.globl bn_mul_comba8
342.type bn_mul_comba8,@function
343.align 4
344bn_mul_comba8:
345 stmg %r6,%r8,48(%r15)
346
347 lghi c1,0
348 lghi c2,0
349 lghi c3,0
350 lghi zero,0
351
352 mul_add_c(0,0,c1,c2,c3);
353 stg c1,0*8(%r2)
354 lghi c1,0
355
356 mul_add_c(0,1,c2,c3,c1);
357 mul_add_c(1,0,c2,c3,c1);
358 stg c2,1*8(%r2)
359 lghi c2,0
360
361 mul_add_c(2,0,c3,c1,c2);
362 mul_add_c(1,1,c3,c1,c2);
363 mul_add_c(0,2,c3,c1,c2);
364 stg c3,2*8(%r2)
365 lghi c3,0
366
367 mul_add_c(0,3,c1,c2,c3);
368 mul_add_c(1,2,c1,c2,c3);
369 mul_add_c(2,1,c1,c2,c3);
370 mul_add_c(3,0,c1,c2,c3);
371 stg c1,3*8(%r2)
372 lghi c1,0
373
374 mul_add_c(4,0,c2,c3,c1);
375 mul_add_c(3,1,c2,c3,c1);
376 mul_add_c(2,2,c2,c3,c1);
377 mul_add_c(1,3,c2,c3,c1);
378 mul_add_c(0,4,c2,c3,c1);
379 stg c2,4*8(%r2)
380 lghi c2,0
381
382 mul_add_c(0,5,c3,c1,c2);
383 mul_add_c(1,4,c3,c1,c2);
384 mul_add_c(2,3,c3,c1,c2);
385 mul_add_c(3,2,c3,c1,c2);
386 mul_add_c(4,1,c3,c1,c2);
387 mul_add_c(5,0,c3,c1,c2);
388 stg c3,5*8(%r2)
389 lghi c3,0
390
391 mul_add_c(6,0,c1,c2,c3);
392 mul_add_c(5,1,c1,c2,c3);
393 mul_add_c(4,2,c1,c2,c3);
394 mul_add_c(3,3,c1,c2,c3);
395 mul_add_c(2,4,c1,c2,c3);
396 mul_add_c(1,5,c1,c2,c3);
397 mul_add_c(0,6,c1,c2,c3);
398 stg c1,6*8(%r2)
399 lghi c1,0
400
401 mul_add_c(0,7,c2,c3,c1);
402 mul_add_c(1,6,c2,c3,c1);
403 mul_add_c(2,5,c2,c3,c1);
404 mul_add_c(3,4,c2,c3,c1);
405 mul_add_c(4,3,c2,c3,c1);
406 mul_add_c(5,2,c2,c3,c1);
407 mul_add_c(6,1,c2,c3,c1);
408 mul_add_c(7,0,c2,c3,c1);
409 stg c2,7*8(%r2)
410 lghi c2,0
411
412 mul_add_c(7,1,c3,c1,c2);
413 mul_add_c(6,2,c3,c1,c2);
414 mul_add_c(5,3,c3,c1,c2);
415 mul_add_c(4,4,c3,c1,c2);
416 mul_add_c(3,5,c3,c1,c2);
417 mul_add_c(2,6,c3,c1,c2);
418 mul_add_c(1,7,c3,c1,c2);
419 stg c3,8*8(%r2)
420 lghi c3,0
421
422 mul_add_c(2,7,c1,c2,c3);
423 mul_add_c(3,6,c1,c2,c3);
424 mul_add_c(4,5,c1,c2,c3);
425 mul_add_c(5,4,c1,c2,c3);
426 mul_add_c(6,3,c1,c2,c3);
427 mul_add_c(7,2,c1,c2,c3);
428 stg c1,9*8(%r2)
429 lghi c1,0
430
431 mul_add_c(7,3,c2,c3,c1);
432 mul_add_c(6,4,c2,c3,c1);
433 mul_add_c(5,5,c2,c3,c1);
434 mul_add_c(4,6,c2,c3,c1);
435 mul_add_c(3,7,c2,c3,c1);
436 stg c2,10*8(%r2)
437 lghi c2,0
438
439 mul_add_c(4,7,c3,c1,c2);
440 mul_add_c(5,6,c3,c1,c2);
441 mul_add_c(6,5,c3,c1,c2);
442 mul_add_c(7,4,c3,c1,c2);
443 stg c3,11*8(%r2)
444 lghi c3,0
445
446 mul_add_c(7,5,c1,c2,c3);
447 mul_add_c(6,6,c1,c2,c3);
448 mul_add_c(5,7,c1,c2,c3);
449 stg c1,12*8(%r2)
450 lghi c1,0
451
452
453 mul_add_c(6,7,c2,c3,c1);
454 mul_add_c(7,6,c2,c3,c1);
455 stg c2,13*8(%r2)
456 lghi c2,0
457
458 mul_add_c(7,7,c3,c1,c2);
459 stg c3,14*8(%r2)
460 stg c1,15*8(%r2)
461
462 lmg %r6,%r8,48(%r15)
463 br %r14
464.size bn_mul_comba8,.-bn_mul_comba8
465
466// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
467.globl bn_mul_comba4
468.type bn_mul_comba4,@function
469.align 4
470bn_mul_comba4:
471 stmg %r6,%r8,48(%r15)
472
473 lghi c1,0
474 lghi c2,0
475 lghi c3,0
476 lghi zero,0
477
478 mul_add_c(0,0,c1,c2,c3);
479 stg c1,0*8(%r3)
480 lghi c1,0
481
482 mul_add_c(0,1,c2,c3,c1);
483 mul_add_c(1,0,c2,c3,c1);
484 stg c2,1*8(%r2)
485 lghi c2,0
486
487 mul_add_c(2,0,c3,c1,c2);
488 mul_add_c(1,1,c3,c1,c2);
489 mul_add_c(0,2,c3,c1,c2);
490 stg c3,2*8(%r2)
491 lghi c3,0
492
493 mul_add_c(0,3,c1,c2,c3);
494 mul_add_c(1,2,c1,c2,c3);
495 mul_add_c(2,1,c1,c2,c3);
496 mul_add_c(3,0,c1,c2,c3);
497 stg c1,3*8(%r2)
498 lghi c1,0
499
500 mul_add_c(3,1,c2,c3,c1);
501 mul_add_c(2,2,c2,c3,c1);
502 mul_add_c(1,3,c2,c3,c1);
503 stg c2,4*8(%r2)
504 lghi c2,0
505
506 mul_add_c(2,3,c3,c1,c2);
507 mul_add_c(3,2,c3,c1,c2);
508 stg c3,5*8(%r2)
509 lghi c3,0
510
511 mul_add_c(3,3,c1,c2,c3);
512 stg c1,6*8(%r2)
513 stg c2,7*8(%r2)
514
515 stmg %r6,%r8,48(%r15)
516 br %r14
517.size bn_mul_comba4,.-bn_mul_comba4
518
519#define sqr_add_c(ai,c1,c2,c3) \
520 lg %r7,ai*8(%r3); \
521 mlgr %r6,%r7; \
522 algr c1,%r7; \
523 alcgr c2,%r6; \
524 alcgr c3,zero
525
526#define sqr_add_c2(ai,aj,c1,c2,c3) \
527 lg %r7,ai*8(%r3); \
528 mlg %r6,aj*8(%r3); \
529 algr c1,%r7; \
530 alcgr c2,%r6; \
531 alcgr c3,zero; \
532 algr c1,%r7; \
533 alcgr c2,%r6; \
534 alcgr c3,zero
535
536// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
537.globl bn_sqr_comba8
538.type bn_sqr_comba8,@function
539.align 4
540bn_sqr_comba8:
541 stmg %r6,%r8,48(%r15)
542
543 lghi c1,0
544 lghi c2,0
545 lghi c3,0
546 lghi zero,0
547
548 sqr_add_c(0,c1,c2,c3);
549 stg c1,0*8(%r2)
550 lghi c1,0
551
552 sqr_add_c2(1,0,c2,c3,c1);
553 stg c2,1*8(%r2)
554 lghi c2,0
555
556 sqr_add_c(1,c3,c1,c2);
557 sqr_add_c2(2,0,c3,c1,c2);
558 stg c3,2*8(%r2)
559 lghi c3,0
560
561 sqr_add_c2(3,0,c1,c2,c3);
562 sqr_add_c2(2,1,c1,c2,c3);
563 stg c1,3*8(%r2)
564 lghi c1,0
565
566 sqr_add_c(2,c2,c3,c1);
567 sqr_add_c2(3,1,c2,c3,c1);
568 sqr_add_c2(4,0,c2,c3,c1);
569 stg c2,4*8(%r2)
570 lghi c2,0
571
572 sqr_add_c2(5,0,c3,c1,c2);
573 sqr_add_c2(4,1,c3,c1,c2);
574 sqr_add_c2(3,2,c3,c1,c2);
575 stg c3,5*8(%r2)
576 lghi c3,0
577
578 sqr_add_c(3,c1,c2,c3);
579 sqr_add_c2(4,2,c1,c2,c3);
580 sqr_add_c2(5,1,c1,c2,c3);
581 sqr_add_c2(6,0,c1,c2,c3);
582 stg c1,6*8(%r2)
583 lghi c1,0
584
585 sqr_add_c2(7,0,c2,c3,c1);
586 sqr_add_c2(6,1,c2,c3,c1);
587 sqr_add_c2(5,2,c2,c3,c1);
588 sqr_add_c2(4,3,c2,c3,c1);
589 stg c2,7*8(%r2)
590 lghi c2,0
591
592 sqr_add_c(4,c3,c1,c2);
593 sqr_add_c2(5,3,c3,c1,c2);
594 sqr_add_c2(6,2,c3,c1,c2);
595 sqr_add_c2(7,1,c3,c1,c2);
596 stg c3,8*8(%r2)
597 lghi c3,0
598
599 sqr_add_c2(7,2,c1,c2,c3);
600 sqr_add_c2(6,3,c1,c2,c3);
601 sqr_add_c2(5,4,c1,c2,c3);
602 stg c1,9*8(%r2)
603 lghi c1,0
604
605 sqr_add_c(5,c2,c3,c1);
606 sqr_add_c2(6,4,c2,c3,c1);
607 sqr_add_c2(7,3,c2,c3,c1);
608 stg c2,10*8(%r2)
609 lghi c2,0
610
611 sqr_add_c2(7,4,c3,c1,c2);
612 sqr_add_c2(6,5,c3,c1,c2);
613 stg c3,11*8(%r2)
614 lghi c3,0
615
616 sqr_add_c(6,c1,c2,c3);
617 sqr_add_c2(7,5,c1,c2,c3);
618 stg c1,12*8(%r2)
619 lghi c1,0
620
621 sqr_add_c2(7,6,c2,c3,c1);
622 stg c2,13*8(%r2)
623 lghi c2,0
624
625 sqr_add_c(7,c3,c1,c2);
626 stg c3,14*8(%r2)
627 stg c1,15*8(%r2)
628
629 lmg %r6,%r8,48(%r15)
630 br %r14
631.size bn_sqr_comba8,.-bn_sqr_comba8
632
633// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
634.globl bn_sqr_comba4
635.type bn_sqr_comba4,@function
636.align 4
637bn_sqr_comba4:
638 stmg %r6,%r8,48(%r15)
639
640 lghi c1,0
641 lghi c2,0
642 lghi c3,0
643 lghi zero,0
644
645 sqr_add_c(0,c1,c2,c3);
646 stg c1,0*8(%r2)
647 lghi c1,0
648
649 sqr_add_c2(1,0,c2,c3,c1);
650 stg c2,1*8(%r2)
651 lghi c2,0
652
653 sqr_add_c(1,c3,c1,c2);
654 sqr_add_c2(2,0,c3,c1,c2);
655 stg c3,2*8(%r2)
656 lghi c3,0
657
658 sqr_add_c2(3,0,c1,c2,c3);
659 sqr_add_c2(2,1,c1,c2,c3);
660 stg c1,3*8(%r2)
661 lghi c1,0
662
663 sqr_add_c(2,c2,c3,c1);
664 sqr_add_c2(3,1,c2,c3,c1);
665 stg c2,4*8(%r2)
666 lghi c2,0
667
668 sqr_add_c2(3,2,c3,c1,c2);
669 stg c3,5*8(%r2)
670 lghi c3,0
671
672 sqr_add_c(3,c1,c2,c3);
673 stg c1,6*8(%r2)
674 stg c2,7*8(%r2)
675
676 lmg %r6,%r8,48(%r15)
677 br %r14
678.size bn_sqr_comba4,.-bn_sqr_comba4
diff --git a/src/lib/libcrypto/bn/asm/sparcv8.S b/src/lib/libcrypto/bn/asm/sparcv8.S
deleted file mode 100644
index 88c5dc480a..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv8.S
+++ /dev/null
@@ -1,1458 +0,0 @@
1.ident "sparcv8.s, Version 1.4"
2.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contributon to OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * See bn_asm.sparc.v8plus.S for more details.
22 */
23
24/*
25 * Revision history.
26 *
27 * 1.1 - new loop unrolling model(*);
28 * 1.2 - made gas friendly;
29 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
30 * 1.4 - some retunes;
31 *
32 * (*) see bn_asm.sparc.v8plus.S for details
33 */
34
35.section ".text",#alloc,#execinstr
36.file "bn_asm.sparc.v8.S"
37
38.align 32
39
40.global bn_mul_add_words
41/*
42 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
43 * BN_ULONG *rp,*ap;
44 * int num;
45 * BN_ULONG w;
46 */
47bn_mul_add_words:
48 cmp %o2,0
49 bg,a .L_bn_mul_add_words_proceed
50 ld [%o1],%g2
51 retl
52 clr %o0
53
54.L_bn_mul_add_words_proceed:
55 andcc %o2,-4,%g0
56 bz .L_bn_mul_add_words_tail
57 clr %o5
58
59.L_bn_mul_add_words_loop:
60 ld [%o0],%o4
61 ld [%o1+4],%g3
62 umul %o3,%g2,%g2
63 rd %y,%g1
64 addcc %o4,%o5,%o4
65 addx %g1,0,%g1
66 addcc %o4,%g2,%o4
67 st %o4,[%o0]
68 addx %g1,0,%o5
69
70 ld [%o0+4],%o4
71 ld [%o1+8],%g2
72 umul %o3,%g3,%g3
73 dec 4,%o2
74 rd %y,%g1
75 addcc %o4,%o5,%o4
76 addx %g1,0,%g1
77 addcc %o4,%g3,%o4
78 st %o4,[%o0+4]
79 addx %g1,0,%o5
80
81 ld [%o0+8],%o4
82 ld [%o1+12],%g3
83 umul %o3,%g2,%g2
84 inc 16,%o1
85 rd %y,%g1
86 addcc %o4,%o5,%o4
87 addx %g1,0,%g1
88 addcc %o4,%g2,%o4
89 st %o4,[%o0+8]
90 addx %g1,0,%o5
91
92 ld [%o0+12],%o4
93 umul %o3,%g3,%g3
94 inc 16,%o0
95 rd %y,%g1
96 addcc %o4,%o5,%o4
97 addx %g1,0,%g1
98 addcc %o4,%g3,%o4
99 st %o4,[%o0-4]
100 addx %g1,0,%o5
101 andcc %o2,-4,%g0
102 bnz,a .L_bn_mul_add_words_loop
103 ld [%o1],%g2
104
105 tst %o2
106 bnz,a .L_bn_mul_add_words_tail
107 ld [%o1],%g2
108.L_bn_mul_add_words_return:
109 retl
110 mov %o5,%o0
111 nop
112
113.L_bn_mul_add_words_tail:
114 ld [%o0],%o4
115 umul %o3,%g2,%g2
116 addcc %o4,%o5,%o4
117 rd %y,%g1
118 addx %g1,0,%g1
119 addcc %o4,%g2,%o4
120 addx %g1,0,%o5
121 deccc %o2
122 bz .L_bn_mul_add_words_return
123 st %o4,[%o0]
124
125 ld [%o1+4],%g2
126 ld [%o0+4],%o4
127 umul %o3,%g2,%g2
128 rd %y,%g1
129 addcc %o4,%o5,%o4
130 addx %g1,0,%g1
131 addcc %o4,%g2,%o4
132 addx %g1,0,%o5
133 deccc %o2
134 bz .L_bn_mul_add_words_return
135 st %o4,[%o0+4]
136
137 ld [%o1+8],%g2
138 ld [%o0+8],%o4
139 umul %o3,%g2,%g2
140 rd %y,%g1
141 addcc %o4,%o5,%o4
142 addx %g1,0,%g1
143 addcc %o4,%g2,%o4
144 st %o4,[%o0+8]
145 retl
146 addx %g1,0,%o0
147
148.type bn_mul_add_words,#function
149.size bn_mul_add_words,(.-bn_mul_add_words)
150
151.align 32
152
153.global bn_mul_words
154/*
155 * BN_ULONG bn_mul_words(rp,ap,num,w)
156 * BN_ULONG *rp,*ap;
157 * int num;
158 * BN_ULONG w;
159 */
160bn_mul_words:
161 cmp %o2,0
162 bg,a .L_bn_mul_words_proceeed
163 ld [%o1],%g2
164 retl
165 clr %o0
166
167.L_bn_mul_words_proceeed:
168 andcc %o2,-4,%g0
169 bz .L_bn_mul_words_tail
170 clr %o5
171
172.L_bn_mul_words_loop:
173 ld [%o1+4],%g3
174 umul %o3,%g2,%g2
175 addcc %g2,%o5,%g2
176 rd %y,%g1
177 addx %g1,0,%o5
178 st %g2,[%o0]
179
180 ld [%o1+8],%g2
181 umul %o3,%g3,%g3
182 addcc %g3,%o5,%g3
183 rd %y,%g1
184 dec 4,%o2
185 addx %g1,0,%o5
186 st %g3,[%o0+4]
187
188 ld [%o1+12],%g3
189 umul %o3,%g2,%g2
190 addcc %g2,%o5,%g2
191 rd %y,%g1
192 inc 16,%o1
193 st %g2,[%o0+8]
194 addx %g1,0,%o5
195
196 umul %o3,%g3,%g3
197 addcc %g3,%o5,%g3
198 rd %y,%g1
199 inc 16,%o0
200 addx %g1,0,%o5
201 st %g3,[%o0-4]
202 andcc %o2,-4,%g0
203 nop
204 bnz,a .L_bn_mul_words_loop
205 ld [%o1],%g2
206
207 tst %o2
208 bnz,a .L_bn_mul_words_tail
209 ld [%o1],%g2
210.L_bn_mul_words_return:
211 retl
212 mov %o5,%o0
213 nop
214
215.L_bn_mul_words_tail:
216 umul %o3,%g2,%g2
217 addcc %g2,%o5,%g2
218 rd %y,%g1
219 addx %g1,0,%o5
220 deccc %o2
221 bz .L_bn_mul_words_return
222 st %g2,[%o0]
223 nop
224
225 ld [%o1+4],%g2
226 umul %o3,%g2,%g2
227 addcc %g2,%o5,%g2
228 rd %y,%g1
229 addx %g1,0,%o5
230 deccc %o2
231 bz .L_bn_mul_words_return
232 st %g2,[%o0+4]
233
234 ld [%o1+8],%g2
235 umul %o3,%g2,%g2
236 addcc %g2,%o5,%g2
237 rd %y,%g1
238 st %g2,[%o0+8]
239 retl
240 addx %g1,0,%o0
241
242.type bn_mul_words,#function
243.size bn_mul_words,(.-bn_mul_words)
244
245.align 32
246.global bn_sqr_words
247/*
248 * void bn_sqr_words(r,a,n)
249 * BN_ULONG *r,*a;
250 * int n;
251 */
252bn_sqr_words:
253 cmp %o2,0
254 bg,a .L_bn_sqr_words_proceeed
255 ld [%o1],%g2
256 retl
257 clr %o0
258
259.L_bn_sqr_words_proceeed:
260 andcc %o2,-4,%g0
261 bz .L_bn_sqr_words_tail
262 clr %o5
263
264.L_bn_sqr_words_loop:
265 ld [%o1+4],%g3
266 umul %g2,%g2,%o4
267 st %o4,[%o0]
268 rd %y,%o5
269 st %o5,[%o0+4]
270
271 ld [%o1+8],%g2
272 umul %g3,%g3,%o4
273 dec 4,%o2
274 st %o4,[%o0+8]
275 rd %y,%o5
276 st %o5,[%o0+12]
277 nop
278
279 ld [%o1+12],%g3
280 umul %g2,%g2,%o4
281 st %o4,[%o0+16]
282 rd %y,%o5
283 inc 16,%o1
284 st %o5,[%o0+20]
285
286 umul %g3,%g3,%o4
287 inc 32,%o0
288 st %o4,[%o0-8]
289 rd %y,%o5
290 st %o5,[%o0-4]
291 andcc %o2,-4,%g2
292 bnz,a .L_bn_sqr_words_loop
293 ld [%o1],%g2
294
295 tst %o2
296 nop
297 bnz,a .L_bn_sqr_words_tail
298 ld [%o1],%g2
299.L_bn_sqr_words_return:
300 retl
301 clr %o0
302
303.L_bn_sqr_words_tail:
304 umul %g2,%g2,%o4
305 st %o4,[%o0]
306 deccc %o2
307 rd %y,%o5
308 bz .L_bn_sqr_words_return
309 st %o5,[%o0+4]
310
311 ld [%o1+4],%g2
312 umul %g2,%g2,%o4
313 st %o4,[%o0+8]
314 deccc %o2
315 rd %y,%o5
316 nop
317 bz .L_bn_sqr_words_return
318 st %o5,[%o0+12]
319
320 ld [%o1+8],%g2
321 umul %g2,%g2,%o4
322 st %o4,[%o0+16]
323 rd %y,%o5
324 st %o5,[%o0+20]
325 retl
326 clr %o0
327
328.type bn_sqr_words,#function
329.size bn_sqr_words,(.-bn_sqr_words)
330
331.align 32
332
333.global bn_div_words
334/*
335 * BN_ULONG bn_div_words(h,l,d)
336 * BN_ULONG h,l,d;
337 */
338bn_div_words:
339 wr %o0,%y
340 udiv %o1,%o2,%o0
341 retl
342 nop
343
344.type bn_div_words,#function
345.size bn_div_words,(.-bn_div_words)
346
347.align 32
348
349.global bn_add_words
350/*
351 * BN_ULONG bn_add_words(rp,ap,bp,n)
352 * BN_ULONG *rp,*ap,*bp;
353 * int n;
354 */
355bn_add_words:
356 cmp %o3,0
357 bg,a .L_bn_add_words_proceed
358 ld [%o1],%o4
359 retl
360 clr %o0
361
362.L_bn_add_words_proceed:
363 andcc %o3,-4,%g0
364 bz .L_bn_add_words_tail
365 clr %g1
366 ba .L_bn_add_words_warn_loop
367 addcc %g0,0,%g0 ! clear carry flag
368
369.L_bn_add_words_loop:
370 ld [%o1],%o4
371.L_bn_add_words_warn_loop:
372 ld [%o2],%o5
373 ld [%o1+4],%g3
374 ld [%o2+4],%g4
375 dec 4,%o3
376 addxcc %o5,%o4,%o5
377 st %o5,[%o0]
378
379 ld [%o1+8],%o4
380 ld [%o2+8],%o5
381 inc 16,%o1
382 addxcc %g3,%g4,%g3
383 st %g3,[%o0+4]
384
385 ld [%o1-4],%g3
386 ld [%o2+12],%g4
387 inc 16,%o2
388 addxcc %o5,%o4,%o5
389 st %o5,[%o0+8]
390
391 inc 16,%o0
392 addxcc %g3,%g4,%g3
393 st %g3,[%o0-4]
394 addx %g0,0,%g1
395 andcc %o3,-4,%g0
396 bnz,a .L_bn_add_words_loop
397 addcc %g1,-1,%g0
398
399 tst %o3
400 bnz,a .L_bn_add_words_tail
401 ld [%o1],%o4
402.L_bn_add_words_return:
403 retl
404 mov %g1,%o0
405
406.L_bn_add_words_tail:
407 addcc %g1,-1,%g0
408 ld [%o2],%o5
409 addxcc %o5,%o4,%o5
410 addx %g0,0,%g1
411 deccc %o3
412 bz .L_bn_add_words_return
413 st %o5,[%o0]
414
415 ld [%o1+4],%o4
416 addcc %g1,-1,%g0
417 ld [%o2+4],%o5
418 addxcc %o5,%o4,%o5
419 addx %g0,0,%g1
420 deccc %o3
421 bz .L_bn_add_words_return
422 st %o5,[%o0+4]
423
424 ld [%o1+8],%o4
425 addcc %g1,-1,%g0
426 ld [%o2+8],%o5
427 addxcc %o5,%o4,%o5
428 st %o5,[%o0+8]
429 retl
430 addx %g0,0,%o0
431
432.type bn_add_words,#function
433.size bn_add_words,(.-bn_add_words)
434
435.align 32
436
437.global bn_sub_words
438/*
439 * BN_ULONG bn_sub_words(rp,ap,bp,n)
440 * BN_ULONG *rp,*ap,*bp;
441 * int n;
442 */
443bn_sub_words:
444 cmp %o3,0
445 bg,a .L_bn_sub_words_proceed
446 ld [%o1],%o4
447 retl
448 clr %o0
449
450.L_bn_sub_words_proceed:
451 andcc %o3,-4,%g0
452 bz .L_bn_sub_words_tail
453 clr %g1
454 ba .L_bn_sub_words_warm_loop
455 addcc %g0,0,%g0 ! clear carry flag
456
457.L_bn_sub_words_loop:
458 ld [%o1],%o4
459.L_bn_sub_words_warm_loop:
460 ld [%o2],%o5
461 ld [%o1+4],%g3
462 ld [%o2+4],%g4
463 dec 4,%o3
464 subxcc %o4,%o5,%o5
465 st %o5,[%o0]
466
467 ld [%o1+8],%o4
468 ld [%o2+8],%o5
469 inc 16,%o1
470 subxcc %g3,%g4,%g4
471 st %g4,[%o0+4]
472
473 ld [%o1-4],%g3
474 ld [%o2+12],%g4
475 inc 16,%o2
476 subxcc %o4,%o5,%o5
477 st %o5,[%o0+8]
478
479 inc 16,%o0
480 subxcc %g3,%g4,%g4
481 st %g4,[%o0-4]
482 addx %g0,0,%g1
483 andcc %o3,-4,%g0
484 bnz,a .L_bn_sub_words_loop
485 addcc %g1,-1,%g0
486
487 tst %o3
488 nop
489 bnz,a .L_bn_sub_words_tail
490 ld [%o1],%o4
491.L_bn_sub_words_return:
492 retl
493 mov %g1,%o0
494
495.L_bn_sub_words_tail:
496 addcc %g1,-1,%g0
497 ld [%o2],%o5
498 subxcc %o4,%o5,%o5
499 addx %g0,0,%g1
500 deccc %o3
501 bz .L_bn_sub_words_return
502 st %o5,[%o0]
503 nop
504
505 ld [%o1+4],%o4
506 addcc %g1,-1,%g0
507 ld [%o2+4],%o5
508 subxcc %o4,%o5,%o5
509 addx %g0,0,%g1
510 deccc %o3
511 bz .L_bn_sub_words_return
512 st %o5,[%o0+4]
513
514 ld [%o1+8],%o4
515 addcc %g1,-1,%g0
516 ld [%o2+8],%o5
517 subxcc %o4,%o5,%o5
518 st %o5,[%o0+8]
519 retl
520 addx %g0,0,%o0
521
522.type bn_sub_words,#function
523.size bn_sub_words,(.-bn_sub_words)
524
525#define FRAME_SIZE -96
526
527/*
528 * Here is register usage map for *all* routines below.
529 */
530#define t_1 %o0
531#define t_2 %o1
532#define c_1 %o2
533#define c_2 %o3
534#define c_3 %o4
535
536#define ap(I) [%i1+4*I]
537#define bp(I) [%i2+4*I]
538#define rp(I) [%i0+4*I]
539
540#define a_0 %l0
541#define a_1 %l1
542#define a_2 %l2
543#define a_3 %l3
544#define a_4 %l4
545#define a_5 %l5
546#define a_6 %l6
547#define a_7 %l7
548
549#define b_0 %i3
550#define b_1 %i4
551#define b_2 %i5
552#define b_3 %o5
553#define b_4 %g1
554#define b_5 %g2
555#define b_6 %g3
556#define b_7 %g4
557
558.align 32
559.global bn_mul_comba8
560/*
561 * void bn_mul_comba8(r,a,b)
562 * BN_ULONG *r,*a,*b;
563 */
564bn_mul_comba8:
565 save %sp,FRAME_SIZE,%sp
566 ld ap(0),a_0
567 ld bp(0),b_0
568 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
569 ld bp(1),b_1
570 rd %y,c_2
571 st c_1,rp(0) !r[0]=c1;
572
573 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
574 ld ap(1),a_1
575 addcc c_2,t_1,c_2
576 rd %y,t_2
577 addxcc %g0,t_2,c_3 !=
578 addx %g0,%g0,c_1
579 ld ap(2),a_2
580 umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1);
581 addcc c_2,t_1,c_2 !=
582 rd %y,t_2
583 addxcc c_3,t_2,c_3
584 st c_2,rp(1) !r[1]=c2;
585 addx c_1,%g0,c_1 !=
586
587 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
588 addcc c_3,t_1,c_3
589 rd %y,t_2
590 addxcc c_1,t_2,c_1 !=
591 addx %g0,%g0,c_2
592 ld bp(2),b_2
593 umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
594 addcc c_3,t_1,c_3 !=
595 rd %y,t_2
596 addxcc c_1,t_2,c_1
597 ld bp(3),b_3
598 addx c_2,%g0,c_2 !=
599 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
600 addcc c_3,t_1,c_3
601 rd %y,t_2
602 addxcc c_1,t_2,c_1 !=
603 addx c_2,%g0,c_2
604 st c_3,rp(2) !r[2]=c3;
605
606 umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
607 addcc c_1,t_1,c_1 !=
608 rd %y,t_2
609 addxcc c_2,t_2,c_2
610 addx %g0,%g0,c_3
611 umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
612 addcc c_1,t_1,c_1
613 rd %y,t_2
614 addxcc c_2,t_2,c_2
615 addx c_3,%g0,c_3 !=
616 ld ap(3),a_3
617 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
618 addcc c_1,t_1,c_1
619 rd %y,t_2 !=
620 addxcc c_2,t_2,c_2
621 addx c_3,%g0,c_3
622 ld ap(4),a_4
623 umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
624 addcc c_1,t_1,c_1
625 rd %y,t_2
626 addxcc c_2,t_2,c_2
627 addx c_3,%g0,c_3 !=
628 st c_1,rp(3) !r[3]=c1;
629
630 umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
631 addcc c_2,t_1,c_2
632 rd %y,t_2 !=
633 addxcc c_3,t_2,c_3
634 addx %g0,%g0,c_1
635 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
636 addcc c_2,t_1,c_2 !=
637 rd %y,t_2
638 addxcc c_3,t_2,c_3
639 addx c_1,%g0,c_1
640 umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
641 addcc c_2,t_1,c_2
642 rd %y,t_2
643 addxcc c_3,t_2,c_3
644 addx c_1,%g0,c_1 !=
645 ld bp(4),b_4
646 umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
647 addcc c_2,t_1,c_2
648 rd %y,t_2 !=
649 addxcc c_3,t_2,c_3
650 addx c_1,%g0,c_1
651 ld bp(5),b_5
652 umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1);
653 addcc c_2,t_1,c_2
654 rd %y,t_2
655 addxcc c_3,t_2,c_3
656 addx c_1,%g0,c_1 !=
657 st c_2,rp(4) !r[4]=c2;
658
659 umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
660 addcc c_3,t_1,c_3
661 rd %y,t_2 !=
662 addxcc c_1,t_2,c_1
663 addx %g0,%g0,c_2
664 umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
665 addcc c_3,t_1,c_3 !=
666 rd %y,t_2
667 addxcc c_1,t_2,c_1
668 addx c_2,%g0,c_2
669 umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2);
670 addcc c_3,t_1,c_3
671 rd %y,t_2
672 addxcc c_1,t_2,c_1
673 addx c_2,%g0,c_2 !=
674 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
675 addcc c_3,t_1,c_3
676 rd %y,t_2
677 addxcc c_1,t_2,c_1 !=
678 addx c_2,%g0,c_2
679 ld ap(5),a_5
680 umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
681 addcc c_3,t_1,c_3 !=
682 rd %y,t_2
683 addxcc c_1,t_2,c_1
684 ld ap(6),a_6
685 addx c_2,%g0,c_2 !=
686 umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2);
687 addcc c_3,t_1,c_3
688 rd %y,t_2
689 addxcc c_1,t_2,c_1 !=
690 addx c_2,%g0,c_2
691 st c_3,rp(5) !r[5]=c3;
692
693 umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
694 addcc c_1,t_1,c_1 !=
695 rd %y,t_2
696 addxcc c_2,t_2,c_2
697 addx %g0,%g0,c_3
698 umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
699 addcc c_1,t_1,c_1
700 rd %y,t_2
701 addxcc c_2,t_2,c_2
702 addx c_3,%g0,c_3 !=
703 umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3);
704 addcc c_1,t_1,c_1
705 rd %y,t_2
706 addxcc c_2,t_2,c_2 !=
707 addx c_3,%g0,c_3
708 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
709 addcc c_1,t_1,c_1
710 rd %y,t_2 !=
711 addxcc c_2,t_2,c_2
712 addx c_3,%g0,c_3
713 umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3);
714 addcc c_1,t_1,c_1 !=
715 rd %y,t_2
716 addxcc c_2,t_2,c_2
717 ld bp(6),b_6
718 addx c_3,%g0,c_3 !=
719 umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
720 addcc c_1,t_1,c_1
721 rd %y,t_2
722 addxcc c_2,t_2,c_2 !=
723 addx c_3,%g0,c_3
724 ld bp(7),b_7
725 umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
726 addcc c_1,t_1,c_1 !=
727 rd %y,t_2
728 addxcc c_2,t_2,c_2
729 st c_1,rp(6) !r[6]=c1;
730 addx c_3,%g0,c_3 !=
731
732 umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
733 addcc c_2,t_1,c_2
734 rd %y,t_2
735 addxcc c_3,t_2,c_3 !=
736 addx %g0,%g0,c_1
737 umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
738 addcc c_2,t_1,c_2
739 rd %y,t_2 !=
740 addxcc c_3,t_2,c_3
741 addx c_1,%g0,c_1
742 umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
743 addcc c_2,t_1,c_2 !=
744 rd %y,t_2
745 addxcc c_3,t_2,c_3
746 addx c_1,%g0,c_1
747 umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1);
748 addcc c_2,t_1,c_2
749 rd %y,t_2
750 addxcc c_3,t_2,c_3
751 addx c_1,%g0,c_1 !=
752 umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
753 addcc c_2,t_1,c_2
754 rd %y,t_2
755 addxcc c_3,t_2,c_3 !=
756 addx c_1,%g0,c_1
757 umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
758 addcc c_2,t_1,c_2
759 rd %y,t_2 !=
760 addxcc c_3,t_2,c_3
761 addx c_1,%g0,c_1
762 ld ap(7),a_7
763 umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
764 addcc c_2,t_1,c_2
765 rd %y,t_2
766 addxcc c_3,t_2,c_3
767 addx c_1,%g0,c_1 !=
768 umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1);
769 addcc c_2,t_1,c_2
770 rd %y,t_2
771 addxcc c_3,t_2,c_3 !=
772 addx c_1,%g0,c_1
773 st c_2,rp(7) !r[7]=c2;
774
775 umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2);
776 addcc c_3,t_1,c_3 !=
777 rd %y,t_2
778 addxcc c_1,t_2,c_1
779 addx %g0,%g0,c_2
780 umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2);
781 addcc c_3,t_1,c_3
782 rd %y,t_2
783 addxcc c_1,t_2,c_1
784 addx c_2,%g0,c_2 !=
785 umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
786 addcc c_3,t_1,c_3
787 rd %y,t_2
788 addxcc c_1,t_2,c_1 !=
789 addx c_2,%g0,c_2
790 umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
791 addcc c_3,t_1,c_3
792 rd %y,t_2 !=
793 addxcc c_1,t_2,c_1
794 addx c_2,%g0,c_2
795 umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
796 addcc c_3,t_1,c_3 !=
797 rd %y,t_2
798 addxcc c_1,t_2,c_1
799 addx c_2,%g0,c_2
800 umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2);
801 addcc c_3,t_1,c_3
802 rd %y,t_2
803 addxcc c_1,t_2,c_1
804 addx c_2,%g0,c_2 !=
805 umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
806 addcc c_3,t_1,c_3
807 rd %y,t_2
808 addxcc c_1,t_2,c_1 !
809 addx c_2,%g0,c_2
810 st c_3,rp(8) !r[8]=c3;
811
812 umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3);
813 addcc c_1,t_1,c_1 !=
814 rd %y,t_2
815 addxcc c_2,t_2,c_2
816 addx %g0,%g0,c_3
817 umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3);
818 addcc c_1,t_1,c_1
819 rd %y,t_2
820 addxcc c_2,t_2,c_2
821 addx c_3,%g0,c_3 !=
822 umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
823 addcc c_1,t_1,c_1
824 rd %y,t_2
825 addxcc c_2,t_2,c_2 !=
826 addx c_3,%g0,c_3
827 umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
828 addcc c_1,t_1,c_1
829 rd %y,t_2 !=
830 addxcc c_2,t_2,c_2
831 addx c_3,%g0,c_3
832 umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
833 addcc c_1,t_1,c_1 !=
834 rd %y,t_2
835 addxcc c_2,t_2,c_2
836 addx c_3,%g0,c_3
837 umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3);
838 addcc c_1,t_1,c_1
839 rd %y,t_2
840 addxcc c_2,t_2,c_2
841 addx c_3,%g0,c_3 !=
842 st c_1,rp(9) !r[9]=c1;
843
844 umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
845 addcc c_2,t_1,c_2
846 rd %y,t_2 !=
847 addxcc c_3,t_2,c_3
848 addx %g0,%g0,c_1
849 umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
850 addcc c_2,t_1,c_2 !=
851 rd %y,t_2
852 addxcc c_3,t_2,c_3
853 addx c_1,%g0,c_1
854 umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1);
855 addcc c_2,t_1,c_2
856 rd %y,t_2
857 addxcc c_3,t_2,c_3
858 addx c_1,%g0,c_1 !=
859 umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
860 addcc c_2,t_1,c_2
861 rd %y,t_2
862 addxcc c_3,t_2,c_3 !=
863 addx c_1,%g0,c_1
864 umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
865 addcc c_2,t_1,c_2
866 rd %y,t_2 !=
867 addxcc c_3,t_2,c_3
868 addx c_1,%g0,c_1
869 st c_2,rp(10) !r[10]=c2;
870
871 umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2);
872 addcc c_3,t_1,c_3
873 rd %y,t_2
874 addxcc c_1,t_2,c_1
875 addx %g0,%g0,c_2 !=
876 umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
877 addcc c_3,t_1,c_3
878 rd %y,t_2
879 addxcc c_1,t_2,c_1 !=
880 addx c_2,%g0,c_2
881 umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
882 addcc c_3,t_1,c_3
883 rd %y,t_2 !=
884 addxcc c_1,t_2,c_1
885 addx c_2,%g0,c_2
886 umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
887 addcc c_3,t_1,c_3 !=
888 rd %y,t_2
889 addxcc c_1,t_2,c_1
890 st c_3,rp(11) !r[11]=c3;
891 addx c_2,%g0,c_2 !=
892
893 umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
894 addcc c_1,t_1,c_1
895 rd %y,t_2
896 addxcc c_2,t_2,c_2 !=
897 addx %g0,%g0,c_3
898 umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
899 addcc c_1,t_1,c_1
900 rd %y,t_2 !=
901 addxcc c_2,t_2,c_2
902 addx c_3,%g0,c_3
903 umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
904 addcc c_1,t_1,c_1 !=
905 rd %y,t_2
906 addxcc c_2,t_2,c_2
907 st c_1,rp(12) !r[12]=c1;
908 addx c_3,%g0,c_3 !=
909
910 umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
911 addcc c_2,t_1,c_2
912 rd %y,t_2
913 addxcc c_3,t_2,c_3 !=
914 addx %g0,%g0,c_1
915 umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
916 addcc c_2,t_1,c_2
917 rd %y,t_2 !=
918 addxcc c_3,t_2,c_3
919 addx c_1,%g0,c_1
920 st c_2,rp(13) !r[13]=c2;
921
922 umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2);
923 addcc c_3,t_1,c_3
924 rd %y,t_2
925 addxcc c_1,t_2,c_1
926 nop !=
927 st c_3,rp(14) !r[14]=c3;
928 st c_1,rp(15) !r[15]=c1;
929
930 ret
931 restore %g0,%g0,%o0
932
933.type bn_mul_comba8,#function
934.size bn_mul_comba8,(.-bn_mul_comba8)
935
936.align 32
937
938.global bn_mul_comba4
939/*
940 * void bn_mul_comba4(r,a,b)
941 * BN_ULONG *r,*a,*b;
942 */
943bn_mul_comba4:
944 save %sp,FRAME_SIZE,%sp
945 ld ap(0),a_0
946 ld bp(0),b_0
947 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
948 ld bp(1),b_1
949 rd %y,c_2
950 st c_1,rp(0) !r[0]=c1;
951
952 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
953 ld ap(1),a_1
954 addcc c_2,t_1,c_2
955 rd %y,t_2 !=
956 addxcc %g0,t_2,c_3
957 addx %g0,%g0,c_1
958 ld ap(2),a_2
959 umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
960 addcc c_2,t_1,c_2
961 rd %y,t_2
962 addxcc c_3,t_2,c_3
963 addx c_1,%g0,c_1 !=
964 st c_2,rp(1) !r[1]=c2;
965
966 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
967 addcc c_3,t_1,c_3
968 rd %y,t_2 !=
969 addxcc c_1,t_2,c_1
970 addx %g0,%g0,c_2
971 ld bp(2),b_2
972 umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2);
973 addcc c_3,t_1,c_3
974 rd %y,t_2
975 addxcc c_1,t_2,c_1
976 addx c_2,%g0,c_2 !=
977 ld bp(3),b_3
978 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
979 addcc c_3,t_1,c_3
980 rd %y,t_2 !=
981 addxcc c_1,t_2,c_1
982 addx c_2,%g0,c_2
983 st c_3,rp(2) !r[2]=c3;
984
985 umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3);
986 addcc c_1,t_1,c_1
987 rd %y,t_2
988 addxcc c_2,t_2,c_2
989 addx %g0,%g0,c_3 !=
990 umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
991 addcc c_1,t_1,c_1
992 rd %y,t_2
993 addxcc c_2,t_2,c_2 !=
994 addx c_3,%g0,c_3
995 ld ap(3),a_3
996 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
997 addcc c_1,t_1,c_1 !=
998 rd %y,t_2
999 addxcc c_2,t_2,c_2
1000 addx c_3,%g0,c_3
1001 umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);
1002 addcc c_1,t_1,c_1
1003 rd %y,t_2
1004 addxcc c_2,t_2,c_2
1005 addx c_3,%g0,c_3 !=
1006 st c_1,rp(3) !r[3]=c1;
1007
1008 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1009 addcc c_2,t_1,c_2
1010 rd %y,t_2 !=
1011 addxcc c_3,t_2,c_3
1012 addx %g0,%g0,c_1
1013 umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1014 addcc c_2,t_1,c_2 !=
1015 rd %y,t_2
1016 addxcc c_3,t_2,c_3
1017 addx c_1,%g0,c_1
1018 umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1);
1019 addcc c_2,t_1,c_2
1020 rd %y,t_2
1021 addxcc c_3,t_2,c_3
1022 addx c_1,%g0,c_1 !=
1023 st c_2,rp(4) !r[4]=c2;
1024
1025 umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1026 addcc c_3,t_1,c_3
1027 rd %y,t_2 !=
1028 addxcc c_1,t_2,c_1
1029 addx %g0,%g0,c_2
1030 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1031 addcc c_3,t_1,c_3 !=
1032 rd %y,t_2
1033 addxcc c_1,t_2,c_1
1034 st c_3,rp(5) !r[5]=c3;
1035 addx c_2,%g0,c_2 !=
1036
1037 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1038 addcc c_1,t_1,c_1
1039 rd %y,t_2
1040 addxcc c_2,t_2,c_2 !=
1041 st c_1,rp(6) !r[6]=c1;
1042 st c_2,rp(7) !r[7]=c2;
1043
1044 ret
1045 restore %g0,%g0,%o0
1046
1047.type bn_mul_comba4,#function
1048.size bn_mul_comba4,(.-bn_mul_comba4)
1049
1050.align 32
1051
1052.global bn_sqr_comba8
1053bn_sqr_comba8:
1054 save %sp,FRAME_SIZE,%sp
1055 ld ap(0),a_0
1056 ld ap(1),a_1
1057 umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3);
1058 rd %y,c_2
1059 st c_1,rp(0) !r[0]=c1;
1060
1061 ld ap(2),a_2
1062 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1063 addcc c_2,t_1,c_2
1064 rd %y,t_2
1065 addxcc %g0,t_2,c_3
1066 addx %g0,%g0,c_1 !=
1067 addcc c_2,t_1,c_2
1068 addxcc c_3,t_2,c_3
1069 st c_2,rp(1) !r[1]=c2;
1070 addx c_1,%g0,c_1 !=
1071
1072 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1073 addcc c_3,t_1,c_3
1074 rd %y,t_2
1075 addxcc c_1,t_2,c_1 !=
1076 addx %g0,%g0,c_2
1077 addcc c_3,t_1,c_3
1078 addxcc c_1,t_2,c_1
1079 addx c_2,%g0,c_2 !=
1080 ld ap(3),a_3
1081 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1082 addcc c_3,t_1,c_3
1083 rd %y,t_2 !=
1084 addxcc c_1,t_2,c_1
1085 addx c_2,%g0,c_2
1086 st c_3,rp(2) !r[2]=c3;
1087
1088 umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3);
1089 addcc c_1,t_1,c_1
1090 rd %y,t_2
1091 addxcc c_2,t_2,c_2
1092 addx %g0,%g0,c_3 !=
1093 addcc c_1,t_1,c_1
1094 addxcc c_2,t_2,c_2
1095 ld ap(4),a_4
1096 addx c_3,%g0,c_3 !=
1097 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1098 addcc c_1,t_1,c_1
1099 rd %y,t_2
1100 addxcc c_2,t_2,c_2 !=
1101 addx c_3,%g0,c_3
1102 addcc c_1,t_1,c_1
1103 addxcc c_2,t_2,c_2
1104 addx c_3,%g0,c_3 !=
1105 st c_1,rp(3) !r[3]=c1;
1106
1107 umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1108 addcc c_2,t_1,c_2
1109 rd %y,t_2 !=
1110 addxcc c_3,t_2,c_3
1111 addx %g0,%g0,c_1
1112 addcc c_2,t_1,c_2
1113 addxcc c_3,t_2,c_3 !=
1114 addx c_1,%g0,c_1
1115 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1116 addcc c_2,t_1,c_2
1117 rd %y,t_2 !=
1118 addxcc c_3,t_2,c_3
1119 addx c_1,%g0,c_1
1120 addcc c_2,t_1,c_2
1121 addxcc c_3,t_2,c_3 !=
1122 addx c_1,%g0,c_1
1123 ld ap(5),a_5
1124 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1125 addcc c_2,t_1,c_2 !=
1126 rd %y,t_2
1127 addxcc c_3,t_2,c_3
1128 st c_2,rp(4) !r[4]=c2;
1129 addx c_1,%g0,c_1 !=
1130
1131 umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1132 addcc c_3,t_1,c_3
1133 rd %y,t_2
1134 addxcc c_1,t_2,c_1 !=
1135 addx %g0,%g0,c_2
1136 addcc c_3,t_1,c_3
1137 addxcc c_1,t_2,c_1
1138 addx c_2,%g0,c_2 !=
1139 umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1140 addcc c_3,t_1,c_3
1141 rd %y,t_2
1142 addxcc c_1,t_2,c_1 !=
1143 addx c_2,%g0,c_2
1144 addcc c_3,t_1,c_3
1145 addxcc c_1,t_2,c_1
1146 addx c_2,%g0,c_2 !=
1147 ld ap(6),a_6
1148 umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1149 addcc c_3,t_1,c_3
1150 rd %y,t_2 !=
1151 addxcc c_1,t_2,c_1
1152 addx c_2,%g0,c_2
1153 addcc c_3,t_1,c_3
1154 addxcc c_1,t_2,c_1 !=
1155 addx c_2,%g0,c_2
1156 st c_3,rp(5) !r[5]=c3;
1157
1158 umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1159 addcc c_1,t_1,c_1 !=
1160 rd %y,t_2
1161 addxcc c_2,t_2,c_2
1162 addx %g0,%g0,c_3
1163 addcc c_1,t_1,c_1 !=
1164 addxcc c_2,t_2,c_2
1165 addx c_3,%g0,c_3
1166 umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1167 addcc c_1,t_1,c_1 !=
1168 rd %y,t_2
1169 addxcc c_2,t_2,c_2
1170 addx c_3,%g0,c_3
1171 addcc c_1,t_1,c_1 !=
1172 addxcc c_2,t_2,c_2
1173 addx c_3,%g0,c_3
1174 umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1175 addcc c_1,t_1,c_1 !=
1176 rd %y,t_2
1177 addxcc c_2,t_2,c_2
1178 addx c_3,%g0,c_3
1179 addcc c_1,t_1,c_1 !=
1180 addxcc c_2,t_2,c_2
1181 addx c_3,%g0,c_3
1182 ld ap(7),a_7
1183 umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1184 addcc c_1,t_1,c_1
1185 rd %y,t_2
1186 addxcc c_2,t_2,c_2
1187 addx c_3,%g0,c_3 !=
1188 st c_1,rp(6) !r[6]=c1;
1189
1190 umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1191 addcc c_2,t_1,c_2
1192 rd %y,t_2 !=
1193 addxcc c_3,t_2,c_3
1194 addx %g0,%g0,c_1
1195 addcc c_2,t_1,c_2
1196 addxcc c_3,t_2,c_3 !=
1197 addx c_1,%g0,c_1
1198 umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1199 addcc c_2,t_1,c_2
1200 rd %y,t_2 !=
1201 addxcc c_3,t_2,c_3
1202 addx c_1,%g0,c_1
1203 addcc c_2,t_1,c_2
1204 addxcc c_3,t_2,c_3 !=
1205 addx c_1,%g0,c_1
1206 umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1207 addcc c_2,t_1,c_2
1208 rd %y,t_2 !=
1209 addxcc c_3,t_2,c_3
1210 addx c_1,%g0,c_1
1211 addcc c_2,t_1,c_2
1212 addxcc c_3,t_2,c_3 !=
1213 addx c_1,%g0,c_1
1214 umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1215 addcc c_2,t_1,c_2
1216 rd %y,t_2 !=
1217 addxcc c_3,t_2,c_3
1218 addx c_1,%g0,c_1
1219 addcc c_2,t_1,c_2
1220 addxcc c_3,t_2,c_3 !=
1221 addx c_1,%g0,c_1
1222 st c_2,rp(7) !r[7]=c2;
1223
1224 umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1225 addcc c_3,t_1,c_3 !=
1226 rd %y,t_2
1227 addxcc c_1,t_2,c_1
1228 addx %g0,%g0,c_2
1229 addcc c_3,t_1,c_3 !=
1230 addxcc c_1,t_2,c_1
1231 addx c_2,%g0,c_2
1232 umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1233 addcc c_3,t_1,c_3 !=
1234 rd %y,t_2
1235 addxcc c_1,t_2,c_1
1236 addx c_2,%g0,c_2
1237 addcc c_3,t_1,c_3 !=
1238 addxcc c_1,t_2,c_1
1239 addx c_2,%g0,c_2
1240 umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1241 addcc c_3,t_1,c_3 !=
1242 rd %y,t_2
1243 addxcc c_1,t_2,c_1
1244 addx c_2,%g0,c_2
1245 addcc c_3,t_1,c_3 !=
1246 addxcc c_1,t_2,c_1
1247 addx c_2,%g0,c_2
1248 umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1249 addcc c_3,t_1,c_3 !=
1250 rd %y,t_2
1251 addxcc c_1,t_2,c_1
1252 st c_3,rp(8) !r[8]=c3;
1253 addx c_2,%g0,c_2 !=
1254
1255 umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1256 addcc c_1,t_1,c_1
1257 rd %y,t_2
1258 addxcc c_2,t_2,c_2 !=
1259 addx %g0,%g0,c_3
1260 addcc c_1,t_1,c_1
1261 addxcc c_2,t_2,c_2
1262 addx c_3,%g0,c_3 !=
1263 umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1264 addcc c_1,t_1,c_1
1265 rd %y,t_2
1266 addxcc c_2,t_2,c_2 !=
1267 addx c_3,%g0,c_3
1268 addcc c_1,t_1,c_1
1269 addxcc c_2,t_2,c_2
1270 addx c_3,%g0,c_3 !=
1271 umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1272 addcc c_1,t_1,c_1
1273 rd %y,t_2
1274 addxcc c_2,t_2,c_2 !=
1275 addx c_3,%g0,c_3
1276 addcc c_1,t_1,c_1
1277 addxcc c_2,t_2,c_2
1278 addx c_3,%g0,c_3 !=
1279 st c_1,rp(9) !r[9]=c1;
1280
1281 umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1282 addcc c_2,t_1,c_2
1283 rd %y,t_2 !=
1284 addxcc c_3,t_2,c_3
1285 addx %g0,%g0,c_1
1286 addcc c_2,t_1,c_2
1287 addxcc c_3,t_2,c_3 !=
1288 addx c_1,%g0,c_1
1289 umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1290 addcc c_2,t_1,c_2
1291 rd %y,t_2 !=
1292 addxcc c_3,t_2,c_3
1293 addx c_1,%g0,c_1
1294 addcc c_2,t_1,c_2
1295 addxcc c_3,t_2,c_3 !=
1296 addx c_1,%g0,c_1
1297 umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1298 addcc c_2,t_1,c_2
1299 rd %y,t_2 !=
1300 addxcc c_3,t_2,c_3
1301 addx c_1,%g0,c_1
1302 st c_2,rp(10) !r[10]=c2;
1303
1304 umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2);
1305 addcc c_3,t_1,c_3
1306 rd %y,t_2
1307 addxcc c_1,t_2,c_1
1308 addx %g0,%g0,c_2 !=
1309 addcc c_3,t_1,c_3
1310 addxcc c_1,t_2,c_1
1311 addx c_2,%g0,c_2
1312 umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2);
1313 addcc c_3,t_1,c_3
1314 rd %y,t_2
1315 addxcc c_1,t_2,c_1
1316 addx c_2,%g0,c_2 !=
1317 addcc c_3,t_1,c_3
1318 addxcc c_1,t_2,c_1
1319 st c_3,rp(11) !r[11]=c3;
1320 addx c_2,%g0,c_2 !=
1321
1322 umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1323 addcc c_1,t_1,c_1
1324 rd %y,t_2
1325 addxcc c_2,t_2,c_2 !=
1326 addx %g0,%g0,c_3
1327 addcc c_1,t_1,c_1
1328 addxcc c_2,t_2,c_2
1329 addx c_3,%g0,c_3 !=
1330 umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1331 addcc c_1,t_1,c_1
1332 rd %y,t_2
1333 addxcc c_2,t_2,c_2 !=
1334 addx c_3,%g0,c_3
1335 st c_1,rp(12) !r[12]=c1;
1336
1337 umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1338 addcc c_2,t_1,c_2 !=
1339 rd %y,t_2
1340 addxcc c_3,t_2,c_3
1341 addx %g0,%g0,c_1
1342 addcc c_2,t_1,c_2 !=
1343 addxcc c_3,t_2,c_3
1344 st c_2,rp(13) !r[13]=c2;
1345 addx c_1,%g0,c_1 !=
1346
1347 umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1348 addcc c_3,t_1,c_3
1349 rd %y,t_2
1350 addxcc c_1,t_2,c_1 !=
1351 st c_3,rp(14) !r[14]=c3;
1352 st c_1,rp(15) !r[15]=c1;
1353
1354 ret
1355 restore %g0,%g0,%o0
1356
1357.type bn_sqr_comba8,#function
1358.size bn_sqr_comba8,(.-bn_sqr_comba8)
1359
1360.align 32
1361
1362.global bn_sqr_comba4
1363/*
1364 * void bn_sqr_comba4(r,a)
1365 * BN_ULONG *r,*a;
1366 */
1367bn_sqr_comba4:
1368 save %sp,FRAME_SIZE,%sp
1369 ld ap(0),a_0
1370 umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3);
1371 ld ap(1),a_1 !=
1372 rd %y,c_2
1373 st c_1,rp(0) !r[0]=c1;
1374
1375 ld ap(2),a_2
1376 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1377 addcc c_2,t_1,c_2
1378 rd %y,t_2
1379 addxcc %g0,t_2,c_3
1380 addx %g0,%g0,c_1 !=
1381 addcc c_2,t_1,c_2
1382 addxcc c_3,t_2,c_3
1383 addx c_1,%g0,c_1 !=
1384 st c_2,rp(1) !r[1]=c2;
1385
1386 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1387 addcc c_3,t_1,c_3
1388 rd %y,t_2 !=
1389 addxcc c_1,t_2,c_1
1390 addx %g0,%g0,c_2
1391 addcc c_3,t_1,c_3
1392 addxcc c_1,t_2,c_1 !=
1393 addx c_2,%g0,c_2
1394 ld ap(3),a_3
1395 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1396 addcc c_3,t_1,c_3 !=
1397 rd %y,t_2
1398 addxcc c_1,t_2,c_1
1399 st c_3,rp(2) !r[2]=c3;
1400 addx c_2,%g0,c_2 !=
1401
1402 umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1403 addcc c_1,t_1,c_1
1404 rd %y,t_2
1405 addxcc c_2,t_2,c_2 !=
1406 addx %g0,%g0,c_3
1407 addcc c_1,t_1,c_1
1408 addxcc c_2,t_2,c_2
1409 addx c_3,%g0,c_3 !=
1410 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1411 addcc c_1,t_1,c_1
1412 rd %y,t_2
1413 addxcc c_2,t_2,c_2 !=
1414 addx c_3,%g0,c_3
1415 addcc c_1,t_1,c_1
1416 addxcc c_2,t_2,c_2
1417 addx c_3,%g0,c_3 !=
1418 st c_1,rp(3) !r[3]=c1;
1419
1420 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1421 addcc c_2,t_1,c_2
1422 rd %y,t_2 !=
1423 addxcc c_3,t_2,c_3
1424 addx %g0,%g0,c_1
1425 addcc c_2,t_1,c_2
1426 addxcc c_3,t_2,c_3 !=
1427 addx c_1,%g0,c_1
1428 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1429 addcc c_2,t_1,c_2
1430 rd %y,t_2 !=
1431 addxcc c_3,t_2,c_3
1432 addx c_1,%g0,c_1
1433 st c_2,rp(4) !r[4]=c2;
1434
1435 umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2);
1436 addcc c_3,t_1,c_3
1437 rd %y,t_2
1438 addxcc c_1,t_2,c_1
1439 addx %g0,%g0,c_2 !=
1440 addcc c_3,t_1,c_3
1441 addxcc c_1,t_2,c_1
1442 st c_3,rp(5) !r[5]=c3;
1443 addx c_2,%g0,c_2 !=
1444
1445 umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1446 addcc c_1,t_1,c_1
1447 rd %y,t_2
1448 addxcc c_2,t_2,c_2 !=
1449 st c_1,rp(6) !r[6]=c1;
1450 st c_2,rp(7) !r[7]=c2;
1451
1452 ret
1453 restore %g0,%g0,%o0
1454
1455.type bn_sqr_comba4,#function
1456.size bn_sqr_comba4,(.-bn_sqr_comba4)
1457
1458.align 32
diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S
deleted file mode 100644
index 63de1860f2..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv8plus.S
+++ /dev/null
@@ -1,1558 +0,0 @@
1.ident "sparcv8plus.s, Version 1.4"
2.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contributon to OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * Questions-n-answers.
22 *
23 * Q. How to compile?
24 * A. With SC4.x/SC5.x:
25 *
26 * cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
27 *
28 * and with gcc:
29 *
30 * gcc -mcpu=ultrasparc -c bn_asm.sparc.v8plus.S -o bn_asm.o
31 *
32 * or if above fails (it does if you have gas installed):
33 *
34 * gcc -E bn_asm.sparc.v8plus.S | as -xarch=v8plus /dev/fd/0 -o bn_asm.o
35 *
36 * Quick-n-dirty way to fuse the module into the library.
37 * Provided that the library is already configured and built
38 * (in 0.9.2 case with no-asm option):
39 *
40 * # cd crypto/bn
41 * # cp /some/place/bn_asm.sparc.v8plus.S .
42 * # cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
43 * # make
44 * # cd ../..
45 * # make; make test
46 *
47 * Quick-n-dirty way to get rid of it:
48 *
49 * # cd crypto/bn
50 * # touch bn_asm.c
51 * # make
52 * # cd ../..
53 * # make; make test
54 *
55 * Q. V8plus achitecture? What kind of beast is that?
56 * A. Well, it's rather a programming model than an architecture...
57 * It's actually v9-compliant, i.e. *any* UltraSPARC, CPU under
58 * special conditions, namely when kernel doesn't preserve upper
59 * 32 bits of otherwise 64-bit registers during a context switch.
60 *
61 * Q. Why just UltraSPARC? What about SuperSPARC?
62 * A. Original release did target UltraSPARC only. Now SuperSPARC
63 * version is provided along. Both version share bn_*comba[48]
64 * implementations (see comment later in code for explanation).
65 * But what's so special about this UltraSPARC implementation?
66 * Why didn't I let compiler do the job? Trouble is that most of
67 * available compilers (well, SC5.0 is the only exception) don't
68 * attempt to take advantage of UltraSPARC's 64-bitness under
69 * 32-bit kernels even though it's perfectly possible (see next
70 * question).
71 *
72 * Q. 64-bit registers under 32-bit kernels? Didn't you just say it
73 * doesn't work?
74 * A. You can't adress *all* registers as 64-bit wide:-( The catch is
75 * that you actually may rely upon %o0-%o5 and %g1-%g4 being fully
76 * preserved if you're in a leaf function, i.e. such never calling
77 * any other functions. All functions in this module are leaf and
78 * 10 registers is a handful. And as a matter of fact none-"comba"
79 * routines don't require even that much and I could even afford to
80 * not allocate own stack frame for 'em:-)
81 *
82 * Q. What about 64-bit kernels?
83 * A. What about 'em? Just kidding:-) Pure 64-bit version is currently
84 * under evaluation and development...
85 *
86 * Q. What about shared libraries?
87 * A. What about 'em? Kidding again:-) Code does *not* contain any
88 * code position dependencies and it's safe to include it into
89 * shared library as is.
90 *
91 * Q. How much faster does it go?
92 * A. Do you have a good benchmark? In either case below is what I
93 * experience with crypto/bn/expspeed.c test program:
94 *
95 * v8plus module on U10/300MHz against bn_asm.c compiled with:
96 *
97 * cc-5.0 -xarch=v8plus -xO5 -xdepend +7-12%
98 * cc-4.2 -xarch=v8plus -xO5 -xdepend +25-35%
99 * egcs-1.1.2 -mcpu=ultrasparc -O3 +35-45%
100 *
101 * v8 module on SS10/60MHz against bn_asm.c compiled with:
102 *
103 * cc-5.0 -xarch=v8 -xO5 -xdepend +7-10%
104 * cc-4.2 -xarch=v8 -xO5 -xdepend +10%
105 * egcs-1.1.2 -mv8 -O3 +35-45%
106 *
107 * As you can see it's damn hard to beat the new Sun C compiler
108 * and it's in first place GNU C users who will appreciate this
109 * assembler implementation:-)
110 */
111
112/*
113 * Revision history.
114 *
115 * 1.0 - initial release;
116 * 1.1 - new loop unrolling model(*);
117 * - some more fine tuning;
118 * 1.2 - made gas friendly;
119 * - updates to documentation concerning v9;
120 * - new performance comparison matrix;
121 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
122 * 1.4 - native V9 bn_*_comba[48] implementation (15% more efficient)
123 * resulting in slight overall performance kick;
124 * - some retunes;
125 * - support for GNU as added;
126 *
127 * (*) Originally unrolled loop looked like this:
128 * for (;;) {
129 * op(p+0); if (--n==0) break;
130 * op(p+1); if (--n==0) break;
131 * op(p+2); if (--n==0) break;
132 * op(p+3); if (--n==0) break;
133 * p+=4;
134 * }
135 * I unroll according to following:
136 * while (n&~3) {
137 * op(p+0); op(p+1); op(p+2); op(p+3);
138 * p+=4; n=-4;
139 * }
140 * if (n) {
141 * op(p+0); if (--n==0) return;
142 * op(p+2); if (--n==0) return;
143 * op(p+3); return;
144 * }
145 */
146
147#if defined(__SUNPRO_C) && defined(__sparcv9)
148 /* They've said -xarch=v9 at command line */
149 .register %g2,#scratch
150 .register %g3,#scratch
151# define FRAME_SIZE -192
152#elif defined(__GNUC__) && defined(__arch64__)
153 /* They've said -m64 at command line */
154 .register %g2,#scratch
155 .register %g3,#scratch
156# define FRAME_SIZE -192
157#else
158# define FRAME_SIZE -96
159#endif
160/*
161 * GNU assembler can't stand stuw:-(
162 */
163#define stuw st
164
165.section ".text",#alloc,#execinstr
166.file "bn_asm.sparc.v8plus.S"
167
168.align 32
169
170.global bn_mul_add_words
171/*
172 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
173 * BN_ULONG *rp,*ap;
174 * int num;
175 * BN_ULONG w;
176 */
177bn_mul_add_words:
178 sra %o2,%g0,%o2 ! signx %o2
179 brgz,a %o2,.L_bn_mul_add_words_proceed
180 lduw [%o1],%g2
181 retl
182 clr %o0
183 nop
184 nop
185 nop
186
187.L_bn_mul_add_words_proceed:
188 srl %o3,%g0,%o3 ! clruw %o3
189 andcc %o2,-4,%g0
190 bz,pn %icc,.L_bn_mul_add_words_tail
191 clr %o5
192
193.L_bn_mul_add_words_loop: ! wow! 32 aligned!
194 lduw [%o0],%g1
195 lduw [%o1+4],%g3
196 mulx %o3,%g2,%g2
197 add %g1,%o5,%o4
198 nop
199 add %o4,%g2,%o4
200 stuw %o4,[%o0]
201 srlx %o4,32,%o5
202
203 lduw [%o0+4],%g1
204 lduw [%o1+8],%g2
205 mulx %o3,%g3,%g3
206 add %g1,%o5,%o4
207 dec 4,%o2
208 add %o4,%g3,%o4
209 stuw %o4,[%o0+4]
210 srlx %o4,32,%o5
211
212 lduw [%o0+8],%g1
213 lduw [%o1+12],%g3
214 mulx %o3,%g2,%g2
215 add %g1,%o5,%o4
216 inc 16,%o1
217 add %o4,%g2,%o4
218 stuw %o4,[%o0+8]
219 srlx %o4,32,%o5
220
221 lduw [%o0+12],%g1
222 mulx %o3,%g3,%g3
223 add %g1,%o5,%o4
224 inc 16,%o0
225 add %o4,%g3,%o4
226 andcc %o2,-4,%g0
227 stuw %o4,[%o0-4]
228 srlx %o4,32,%o5
229 bnz,a,pt %icc,.L_bn_mul_add_words_loop
230 lduw [%o1],%g2
231
232 brnz,a,pn %o2,.L_bn_mul_add_words_tail
233 lduw [%o1],%g2
234.L_bn_mul_add_words_return:
235 retl
236 mov %o5,%o0
237
238.L_bn_mul_add_words_tail:
239 lduw [%o0],%g1
240 mulx %o3,%g2,%g2
241 add %g1,%o5,%o4
242 dec %o2
243 add %o4,%g2,%o4
244 srlx %o4,32,%o5
245 brz,pt %o2,.L_bn_mul_add_words_return
246 stuw %o4,[%o0]
247
248 lduw [%o1+4],%g2
249 lduw [%o0+4],%g1
250 mulx %o3,%g2,%g2
251 add %g1,%o5,%o4
252 dec %o2
253 add %o4,%g2,%o4
254 srlx %o4,32,%o5
255 brz,pt %o2,.L_bn_mul_add_words_return
256 stuw %o4,[%o0+4]
257
258 lduw [%o1+8],%g2
259 lduw [%o0+8],%g1
260 mulx %o3,%g2,%g2
261 add %g1,%o5,%o4
262 add %o4,%g2,%o4
263 stuw %o4,[%o0+8]
264 retl
265 srlx %o4,32,%o0
266
267.type bn_mul_add_words,#function
268.size bn_mul_add_words,(.-bn_mul_add_words)
269
270.align 32
271
272.global bn_mul_words
273/*
274 * BN_ULONG bn_mul_words(rp,ap,num,w)
275 * BN_ULONG *rp,*ap;
276 * int num;
277 * BN_ULONG w;
278 */
279bn_mul_words:
280 sra %o2,%g0,%o2 ! signx %o2
281 brgz,a %o2,.L_bn_mul_words_proceeed
282 lduw [%o1],%g2
283 retl
284 clr %o0
285 nop
286 nop
287 nop
288
289.L_bn_mul_words_proceeed:
290 srl %o3,%g0,%o3 ! clruw %o3
291 andcc %o2,-4,%g0
292 bz,pn %icc,.L_bn_mul_words_tail
293 clr %o5
294
295.L_bn_mul_words_loop: ! wow! 32 aligned!
296 lduw [%o1+4],%g3
297 mulx %o3,%g2,%g2
298 add %g2,%o5,%o4
299 nop
300 stuw %o4,[%o0]
301 srlx %o4,32,%o5
302
303 lduw [%o1+8],%g2
304 mulx %o3,%g3,%g3
305 add %g3,%o5,%o4
306 dec 4,%o2
307 stuw %o4,[%o0+4]
308 srlx %o4,32,%o5
309
310 lduw [%o1+12],%g3
311 mulx %o3,%g2,%g2
312 add %g2,%o5,%o4
313 inc 16,%o1
314 stuw %o4,[%o0+8]
315 srlx %o4,32,%o5
316
317 mulx %o3,%g3,%g3
318 add %g3,%o5,%o4
319 inc 16,%o0
320 stuw %o4,[%o0-4]
321 srlx %o4,32,%o5
322 andcc %o2,-4,%g0
323 bnz,a,pt %icc,.L_bn_mul_words_loop
324 lduw [%o1],%g2
325 nop
326 nop
327
328 brnz,a,pn %o2,.L_bn_mul_words_tail
329 lduw [%o1],%g2
330.L_bn_mul_words_return:
331 retl
332 mov %o5,%o0
333
334.L_bn_mul_words_tail:
335 mulx %o3,%g2,%g2
336 add %g2,%o5,%o4
337 dec %o2
338 srlx %o4,32,%o5
339 brz,pt %o2,.L_bn_mul_words_return
340 stuw %o4,[%o0]
341
342 lduw [%o1+4],%g2
343 mulx %o3,%g2,%g2
344 add %g2,%o5,%o4
345 dec %o2
346 srlx %o4,32,%o5
347 brz,pt %o2,.L_bn_mul_words_return
348 stuw %o4,[%o0+4]
349
350 lduw [%o1+8],%g2
351 mulx %o3,%g2,%g2
352 add %g2,%o5,%o4
353 stuw %o4,[%o0+8]
354 retl
355 srlx %o4,32,%o0
356
357.type bn_mul_words,#function
358.size bn_mul_words,(.-bn_mul_words)
359
360.align 32
361.global bn_sqr_words
362/*
363 * void bn_sqr_words(r,a,n)
364 * BN_ULONG *r,*a;
365 * int n;
366 */
367bn_sqr_words:
368 sra %o2,%g0,%o2 ! signx %o2
369 brgz,a %o2,.L_bn_sqr_words_proceeed
370 lduw [%o1],%g2
371 retl
372 clr %o0
373 nop
374 nop
375 nop
376
377.L_bn_sqr_words_proceeed:
378 andcc %o2,-4,%g0
379 nop
380 bz,pn %icc,.L_bn_sqr_words_tail
381 nop
382
383.L_bn_sqr_words_loop: ! wow! 32 aligned!
384 lduw [%o1+4],%g3
385 mulx %g2,%g2,%o4
386 stuw %o4,[%o0]
387 srlx %o4,32,%o5
388 stuw %o5,[%o0+4]
389 nop
390
391 lduw [%o1+8],%g2
392 mulx %g3,%g3,%o4
393 dec 4,%o2
394 stuw %o4,[%o0+8]
395 srlx %o4,32,%o5
396 stuw %o5,[%o0+12]
397
398 lduw [%o1+12],%g3
399 mulx %g2,%g2,%o4
400 srlx %o4,32,%o5
401 stuw %o4,[%o0+16]
402 inc 16,%o1
403 stuw %o5,[%o0+20]
404
405 mulx %g3,%g3,%o4
406 inc 32,%o0
407 stuw %o4,[%o0-8]
408 srlx %o4,32,%o5
409 andcc %o2,-4,%g2
410 stuw %o5,[%o0-4]
411 bnz,a,pt %icc,.L_bn_sqr_words_loop
412 lduw [%o1],%g2
413 nop
414
415 brnz,a,pn %o2,.L_bn_sqr_words_tail
416 lduw [%o1],%g2
417.L_bn_sqr_words_return:
418 retl
419 clr %o0
420
421.L_bn_sqr_words_tail:
422 mulx %g2,%g2,%o4
423 dec %o2
424 stuw %o4,[%o0]
425 srlx %o4,32,%o5
426 brz,pt %o2,.L_bn_sqr_words_return
427 stuw %o5,[%o0+4]
428
429 lduw [%o1+4],%g2
430 mulx %g2,%g2,%o4
431 dec %o2
432 stuw %o4,[%o0+8]
433 srlx %o4,32,%o5
434 brz,pt %o2,.L_bn_sqr_words_return
435 stuw %o5,[%o0+12]
436
437 lduw [%o1+8],%g2
438 mulx %g2,%g2,%o4
439 srlx %o4,32,%o5
440 stuw %o4,[%o0+16]
441 stuw %o5,[%o0+20]
442 retl
443 clr %o0
444
445.type bn_sqr_words,#function
446.size bn_sqr_words,(.-bn_sqr_words)
447
448.align 32
449.global bn_div_words
450/*
451 * BN_ULONG bn_div_words(h,l,d)
452 * BN_ULONG h,l,d;
453 */
454bn_div_words:
455 sllx %o0,32,%o0
456 or %o0,%o1,%o0
457 udivx %o0,%o2,%o0
458 retl
459 srl %o0,%g0,%o0 ! clruw %o0
460
461.type bn_div_words,#function
462.size bn_div_words,(.-bn_div_words)
463
464.align 32
465
466.global bn_add_words
467/*
468 * BN_ULONG bn_add_words(rp,ap,bp,n)
469 * BN_ULONG *rp,*ap,*bp;
470 * int n;
471 */
472bn_add_words:
473 sra %o3,%g0,%o3 ! signx %o3
474 brgz,a %o3,.L_bn_add_words_proceed
475 lduw [%o1],%o4
476 retl
477 clr %o0
478
479.L_bn_add_words_proceed:
480 andcc %o3,-4,%g0
481 bz,pn %icc,.L_bn_add_words_tail
482 addcc %g0,0,%g0 ! clear carry flag
483
484.L_bn_add_words_loop: ! wow! 32 aligned!
485 dec 4,%o3
486 lduw [%o2],%o5
487 lduw [%o1+4],%g1
488 lduw [%o2+4],%g2
489 lduw [%o1+8],%g3
490 lduw [%o2+8],%g4
491 addccc %o5,%o4,%o5
492 stuw %o5,[%o0]
493
494 lduw [%o1+12],%o4
495 lduw [%o2+12],%o5
496 inc 16,%o1
497 addccc %g1,%g2,%g1
498 stuw %g1,[%o0+4]
499
500 inc 16,%o2
501 addccc %g3,%g4,%g3
502 stuw %g3,[%o0+8]
503
504 inc 16,%o0
505 addccc %o5,%o4,%o5
506 stuw %o5,[%o0-4]
507 and %o3,-4,%g1
508 brnz,a,pt %g1,.L_bn_add_words_loop
509 lduw [%o1],%o4
510
511 brnz,a,pn %o3,.L_bn_add_words_tail
512 lduw [%o1],%o4
513.L_bn_add_words_return:
514 clr %o0
515 retl
516 movcs %icc,1,%o0
517 nop
518
519.L_bn_add_words_tail:
520 lduw [%o2],%o5
521 dec %o3
522 addccc %o5,%o4,%o5
523 brz,pt %o3,.L_bn_add_words_return
524 stuw %o5,[%o0]
525
526 lduw [%o1+4],%o4
527 lduw [%o2+4],%o5
528 dec %o3
529 addccc %o5,%o4,%o5
530 brz,pt %o3,.L_bn_add_words_return
531 stuw %o5,[%o0+4]
532
533 lduw [%o1+8],%o4
534 lduw [%o2+8],%o5
535 addccc %o5,%o4,%o5
536 stuw %o5,[%o0+8]
537 clr %o0
538 retl
539 movcs %icc,1,%o0
540
541.type bn_add_words,#function
542.size bn_add_words,(.-bn_add_words)
543
544.global bn_sub_words
545/*
546 * BN_ULONG bn_sub_words(rp,ap,bp,n)
547 * BN_ULONG *rp,*ap,*bp;
548 * int n;
549 */
550bn_sub_words:
551 sra %o3,%g0,%o3 ! signx %o3
552 brgz,a %o3,.L_bn_sub_words_proceed
553 lduw [%o1],%o4
554 retl
555 clr %o0
556
557.L_bn_sub_words_proceed:
558 andcc %o3,-4,%g0
559 bz,pn %icc,.L_bn_sub_words_tail
560 addcc %g0,0,%g0 ! clear carry flag
561
562.L_bn_sub_words_loop: ! wow! 32 aligned!
563 dec 4,%o3
564 lduw [%o2],%o5
565 lduw [%o1+4],%g1
566 lduw [%o2+4],%g2
567 lduw [%o1+8],%g3
568 lduw [%o2+8],%g4
569 subccc %o4,%o5,%o5
570 stuw %o5,[%o0]
571
572 lduw [%o1+12],%o4
573 lduw [%o2+12],%o5
574 inc 16,%o1
575 subccc %g1,%g2,%g2
576 stuw %g2,[%o0+4]
577
578 inc 16,%o2
579 subccc %g3,%g4,%g4
580 stuw %g4,[%o0+8]
581
582 inc 16,%o0
583 subccc %o4,%o5,%o5
584 stuw %o5,[%o0-4]
585 and %o3,-4,%g1
586 brnz,a,pt %g1,.L_bn_sub_words_loop
587 lduw [%o1],%o4
588
589 brnz,a,pn %o3,.L_bn_sub_words_tail
590 lduw [%o1],%o4
591.L_bn_sub_words_return:
592 clr %o0
593 retl
594 movcs %icc,1,%o0
595 nop
596
597.L_bn_sub_words_tail: ! wow! 32 aligned!
598 lduw [%o2],%o5
599 dec %o3
600 subccc %o4,%o5,%o5
601 brz,pt %o3,.L_bn_sub_words_return
602 stuw %o5,[%o0]
603
604 lduw [%o1+4],%o4
605 lduw [%o2+4],%o5
606 dec %o3
607 subccc %o4,%o5,%o5
608 brz,pt %o3,.L_bn_sub_words_return
609 stuw %o5,[%o0+4]
610
611 lduw [%o1+8],%o4
612 lduw [%o2+8],%o5
613 subccc %o4,%o5,%o5
614 stuw %o5,[%o0+8]
615 clr %o0
616 retl
617 movcs %icc,1,%o0
618
619.type bn_sub_words,#function
620.size bn_sub_words,(.-bn_sub_words)
621
622/*
623 * Code below depends on the fact that upper parts of the %l0-%l7
624 * and %i0-%i7 are zeroed by kernel after context switch. In
625 * previous versions this comment stated that "the trouble is that
626 * it's not feasible to implement the mumbo-jumbo in less V9
627 * instructions:-(" which apparently isn't true thanks to
628 * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement
629 * results not from the shorter code, but from elimination of
630 * multicycle none-pairable 'rd %y,%rd' instructions.
631 *
632 * Andy.
633 */
634
635/*
636 * Here is register usage map for *all* routines below.
637 */
638#define t_1 %o0
639#define t_2 %o1
640#define c_12 %o2
641#define c_3 %o3
642
643#define ap(I) [%i1+4*I]
644#define bp(I) [%i2+4*I]
645#define rp(I) [%i0+4*I]
646
647#define a_0 %l0
648#define a_1 %l1
649#define a_2 %l2
650#define a_3 %l3
651#define a_4 %l4
652#define a_5 %l5
653#define a_6 %l6
654#define a_7 %l7
655
656#define b_0 %i3
657#define b_1 %i4
658#define b_2 %i5
659#define b_3 %o4
660#define b_4 %o5
661#define b_5 %o7
662#define b_6 %g1
663#define b_7 %g4
664
665.align 32
666.global bn_mul_comba8
667/*
668 * void bn_mul_comba8(r,a,b)
669 * BN_ULONG *r,*a,*b;
670 */
671bn_mul_comba8:
672 save %sp,FRAME_SIZE,%sp
673 mov 1,t_2
674 lduw ap(0),a_0
675 sllx t_2,32,t_2
676 lduw bp(0),b_0 !=
677 lduw bp(1),b_1
678 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
679 srlx t_1,32,c_12
680 stuw t_1,rp(0) !=!r[0]=c1;
681
682 lduw ap(1),a_1
683 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
684 addcc c_12,t_1,c_12
685 clr c_3 !=
686 bcs,a %xcc,.+8
687 add c_3,t_2,c_3
688 lduw ap(2),a_2
689 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
690 addcc c_12,t_1,t_1
691 bcs,a %xcc,.+8
692 add c_3,t_2,c_3
693 srlx t_1,32,c_12 !=
694 stuw t_1,rp(1) !r[1]=c2;
695 or c_12,c_3,c_12
696
697 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
698 addcc c_12,t_1,c_12 !=
699 clr c_3
700 bcs,a %xcc,.+8
701 add c_3,t_2,c_3
702 lduw bp(2),b_2 !=
703 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
704 addcc c_12,t_1,c_12
705 bcs,a %xcc,.+8
706 add c_3,t_2,c_3 !=
707 lduw bp(3),b_3
708 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
709 addcc c_12,t_1,t_1
710 bcs,a %xcc,.+8 !=
711 add c_3,t_2,c_3
712 srlx t_1,32,c_12
713 stuw t_1,rp(2) !r[2]=c3;
714 or c_12,c_3,c_12 !=
715
716 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
717 addcc c_12,t_1,c_12
718 clr c_3
719 bcs,a %xcc,.+8 !=
720 add c_3,t_2,c_3
721 mulx a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
722 addcc c_12,t_1,c_12
723 bcs,a %xcc,.+8 !=
724 add c_3,t_2,c_3
725 lduw ap(3),a_3
726 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
727 addcc c_12,t_1,c_12 !=
728 bcs,a %xcc,.+8
729 add c_3,t_2,c_3
730 lduw ap(4),a_4
731 mulx a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);!=
732 addcc c_12,t_1,t_1
733 bcs,a %xcc,.+8
734 add c_3,t_2,c_3
735 srlx t_1,32,c_12 !=
736 stuw t_1,rp(3) !r[3]=c1;
737 or c_12,c_3,c_12
738
739 mulx a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
740 addcc c_12,t_1,c_12 !=
741 clr c_3
742 bcs,a %xcc,.+8
743 add c_3,t_2,c_3
744 mulx a_3,b_1,t_1 !=!mul_add_c(a[3],b[1],c2,c3,c1);
745 addcc c_12,t_1,c_12
746 bcs,a %xcc,.+8
747 add c_3,t_2,c_3
748 mulx a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
749 addcc c_12,t_1,c_12
750 bcs,a %xcc,.+8
751 add c_3,t_2,c_3
752 lduw bp(4),b_4 !=
753 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
754 addcc c_12,t_1,c_12
755 bcs,a %xcc,.+8
756 add c_3,t_2,c_3 !=
757 lduw bp(5),b_5
758 mulx a_0,b_4,t_1 !mul_add_c(a[0],b[4],c2,c3,c1);
759 addcc c_12,t_1,t_1
760 bcs,a %xcc,.+8 !=
761 add c_3,t_2,c_3
762 srlx t_1,32,c_12
763 stuw t_1,rp(4) !r[4]=c2;
764 or c_12,c_3,c_12 !=
765
766 mulx a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
767 addcc c_12,t_1,c_12
768 clr c_3
769 bcs,a %xcc,.+8 !=
770 add c_3,t_2,c_3
771 mulx a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
772 addcc c_12,t_1,c_12
773 bcs,a %xcc,.+8 !=
774 add c_3,t_2,c_3
775 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
776 addcc c_12,t_1,c_12
777 bcs,a %xcc,.+8 !=
778 add c_3,t_2,c_3
779 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
780 addcc c_12,t_1,c_12
781 bcs,a %xcc,.+8 !=
782 add c_3,t_2,c_3
783 lduw ap(5),a_5
784 mulx a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
785 addcc c_12,t_1,c_12 !=
786 bcs,a %xcc,.+8
787 add c_3,t_2,c_3
788 lduw ap(6),a_6
789 mulx a_5,b_0,t_1 !=!mul_add_c(a[5],b[0],c3,c1,c2);
790 addcc c_12,t_1,t_1
791 bcs,a %xcc,.+8
792 add c_3,t_2,c_3
793 srlx t_1,32,c_12 !=
794 stuw t_1,rp(5) !r[5]=c3;
795 or c_12,c_3,c_12
796
797 mulx a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
798 addcc c_12,t_1,c_12 !=
799 clr c_3
800 bcs,a %xcc,.+8
801 add c_3,t_2,c_3
802 mulx a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
803 addcc c_12,t_1,c_12
804 bcs,a %xcc,.+8
805 add c_3,t_2,c_3
806 mulx a_4,b_2,t_1 !=!mul_add_c(a[4],b[2],c1,c2,c3);
807 addcc c_12,t_1,c_12
808 bcs,a %xcc,.+8
809 add c_3,t_2,c_3
810 mulx a_3,b_3,t_1 !=!mul_add_c(a[3],b[3],c1,c2,c3);
811 addcc c_12,t_1,c_12
812 bcs,a %xcc,.+8
813 add c_3,t_2,c_3
814 mulx a_2,b_4,t_1 !=!mul_add_c(a[2],b[4],c1,c2,c3);
815 addcc c_12,t_1,c_12
816 bcs,a %xcc,.+8
817 add c_3,t_2,c_3
818 lduw bp(6),b_6 !=
819 mulx a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
820 addcc c_12,t_1,c_12
821 bcs,a %xcc,.+8
822 add c_3,t_2,c_3 !=
823 lduw bp(7),b_7
824 mulx a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
825 addcc c_12,t_1,t_1
826 bcs,a %xcc,.+8 !=
827 add c_3,t_2,c_3
828 srlx t_1,32,c_12
829 stuw t_1,rp(6) !r[6]=c1;
830 or c_12,c_3,c_12 !=
831
832 mulx a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
833 addcc c_12,t_1,c_12
834 clr c_3
835 bcs,a %xcc,.+8 !=
836 add c_3,t_2,c_3
837 mulx a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
838 addcc c_12,t_1,c_12
839 bcs,a %xcc,.+8 !=
840 add c_3,t_2,c_3
841 mulx a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
842 addcc c_12,t_1,c_12
843 bcs,a %xcc,.+8 !=
844 add c_3,t_2,c_3
845 mulx a_3,b_4,t_1 !mul_add_c(a[3],b[4],c2,c3,c1);
846 addcc c_12,t_1,c_12
847 bcs,a %xcc,.+8 !=
848 add c_3,t_2,c_3
849 mulx a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
850 addcc c_12,t_1,c_12
851 bcs,a %xcc,.+8 !=
852 add c_3,t_2,c_3
853 mulx a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
854 addcc c_12,t_1,c_12
855 bcs,a %xcc,.+8 !=
856 add c_3,t_2,c_3
857 lduw ap(7),a_7
858 mulx a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
859 addcc c_12,t_1,c_12
860 bcs,a %xcc,.+8
861 add c_3,t_2,c_3
862 mulx a_7,b_0,t_1 !=!mul_add_c(a[7],b[0],c2,c3,c1);
863 addcc c_12,t_1,t_1
864 bcs,a %xcc,.+8
865 add c_3,t_2,c_3
866 srlx t_1,32,c_12 !=
867 stuw t_1,rp(7) !r[7]=c2;
868 or c_12,c_3,c_12
869
870 mulx a_7,b_1,t_1 !=!mul_add_c(a[7],b[1],c3,c1,c2);
871 addcc c_12,t_1,c_12
872 clr c_3
873 bcs,a %xcc,.+8
874 add c_3,t_2,c_3 !=
875 mulx a_6,b_2,t_1 !mul_add_c(a[6],b[2],c3,c1,c2);
876 addcc c_12,t_1,c_12
877 bcs,a %xcc,.+8
878 add c_3,t_2,c_3 !=
879 mulx a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
880 addcc c_12,t_1,c_12
881 bcs,a %xcc,.+8
882 add c_3,t_2,c_3 !=
883 mulx a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
884 addcc c_12,t_1,c_12
885 bcs,a %xcc,.+8
886 add c_3,t_2,c_3 !=
887 mulx a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
888 addcc c_12,t_1,c_12
889 bcs,a %xcc,.+8
890 add c_3,t_2,c_3 !=
891 mulx a_2,b_6,t_1 !mul_add_c(a[2],b[6],c3,c1,c2);
892 addcc c_12,t_1,c_12
893 bcs,a %xcc,.+8
894 add c_3,t_2,c_3 !=
895 mulx a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
896 addcc c_12,t_1,t_1
897 bcs,a %xcc,.+8
898 add c_3,t_2,c_3 !=
899 srlx t_1,32,c_12
900 stuw t_1,rp(8) !r[8]=c3;
901 or c_12,c_3,c_12
902
903 mulx a_2,b_7,t_1 !=!mul_add_c(a[2],b[7],c1,c2,c3);
904 addcc c_12,t_1,c_12
905 clr c_3
906 bcs,a %xcc,.+8
907 add c_3,t_2,c_3 !=
908 mulx a_3,b_6,t_1 !mul_add_c(a[3],b[6],c1,c2,c3);
909 addcc c_12,t_1,c_12
910 bcs,a %xcc,.+8 !=
911 add c_3,t_2,c_3
912 mulx a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
913 addcc c_12,t_1,c_12
914 bcs,a %xcc,.+8 !=
915 add c_3,t_2,c_3
916 mulx a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
917 addcc c_12,t_1,c_12
918 bcs,a %xcc,.+8 !=
919 add c_3,t_2,c_3
920 mulx a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
921 addcc c_12,t_1,c_12
922 bcs,a %xcc,.+8 !=
923 add c_3,t_2,c_3
924 mulx a_7,b_2,t_1 !mul_add_c(a[7],b[2],c1,c2,c3);
925 addcc c_12,t_1,t_1
926 bcs,a %xcc,.+8 !=
927 add c_3,t_2,c_3
928 srlx t_1,32,c_12
929 stuw t_1,rp(9) !r[9]=c1;
930 or c_12,c_3,c_12 !=
931
932 mulx a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
933 addcc c_12,t_1,c_12
934 clr c_3
935 bcs,a %xcc,.+8 !=
936 add c_3,t_2,c_3
937 mulx a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
938 addcc c_12,t_1,c_12
939 bcs,a %xcc,.+8 !=
940 add c_3,t_2,c_3
941 mulx a_5,b_5,t_1 !mul_add_c(a[5],b[5],c2,c3,c1);
942 addcc c_12,t_1,c_12
943 bcs,a %xcc,.+8 !=
944 add c_3,t_2,c_3
945 mulx a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
946 addcc c_12,t_1,c_12
947 bcs,a %xcc,.+8 !=
948 add c_3,t_2,c_3
949 mulx a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
950 addcc c_12,t_1,t_1
951 bcs,a %xcc,.+8 !=
952 add c_3,t_2,c_3
953 srlx t_1,32,c_12
954 stuw t_1,rp(10) !r[10]=c2;
955 or c_12,c_3,c_12 !=
956
957 mulx a_4,b_7,t_1 !mul_add_c(a[4],b[7],c3,c1,c2);
958 addcc c_12,t_1,c_12
959 clr c_3
960 bcs,a %xcc,.+8 !=
961 add c_3,t_2,c_3
962 mulx a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
963 addcc c_12,t_1,c_12
964 bcs,a %xcc,.+8 !=
965 add c_3,t_2,c_3
966 mulx a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
967 addcc c_12,t_1,c_12
968 bcs,a %xcc,.+8 !=
969 add c_3,t_2,c_3
970 mulx a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
971 addcc c_12,t_1,t_1
972 bcs,a %xcc,.+8 !=
973 add c_3,t_2,c_3
974 srlx t_1,32,c_12
975 stuw t_1,rp(11) !r[11]=c3;
976 or c_12,c_3,c_12 !=
977
978 mulx a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
979 addcc c_12,t_1,c_12
980 clr c_3
981 bcs,a %xcc,.+8 !=
982 add c_3,t_2,c_3
983 mulx a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
984 addcc c_12,t_1,c_12
985 bcs,a %xcc,.+8 !=
986 add c_3,t_2,c_3
987 mulx a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
988 addcc c_12,t_1,t_1
989 bcs,a %xcc,.+8 !=
990 add c_3,t_2,c_3
991 srlx t_1,32,c_12
992 stuw t_1,rp(12) !r[12]=c1;
993 or c_12,c_3,c_12 !=
994
995 mulx a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
996 addcc c_12,t_1,c_12
997 clr c_3
998 bcs,a %xcc,.+8 !=
999 add c_3,t_2,c_3
1000 mulx a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
1001 addcc c_12,t_1,t_1
1002 bcs,a %xcc,.+8 !=
1003 add c_3,t_2,c_3
1004 srlx t_1,32,c_12
1005 st t_1,rp(13) !r[13]=c2;
1006 or c_12,c_3,c_12 !=
1007
1008 mulx a_7,b_7,t_1 !mul_add_c(a[7],b[7],c3,c1,c2);
1009 addcc c_12,t_1,t_1
1010 srlx t_1,32,c_12 !=
1011 stuw t_1,rp(14) !r[14]=c3;
1012 stuw c_12,rp(15) !r[15]=c1;
1013
1014 ret
1015 restore %g0,%g0,%o0 !=
1016
1017.type bn_mul_comba8,#function
1018.size bn_mul_comba8,(.-bn_mul_comba8)
1019
1020.align 32
1021
1022.global bn_mul_comba4
1023/*
1024 * void bn_mul_comba4(r,a,b)
1025 * BN_ULONG *r,*a,*b;
1026 */
1027bn_mul_comba4:
1028 save %sp,FRAME_SIZE,%sp
1029 lduw ap(0),a_0
1030 mov 1,t_2
1031 lduw bp(0),b_0
1032 sllx t_2,32,t_2 !=
1033 lduw bp(1),b_1
1034 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
1035 srlx t_1,32,c_12
1036 stuw t_1,rp(0) !=!r[0]=c1;
1037
1038 lduw ap(1),a_1
1039 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
1040 addcc c_12,t_1,c_12
1041 clr c_3 !=
1042 bcs,a %xcc,.+8
1043 add c_3,t_2,c_3
1044 lduw ap(2),a_2
1045 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
1046 addcc c_12,t_1,t_1
1047 bcs,a %xcc,.+8
1048 add c_3,t_2,c_3
1049 srlx t_1,32,c_12 !=
1050 stuw t_1,rp(1) !r[1]=c2;
1051 or c_12,c_3,c_12
1052
1053 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
1054 addcc c_12,t_1,c_12 !=
1055 clr c_3
1056 bcs,a %xcc,.+8
1057 add c_3,t_2,c_3
1058 lduw bp(2),b_2 !=
1059 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
1060 addcc c_12,t_1,c_12
1061 bcs,a %xcc,.+8
1062 add c_3,t_2,c_3 !=
1063 lduw bp(3),b_3
1064 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
1065 addcc c_12,t_1,t_1
1066 bcs,a %xcc,.+8 !=
1067 add c_3,t_2,c_3
1068 srlx t_1,32,c_12
1069 stuw t_1,rp(2) !r[2]=c3;
1070 or c_12,c_3,c_12 !=
1071
1072 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
1073 addcc c_12,t_1,c_12
1074 clr c_3
1075 bcs,a %xcc,.+8 !=
1076 add c_3,t_2,c_3
1077 mulx a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
1078 addcc c_12,t_1,c_12
1079 bcs,a %xcc,.+8 !=
1080 add c_3,t_2,c_3
1081 lduw ap(3),a_3
1082 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
1083 addcc c_12,t_1,c_12 !=
1084 bcs,a %xcc,.+8
1085 add c_3,t_2,c_3
1086 mulx a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
1087 addcc c_12,t_1,t_1 !=
1088 bcs,a %xcc,.+8
1089 add c_3,t_2,c_3
1090 srlx t_1,32,c_12
1091 stuw t_1,rp(3) !=!r[3]=c1;
1092 or c_12,c_3,c_12
1093
1094 mulx a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1095 addcc c_12,t_1,c_12
1096 clr c_3 !=
1097 bcs,a %xcc,.+8
1098 add c_3,t_2,c_3
1099 mulx a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1100 addcc c_12,t_1,c_12 !=
1101 bcs,a %xcc,.+8
1102 add c_3,t_2,c_3
1103 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
1104 addcc c_12,t_1,t_1 !=
1105 bcs,a %xcc,.+8
1106 add c_3,t_2,c_3
1107 srlx t_1,32,c_12
1108 stuw t_1,rp(4) !=!r[4]=c2;
1109 or c_12,c_3,c_12
1110
1111 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1112 addcc c_12,t_1,c_12
1113 clr c_3 !=
1114 bcs,a %xcc,.+8
1115 add c_3,t_2,c_3
1116 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1117 addcc c_12,t_1,t_1 !=
1118 bcs,a %xcc,.+8
1119 add c_3,t_2,c_3
1120 srlx t_1,32,c_12
1121 stuw t_1,rp(5) !=!r[5]=c3;
1122 or c_12,c_3,c_12
1123
1124 mulx a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1125 addcc c_12,t_1,t_1
1126 srlx t_1,32,c_12 !=
1127 stuw t_1,rp(6) !r[6]=c1;
1128 stuw c_12,rp(7) !r[7]=c2;
1129
1130 ret
1131 restore %g0,%g0,%o0
1132
1133.type bn_mul_comba4,#function
1134.size bn_mul_comba4,(.-bn_mul_comba4)
1135
1136.align 32
1137
1138.global bn_sqr_comba8
1139bn_sqr_comba8:
1140 save %sp,FRAME_SIZE,%sp
1141 mov 1,t_2
1142 lduw ap(0),a_0
1143 sllx t_2,32,t_2
1144 lduw ap(1),a_1
1145 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1146 srlx t_1,32,c_12
1147 stuw t_1,rp(0) !r[0]=c1;
1148
1149 lduw ap(2),a_2
1150 mulx a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1151 addcc c_12,t_1,c_12
1152 clr c_3
1153 bcs,a %xcc,.+8
1154 add c_3,t_2,c_3
1155 addcc c_12,t_1,t_1
1156 bcs,a %xcc,.+8
1157 add c_3,t_2,c_3
1158 srlx t_1,32,c_12
1159 stuw t_1,rp(1) !r[1]=c2;
1160 or c_12,c_3,c_12
1161
1162 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1163 addcc c_12,t_1,c_12
1164 clr c_3
1165 bcs,a %xcc,.+8
1166 add c_3,t_2,c_3
1167 addcc c_12,t_1,c_12
1168 bcs,a %xcc,.+8
1169 add c_3,t_2,c_3
1170 lduw ap(3),a_3
1171 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1172 addcc c_12,t_1,t_1
1173 bcs,a %xcc,.+8
1174 add c_3,t_2,c_3
1175 srlx t_1,32,c_12
1176 stuw t_1,rp(2) !r[2]=c3;
1177 or c_12,c_3,c_12
1178
1179 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1180 addcc c_12,t_1,c_12
1181 clr c_3
1182 bcs,a %xcc,.+8
1183 add c_3,t_2,c_3
1184 addcc c_12,t_1,c_12
1185 bcs,a %xcc,.+8
1186 add c_3,t_2,c_3
1187 lduw ap(4),a_4
1188 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1189 addcc c_12,t_1,c_12
1190 bcs,a %xcc,.+8
1191 add c_3,t_2,c_3
1192 addcc c_12,t_1,t_1
1193 bcs,a %xcc,.+8
1194 add c_3,t_2,c_3
1195 srlx t_1,32,c_12
1196 st t_1,rp(3) !r[3]=c1;
1197 or c_12,c_3,c_12
1198
1199 mulx a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1200 addcc c_12,t_1,c_12
1201 clr c_3
1202 bcs,a %xcc,.+8
1203 add c_3,t_2,c_3
1204 addcc c_12,t_1,c_12
1205 bcs,a %xcc,.+8
1206 add c_3,t_2,c_3
1207 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1208 addcc c_12,t_1,c_12
1209 bcs,a %xcc,.+8
1210 add c_3,t_2,c_3
1211 addcc c_12,t_1,c_12
1212 bcs,a %xcc,.+8
1213 add c_3,t_2,c_3
1214 lduw ap(5),a_5
1215 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1216 addcc c_12,t_1,t_1
1217 bcs,a %xcc,.+8
1218 add c_3,t_2,c_3
1219 srlx t_1,32,c_12
1220 stuw t_1,rp(4) !r[4]=c2;
1221 or c_12,c_3,c_12
1222
1223 mulx a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1224 addcc c_12,t_1,c_12
1225 clr c_3
1226 bcs,a %xcc,.+8
1227 add c_3,t_2,c_3
1228 addcc c_12,t_1,c_12
1229 bcs,a %xcc,.+8
1230 add c_3,t_2,c_3
1231 mulx a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1232 addcc c_12,t_1,c_12
1233 bcs,a %xcc,.+8
1234 add c_3,t_2,c_3
1235 addcc c_12,t_1,c_12
1236 bcs,a %xcc,.+8
1237 add c_3,t_2,c_3
1238 lduw ap(6),a_6
1239 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1240 addcc c_12,t_1,c_12
1241 bcs,a %xcc,.+8
1242 add c_3,t_2,c_3
1243 addcc c_12,t_1,t_1
1244 bcs,a %xcc,.+8
1245 add c_3,t_2,c_3
1246 srlx t_1,32,c_12
1247 stuw t_1,rp(5) !r[5]=c3;
1248 or c_12,c_3,c_12
1249
1250 mulx a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1251 addcc c_12,t_1,c_12
1252 clr c_3
1253 bcs,a %xcc,.+8
1254 add c_3,t_2,c_3
1255 addcc c_12,t_1,c_12
1256 bcs,a %xcc,.+8
1257 add c_3,t_2,c_3
1258 mulx a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1259 addcc c_12,t_1,c_12
1260 bcs,a %xcc,.+8
1261 add c_3,t_2,c_3
1262 addcc c_12,t_1,c_12
1263 bcs,a %xcc,.+8
1264 add c_3,t_2,c_3
1265 mulx a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1266 addcc c_12,t_1,c_12
1267 bcs,a %xcc,.+8
1268 add c_3,t_2,c_3
1269 addcc c_12,t_1,c_12
1270 bcs,a %xcc,.+8
1271 add c_3,t_2,c_3
1272 lduw ap(7),a_7
1273 mulx a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1274 addcc c_12,t_1,t_1
1275 bcs,a %xcc,.+8
1276 add c_3,t_2,c_3
1277 srlx t_1,32,c_12
1278 stuw t_1,rp(6) !r[6]=c1;
1279 or c_12,c_3,c_12
1280
1281 mulx a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1282 addcc c_12,t_1,c_12
1283 clr c_3
1284 bcs,a %xcc,.+8
1285 add c_3,t_2,c_3
1286 addcc c_12,t_1,c_12
1287 bcs,a %xcc,.+8
1288 add c_3,t_2,c_3
1289 mulx a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1290 addcc c_12,t_1,c_12
1291 bcs,a %xcc,.+8
1292 add c_3,t_2,c_3
1293 addcc c_12,t_1,c_12
1294 bcs,a %xcc,.+8
1295 add c_3,t_2,c_3
1296 mulx a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1297 addcc c_12,t_1,c_12
1298 bcs,a %xcc,.+8
1299 add c_3,t_2,c_3
1300 addcc c_12,t_1,c_12
1301 bcs,a %xcc,.+8
1302 add c_3,t_2,c_3
1303 mulx a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1304 addcc c_12,t_1,c_12
1305 bcs,a %xcc,.+8
1306 add c_3,t_2,c_3
1307 addcc c_12,t_1,t_1
1308 bcs,a %xcc,.+8
1309 add c_3,t_2,c_3
1310 srlx t_1,32,c_12
1311 stuw t_1,rp(7) !r[7]=c2;
1312 or c_12,c_3,c_12
1313
1314 mulx a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1315 addcc c_12,t_1,c_12
1316 clr c_3
1317 bcs,a %xcc,.+8
1318 add c_3,t_2,c_3
1319 addcc c_12,t_1,c_12
1320 bcs,a %xcc,.+8
1321 add c_3,t_2,c_3
1322 mulx a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1323 addcc c_12,t_1,c_12
1324 bcs,a %xcc,.+8
1325 add c_3,t_2,c_3
1326 addcc c_12,t_1,c_12
1327 bcs,a %xcc,.+8
1328 add c_3,t_2,c_3
1329 mulx a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1330 addcc c_12,t_1,c_12
1331 bcs,a %xcc,.+8
1332 add c_3,t_2,c_3
1333 addcc c_12,t_1,c_12
1334 bcs,a %xcc,.+8
1335 add c_3,t_2,c_3
1336 mulx a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1337 addcc c_12,t_1,t_1
1338 bcs,a %xcc,.+8
1339 add c_3,t_2,c_3
1340 srlx t_1,32,c_12
1341 stuw t_1,rp(8) !r[8]=c3;
1342 or c_12,c_3,c_12
1343
1344 mulx a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1345 addcc c_12,t_1,c_12
1346 clr c_3
1347 bcs,a %xcc,.+8
1348 add c_3,t_2,c_3
1349 addcc c_12,t_1,c_12
1350 bcs,a %xcc,.+8
1351 add c_3,t_2,c_3
1352 mulx a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1353 addcc c_12,t_1,c_12
1354 bcs,a %xcc,.+8
1355 add c_3,t_2,c_3
1356 addcc c_12,t_1,c_12
1357 bcs,a %xcc,.+8
1358 add c_3,t_2,c_3
1359 mulx a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1360 addcc c_12,t_1,c_12
1361 bcs,a %xcc,.+8
1362 add c_3,t_2,c_3
1363 addcc c_12,t_1,t_1
1364 bcs,a %xcc,.+8
1365 add c_3,t_2,c_3
1366 srlx t_1,32,c_12
1367 stuw t_1,rp(9) !r[9]=c1;
1368 or c_12,c_3,c_12
1369
1370 mulx a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1371 addcc c_12,t_1,c_12
1372 clr c_3
1373 bcs,a %xcc,.+8
1374 add c_3,t_2,c_3
1375 addcc c_12,t_1,c_12
1376 bcs,a %xcc,.+8
1377 add c_3,t_2,c_3
1378 mulx a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1379 addcc c_12,t_1,c_12
1380 bcs,a %xcc,.+8
1381 add c_3,t_2,c_3
1382 addcc c_12,t_1,c_12
1383 bcs,a %xcc,.+8
1384 add c_3,t_2,c_3
1385 mulx a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1386 addcc c_12,t_1,t_1
1387 bcs,a %xcc,.+8
1388 add c_3,t_2,c_3
1389 srlx t_1,32,c_12
1390 stuw t_1,rp(10) !r[10]=c2;
1391 or c_12,c_3,c_12
1392
1393 mulx a_4,a_7,t_1 !sqr_add_c2(a,7,4,c3,c1,c2);
1394 addcc c_12,t_1,c_12
1395 clr c_3
1396 bcs,a %xcc,.+8
1397 add c_3,t_2,c_3
1398 addcc c_12,t_1,c_12
1399 bcs,a %xcc,.+8
1400 add c_3,t_2,c_3
1401 mulx a_5,a_6,t_1 !sqr_add_c2(a,6,5,c3,c1,c2);
1402 addcc c_12,t_1,c_12
1403 bcs,a %xcc,.+8
1404 add c_3,t_2,c_3
1405 addcc c_12,t_1,t_1
1406 bcs,a %xcc,.+8
1407 add c_3,t_2,c_3
1408 srlx t_1,32,c_12
1409 stuw t_1,rp(11) !r[11]=c3;
1410 or c_12,c_3,c_12
1411
1412 mulx a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1413 addcc c_12,t_1,c_12
1414 clr c_3
1415 bcs,a %xcc,.+8
1416 add c_3,t_2,c_3
1417 addcc c_12,t_1,c_12
1418 bcs,a %xcc,.+8
1419 add c_3,t_2,c_3
1420 mulx a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1421 addcc c_12,t_1,t_1
1422 bcs,a %xcc,.+8
1423 add c_3,t_2,c_3
1424 srlx t_1,32,c_12
1425 stuw t_1,rp(12) !r[12]=c1;
1426 or c_12,c_3,c_12
1427
1428 mulx a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1429 addcc c_12,t_1,c_12
1430 clr c_3
1431 bcs,a %xcc,.+8
1432 add c_3,t_2,c_3
1433 addcc c_12,t_1,t_1
1434 bcs,a %xcc,.+8
1435 add c_3,t_2,c_3
1436 srlx t_1,32,c_12
1437 stuw t_1,rp(13) !r[13]=c2;
1438 or c_12,c_3,c_12
1439
1440 mulx a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1441 addcc c_12,t_1,t_1
1442 srlx t_1,32,c_12
1443 stuw t_1,rp(14) !r[14]=c3;
1444 stuw c_12,rp(15) !r[15]=c1;
1445
1446 ret
1447 restore %g0,%g0,%o0
1448
1449.type bn_sqr_comba8,#function
1450.size bn_sqr_comba8,(.-bn_sqr_comba8)
1451
1452.align 32
1453
1454.global bn_sqr_comba4
1455/*
1456 * void bn_sqr_comba4(r,a)
1457 * BN_ULONG *r,*a;
1458 */
1459bn_sqr_comba4:
1460 save %sp,FRAME_SIZE,%sp
1461 mov 1,t_2
1462 lduw ap(0),a_0
1463 sllx t_2,32,t_2
1464 lduw ap(1),a_1
1465 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1466 srlx t_1,32,c_12
1467 stuw t_1,rp(0) !r[0]=c1;
1468
1469 lduw ap(2),a_2
1470 mulx a_0,a_1,t_1 !sqr_add_c2(a,1,0,c2,c3,c1);
1471 addcc c_12,t_1,c_12
1472 clr c_3
1473 bcs,a %xcc,.+8
1474 add c_3,t_2,c_3
1475 addcc c_12,t_1,t_1
1476 bcs,a %xcc,.+8
1477 add c_3,t_2,c_3
1478 srlx t_1,32,c_12
1479 stuw t_1,rp(1) !r[1]=c2;
1480 or c_12,c_3,c_12
1481
1482 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1483 addcc c_12,t_1,c_12
1484 clr c_3
1485 bcs,a %xcc,.+8
1486 add c_3,t_2,c_3
1487 addcc c_12,t_1,c_12
1488 bcs,a %xcc,.+8
1489 add c_3,t_2,c_3
1490 lduw ap(3),a_3
1491 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1492 addcc c_12,t_1,t_1
1493 bcs,a %xcc,.+8
1494 add c_3,t_2,c_3
1495 srlx t_1,32,c_12
1496 stuw t_1,rp(2) !r[2]=c3;
1497 or c_12,c_3,c_12
1498
1499 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1500 addcc c_12,t_1,c_12
1501 clr c_3
1502 bcs,a %xcc,.+8
1503 add c_3,t_2,c_3
1504 addcc c_12,t_1,c_12
1505 bcs,a %xcc,.+8
1506 add c_3,t_2,c_3
1507 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1508 addcc c_12,t_1,c_12
1509 bcs,a %xcc,.+8
1510 add c_3,t_2,c_3
1511 addcc c_12,t_1,t_1
1512 bcs,a %xcc,.+8
1513 add c_3,t_2,c_3
1514 srlx t_1,32,c_12
1515 stuw t_1,rp(3) !r[3]=c1;
1516 or c_12,c_3,c_12
1517
1518 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1519 addcc c_12,t_1,c_12
1520 clr c_3
1521 bcs,a %xcc,.+8
1522 add c_3,t_2,c_3
1523 addcc c_12,t_1,c_12
1524 bcs,a %xcc,.+8
1525 add c_3,t_2,c_3
1526 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1527 addcc c_12,t_1,t_1
1528 bcs,a %xcc,.+8
1529 add c_3,t_2,c_3
1530 srlx t_1,32,c_12
1531 stuw t_1,rp(4) !r[4]=c2;
1532 or c_12,c_3,c_12
1533
1534 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1535 addcc c_12,t_1,c_12
1536 clr c_3
1537 bcs,a %xcc,.+8
1538 add c_3,t_2,c_3
1539 addcc c_12,t_1,t_1
1540 bcs,a %xcc,.+8
1541 add c_3,t_2,c_3
1542 srlx t_1,32,c_12
1543 stuw t_1,rp(5) !r[5]=c3;
1544 or c_12,c_3,c_12
1545
1546 mulx a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1547 addcc c_12,t_1,t_1
1548 srlx t_1,32,c_12
1549 stuw t_1,rp(6) !r[6]=c1;
1550 stuw c_12,rp(7) !r[7]=c2;
1551
1552 ret
1553 restore %g0,%g0,%o0
1554
1555.type bn_sqr_comba4,#function
1556.size bn_sqr_comba4,(.-bn_sqr_comba4)
1557
1558.align 32
diff --git a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
deleted file mode 100644
index b8fb1e8a25..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv9-mont.pl
+++ /dev/null
@@ -1,606 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# December 2005
11#
12# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
13# for undertaken effort are multiple. First of all, UltraSPARC is not
14# the whole SPARCv9 universe and other VIS-free implementations deserve
15# optimized code as much. Secondly, newly introduced UltraSPARC T1,
16# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes,
17# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
18# several integrated RSA/DSA accelerator circuits accessible through
19# kernel driver [only(*)], but having decent user-land software
20# implementation is important too. Finally, reasons like desire to
21# experiment with dedicated squaring procedure. Yes, this module
22# implements one, because it was easiest to draft it in SPARCv9
23# instructions...
24
25# (*) Engine accessing the driver in question is on my TODO list.
26# For reference, acceleator is estimated to give 6 to 10 times
27# improvement on single-threaded RSA sign. It should be noted
28# that 6-10x improvement coefficient does not actually mean
29# something extraordinary in terms of absolute [single-threaded]
30# performance, as SPARCv9 instruction set is by all means least
31# suitable for high performance crypto among other 64 bit
32# platforms. 6-10x factor simply places T1 in same performance
33# domain as say AMD64 and IA-64. Improvement of RSA verify don't
34# appear impressive at all, but it's the sign operation which is
35# far more critical/interesting.
36
37# You might notice that inner loops are modulo-scheduled:-) This has
38# essentially negligible impact on UltraSPARC performance, it's
39# Fujitsu SPARC64 V users who should notice and hopefully appreciate
40# the advantage... Currently this module surpasses sparcv9a-mont.pl
41# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a
42# module still have hidden potential [see TODO list there], which is
43# estimated to be larger than 20%...
44
45# int bn_mul_mont(
46$rp="%i0"; # BN_ULONG *rp,
47$ap="%i1"; # const BN_ULONG *ap,
48$bp="%i2"; # const BN_ULONG *bp,
49$np="%i3"; # const BN_ULONG *np,
50$n0="%i4"; # const BN_ULONG *n0,
51$num="%i5"; # int num);
52
53$bits=32;
54for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
55if ($bits==64) { $bias=2047; $frame=192; }
56else { $bias=0; $frame=128; }
57
58$car0="%o0";
59$car1="%o1";
60$car2="%o2"; # 1 bit
61$acc0="%o3";
62$acc1="%o4";
63$mask="%g1"; # 32 bits, what a waste...
64$tmp0="%g4";
65$tmp1="%g5";
66
67$i="%l0";
68$j="%l1";
69$mul0="%l2";
70$mul1="%l3";
71$tp="%l4";
72$apj="%l5";
73$npj="%l6";
74$tpj="%l7";
75
76$fname="bn_mul_mont_int";
77
78$code=<<___;
79.section ".text",#alloc,#execinstr
80
81.global $fname
82.align 32
83$fname:
84 cmp %o5,4 ! 128 bits minimum
85 bge,pt %icc,.Lenter
86 sethi %hi(0xffffffff),$mask
87 retl
88 clr %o0
89.align 32
90.Lenter:
91 save %sp,-$frame,%sp
92 sll $num,2,$num ! num*=4
93 or $mask,%lo(0xffffffff),$mask
94 ld [$n0],$n0
95 cmp $ap,$bp
96 and $num,$mask,$num
97 ld [$bp],$mul0 ! bp[0]
98 nop
99
100 add %sp,$bias,%o7 ! real top of stack
101 ld [$ap],$car0 ! ap[0] ! redundant in squaring context
102 sub %o7,$num,%o7
103 ld [$ap+4],$apj ! ap[1]
104 and %o7,-1024,%o7
105 ld [$np],$car1 ! np[0]
106 sub %o7,$bias,%sp ! alloca
107 ld [$np+4],$npj ! np[1]
108 be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
109 mov 12,$j
110
111 mulx $car0,$mul0,$car0 ! ap[0]*bp[0]
112 mulx $apj,$mul0,$tmp0 !prologue! ap[1]*bp[0]
113 and $car0,$mask,$acc0
114 add %sp,$bias+$frame,$tp
115 ld [$ap+8],$apj !prologue!
116
117 mulx $n0,$acc0,$mul1 ! "t[0]"*n0
118 and $mul1,$mask,$mul1
119
120 mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
121 mulx $npj,$mul1,$acc1 !prologue! np[1]*"t[0]"*n0
122 srlx $car0,32,$car0
123 add $acc0,$car1,$car1
124 ld [$np+8],$npj !prologue!
125 srlx $car1,32,$car1
126 mov $tmp0,$acc0 !prologue!
127
128.L1st:
129 mulx $apj,$mul0,$tmp0
130 mulx $npj,$mul1,$tmp1
131 add $acc0,$car0,$car0
132 ld [$ap+$j],$apj ! ap[j]
133 and $car0,$mask,$acc0
134 add $acc1,$car1,$car1
135 ld [$np+$j],$npj ! np[j]
136 srlx $car0,32,$car0
137 add $acc0,$car1,$car1
138 add $j,4,$j ! j++
139 mov $tmp0,$acc0
140 st $car1,[$tp]
141 cmp $j,$num
142 mov $tmp1,$acc1
143 srlx $car1,32,$car1
144 bl %icc,.L1st
145 add $tp,4,$tp ! tp++
146!.L1st
147
148 mulx $apj,$mul0,$tmp0 !epilogue!
149 mulx $npj,$mul1,$tmp1
150 add $acc0,$car0,$car0
151 and $car0,$mask,$acc0
152 add $acc1,$car1,$car1
153 srlx $car0,32,$car0
154 add $acc0,$car1,$car1
155 st $car1,[$tp]
156 srlx $car1,32,$car1
157
158 add $tmp0,$car0,$car0
159 and $car0,$mask,$acc0
160 add $tmp1,$car1,$car1
161 srlx $car0,32,$car0
162 add $acc0,$car1,$car1
163 st $car1,[$tp+4]
164 srlx $car1,32,$car1
165
166 add $car0,$car1,$car1
167 st $car1,[$tp+8]
168 srlx $car1,32,$car2
169
170 mov 4,$i ! i++
171 ld [$bp+4],$mul0 ! bp[1]
172.Louter:
173 add %sp,$bias+$frame,$tp
174 ld [$ap],$car0 ! ap[0]
175 ld [$ap+4],$apj ! ap[1]
176 ld [$np],$car1 ! np[0]
177 ld [$np+4],$npj ! np[1]
178 ld [$tp],$tmp1 ! tp[0]
179 ld [$tp+4],$tpj ! tp[1]
180 mov 12,$j
181
182 mulx $car0,$mul0,$car0
183 mulx $apj,$mul0,$tmp0 !prologue!
184 add $tmp1,$car0,$car0
185 ld [$ap+8],$apj !prologue!
186 and $car0,$mask,$acc0
187
188 mulx $n0,$acc0,$mul1
189 and $mul1,$mask,$mul1
190
191 mulx $car1,$mul1,$car1
192 mulx $npj,$mul1,$acc1 !prologue!
193 srlx $car0,32,$car0
194 add $acc0,$car1,$car1
195 ld [$np+8],$npj !prologue!
196 srlx $car1,32,$car1
197 mov $tmp0,$acc0 !prologue!
198
199.Linner:
200 mulx $apj,$mul0,$tmp0
201 mulx $npj,$mul1,$tmp1
202 add $tpj,$car0,$car0
203 ld [$ap+$j],$apj ! ap[j]
204 add $acc0,$car0,$car0
205 add $acc1,$car1,$car1
206 ld [$np+$j],$npj ! np[j]
207 and $car0,$mask,$acc0
208 ld [$tp+8],$tpj ! tp[j]
209 srlx $car0,32,$car0
210 add $acc0,$car1,$car1
211 add $j,4,$j ! j++
212 mov $tmp0,$acc0
213 st $car1,[$tp] ! tp[j-1]
214 srlx $car1,32,$car1
215 mov $tmp1,$acc1
216 cmp $j,$num
217 bl %icc,.Linner
218 add $tp,4,$tp ! tp++
219!.Linner
220
221 mulx $apj,$mul0,$tmp0 !epilogue!
222 mulx $npj,$mul1,$tmp1
223 add $tpj,$car0,$car0
224 add $acc0,$car0,$car0
225 ld [$tp+8],$tpj ! tp[j]
226 and $car0,$mask,$acc0
227 add $acc1,$car1,$car1
228 srlx $car0,32,$car0
229 add $acc0,$car1,$car1
230 st $car1,[$tp] ! tp[j-1]
231 srlx $car1,32,$car1
232
233 add $tpj,$car0,$car0
234 add $tmp0,$car0,$car0
235 and $car0,$mask,$acc0
236 add $tmp1,$car1,$car1
237 add $acc0,$car1,$car1
238 st $car1,[$tp+4] ! tp[j-1]
239 srlx $car0,32,$car0
240 add $i,4,$i ! i++
241 srlx $car1,32,$car1
242
243 add $car0,$car1,$car1
244 cmp $i,$num
245 add $car2,$car1,$car1
246 st $car1,[$tp+8]
247
248 srlx $car1,32,$car2
249 bl,a %icc,.Louter
250 ld [$bp+$i],$mul0 ! bp[i]
251!.Louter
252
253 add $tp,12,$tp
254
255.Ltail:
256 add $np,$num,$np
257 add $rp,$num,$rp
258 mov $tp,$ap
259 sub %g0,$num,%o7 ! k=-num
260 ba .Lsub
261 subcc %g0,%g0,%g0 ! clear %icc.c
262.align 16
263.Lsub:
264 ld [$tp+%o7],%o0
265 ld [$np+%o7],%o1
266 subccc %o0,%o1,%o1 ! tp[j]-np[j]
267 add $rp,%o7,$i
268 add %o7,4,%o7
269 brnz %o7,.Lsub
270 st %o1,[$i]
271 subc $car2,0,$car2 ! handle upmost overflow bit
272 and $tp,$car2,$ap
273 andn $rp,$car2,$np
274 or $ap,$np,$ap
275 sub %g0,$num,%o7
276
277.Lcopy:
278 ld [$ap+%o7],%o0 ! copy or in-place refresh
279 st %g0,[$tp+%o7] ! zap tp
280 st %o0,[$rp+%o7]
281 add %o7,4,%o7
282 brnz %o7,.Lcopy
283 nop
284 mov 1,%i0
285 ret
286 restore
287___
288
289########
290######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
291######## code without following dedicated squaring procedure.
292########
293$sbit="%i2"; # re-use $bp!
294
295$code.=<<___;
296.align 32
297.Lbn_sqr_mont:
298 mulx $mul0,$mul0,$car0 ! ap[0]*ap[0]
299 mulx $apj,$mul0,$tmp0 !prologue!
300 and $car0,$mask,$acc0
301 add %sp,$bias+$frame,$tp
302 ld [$ap+8],$apj !prologue!
303
304 mulx $n0,$acc0,$mul1 ! "t[0]"*n0
305 srlx $car0,32,$car0
306 and $mul1,$mask,$mul1
307
308 mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
309 mulx $npj,$mul1,$acc1 !prologue!
310 and $car0,1,$sbit
311 ld [$np+8],$npj !prologue!
312 srlx $car0,1,$car0
313 add $acc0,$car1,$car1
314 srlx $car1,32,$car1
315 mov $tmp0,$acc0 !prologue!
316
317.Lsqr_1st:
318 mulx $apj,$mul0,$tmp0
319 mulx $npj,$mul1,$tmp1
320 add $acc0,$car0,$car0 ! ap[j]*a0+c0
321 add $acc1,$car1,$car1
322 ld [$ap+$j],$apj ! ap[j]
323 and $car0,$mask,$acc0
324 ld [$np+$j],$npj ! np[j]
325 srlx $car0,32,$car0
326 add $acc0,$acc0,$acc0
327 or $sbit,$acc0,$acc0
328 mov $tmp1,$acc1
329 srlx $acc0,32,$sbit
330 add $j,4,$j ! j++
331 and $acc0,$mask,$acc0
332 cmp $j,$num
333 add $acc0,$car1,$car1
334 st $car1,[$tp]
335 mov $tmp0,$acc0
336 srlx $car1,32,$car1
337 bl %icc,.Lsqr_1st
338 add $tp,4,$tp ! tp++
339!.Lsqr_1st
340
341 mulx $apj,$mul0,$tmp0 ! epilogue
342 mulx $npj,$mul1,$tmp1
343 add $acc0,$car0,$car0 ! ap[j]*a0+c0
344 add $acc1,$car1,$car1
345 and $car0,$mask,$acc0
346 srlx $car0,32,$car0
347 add $acc0,$acc0,$acc0
348 or $sbit,$acc0,$acc0
349 srlx $acc0,32,$sbit
350 and $acc0,$mask,$acc0
351 add $acc0,$car1,$car1
352 st $car1,[$tp]
353 srlx $car1,32,$car1
354
355 add $tmp0,$car0,$car0 ! ap[j]*a0+c0
356 add $tmp1,$car1,$car1
357 and $car0,$mask,$acc0
358 srlx $car0,32,$car0
359 add $acc0,$acc0,$acc0
360 or $sbit,$acc0,$acc0
361 srlx $acc0,32,$sbit
362 and $acc0,$mask,$acc0
363 add $acc0,$car1,$car1
364 st $car1,[$tp+4]
365 srlx $car1,32,$car1
366
367 add $car0,$car0,$car0
368 or $sbit,$car0,$car0
369 add $car0,$car1,$car1
370 st $car1,[$tp+8]
371 srlx $car1,32,$car2
372
373 ld [%sp+$bias+$frame],$tmp0 ! tp[0]
374 ld [%sp+$bias+$frame+4],$tmp1 ! tp[1]
375 ld [%sp+$bias+$frame+8],$tpj ! tp[2]
376 ld [$ap+4],$mul0 ! ap[1]
377 ld [$ap+8],$apj ! ap[2]
378 ld [$np],$car1 ! np[0]
379 ld [$np+4],$npj ! np[1]
380 mulx $n0,$tmp0,$mul1
381
382 mulx $mul0,$mul0,$car0
383 and $mul1,$mask,$mul1
384
385 mulx $car1,$mul1,$car1
386 mulx $npj,$mul1,$acc1
387 add $tmp0,$car1,$car1
388 and $car0,$mask,$acc0
389 ld [$np+8],$npj ! np[2]
390 srlx $car1,32,$car1
391 add $tmp1,$car1,$car1
392 srlx $car0,32,$car0
393 add $acc0,$car1,$car1
394 and $car0,1,$sbit
395 add $acc1,$car1,$car1
396 srlx $car0,1,$car0
397 mov 12,$j
398 st $car1,[%sp+$bias+$frame] ! tp[0]=
399 srlx $car1,32,$car1
400 add %sp,$bias+$frame+4,$tp
401
402.Lsqr_2nd:
403 mulx $apj,$mul0,$acc0
404 mulx $npj,$mul1,$acc1
405 add $acc0,$car0,$car0
406 add $tpj,$car1,$car1
407 ld [$ap+$j],$apj ! ap[j]
408 and $car0,$mask,$acc0
409 ld [$np+$j],$npj ! np[j]
410 srlx $car0,32,$car0
411 add $acc1,$car1,$car1
412 ld [$tp+8],$tpj ! tp[j]
413 add $acc0,$acc0,$acc0
414 add $j,4,$j ! j++
415 or $sbit,$acc0,$acc0
416 srlx $acc0,32,$sbit
417 and $acc0,$mask,$acc0
418 cmp $j,$num
419 add $acc0,$car1,$car1
420 st $car1,[$tp] ! tp[j-1]
421 srlx $car1,32,$car1
422 bl %icc,.Lsqr_2nd
423 add $tp,4,$tp ! tp++
424!.Lsqr_2nd
425
426 mulx $apj,$mul0,$acc0
427 mulx $npj,$mul1,$acc1
428 add $acc0,$car0,$car0
429 add $tpj,$car1,$car1
430 and $car0,$mask,$acc0
431 srlx $car0,32,$car0
432 add $acc1,$car1,$car1
433 add $acc0,$acc0,$acc0
434 or $sbit,$acc0,$acc0
435 srlx $acc0,32,$sbit
436 and $acc0,$mask,$acc0
437 add $acc0,$car1,$car1
438 st $car1,[$tp] ! tp[j-1]
439 srlx $car1,32,$car1
440
441 add $car0,$car0,$car0
442 or $sbit,$car0,$car0
443 add $car0,$car1,$car1
444 add $car2,$car1,$car1
445 st $car1,[$tp+4]
446 srlx $car1,32,$car2
447
448 ld [%sp+$bias+$frame],$tmp1 ! tp[0]
449 ld [%sp+$bias+$frame+4],$tpj ! tp[1]
450 ld [$ap+8],$mul0 ! ap[2]
451 ld [$np],$car1 ! np[0]
452 ld [$np+4],$npj ! np[1]
453 mulx $n0,$tmp1,$mul1
454 and $mul1,$mask,$mul1
455 mov 8,$i
456
457 mulx $mul0,$mul0,$car0
458 mulx $car1,$mul1,$car1
459 and $car0,$mask,$acc0
460 add $tmp1,$car1,$car1
461 srlx $car0,32,$car0
462 add %sp,$bias+$frame,$tp
463 srlx $car1,32,$car1
464 and $car0,1,$sbit
465 srlx $car0,1,$car0
466 mov 4,$j
467
468.Lsqr_outer:
469.Lsqr_inner1:
470 mulx $npj,$mul1,$acc1
471 add $tpj,$car1,$car1
472 add $j,4,$j
473 ld [$tp+8],$tpj
474 cmp $j,$i
475 add $acc1,$car1,$car1
476 ld [$np+$j],$npj
477 st $car1,[$tp]
478 srlx $car1,32,$car1
479 bl %icc,.Lsqr_inner1
480 add $tp,4,$tp
481!.Lsqr_inner1
482
483 add $j,4,$j
484 ld [$ap+$j],$apj ! ap[j]
485 mulx $npj,$mul1,$acc1
486 add $tpj,$car1,$car1
487 ld [$np+$j],$npj ! np[j]
488 add $acc0,$car1,$car1
489 ld [$tp+8],$tpj ! tp[j]
490 add $acc1,$car1,$car1
491 st $car1,[$tp]
492 srlx $car1,32,$car1
493
494 add $j,4,$j
495 cmp $j,$num
496 be,pn %icc,.Lsqr_no_inner2
497 add $tp,4,$tp
498
499.Lsqr_inner2:
500 mulx $apj,$mul0,$acc0
501 mulx $npj,$mul1,$acc1
502 add $tpj,$car1,$car1
503 add $acc0,$car0,$car0
504 ld [$ap+$j],$apj ! ap[j]
505 and $car0,$mask,$acc0
506 ld [$np+$j],$npj ! np[j]
507 srlx $car0,32,$car0
508 add $acc0,$acc0,$acc0
509 ld [$tp+8],$tpj ! tp[j]
510 or $sbit,$acc0,$acc0
511 add $j,4,$j ! j++
512 srlx $acc0,32,$sbit
513 and $acc0,$mask,$acc0
514 cmp $j,$num
515 add $acc0,$car1,$car1
516 add $acc1,$car1,$car1
517 st $car1,[$tp] ! tp[j-1]
518 srlx $car1,32,$car1
519 bl %icc,.Lsqr_inner2
520 add $tp,4,$tp ! tp++
521
522.Lsqr_no_inner2:
523 mulx $apj,$mul0,$acc0
524 mulx $npj,$mul1,$acc1
525 add $tpj,$car1,$car1
526 add $acc0,$car0,$car0
527 and $car0,$mask,$acc0
528 srlx $car0,32,$car0
529 add $acc0,$acc0,$acc0
530 or $sbit,$acc0,$acc0
531 srlx $acc0,32,$sbit
532 and $acc0,$mask,$acc0
533 add $acc0,$car1,$car1
534 add $acc1,$car1,$car1
535 st $car1,[$tp] ! tp[j-1]
536 srlx $car1,32,$car1
537
538 add $car0,$car0,$car0
539 or $sbit,$car0,$car0
540 add $car0,$car1,$car1
541 add $car2,$car1,$car1
542 st $car1,[$tp+4]
543 srlx $car1,32,$car2
544
545 add $i,4,$i ! i++
546 ld [%sp+$bias+$frame],$tmp1 ! tp[0]
547 ld [%sp+$bias+$frame+4],$tpj ! tp[1]
548 ld [$ap+$i],$mul0 ! ap[j]
549 ld [$np],$car1 ! np[0]
550 ld [$np+4],$npj ! np[1]
551 mulx $n0,$tmp1,$mul1
552 and $mul1,$mask,$mul1
553 add $i,4,$tmp0
554
555 mulx $mul0,$mul0,$car0
556 mulx $car1,$mul1,$car1
557 and $car0,$mask,$acc0
558 add $tmp1,$car1,$car1
559 srlx $car0,32,$car0
560 add %sp,$bias+$frame,$tp
561 srlx $car1,32,$car1
562 and $car0,1,$sbit
563 srlx $car0,1,$car0
564
565 cmp $tmp0,$num ! i<num-1
566 bl %icc,.Lsqr_outer
567 mov 4,$j
568
569.Lsqr_last:
570 mulx $npj,$mul1,$acc1
571 add $tpj,$car1,$car1
572 add $j,4,$j
573 ld [$tp+8],$tpj
574 cmp $j,$i
575 add $acc1,$car1,$car1
576 ld [$np+$j],$npj
577 st $car1,[$tp]
578 srlx $car1,32,$car1
579 bl %icc,.Lsqr_last
580 add $tp,4,$tp
581!.Lsqr_last
582
583 mulx $npj,$mul1,$acc1
584 add $tpj,$car1,$car1
585 add $acc0,$car1,$car1
586 add $acc1,$car1,$car1
587 st $car1,[$tp]
588 srlx $car1,32,$car1
589
590 add $car0,$car0,$car0 ! recover $car0
591 or $sbit,$car0,$car0
592 add $car0,$car1,$car1
593 add $car2,$car1,$car1
594 st $car1,[$tp+4]
595 srlx $car1,32,$car2
596
597 ba .Ltail
598 add $tp,8,$tp
599.type $fname,#function
600.size $fname,(.-$fname)
601.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
602.align 32
603___
604$code =~ s/\`([^\`]*)\`/eval($1)/gem;
605print $code;
606close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl b/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
deleted file mode 100755
index a14205f2f0..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv9a-mont.pl
+++ /dev/null
@@ -1,882 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005
11#
12# "Teaser" Montgomery multiplication module for UltraSPARC. Why FPU?
13# Because unlike integer multiplier, which simply stalls whole CPU,
14# FPU is fully pipelined and can effectively emit 48 bit partial
15# product every cycle. Why not blended SPARC v9? One can argue that
16# making this module dependent on UltraSPARC VIS extension limits its
17# binary compatibility. Well yes, it does exclude SPARC64 prior-V(!)
18# implementations from compatibility matrix. But the rest, whole Sun
19# UltraSPARC family and brand new Fujitsu's SPARC64 V, all support
20# VIS extension instructions used in this module. This is considered
21# good enough to not care about HAL SPARC64 users [if any] who have
22# integer-only pure SPARCv9 module to "fall down" to.
23
24# USI&II cores currently exhibit uniform 2x improvement [over pre-
25# bn_mul_mont codebase] for all key lengths and benchmarks. On USIII
26# performance improves few percents for shorter keys and worsens few
27# percents for longer keys. This is because USIII integer multiplier
28# is >3x faster than USI&II one, which is harder to match [but see
29# TODO list below]. It should also be noted that SPARC64 V features
30# out-of-order execution, which *might* mean that integer multiplier
31# is pipelined, which in turn *might* be impossible to match... On
32# additional note, SPARC64 V implements FP Multiply-Add instruction,
33# which is perfectly usable in this context... In other words, as far
34# as Fujitsu SPARC64 V goes, talk to the author:-)
35
36# The implementation implies following "non-natural" limitations on
37# input arguments:
38# - num may not be less than 4;
39# - num has to be even;
40# Failure to meet either condition has no fatal effects, simply
41# doesn't give any performance gain.
42
43# TODO:
44# - modulo-schedule inner loop for better performance (on in-order
45# execution core such as UltraSPARC this shall result in further
46# noticeable(!) improvement);
47# - dedicated squaring procedure[?];
48
49######################################################################
50# November 2006
51#
52# Modulo-scheduled inner loops allow to interleave floating point and
53# integer instructions and minimize Read-After-Write penalties. This
54# results in *further* 20-50% perfromance improvement [depending on
55# key length, more for longer keys] on USI&II cores and 30-80% - on
56# USIII&IV.
57
58$fname="bn_mul_mont_fpu";
59$bits=32;
60for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
61
62if ($bits==64) {
63 $bias=2047;
64 $frame=192;
65} else {
66 $bias=0;
67 $frame=128; # 96 rounded up to largest known cache-line
68}
69$locals=64;
70
71# In order to provide for 32-/64-bit ABI duality, I keep integers wider
72# than 32 bit in %g1-%g4 and %o0-%o5. %l0-%l7 and %i0-%i5 are used
73# exclusively for pointers, indexes and other small values...
74# int bn_mul_mont(
75$rp="%i0"; # BN_ULONG *rp,
76$ap="%i1"; # const BN_ULONG *ap,
77$bp="%i2"; # const BN_ULONG *bp,
78$np="%i3"; # const BN_ULONG *np,
79$n0="%i4"; # const BN_ULONG *n0,
80$num="%i5"; # int num);
81
82$tp="%l0"; # t[num]
83$ap_l="%l1"; # a[num],n[num] are smashed to 32-bit words and saved
84$ap_h="%l2"; # to these four vectors as double-precision FP values.
85$np_l="%l3"; # This way a bunch of fxtods are eliminated in second
86$np_h="%l4"; # loop and L1-cache aliasing is minimized...
87$i="%l5";
88$j="%l6";
89$mask="%l7"; # 16-bit mask, 0xffff
90
91$n0="%g4"; # reassigned(!) to "64-bit" register
92$carry="%i4"; # %i4 reused(!) for a carry bit
93
94# FP register naming chart
95#
96# ..HILO
97# dcba
98# --------
99# LOa
100# LOb
101# LOc
102# LOd
103# HIa
104# HIb
105# HIc
106# HId
107# ..a
108# ..b
109$ba="%f0"; $bb="%f2"; $bc="%f4"; $bd="%f6";
110$na="%f8"; $nb="%f10"; $nc="%f12"; $nd="%f14";
111$alo="%f16"; $alo_="%f17"; $ahi="%f18"; $ahi_="%f19";
112$nlo="%f20"; $nlo_="%f21"; $nhi="%f22"; $nhi_="%f23";
113
114$dota="%f24"; $dotb="%f26";
115
116$aloa="%f32"; $alob="%f34"; $aloc="%f36"; $alod="%f38";
117$ahia="%f40"; $ahib="%f42"; $ahic="%f44"; $ahid="%f46";
118$nloa="%f48"; $nlob="%f50"; $nloc="%f52"; $nlod="%f54";
119$nhia="%f56"; $nhib="%f58"; $nhic="%f60"; $nhid="%f62";
120
121$ASI_FL16_P=0xD2; # magic ASI value to engage 16-bit FP load
122
123$code=<<___;
124.section ".text",#alloc,#execinstr
125
126.global $fname
127.align 32
128$fname:
129 save %sp,-$frame-$locals,%sp
130
131 cmp $num,4
132 bl,a,pn %icc,.Lret
133 clr %i0
134 andcc $num,1,%g0 ! $num has to be even...
135 bnz,a,pn %icc,.Lret
136 clr %i0 ! signal "unsupported input value"
137
138 srl $num,1,$num
139 sethi %hi(0xffff),$mask
140 ld [%i4+0],$n0 ! $n0 reassigned, remember?
141 or $mask,%lo(0xffff),$mask
142 ld [%i4+4],%o0
143 sllx %o0,32,%o0
144 or %o0,$n0,$n0 ! $n0=n0[1].n0[0]
145
146 sll $num,3,$num ! num*=8
147
148 add %sp,$bias,%o0 ! real top of stack
149 sll $num,2,%o1
150 add %o1,$num,%o1 ! %o1=num*5
151 sub %o0,%o1,%o0
152 and %o0,-2048,%o0 ! optimize TLB utilization
153 sub %o0,$bias,%sp ! alloca(5*num*8)
154
155 rd %asi,%o7 ! save %asi
156 add %sp,$bias+$frame+$locals,$tp
157 add $tp,$num,$ap_l
158 add $ap_l,$num,$ap_l ! [an]p_[lh] point at the vectors' ends !
159 add $ap_l,$num,$ap_h
160 add $ap_h,$num,$np_l
161 add $np_l,$num,$np_h
162
163 wr %g0,$ASI_FL16_P,%asi ! setup %asi for 16-bit FP loads
164
165 add $rp,$num,$rp ! readjust input pointers to point
166 add $ap,$num,$ap ! at the ends too...
167 add $bp,$num,$bp
168 add $np,$num,$np
169
170 stx %o7,[%sp+$bias+$frame+48] ! save %asi
171
172 sub %g0,$num,$i ! i=-num
173 sub %g0,$num,$j ! j=-num
174
175 add $ap,$j,%o3
176 add $bp,$i,%o4
177
178 ld [%o3+4],%g1 ! bp[0]
179 ld [%o3+0],%o0
180 ld [%o4+4],%g5 ! ap[0]
181 sllx %g1,32,%g1
182 ld [%o4+0],%o1
183 sllx %g5,32,%g5
184 or %g1,%o0,%o0
185 or %g5,%o1,%o1
186
187 add $np,$j,%o5
188
189 mulx %o1,%o0,%o0 ! ap[0]*bp[0]
190 mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0
191 stx %o0,[%sp+$bias+$frame+0]
192
193 ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words
194 fzeros $alo
195 ld [%o3+4],$ahi_
196 fzeros $ahi
197 ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
198 fzeros $nlo
199 ld [%o5+4],$nhi_
200 fzeros $nhi
201
202 ! transfer b[i] to FPU as 4x16-bit values
203 ldda [%o4+2]%asi,$ba
204 fxtod $alo,$alo
205 ldda [%o4+0]%asi,$bb
206 fxtod $ahi,$ahi
207 ldda [%o4+6]%asi,$bc
208 fxtod $nlo,$nlo
209 ldda [%o4+4]%asi,$bd
210 fxtod $nhi,$nhi
211
212 ! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
213 ldda [%sp+$bias+$frame+6]%asi,$na
214 fxtod $ba,$ba
215 ldda [%sp+$bias+$frame+4]%asi,$nb
216 fxtod $bb,$bb
217 ldda [%sp+$bias+$frame+2]%asi,$nc
218 fxtod $bc,$bc
219 ldda [%sp+$bias+$frame+0]%asi,$nd
220 fxtod $bd,$bd
221
222 std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
223 fxtod $na,$na
224 std $ahi,[$ap_h+$j]
225 fxtod $nb,$nb
226 std $nlo,[$np_l+$j] ! save smashed np[j] in double format
227 fxtod $nc,$nc
228 std $nhi,[$np_h+$j]
229 fxtod $nd,$nd
230
231 fmuld $alo,$ba,$aloa
232 fmuld $nlo,$na,$nloa
233 fmuld $alo,$bb,$alob
234 fmuld $nlo,$nb,$nlob
235 fmuld $alo,$bc,$aloc
236 faddd $aloa,$nloa,$nloa
237 fmuld $nlo,$nc,$nloc
238 fmuld $alo,$bd,$alod
239 faddd $alob,$nlob,$nlob
240 fmuld $nlo,$nd,$nlod
241 fmuld $ahi,$ba,$ahia
242 faddd $aloc,$nloc,$nloc
243 fmuld $nhi,$na,$nhia
244 fmuld $ahi,$bb,$ahib
245 faddd $alod,$nlod,$nlod
246 fmuld $nhi,$nb,$nhib
247 fmuld $ahi,$bc,$ahic
248 faddd $ahia,$nhia,$nhia
249 fmuld $nhi,$nc,$nhic
250 fmuld $ahi,$bd,$ahid
251 faddd $ahib,$nhib,$nhib
252 fmuld $nhi,$nd,$nhid
253
254 faddd $ahic,$nhic,$dota ! $nhic
255 faddd $ahid,$nhid,$dotb ! $nhid
256
257 faddd $nloc,$nhia,$nloc
258 faddd $nlod,$nhib,$nlod
259
260 fdtox $nloa,$nloa
261 fdtox $nlob,$nlob
262 fdtox $nloc,$nloc
263 fdtox $nlod,$nlod
264
265 std $nloa,[%sp+$bias+$frame+0]
266 add $j,8,$j
267 std $nlob,[%sp+$bias+$frame+8]
268 add $ap,$j,%o4
269 std $nloc,[%sp+$bias+$frame+16]
270 add $np,$j,%o5
271 std $nlod,[%sp+$bias+$frame+24]
272
273 ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
274 fzeros $alo
275 ld [%o4+4],$ahi_
276 fzeros $ahi
277 ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
278 fzeros $nlo
279 ld [%o5+4],$nhi_
280 fzeros $nhi
281
282 fxtod $alo,$alo
283 fxtod $ahi,$ahi
284 fxtod $nlo,$nlo
285 fxtod $nhi,$nhi
286
287 ldx [%sp+$bias+$frame+0],%o0
288 fmuld $alo,$ba,$aloa
289 ldx [%sp+$bias+$frame+8],%o1
290 fmuld $nlo,$na,$nloa
291 ldx [%sp+$bias+$frame+16],%o2
292 fmuld $alo,$bb,$alob
293 ldx [%sp+$bias+$frame+24],%o3
294 fmuld $nlo,$nb,$nlob
295
296 srlx %o0,16,%o7
297 std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
298 fmuld $alo,$bc,$aloc
299 add %o7,%o1,%o1
300 std $ahi,[$ap_h+$j]
301 faddd $aloa,$nloa,$nloa
302 fmuld $nlo,$nc,$nloc
303 srlx %o1,16,%o7
304 std $nlo,[$np_l+$j] ! save smashed np[j] in double format
305 fmuld $alo,$bd,$alod
306 add %o7,%o2,%o2
307 std $nhi,[$np_h+$j]
308 faddd $alob,$nlob,$nlob
309 fmuld $nlo,$nd,$nlod
310 srlx %o2,16,%o7
311 fmuld $ahi,$ba,$ahia
312 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
313 faddd $aloc,$nloc,$nloc
314 fmuld $nhi,$na,$nhia
315 !and %o0,$mask,%o0
316 !and %o1,$mask,%o1
317 !and %o2,$mask,%o2
318 !sllx %o1,16,%o1
319 !sllx %o2,32,%o2
320 !sllx %o3,48,%o7
321 !or %o1,%o0,%o0
322 !or %o2,%o0,%o0
323 !or %o7,%o0,%o0 ! 64-bit result
324 srlx %o3,16,%g1 ! 34-bit carry
325 fmuld $ahi,$bb,$ahib
326
327 faddd $alod,$nlod,$nlod
328 fmuld $nhi,$nb,$nhib
329 fmuld $ahi,$bc,$ahic
330 faddd $ahia,$nhia,$nhia
331 fmuld $nhi,$nc,$nhic
332 fmuld $ahi,$bd,$ahid
333 faddd $ahib,$nhib,$nhib
334 fmuld $nhi,$nd,$nhid
335
336 faddd $dota,$nloa,$nloa
337 faddd $dotb,$nlob,$nlob
338 faddd $ahic,$nhic,$dota ! $nhic
339 faddd $ahid,$nhid,$dotb ! $nhid
340
341 faddd $nloc,$nhia,$nloc
342 faddd $nlod,$nhib,$nlod
343
344 fdtox $nloa,$nloa
345 fdtox $nlob,$nlob
346 fdtox $nloc,$nloc
347 fdtox $nlod,$nlod
348
349 std $nloa,[%sp+$bias+$frame+0]
350 std $nlob,[%sp+$bias+$frame+8]
351 addcc $j,8,$j
352 std $nloc,[%sp+$bias+$frame+16]
353 bz,pn %icc,.L1stskip
354 std $nlod,[%sp+$bias+$frame+24]
355
356.align 32 ! incidentally already aligned !
357.L1st:
358 add $ap,$j,%o4
359 add $np,$j,%o5
360 ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
361 fzeros $alo
362 ld [%o4+4],$ahi_
363 fzeros $ahi
364 ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
365 fzeros $nlo
366 ld [%o5+4],$nhi_
367 fzeros $nhi
368
369 fxtod $alo,$alo
370 fxtod $ahi,$ahi
371 fxtod $nlo,$nlo
372 fxtod $nhi,$nhi
373
374 ldx [%sp+$bias+$frame+0],%o0
375 fmuld $alo,$ba,$aloa
376 ldx [%sp+$bias+$frame+8],%o1
377 fmuld $nlo,$na,$nloa
378 ldx [%sp+$bias+$frame+16],%o2
379 fmuld $alo,$bb,$alob
380 ldx [%sp+$bias+$frame+24],%o3
381 fmuld $nlo,$nb,$nlob
382
383 srlx %o0,16,%o7
384 std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
385 fmuld $alo,$bc,$aloc
386 add %o7,%o1,%o1
387 std $ahi,[$ap_h+$j]
388 faddd $aloa,$nloa,$nloa
389 fmuld $nlo,$nc,$nloc
390 srlx %o1,16,%o7
391 std $nlo,[$np_l+$j] ! save smashed np[j] in double format
392 fmuld $alo,$bd,$alod
393 add %o7,%o2,%o2
394 std $nhi,[$np_h+$j]
395 faddd $alob,$nlob,$nlob
396 fmuld $nlo,$nd,$nlod
397 srlx %o2,16,%o7
398 fmuld $ahi,$ba,$ahia
399 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
400 and %o0,$mask,%o0
401 faddd $aloc,$nloc,$nloc
402 fmuld $nhi,$na,$nhia
403 and %o1,$mask,%o1
404 and %o2,$mask,%o2
405 fmuld $ahi,$bb,$ahib
406 sllx %o1,16,%o1
407 faddd $alod,$nlod,$nlod
408 fmuld $nhi,$nb,$nhib
409 sllx %o2,32,%o2
410 fmuld $ahi,$bc,$ahic
411 sllx %o3,48,%o7
412 or %o1,%o0,%o0
413 faddd $ahia,$nhia,$nhia
414 fmuld $nhi,$nc,$nhic
415 or %o2,%o0,%o0
416 fmuld $ahi,$bd,$ahid
417 or %o7,%o0,%o0 ! 64-bit result
418 faddd $ahib,$nhib,$nhib
419 fmuld $nhi,$nd,$nhid
420 addcc %g1,%o0,%o0
421 faddd $dota,$nloa,$nloa
422 srlx %o3,16,%g1 ! 34-bit carry
423 faddd $dotb,$nlob,$nlob
424 bcs,a %xcc,.+8
425 add %g1,1,%g1
426
427 stx %o0,[$tp] ! tp[j-1]=
428
429 faddd $ahic,$nhic,$dota ! $nhic
430 faddd $ahid,$nhid,$dotb ! $nhid
431
432 faddd $nloc,$nhia,$nloc
433 faddd $nlod,$nhib,$nlod
434
435 fdtox $nloa,$nloa
436 fdtox $nlob,$nlob
437 fdtox $nloc,$nloc
438 fdtox $nlod,$nlod
439
440 std $nloa,[%sp+$bias+$frame+0]
441 std $nlob,[%sp+$bias+$frame+8]
442 std $nloc,[%sp+$bias+$frame+16]
443 std $nlod,[%sp+$bias+$frame+24]
444
445 addcc $j,8,$j
446 bnz,pt %icc,.L1st
447 add $tp,8,$tp
448
449.L1stskip:
450 fdtox $dota,$dota
451 fdtox $dotb,$dotb
452
453 ldx [%sp+$bias+$frame+0],%o0
454 ldx [%sp+$bias+$frame+8],%o1
455 ldx [%sp+$bias+$frame+16],%o2
456 ldx [%sp+$bias+$frame+24],%o3
457
458 srlx %o0,16,%o7
459 std $dota,[%sp+$bias+$frame+32]
460 add %o7,%o1,%o1
461 std $dotb,[%sp+$bias+$frame+40]
462 srlx %o1,16,%o7
463 add %o7,%o2,%o2
464 srlx %o2,16,%o7
465 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
466 and %o0,$mask,%o0
467 and %o1,$mask,%o1
468 and %o2,$mask,%o2
469 sllx %o1,16,%o1
470 sllx %o2,32,%o2
471 sllx %o3,48,%o7
472 or %o1,%o0,%o0
473 or %o2,%o0,%o0
474 or %o7,%o0,%o0 ! 64-bit result
475 ldx [%sp+$bias+$frame+32],%o4
476 addcc %g1,%o0,%o0
477 ldx [%sp+$bias+$frame+40],%o5
478 srlx %o3,16,%g1 ! 34-bit carry
479 bcs,a %xcc,.+8
480 add %g1,1,%g1
481
482 stx %o0,[$tp] ! tp[j-1]=
483 add $tp,8,$tp
484
485 srlx %o4,16,%o7
486 add %o7,%o5,%o5
487 and %o4,$mask,%o4
488 sllx %o5,16,%o7
489 or %o7,%o4,%o4
490 addcc %g1,%o4,%o4
491 srlx %o5,48,%g1
492 bcs,a %xcc,.+8
493 add %g1,1,%g1
494
495 mov %g1,$carry
496 stx %o4,[$tp] ! tp[num-1]=
497
498 ba .Louter
499 add $i,8,$i
500.align 32
501.Louter:
502 sub %g0,$num,$j ! j=-num
503 add %sp,$bias+$frame+$locals,$tp
504
505 add $ap,$j,%o3
506 add $bp,$i,%o4
507
508 ld [%o3+4],%g1 ! bp[i]
509 ld [%o3+0],%o0
510 ld [%o4+4],%g5 ! ap[0]
511 sllx %g1,32,%g1
512 ld [%o4+0],%o1
513 sllx %g5,32,%g5
514 or %g1,%o0,%o0
515 or %g5,%o1,%o1
516
517 ldx [$tp],%o2 ! tp[0]
518 mulx %o1,%o0,%o0
519 addcc %o2,%o0,%o0
520 mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0
521 stx %o0,[%sp+$bias+$frame+0]
522
523 ! transfer b[i] to FPU as 4x16-bit values
524 ldda [%o4+2]%asi,$ba
525 ldda [%o4+0]%asi,$bb
526 ldda [%o4+6]%asi,$bc
527 ldda [%o4+4]%asi,$bd
528
529 ! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
530 ldda [%sp+$bias+$frame+6]%asi,$na
531 fxtod $ba,$ba
532 ldda [%sp+$bias+$frame+4]%asi,$nb
533 fxtod $bb,$bb
534 ldda [%sp+$bias+$frame+2]%asi,$nc
535 fxtod $bc,$bc
536 ldda [%sp+$bias+$frame+0]%asi,$nd
537 fxtod $bd,$bd
538 ldd [$ap_l+$j],$alo ! load a[j] in double format
539 fxtod $na,$na
540 ldd [$ap_h+$j],$ahi
541 fxtod $nb,$nb
542 ldd [$np_l+$j],$nlo ! load n[j] in double format
543 fxtod $nc,$nc
544 ldd [$np_h+$j],$nhi
545 fxtod $nd,$nd
546
547 fmuld $alo,$ba,$aloa
548 fmuld $nlo,$na,$nloa
549 fmuld $alo,$bb,$alob
550 fmuld $nlo,$nb,$nlob
551 fmuld $alo,$bc,$aloc
552 faddd $aloa,$nloa,$nloa
553 fmuld $nlo,$nc,$nloc
554 fmuld $alo,$bd,$alod
555 faddd $alob,$nlob,$nlob
556 fmuld $nlo,$nd,$nlod
557 fmuld $ahi,$ba,$ahia
558 faddd $aloc,$nloc,$nloc
559 fmuld $nhi,$na,$nhia
560 fmuld $ahi,$bb,$ahib
561 faddd $alod,$nlod,$nlod
562 fmuld $nhi,$nb,$nhib
563 fmuld $ahi,$bc,$ahic
564 faddd $ahia,$nhia,$nhia
565 fmuld $nhi,$nc,$nhic
566 fmuld $ahi,$bd,$ahid
567 faddd $ahib,$nhib,$nhib
568 fmuld $nhi,$nd,$nhid
569
570 faddd $ahic,$nhic,$dota ! $nhic
571 faddd $ahid,$nhid,$dotb ! $nhid
572
573 faddd $nloc,$nhia,$nloc
574 faddd $nlod,$nhib,$nlod
575
576 fdtox $nloa,$nloa
577 fdtox $nlob,$nlob
578 fdtox $nloc,$nloc
579 fdtox $nlod,$nlod
580
581 std $nloa,[%sp+$bias+$frame+0]
582 std $nlob,[%sp+$bias+$frame+8]
583 std $nloc,[%sp+$bias+$frame+16]
584 add $j,8,$j
585 std $nlod,[%sp+$bias+$frame+24]
586
587 ldd [$ap_l+$j],$alo ! load a[j] in double format
588 ldd [$ap_h+$j],$ahi
589 ldd [$np_l+$j],$nlo ! load n[j] in double format
590 ldd [$np_h+$j],$nhi
591
592 fmuld $alo,$ba,$aloa
593 fmuld $nlo,$na,$nloa
594 fmuld $alo,$bb,$alob
595 fmuld $nlo,$nb,$nlob
596 fmuld $alo,$bc,$aloc
597 ldx [%sp+$bias+$frame+0],%o0
598 faddd $aloa,$nloa,$nloa
599 fmuld $nlo,$nc,$nloc
600 ldx [%sp+$bias+$frame+8],%o1
601 fmuld $alo,$bd,$alod
602 ldx [%sp+$bias+$frame+16],%o2
603 faddd $alob,$nlob,$nlob
604 fmuld $nlo,$nd,$nlod
605 ldx [%sp+$bias+$frame+24],%o3
606 fmuld $ahi,$ba,$ahia
607
608 srlx %o0,16,%o7
609 faddd $aloc,$nloc,$nloc
610 fmuld $nhi,$na,$nhia
611 add %o7,%o1,%o1
612 fmuld $ahi,$bb,$ahib
613 srlx %o1,16,%o7
614 faddd $alod,$nlod,$nlod
615 fmuld $nhi,$nb,$nhib
616 add %o7,%o2,%o2
617 fmuld $ahi,$bc,$ahic
618 srlx %o2,16,%o7
619 faddd $ahia,$nhia,$nhia
620 fmuld $nhi,$nc,$nhic
621 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
622 ! why?
623 and %o0,$mask,%o0
624 fmuld $ahi,$bd,$ahid
625 and %o1,$mask,%o1
626 and %o2,$mask,%o2
627 faddd $ahib,$nhib,$nhib
628 fmuld $nhi,$nd,$nhid
629 sllx %o1,16,%o1
630 faddd $dota,$nloa,$nloa
631 sllx %o2,32,%o2
632 faddd $dotb,$nlob,$nlob
633 sllx %o3,48,%o7
634 or %o1,%o0,%o0
635 faddd $ahic,$nhic,$dota ! $nhic
636 or %o2,%o0,%o0
637 faddd $ahid,$nhid,$dotb ! $nhid
638 or %o7,%o0,%o0 ! 64-bit result
639 ldx [$tp],%o7
640 faddd $nloc,$nhia,$nloc
641 addcc %o7,%o0,%o0
642 ! end-of-why?
643 faddd $nlod,$nhib,$nlod
644 srlx %o3,16,%g1 ! 34-bit carry
645 fdtox $nloa,$nloa
646 bcs,a %xcc,.+8
647 add %g1,1,%g1
648
649 fdtox $nlob,$nlob
650 fdtox $nloc,$nloc
651 fdtox $nlod,$nlod
652
653 std $nloa,[%sp+$bias+$frame+0]
654 std $nlob,[%sp+$bias+$frame+8]
655 addcc $j,8,$j
656 std $nloc,[%sp+$bias+$frame+16]
657 bz,pn %icc,.Linnerskip
658 std $nlod,[%sp+$bias+$frame+24]
659
660 ba .Linner
661 nop
662.align 32
663.Linner:
664 ldd [$ap_l+$j],$alo ! load a[j] in double format
665 ldd [$ap_h+$j],$ahi
666 ldd [$np_l+$j],$nlo ! load n[j] in double format
667 ldd [$np_h+$j],$nhi
668
669 fmuld $alo,$ba,$aloa
670 fmuld $nlo,$na,$nloa
671 fmuld $alo,$bb,$alob
672 fmuld $nlo,$nb,$nlob
673 fmuld $alo,$bc,$aloc
674 ldx [%sp+$bias+$frame+0],%o0
675 faddd $aloa,$nloa,$nloa
676 fmuld $nlo,$nc,$nloc
677 ldx [%sp+$bias+$frame+8],%o1
678 fmuld $alo,$bd,$alod
679 ldx [%sp+$bias+$frame+16],%o2
680 faddd $alob,$nlob,$nlob
681 fmuld $nlo,$nd,$nlod
682 ldx [%sp+$bias+$frame+24],%o3
683 fmuld $ahi,$ba,$ahia
684
685 srlx %o0,16,%o7
686 faddd $aloc,$nloc,$nloc
687 fmuld $nhi,$na,$nhia
688 add %o7,%o1,%o1
689 fmuld $ahi,$bb,$ahib
690 srlx %o1,16,%o7
691 faddd $alod,$nlod,$nlod
692 fmuld $nhi,$nb,$nhib
693 add %o7,%o2,%o2
694 fmuld $ahi,$bc,$ahic
695 srlx %o2,16,%o7
696 faddd $ahia,$nhia,$nhia
697 fmuld $nhi,$nc,$nhic
698 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
699 and %o0,$mask,%o0
700 fmuld $ahi,$bd,$ahid
701 and %o1,$mask,%o1
702 and %o2,$mask,%o2
703 faddd $ahib,$nhib,$nhib
704 fmuld $nhi,$nd,$nhid
705 sllx %o1,16,%o1
706 faddd $dota,$nloa,$nloa
707 sllx %o2,32,%o2
708 faddd $dotb,$nlob,$nlob
709 sllx %o3,48,%o7
710 or %o1,%o0,%o0
711 faddd $ahic,$nhic,$dota ! $nhic
712 or %o2,%o0,%o0
713 faddd $ahid,$nhid,$dotb ! $nhid
714 or %o7,%o0,%o0 ! 64-bit result
715 faddd $nloc,$nhia,$nloc
716 addcc %g1,%o0,%o0
717 ldx [$tp+8],%o7 ! tp[j]
718 faddd $nlod,$nhib,$nlod
719 srlx %o3,16,%g1 ! 34-bit carry
720 fdtox $nloa,$nloa
721 bcs,a %xcc,.+8
722 add %g1,1,%g1
723 fdtox $nlob,$nlob
724 addcc %o7,%o0,%o0
725 fdtox $nloc,$nloc
726 bcs,a %xcc,.+8
727 add %g1,1,%g1
728
729 stx %o0,[$tp] ! tp[j-1]
730 fdtox $nlod,$nlod
731
732 std $nloa,[%sp+$bias+$frame+0]
733 std $nlob,[%sp+$bias+$frame+8]
734 std $nloc,[%sp+$bias+$frame+16]
735 addcc $j,8,$j
736 std $nlod,[%sp+$bias+$frame+24]
737 bnz,pt %icc,.Linner
738 add $tp,8,$tp
739
740.Linnerskip:
741 fdtox $dota,$dota
742 fdtox $dotb,$dotb
743
744 ldx [%sp+$bias+$frame+0],%o0
745 ldx [%sp+$bias+$frame+8],%o1
746 ldx [%sp+$bias+$frame+16],%o2
747 ldx [%sp+$bias+$frame+24],%o3
748
749 srlx %o0,16,%o7
750 std $dota,[%sp+$bias+$frame+32]
751 add %o7,%o1,%o1
752 std $dotb,[%sp+$bias+$frame+40]
753 srlx %o1,16,%o7
754 add %o7,%o2,%o2
755 srlx %o2,16,%o7
756 add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
757 and %o0,$mask,%o0
758 and %o1,$mask,%o1
759 and %o2,$mask,%o2
760 sllx %o1,16,%o1
761 sllx %o2,32,%o2
762 sllx %o3,48,%o7
763 or %o1,%o0,%o0
764 or %o2,%o0,%o0
765 ldx [%sp+$bias+$frame+32],%o4
766 or %o7,%o0,%o0 ! 64-bit result
767 ldx [%sp+$bias+$frame+40],%o5
768 addcc %g1,%o0,%o0
769 ldx [$tp+8],%o7 ! tp[j]
770 srlx %o3,16,%g1 ! 34-bit carry
771 bcs,a %xcc,.+8
772 add %g1,1,%g1
773
774 addcc %o7,%o0,%o0
775 bcs,a %xcc,.+8
776 add %g1,1,%g1
777
778 stx %o0,[$tp] ! tp[j-1]
779 add $tp,8,$tp
780
781 srlx %o4,16,%o7
782 add %o7,%o5,%o5
783 and %o4,$mask,%o4
784 sllx %o5,16,%o7
785 or %o7,%o4,%o4
786 addcc %g1,%o4,%o4
787 srlx %o5,48,%g1
788 bcs,a %xcc,.+8
789 add %g1,1,%g1
790
791 addcc $carry,%o4,%o4
792 stx %o4,[$tp] ! tp[num-1]
793 mov %g1,$carry
794 bcs,a %xcc,.+8
795 add $carry,1,$carry
796
797 addcc $i,8,$i
798 bnz %icc,.Louter
799 nop
800
801 add $tp,8,$tp ! adjust tp to point at the end
802 orn %g0,%g0,%g4
803 sub %g0,$num,%o7 ! n=-num
804 ba .Lsub
805 subcc %g0,%g0,%g0 ! clear %icc.c
806
807.align 32
808.Lsub:
809 ldx [$tp+%o7],%o0
810 add $np,%o7,%g1
811 ld [%g1+0],%o2
812 ld [%g1+4],%o3
813 srlx %o0,32,%o1
814 subccc %o0,%o2,%o2
815 add $rp,%o7,%g1
816 subccc %o1,%o3,%o3
817 st %o2,[%g1+0]
818 add %o7,8,%o7
819 brnz,pt %o7,.Lsub
820 st %o3,[%g1+4]
821 subc $carry,0,%g4
822 sub %g0,$num,%o7 ! n=-num
823 ba .Lcopy
824 nop
825
826.align 32
827.Lcopy:
828 ldx [$tp+%o7],%o0
829 add $rp,%o7,%g1
830 ld [%g1+0],%o2
831 ld [%g1+4],%o3
832 stx %g0,[$tp+%o7]
833 and %o0,%g4,%o0
834 srlx %o0,32,%o1
835 andn %o2,%g4,%o2
836 andn %o3,%g4,%o3
837 or %o2,%o0,%o0
838 or %o3,%o1,%o1
839 st %o0,[%g1+0]
840 add %o7,8,%o7
841 brnz,pt %o7,.Lcopy
842 st %o1,[%g1+4]
843 sub %g0,$num,%o7 ! n=-num
844
845.Lzap:
846 stx %g0,[$ap_l+%o7]
847 stx %g0,[$ap_h+%o7]
848 stx %g0,[$np_l+%o7]
849 stx %g0,[$np_h+%o7]
850 add %o7,8,%o7
851 brnz,pt %o7,.Lzap
852 nop
853
854 ldx [%sp+$bias+$frame+48],%o7
855 wr %g0,%o7,%asi ! restore %asi
856
857 mov 1,%i0
858.Lret:
859 ret
860 restore
861.type $fname,#function
862.size $fname,(.-$fname)
863.asciz "Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
864.align 32
865___
866
867$code =~ s/\`([^\`]*)\`/eval($1)/gem;
868
869# Below substitution makes it possible to compile without demanding
870# VIS extentions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
871# dare to do this, because VIS capability is detected at run-time now
872# and this routine is not called on CPU not capable to execute it. Do
873# note that fzeros is not the only VIS dependency! Another dependency
874# is implicit and is just _a_ numerical value loaded to %asi register,
875# which assembler can't recognize as VIS specific...
876$code =~ s/fzeros\s+%f([0-9]+)/
877 sprintf(".word\t0x%x\t! fzeros %%f%d",0x81b00c20|($1<<25),$1)
878 /gem;
879
880print $code;
881# flush
882close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/via-mont.pl b/src/lib/libcrypto/bn/asm/via-mont.pl
deleted file mode 100644
index c046a514c8..0000000000
--- a/src/lib/libcrypto/bn/asm/via-mont.pl
+++ /dev/null
@@ -1,242 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# Wrapper around 'rep montmul', VIA-specific instruction accessing
11# PadLock Montgomery Multiplier. The wrapper is designed as drop-in
12# replacement for OpenSSL bn_mul_mont [first implemented in 0.9.9].
13#
14# Below are interleaved outputs from 'openssl speed rsa dsa' for 4
15# different software configurations on 1.5GHz VIA Esther processor.
16# Lines marked with "software integer" denote performance of hand-
17# coded integer-only assembler found in OpenSSL 0.9.7. "Software SSE2"
18# refers to hand-coded SSE2 Montgomery multiplication procedure found
19# OpenSSL 0.9.9. "Hardware VIA SDK" refers to padlock_pmm routine from
20# Padlock SDK 2.0.1 available for download from VIA, which naturally
21# utilizes the magic 'repz montmul' instruction. And finally "hardware
22# this" refers to *this* implementation which also uses 'repz montmul'
23#
24# sign verify sign/s verify/s
25# rsa 512 bits 0.001720s 0.000140s 581.4 7149.7 software integer
26# rsa 512 bits 0.000690s 0.000086s 1450.3 11606.0 software SSE2
27# rsa 512 bits 0.006136s 0.000201s 163.0 4974.5 hardware VIA SDK
28# rsa 512 bits 0.000712s 0.000050s 1404.9 19858.5 hardware this
29#
30# rsa 1024 bits 0.008518s 0.000413s 117.4 2420.8 software integer
31# rsa 1024 bits 0.004275s 0.000277s 233.9 3609.7 software SSE2
32# rsa 1024 bits 0.012136s 0.000260s 82.4 3844.5 hardware VIA SDK
33# rsa 1024 bits 0.002522s 0.000116s 396.5 8650.9 hardware this
34#
35# rsa 2048 bits 0.050101s 0.001371s 20.0 729.6 software integer
36# rsa 2048 bits 0.030273s 0.001008s 33.0 991.9 software SSE2
37# rsa 2048 bits 0.030833s 0.000976s 32.4 1025.1 hardware VIA SDK
38# rsa 2048 bits 0.011879s 0.000342s 84.2 2921.7 hardware this
39#
40# rsa 4096 bits 0.327097s 0.004859s 3.1 205.8 software integer
41# rsa 4096 bits 0.229318s 0.003859s 4.4 259.2 software SSE2
42# rsa 4096 bits 0.233953s 0.003274s 4.3 305.4 hardware VIA SDK
43# rsa 4096 bits 0.070493s 0.001166s 14.2 857.6 hardware this
44#
45# dsa 512 bits 0.001342s 0.001651s 745.2 605.7 software integer
46# dsa 512 bits 0.000844s 0.000987s 1185.3 1013.1 software SSE2
47# dsa 512 bits 0.001902s 0.002247s 525.6 444.9 hardware VIA SDK
48# dsa 512 bits 0.000458s 0.000524s 2182.2 1909.1 hardware this
49#
50# dsa 1024 bits 0.003964s 0.004926s 252.3 203.0 software integer
51# dsa 1024 bits 0.002686s 0.003166s 372.3 315.8 software SSE2
52# dsa 1024 bits 0.002397s 0.002823s 417.1 354.3 hardware VIA SDK
53# dsa 1024 bits 0.000978s 0.001170s 1022.2 855.0 hardware this
54#
55# dsa 2048 bits 0.013280s 0.016518s 75.3 60.5 software integer
56# dsa 2048 bits 0.009911s 0.011522s 100.9 86.8 software SSE2
57# dsa 2048 bits 0.009542s 0.011763s 104.8 85.0 hardware VIA SDK
58# dsa 2048 bits 0.002884s 0.003352s 346.8 298.3 hardware this
59#
60# To give you some other reference point here is output for 2.4GHz P4
61# running hand-coded SSE2 bn_mul_mont found in 0.9.9, i.e. "software
62# SSE2" in above terms.
63#
64# rsa 512 bits 0.000407s 0.000047s 2454.2 21137.0
65# rsa 1024 bits 0.002426s 0.000141s 412.1 7100.0
66# rsa 2048 bits 0.015046s 0.000491s 66.5 2034.9
67# rsa 4096 bits 0.109770s 0.002379s 9.1 420.3
68# dsa 512 bits 0.000438s 0.000525s 2281.1 1904.1
69# dsa 1024 bits 0.001346s 0.001595s 742.7 627.0
70# dsa 2048 bits 0.004745s 0.005582s 210.7 179.1
71#
72# Conclusions:
73# - VIA SDK leaves a *lot* of room for improvement (which this
74# implementation successfully fills:-);
75# - 'rep montmul' gives up to >3x performance improvement depending on
76# key length;
77# - in terms of absolute performance it delivers approximately as much
78# as modern out-of-order 32-bit cores [again, for longer keys].
79
80$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
81push(@INC,"${dir}","${dir}../../perlasm");
82require "x86asm.pl";
83
84&asm_init($ARGV[0],"via-mont.pl");
85
86# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
87$func="bn_mul_mont_padlock";
88
89$pad=16*1; # amount of reserved bytes on top of every vector
90
91# stack layout
92$mZeroPrime=&DWP(0,"esp"); # these are specified by VIA
93$A=&DWP(4,"esp");
94$B=&DWP(8,"esp");
95$T=&DWP(12,"esp");
96$M=&DWP(16,"esp");
97$scratch=&DWP(20,"esp");
98$rp=&DWP(24,"esp"); # these are mine
99$sp=&DWP(28,"esp");
100# &DWP(32,"esp") # 32 byte scratch area
101# &DWP(64+(4*$num+$pad)*0,"esp") # padded tp[num]
102# &DWP(64+(4*$num+$pad)*1,"esp") # padded copy of ap[num]
103# &DWP(64+(4*$num+$pad)*2,"esp") # padded copy of bp[num]
104# &DWP(64+(4*$num+$pad)*3,"esp") # padded copy of np[num]
105# Note that SDK suggests to unconditionally allocate 2K per vector. This
106# has quite an impact on performance. It naturally depends on key length,
107# but to give an example 1024 bit private RSA key operations suffer >30%
108# penalty. I allocate only as much as actually required...
109
110&function_begin($func);
111 &xor ("eax","eax");
112 &mov ("ecx",&wparam(5)); # num
113 # meet VIA's limitations for num [note that the specification
114 # expresses them in bits, while we work with amount of 32-bit words]
115 &test ("ecx",3);
116 &jnz (&label("leave")); # num % 4 != 0
117 &cmp ("ecx",8);
118 &jb (&label("leave")); # num < 8
119 &cmp ("ecx",1024);
120 &ja (&label("leave")); # num > 1024
121
122 &pushf ();
123 &cld ();
124
125 &mov ("edi",&wparam(0)); # rp
126 &mov ("eax",&wparam(1)); # ap
127 &mov ("ebx",&wparam(2)); # bp
128 &mov ("edx",&wparam(3)); # np
129 &mov ("esi",&wparam(4)); # n0
130 &mov ("esi",&DWP(0,"esi")); # *n0
131
132 &lea ("ecx",&DWP($pad,"","ecx",4)); # ecx becomes vector size in bytes
133 &lea ("ebp",&DWP(64,"","ecx",4)); # allocate 4 vectors + 64 bytes
134 &neg ("ebp");
135 &add ("ebp","esp");
136 &and ("ebp",-64); # align to cache-line
137 &xchg ("ebp","esp"); # alloca
138
139 &mov ($rp,"edi"); # save rp
140 &mov ($sp,"ebp"); # save esp
141
142 &mov ($mZeroPrime,"esi");
143 &lea ("esi",&DWP(64,"esp")); # tp
144 &mov ($T,"esi");
145 &lea ("edi",&DWP(32,"esp")); # scratch area
146 &mov ($scratch,"edi");
147 &mov ("esi","eax");
148
149 &lea ("ebp",&DWP(-$pad,"ecx"));
150 &shr ("ebp",2); # restore original num value in ebp
151
152 &xor ("eax","eax");
153
154 &mov ("ecx","ebp");
155 &lea ("ecx",&DWP((32+$pad)/4,"ecx"));# padded tp + scratch
156 &data_byte(0xf3,0xab); # rep stosl, bzero
157
158 &mov ("ecx","ebp");
159 &lea ("edi",&DWP(64+$pad,"esp","ecx",4));# pointer to ap copy
160 &mov ($A,"edi");
161 &data_byte(0xf3,0xa5); # rep movsl, memcpy
162 &mov ("ecx",$pad/4);
163 &data_byte(0xf3,0xab); # rep stosl, bzero pad
164 # edi points at the end of padded ap copy...
165
166 &mov ("ecx","ebp");
167 &mov ("esi","ebx");
168 &mov ($B,"edi");
169 &data_byte(0xf3,0xa5); # rep movsl, memcpy
170 &mov ("ecx",$pad/4);
171 &data_byte(0xf3,0xab); # rep stosl, bzero pad
172 # edi points at the end of padded bp copy...
173
174 &mov ("ecx","ebp");
175 &mov ("esi","edx");
176 &mov ($M,"edi");
177 &data_byte(0xf3,0xa5); # rep movsl, memcpy
178 &mov ("ecx",$pad/4);
179 &data_byte(0xf3,0xab); # rep stosl, bzero pad
180 # edi points at the end of padded np copy...
181
182 # let magic happen...
183 &mov ("ecx","ebp");
184 &mov ("esi","esp");
185 &shl ("ecx",5); # convert word counter to bit counter
186 &align (4);
187 &data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul
188
189 &mov ("ecx","ebp");
190 &lea ("esi",&DWP(64,"esp")); # tp
191 # edi still points at the end of padded np copy...
192 &neg ("ebp");
193 &lea ("ebp",&DWP(-$pad,"edi","ebp",4)); # so just "rewind"
194 &mov ("edi",$rp); # restore rp
195 &xor ("edx","edx"); # i=0 and clear CF
196
197&set_label("sub",8);
198 &mov ("eax",&DWP(0,"esi","edx",4));
199 &sbb ("eax",&DWP(0,"ebp","edx",4));
200 &mov (&DWP(0,"edi","edx",4),"eax"); # rp[i]=tp[i]-np[i]
201 &lea ("edx",&DWP(1,"edx")); # i++
202 &loop (&label("sub")); # doesn't affect CF!
203
204 &mov ("eax",&DWP(0,"esi","edx",4)); # upmost overflow bit
205 &sbb ("eax",0);
206 &and ("esi","eax");
207 &not ("eax");
208 &mov ("ebp","edi");
209 &and ("ebp","eax");
210 &or ("esi","ebp"); # tp=carry?tp:rp
211
212 &mov ("ecx","edx"); # num
213 &xor ("edx","edx"); # i=0
214
215&set_label("copy",8);
216 &mov ("eax",&DWP(0,"esi","edx",4));
217 &mov (&DWP(64,"esp","edx",4),"ecx"); # zap tp
218 &mov (&DWP(0,"edi","edx",4),"eax");
219 &lea ("edx",&DWP(1,"edx")); # i++
220 &loop (&label("copy"));
221
222 &mov ("ebp",$sp);
223 &xor ("eax","eax");
224
225 &mov ("ecx",64/4);
226 &mov ("edi","esp"); # zap frame including scratch area
227 &data_byte(0xf3,0xab); # rep stosl, bzero
228
229 # zap copies of ap, bp and np
230 &lea ("edi",&DWP(64+$pad,"esp","edx",4));# pointer to ap
231 &lea ("ecx",&DWP(3*$pad/4,"edx","edx",2));
232 &data_byte(0xf3,0xab); # rep stosl, bzero
233
234 &mov ("esp","ebp");
235 &inc ("eax"); # signal "done"
236 &popf ();
237&set_label("leave");
238&function_end($func);
239
240&asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>");
241
242&asm_finish();
diff --git a/src/lib/libcrypto/bn/asm/vms.mar b/src/lib/libcrypto/bn/asm/vms.mar
deleted file mode 100644
index aefab15cdb..0000000000
--- a/src/lib/libcrypto/bn/asm/vms.mar
+++ /dev/null
@@ -1,6440 +0,0 @@
1 .title vax_bn_mul_add_words unsigned multiply & add, 32*32+32+32=>64
2;
3; w.j.m. 15-jan-1999
4;
5; it's magic ...
6;
7; ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
8; ULONG c = 0;
9; int i;
10; for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
11; return c;
12; }
13
14r=4 ;(AP)
15a=8 ;(AP)
16n=12 ;(AP) n by value (input)
17w=16 ;(AP) w by value (input)
18
19
20 .psect code,nowrt
21
22.entry bn_mul_add_words,^m<r2,r3,r4,r5,r6>
23
24 moval @r(ap),r2
25 moval @a(ap),r3
26 movl n(ap),r4 ; assumed >0 by C code
27 movl w(ap),r5
28 clrl r6 ; c
29
300$:
31 emul r5,(r3),(r2),r0 ; w, a[], r[] considered signed
32
33 ; fixup for "negative" r[]
34 tstl (r2)
35 bgeq 10$
36 incl r1
3710$:
38
39 ; add in c
40 addl2 r6,r0
41 adwc #0,r1
42
43 ; combined fixup for "negative" w, a[]
44 tstl r5
45 bgeq 20$
46 addl2 (r3),r1
4720$:
48 tstl (r3)
49 bgeq 30$
50 addl2 r5,r1
5130$:
52
53 movl r0,(r2)+ ; store lo result in r[] & advance
54 addl #4,r3 ; advance a[]
55 movl r1,r6 ; store hi result => c
56
57 sobgtr r4,0$
58
59 movl r6,r0 ; return c
60 ret
61
62 .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64
63;
64; w.j.m. 15-jan-1999
65;
66; it's magic ...
67;
68; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
69; ULONG c = 0;
70; int i;
71; for(i = 0; i < num; i++) <c,r[i]> := a[i] * w + c ;
72; return(c);
73; }
74
75r=4 ;(AP)
76a=8 ;(AP)
77n=12 ;(AP) n by value (input)
78w=16 ;(AP) w by value (input)
79
80
81 .psect code,nowrt
82
83.entry bn_mul_words,^m<r2,r3,r4,r5,r6>
84
85 moval @r(ap),r2 ; r2 -> r[]
86 moval @a(ap),r3 ; r3 -> a[]
87 movl n(ap),r4 ; r4 = loop count (assumed >0 by C code)
88 movl w(ap),r5 ; r5 = w
89 clrl r6 ; r6 = c
90
910$:
92 ; <r1,r0> := w * a[] + c
93 emul r5,(r3),r6,r0 ; w, a[], c considered signed
94
95 ; fixup for "negative" c
96 tstl r6 ; c
97 bgeq 10$
98 incl r1
9910$:
100
101 ; combined fixup for "negative" w, a[]
102 tstl r5 ; w
103 bgeq 20$
104 addl2 (r3),r1 ; a[]
10520$:
106 tstl (r3) ; a[]
107 bgeq 30$
108 addl2 r5,r1 ; w
10930$:
110
111 movl r0,(r2)+ ; store lo result in r[] & advance
112 addl #4,r3 ; advance a[]
113 movl r1,r6 ; store hi result => c
114
115 sobgtr r4,0$
116
117 movl r6,r0 ; return c
118 ret
119
120 .title vax_bn_sqr_words unsigned square, 32*32=>64
121;
122; w.j.m. 15-jan-1999
123;
124; it's magic ...
125;
126; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
127; int i;
128; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
129; }
130
131r=4 ;(AP)
132a=8 ;(AP)
133n=12 ;(AP) n by value (input)
134
135
136 .psect code,nowrt
137
138.entry bn_sqr_words,^m<r2,r3,r4,r5>
139
140 moval @r(ap),r2 ; r2 -> r[]
141 moval @a(ap),r3 ; r3 -> a[]
142 movl n(ap),r4 ; r4 = n (assumed >0 by C code)
143
1440$:
145 movl (r3)+,r5 ; r5 = a[] & advance
146
147 ; <r1,r0> := a[] * a[]
148 emul r5,r5,#0,r0 ; a[] considered signed
149
150 ; fixup for "negative" a[]
151 tstl r5 ; a[]
152 bgeq 30$
153 addl2 r5,r1 ; a[]
154 addl2 r5,r1 ; a[]
15530$:
156
157 movl r0,(r2)+ ; store lo result in r[] & advance
158 movl r1,(r2)+ ; store hi result in r[] & advance
159
160 sobgtr r4,0$
161
162 movl #1,r0 ; return SS$_NORMAL
163 ret
164
165 .title vax_bn_div_words unsigned divide
166;
167; Richard Levitte 20-Nov-2000
168;
169; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
170; {
171; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
172; }
173;
174; Using EDIV would be very easy, if it didn't do signed calculations.
175; Any time any of the input numbers are signed, there are problems,
176; usually with integer overflow, at which point it returns useless
177; data (the quotient gets the value of l, and the remainder becomes 0).
178;
179; If it was just for the dividend, it would be very easy, just divide
180; it by 2 (unsigned), do the division, multiply the resulting quotient
181; and remainder by 2, add the bit that was dropped when dividing by 2
182; to the remainder, and do some adjustment so the remainder doesn't
183; end up larger than the divisor. For some cases when the divisor is
184; negative (from EDIV's point of view, i.e. when the highest bit is set),
185; dividing the dividend by 2 isn't enough, and since some operations
186; might generate integer overflows even when the dividend is divided by
187; 4 (when the high part of the shifted down dividend ends up being exactly
188; half of the divisor, the result is the quotient 0x80000000, which is
189; negative...) it needs to be divided by 8. Furthermore, the divisor needs
190; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
191; In this case, a little extra fiddling with the remainder is required.
192;
193; So, the simplest way to handle this is always to divide the dividend
194; by 8, and to divide the divisor by 2 if it's highest bit is set.
195; After EDIV has been used, the quotient gets multiplied by 8 if the
196; original divisor was positive, otherwise 4. The remainder, oddly
197; enough, is *always* multiplied by 8.
198; NOTE: in the case mentioned above, where the high part of the shifted
199; down dividend ends up being exactly half the shifted down divisor, we
200; end up with a 33 bit quotient. That's no problem however, it usually
201; means we have ended up with a too large remainder as well, and the
202; problem is fixed by the last part of the algorithm (next paragraph).
203;
204; The routine ends with comparing the resulting remainder with the
205; original divisor and if the remainder is larger, subtract the
206; original divisor from it, and increase the quotient by 1. This is
207; done until the remainder is smaller than the divisor.
208;
209; The complete algorithm looks like this:
210;
211; d' = d
212; l' = l & 7
213; [h,l] = [h,l] >> 3
214; [q,r] = floor([h,l] / d) # This is the EDIV operation
215; if (q < 0) q = -q # I doubt this is necessary any more
216;
217; r' = r >> 29
218; if (d' >= 0)
219; q' = q >> 29
220; q = q << 3
221; else
222; q' = q >> 30
223; q = q << 2
224; r = (r << 3) + l'
225;
226; if (d' < 0)
227; {
228; [r',r] = [r',r] - q
229; while ([r',r] < 0)
230; {
231; [r',r] = [r',r] + d
232; [q',q] = [q',q] - 1
233; }
234; }
235;
236; while ([r',r] >= d')
237; {
238; [r',r] = [r',r] - d'
239; [q',q] = [q',q] + 1
240; }
241;
242; return q
243
244h=4 ;(AP) h by value (input)
245l=8 ;(AP) l by value (input)
246d=12 ;(AP) d by value (input)
247
248;r2 = l, q
249;r3 = h, r
250;r4 = d
251;r5 = l'
252;r6 = r'
253;r7 = d'
254;r8 = q'
255
256 .psect code,nowrt
257
258.entry bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8>
259 movl l(ap),r2
260 movl h(ap),r3
261 movl d(ap),r4
262
263 bicl3 #^XFFFFFFF8,r2,r5 ; l' = l & 7
264 bicl3 #^X00000007,r2,r2
265
266 bicl3 #^XFFFFFFF8,r3,r6
267 bicl3 #^X00000007,r3,r3
268
269 addl r6,r2
270
271 rotl #-3,r2,r2 ; l = l >> 3
272 rotl #-3,r3,r3 ; h = h >> 3
273
274 movl r4,r7 ; d' = d
275
276 movl #0,r6 ; r' = 0
277 movl #0,r8 ; q' = 0
278
279 tstl r4
280 beql 666$ ; Uh-oh, the divisor is 0...
281 bgtr 1$
282 rotl #-1,r4,r4 ; If d is negative, shift it right.
283 bicl2 #^X80000000,r4 ; Since d is then a large number, the
284 ; lowest bit is insignificant
285 ; (contradict that, and I'll fix the problem!)
2861$:
287 ediv r4,r2,r2,r3 ; Do the actual division
288
289 tstl r2
290 bgeq 3$
291 mnegl r2,r2 ; if q < 0, negate it
2923$:
293 tstl r7
294 blss 4$
295 rotl #3,r2,r2 ; q = q << 3
296 bicl3 #^XFFFFFFF8,r2,r8 ; q' gets the high bits from q
297 bicl3 #^X00000007,r2,r2
298 bsb 41$
2994$: ; else
300 rotl #2,r2,r2 ; q = q << 2
301 bicl3 #^XFFFFFFFC,r2,r8 ; q' gets the high bits from q
302 bicl3 #^X00000003,r2,r2
30341$:
304 rotl #3,r3,r3 ; r = r << 3
305 bicl3 #^XFFFFFFF8,r3,r6 ; r' gets the high bits from r
306 bicl3 #^X00000007,r3,r3
307 addl r5,r3 ; r = r + l'
308
309 tstl r7
310 bgeq 5$
311 bitl #1,r7
312 beql 5$ ; if d' < 0 && d' & 1
313 subl r2,r3 ; [r',r] = [r',r] - [q',q]
314 sbwc r8,r6
31545$:
316 bgeq 5$ ; while r < 0
317 decl r2 ; [q',q] = [q',q] - 1
318 sbwc #0,r8
319 addl r7,r3 ; [r',r] = [r',r] + d'
320 adwc #0,r6
321 brb 45$
322
323; The return points are placed in the middle to keep a short distance from
324; all the branch points
32542$:
326; movl r3,r1
327 movl r2,r0
328 ret
329666$:
330 movl #^XFFFFFFFF,r0
331 ret
332
3335$:
334 tstl r6
335 bneq 6$
336 cmpl r3,r7
337 blssu 42$ ; while [r',r] >= d'
3386$:
339 subl r7,r3 ; [r',r] = [r',r] - d'
340 sbwc #0,r6
341 incl r2 ; [q',q] = [q',q] + 1
342 adwc #0,r8
343 brb 5$
344
345 .title vax_bn_add_words unsigned add of two arrays
346;
347; Richard Levitte 20-Nov-2000
348;
349; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
350; ULONG c = 0;
351; int i;
352; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
353; return(c);
354; }
355
356r=4 ;(AP) r by reference (output)
357a=8 ;(AP) a by reference (input)
358b=12 ;(AP) b by reference (input)
359n=16 ;(AP) n by value (input)
360
361
362 .psect code,nowrt
363
364.entry bn_add_words,^m<r2,r3,r4,r5,r6>
365
366 moval @r(ap),r2
367 moval @a(ap),r3
368 moval @b(ap),r4
369 movl n(ap),r5 ; assumed >0 by C code
370 clrl r0 ; c
371
372 tstl r5 ; carry = 0
373 bleq 666$
374
3750$:
376 movl (r3)+,r6 ; carry untouched
377 adwc (r4)+,r6 ; carry used and touched
378 movl r6,(r2)+ ; carry untouched
379 sobgtr r5,0$ ; carry untouched
380
381 adwc #0,r0
382666$:
383 ret
384
385 .title vax_bn_sub_words unsigned add of two arrays
386;
387; Richard Levitte 20-Nov-2000
388;
389; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
390; ULONG c = 0;
391; int i;
392; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
393; return(c);
394; }
395
396r=4 ;(AP) r by reference (output)
397a=8 ;(AP) a by reference (input)
398b=12 ;(AP) b by reference (input)
399n=16 ;(AP) n by value (input)
400
401
402 .psect code,nowrt
403
404.entry bn_sub_words,^m<r2,r3,r4,r5,r6>
405
406 moval @r(ap),r2
407 moval @a(ap),r3
408 moval @b(ap),r4
409 movl n(ap),r5 ; assumed >0 by C code
410 clrl r0 ; c
411
412 tstl r5 ; carry = 0
413 bleq 666$
414
4150$:
416 movl (r3)+,r6 ; carry untouched
417 sbwc (r4)+,r6 ; carry used and touched
418 movl r6,(r2)+ ; carry untouched
419 sobgtr r5,0$ ; carry untouched
420
421 adwc #0,r0
422666$:
423 ret
424
425
426;r=4 ;(AP)
427;a=8 ;(AP)
428;b=12 ;(AP)
429;n=16 ;(AP) n by value (input)
430
431 .psect code,nowrt
432
433.entry BN_MUL_COMBA8,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
434 movab -924(sp),sp
435 clrq r8
436
437 clrl r10
438
439 movl 8(ap),r6
440 movzwl 2(r6),r3
441 movl 12(ap),r7
442 bicl3 #-65536,(r7),r2
443 movzwl 2(r7),r0
444 bicl2 #-65536,r0
445 bicl3 #-65536,(r6),-12(fp)
446 bicl3 #-65536,r3,-16(fp)
447 mull3 r0,-12(fp),-4(fp)
448 mull2 r2,-12(fp)
449 mull3 r2,-16(fp),-8(fp)
450 mull2 r0,-16(fp)
451 addl3 -4(fp),-8(fp),r0
452 bicl3 #0,r0,-4(fp)
453 cmpl -4(fp),-8(fp)
454 bgequ noname.45
455 addl2 #65536,-16(fp)
456noname.45:
457 movzwl -2(fp),r0
458 bicl2 #-65536,r0
459 addl2 r0,-16(fp)
460 bicl3 #-65536,-4(fp),r0
461 ashl #16,r0,-8(fp)
462 addl3 -8(fp),-12(fp),r0
463 bicl3 #0,r0,-12(fp)
464 cmpl -12(fp),-8(fp)
465 bgequ noname.46
466 incl -16(fp)
467noname.46:
468 movl -12(fp),r1
469 movl -16(fp),r2
470 addl2 r1,r9
471 bicl2 #0,r9
472 cmpl r9,r1
473 bgequ noname.47
474 incl r2
475noname.47:
476 addl2 r2,r8
477 bicl2 #0,r8
478 cmpl r8,r2
479 bgequ noname.48
480 incl r10
481noname.48:
482
483 movl 4(ap),r11
484 movl r9,(r11)
485
486 clrl r9
487
488 movzwl 2(r6),r2
489 bicl3 #-65536,4(r7),r3
490 movzwl 6(r7),r0
491 bicl2 #-65536,r0
492 bicl3 #-65536,(r6),-28(fp)
493 bicl3 #-65536,r2,-32(fp)
494 mull3 r0,-28(fp),-20(fp)
495 mull2 r3,-28(fp)
496 mull3 r3,-32(fp),-24(fp)
497 mull2 r0,-32(fp)
498 addl3 -20(fp),-24(fp),r0
499 bicl3 #0,r0,-20(fp)
500 cmpl -20(fp),-24(fp)
501 bgequ noname.49
502 addl2 #65536,-32(fp)
503noname.49:
504 movzwl -18(fp),r0
505 bicl2 #-65536,r0
506 addl2 r0,-32(fp)
507 bicl3 #-65536,-20(fp),r0
508 ashl #16,r0,-24(fp)
509 addl3 -24(fp),-28(fp),r0
510 bicl3 #0,r0,-28(fp)
511 cmpl -28(fp),-24(fp)
512 bgequ noname.50
513 incl -32(fp)
514noname.50:
515 movl -28(fp),r1
516 movl -32(fp),r2
517 addl2 r1,r8
518 bicl2 #0,r8
519 cmpl r8,r1
520 bgequ noname.51
521 incl r2
522noname.51:
523 addl2 r2,r10
524 bicl2 #0,r10
525 cmpl r10,r2
526 bgequ noname.52
527 incl r9
528noname.52:
529
530 movzwl 6(r6),r2
531 bicl3 #-65536,(r7),r3
532 movzwl 2(r7),r0
533 bicl2 #-65536,r0
534 bicl3 #-65536,4(r6),-44(fp)
535 bicl3 #-65536,r2,-48(fp)
536 mull3 r0,-44(fp),-36(fp)
537 mull2 r3,-44(fp)
538 mull3 r3,-48(fp),-40(fp)
539 mull2 r0,-48(fp)
540 addl3 -36(fp),-40(fp),r0
541 bicl3 #0,r0,-36(fp)
542 cmpl -36(fp),-40(fp)
543 bgequ noname.53
544 addl2 #65536,-48(fp)
545noname.53:
546 movzwl -34(fp),r0
547 bicl2 #-65536,r0
548 addl2 r0,-48(fp)
549 bicl3 #-65536,-36(fp),r0
550 ashl #16,r0,-40(fp)
551 addl3 -40(fp),-44(fp),r0
552 bicl3 #0,r0,-44(fp)
553 cmpl -44(fp),-40(fp)
554 bgequ noname.54
555 incl -48(fp)
556noname.54:
557 movl -44(fp),r1
558 movl -48(fp),r2
559 addl2 r1,r8
560 bicl2 #0,r8
561 cmpl r8,r1
562 bgequ noname.55
563 incl r2
564noname.55:
565 addl2 r2,r10
566 bicl2 #0,r10
567 cmpl r10,r2
568 bgequ noname.56
569 incl r9
570noname.56:
571
572 movl r8,4(r11)
573
574 clrl r8
575
576 movzwl 10(r6),r2
577 bicl3 #-65536,(r7),r3
578 movzwl 2(r7),r0
579 bicl2 #-65536,r0
580 bicl3 #-65536,8(r6),-60(fp)
581 bicl3 #-65536,r2,-64(fp)
582 mull3 r0,-60(fp),-52(fp)
583 mull2 r3,-60(fp)
584 mull3 r3,-64(fp),-56(fp)
585 mull2 r0,-64(fp)
586 addl3 -52(fp),-56(fp),r0
587 bicl3 #0,r0,-52(fp)
588 cmpl -52(fp),-56(fp)
589 bgequ noname.57
590 addl2 #65536,-64(fp)
591noname.57:
592 movzwl -50(fp),r0
593 bicl2 #-65536,r0
594 addl2 r0,-64(fp)
595 bicl3 #-65536,-52(fp),r0
596 ashl #16,r0,-56(fp)
597 addl3 -56(fp),-60(fp),r0
598 bicl3 #0,r0,-60(fp)
599 cmpl -60(fp),-56(fp)
600 bgequ noname.58
601 incl -64(fp)
602noname.58:
603 movl -60(fp),r1
604 movl -64(fp),r2
605 addl2 r1,r10
606 bicl2 #0,r10
607 cmpl r10,r1
608 bgequ noname.59
609 incl r2
610noname.59:
611 addl2 r2,r9
612 bicl2 #0,r9
613 cmpl r9,r2
614 bgequ noname.60
615 incl r8
616noname.60:
617
618 movzwl 6(r6),r2
619 bicl3 #-65536,4(r7),r3
620 movzwl 6(r7),r0
621 bicl2 #-65536,r0
622 bicl3 #-65536,4(r6),-76(fp)
623 bicl3 #-65536,r2,-80(fp)
624 mull3 r0,-76(fp),-68(fp)
625 mull2 r3,-76(fp)
626 mull3 r3,-80(fp),-72(fp)
627 mull2 r0,-80(fp)
628 addl3 -68(fp),-72(fp),r0
629 bicl3 #0,r0,-68(fp)
630 cmpl -68(fp),-72(fp)
631 bgequ noname.61
632 addl2 #65536,-80(fp)
633noname.61:
634 movzwl -66(fp),r0
635 bicl2 #-65536,r0
636 addl2 r0,-80(fp)
637 bicl3 #-65536,-68(fp),r0
638 ashl #16,r0,-72(fp)
639 addl3 -72(fp),-76(fp),r0
640 bicl3 #0,r0,-76(fp)
641 cmpl -76(fp),-72(fp)
642 bgequ noname.62
643 incl -80(fp)
644noname.62:
645 movl -76(fp),r1
646 movl -80(fp),r2
647 addl2 r1,r10
648 bicl2 #0,r10
649 cmpl r10,r1
650 bgequ noname.63
651 incl r2
652noname.63:
653 addl2 r2,r9
654 bicl2 #0,r9
655 cmpl r9,r2
656 bgequ noname.64
657 incl r8
658noname.64:
659
660 movzwl 2(r6),r2
661 bicl3 #-65536,8(r7),r3
662 movzwl 10(r7),r0
663 bicl2 #-65536,r0
664 bicl3 #-65536,(r6),-92(fp)
665 bicl3 #-65536,r2,-96(fp)
666 mull3 r0,-92(fp),-84(fp)
667 mull2 r3,-92(fp)
668 mull3 r3,-96(fp),-88(fp)
669 mull2 r0,-96(fp)
670 addl3 -84(fp),-88(fp),r0
671 bicl3 #0,r0,-84(fp)
672 cmpl -84(fp),-88(fp)
673 bgequ noname.65
674 addl2 #65536,-96(fp)
675noname.65:
676 movzwl -82(fp),r0
677 bicl2 #-65536,r0
678 addl2 r0,-96(fp)
679 bicl3 #-65536,-84(fp),r0
680 ashl #16,r0,-88(fp)
681 addl3 -88(fp),-92(fp),r0
682 bicl3 #0,r0,-92(fp)
683 cmpl -92(fp),-88(fp)
684 bgequ noname.66
685 incl -96(fp)
686noname.66:
687 movl -92(fp),r1
688 movl -96(fp),r2
689 addl2 r1,r10
690 bicl2 #0,r10
691 cmpl r10,r1
692 bgequ noname.67
693 incl r2
694noname.67:
695 addl2 r2,r9
696 bicl2 #0,r9
697 cmpl r9,r2
698 bgequ noname.68
699 incl r8
700noname.68:
701
702 movl r10,8(r11)
703
704 clrl r10
705
706 movzwl 2(r6),r2
707 bicl3 #-65536,12(r7),r3
708 movzwl 14(r7),r0
709 bicl2 #-65536,r0
710 bicl3 #-65536,(r6),-108(fp)
711 bicl3 #-65536,r2,-112(fp)
712 mull3 r0,-108(fp),-100(fp)
713 mull2 r3,-108(fp)
714 mull3 r3,-112(fp),-104(fp)
715 mull2 r0,-112(fp)
716 addl3 -100(fp),-104(fp),r0
717 bicl3 #0,r0,-100(fp)
718 cmpl -100(fp),-104(fp)
719 bgequ noname.69
720 addl2 #65536,-112(fp)
721noname.69:
722 movzwl -98(fp),r0
723 bicl2 #-65536,r0
724 addl2 r0,-112(fp)
725 bicl3 #-65536,-100(fp),r0
726 ashl #16,r0,-104(fp)
727 addl3 -104(fp),-108(fp),r0
728 bicl3 #0,r0,-108(fp)
729 cmpl -108(fp),-104(fp)
730 bgequ noname.70
731 incl -112(fp)
732noname.70:
733 movl -108(fp),r1
734 movl -112(fp),r2
735 addl2 r1,r9
736 bicl2 #0,r9
737 cmpl r9,r1
738 bgequ noname.71
739 incl r2
740noname.71:
741 addl2 r2,r8
742 bicl2 #0,r8
743 cmpl r8,r2
744 bgequ noname.72
745 incl r10
746noname.72:
747
748 movzwl 6(r6),r2
749 bicl3 #-65536,8(r7),r3
750 movzwl 10(r7),r0
751 bicl2 #-65536,r0
752 bicl3 #-65536,4(r6),-124(fp)
753 bicl3 #-65536,r2,-128(fp)
754 mull3 r0,-124(fp),-116(fp)
755 mull2 r3,-124(fp)
756 mull3 r3,-128(fp),-120(fp)
757 mull2 r0,-128(fp)
758 addl3 -116(fp),-120(fp),r0
759 bicl3 #0,r0,-116(fp)
760 cmpl -116(fp),-120(fp)
761 bgequ noname.73
762 addl2 #65536,-128(fp)
763noname.73:
764 movzwl -114(fp),r0
765 bicl2 #-65536,r0
766 addl2 r0,-128(fp)
767 bicl3 #-65536,-116(fp),r0
768 ashl #16,r0,-120(fp)
769 addl3 -120(fp),-124(fp),r0
770 bicl3 #0,r0,-124(fp)
771 cmpl -124(fp),-120(fp)
772 bgequ noname.74
773 incl -128(fp)
774noname.74:
775 movl -124(fp),r1
776 movl -128(fp),r2
777 addl2 r1,r9
778 bicl2 #0,r9
779 cmpl r9,r1
780 bgequ noname.75
781 incl r2
782noname.75:
783 addl2 r2,r8
784 bicl2 #0,r8
785 cmpl r8,r2
786 bgequ noname.76
787 incl r10
788noname.76:
789
790 movzwl 10(r6),r2
791 bicl3 #-65536,4(r7),r3
792 movzwl 6(r7),r0
793 bicl2 #-65536,r0
794 bicl3 #-65536,8(r6),-140(fp)
795 bicl3 #-65536,r2,-144(fp)
796 mull3 r0,-140(fp),-132(fp)
797 mull2 r3,-140(fp)
798 mull3 r3,-144(fp),-136(fp)
799 mull2 r0,-144(fp)
800 addl3 -132(fp),-136(fp),r0
801 bicl3 #0,r0,-132(fp)
802 cmpl -132(fp),-136(fp)
803 bgequ noname.77
804 addl2 #65536,-144(fp)
805noname.77:
806 movzwl -130(fp),r0
807 bicl2 #-65536,r0
808 addl2 r0,-144(fp)
809 bicl3 #-65536,-132(fp),r0
810 ashl #16,r0,-136(fp)
811 addl3 -136(fp),-140(fp),r0
812 bicl3 #0,r0,-140(fp)
813 cmpl -140(fp),-136(fp)
814 bgequ noname.78
815 incl -144(fp)
816noname.78:
817 movl -140(fp),r1
818 movl -144(fp),r2
819 addl2 r1,r9
820 bicl2 #0,r9
821 cmpl r9,r1
822 bgequ noname.79
823 incl r2
824noname.79:
825 addl2 r2,r8
826 bicl2 #0,r8
827 cmpl r8,r2
828 bgequ noname.80
829 incl r10
830noname.80:
831
832 movzwl 14(r6),r2
833 bicl3 #-65536,(r7),r3
834 movzwl 2(r7),r0
835 bicl2 #-65536,r0
836 bicl3 #-65536,12(r6),-156(fp)
837 bicl3 #-65536,r2,-160(fp)
838 mull3 r0,-156(fp),-148(fp)
839 mull2 r3,-156(fp)
840 mull3 r3,-160(fp),-152(fp)
841 mull2 r0,-160(fp)
842 addl3 -148(fp),-152(fp),r0
843 bicl3 #0,r0,-148(fp)
844 cmpl -148(fp),-152(fp)
845 bgequ noname.81
846 addl2 #65536,-160(fp)
847noname.81:
848 movzwl -146(fp),r0
849 bicl2 #-65536,r0
850 addl2 r0,-160(fp)
851 bicl3 #-65536,-148(fp),r0
852 ashl #16,r0,-152(fp)
853 addl3 -152(fp),-156(fp),r0
854 bicl3 #0,r0,-156(fp)
855 cmpl -156(fp),-152(fp)
856 bgequ noname.82
857 incl -160(fp)
858noname.82:
859 movl -156(fp),r1
860 movl -160(fp),r2
861 addl2 r1,r9
862 bicl2 #0,r9
863 cmpl r9,r1
864 bgequ noname.83
865 incl r2
866noname.83:
867 addl2 r2,r8
868 bicl2 #0,r8
869 cmpl r8,r2
870 bgequ noname.84
871 incl r10
872noname.84:
873
874 movl r9,12(r11)
875
876 clrl r9
877
878 movzwl 18(r6),r2
879 bicl3 #-65536,(r7),r3
880 movzwl 2(r7),r0
881 bicl2 #-65536,r0
882 bicl3 #-65536,16(r6),-172(fp)
883 bicl3 #-65536,r2,-176(fp)
884 mull3 r0,-172(fp),-164(fp)
885 mull2 r3,-172(fp)
886 mull3 r3,-176(fp),-168(fp)
887 mull2 r0,-176(fp)
888 addl3 -164(fp),-168(fp),r0
889 bicl3 #0,r0,-164(fp)
890 cmpl -164(fp),-168(fp)
891 bgequ noname.85
892 addl2 #65536,-176(fp)
893noname.85:
894 movzwl -162(fp),r0
895 bicl2 #-65536,r0
896 addl2 r0,-176(fp)
897 bicl3 #-65536,-164(fp),r0
898 ashl #16,r0,-168(fp)
899 addl3 -168(fp),-172(fp),r0
900 bicl3 #0,r0,-172(fp)
901 cmpl -172(fp),-168(fp)
902 bgequ noname.86
903 incl -176(fp)
904noname.86:
905 movl -172(fp),r1
906 movl -176(fp),r2
907 addl2 r1,r8
908 bicl2 #0,r8
909 cmpl r8,r1
910 bgequ noname.87
911 incl r2
912noname.87:
913 addl2 r2,r10
914 bicl2 #0,r10
915 cmpl r10,r2
916 bgequ noname.88
917 incl r9
918noname.88:
919
920 movzwl 14(r6),r2
921 bicl3 #-65536,4(r7),r3
922 movzwl 6(r7),r0
923 bicl2 #-65536,r0
924 bicl3 #-65536,12(r6),-188(fp)
925 bicl3 #-65536,r2,-192(fp)
926 mull3 r0,-188(fp),-180(fp)
927 mull2 r3,-188(fp)
928 mull3 r3,-192(fp),-184(fp)
929 mull2 r0,-192(fp)
930 addl3 -180(fp),-184(fp),r0
931 bicl3 #0,r0,-180(fp)
932 cmpl -180(fp),-184(fp)
933 bgequ noname.89
934 addl2 #65536,-192(fp)
935noname.89:
936 movzwl -178(fp),r0
937 bicl2 #-65536,r0
938 addl2 r0,-192(fp)
939 bicl3 #-65536,-180(fp),r0
940 ashl #16,r0,-184(fp)
941 addl3 -184(fp),-188(fp),r0
942 bicl3 #0,r0,-188(fp)
943 cmpl -188(fp),-184(fp)
944 bgequ noname.90
945 incl -192(fp)
946noname.90:
947 movl -188(fp),r1
948 movl -192(fp),r2
949 addl2 r1,r8
950 bicl2 #0,r8
951 cmpl r8,r1
952 bgequ noname.91
953 incl r2
954noname.91:
955 addl2 r2,r10
956 bicl2 #0,r10
957 cmpl r10,r2
958 bgequ noname.92
959 incl r9
960noname.92:
961
962 movzwl 10(r6),r2
963 bicl3 #-65536,8(r7),r3
964 movzwl 10(r7),r0
965 bicl2 #-65536,r0
966 bicl3 #-65536,8(r6),-204(fp)
967 bicl3 #-65536,r2,-208(fp)
968 mull3 r0,-204(fp),-196(fp)
969 mull2 r3,-204(fp)
970 mull3 r3,-208(fp),-200(fp)
971 mull2 r0,-208(fp)
972 addl3 -196(fp),-200(fp),r0
973 bicl3 #0,r0,-196(fp)
974 cmpl -196(fp),-200(fp)
975 bgequ noname.93
976 addl2 #65536,-208(fp)
977noname.93:
978 movzwl -194(fp),r0
979 bicl2 #-65536,r0
980 addl2 r0,-208(fp)
981 bicl3 #-65536,-196(fp),r0
982 ashl #16,r0,-200(fp)
983 addl3 -200(fp),-204(fp),r0
984 bicl3 #0,r0,-204(fp)
985 cmpl -204(fp),-200(fp)
986 bgequ noname.94
987 incl -208(fp)
988noname.94:
989 movl -204(fp),r1
990 movl -208(fp),r2
991 addl2 r1,r8
992 bicl2 #0,r8
993 cmpl r8,r1
994 bgequ noname.95
995 incl r2
996noname.95:
997 addl2 r2,r10
998 bicl2 #0,r10
999 cmpl r10,r2
1000 bgequ noname.96
1001 incl r9
1002noname.96:
1003
1004 movzwl 6(r6),r2
1005 bicl3 #-65536,12(r7),r3
1006 movzwl 14(r7),r0
1007 bicl2 #-65536,r0
1008 bicl3 #-65536,4(r6),-220(fp)
1009 bicl3 #-65536,r2,-224(fp)
1010 mull3 r0,-220(fp),-212(fp)
1011 mull2 r3,-220(fp)
1012 mull3 r3,-224(fp),-216(fp)
1013 mull2 r0,-224(fp)
1014 addl3 -212(fp),-216(fp),r0
1015 bicl3 #0,r0,-212(fp)
1016 cmpl -212(fp),-216(fp)
1017 bgequ noname.97
1018 addl2 #65536,-224(fp)
1019noname.97:
1020 movzwl -210(fp),r0
1021 bicl2 #-65536,r0
1022 addl2 r0,-224(fp)
1023 bicl3 #-65536,-212(fp),r0
1024 ashl #16,r0,-216(fp)
1025 addl3 -216(fp),-220(fp),r0
1026 bicl3 #0,r0,-220(fp)
1027 cmpl -220(fp),-216(fp)
1028 bgequ noname.98
1029 incl -224(fp)
1030noname.98:
1031 movl -220(fp),r1
1032 movl -224(fp),r2
1033 addl2 r1,r8
1034 bicl2 #0,r8
1035 cmpl r8,r1
1036 bgequ noname.99
1037 incl r2
1038noname.99:
1039 addl2 r2,r10
1040 bicl2 #0,r10
1041 cmpl r10,r2
1042 bgequ noname.100
1043 incl r9
1044noname.100:
1045
1046 movzwl 2(r6),r2
1047 bicl3 #-65536,16(r7),r3
1048 movzwl 18(r7),r0
1049 bicl2 #-65536,r0
1050 bicl3 #-65536,(r6),-236(fp)
1051 bicl3 #-65536,r2,-240(fp)
1052 mull3 r0,-236(fp),-228(fp)
1053 mull2 r3,-236(fp)
1054 mull3 r3,-240(fp),-232(fp)
1055 mull2 r0,-240(fp)
1056 addl3 -228(fp),-232(fp),r0
1057 bicl3 #0,r0,-228(fp)
1058 cmpl -228(fp),-232(fp)
1059 bgequ noname.101
1060 addl2 #65536,-240(fp)
1061noname.101:
1062 movzwl -226(fp),r0
1063 bicl2 #-65536,r0
1064 addl2 r0,-240(fp)
1065 bicl3 #-65536,-228(fp),r0
1066 ashl #16,r0,-232(fp)
1067 addl3 -232(fp),-236(fp),r0
1068 bicl3 #0,r0,-236(fp)
1069 cmpl -236(fp),-232(fp)
1070 bgequ noname.102
1071 incl -240(fp)
1072noname.102:
1073 movl -236(fp),r1
1074 movl -240(fp),r2
1075 addl2 r1,r8
1076 bicl2 #0,r8
1077 cmpl r8,r1
1078 bgequ noname.103
1079 incl r2
1080noname.103:
1081 addl2 r2,r10
1082 bicl2 #0,r10
1083 cmpl r10,r2
1084 bgequ noname.104
1085 incl r9
1086noname.104:
1087
1088 movl r8,16(r11)
1089
1090 clrl r8
1091
1092 movzwl 2(r6),r2
1093 bicl3 #-65536,20(r7),r3
1094 movzwl 22(r7),r0
1095 bicl2 #-65536,r0
1096 bicl3 #-65536,(r6),-252(fp)
1097 bicl3 #-65536,r2,-256(fp)
1098 mull3 r0,-252(fp),-244(fp)
1099 mull2 r3,-252(fp)
1100 mull3 r3,-256(fp),-248(fp)
1101 mull2 r0,-256(fp)
1102 addl3 -244(fp),-248(fp),r0
1103 bicl3 #0,r0,-244(fp)
1104 cmpl -244(fp),-248(fp)
1105 bgequ noname.105
1106 addl2 #65536,-256(fp)
1107noname.105:
1108 movzwl -242(fp),r0
1109 bicl2 #-65536,r0
1110 addl2 r0,-256(fp)
1111 bicl3 #-65536,-244(fp),r0
1112 ashl #16,r0,-248(fp)
1113 addl3 -248(fp),-252(fp),r0
1114 bicl3 #0,r0,-252(fp)
1115 cmpl -252(fp),-248(fp)
1116 bgequ noname.106
1117 incl -256(fp)
1118noname.106:
1119 movl -252(fp),r1
1120 movl -256(fp),r2
1121 addl2 r1,r10
1122 bicl2 #0,r10
1123 cmpl r10,r1
1124 bgequ noname.107
1125 incl r2
1126noname.107:
1127 addl2 r2,r9
1128 bicl2 #0,r9
1129 cmpl r9,r2
1130 bgequ noname.108
1131 incl r8
1132noname.108:
1133
1134 movzwl 6(r6),r2
1135 bicl3 #-65536,16(r7),r3
1136 movzwl 18(r7),r0
1137 bicl2 #-65536,r0
1138 bicl3 #-65536,4(r6),-268(fp)
1139 bicl3 #-65536,r2,-272(fp)
1140 mull3 r0,-268(fp),-260(fp)
1141 mull2 r3,-268(fp)
1142 mull3 r3,-272(fp),-264(fp)
1143 mull2 r0,-272(fp)
1144 addl3 -260(fp),-264(fp),r0
1145 bicl3 #0,r0,-260(fp)
1146 cmpl -260(fp),-264(fp)
1147 bgequ noname.109
1148 addl2 #65536,-272(fp)
1149noname.109:
1150 movzwl -258(fp),r0
1151 bicl2 #-65536,r0
1152 addl2 r0,-272(fp)
1153 bicl3 #-65536,-260(fp),r0
1154 ashl #16,r0,-264(fp)
1155 addl3 -264(fp),-268(fp),r0
1156 bicl3 #0,r0,-268(fp)
1157 cmpl -268(fp),-264(fp)
1158 bgequ noname.110
1159 incl -272(fp)
1160noname.110:
1161 movl -268(fp),r1
1162 movl -272(fp),r2
1163 addl2 r1,r10
1164 bicl2 #0,r10
1165 cmpl r10,r1
1166 bgequ noname.111
1167 incl r2
1168noname.111:
1169 addl2 r2,r9
1170 bicl2 #0,r9
1171 cmpl r9,r2
1172 bgequ noname.112
1173 incl r8
1174noname.112:
1175
1176 movzwl 10(r6),r2
1177 bicl3 #-65536,12(r7),r3
1178 movzwl 14(r7),r0
1179 bicl2 #-65536,r0
1180 bicl3 #-65536,8(r6),-284(fp)
1181 bicl3 #-65536,r2,-288(fp)
1182 mull3 r0,-284(fp),-276(fp)
1183 mull2 r3,-284(fp)
1184 mull3 r3,-288(fp),-280(fp)
1185 mull2 r0,-288(fp)
1186 addl3 -276(fp),-280(fp),r0
1187 bicl3 #0,r0,-276(fp)
1188 cmpl -276(fp),-280(fp)
1189 bgequ noname.113
1190 addl2 #65536,-288(fp)
1191noname.113:
1192 movzwl -274(fp),r0
1193 bicl2 #-65536,r0
1194 addl2 r0,-288(fp)
1195 bicl3 #-65536,-276(fp),r0
1196 ashl #16,r0,-280(fp)
1197 addl3 -280(fp),-284(fp),r0
1198 bicl3 #0,r0,-284(fp)
1199 cmpl -284(fp),-280(fp)
1200 bgequ noname.114
1201 incl -288(fp)
1202noname.114:
1203 movl -284(fp),r1
1204 movl -288(fp),r2
1205 addl2 r1,r10
1206 bicl2 #0,r10
1207 cmpl r10,r1
1208 bgequ noname.115
1209 incl r2
1210noname.115:
1211 addl2 r2,r9
1212 bicl2 #0,r9
1213 cmpl r9,r2
1214 bgequ noname.116
1215 incl r8
1216noname.116:
1217
1218 movzwl 14(r6),r2
1219 bicl3 #-65536,8(r7),r3
1220 movzwl 10(r7),r0
1221 bicl2 #-65536,r0
1222 bicl3 #-65536,12(r6),-300(fp)
1223 bicl3 #-65536,r2,-304(fp)
1224 mull3 r0,-300(fp),-292(fp)
1225 mull2 r3,-300(fp)
1226 mull3 r3,-304(fp),-296(fp)
1227 mull2 r0,-304(fp)
1228 addl3 -292(fp),-296(fp),r0
1229 bicl3 #0,r0,-292(fp)
1230 cmpl -292(fp),-296(fp)
1231 bgequ noname.117
1232 addl2 #65536,-304(fp)
1233noname.117:
1234 movzwl -290(fp),r0
1235 bicl2 #-65536,r0
1236 addl2 r0,-304(fp)
1237 bicl3 #-65536,-292(fp),r0
1238 ashl #16,r0,-296(fp)
1239 addl3 -296(fp),-300(fp),r0
1240 bicl3 #0,r0,-300(fp)
1241 cmpl -300(fp),-296(fp)
1242 bgequ noname.118
1243 incl -304(fp)
1244noname.118:
1245 movl -300(fp),r1
1246 movl -304(fp),r2
1247 addl2 r1,r10
1248 bicl2 #0,r10
1249 cmpl r10,r1
1250 bgequ noname.119
1251 incl r2
1252noname.119:
1253 addl2 r2,r9
1254 bicl2 #0,r9
1255 cmpl r9,r2
1256 bgequ noname.120
1257 incl r8
1258noname.120:
1259
1260 movzwl 18(r6),r2
1261 bicl3 #-65536,4(r7),r3
1262 movzwl 6(r7),r0
1263 bicl2 #-65536,r0
1264 bicl3 #-65536,16(r6),-316(fp)
1265 bicl3 #-65536,r2,-320(fp)
1266 mull3 r0,-316(fp),-308(fp)
1267 mull2 r3,-316(fp)
1268 mull3 r3,-320(fp),-312(fp)
1269 mull2 r0,-320(fp)
1270 addl3 -308(fp),-312(fp),r0
1271 bicl3 #0,r0,-308(fp)
1272 cmpl -308(fp),-312(fp)
1273 bgequ noname.121
1274 addl2 #65536,-320(fp)
1275noname.121:
1276 movzwl -306(fp),r0
1277 bicl2 #-65536,r0
1278 addl2 r0,-320(fp)
1279 bicl3 #-65536,-308(fp),r0
1280 ashl #16,r0,-312(fp)
1281 addl3 -312(fp),-316(fp),r0
1282 bicl3 #0,r0,-316(fp)
1283 cmpl -316(fp),-312(fp)
1284 bgequ noname.122
1285 incl -320(fp)
1286noname.122:
1287 movl -316(fp),r1
1288 movl -320(fp),r2
1289 addl2 r1,r10
1290 bicl2 #0,r10
1291 cmpl r10,r1
1292 bgequ noname.123
1293 incl r2
1294
1295noname.123:
1296 addl2 r2,r9
1297 bicl2 #0,r9
1298 cmpl r9,r2
1299 bgequ noname.124
1300 incl r8
1301noname.124:
1302
1303 movzwl 22(r6),r2
1304 bicl3 #-65536,(r7),r3
1305 movzwl 2(r7),r0
1306 bicl2 #-65536,r0
1307 bicl3 #-65536,20(r6),-332(fp)
1308 bicl3 #-65536,r2,-336(fp)
1309 mull3 r0,-332(fp),-324(fp)
1310 mull2 r3,-332(fp)
1311 mull3 r3,-336(fp),-328(fp)
1312 mull2 r0,-336(fp)
1313 addl3 -324(fp),-328(fp),r0
1314 bicl3 #0,r0,-324(fp)
1315 cmpl -324(fp),-328(fp)
1316 bgequ noname.125
1317 addl2 #65536,-336(fp)
1318noname.125:
1319 movzwl -322(fp),r0
1320 bicl2 #-65536,r0
1321 addl2 r0,-336(fp)
1322 bicl3 #-65536,-324(fp),r0
1323 ashl #16,r0,-328(fp)
1324 addl3 -328(fp),-332(fp),r0
1325 bicl3 #0,r0,-332(fp)
1326 cmpl -332(fp),-328(fp)
1327 bgequ noname.126
1328 incl -336(fp)
1329noname.126:
1330 movl -332(fp),r1
1331 movl -336(fp),r2
1332 addl2 r1,r10
1333 bicl2 #0,r10
1334 cmpl r10,r1
1335 bgequ noname.127
1336 incl r2
1337noname.127:
1338 addl2 r2,r9
1339 bicl2 #0,r9
1340 cmpl r9,r2
1341 bgequ noname.128
1342 incl r8
1343noname.128:
1344
1345 movl r10,20(r11)
1346
1347 clrl r10
1348
1349 movzwl 26(r6),r2
1350 bicl3 #-65536,(r7),r3
1351 movzwl 2(r7),r0
1352 bicl2 #-65536,r0
1353 bicl3 #-65536,24(r6),-348(fp)
1354 bicl3 #-65536,r2,-352(fp)
1355 mull3 r0,-348(fp),-340(fp)
1356 mull2 r3,-348(fp)
1357 mull3 r3,-352(fp),-344(fp)
1358 mull2 r0,-352(fp)
1359 addl3 -340(fp),-344(fp),r0
1360 bicl3 #0,r0,-340(fp)
1361 cmpl -340(fp),-344(fp)
1362 bgequ noname.129
1363 addl2 #65536,-352(fp)
1364noname.129:
1365 movzwl -338(fp),r0
1366 bicl2 #-65536,r0
1367 addl2 r0,-352(fp)
1368 bicl3 #-65536,-340(fp),r0
1369 ashl #16,r0,-344(fp)
1370 addl3 -344(fp),-348(fp),r0
1371 bicl3 #0,r0,-348(fp)
1372 cmpl -348(fp),-344(fp)
1373 bgequ noname.130
1374 incl -352(fp)
1375noname.130:
1376 movl -348(fp),r1
1377 movl -352(fp),r2
1378 addl2 r1,r9
1379 bicl2 #0,r9
1380 cmpl r9,r1
1381 bgequ noname.131
1382 incl r2
1383noname.131:
1384 addl2 r2,r8
1385 bicl2 #0,r8
1386 cmpl r8,r2
1387 bgequ noname.132
1388 incl r10
1389noname.132:
1390
1391 movzwl 22(r6),r2
1392 bicl3 #-65536,4(r7),r3
1393 movzwl 6(r7),r0
1394 bicl2 #-65536,r0
1395 bicl3 #-65536,20(r6),-364(fp)
1396 bicl3 #-65536,r2,-368(fp)
1397 mull3 r0,-364(fp),-356(fp)
1398 mull2 r3,-364(fp)
1399 mull3 r3,-368(fp),-360(fp)
1400 mull2 r0,-368(fp)
1401 addl3 -356(fp),-360(fp),r0
1402 bicl3 #0,r0,-356(fp)
1403 cmpl -356(fp),-360(fp)
1404 bgequ noname.133
1405 addl2 #65536,-368(fp)
1406noname.133:
1407 movzwl -354(fp),r0
1408 bicl2 #-65536,r0
1409 addl2 r0,-368(fp)
1410 bicl3 #-65536,-356(fp),r0
1411 ashl #16,r0,-360(fp)
1412 addl3 -360(fp),-364(fp),r0
1413 bicl3 #0,r0,-364(fp)
1414 cmpl -364(fp),-360(fp)
1415 bgequ noname.134
1416 incl -368(fp)
1417noname.134:
1418 movl -364(fp),r1
1419 movl -368(fp),r2
1420 addl2 r1,r9
1421 bicl2 #0,r9
1422 cmpl r9,r1
1423 bgequ noname.135
1424 incl r2
1425noname.135:
1426 addl2 r2,r8
1427 bicl2 #0,r8
1428 cmpl r8,r2
1429 bgequ noname.136
1430 incl r10
1431noname.136:
1432
1433 movzwl 18(r6),r2
1434 bicl3 #-65536,8(r7),r3
1435 movzwl 10(r7),r0
1436 bicl2 #-65536,r0
1437 bicl3 #-65536,16(r6),-380(fp)
1438 bicl3 #-65536,r2,-384(fp)
1439 mull3 r0,-380(fp),-372(fp)
1440 mull2 r3,-380(fp)
1441 mull3 r3,-384(fp),-376(fp)
1442 mull2 r0,-384(fp)
1443 addl3 -372(fp),-376(fp),r0
1444 bicl3 #0,r0,-372(fp)
1445 cmpl -372(fp),-376(fp)
1446 bgequ noname.137
1447 addl2 #65536,-384(fp)
1448noname.137:
1449 movzwl -370(fp),r0
1450 bicl2 #-65536,r0
1451 addl2 r0,-384(fp)
1452 bicl3 #-65536,-372(fp),r0
1453 ashl #16,r0,-376(fp)
1454 addl3 -376(fp),-380(fp),r0
1455 bicl3 #0,r0,-380(fp)
1456 cmpl -380(fp),-376(fp)
1457 bgequ noname.138
1458 incl -384(fp)
1459noname.138:
1460 movl -380(fp),r1
1461 movl -384(fp),r2
1462 addl2 r1,r9
1463 bicl2 #0,r9
1464 cmpl r9,r1
1465 bgequ noname.139
1466 incl r2
1467noname.139:
1468 addl2 r2,r8
1469 bicl2 #0,r8
1470 cmpl r8,r2
1471 bgequ noname.140
1472 incl r10
1473noname.140:
1474
1475 movzwl 14(r6),r2
1476 bicl3 #-65536,12(r7),r3
1477 movzwl 14(r7),r0
1478 bicl2 #-65536,r0
1479 bicl3 #-65536,12(r6),-396(fp)
1480 bicl3 #-65536,r2,-400(fp)
1481 mull3 r0,-396(fp),-388(fp)
1482 mull2 r3,-396(fp)
1483 mull3 r3,-400(fp),-392(fp)
1484 mull2 r0,-400(fp)
1485 addl3 -388(fp),-392(fp),r0
1486 bicl3 #0,r0,-388(fp)
1487 cmpl -388(fp),-392(fp)
1488 bgequ noname.141
1489 addl2 #65536,-400(fp)
1490noname.141:
1491 movzwl -386(fp),r0
1492 bicl2 #-65536,r0
1493 addl2 r0,-400(fp)
1494 bicl3 #-65536,-388(fp),r0
1495 ashl #16,r0,-392(fp)
1496 addl3 -392(fp),-396(fp),r0
1497 bicl3 #0,r0,-396(fp)
1498 cmpl -396(fp),-392(fp)
1499 bgequ noname.142
1500 incl -400(fp)
1501noname.142:
1502 movl -396(fp),r1
1503 movl -400(fp),r2
1504 addl2 r1,r9
1505 bicl2 #0,r9
1506 cmpl r9,r1
1507 bgequ noname.143
1508 incl r2
1509noname.143:
1510 addl2 r2,r8
1511 bicl2 #0,r8
1512 cmpl r8,r2
1513 bgequ noname.144
1514 incl r10
1515noname.144:
1516
1517 movzwl 10(r6),r2
1518 bicl3 #-65536,16(r7),r3
1519 movzwl 18(r7),r0
1520 bicl2 #-65536,r0
1521 bicl3 #-65536,8(r6),-412(fp)
1522 bicl3 #-65536,r2,-416(fp)
1523 mull3 r0,-412(fp),-404(fp)
1524 mull2 r3,-412(fp)
1525 mull3 r3,-416(fp),-408(fp)
1526 mull2 r0,-416(fp)
1527 addl3 -404(fp),-408(fp),r0
1528 bicl3 #0,r0,-404(fp)
1529 cmpl -404(fp),-408(fp)
1530 bgequ noname.145
1531 addl2 #65536,-416(fp)
1532noname.145:
1533 movzwl -402(fp),r0
1534 bicl2 #-65536,r0
1535 addl2 r0,-416(fp)
1536 bicl3 #-65536,-404(fp),r0
1537 ashl #16,r0,-408(fp)
1538 addl3 -408(fp),-412(fp),r0
1539 bicl3 #0,r0,-412(fp)
1540 cmpl -412(fp),-408(fp)
1541 bgequ noname.146
1542 incl -416(fp)
1543noname.146:
1544 movl -412(fp),r1
1545 movl -416(fp),r2
1546 addl2 r1,r9
1547 bicl2 #0,r9
1548 cmpl r9,r1
1549 bgequ noname.147
1550 incl r2
1551noname.147:
1552 addl2 r2,r8
1553 bicl2 #0,r8
1554 cmpl r8,r2
1555 bgequ noname.148
1556 incl r10
1557noname.148:
1558
1559 movzwl 6(r6),r2
1560 bicl3 #-65536,20(r7),r3
1561 movzwl 22(r7),r0
1562 bicl2 #-65536,r0
1563 bicl3 #-65536,4(r6),-428(fp)
1564 bicl3 #-65536,r2,-432(fp)
1565 mull3 r0,-428(fp),-420(fp)
1566 mull2 r3,-428(fp)
1567 mull3 r3,-432(fp),-424(fp)
1568 mull2 r0,-432(fp)
1569 addl3 -420(fp),-424(fp),r0
1570 bicl3 #0,r0,-420(fp)
1571 cmpl -420(fp),-424(fp)
1572 bgequ noname.149
1573 addl2 #65536,-432(fp)
1574noname.149:
1575 movzwl -418(fp),r0
1576 bicl2 #-65536,r0
1577 addl2 r0,-432(fp)
1578 bicl3 #-65536,-420(fp),r0
1579 ashl #16,r0,-424(fp)
1580 addl3 -424(fp),-428(fp),r0
1581 bicl3 #0,r0,-428(fp)
1582 cmpl -428(fp),-424(fp)
1583 bgequ noname.150
1584 incl -432(fp)
1585noname.150:
1586 movl -428(fp),r1
1587 movl -432(fp),r2
1588 addl2 r1,r9
1589 bicl2 #0,r9
1590 cmpl r9,r1
1591 bgequ noname.151
1592 incl r2
1593noname.151:
1594 addl2 r2,r8
1595 bicl2 #0,r8
1596 cmpl r8,r2
1597 bgequ noname.152
1598 incl r10
1599noname.152:
1600
1601 movzwl 2(r6),r2
1602 bicl3 #-65536,24(r7),r3
1603 movzwl 26(r7),r0
1604 bicl2 #-65536,r0
1605 bicl3 #-65536,(r6),-444(fp)
1606 bicl3 #-65536,r2,-448(fp)
1607 mull3 r0,-444(fp),-436(fp)
1608 mull2 r3,-444(fp)
1609 mull3 r3,-448(fp),-440(fp)
1610 mull2 r0,-448(fp)
1611 addl3 -436(fp),-440(fp),r0
1612 bicl3 #0,r0,-436(fp)
1613 cmpl -436(fp),-440(fp)
1614 bgequ noname.153
1615 addl2 #65536,-448(fp)
1616noname.153:
1617 movzwl -434(fp),r0
1618 bicl2 #-65536,r0
1619 addl2 r0,-448(fp)
1620 bicl3 #-65536,-436(fp),r0
1621 ashl #16,r0,-440(fp)
1622 addl3 -440(fp),-444(fp),r0
1623 bicl3 #0,r0,-444(fp)
1624 cmpl -444(fp),-440(fp)
1625 bgequ noname.154
1626 incl -448(fp)
1627noname.154:
1628 movl -444(fp),r1
1629 movl -448(fp),r2
1630 addl2 r1,r9
1631 bicl2 #0,r9
1632 cmpl r9,r1
1633 bgequ noname.155
1634 incl r2
1635noname.155:
1636 addl2 r2,r8
1637 bicl2 #0,r8
1638 cmpl r8,r2
1639 bgequ noname.156
1640 incl r10
1641noname.156:
1642
1643 movl r9,24(r11)
1644
1645 clrl r9
1646
1647 movzwl 2(r6),r2
1648 bicl3 #-65536,28(r7),r3
1649 movzwl 30(r7),r0
1650 bicl2 #-65536,r0
1651 bicl3 #-65536,(r6),-460(fp)
1652 bicl3 #-65536,r2,-464(fp)
1653 mull3 r0,-460(fp),-452(fp)
1654 mull2 r3,-460(fp)
1655 mull3 r3,-464(fp),-456(fp)
1656 mull2 r0,-464(fp)
1657 addl3 -452(fp),-456(fp),r0
1658 bicl3 #0,r0,-452(fp)
1659 cmpl -452(fp),-456(fp)
1660 bgequ noname.157
1661 addl2 #65536,-464(fp)
1662noname.157:
1663 movzwl -450(fp),r0
1664 bicl2 #-65536,r0
1665 addl2 r0,-464(fp)
1666 bicl3 #-65536,-452(fp),r0
1667 ashl #16,r0,-456(fp)
1668 addl3 -456(fp),-460(fp),r0
1669 bicl3 #0,r0,-460(fp)
1670 cmpl -460(fp),-456(fp)
1671 bgequ noname.158
1672 incl -464(fp)
1673noname.158:
1674 movl -460(fp),r1
1675 movl -464(fp),r2
1676 addl2 r1,r8
1677 bicl2 #0,r8
1678 cmpl r8,r1
1679 bgequ noname.159
1680 incl r2
1681noname.159:
1682 addl2 r2,r10
1683 bicl2 #0,r10
1684 cmpl r10,r2
1685 bgequ noname.160
1686 incl r9
1687noname.160:
1688
1689 movzwl 6(r6),r2
1690 bicl3 #-65536,24(r7),r3
1691 movzwl 26(r7),r0
1692 bicl2 #-65536,r0
1693 bicl3 #-65536,4(r6),-476(fp)
1694 bicl3 #-65536,r2,-480(fp)
1695 mull3 r0,-476(fp),-468(fp)
1696 mull2 r3,-476(fp)
1697 mull3 r3,-480(fp),-472(fp)
1698 mull2 r0,-480(fp)
1699 addl3 -468(fp),-472(fp),r0
1700 bicl3 #0,r0,-468(fp)
1701 cmpl -468(fp),-472(fp)
1702 bgequ noname.161
1703 addl2 #65536,-480(fp)
1704noname.161:
1705 movzwl -466(fp),r0
1706 bicl2 #-65536,r0
1707 addl2 r0,-480(fp)
1708 bicl3 #-65536,-468(fp),r0
1709 ashl #16,r0,-472(fp)
1710 addl3 -472(fp),-476(fp),r0
1711 bicl3 #0,r0,-476(fp)
1712 cmpl -476(fp),-472(fp)
1713 bgequ noname.162
1714 incl -480(fp)
1715noname.162:
1716 movl -476(fp),r1
1717 movl -480(fp),r2
1718 addl2 r1,r8
1719 bicl2 #0,r8
1720 cmpl r8,r1
1721 bgequ noname.163
1722 incl r2
1723noname.163:
1724 addl2 r2,r10
1725 bicl2 #0,r10
1726 cmpl r10,r2
1727 bgequ noname.164
1728 incl r9
1729noname.164:
1730
1731 movzwl 10(r6),r2
1732 bicl3 #-65536,20(r7),r3
1733 movzwl 22(r7),r0
1734 bicl2 #-65536,r0
1735 bicl3 #-65536,8(r6),-492(fp)
1736 bicl3 #-65536,r2,-496(fp)
1737 mull3 r0,-492(fp),-484(fp)
1738 mull2 r3,-492(fp)
1739 mull3 r3,-496(fp),-488(fp)
1740 mull2 r0,-496(fp)
1741 addl3 -484(fp),-488(fp),r0
1742 bicl3 #0,r0,-484(fp)
1743 cmpl -484(fp),-488(fp)
1744 bgequ noname.165
1745 addl2 #65536,-496(fp)
1746noname.165:
1747 movzwl -482(fp),r0
1748 bicl2 #-65536,r0
1749 addl2 r0,-496(fp)
1750 bicl3 #-65536,-484(fp),r0
1751 ashl #16,r0,-488(fp)
1752 addl3 -488(fp),-492(fp),r0
1753 bicl3 #0,r0,-492(fp)
1754 cmpl -492(fp),-488(fp)
1755 bgequ noname.166
1756 incl -496(fp)
1757noname.166:
1758 movl -492(fp),r1
1759 movl -496(fp),r2
1760 addl2 r1,r8
1761 bicl2 #0,r8
1762 cmpl r8,r1
1763 bgequ noname.167
1764 incl r2
1765noname.167:
1766 addl2 r2,r10
1767 bicl2 #0,r10
1768 cmpl r10,r2
1769 bgequ noname.168
1770 incl r9
1771noname.168:
1772
1773 movzwl 14(r6),r2
1774 bicl3 #-65536,16(r7),r3
1775 movzwl 18(r7),r0
1776 bicl2 #-65536,r0
1777 bicl3 #-65536,12(r6),-508(fp)
1778 bicl3 #-65536,r2,-512(fp)
1779 mull3 r0,-508(fp),-500(fp)
1780 mull2 r3,-508(fp)
1781 mull3 r3,-512(fp),-504(fp)
1782 mull2 r0,-512(fp)
1783 addl3 -500(fp),-504(fp),r0
1784 bicl3 #0,r0,-500(fp)
1785 cmpl -500(fp),-504(fp)
1786 bgequ noname.169
1787 addl2 #65536,-512(fp)
1788noname.169:
1789 movzwl -498(fp),r0
1790 bicl2 #-65536,r0
1791 addl2 r0,-512(fp)
1792 bicl3 #-65536,-500(fp),r0
1793 ashl #16,r0,-504(fp)
1794 addl3 -504(fp),-508(fp),r0
1795 bicl3 #0,r0,-508(fp)
1796 cmpl -508(fp),-504(fp)
1797 bgequ noname.170
1798 incl -512(fp)
1799noname.170:
1800 movl -508(fp),r1
1801 movl -512(fp),r2
1802 addl2 r1,r8
1803 bicl2 #0,r8
1804 cmpl r8,r1
1805 bgequ noname.171
1806 incl r2
1807noname.171:
1808 addl2 r2,r10
1809 bicl2 #0,r10
1810 cmpl r10,r2
1811 bgequ noname.172
1812 incl r9
1813noname.172:
1814
1815 movzwl 18(r6),r2
1816 bicl3 #-65536,12(r7),r3
1817 movzwl 14(r7),r0
1818 bicl2 #-65536,r0
1819 bicl3 #-65536,16(r6),-524(fp)
1820 bicl3 #-65536,r2,-528(fp)
1821 mull3 r0,-524(fp),-516(fp)
1822 mull2 r3,-524(fp)
1823 mull3 r3,-528(fp),-520(fp)
1824 mull2 r0,-528(fp)
1825 addl3 -516(fp),-520(fp),r0
1826 bicl3 #0,r0,-516(fp)
1827 cmpl -516(fp),-520(fp)
1828 bgequ noname.173
1829 addl2 #65536,-528(fp)
1830noname.173:
1831 movzwl -514(fp),r0
1832 bicl2 #-65536,r0
1833 addl2 r0,-528(fp)
1834 bicl3 #-65536,-516(fp),r0
1835 ashl #16,r0,-520(fp)
1836 addl3 -520(fp),-524(fp),r0
1837 bicl3 #0,r0,-524(fp)
1838 cmpl -524(fp),-520(fp)
1839 bgequ noname.174
1840 incl -528(fp)
1841noname.174:
1842 movl -524(fp),r1
1843 movl -528(fp),r2
1844 addl2 r1,r8
1845 bicl2 #0,r8
1846 cmpl r8,r1
1847 bgequ noname.175
1848 incl r2
1849noname.175:
1850 addl2 r2,r10
1851 bicl2 #0,r10
1852 cmpl r10,r2
1853 bgequ noname.176
1854 incl r9
1855noname.176:
1856
1857 movzwl 22(r6),r2
1858 bicl3 #-65536,8(r7),r3
1859 movzwl 10(r7),r0
1860 bicl2 #-65536,r0
1861 bicl3 #-65536,20(r6),-540(fp)
1862 bicl3 #-65536,r2,-544(fp)
1863 mull3 r0,-540(fp),-532(fp)
1864 mull2 r3,-540(fp)
1865 mull3 r3,-544(fp),-536(fp)
1866 mull2 r0,-544(fp)
1867 addl3 -532(fp),-536(fp),r0
1868 bicl3 #0,r0,-532(fp)
1869 cmpl -532(fp),-536(fp)
1870 bgequ noname.177
1871 addl2 #65536,-544(fp)
1872noname.177:
1873 movzwl -530(fp),r0
1874 bicl2 #-65536,r0
1875 addl2 r0,-544(fp)
1876 bicl3 #-65536,-532(fp),r0
1877 ashl #16,r0,-536(fp)
1878 addl3 -536(fp),-540(fp),r0
1879 bicl3 #0,r0,-540(fp)
1880 cmpl -540(fp),-536(fp)
1881 bgequ noname.178
1882 incl -544(fp)
1883noname.178:
1884 movl -540(fp),r1
1885 movl -544(fp),r2
1886 addl2 r1,r8
1887 bicl2 #0,r8
1888 cmpl r8,r1
1889 bgequ noname.179
1890 incl r2
1891noname.179:
1892 addl2 r2,r10
1893 bicl2 #0,r10
1894 cmpl r10,r2
1895 bgequ noname.180
1896 incl r9
1897noname.180:
1898
1899 movzwl 26(r6),r2
1900 bicl3 #-65536,4(r7),r3
1901 movzwl 6(r7),r0
1902 bicl2 #-65536,r0
1903 bicl3 #-65536,24(r6),-556(fp)
1904 bicl3 #-65536,r2,-560(fp)
1905 mull3 r0,-556(fp),-548(fp)
1906 mull2 r3,-556(fp)
1907 mull3 r3,-560(fp),-552(fp)
1908 mull2 r0,-560(fp)
1909 addl3 -548(fp),-552(fp),r0
1910 bicl3 #0,r0,-548(fp)
1911 cmpl -548(fp),-552(fp)
1912 bgequ noname.181
1913 addl2 #65536,-560(fp)
1914noname.181:
1915 movzwl -546(fp),r0
1916 bicl2 #-65536,r0
1917 addl2 r0,-560(fp)
1918 bicl3 #-65536,-548(fp),r0
1919 ashl #16,r0,-552(fp)
1920 addl3 -552(fp),-556(fp),r0
1921 bicl3 #0,r0,-556(fp)
1922 cmpl -556(fp),-552(fp)
1923 bgequ noname.182
1924 incl -560(fp)
1925noname.182:
1926 movl -556(fp),r1
1927 movl -560(fp),r2
1928 addl2 r1,r8
1929 bicl2 #0,r8
1930 cmpl r8,r1
1931 bgequ noname.183
1932 incl r2
1933noname.183:
1934 addl2 r2,r10
1935 bicl2 #0,r10
1936 cmpl r10,r2
1937 bgequ noname.184
1938 incl r9
1939noname.184:
1940
1941 movzwl 30(r6),r2
1942 bicl3 #-65536,(r7),r3
1943 movzwl 2(r7),r0
1944 bicl2 #-65536,r0
1945 bicl3 #-65536,28(r6),-572(fp)
1946 bicl3 #-65536,r2,-576(fp)
1947 mull3 r0,-572(fp),-564(fp)
1948 mull2 r3,-572(fp)
1949 mull3 r3,-576(fp),-568(fp)
1950 mull2 r0,-576(fp)
1951 addl3 -564(fp),-568(fp),r0
1952 bicl3 #0,r0,-564(fp)
1953 cmpl -564(fp),-568(fp)
1954 bgequ noname.185
1955 addl2 #65536,-576(fp)
1956noname.185:
1957 movzwl -562(fp),r0
1958 bicl2 #-65536,r0
1959 addl2 r0,-576(fp)
1960 bicl3 #-65536,-564(fp),r0
1961 ashl #16,r0,-568(fp)
1962 addl3 -568(fp),-572(fp),r0
1963 bicl3 #0,r0,-572(fp)
1964 cmpl -572(fp),-568(fp)
1965 bgequ noname.186
1966 incl -576(fp)
1967noname.186:
1968 movl -572(fp),r1
1969 movl -576(fp),r2
1970 addl2 r1,r8
1971 bicl2 #0,r8
1972 cmpl r8,r1
1973 bgequ noname.187
1974 incl r2
1975noname.187:
1976 addl2 r2,r10
1977 bicl2 #0,r10
1978 cmpl r10,r2
1979 bgequ noname.188
1980 incl r9
1981noname.188:
1982
1983 movl r8,28(r11)
1984
1985 clrl r8
1986
1987 movzwl 30(r6),r2
1988 bicl3 #-65536,4(r7),r3
1989 movzwl 6(r7),r0
1990 bicl2 #-65536,r0
1991 bicl3 #-65536,28(r6),-588(fp)
1992 bicl3 #-65536,r2,-592(fp)
1993 mull3 r0,-588(fp),-580(fp)
1994 mull2 r3,-588(fp)
1995 mull3 r3,-592(fp),-584(fp)
1996 mull2 r0,-592(fp)
1997 addl3 -580(fp),-584(fp),r0
1998 bicl3 #0,r0,-580(fp)
1999 cmpl -580(fp),-584(fp)
2000 bgequ noname.189
2001 addl2 #65536,-592(fp)
2002noname.189:
2003 movzwl -578(fp),r0
2004 bicl2 #-65536,r0
2005 addl2 r0,-592(fp)
2006 bicl3 #-65536,-580(fp),r0
2007 ashl #16,r0,-584(fp)
2008 addl3 -584(fp),-588(fp),r0
2009 bicl3 #0,r0,-588(fp)
2010 cmpl -588(fp),-584(fp)
2011 bgequ noname.190
2012 incl -592(fp)
2013noname.190:
2014 movl -588(fp),r1
2015 movl -592(fp),r2
2016 addl2 r1,r10
2017 bicl2 #0,r10
2018 cmpl r10,r1
2019 bgequ noname.191
2020 incl r2
2021noname.191:
2022 addl2 r2,r9
2023 bicl2 #0,r9
2024 cmpl r9,r2
2025 bgequ noname.192
2026 incl r8
2027noname.192:
2028
2029 movzwl 26(r6),r2
2030 bicl3 #-65536,8(r7),r3
2031 movzwl 10(r7),r0
2032 bicl2 #-65536,r0
2033 bicl3 #-65536,24(r6),-604(fp)
2034 bicl3 #-65536,r2,-608(fp)
2035 mull3 r0,-604(fp),-596(fp)
2036 mull2 r3,-604(fp)
2037 mull3 r3,-608(fp),-600(fp)
2038 mull2 r0,-608(fp)
2039 addl3 -596(fp),-600(fp),r0
2040 bicl3 #0,r0,-596(fp)
2041 cmpl -596(fp),-600(fp)
2042 bgequ noname.193
2043 addl2 #65536,-608(fp)
2044noname.193:
2045 movzwl -594(fp),r0
2046 bicl2 #-65536,r0
2047 addl2 r0,-608(fp)
2048 bicl3 #-65536,-596(fp),r0
2049 ashl #16,r0,-600(fp)
2050 addl3 -600(fp),-604(fp),r0
2051 bicl3 #0,r0,-604(fp)
2052 cmpl -604(fp),-600(fp)
2053 bgequ noname.194
2054 incl -608(fp)
2055noname.194:
2056 movl -604(fp),r1
2057 movl -608(fp),r2
2058 addl2 r1,r10
2059 bicl2 #0,r10
2060 cmpl r10,r1
2061 bgequ noname.195
2062 incl r2
2063noname.195:
2064 addl2 r2,r9
2065 bicl2 #0,r9
2066 cmpl r9,r2
2067 bgequ noname.196
2068 incl r8
2069noname.196:
2070
2071 movzwl 22(r6),r2
2072 bicl3 #-65536,12(r7),r3
2073 movzwl 14(r7),r0
2074 bicl2 #-65536,r0
2075 bicl3 #-65536,20(r6),-620(fp)
2076 bicl3 #-65536,r2,-624(fp)
2077 mull3 r0,-620(fp),-612(fp)
2078 mull2 r3,-620(fp)
2079 mull3 r3,-624(fp),-616(fp)
2080 mull2 r0,-624(fp)
2081 addl3 -612(fp),-616(fp),r0
2082 bicl3 #0,r0,-612(fp)
2083 cmpl -612(fp),-616(fp)
2084 bgequ noname.197
2085 addl2 #65536,-624(fp)
2086noname.197:
2087 movzwl -610(fp),r0
2088 bicl2 #-65536,r0
2089 addl2 r0,-624(fp)
2090 bicl3 #-65536,-612(fp),r0
2091 ashl #16,r0,-616(fp)
2092 addl3 -616(fp),-620(fp),r0
2093 bicl3 #0,r0,-620(fp)
2094 cmpl -620(fp),-616(fp)
2095 bgequ noname.198
2096 incl -624(fp)
2097noname.198:
2098 movl -620(fp),r1
2099 movl -624(fp),r2
2100 addl2 r1,r10
2101 bicl2 #0,r10
2102 cmpl r10,r1
2103 bgequ noname.199
2104 incl r2
2105noname.199:
2106 addl2 r2,r9
2107 bicl2 #0,r9
2108 cmpl r9,r2
2109 bgequ noname.200
2110 incl r8
2111noname.200:
2112
2113 movzwl 18(r6),r2
2114 bicl3 #-65536,16(r7),r3
2115 movzwl 18(r7),r0
2116 bicl2 #-65536,r0
2117 bicl3 #-65536,16(r6),-636(fp)
2118 bicl3 #-65536,r2,-640(fp)
2119 mull3 r0,-636(fp),-628(fp)
2120 mull2 r3,-636(fp)
2121 mull3 r3,-640(fp),-632(fp)
2122 mull2 r0,-640(fp)
2123 addl3 -628(fp),-632(fp),r0
2124 bicl3 #0,r0,-628(fp)
2125 cmpl -628(fp),-632(fp)
2126 bgequ noname.201
2127 addl2 #65536,-640(fp)
2128noname.201:
2129 movzwl -626(fp),r0
2130 bicl2 #-65536,r0
2131 addl2 r0,-640(fp)
2132 bicl3 #-65536,-628(fp),r0
2133 ashl #16,r0,-632(fp)
2134 addl3 -632(fp),-636(fp),r0
2135 bicl3 #0,r0,-636(fp)
2136 cmpl -636(fp),-632(fp)
2137 bgequ noname.202
2138 incl -640(fp)
2139noname.202:
2140 movl -636(fp),r1
2141 movl -640(fp),r2
2142 addl2 r1,r10
2143 bicl2 #0,r10
2144 cmpl r10,r1
2145 bgequ noname.203
2146 incl r2
2147noname.203:
2148 addl2 r2,r9
2149 bicl2 #0,r9
2150 cmpl r9,r2
2151 bgequ noname.204
2152 incl r8
2153noname.204:
2154
2155 movzwl 14(r6),r2
2156 bicl3 #-65536,20(r7),r3
2157 movzwl 22(r7),r0
2158 bicl2 #-65536,r0
2159 bicl3 #-65536,12(r6),-652(fp)
2160 bicl3 #-65536,r2,-656(fp)
2161 mull3 r0,-652(fp),-644(fp)
2162 mull2 r3,-652(fp)
2163 mull3 r3,-656(fp),-648(fp)
2164 mull2 r0,-656(fp)
2165 addl3 -644(fp),-648(fp),r0
2166 bicl3 #0,r0,-644(fp)
2167 cmpl -644(fp),-648(fp)
2168 bgequ noname.205
2169 addl2 #65536,-656(fp)
2170noname.205:
2171 movzwl -642(fp),r0
2172 bicl2 #-65536,r0
2173 addl2 r0,-656(fp)
2174 bicl3 #-65536,-644(fp),r0
2175 ashl #16,r0,-648(fp)
2176 addl3 -648(fp),-652(fp),r0
2177 bicl3 #0,r0,-652(fp)
2178 cmpl -652(fp),-648(fp)
2179 bgequ noname.206
2180 incl -656(fp)
2181noname.206:
2182 movl -652(fp),r1
2183 movl -656(fp),r2
2184 addl2 r1,r10
2185 bicl2 #0,r10
2186 cmpl r10,r1
2187 bgequ noname.207
2188 incl r2
2189noname.207:
2190 addl2 r2,r9
2191 bicl2 #0,r9
2192 cmpl r9,r2
2193 bgequ noname.208
2194 incl r8
2195noname.208:
2196
2197 movzwl 10(r6),r2
2198 bicl3 #-65536,24(r7),r3
2199 movzwl 26(r7),r0
2200 bicl2 #-65536,r0
2201 bicl3 #-65536,8(r6),-668(fp)
2202 bicl3 #-65536,r2,-672(fp)
2203 mull3 r0,-668(fp),-660(fp)
2204 mull2 r3,-668(fp)
2205 mull3 r3,-672(fp),-664(fp)
2206 mull2 r0,-672(fp)
2207 addl3 -660(fp),-664(fp),r0
2208 bicl3 #0,r0,-660(fp)
2209 cmpl -660(fp),-664(fp)
2210 bgequ noname.209
2211 addl2 #65536,-672(fp)
2212noname.209:
2213 movzwl -658(fp),r0
2214 bicl2 #-65536,r0
2215 addl2 r0,-672(fp)
2216 bicl3 #-65536,-660(fp),r0
2217 ashl #16,r0,-664(fp)
2218 addl3 -664(fp),-668(fp),r0
2219 bicl3 #0,r0,-668(fp)
2220 cmpl -668(fp),-664(fp)
2221 bgequ noname.210
2222 incl -672(fp)
2223noname.210:
2224 movl -668(fp),r1
2225 movl -672(fp),r2
2226 addl2 r1,r10
2227 bicl2 #0,r10
2228 cmpl r10,r1
2229 bgequ noname.211
2230 incl r2
2231noname.211:
2232 addl2 r2,r9
2233 bicl2 #0,r9
2234 cmpl r9,r2
2235 bgequ noname.212
2236 incl r8
2237noname.212:
2238
2239 movzwl 6(r6),r2
2240 bicl3 #-65536,28(r7),r3
2241 movzwl 30(r7),r0
2242 bicl2 #-65536,r0
2243 bicl3 #-65536,4(r6),-684(fp)
2244 bicl3 #-65536,r2,-688(fp)
2245 mull3 r0,-684(fp),-676(fp)
2246 mull2 r3,-684(fp)
2247 mull3 r3,-688(fp),-680(fp)
2248 mull2 r0,-688(fp)
2249 addl3 -676(fp),-680(fp),r0
2250 bicl3 #0,r0,-676(fp)
2251 cmpl -676(fp),-680(fp)
2252 bgequ noname.213
2253 addl2 #65536,-688(fp)
2254noname.213:
2255 movzwl -674(fp),r0
2256 bicl2 #-65536,r0
2257 addl2 r0,-688(fp)
2258 bicl3 #-65536,-676(fp),r0
2259 ashl #16,r0,-680(fp)
2260 addl3 -680(fp),-684(fp),r0
2261 bicl3 #0,r0,-684(fp)
2262 cmpl -684(fp),-680(fp)
2263 bgequ noname.214
2264 incl -688(fp)
2265noname.214:
2266 movl -684(fp),r1
2267 movl -688(fp),r2
2268 addl2 r1,r10
2269 bicl2 #0,r10
2270 cmpl r10,r1
2271 bgequ noname.215
2272 incl r2
2273noname.215:
2274 addl2 r2,r9
2275 bicl2 #0,r9
2276 cmpl r9,r2
2277 bgequ noname.216
2278 incl r8
2279noname.216:
2280
2281 movl r10,32(r11)
2282
2283 clrl r10
2284
2285 movzwl 10(r6),r2
2286 bicl3 #-65536,28(r7),r3
2287 movzwl 30(r7),r0
2288 bicl2 #-65536,r0
2289 bicl3 #-65536,8(r6),-700(fp)
2290 bicl3 #-65536,r2,-704(fp)
2291 mull3 r0,-700(fp),-692(fp)
2292 mull2 r3,-700(fp)
2293 mull3 r3,-704(fp),-696(fp)
2294 mull2 r0,-704(fp)
2295 addl3 -692(fp),-696(fp),r0
2296 bicl3 #0,r0,-692(fp)
2297 cmpl -692(fp),-696(fp)
2298 bgequ noname.217
2299 addl2 #65536,-704(fp)
2300noname.217:
2301 movzwl -690(fp),r0
2302 bicl2 #-65536,r0
2303 addl2 r0,-704(fp)
2304 bicl3 #-65536,-692(fp),r0
2305 ashl #16,r0,-696(fp)
2306 addl3 -696(fp),-700(fp),r0
2307 bicl3 #0,r0,-700(fp)
2308 cmpl -700(fp),-696(fp)
2309 bgequ noname.218
2310 incl -704(fp)
2311noname.218:
2312 movl -700(fp),r1
2313 movl -704(fp),r2
2314 addl2 r1,r9
2315 bicl2 #0,r9
2316 cmpl r9,r1
2317 bgequ noname.219
2318 incl r2
2319noname.219:
2320 addl2 r2,r8
2321 bicl2 #0,r8
2322 cmpl r8,r2
2323 bgequ noname.220
2324 incl r10
2325noname.220:
2326
2327 movzwl 14(r6),r2
2328 bicl3 #-65536,24(r7),r3
2329 movzwl 26(r7),r0
2330 bicl2 #-65536,r0
2331 bicl3 #-65536,12(r6),-716(fp)
2332 bicl3 #-65536,r2,-720(fp)
2333 mull3 r0,-716(fp),-708(fp)
2334 mull2 r3,-716(fp)
2335 mull3 r3,-720(fp),-712(fp)
2336 mull2 r0,-720(fp)
2337 addl3 -708(fp),-712(fp),r0
2338 bicl3 #0,r0,-708(fp)
2339 cmpl -708(fp),-712(fp)
2340 bgequ noname.221
2341 addl2 #65536,-720(fp)
2342noname.221:
2343 movzwl -706(fp),r0
2344 bicl2 #-65536,r0
2345 addl2 r0,-720(fp)
2346 bicl3 #-65536,-708(fp),r0
2347 ashl #16,r0,-712(fp)
2348 addl3 -712(fp),-716(fp),r0
2349 bicl3 #0,r0,-716(fp)
2350 cmpl -716(fp),-712(fp)
2351 bgequ noname.222
2352 incl -720(fp)
2353noname.222:
2354 movl -716(fp),r1
2355 movl -720(fp),r2
2356 addl2 r1,r9
2357 bicl2 #0,r9
2358 cmpl r9,r1
2359 bgequ noname.223
2360 incl r2
2361noname.223:
2362 addl2 r2,r8
2363 bicl2 #0,r8
2364 cmpl r8,r2
2365 bgequ noname.224
2366 incl r10
2367noname.224:
2368
2369 movzwl 18(r6),r2
2370 bicl3 #-65536,20(r7),r3
2371 movzwl 22(r7),r0
2372 bicl2 #-65536,r0
2373 bicl3 #-65536,16(r6),-732(fp)
2374 bicl3 #-65536,r2,-736(fp)
2375 mull3 r0,-732(fp),-724(fp)
2376 mull2 r3,-732(fp)
2377 mull3 r3,-736(fp),-728(fp)
2378 mull2 r0,-736(fp)
2379 addl3 -724(fp),-728(fp),r0
2380 bicl3 #0,r0,-724(fp)
2381 cmpl -724(fp),-728(fp)
2382 bgequ noname.225
2383 addl2 #65536,-736(fp)
2384noname.225:
2385 movzwl -722(fp),r0
2386 bicl2 #-65536,r0
2387 addl2 r0,-736(fp)
2388 bicl3 #-65536,-724(fp),r0
2389 ashl #16,r0,-728(fp)
2390 addl3 -728(fp),-732(fp),r0
2391 bicl3 #0,r0,-732(fp)
2392 cmpl -732(fp),-728(fp)
2393 bgequ noname.226
2394 incl -736(fp)
2395noname.226:
2396 movl -732(fp),r1
2397 movl -736(fp),r2
2398 addl2 r1,r9
2399 bicl2 #0,r9
2400 cmpl r9,r1
2401 bgequ noname.227
2402 incl r2
2403noname.227:
2404 addl2 r2,r8
2405 bicl2 #0,r8
2406 cmpl r8,r2
2407 bgequ noname.228
2408 incl r10
2409noname.228:
2410
2411 movzwl 22(r6),r2
2412 bicl3 #-65536,16(r7),r3
2413 movzwl 18(r7),r0
2414 bicl2 #-65536,r0
2415 bicl3 #-65536,20(r6),-748(fp)
2416 bicl3 #-65536,r2,-752(fp)
2417 mull3 r0,-748(fp),-740(fp)
2418 mull2 r3,-748(fp)
2419 mull3 r3,-752(fp),-744(fp)
2420 mull2 r0,-752(fp)
2421 addl3 -740(fp),-744(fp),r0
2422 bicl3 #0,r0,-740(fp)
2423 cmpl -740(fp),-744(fp)
2424 bgequ noname.229
2425 addl2 #65536,-752(fp)
2426noname.229:
2427 movzwl -738(fp),r0
2428 bicl2 #-65536,r0
2429 addl2 r0,-752(fp)
2430 bicl3 #-65536,-740(fp),r0
2431 ashl #16,r0,-744(fp)
2432 addl3 -744(fp),-748(fp),r0
2433 bicl3 #0,r0,-748(fp)
2434 cmpl -748(fp),-744(fp)
2435 bgequ noname.230
2436 incl -752(fp)
2437noname.230:
2438 movl -748(fp),r1
2439 movl -752(fp),r2
2440 addl2 r1,r9
2441 bicl2 #0,r9
2442 cmpl r9,r1
2443 bgequ noname.231
2444 incl r2
2445noname.231:
2446 addl2 r2,r8
2447 bicl2 #0,r8
2448 cmpl r8,r2
2449 bgequ noname.232
2450 incl r10
2451noname.232:
2452
2453 movzwl 26(r6),r2
2454 bicl3 #-65536,12(r7),r3
2455 movzwl 14(r7),r0
2456 bicl2 #-65536,r0
2457 bicl3 #-65536,24(r6),-764(fp)
2458 bicl3 #-65536,r2,-768(fp)
2459 mull3 r0,-764(fp),-756(fp)
2460 mull2 r3,-764(fp)
2461 mull3 r3,-768(fp),-760(fp)
2462 mull2 r0,-768(fp)
2463 addl3 -756(fp),-760(fp),r0
2464 bicl3 #0,r0,-756(fp)
2465 cmpl -756(fp),-760(fp)
2466 bgequ noname.233
2467 addl2 #65536,-768(fp)
2468noname.233:
2469 movzwl -754(fp),r0
2470 bicl2 #-65536,r0
2471 addl2 r0,-768(fp)
2472 bicl3 #-65536,-756(fp),r0
2473 ashl #16,r0,-760(fp)
2474 addl3 -760(fp),-764(fp),r0
2475 bicl3 #0,r0,-764(fp)
2476 cmpl -764(fp),-760(fp)
2477 bgequ noname.234
2478 incl -768(fp)
2479noname.234:
2480 movl -764(fp),r1
2481 movl -768(fp),r2
2482 addl2 r1,r9
2483 bicl2 #0,r9
2484 cmpl r9,r1
2485 bgequ noname.235
2486 incl r2
2487noname.235:
2488 addl2 r2,r8
2489 bicl2 #0,r8
2490 cmpl r8,r2
2491 bgequ noname.236
2492 incl r10
2493noname.236:
2494
2495 bicl3 #-65536,28(r6),r3
2496 movzwl 30(r6),r1
2497 bicl2 #-65536,r1
2498 bicl3 #-65536,8(r7),r2
2499 movzwl 10(r7),r0
2500 bicl2 #-65536,r0
2501 movl r3,r5
2502 movl r1,r4
2503 mull3 r0,r5,-772(fp)
2504 mull2 r2,r5
2505 mull3 r2,r4,-776(fp)
2506 mull2 r0,r4
2507 addl3 -772(fp),-776(fp),r0
2508 bicl3 #0,r0,-772(fp)
2509 cmpl -772(fp),-776(fp)
2510 bgequ noname.237
2511 addl2 #65536,r4
2512noname.237:
2513 movzwl -770(fp),r0
2514 bicl2 #-65536,r0
2515 addl2 r0,r4
2516 bicl3 #-65536,-772(fp),r0
2517 ashl #16,r0,-776(fp)
2518 addl2 -776(fp),r5
2519 bicl2 #0,r5
2520 cmpl r5,-776(fp)
2521 bgequ noname.238
2522 incl r4
2523noname.238:
2524 movl r5,r1
2525 movl r4,r2
2526 addl2 r1,r9
2527 bicl2 #0,r9
2528 cmpl r9,r1
2529 bgequ noname.239
2530 incl r2
2531noname.239:
2532 addl2 r2,r8
2533 bicl2 #0,r8
2534 cmpl r8,r2
2535 bgequ noname.240
2536 incl r10
2537noname.240:
2538
2539 movl r9,36(r11)
2540
2541 clrl r9
2542
2543 bicl3 #-65536,28(r6),r3
2544 movzwl 30(r6),r1
2545 bicl2 #-65536,r1
2546 bicl3 #-65536,12(r7),r2
2547 movzwl 14(r7),r0
2548 bicl2 #-65536,r0
2549 movl r3,r5
2550 movl r1,r4
2551 mull3 r0,r5,-780(fp)
2552 mull2 r2,r5
2553 mull3 r2,r4,-784(fp)
2554 mull2 r0,r4
2555 addl3 -780(fp),-784(fp),r0
2556 bicl3 #0,r0,-780(fp)
2557 cmpl -780(fp),-784(fp)
2558 bgequ noname.241
2559 addl2 #65536,r4
2560noname.241:
2561 movzwl -778(fp),r0
2562 bicl2 #-65536,r0
2563 addl2 r0,r4
2564 bicl3 #-65536,-780(fp),r0
2565 ashl #16,r0,-784(fp)
2566 addl2 -784(fp),r5
2567 bicl2 #0,r5
2568 cmpl r5,-784(fp)
2569 bgequ noname.242
2570 incl r4
2571noname.242:
2572 movl r5,r1
2573 movl r4,r2
2574 addl2 r1,r8
2575 bicl2 #0,r8
2576 cmpl r8,r1
2577 bgequ noname.243
2578 incl r2
2579noname.243:
2580 addl2 r2,r10
2581 bicl2 #0,r10
2582 cmpl r10,r2
2583 bgequ noname.244
2584 incl r9
2585noname.244:
2586
2587 bicl3 #-65536,24(r6),r3
2588 movzwl 26(r6),r1
2589 bicl2 #-65536,r1
2590 bicl3 #-65536,16(r7),r2
2591 movzwl 18(r7),r0
2592 bicl2 #-65536,r0
2593 movl r3,r5
2594 movl r1,r4
2595 mull3 r0,r5,-788(fp)
2596 mull2 r2,r5
2597 mull3 r2,r4,-792(fp)
2598 mull2 r0,r4
2599 addl3 -788(fp),-792(fp),r0
2600 bicl3 #0,r0,-788(fp)
2601 cmpl -788(fp),-792(fp)
2602 bgequ noname.245
2603 addl2 #65536,r4
2604noname.245:
2605 movzwl -786(fp),r0
2606 bicl2 #-65536,r0
2607 addl2 r0,r4
2608 bicl3 #-65536,-788(fp),r0
2609 ashl #16,r0,-792(fp)
2610 addl2 -792(fp),r5
2611 bicl2 #0,r5
2612 cmpl r5,-792(fp)
2613 bgequ noname.246
2614 incl r4
2615noname.246:
2616 movl r5,r1
2617 movl r4,r2
2618 addl2 r1,r8
2619 bicl2 #0,r8
2620 cmpl r8,r1
2621 bgequ noname.247
2622 incl r2
2623noname.247:
2624 addl2 r2,r10
2625 bicl2 #0,r10
2626 cmpl r10,r2
2627 bgequ noname.248
2628 incl r9
2629noname.248:
2630
2631 bicl3 #-65536,20(r6),r3
2632 movzwl 22(r6),r1
2633 bicl2 #-65536,r1
2634 bicl3 #-65536,20(r7),r2
2635 movzwl 22(r7),r0
2636 bicl2 #-65536,r0
2637 movl r3,r5
2638 movl r1,r4
2639 mull3 r0,r5,-796(fp)
2640 mull2 r2,r5
2641 mull3 r2,r4,-800(fp)
2642 mull2 r0,r4
2643 addl3 -796(fp),-800(fp),r0
2644 bicl3 #0,r0,-796(fp)
2645 cmpl -796(fp),-800(fp)
2646 bgequ noname.249
2647 addl2 #65536,r4
2648noname.249:
2649 movzwl -794(fp),r0
2650 bicl2 #-65536,r0
2651 addl2 r0,r4
2652 bicl3 #-65536,-796(fp),r0
2653 ashl #16,r0,-800(fp)
2654 addl2 -800(fp),r5
2655 bicl2 #0,r5
2656 cmpl r5,-800(fp)
2657 bgequ noname.250
2658 incl r4
2659noname.250:
2660 movl r5,r1
2661 movl r4,r2
2662 addl2 r1,r8
2663 bicl2 #0,r8
2664 cmpl r8,r1
2665 bgequ noname.251
2666 incl r2
2667noname.251:
2668 addl2 r2,r10
2669 bicl2 #0,r10
2670 cmpl r10,r2
2671 bgequ noname.252
2672 incl r9
2673noname.252:
2674
2675 bicl3 #-65536,16(r6),r3
2676 movzwl 18(r6),r1
2677 bicl2 #-65536,r1
2678 bicl3 #-65536,24(r7),r2
2679 movzwl 26(r7),r0
2680 bicl2 #-65536,r0
2681 movl r3,r5
2682 movl r1,r4
2683 mull3 r0,r5,-804(fp)
2684 mull2 r2,r5
2685 mull3 r2,r4,-808(fp)
2686 mull2 r0,r4
2687 addl3 -804(fp),-808(fp),r0
2688 bicl3 #0,r0,-804(fp)
2689 cmpl -804(fp),-808(fp)
2690 bgequ noname.253
2691 addl2 #65536,r4
2692noname.253:
2693 movzwl -802(fp),r0
2694 bicl2 #-65536,r0
2695 addl2 r0,r4
2696 bicl3 #-65536,-804(fp),r0
2697 ashl #16,r0,-808(fp)
2698 addl2 -808(fp),r5
2699 bicl2 #0,r5
2700 cmpl r5,-808(fp)
2701 bgequ noname.254
2702 incl r4
2703noname.254:
2704 movl r5,r1
2705 movl r4,r2
2706 addl2 r1,r8
2707 bicl2 #0,r8
2708 cmpl r8,r1
2709 bgequ noname.255
2710 incl r2
2711noname.255:
2712 addl2 r2,r10
2713 bicl2 #0,r10
2714 cmpl r10,r2
2715 bgequ noname.256
2716 incl r9
2717noname.256:
2718
2719 bicl3 #-65536,12(r6),r3
2720 movzwl 14(r6),r1
2721 bicl2 #-65536,r1
2722 bicl3 #-65536,28(r7),r2
2723 movzwl 30(r7),r0
2724 bicl2 #-65536,r0
2725 movl r3,r5
2726 movl r1,r4
2727 mull3 r0,r5,-812(fp)
2728 mull2 r2,r5
2729 mull3 r2,r4,-816(fp)
2730 mull2 r0,r4
2731 addl3 -812(fp),-816(fp),r0
2732 bicl3 #0,r0,-812(fp)
2733 cmpl -812(fp),-816(fp)
2734 bgequ noname.257
2735 addl2 #65536,r4
2736noname.257:
2737 movzwl -810(fp),r0
2738 bicl2 #-65536,r0
2739 addl2 r0,r4
2740 bicl3 #-65536,-812(fp),r0
2741 ashl #16,r0,-816(fp)
2742 addl2 -816(fp),r5
2743 bicl2 #0,r5
2744 cmpl r5,-816(fp)
2745 bgequ noname.258
2746 incl r4
2747noname.258:
2748 movl r5,r1
2749 movl r4,r2
2750 addl2 r1,r8
2751 bicl2 #0,r8
2752 cmpl r8,r1
2753 bgequ noname.259
2754 incl r2
2755noname.259:
2756 addl2 r2,r10
2757 bicl2 #0,r10
2758 cmpl r10,r2
2759 bgequ noname.260
2760 incl r9
2761noname.260:
2762
2763 movl r8,40(r11)
2764
2765 clrl r8
2766
2767 bicl3 #-65536,16(r6),r3
2768 movzwl 18(r6),r2
2769 bicl3 #-65536,28(r7),r1
2770 movzwl 30(r7),r0
2771 bicl2 #-65536,r0
2772 movl r3,r4
2773 bicl3 #-65536,r2,-828(fp)
2774 mull3 r0,r4,-820(fp)
2775 mull2 r1,r4
2776 mull3 r1,-828(fp),-824(fp)
2777 mull2 r0,-828(fp)
2778 addl3 -820(fp),-824(fp),r0
2779 bicl3 #0,r0,-820(fp)
2780 cmpl -820(fp),-824(fp)
2781 bgequ noname.261
2782 addl2 #65536,-828(fp)
2783noname.261:
2784 movzwl -818(fp),r0
2785 bicl2 #-65536,r0
2786 addl2 r0,-828(fp)
2787 bicl3 #-65536,-820(fp),r0
2788 ashl #16,r0,-824(fp)
2789 addl2 -824(fp),r4
2790 bicl2 #0,r4
2791 cmpl r4,-824(fp)
2792 bgequ noname.262
2793 incl -828(fp)
2794noname.262:
2795 movl r4,r1
2796 movl -828(fp),r2
2797 addl2 r1,r10
2798 bicl2 #0,r10
2799 cmpl r10,r1
2800 bgequ noname.263
2801 incl r2
2802noname.263:
2803 addl2 r2,r9
2804 bicl2 #0,r9
2805 cmpl r9,r2
2806 bgequ noname.264
2807 incl r8
2808noname.264:
2809
2810 movzwl 22(r6),r2
2811 bicl3 #-65536,24(r7),r3
2812 movzwl 26(r7),r0
2813 bicl2 #-65536,r0
2814 bicl3 #-65536,20(r6),-840(fp)
2815 bicl3 #-65536,r2,-844(fp)
2816 mull3 r0,-840(fp),-832(fp)
2817 mull2 r3,-840(fp)
2818 mull3 r3,-844(fp),-836(fp)
2819 mull2 r0,-844(fp)
2820 addl3 -832(fp),-836(fp),r0
2821 bicl3 #0,r0,-832(fp)
2822 cmpl -832(fp),-836(fp)
2823 bgequ noname.265
2824 addl2 #65536,-844(fp)
2825noname.265:
2826 movzwl -830(fp),r0
2827 bicl2 #-65536,r0
2828 addl2 r0,-844(fp)
2829 bicl3 #-65536,-832(fp),r0
2830 ashl #16,r0,-836(fp)
2831 addl3 -836(fp),-840(fp),r0
2832 bicl3 #0,r0,-840(fp)
2833 cmpl -840(fp),-836(fp)
2834 bgequ noname.266
2835 incl -844(fp)
2836noname.266:
2837 movl -840(fp),r1
2838 movl -844(fp),r2
2839 addl2 r1,r10
2840 bicl2 #0,r10
2841 cmpl r10,r1
2842 bgequ noname.267
2843 incl r2
2844noname.267:
2845 addl2 r2,r9
2846 bicl2 #0,r9
2847 cmpl r9,r2
2848 bgequ noname.268
2849 incl r8
2850noname.268:
2851
2852 bicl3 #-65536,24(r6),r3
2853 movzwl 26(r6),r1
2854 bicl2 #-65536,r1
2855 bicl3 #-65536,20(r7),r2
2856 movzwl 22(r7),r0
2857 bicl2 #-65536,r0
2858 movl r3,r5
2859 movl r1,r4
2860 mull3 r0,r5,-848(fp)
2861 mull2 r2,r5
2862 mull3 r2,r4,-852(fp)
2863 mull2 r0,r4
2864 addl3 -848(fp),-852(fp),r0
2865 bicl3 #0,r0,-848(fp)
2866 cmpl -848(fp),-852(fp)
2867 bgequ noname.269
2868 addl2 #65536,r4
2869noname.269:
2870 movzwl -846(fp),r0
2871 bicl2 #-65536,r0
2872 addl2 r0,r4
2873 bicl3 #-65536,-848(fp),r0
2874 ashl #16,r0,-852(fp)
2875 addl2 -852(fp),r5
2876 bicl2 #0,r5
2877 cmpl r5,-852(fp)
2878 bgequ noname.270
2879 incl r4
2880noname.270:
2881 movl r5,r1
2882 movl r4,r2
2883 addl2 r1,r10
2884 bicl2 #0,r10
2885 cmpl r10,r1
2886 bgequ noname.271
2887 incl r2
2888noname.271:
2889 addl2 r2,r9
2890 bicl2 #0,r9
2891 cmpl r9,r2
2892 bgequ noname.272
2893 incl r8
2894noname.272:
2895
2896 bicl3 #-65536,28(r6),r3
2897 movzwl 30(r6),r1
2898 bicl2 #-65536,r1
2899 bicl3 #-65536,16(r7),r2
2900 movzwl 18(r7),r0
2901 bicl2 #-65536,r0
2902 movl r3,r5
2903 movl r1,r4
2904 mull3 r0,r5,-856(fp)
2905 mull2 r2,r5
2906 mull3 r2,r4,-860(fp)
2907 mull2 r0,r4
2908 addl3 -856(fp),-860(fp),r0
2909 bicl3 #0,r0,-856(fp)
2910 cmpl -856(fp),-860(fp)
2911 bgequ noname.273
2912 addl2 #65536,r4
2913noname.273:
2914 movzwl -854(fp),r0
2915 bicl2 #-65536,r0
2916 addl2 r0,r4
2917 bicl3 #-65536,-856(fp),r0
2918 ashl #16,r0,-860(fp)
2919 addl2 -860(fp),r5
2920 bicl2 #0,r5
2921 cmpl r5,-860(fp)
2922 bgequ noname.274
2923 incl r4
2924noname.274:
2925 movl r5,r1
2926 movl r4,r2
2927 addl2 r1,r10
2928 bicl2 #0,r10
2929 cmpl r10,r1
2930 bgequ noname.275
2931 incl r2
2932noname.275:
2933 addl2 r2,r9
2934 bicl2 #0,r9
2935 cmpl r9,r2
2936 bgequ noname.276
2937 incl r8
2938noname.276:
2939
2940 movl r10,44(r11)
2941
2942 clrl r10
2943
2944 bicl3 #-65536,28(r6),r3
2945 movzwl 30(r6),r1
2946 bicl2 #-65536,r1
2947 bicl3 #-65536,20(r7),r2
2948 movzwl 22(r7),r0
2949 bicl2 #-65536,r0
2950 movl r3,r5
2951 movl r1,r4
2952 mull3 r0,r5,-864(fp)
2953 mull2 r2,r5
2954 mull3 r2,r4,-868(fp)
2955 mull2 r0,r4
2956 addl3 -864(fp),-868(fp),r0
2957 bicl3 #0,r0,-864(fp)
2958 cmpl -864(fp),-868(fp)
2959 bgequ noname.277
2960 addl2 #65536,r4
2961noname.277:
2962 movzwl -862(fp),r0
2963 bicl2 #-65536,r0
2964 addl2 r0,r4
2965 bicl3 #-65536,-864(fp),r0
2966 ashl #16,r0,-868(fp)
2967 addl2 -868(fp),r5
2968 bicl2 #0,r5
2969 cmpl r5,-868(fp)
2970 bgequ noname.278
2971 incl r4
2972noname.278:
2973 movl r5,r1
2974 movl r4,r2
2975 addl2 r1,r9
2976 bicl2 #0,r9
2977 cmpl r9,r1
2978 bgequ noname.279
2979 incl r2
2980noname.279:
2981 addl2 r2,r8
2982 bicl2 #0,r8
2983 cmpl r8,r2
2984 bgequ noname.280
2985 incl r10
2986noname.280:
2987
2988 bicl3 #-65536,24(r6),r3
2989 movzwl 26(r6),r1
2990 bicl2 #-65536,r1
2991 bicl3 #-65536,24(r7),r2
2992 movzwl 26(r7),r0
2993 bicl2 #-65536,r0
2994 movl r3,r5
2995 movl r1,r4
2996 mull3 r0,r5,-872(fp)
2997 mull2 r2,r5
2998 mull3 r2,r4,-876(fp)
2999 mull2 r0,r4
3000 addl3 -872(fp),-876(fp),r0
3001 bicl3 #0,r0,-872(fp)
3002 cmpl -872(fp),-876(fp)
3003 bgequ noname.281
3004 addl2 #65536,r4
3005noname.281:
3006 movzwl -870(fp),r0
3007 bicl2 #-65536,r0
3008 addl2 r0,r4
3009 bicl3 #-65536,-872(fp),r0
3010 ashl #16,r0,-876(fp)
3011 addl2 -876(fp),r5
3012 bicl2 #0,r5
3013 cmpl r5,-876(fp)
3014 bgequ noname.282
3015 incl r4
3016noname.282:
3017 movl r5,r1
3018 movl r4,r2
3019 addl2 r1,r9
3020 bicl2 #0,r9
3021 cmpl r9,r1
3022 bgequ noname.283
3023 incl r2
3024noname.283:
3025 addl2 r2,r8
3026 bicl2 #0,r8
3027 cmpl r8,r2
3028 bgequ noname.284
3029 incl r10
3030noname.284:
3031
3032 bicl3 #-65536,20(r6),r3
3033 movzwl 22(r6),r1
3034 bicl2 #-65536,r1
3035 bicl3 #-65536,28(r7),r2
3036 movzwl 30(r7),r0
3037 bicl2 #-65536,r0
3038 movl r3,r5
3039 movl r1,r4
3040 mull3 r0,r5,-880(fp)
3041 mull2 r2,r5
3042 mull3 r2,r4,-884(fp)
3043 mull2 r0,r4
3044 addl3 -880(fp),-884(fp),r0
3045 bicl3 #0,r0,-880(fp)
3046 cmpl -880(fp),-884(fp)
3047 bgequ noname.285
3048 addl2 #65536,r4
3049noname.285:
3050 movzwl -878(fp),r0
3051 bicl2 #-65536,r0
3052 addl2 r0,r4
3053 bicl3 #-65536,-880(fp),r0
3054 ashl #16,r0,-884(fp)
3055 addl2 -884(fp),r5
3056 bicl2 #0,r5
3057 cmpl r5,-884(fp)
3058 bgequ noname.286
3059 incl r4
3060noname.286:
3061 movl r5,r1
3062 movl r4,r2
3063 addl2 r1,r9
3064 bicl2 #0,r9
3065 cmpl r9,r1
3066 bgequ noname.287
3067 incl r2
3068noname.287:
3069 addl2 r2,r8
3070 bicl2 #0,r8
3071 cmpl r8,r2
3072 bgequ noname.288
3073 incl r10
3074noname.288:
3075
3076 movl r9,48(r11)
3077
3078 clrl r9
3079
3080 bicl3 #-65536,24(r6),r3
3081 movzwl 26(r6),r1
3082 bicl2 #-65536,r1
3083 bicl3 #-65536,28(r7),r2
3084 movzwl 30(r7),r0
3085 bicl2 #-65536,r0
3086 movl r3,r5
3087 movl r1,r4
3088 mull3 r0,r5,-888(fp)
3089 mull2 r2,r5
3090 mull3 r2,r4,-892(fp)
3091 mull2 r0,r4
3092 addl3 -888(fp),-892(fp),r0
3093 bicl3 #0,r0,-888(fp)
3094 cmpl -888(fp),-892(fp)
3095 bgequ noname.289
3096 addl2 #65536,r4
3097noname.289:
3098 movzwl -886(fp),r0
3099 bicl2 #-65536,r0
3100 addl2 r0,r4
3101 bicl3 #-65536,-888(fp),r0
3102 ashl #16,r0,-892(fp)
3103 addl2 -892(fp),r5
3104 bicl2 #0,r5
3105 cmpl r5,-892(fp)
3106 bgequ noname.290
3107 incl r4
3108noname.290:
3109 movl r5,r1
3110 movl r4,r2
3111 addl2 r1,r8
3112 bicl2 #0,r8
3113 cmpl r8,r1
3114 bgequ noname.291
3115 incl r2
3116noname.291:
3117 addl2 r2,r10
3118 bicl2 #0,r10
3119 cmpl r10,r2
3120 bgequ noname.292
3121 incl r9
3122noname.292:
3123
3124 movzwl 30(r6),r2
3125 bicl3 #-65536,24(r7),r3
3126 movzwl 26(r7),r0
3127 bicl2 #-65536,r0
3128 bicl3 #-65536,28(r6),-904(fp)
3129 bicl3 #-65536,r2,-908(fp)
3130 mull3 r0,-904(fp),-896(fp)
3131 mull2 r3,-904(fp)
3132 mull3 r3,-908(fp),-900(fp)
3133 mull2 r0,-908(fp)
3134 addl3 -896(fp),-900(fp),r0
3135 bicl3 #0,r0,-896(fp)
3136 cmpl -896(fp),-900(fp)
3137 bgequ noname.293
3138 addl2 #65536,-908(fp)
3139noname.293:
3140 movzwl -894(fp),r0
3141 bicl2 #-65536,r0
3142 addl2 r0,-908(fp)
3143 bicl3 #-65536,-896(fp),r0
3144 ashl #16,r0,-900(fp)
3145 addl3 -900(fp),-904(fp),r0
3146 bicl3 #0,r0,-904(fp)
3147 cmpl -904(fp),-900(fp)
3148 bgequ noname.294
3149 incl -908(fp)
3150noname.294:
3151 movl -904(fp),r1
3152 movl -908(fp),r2
3153 addl2 r1,r8
3154 bicl2 #0,r8
3155 cmpl r8,r1
3156 bgequ noname.295
3157 incl r2
3158noname.295:
3159 addl2 r2,r10
3160 bicl2 #0,r10
3161 cmpl r10,r2
3162 bgequ noname.296
3163 incl r9
3164noname.296:
3165
3166 movl r8,52(r11)
3167
3168 clrl r8
3169
3170 movzwl 30(r6),r2
3171 bicl3 #-65536,28(r7),r3
3172 movzwl 30(r7),r0
3173 bicl2 #-65536,r0
3174 bicl3 #-65536,28(r6),-920(fp)
3175 bicl3 #-65536,r2,-924(fp)
3176 mull3 r0,-920(fp),-912(fp)
3177 mull2 r3,-920(fp)
3178 mull3 r3,-924(fp),-916(fp)
3179 mull2 r0,-924(fp)
3180 addl3 -912(fp),-916(fp),r0
3181 bicl3 #0,r0,-912(fp)
3182 cmpl -912(fp),-916(fp)
3183 bgequ noname.297
3184 addl2 #65536,-924(fp)
3185noname.297:
3186 movzwl -910(fp),r0
3187 bicl2 #-65536,r0
3188 addl2 r0,-924(fp)
3189 bicl3 #-65536,-912(fp),r0
3190 ashl #16,r0,-916(fp)
3191 addl3 -916(fp),-920(fp),r0
3192 bicl3 #0,r0,-920(fp)
3193 cmpl -920(fp),-916(fp)
3194 bgequ noname.298
3195 incl -924(fp)
3196noname.298:
3197 movl -920(fp),r1
3198 movl -924(fp),r2
3199 addl2 r1,r10
3200 bicl2 #0,r10
3201 cmpl r10,r1
3202 bgequ noname.299
3203 incl r2
3204noname.299:
3205 addl2 r2,r9
3206 bicl2 #0,r9
3207 cmpl r9,r2
3208 bgequ noname.300
3209 incl r8
3210noname.300:
3211
3212 movl r10,56(r11)
3213
3214 movl r9,60(r11)
3215
3216 ret
3217
3218
3219
3220;r=4 ;(AP)
3221;a=8 ;(AP)
3222;b=12 ;(AP)
3223;n=16 ;(AP) n by value (input)
3224
3225 .psect code,nowrt
3226
3227.entry BN_MUL_COMBA4,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
3228 movab -156(sp),sp
3229
3230 clrq r9
3231
3232 clrl r8
3233
3234 movl 8(ap),r6
3235 bicl3 #-65536,(r6),r3
3236 movzwl 2(r6),r2
3237 bicl2 #-65536,r2
3238 movl 12(ap),r7
3239 bicl3 #-65536,(r7),r1
3240 movzwl 2(r7),r0
3241 bicl2 #-65536,r0
3242 movl r3,r5
3243 movl r2,r4
3244 mull3 r0,r5,-4(fp)
3245 mull2 r1,r5
3246 mull3 r1,r4,-8(fp)
3247 mull2 r0,r4
3248 addl3 -4(fp),-8(fp),r0
3249 bicl3 #0,r0,-4(fp)
3250 cmpl -4(fp),-8(fp)
3251 bgequ noname.303
3252 addl2 #65536,r4
3253noname.303:
3254 movzwl -2(fp),r0
3255 bicl2 #-65536,r0
3256 addl2 r0,r4
3257 bicl3 #-65536,-4(fp),r0
3258 ashl #16,r0,-8(fp)
3259 addl2 -8(fp),r5
3260 bicl2 #0,r5
3261 cmpl r5,-8(fp)
3262 bgequ noname.304
3263 incl r4
3264noname.304:
3265 movl r5,r1
3266 movl r4,r2
3267 addl2 r1,r10
3268 bicl2 #0,r10
3269 cmpl r10,r1
3270 bgequ noname.305
3271 incl r2
3272noname.305:
3273 addl2 r2,r9
3274 bicl2 #0,r9
3275 cmpl r9,r2
3276 bgequ noname.306
3277 incl r8
3278noname.306:
3279
3280 movl 4(ap),r11
3281 movl r10,(r11)
3282
3283 clrl r10
3284
3285 bicl3 #-65536,(r6),r3
3286 movzwl 2(r6),r1
3287 bicl2 #-65536,r1
3288 bicl3 #-65536,4(r7),r2
3289 movzwl 6(r7),r0
3290 bicl2 #-65536,r0
3291 movl r3,r5
3292 movl r1,r4
3293 mull3 r0,r5,-12(fp)
3294 mull2 r2,r5
3295 mull3 r2,r4,-16(fp)
3296 mull2 r0,r4
3297 addl3 -12(fp),-16(fp),r0
3298 bicl3 #0,r0,-12(fp)
3299 cmpl -12(fp),-16(fp)
3300 bgequ noname.307
3301 addl2 #65536,r4
3302noname.307:
3303 movzwl -10(fp),r0
3304 bicl2 #-65536,r0
3305 addl2 r0,r4
3306 bicl3 #-65536,-12(fp),r0
3307 ashl #16,r0,-16(fp)
3308 addl2 -16(fp),r5
3309 bicl2 #0,r5
3310 cmpl r5,-16(fp)
3311 bgequ noname.308
3312 incl r4
3313noname.308:
3314 movl r5,r1
3315 movl r4,r2
3316 addl2 r1,r9
3317 bicl2 #0,r9
3318 cmpl r9,r1
3319 bgequ noname.309
3320 incl r2
3321noname.309:
3322 addl2 r2,r8
3323 bicl2 #0,r8
3324 cmpl r8,r2
3325 bgequ noname.310
3326 incl r10
3327noname.310:
3328
3329 bicl3 #-65536,4(r6),r3
3330 movzwl 6(r6),r1
3331 bicl2 #-65536,r1
3332 bicl3 #-65536,(r7),r2
3333 movzwl 2(r7),r0
3334 bicl2 #-65536,r0
3335 movl r3,r5
3336 movl r1,r4
3337 mull3 r0,r5,-20(fp)
3338 mull2 r2,r5
3339 mull3 r2,r4,-24(fp)
3340 mull2 r0,r4
3341 addl3 -20(fp),-24(fp),r0
3342 bicl3 #0,r0,-20(fp)
3343 cmpl -20(fp),-24(fp)
3344 bgequ noname.311
3345 addl2 #65536,r4
3346noname.311:
3347 movzwl -18(fp),r0
3348 bicl2 #-65536,r0
3349 addl2 r0,r4
3350 bicl3 #-65536,-20(fp),r0
3351 ashl #16,r0,-24(fp)
3352 addl2 -24(fp),r5
3353 bicl2 #0,r5
3354 cmpl r5,-24(fp)
3355 bgequ noname.312
3356 incl r4
3357noname.312:
3358 movl r5,r1
3359 movl r4,r2
3360 addl2 r1,r9
3361 bicl2 #0,r9
3362 cmpl r9,r1
3363 bgequ noname.313
3364 incl r2
3365noname.313:
3366 addl2 r2,r8
3367 bicl2 #0,r8
3368 cmpl r8,r2
3369 bgequ noname.314
3370 incl r10
3371noname.314:
3372
3373 movl r9,4(r11)
3374
3375 clrl r9
3376
3377 bicl3 #-65536,8(r6),r3
3378 movzwl 10(r6),r1
3379 bicl2 #-65536,r1
3380 bicl3 #-65536,(r7),r2
3381 movzwl 2(r7),r0
3382 bicl2 #-65536,r0
3383 movl r3,r5
3384 movl r1,r4
3385 mull3 r0,r5,-28(fp)
3386 mull2 r2,r5
3387 mull3 r2,r4,-32(fp)
3388 mull2 r0,r4
3389 addl3 -28(fp),-32(fp),r0
3390 bicl3 #0,r0,-28(fp)
3391 cmpl -28(fp),-32(fp)
3392 bgequ noname.315
3393 addl2 #65536,r4
3394noname.315:
3395 movzwl -26(fp),r0
3396 bicl2 #-65536,r0
3397 addl2 r0,r4
3398 bicl3 #-65536,-28(fp),r0
3399 ashl #16,r0,-32(fp)
3400 addl2 -32(fp),r5
3401 bicl2 #0,r5
3402 cmpl r5,-32(fp)
3403 bgequ noname.316
3404 incl r4
3405noname.316:
3406 movl r5,r1
3407 movl r4,r2
3408 addl2 r1,r8
3409 bicl2 #0,r8
3410 cmpl r8,r1
3411 bgequ noname.317
3412 incl r2
3413noname.317:
3414 addl2 r2,r10
3415 bicl2 #0,r10
3416 cmpl r10,r2
3417 bgequ noname.318
3418 incl r9
3419noname.318:
3420
3421 bicl3 #-65536,4(r6),r3
3422 movzwl 6(r6),r1
3423 bicl2 #-65536,r1
3424 bicl3 #-65536,4(r7),r2
3425 movzwl 6(r7),r0
3426 bicl2 #-65536,r0
3427 movl r3,r5
3428 movl r1,r4
3429 mull3 r0,r5,-36(fp)
3430 mull2 r2,r5
3431 mull3 r2,r4,-40(fp)
3432 mull2 r0,r4
3433 addl3 -36(fp),-40(fp),r0
3434 bicl3 #0,r0,-36(fp)
3435 cmpl -36(fp),-40(fp)
3436 bgequ noname.319
3437 addl2 #65536,r4
3438noname.319:
3439 movzwl -34(fp),r0
3440 bicl2 #-65536,r0
3441 addl2 r0,r4
3442 bicl3 #-65536,-36(fp),r0
3443 ashl #16,r0,-40(fp)
3444 addl2 -40(fp),r5
3445 bicl2 #0,r5
3446 cmpl r5,-40(fp)
3447 bgequ noname.320
3448 incl r4
3449noname.320:
3450 movl r5,r1
3451 movl r4,r2
3452 addl2 r1,r8
3453 bicl2 #0,r8
3454 cmpl r8,r1
3455 bgequ noname.321
3456 incl r2
3457noname.321:
3458 addl2 r2,r10
3459 bicl2 #0,r10
3460 cmpl r10,r2
3461 bgequ noname.322
3462 incl r9
3463noname.322:
3464
3465 bicl3 #-65536,(r6),r3
3466 movzwl 2(r6),r1
3467 bicl2 #-65536,r1
3468 bicl3 #-65536,8(r7),r2
3469 movzwl 10(r7),r0
3470 bicl2 #-65536,r0
3471 movl r3,r5
3472 movl r1,r4
3473 mull3 r0,r5,-44(fp)
3474 mull2 r2,r5
3475 mull3 r2,r4,-48(fp)
3476 mull2 r0,r4
3477 addl3 -44(fp),-48(fp),r0
3478 bicl3 #0,r0,-44(fp)
3479 cmpl -44(fp),-48(fp)
3480 bgequ noname.323
3481 addl2 #65536,r4
3482noname.323:
3483 movzwl -42(fp),r0
3484 bicl2 #-65536,r0
3485 addl2 r0,r4
3486 bicl3 #-65536,-44(fp),r0
3487 ashl #16,r0,-48(fp)
3488 addl2 -48(fp),r5
3489 bicl2 #0,r5
3490 cmpl r5,-48(fp)
3491 bgequ noname.324
3492 incl r4
3493noname.324:
3494 movl r5,r1
3495 movl r4,r2
3496 addl2 r1,r8
3497 bicl2 #0,r8
3498 cmpl r8,r1
3499 bgequ noname.325
3500 incl r2
3501noname.325:
3502 addl2 r2,r10
3503 bicl2 #0,r10
3504 cmpl r10,r2
3505 bgequ noname.326
3506 incl r9
3507noname.326:
3508
3509 movl r8,8(r11)
3510
3511 clrl r8
3512
3513 bicl3 #-65536,(r6),r3
3514 movzwl 2(r6),r2
3515 bicl3 #-65536,12(r7),r1
3516 movzwl 14(r7),r0
3517 bicl2 #-65536,r0
3518 movl r3,r4
3519 bicl3 #-65536,r2,-60(fp)
3520 mull3 r0,r4,-52(fp)
3521 mull2 r1,r4
3522 mull3 r1,-60(fp),-56(fp)
3523 mull2 r0,-60(fp)
3524 addl3 -52(fp),-56(fp),r0
3525 bicl3 #0,r0,-52(fp)
3526 cmpl -52(fp),-56(fp)
3527 bgequ noname.327
3528 addl2 #65536,-60(fp)
3529noname.327:
3530 movzwl -50(fp),r0
3531 bicl2 #-65536,r0
3532 addl2 r0,-60(fp)
3533 bicl3 #-65536,-52(fp),r0
3534 ashl #16,r0,-56(fp)
3535 addl2 -56(fp),r4
3536 bicl2 #0,r4
3537 cmpl r4,-56(fp)
3538 bgequ noname.328
3539 incl -60(fp)
3540noname.328:
3541 movl r4,r1
3542 movl -60(fp),r2
3543 addl2 r1,r10
3544 bicl2 #0,r10
3545 cmpl r10,r1
3546 bgequ noname.329
3547 incl r2
3548noname.329:
3549 addl2 r2,r9
3550 bicl2 #0,r9
3551 cmpl r9,r2
3552 bgequ noname.330
3553 incl r8
3554noname.330:
3555
3556 movzwl 6(r6),r2
3557 bicl3 #-65536,8(r7),r3
3558 movzwl 10(r7),r0
3559 bicl2 #-65536,r0
3560 bicl3 #-65536,4(r6),-72(fp)
3561 bicl3 #-65536,r2,-76(fp)
3562 mull3 r0,-72(fp),-64(fp)
3563 mull2 r3,-72(fp)
3564 mull3 r3,-76(fp),-68(fp)
3565 mull2 r0,-76(fp)
3566 addl3 -64(fp),-68(fp),r0
3567 bicl3 #0,r0,-64(fp)
3568 cmpl -64(fp),-68(fp)
3569 bgequ noname.331
3570 addl2 #65536,-76(fp)
3571noname.331:
3572 movzwl -62(fp),r0
3573 bicl2 #-65536,r0
3574 addl2 r0,-76(fp)
3575 bicl3 #-65536,-64(fp),r0
3576 ashl #16,r0,-68(fp)
3577 addl3 -68(fp),-72(fp),r0
3578 bicl3 #0,r0,-72(fp)
3579 cmpl -72(fp),-68(fp)
3580 bgequ noname.332
3581 incl -76(fp)
3582noname.332:
3583 movl -72(fp),r1
3584 movl -76(fp),r2
3585 addl2 r1,r10
3586 bicl2 #0,r10
3587 cmpl r10,r1
3588 bgequ noname.333
3589 incl r2
3590noname.333:
3591 addl2 r2,r9
3592 bicl2 #0,r9
3593 cmpl r9,r2
3594 bgequ noname.334
3595 incl r8
3596noname.334:
3597
3598 bicl3 #-65536,8(r6),r3
3599 movzwl 10(r6),r1
3600 bicl2 #-65536,r1
3601 bicl3 #-65536,4(r7),r2
3602 movzwl 6(r7),r0
3603 bicl2 #-65536,r0
3604 movl r3,r5
3605 movl r1,r4
3606 mull3 r0,r5,-80(fp)
3607 mull2 r2,r5
3608 mull3 r2,r4,-84(fp)
3609 mull2 r0,r4
3610 addl3 -80(fp),-84(fp),r0
3611 bicl3 #0,r0,-80(fp)
3612 cmpl -80(fp),-84(fp)
3613 bgequ noname.335
3614 addl2 #65536,r4
3615noname.335:
3616 movzwl -78(fp),r0
3617 bicl2 #-65536,r0
3618 addl2 r0,r4
3619 bicl3 #-65536,-80(fp),r0
3620 ashl #16,r0,-84(fp)
3621 addl2 -84(fp),r5
3622 bicl2 #0,r5
3623 cmpl r5,-84(fp)
3624 bgequ noname.336
3625 incl r4
3626noname.336:
3627 movl r5,r1
3628 movl r4,r2
3629 addl2 r1,r10
3630 bicl2 #0,r10
3631 cmpl r10,r1
3632 bgequ noname.337
3633 incl r2
3634noname.337:
3635 addl2 r2,r9
3636 bicl2 #0,r9
3637 cmpl r9,r2
3638 bgequ noname.338
3639 incl r8
3640noname.338:
3641
3642 bicl3 #-65536,12(r6),r3
3643 movzwl 14(r6),r1
3644 bicl2 #-65536,r1
3645 bicl3 #-65536,(r7),r2
3646 movzwl 2(r7),r0
3647 bicl2 #-65536,r0
3648 movl r3,r5
3649 movl r1,r4
3650 mull3 r0,r5,-88(fp)
3651 mull2 r2,r5
3652 mull3 r2,r4,-92(fp)
3653 mull2 r0,r4
3654 addl3 -88(fp),-92(fp),r0
3655 bicl3 #0,r0,-88(fp)
3656 cmpl -88(fp),-92(fp)
3657 bgequ noname.339
3658 addl2 #65536,r4
3659noname.339:
3660 movzwl -86(fp),r0
3661 bicl2 #-65536,r0
3662 addl2 r0,r4
3663 bicl3 #-65536,-88(fp),r0
3664 ashl #16,r0,-92(fp)
3665 addl2 -92(fp),r5
3666 bicl2 #0,r5
3667 cmpl r5,-92(fp)
3668 bgequ noname.340
3669 incl r4
3670noname.340:
3671 movl r5,r1
3672 movl r4,r2
3673 addl2 r1,r10
3674 bicl2 #0,r10
3675 cmpl r10,r1
3676 bgequ noname.341
3677 incl r2
3678noname.341:
3679 addl2 r2,r9
3680 bicl2 #0,r9
3681 cmpl r9,r2
3682 bgequ noname.342
3683 incl r8
3684noname.342:
3685
3686 movl r10,12(r11)
3687
3688 clrl r10
3689
3690 bicl3 #-65536,12(r6),r3
3691 movzwl 14(r6),r1
3692 bicl2 #-65536,r1
3693 bicl3 #-65536,4(r7),r2
3694 movzwl 6(r7),r0
3695 bicl2 #-65536,r0
3696 movl r3,r5
3697 movl r1,r4
3698 mull3 r0,r5,-96(fp)
3699 mull2 r2,r5
3700 mull3 r2,r4,-100(fp)
3701 mull2 r0,r4
3702 addl3 -96(fp),-100(fp),r0
3703 bicl3 #0,r0,-96(fp)
3704 cmpl -96(fp),-100(fp)
3705 bgequ noname.343
3706 addl2 #65536,r4
3707noname.343:
3708 movzwl -94(fp),r0
3709 bicl2 #-65536,r0
3710 addl2 r0,r4
3711 bicl3 #-65536,-96(fp),r0
3712 ashl #16,r0,-100(fp)
3713 addl2 -100(fp),r5
3714 bicl2 #0,r5
3715 cmpl r5,-100(fp)
3716 bgequ noname.344
3717 incl r4
3718noname.344:
3719 movl r5,r1
3720 movl r4,r2
3721 addl2 r1,r9
3722 bicl2 #0,r9
3723 cmpl r9,r1
3724 bgequ noname.345
3725 incl r2
3726noname.345:
3727 addl2 r2,r8
3728 bicl2 #0,r8
3729 cmpl r8,r2
3730 bgequ noname.346
3731 incl r10
3732noname.346:
3733
3734 bicl3 #-65536,8(r6),r3
3735 movzwl 10(r6),r1
3736 bicl2 #-65536,r1
3737 bicl3 #-65536,8(r7),r2
3738 movzwl 10(r7),r0
3739 bicl2 #-65536,r0
3740 movl r3,r5
3741 movl r1,r4
3742 mull3 r0,r5,-104(fp)
3743 mull2 r2,r5
3744 mull3 r2,r4,-108(fp)
3745 mull2 r0,r4
3746 addl3 -104(fp),-108(fp),r0
3747 bicl3 #0,r0,-104(fp)
3748 cmpl -104(fp),-108(fp)
3749 bgequ noname.347
3750 addl2 #65536,r4
3751noname.347:
3752 movzwl -102(fp),r0
3753 bicl2 #-65536,r0
3754 addl2 r0,r4
3755 bicl3 #-65536,-104(fp),r0
3756 ashl #16,r0,-108(fp)
3757 addl2 -108(fp),r5
3758 bicl2 #0,r5
3759 cmpl r5,-108(fp)
3760 bgequ noname.348
3761 incl r4
3762noname.348:
3763 movl r5,r1
3764 movl r4,r2
3765 addl2 r1,r9
3766 bicl2 #0,r9
3767 cmpl r9,r1
3768 bgequ noname.349
3769 incl r2
3770noname.349:
3771 addl2 r2,r8
3772 bicl2 #0,r8
3773 cmpl r8,r2
3774 bgequ noname.350
3775 incl r10
3776noname.350:
3777
3778 bicl3 #-65536,4(r6),r3
3779 movzwl 6(r6),r1
3780 bicl2 #-65536,r1
3781 bicl3 #-65536,12(r7),r2
3782 movzwl 14(r7),r0
3783 bicl2 #-65536,r0
3784 movl r3,r5
3785 movl r1,r4
3786 mull3 r0,r5,-112(fp)
3787 mull2 r2,r5
3788 mull3 r2,r4,-116(fp)
3789 mull2 r0,r4
3790 addl3 -112(fp),-116(fp),r0
3791 bicl3 #0,r0,-112(fp)
3792 cmpl -112(fp),-116(fp)
3793 bgequ noname.351
3794 addl2 #65536,r4
3795noname.351:
3796 movzwl -110(fp),r0
3797 bicl2 #-65536,r0
3798 addl2 r0,r4
3799 bicl3 #-65536,-112(fp),r0
3800 ashl #16,r0,-116(fp)
3801 addl2 -116(fp),r5
3802 bicl2 #0,r5
3803 cmpl r5,-116(fp)
3804 bgequ noname.352
3805 incl r4
3806noname.352:
3807 movl r5,r1
3808 movl r4,r2
3809 addl2 r1,r9
3810 bicl2 #0,r9
3811 cmpl r9,r1
3812 bgequ noname.353
3813 incl r2
3814noname.353:
3815 addl2 r2,r8
3816 bicl2 #0,r8
3817 cmpl r8,r2
3818 bgequ noname.354
3819 incl r10
3820noname.354:
3821
3822 movl r9,16(r11)
3823
3824 clrl r9
3825
3826 bicl3 #-65536,8(r6),r3
3827 movzwl 10(r6),r1
3828 bicl2 #-65536,r1
3829 bicl3 #-65536,12(r7),r2
3830 movzwl 14(r7),r0
3831 bicl2 #-65536,r0
3832 movl r3,r5
3833 movl r1,r4
3834 mull3 r0,r5,-120(fp)
3835 mull2 r2,r5
3836 mull3 r2,r4,-124(fp)
3837 mull2 r0,r4
3838 addl3 -120(fp),-124(fp),r0
3839 bicl3 #0,r0,-120(fp)
3840 cmpl -120(fp),-124(fp)
3841 bgequ noname.355
3842 addl2 #65536,r4
3843noname.355:
3844 movzwl -118(fp),r0
3845 bicl2 #-65536,r0
3846 addl2 r0,r4
3847 bicl3 #-65536,-120(fp),r0
3848 ashl #16,r0,-124(fp)
3849 addl2 -124(fp),r5
3850 bicl2 #0,r5
3851 cmpl r5,-124(fp)
3852 bgequ noname.356
3853 incl r4
3854noname.356:
3855 movl r5,r1
3856 movl r4,r2
3857 addl2 r1,r8
3858 bicl2 #0,r8
3859 cmpl r8,r1
3860 bgequ noname.357
3861 incl r2
3862noname.357:
3863 addl2 r2,r10
3864 bicl2 #0,r10
3865 cmpl r10,r2
3866 bgequ noname.358
3867 incl r9
3868noname.358:
3869
3870 movzwl 14(r6),r2
3871 bicl3 #-65536,8(r7),r3
3872 movzwl 10(r7),r0
3873 bicl2 #-65536,r0
3874 bicl3 #-65536,12(r6),-136(fp)
3875 bicl3 #-65536,r2,-140(fp)
3876 mull3 r0,-136(fp),-128(fp)
3877 mull2 r3,-136(fp)
3878 mull3 r3,-140(fp),-132(fp)
3879 mull2 r0,-140(fp)
3880 addl3 -128(fp),-132(fp),r0
3881 bicl3 #0,r0,-128(fp)
3882 cmpl -128(fp),-132(fp)
3883 bgequ noname.359
3884 addl2 #65536,-140(fp)
3885noname.359:
3886 movzwl -126(fp),r0
3887 bicl2 #-65536,r0
3888 addl2 r0,-140(fp)
3889 bicl3 #-65536,-128(fp),r0
3890 ashl #16,r0,-132(fp)
3891 addl3 -132(fp),-136(fp),r0
3892 bicl3 #0,r0,-136(fp)
3893 cmpl -136(fp),-132(fp)
3894 bgequ noname.360
3895 incl -140(fp)
3896noname.360:
3897 movl -136(fp),r1
3898 movl -140(fp),r2
3899 addl2 r1,r8
3900 bicl2 #0,r8
3901 cmpl r8,r1
3902 bgequ noname.361
3903 incl r2
3904noname.361:
3905 addl2 r2,r10
3906 bicl2 #0,r10
3907 cmpl r10,r2
3908 bgequ noname.362
3909 incl r9
3910noname.362:
3911
3912 movl r8,20(r11)
3913
3914 clrl r8
3915
3916 movzwl 14(r6),r2
3917 bicl3 #-65536,12(r7),r3
3918 movzwl 14(r7),r0
3919 bicl2 #-65536,r0
3920 bicl3 #-65536,12(r6),-152(fp)
3921 bicl3 #-65536,r2,-156(fp)
3922 mull3 r0,-152(fp),-144(fp)
3923 mull2 r3,-152(fp)
3924 mull3 r3,-156(fp),-148(fp)
3925 mull2 r0,-156(fp)
3926 addl3 -144(fp),-148(fp),r0
3927 bicl3 #0,r0,-144(fp)
3928 cmpl -144(fp),-148(fp)
3929 bgequ noname.363
3930 addl2 #65536,-156(fp)
3931noname.363:
3932 movzwl -142(fp),r0
3933 bicl2 #-65536,r0
3934 addl2 r0,-156(fp)
3935 bicl3 #-65536,-144(fp),r0
3936 ashl #16,r0,-148(fp)
3937 addl3 -148(fp),-152(fp),r0
3938 bicl3 #0,r0,-152(fp)
3939 cmpl -152(fp),-148(fp)
3940 bgequ noname.364
3941 incl -156(fp)
3942noname.364:
3943 movl -152(fp),r1
3944 movl -156(fp),r2
3945 addl2 r1,r10
3946 bicl2 #0,r10
3947 cmpl r10,r1
3948 bgequ noname.365
3949 incl r2
3950noname.365:
3951 addl2 r2,r9
3952 bicl2 #0,r9
3953 cmpl r9,r2
3954 bgequ noname.366
3955 incl r8
3956noname.366:
3957
3958 movl r10,24(r11)
3959
3960 movl r9,28(r11)
3961
3962 ret
3963
3964
3965
3966;r=4 ;(AP)
3967;a=8 ;(AP)
3968;b=12 ;(AP)
3969;n=16 ;(AP) n by value (input)
3970
3971 .psect code,nowrt
3972
3973.entry BN_SQR_COMBA8,^m<r2,r3,r4,r5,r6,r7,r8,r9>
3974 movab -444(sp),sp
3975
3976 clrq r8
3977
3978 clrl r7
3979
3980 movl 8(ap),r4
3981 movl (r4),r3
3982 bicl3 #-65536,r3,-4(fp)
3983 extzv #16,#16,r3,r0
3984 bicl3 #-65536,r0,r3
3985 movl -4(fp),r0
3986 mull3 r0,r3,-8(fp)
3987 mull3 r0,r0,-4(fp)
3988 mull2 r3,r3
3989 bicl3 #32767,-8(fp),r0
3990 extzv #15,#17,r0,r0
3991 addl2 r0,r3
3992 bicl3 #-65536,-8(fp),r0
3993 ashl #17,r0,-8(fp)
3994 addl3 -4(fp),-8(fp),r0
3995 bicl3 #0,r0,-4(fp)
3996 cmpl -4(fp),-8(fp)
3997 bgequ noname.369
3998 incl r3
3999noname.369:
4000 movl -4(fp),r1
4001 movl r3,r2
4002 addl2 r1,r9
4003 bicl2 #0,r9
4004 cmpl r9,r1
4005 bgequ noname.370
4006 incl r2
4007noname.370:
4008 addl2 r2,r8
4009 bicl2 #0,r8
4010 cmpl r8,r2
4011 bgequ noname.371
4012 incl r7
4013noname.371:
4014
4015 movl r9,@4(ap)
4016
4017 clrl r9
4018
4019 movzwl 6(r4),r2
4020 bicl3 #-65536,(r4),r3
4021 movzwl 2(r4),r0
4022 bicl2 #-65536,r0
4023 bicl3 #-65536,4(r4),-20(fp)
4024 bicl3 #-65536,r2,-24(fp)
4025 mull3 r0,-20(fp),-12(fp)
4026 mull2 r3,-20(fp)
4027 mull3 r3,-24(fp),-16(fp)
4028 mull2 r0,-24(fp)
4029 addl3 -12(fp),-16(fp),r0
4030 bicl3 #0,r0,-12(fp)
4031 cmpl -12(fp),-16(fp)
4032 bgequ noname.372
4033 addl2 #65536,-24(fp)
4034noname.372:
4035 movzwl -10(fp),r0
4036 bicl2 #-65536,r0
4037 addl2 r0,-24(fp)
4038 bicl3 #-65536,-12(fp),r0
4039 ashl #16,r0,-16(fp)
4040 addl3 -16(fp),-20(fp),r0
4041 bicl3 #0,r0,-20(fp)
4042 cmpl -20(fp),-16(fp)
4043 bgequ noname.373
4044 incl -24(fp)
4045noname.373:
4046 movl -20(fp),r3
4047 movl -24(fp),r2
4048 bbc #31,r2,noname.374
4049 incl r9
4050noname.374:
4051 addl2 r2,r2
4052 bicl2 #0,r2
4053 bbc #31,r3,noname.375
4054 incl r2
4055noname.375:
4056 addl2 r3,r3
4057 bicl2 #0,r3
4058 addl2 r3,r8
4059 bicl2 #0,r8
4060 cmpl r8,r3
4061 bgequ noname.376
4062 incl r2
4063 bicl3 #0,r2,r0
4064 bneq noname.376
4065 incl r9
4066noname.376:
4067 addl2 r2,r7
4068 bicl2 #0,r7
4069 cmpl r7,r2
4070 bgequ noname.377
4071 incl r9
4072noname.377:
4073
4074 movl 4(ap),r0
4075 movl r8,4(r0)
4076
4077 clrl r8
4078
4079 movl 8(ap),r4
4080 movl 4(r4),r3
4081 bicl3 #-65536,r3,-28(fp)
4082 extzv #16,#16,r3,r0
4083 bicl3 #-65536,r0,r3
4084 movl -28(fp),r0
4085 mull3 r0,r3,-32(fp)
4086 mull3 r0,r0,-28(fp)
4087 mull2 r3,r3
4088 bicl3 #32767,-32(fp),r0
4089 extzv #15,#17,r0,r0
4090 addl2 r0,r3
4091 bicl3 #-65536,-32(fp),r0
4092 ashl #17,r0,-32(fp)
4093 addl3 -28(fp),-32(fp),r0
4094 bicl3 #0,r0,-28(fp)
4095 cmpl -28(fp),-32(fp)
4096 bgequ noname.378
4097 incl r3
4098noname.378:
4099 movl -28(fp),r1
4100 movl r3,r2
4101 addl2 r1,r7
4102 bicl2 #0,r7
4103 cmpl r7,r1
4104 bgequ noname.379
4105 incl r2
4106noname.379:
4107 addl2 r2,r9
4108 bicl2 #0,r9
4109 cmpl r9,r2
4110 bgequ noname.380
4111 incl r8
4112noname.380:
4113
4114 movzwl 10(r4),r2
4115 bicl3 #-65536,(r4),r3
4116 movzwl 2(r4),r0
4117 bicl2 #-65536,r0
4118 bicl3 #-65536,8(r4),-44(fp)
4119 bicl3 #-65536,r2,-48(fp)
4120 mull3 r0,-44(fp),-36(fp)
4121 mull2 r3,-44(fp)
4122 mull3 r3,-48(fp),-40(fp)
4123 mull2 r0,-48(fp)
4124 addl3 -36(fp),-40(fp),r0
4125 bicl3 #0,r0,-36(fp)
4126 cmpl -36(fp),-40(fp)
4127 bgequ noname.381
4128 addl2 #65536,-48(fp)
4129noname.381:
4130 movzwl -34(fp),r0
4131 bicl2 #-65536,r0
4132 addl2 r0,-48(fp)
4133 bicl3 #-65536,-36(fp),r0
4134 ashl #16,r0,-40(fp)
4135 addl3 -40(fp),-44(fp),r0
4136 bicl3 #0,r0,-44(fp)
4137 cmpl -44(fp),-40(fp)
4138 bgequ noname.382
4139 incl -48(fp)
4140noname.382:
4141 movl -44(fp),r3
4142 movl -48(fp),r2
4143 bbc #31,r2,noname.383
4144 incl r8
4145noname.383:
4146 addl2 r2,r2
4147 bicl2 #0,r2
4148 bbc #31,r3,noname.384
4149 incl r2
4150noname.384:
4151 addl2 r3,r3
4152 bicl2 #0,r3
4153 addl2 r3,r7
4154 bicl2 #0,r7
4155 cmpl r7,r3
4156 bgequ noname.385
4157 incl r2
4158 bicl3 #0,r2,r0
4159 bneq noname.385
4160 incl r8
4161noname.385:
4162 addl2 r2,r9
4163 bicl2 #0,r9
4164 cmpl r9,r2
4165 bgequ noname.386
4166 incl r8
4167noname.386:
4168
4169 movl 4(ap),r0
4170 movl r7,8(r0)
4171
4172 clrl r7
4173
4174 movl 8(ap),r0
4175 movzwl 14(r0),r2
4176 bicl3 #-65536,(r0),r3
4177 movzwl 2(r0),r1
4178 bicl2 #-65536,r1
4179 bicl3 #-65536,12(r0),-60(fp)
4180 bicl3 #-65536,r2,-64(fp)
4181 mull3 r1,-60(fp),-52(fp)
4182 mull2 r3,-60(fp)
4183 mull3 r3,-64(fp),-56(fp)
4184 mull2 r1,-64(fp)
4185 addl3 -52(fp),-56(fp),r0
4186 bicl3 #0,r0,-52(fp)
4187 cmpl -52(fp),-56(fp)
4188 bgequ noname.387
4189 addl2 #65536,-64(fp)
4190noname.387:
4191 movzwl -50(fp),r0
4192 bicl2 #-65536,r0
4193 addl2 r0,-64(fp)
4194 bicl3 #-65536,-52(fp),r0
4195 ashl #16,r0,-56(fp)
4196 addl3 -56(fp),-60(fp),r0
4197 bicl3 #0,r0,-60(fp)
4198 cmpl -60(fp),-56(fp)
4199 bgequ noname.388
4200 incl -64(fp)
4201noname.388:
4202 movl -60(fp),r3
4203 movl -64(fp),r2
4204 bbc #31,r2,noname.389
4205 incl r7
4206noname.389:
4207 addl2 r2,r2
4208 bicl2 #0,r2
4209 bbc #31,r3,noname.390
4210 incl r2
4211noname.390:
4212 addl2 r3,r3
4213 bicl2 #0,r3
4214 addl2 r3,r9
4215 bicl2 #0,r9
4216 cmpl r9,r3
4217 bgequ noname.391
4218 incl r2
4219 bicl3 #0,r2,r0
4220 bneq noname.391
4221 incl r7
4222noname.391:
4223 addl2 r2,r8
4224 bicl2 #0,r8
4225 cmpl r8,r2
4226 bgequ noname.392
4227 incl r7
4228noname.392:
4229
4230 movl 8(ap),r0
4231 movzwl 10(r0),r2
4232 bicl3 #-65536,4(r0),r3
4233 movzwl 6(r0),r1
4234 bicl2 #-65536,r1
4235 bicl3 #-65536,8(r0),-76(fp)
4236 bicl3 #-65536,r2,-80(fp)
4237 mull3 r1,-76(fp),-68(fp)
4238 mull2 r3,-76(fp)
4239 mull3 r3,-80(fp),-72(fp)
4240 mull2 r1,-80(fp)
4241 addl3 -68(fp),-72(fp),r0
4242 bicl3 #0,r0,-68(fp)
4243 cmpl -68(fp),-72(fp)
4244 bgequ noname.393
4245 addl2 #65536,-80(fp)
4246noname.393:
4247 movzwl -66(fp),r0
4248 bicl2 #-65536,r0
4249 addl2 r0,-80(fp)
4250 bicl3 #-65536,-68(fp),r0
4251 ashl #16,r0,-72(fp)
4252 addl3 -72(fp),-76(fp),r0
4253 bicl3 #0,r0,-76(fp)
4254 cmpl -76(fp),-72(fp)
4255 bgequ noname.394
4256 incl -80(fp)
4257noname.394:
4258 movl -76(fp),r3
4259 movl -80(fp),r2
4260 bbc #31,r2,noname.395
4261 incl r7
4262noname.395:
4263 addl2 r2,r2
4264 bicl2 #0,r2
4265 bbc #31,r3,noname.396
4266 incl r2
4267noname.396:
4268 addl2 r3,r3
4269 bicl2 #0,r3
4270 addl2 r3,r9
4271 bicl2 #0,r9
4272 cmpl r9,r3
4273 bgequ noname.397
4274 incl r2
4275 bicl3 #0,r2,r0
4276 bneq noname.397
4277 incl r7
4278noname.397:
4279 addl2 r2,r8
4280 bicl2 #0,r8
4281 cmpl r8,r2
4282 bgequ noname.398
4283 incl r7
4284noname.398:
4285
4286 movl 4(ap),r0
4287 movl r9,12(r0)
4288
4289 clrl r9
4290
4291 movl 8(ap),r2
4292 movl 8(r2),r4
4293 bicl3 #-65536,r4,-84(fp)
4294 extzv #16,#16,r4,r0
4295 bicl3 #-65536,r0,r4
4296 movl -84(fp),r0
4297 mull3 r0,r4,-88(fp)
4298 mull3 r0,r0,-84(fp)
4299 mull2 r4,r4
4300 bicl3 #32767,-88(fp),r0
4301 extzv #15,#17,r0,r0
4302 addl2 r0,r4
4303 bicl3 #-65536,-88(fp),r0
4304 ashl #17,r0,-88(fp)
4305 addl3 -84(fp),-88(fp),r0
4306 bicl3 #0,r0,-84(fp)
4307 cmpl -84(fp),-88(fp)
4308 bgequ noname.399
4309 incl r4
4310noname.399:
4311 movl -84(fp),r1
4312 movl r4,r3
4313 addl2 r1,r8
4314 bicl2 #0,r8
4315 cmpl r8,r1
4316 bgequ noname.400
4317 incl r3
4318noname.400:
4319 addl2 r3,r7
4320 bicl2 #0,r7
4321 cmpl r7,r3
4322 bgequ noname.401
4323 incl r9
4324noname.401:
4325
4326 movzwl 14(r2),r3
4327 bicl3 #-65536,4(r2),r1
4328 movzwl 6(r2),r0
4329 bicl2 #-65536,r0
4330 bicl3 #-65536,12(r2),-100(fp)
4331 bicl3 #-65536,r3,-104(fp)
4332 mull3 r0,-100(fp),-92(fp)
4333 mull2 r1,-100(fp)
4334 mull3 r1,-104(fp),-96(fp)
4335 mull2 r0,-104(fp)
4336 addl3 -92(fp),-96(fp),r0
4337 bicl3 #0,r0,-92(fp)
4338 cmpl -92(fp),-96(fp)
4339 bgequ noname.402
4340 addl2 #65536,-104(fp)
4341noname.402:
4342 movzwl -90(fp),r0
4343 bicl2 #-65536,r0
4344 addl2 r0,-104(fp)
4345 bicl3 #-65536,-92(fp),r0
4346 ashl #16,r0,-96(fp)
4347 addl3 -96(fp),-100(fp),r0
4348 bicl3 #0,r0,-100(fp)
4349 cmpl -100(fp),-96(fp)
4350 bgequ noname.403
4351 incl -104(fp)
4352noname.403:
4353 movl -100(fp),r3
4354 movl -104(fp),r2
4355 bbc #31,r2,noname.404
4356 incl r9
4357noname.404:
4358 addl2 r2,r2
4359 bicl2 #0,r2
4360 bbc #31,r3,noname.405
4361 incl r2
4362noname.405:
4363 addl2 r3,r3
4364 bicl2 #0,r3
4365 addl2 r3,r8
4366 bicl2 #0,r8
4367 cmpl r8,r3
4368 bgequ noname.406
4369 incl r2
4370 bicl3 #0,r2,r0
4371 bneq noname.406
4372 incl r9
4373noname.406:
4374 addl2 r2,r7
4375 bicl2 #0,r7
4376 cmpl r7,r2
4377 bgequ noname.407
4378 incl r9
4379noname.407:
4380
4381 movl 8(ap),r0
4382 movzwl 18(r0),r2
4383 bicl3 #-65536,(r0),r3
4384 movzwl 2(r0),r1
4385 bicl2 #-65536,r1
4386 bicl3 #-65536,16(r0),-116(fp)
4387 bicl3 #-65536,r2,-120(fp)
4388 mull3 r1,-116(fp),-108(fp)
4389 mull2 r3,-116(fp)
4390 mull3 r3,-120(fp),-112(fp)
4391 mull2 r1,-120(fp)
4392 addl3 -108(fp),-112(fp),r0
4393 bicl3 #0,r0,-108(fp)
4394 cmpl -108(fp),-112(fp)
4395 bgequ noname.408
4396 addl2 #65536,-120(fp)
4397noname.408:
4398 movzwl -106(fp),r0
4399 bicl2 #-65536,r0
4400 addl2 r0,-120(fp)
4401 bicl3 #-65536,-108(fp),r0
4402 ashl #16,r0,-112(fp)
4403 addl3 -112(fp),-116(fp),r0
4404 bicl3 #0,r0,-116(fp)
4405 cmpl -116(fp),-112(fp)
4406 bgequ noname.409
4407 incl -120(fp)
4408noname.409:
4409 movl -116(fp),r3
4410 movl -120(fp),r2
4411 bbc #31,r2,noname.410
4412 incl r9
4413noname.410:
4414 addl2 r2,r2
4415 bicl2 #0,r2
4416 bbc #31,r3,noname.411
4417 incl r2
4418noname.411:
4419 addl2 r3,r3
4420 bicl2 #0,r3
4421 addl2 r3,r8
4422 bicl2 #0,r8
4423 cmpl r8,r3
4424 bgequ noname.412
4425 incl r2
4426 bicl3 #0,r2,r0
4427 bneq noname.412
4428 incl r9
4429noname.412:
4430 addl2 r2,r7
4431 bicl2 #0,r7
4432 cmpl r7,r2
4433 bgequ noname.413
4434 incl r9
4435noname.413:
4436
4437 movl 4(ap),r0
4438 movl r8,16(r0)
4439
4440 clrl r8
4441
4442 movl 8(ap),r0
4443 movzwl 22(r0),r2
4444 bicl3 #-65536,(r0),r3
4445 movzwl 2(r0),r1
4446 bicl2 #-65536,r1
4447 bicl3 #-65536,20(r0),-132(fp)
4448 bicl3 #-65536,r2,-136(fp)
4449 mull3 r1,-132(fp),-124(fp)
4450 mull2 r3,-132(fp)
4451 mull3 r3,-136(fp),-128(fp)
4452 mull2 r1,-136(fp)
4453 addl3 -124(fp),-128(fp),r0
4454 bicl3 #0,r0,-124(fp)
4455 cmpl -124(fp),-128(fp)
4456 bgequ noname.414
4457 addl2 #65536,-136(fp)
4458noname.414:
4459 movzwl -122(fp),r0
4460 bicl2 #-65536,r0
4461 addl2 r0,-136(fp)
4462 bicl3 #-65536,-124(fp),r0
4463 ashl #16,r0,-128(fp)
4464 addl3 -128(fp),-132(fp),r0
4465 bicl3 #0,r0,-132(fp)
4466 cmpl -132(fp),-128(fp)
4467 bgequ noname.415
4468 incl -136(fp)
4469noname.415:
4470 movl -132(fp),r3
4471 movl -136(fp),r2
4472 bbc #31,r2,noname.416
4473 incl r8
4474noname.416:
4475 addl2 r2,r2
4476 bicl2 #0,r2
4477 bbc #31,r3,noname.417
4478 incl r2
4479noname.417:
4480 addl2 r3,r3
4481 bicl2 #0,r3
4482 addl2 r3,r7
4483 bicl2 #0,r7
4484 cmpl r7,r3
4485 bgequ noname.418
4486 incl r2
4487 bicl3 #0,r2,r0
4488 bneq noname.418
4489 incl r8
4490noname.418:
4491 addl2 r2,r9
4492 bicl2 #0,r9
4493 cmpl r9,r2
4494 bgequ noname.419
4495 incl r8
4496noname.419:
4497
4498 movl 8(ap),r0
4499 movzwl 18(r0),r2
4500 bicl3 #-65536,4(r0),r3
4501 movzwl 6(r0),r1
4502 bicl2 #-65536,r1
4503 bicl3 #-65536,16(r0),-148(fp)
4504 bicl3 #-65536,r2,-152(fp)
4505 mull3 r1,-148(fp),-140(fp)
4506 mull2 r3,-148(fp)
4507 mull3 r3,-152(fp),-144(fp)
4508 mull2 r1,-152(fp)
4509 addl3 -140(fp),-144(fp),r0
4510 bicl3 #0,r0,-140(fp)
4511 cmpl -140(fp),-144(fp)
4512 bgequ noname.420
4513 addl2 #65536,-152(fp)
4514noname.420:
4515 movzwl -138(fp),r0
4516 bicl2 #-65536,r0
4517 addl2 r0,-152(fp)
4518 bicl3 #-65536,-140(fp),r0
4519 ashl #16,r0,-144(fp)
4520 addl3 -144(fp),-148(fp),r0
4521 bicl3 #0,r0,-148(fp)
4522 cmpl -148(fp),-144(fp)
4523 bgequ noname.421
4524 incl -152(fp)
4525noname.421:
4526 movl -148(fp),r3
4527 movl -152(fp),r2
4528 bbc #31,r2,noname.422
4529 incl r8
4530noname.422:
4531 addl2 r2,r2
4532 bicl2 #0,r2
4533 bbc #31,r3,noname.423
4534 incl r2
4535noname.423:
4536 addl2 r3,r3
4537 bicl2 #0,r3
4538 addl2 r3,r7
4539 bicl2 #0,r7
4540 cmpl r7,r3
4541 bgequ noname.424
4542 incl r2
4543 bicl3 #0,r2,r0
4544 bneq noname.424
4545 incl r8
4546noname.424:
4547 addl2 r2,r9
4548 bicl2 #0,r9
4549 cmpl r9,r2
4550 bgequ noname.425
4551 incl r8
4552noname.425:
4553
4554 movl 8(ap),r0
4555 movzwl 14(r0),r2
4556 bicl3 #-65536,8(r0),r3
4557 movzwl 10(r0),r1
4558 bicl2 #-65536,r1
4559 bicl3 #-65536,12(r0),-164(fp)
4560 bicl3 #-65536,r2,-168(fp)
4561 mull3 r1,-164(fp),-156(fp)
4562 mull2 r3,-164(fp)
4563 mull3 r3,-168(fp),-160(fp)
4564 mull2 r1,-168(fp)
4565 addl3 -156(fp),-160(fp),r0
4566 bicl3 #0,r0,-156(fp)
4567 cmpl -156(fp),-160(fp)
4568 bgequ noname.426
4569 addl2 #65536,-168(fp)
4570noname.426:
4571 movzwl -154(fp),r0
4572 bicl2 #-65536,r0
4573 addl2 r0,-168(fp)
4574 bicl3 #-65536,-156(fp),r0
4575 ashl #16,r0,-160(fp)
4576 addl3 -160(fp),-164(fp),r0
4577 bicl3 #0,r0,-164(fp)
4578 cmpl -164(fp),-160(fp)
4579 bgequ noname.427
4580 incl -168(fp)
4581noname.427:
4582 movl -164(fp),r3
4583 movl -168(fp),r2
4584 bbc #31,r2,noname.428
4585 incl r8
4586noname.428:
4587 addl2 r2,r2
4588 bicl2 #0,r2
4589 bbc #31,r3,noname.429
4590 incl r2
4591noname.429:
4592 addl2 r3,r3
4593 bicl2 #0,r3
4594 addl2 r3,r7
4595 bicl2 #0,r7
4596 cmpl r7,r3
4597 bgequ noname.430
4598 incl r2
4599 bicl3 #0,r2,r0
4600 bneq noname.430
4601 incl r8
4602noname.430:
4603 addl2 r2,r9
4604 bicl2 #0,r9
4605 cmpl r9,r2
4606 bgequ noname.431
4607 incl r8
4608noname.431:
4609
4610 movl 4(ap),r0
4611 movl r7,20(r0)
4612
4613 clrl r7
4614
4615 movl 8(ap),r2
4616 movl 12(r2),r4
4617 bicl3 #-65536,r4,-172(fp)
4618 extzv #16,#16,r4,r0
4619 bicl3 #-65536,r0,r4
4620 movl -172(fp),r0
4621 mull3 r0,r4,-176(fp)
4622 mull3 r0,r0,-172(fp)
4623 mull2 r4,r4
4624 bicl3 #32767,-176(fp),r0
4625 extzv #15,#17,r0,r0
4626 addl2 r0,r4
4627 bicl3 #-65536,-176(fp),r0
4628 ashl #17,r0,-176(fp)
4629 addl3 -172(fp),-176(fp),r0
4630 bicl3 #0,r0,-172(fp)
4631 cmpl -172(fp),-176(fp)
4632 bgequ noname.432
4633 incl r4
4634noname.432:
4635 movl -172(fp),r1
4636 movl r4,r3
4637 addl2 r1,r9
4638 bicl2 #0,r9
4639 cmpl r9,r1
4640 bgequ noname.433
4641 incl r3
4642noname.433:
4643 addl2 r3,r8
4644 bicl2 #0,r8
4645 cmpl r8,r3
4646 bgequ noname.434
4647 incl r7
4648noname.434:
4649
4650 movzwl 18(r2),r3
4651 bicl3 #-65536,8(r2),r1
4652 movzwl 10(r2),r0
4653 bicl2 #-65536,r0
4654 bicl3 #-65536,16(r2),-188(fp)
4655 bicl3 #-65536,r3,-192(fp)
4656 mull3 r0,-188(fp),-180(fp)
4657 mull2 r1,-188(fp)
4658 mull3 r1,-192(fp),-184(fp)
4659 mull2 r0,-192(fp)
4660 addl3 -180(fp),-184(fp),r0
4661 bicl3 #0,r0,-180(fp)
4662 cmpl -180(fp),-184(fp)
4663 bgequ noname.435
4664 addl2 #65536,-192(fp)
4665noname.435:
4666 movzwl -178(fp),r0
4667 bicl2 #-65536,r0
4668 addl2 r0,-192(fp)
4669 bicl3 #-65536,-180(fp),r0
4670 ashl #16,r0,-184(fp)
4671 addl3 -184(fp),-188(fp),r0
4672 bicl3 #0,r0,-188(fp)
4673 cmpl -188(fp),-184(fp)
4674 bgequ noname.436
4675 incl -192(fp)
4676noname.436:
4677 movl -188(fp),r3
4678 movl -192(fp),r2
4679 bbc #31,r2,noname.437
4680 incl r7
4681noname.437:
4682 addl2 r2,r2
4683 bicl2 #0,r2
4684 bbc #31,r3,noname.438
4685 incl r2
4686noname.438:
4687 addl2 r3,r3
4688 bicl2 #0,r3
4689 addl2 r3,r9
4690 bicl2 #0,r9
4691 cmpl r9,r3
4692 bgequ noname.439
4693 incl r2
4694 bicl3 #0,r2,r0
4695 bneq noname.439
4696 incl r7
4697noname.439:
4698 addl2 r2,r8
4699 bicl2 #0,r8
4700 cmpl r8,r2
4701 bgequ noname.440
4702 incl r7
4703noname.440:
4704
4705 movl 8(ap),r0
4706 movzwl 22(r0),r2
4707 bicl3 #-65536,4(r0),r3
4708 movzwl 6(r0),r1
4709 bicl2 #-65536,r1
4710 bicl3 #-65536,20(r0),-204(fp)
4711 bicl3 #-65536,r2,-208(fp)
4712 mull3 r1,-204(fp),-196(fp)
4713 mull2 r3,-204(fp)
4714 mull3 r3,-208(fp),-200(fp)
4715 mull2 r1,-208(fp)
4716 addl3 -196(fp),-200(fp),r0
4717 bicl3 #0,r0,-196(fp)
4718 cmpl -196(fp),-200(fp)
4719 bgequ noname.441
4720 addl2 #65536,-208(fp)
4721noname.441:
4722 movzwl -194(fp),r0
4723 bicl2 #-65536,r0
4724 addl2 r0,-208(fp)
4725 bicl3 #-65536,-196(fp),r0
4726 ashl #16,r0,-200(fp)
4727 addl3 -200(fp),-204(fp),r0
4728 bicl3 #0,r0,-204(fp)
4729 cmpl -204(fp),-200(fp)
4730 bgequ noname.442
4731 incl -208(fp)
4732noname.442:
4733 movl -204(fp),r3
4734 movl -208(fp),r2
4735 bbc #31,r2,noname.443
4736 incl r7
4737noname.443:
4738 addl2 r2,r2
4739 bicl2 #0,r2
4740 bbc #31,r3,noname.444
4741 incl r2
4742noname.444:
4743 addl2 r3,r3
4744 bicl2 #0,r3
4745 addl2 r3,r9
4746 bicl2 #0,r9
4747 cmpl r9,r3
4748 bgequ noname.445
4749 incl r2
4750 bicl3 #0,r2,r0
4751 bneq noname.445
4752 incl r7
4753noname.445:
4754 addl2 r2,r8
4755 bicl2 #0,r8
4756 cmpl r8,r2
4757 bgequ noname.446
4758 incl r7
4759noname.446:
4760
4761 movl 8(ap),r0
4762 movzwl 26(r0),r2
4763 bicl3 #-65536,(r0),r3
4764 movzwl 2(r0),r1
4765 bicl2 #-65536,r1
4766 bicl3 #-65536,24(r0),-220(fp)
4767 bicl3 #-65536,r2,-224(fp)
4768 mull3 r1,-220(fp),-212(fp)
4769 mull2 r3,-220(fp)
4770 mull3 r3,-224(fp),-216(fp)
4771 mull2 r1,-224(fp)
4772 addl3 -212(fp),-216(fp),r0
4773 bicl3 #0,r0,-212(fp)
4774 cmpl -212(fp),-216(fp)
4775 bgequ noname.447
4776 addl2 #65536,-224(fp)
4777noname.447:
4778 movzwl -210(fp),r0
4779 bicl2 #-65536,r0
4780 addl2 r0,-224(fp)
4781 bicl3 #-65536,-212(fp),r0
4782 ashl #16,r0,-216(fp)
4783 addl3 -216(fp),-220(fp),r0
4784 bicl3 #0,r0,-220(fp)
4785 cmpl -220(fp),-216(fp)
4786 bgequ noname.448
4787 incl -224(fp)
4788noname.448:
4789 movl -220(fp),r3
4790 movl -224(fp),r2
4791 bbc #31,r2,noname.449
4792 incl r7
4793noname.449:
4794 addl2 r2,r2
4795 bicl2 #0,r2
4796 bbc #31,r3,noname.450
4797 incl r2
4798noname.450:
4799 addl2 r3,r3
4800 bicl2 #0,r3
4801 addl2 r3,r9
4802 bicl2 #0,r9
4803 cmpl r9,r3
4804 bgequ noname.451
4805 incl r2
4806 bicl3 #0,r2,r0
4807 bneq noname.451
4808 incl r7
4809noname.451:
4810 addl2 r2,r8
4811 bicl2 #0,r8
4812 cmpl r8,r2
4813 bgequ noname.452
4814 incl r7
4815noname.452:
4816
4817 movl 4(ap),r0
4818 movl r9,24(r0)
4819
4820 clrl r9
4821
4822 movl 8(ap),r0
4823 movzwl 30(r0),r2
4824 bicl3 #-65536,(r0),r3
4825 movzwl 2(r0),r1
4826 bicl2 #-65536,r1
4827 bicl3 #-65536,28(r0),-236(fp)
4828 bicl3 #-65536,r2,-240(fp)
4829 mull3 r1,-236(fp),-228(fp)
4830 mull2 r3,-236(fp)
4831 mull3 r3,-240(fp),-232(fp)
4832 mull2 r1,-240(fp)
4833 addl3 -228(fp),-232(fp),r0
4834 bicl3 #0,r0,-228(fp)
4835 cmpl -228(fp),-232(fp)
4836 bgequ noname.453
4837 addl2 #65536,-240(fp)
4838noname.453:
4839 movzwl -226(fp),r0
4840 bicl2 #-65536,r0
4841 addl2 r0,-240(fp)
4842 bicl3 #-65536,-228(fp),r0
4843 ashl #16,r0,-232(fp)
4844 addl3 -232(fp),-236(fp),r0
4845 bicl3 #0,r0,-236(fp)
4846 cmpl -236(fp),-232(fp)
4847 bgequ noname.454
4848 incl -240(fp)
4849noname.454:
4850 movl -236(fp),r3
4851 movl -240(fp),r2
4852 bbc #31,r2,noname.455
4853 incl r9
4854noname.455:
4855 addl2 r2,r2
4856 bicl2 #0,r2
4857 bbc #31,r3,noname.456
4858 incl r2
4859noname.456:
4860 addl2 r3,r3
4861 bicl2 #0,r3
4862 addl2 r3,r8
4863 bicl2 #0,r8
4864 cmpl r8,r3
4865 bgequ noname.457
4866 incl r2
4867 bicl3 #0,r2,r0
4868 bneq noname.457
4869 incl r9
4870noname.457:
4871 addl2 r2,r7
4872 bicl2 #0,r7
4873 cmpl r7,r2
4874 bgequ noname.458
4875 incl r9
4876noname.458:
4877
4878 movl 8(ap),r0
4879 movzwl 26(r0),r2
4880 bicl3 #-65536,4(r0),r3
4881 movzwl 6(r0),r1
4882 bicl2 #-65536,r1
4883 bicl3 #-65536,24(r0),-252(fp)
4884 bicl3 #-65536,r2,-256(fp)
4885 mull3 r1,-252(fp),-244(fp)
4886 mull2 r3,-252(fp)
4887 mull3 r3,-256(fp),-248(fp)
4888 mull2 r1,-256(fp)
4889 addl3 -244(fp),-248(fp),r0
4890 bicl3 #0,r0,-244(fp)
4891 cmpl -244(fp),-248(fp)
4892 bgequ noname.459
4893 addl2 #65536,-256(fp)
4894noname.459:
4895 movzwl -242(fp),r0
4896 bicl2 #-65536,r0
4897 addl2 r0,-256(fp)
4898 bicl3 #-65536,-244(fp),r0
4899 ashl #16,r0,-248(fp)
4900 addl3 -248(fp),-252(fp),r0
4901 bicl3 #0,r0,-252(fp)
4902 cmpl -252(fp),-248(fp)
4903 bgequ noname.460
4904 incl -256(fp)
4905noname.460:
4906 movl -252(fp),r3
4907 movl -256(fp),r2
4908 bbc #31,r2,noname.461
4909 incl r9
4910noname.461:
4911 addl2 r2,r2
4912 bicl2 #0,r2
4913 bbc #31,r3,noname.462
4914 incl r2
4915noname.462:
4916 addl2 r3,r3
4917 bicl2 #0,r3
4918 addl2 r3,r8
4919 bicl2 #0,r8
4920 cmpl r8,r3
4921 bgequ noname.463
4922 incl r2
4923 bicl3 #0,r2,r0
4924 bneq noname.463
4925 incl r9
4926noname.463:
4927 addl2 r2,r7
4928 bicl2 #0,r7
4929 cmpl r7,r2
4930 bgequ noname.464
4931 incl r9
4932noname.464:
4933
4934 movl 8(ap),r0
4935 movzwl 22(r0),r2
4936 bicl3 #-65536,8(r0),r3
4937 movzwl 10(r0),r1
4938 bicl2 #-65536,r1
4939 bicl3 #-65536,20(r0),-268(fp)
4940 bicl3 #-65536,r2,-272(fp)
4941 mull3 r1,-268(fp),-260(fp)
4942 mull2 r3,-268(fp)
4943 mull3 r3,-272(fp),-264(fp)
4944 mull2 r1,-272(fp)
4945 addl3 -260(fp),-264(fp),r0
4946 bicl3 #0,r0,-260(fp)
4947 cmpl -260(fp),-264(fp)
4948 bgequ noname.465
4949 addl2 #65536,-272(fp)
4950noname.465:
4951 movzwl -258(fp),r0
4952 bicl2 #-65536,r0
4953 addl2 r0,-272(fp)
4954 bicl3 #-65536,-260(fp),r0
4955 ashl #16,r0,-264(fp)
4956 addl3 -264(fp),-268(fp),r0
4957 bicl3 #0,r0,-268(fp)
4958 cmpl -268(fp),-264(fp)
4959 bgequ noname.466
4960 incl -272(fp)
4961noname.466:
4962 movl -268(fp),r3
4963 movl -272(fp),r2
4964 bbc #31,r2,noname.467
4965 incl r9
4966noname.467:
4967 addl2 r2,r2
4968 bicl2 #0,r2
4969 bbc #31,r3,noname.468
4970 incl r2
4971noname.468:
4972 addl2 r3,r3
4973 bicl2 #0,r3
4974 addl2 r3,r8
4975 bicl2 #0,r8
4976 cmpl r8,r3
4977 bgequ noname.469
4978 incl r2
4979 bicl3 #0,r2,r0
4980 bneq noname.469
4981 incl r9
4982noname.469:
4983 addl2 r2,r7
4984 bicl2 #0,r7
4985 cmpl r7,r2
4986 bgequ noname.470
4987 incl r9
4988noname.470:
4989
4990 movl 8(ap),r0
4991 movzwl 18(r0),r2
4992 bicl3 #-65536,12(r0),r3
4993 movzwl 14(r0),r1
4994 bicl2 #-65536,r1
4995 bicl3 #-65536,16(r0),-284(fp)
4996 bicl3 #-65536,r2,-288(fp)
4997 mull3 r1,-284(fp),-276(fp)
4998 mull2 r3,-284(fp)
4999 mull3 r3,-288(fp),-280(fp)
5000 mull2 r1,-288(fp)
5001 addl3 -276(fp),-280(fp),r0
5002 bicl3 #0,r0,-276(fp)
5003 cmpl -276(fp),-280(fp)
5004 bgequ noname.471
5005 addl2 #65536,-288(fp)
5006noname.471:
5007 movzwl -274(fp),r0
5008 bicl2 #-65536,r0
5009 addl2 r0,-288(fp)
5010 bicl3 #-65536,-276(fp),r0
5011 ashl #16,r0,-280(fp)
5012 addl3 -280(fp),-284(fp),r0
5013 bicl3 #0,r0,-284(fp)
5014 cmpl -284(fp),-280(fp)
5015 bgequ noname.472
5016 incl -288(fp)
5017noname.472:
5018 movl -284(fp),r3
5019 movl -288(fp),r2
5020 bbc #31,r2,noname.473
5021 incl r9
5022noname.473:
5023 addl2 r2,r2
5024 bicl2 #0,r2
5025 bbc #31,r3,noname.474
5026 incl r2
5027noname.474:
5028 addl2 r3,r3
5029 bicl2 #0,r3
5030 addl2 r3,r8
5031 bicl2 #0,r8
5032 cmpl r8,r3
5033 bgequ noname.475
5034 incl r2
5035 bicl3 #0,r2,r0
5036 bneq noname.475
5037 incl r9
5038noname.475:
5039 addl2 r2,r7
5040 bicl2 #0,r7
5041 cmpl r7,r2
5042 bgequ noname.476
5043 incl r9
5044noname.476:
5045
5046 movl 4(ap),r0
5047 movl r8,28(r0)
5048
5049 clrl r8
5050
5051 movl 8(ap),r3
5052 movl 16(r3),r4
5053 bicl3 #-65536,r4,r5
5054 extzv #16,#16,r4,r0
5055 bicl3 #-65536,r0,r4
5056 mull3 r5,r4,-292(fp)
5057 mull2 r5,r5
5058 mull2 r4,r4
5059 bicl3 #32767,-292(fp),r0
5060 extzv #15,#17,r0,r0
5061 addl2 r0,r4
5062 bicl3 #-65536,-292(fp),r0
5063 ashl #17,r0,-292(fp)
5064 addl2 -292(fp),r5
5065 bicl2 #0,r5
5066 cmpl r5,-292(fp)
5067 bgequ noname.477
5068 incl r4
5069noname.477:
5070 movl r5,r1
5071 movl r4,r2
5072 addl2 r1,r7
5073 bicl2 #0,r7
5074 cmpl r7,r1
5075 bgequ noname.478
5076 incl r2
5077noname.478:
5078 addl2 r2,r9
5079 bicl2 #0,r9
5080 cmpl r9,r2
5081 bgequ noname.479
5082 incl r8
5083noname.479:
5084
5085 bicl3 #-65536,20(r3),r4
5086 movzwl 22(r3),r1
5087 bicl2 #-65536,r1
5088 bicl3 #-65536,12(r3),r2
5089 movzwl 14(r3),r0
5090 bicl2 #-65536,r0
5091 movl r4,r6
5092 movl r1,r5
5093 mull3 r0,r6,-296(fp)
5094 mull2 r2,r6
5095 mull3 r2,r5,-300(fp)
5096 mull2 r0,r5
5097 addl3 -296(fp),-300(fp),r0
5098 bicl3 #0,r0,-296(fp)
5099 cmpl -296(fp),-300(fp)
5100 bgequ noname.480
5101 addl2 #65536,r5
5102noname.480:
5103 movzwl -294(fp),r0
5104 bicl2 #-65536,r0
5105 addl2 r0,r5
5106 bicl3 #-65536,-296(fp),r0
5107 ashl #16,r0,-300(fp)
5108 addl2 -300(fp),r6
5109 bicl2 #0,r6
5110 cmpl r6,-300(fp)
5111 bgequ noname.481
5112 incl r5
5113noname.481:
5114 movl r6,r3
5115 movl r5,r2
5116 bbc #31,r2,noname.482
5117 incl r8
5118noname.482:
5119 addl2 r2,r2
5120 bicl2 #0,r2
5121 bbc #31,r3,noname.483
5122 incl r2
5123noname.483:
5124 addl2 r3,r3
5125 bicl2 #0,r3
5126 addl2 r3,r7
5127 bicl2 #0,r7
5128 cmpl r7,r3
5129 bgequ noname.484
5130 incl r2
5131 bicl3 #0,r2,r0
5132 bneq noname.484
5133 incl r8
5134noname.484:
5135 addl2 r2,r9
5136 bicl2 #0,r9
5137 cmpl r9,r2
5138 bgequ noname.485
5139 incl r8
5140noname.485:
5141
5142 movl 8(ap),r0
5143 bicl3 #-65536,24(r0),r3
5144 movzwl 26(r0),r1
5145 bicl2 #-65536,r1
5146 bicl3 #-65536,8(r0),r2
5147 movzwl 10(r0),r0
5148 bicl2 #-65536,r0
5149 movl r3,r5
5150 movl r1,r4
5151 mull3 r0,r5,-304(fp)
5152 mull2 r2,r5
5153 mull3 r2,r4,-308(fp)
5154 mull2 r0,r4
5155 addl3 -304(fp),-308(fp),r0
5156 bicl3 #0,r0,-304(fp)
5157 cmpl -304(fp),-308(fp)
5158 bgequ noname.486
5159 addl2 #65536,r4
5160noname.486:
5161 movzwl -302(fp),r0
5162 bicl2 #-65536,r0
5163 addl2 r0,r4
5164 bicl3 #-65536,-304(fp),r0
5165 ashl #16,r0,-308(fp)
5166 addl2 -308(fp),r5
5167 bicl2 #0,r5
5168 cmpl r5,-308(fp)
5169 bgequ noname.487
5170 incl r4
5171noname.487:
5172 movl r5,r3
5173 movl r4,r2
5174 bbc #31,r2,noname.488
5175 incl r8
5176noname.488:
5177 addl2 r2,r2
5178 bicl2 #0,r2
5179 bbc #31,r3,noname.489
5180 incl r2
5181noname.489:
5182 addl2 r3,r3
5183 bicl2 #0,r3
5184 addl2 r3,r7
5185 bicl2 #0,r7
5186 cmpl r7,r3
5187 bgequ noname.490
5188 incl r2
5189 bicl3 #0,r2,r0
5190 bneq noname.490
5191 incl r8
5192noname.490:
5193 addl2 r2,r9
5194 bicl2 #0,r9
5195 cmpl r9,r2
5196 bgequ noname.491
5197 incl r8
5198noname.491:
5199
5200 movl 8(ap),r0
5201 bicl3 #-65536,28(r0),r3
5202 movzwl 30(r0),r1
5203 bicl2 #-65536,r1
5204 bicl3 #-65536,4(r0),r2
5205 movzwl 6(r0),r0
5206 bicl2 #-65536,r0
5207 movl r3,r5
5208 movl r1,r4
5209 mull3 r0,r5,-312(fp)
5210 mull2 r2,r5
5211 mull3 r2,r4,-316(fp)
5212 mull2 r0,r4
5213 addl3 -312(fp),-316(fp),r0
5214 bicl3 #0,r0,-312(fp)
5215 cmpl -312(fp),-316(fp)
5216 bgequ noname.492
5217 addl2 #65536,r4
5218noname.492:
5219 movzwl -310(fp),r0
5220 bicl2 #-65536,r0
5221 addl2 r0,r4
5222 bicl3 #-65536,-312(fp),r0
5223 ashl #16,r0,-316(fp)
5224 addl2 -316(fp),r5
5225 bicl2 #0,r5
5226 cmpl r5,-316(fp)
5227 bgequ noname.493
5228 incl r4
5229noname.493:
5230 movl r5,r3
5231 movl r4,r2
5232 bbc #31,r2,noname.494
5233 incl r8
5234noname.494:
5235 addl2 r2,r2
5236 bicl2 #0,r2
5237 bbc #31,r3,noname.495
5238 incl r2
5239noname.495:
5240 addl2 r3,r3
5241 bicl2 #0,r3
5242 addl2 r3,r7
5243 bicl2 #0,r7
5244 cmpl r7,r3
5245 bgequ noname.496
5246 incl r2
5247 bicl3 #0,r2,r0
5248 bneq noname.496
5249 incl r8
5250noname.496:
5251 addl2 r2,r9
5252 bicl2 #0,r9
5253 cmpl r9,r2
5254 bgequ noname.497
5255 incl r8
5256noname.497:
5257
5258 movl 4(ap),r0
5259 movl r7,32(r0)
5260
5261 clrl r7
5262
5263 movl 8(ap),r0
5264 bicl3 #-65536,28(r0),r3
5265 movzwl 30(r0),r2
5266 bicl3 #-65536,8(r0),r1
5267 movzwl 10(r0),r0
5268 bicl2 #-65536,r0
5269 movl r3,r4
5270 bicl3 #-65536,r2,-328(fp)
5271 mull3 r0,r4,-320(fp)
5272 mull2 r1,r4
5273 mull3 r1,-328(fp),-324(fp)
5274 mull2 r0,-328(fp)
5275 addl3 -320(fp),-324(fp),r0
5276 bicl3 #0,r0,-320(fp)
5277 cmpl -320(fp),-324(fp)
5278 bgequ noname.498
5279 addl2 #65536,-328(fp)
5280noname.498:
5281 movzwl -318(fp),r0
5282 bicl2 #-65536,r0
5283 addl2 r0,-328(fp)
5284 bicl3 #-65536,-320(fp),r0
5285 ashl #16,r0,-324(fp)
5286 addl2 -324(fp),r4
5287 bicl2 #0,r4
5288 cmpl r4,-324(fp)
5289 bgequ noname.499
5290 incl -328(fp)
5291noname.499:
5292 movl r4,r3
5293 movl -328(fp),r2
5294 bbc #31,r2,noname.500
5295 incl r7
5296noname.500:
5297 addl2 r2,r2
5298 bicl2 #0,r2
5299 bbc #31,r3,noname.501
5300 incl r2
5301noname.501:
5302 addl2 r3,r3
5303 bicl2 #0,r3
5304 addl2 r3,r9
5305 bicl2 #0,r9
5306 cmpl r9,r3
5307 bgequ noname.502
5308 incl r2
5309 bicl3 #0,r2,r0
5310 bneq noname.502
5311 incl r7
5312noname.502:
5313 addl2 r2,r8
5314 bicl2 #0,r8
5315 cmpl r8,r2
5316 bgequ noname.503
5317 incl r7
5318noname.503:
5319
5320 movl 8(ap),r0
5321 movzwl 26(r0),r2
5322 bicl3 #-65536,12(r0),r3
5323 movzwl 14(r0),r1
5324 bicl2 #-65536,r1
5325 bicl3 #-65536,24(r0),-340(fp)
5326 bicl3 #-65536,r2,-344(fp)
5327 mull3 r1,-340(fp),-332(fp)
5328 mull2 r3,-340(fp)
5329 mull3 r3,-344(fp),-336(fp)
5330 mull2 r1,-344(fp)
5331 addl3 -332(fp),-336(fp),r0
5332 bicl3 #0,r0,-332(fp)
5333 cmpl -332(fp),-336(fp)
5334 bgequ noname.504
5335 addl2 #65536,-344(fp)
5336noname.504:
5337 movzwl -330(fp),r0
5338 bicl2 #-65536,r0
5339 addl2 r0,-344(fp)
5340 bicl3 #-65536,-332(fp),r0
5341 ashl #16,r0,-336(fp)
5342 addl3 -336(fp),-340(fp),r0
5343 bicl3 #0,r0,-340(fp)
5344 cmpl -340(fp),-336(fp)
5345 bgequ noname.505
5346 incl -344(fp)
5347noname.505:
5348 movl -340(fp),r3
5349 movl -344(fp),r2
5350 bbc #31,r2,noname.506
5351 incl r7
5352noname.506:
5353 addl2 r2,r2
5354 bicl2 #0,r2
5355 bbc #31,r3,noname.507
5356 incl r2
5357noname.507:
5358 addl2 r3,r3
5359 bicl2 #0,r3
5360 addl2 r3,r9
5361 bicl2 #0,r9
5362 cmpl r9,r3
5363 bgequ noname.508
5364 incl r2
5365 bicl3 #0,r2,r0
5366 bneq noname.508
5367 incl r7
5368noname.508:
5369 addl2 r2,r8
5370 bicl2 #0,r8
5371 cmpl r8,r2
5372 bgequ noname.509
5373 incl r7
5374noname.509:
5375
5376 movl 8(ap),r0
5377 movzwl 22(r0),r2
5378 bicl3 #-65536,16(r0),r3
5379 movzwl 18(r0),r1
5380 bicl2 #-65536,r1
5381 bicl3 #-65536,20(r0),-356(fp)
5382 bicl3 #-65536,r2,-360(fp)
5383 mull3 r1,-356(fp),-348(fp)
5384 mull2 r3,-356(fp)
5385 mull3 r3,-360(fp),-352(fp)
5386 mull2 r1,-360(fp)
5387 addl3 -348(fp),-352(fp),r0
5388 bicl3 #0,r0,-348(fp)
5389 cmpl -348(fp),-352(fp)
5390 bgequ noname.510
5391 addl2 #65536,-360(fp)
5392noname.510:
5393 movzwl -346(fp),r0
5394 bicl2 #-65536,r0
5395 addl2 r0,-360(fp)
5396 bicl3 #-65536,-348(fp),r0
5397 ashl #16,r0,-352(fp)
5398 addl3 -352(fp),-356(fp),r0
5399 bicl3 #0,r0,-356(fp)
5400 cmpl -356(fp),-352(fp)
5401 bgequ noname.511
5402 incl -360(fp)
5403noname.511:
5404 movl -356(fp),r3
5405 movl -360(fp),r2
5406 bbc #31,r2,noname.512
5407 incl r7
5408noname.512:
5409 addl2 r2,r2
5410 bicl2 #0,r2
5411 bbc #31,r3,noname.513
5412 incl r2
5413noname.513:
5414 addl2 r3,r3
5415 bicl2 #0,r3
5416 addl2 r3,r9
5417 bicl2 #0,r9
5418 cmpl r9,r3
5419 bgequ noname.514
5420 incl r2
5421 bicl3 #0,r2,r0
5422 bneq noname.514
5423 incl r7
5424noname.514:
5425 addl2 r2,r8
5426 bicl2 #0,r8
5427 cmpl r8,r2
5428 bgequ noname.515
5429 incl r7
5430noname.515:
5431
5432 movl 4(ap),r0
5433 movl r9,36(r0)
5434
5435 clrl r9
5436
5437 movl 8(ap),r3
5438 movl 20(r3),r4
5439 bicl3 #-65536,r4,-364(fp)
5440 extzv #16,#16,r4,r0
5441 bicl3 #-65536,r0,r4
5442 movl -364(fp),r0
5443 mull3 r0,r4,-368(fp)
5444 mull3 r0,r0,-364(fp)
5445 mull2 r4,r4
5446 bicl3 #32767,-368(fp),r0
5447 extzv #15,#17,r0,r0
5448 addl2 r0,r4
5449 bicl3 #-65536,-368(fp),r0
5450 ashl #17,r0,-368(fp)
5451 addl3 -364(fp),-368(fp),r0
5452 bicl3 #0,r0,-364(fp)
5453 cmpl -364(fp),-368(fp)
5454 bgequ noname.516
5455 incl r4
5456noname.516:
5457 movl -364(fp),r1
5458 movl r4,r2
5459 addl2 r1,r8
5460 bicl2 #0,r8
5461 cmpl r8,r1
5462 bgequ noname.517
5463 incl r2
5464noname.517:
5465 addl2 r2,r7
5466 bicl2 #0,r7
5467 cmpl r7,r2
5468 bgequ noname.518
5469 incl r9
5470noname.518:
5471
5472 bicl3 #-65536,24(r3),r4
5473 movzwl 26(r3),r1
5474 bicl2 #-65536,r1
5475 bicl3 #-65536,16(r3),r2
5476 movzwl 18(r3),r0
5477 bicl2 #-65536,r0
5478 movl r4,r6
5479 movl r1,r5
5480 mull3 r0,r6,-372(fp)
5481 mull2 r2,r6
5482 mull3 r2,r5,-376(fp)
5483 mull2 r0,r5
5484 addl3 -372(fp),-376(fp),r0
5485 bicl3 #0,r0,-372(fp)
5486 cmpl -372(fp),-376(fp)
5487 bgequ noname.519
5488 addl2 #65536,r5
5489noname.519:
5490 movzwl -370(fp),r0
5491 bicl2 #-65536,r0
5492 addl2 r0,r5
5493 bicl3 #-65536,-372(fp),r0
5494 ashl #16,r0,-376(fp)
5495 addl2 -376(fp),r6
5496 bicl2 #0,r6
5497 cmpl r6,-376(fp)
5498 bgequ noname.520
5499 incl r5
5500noname.520:
5501 movl r6,r3
5502 movl r5,r2
5503 bbc #31,r2,noname.521
5504 incl r9
5505noname.521:
5506 addl2 r2,r2
5507 bicl2 #0,r2
5508 bbc #31,r3,noname.522
5509 incl r2
5510noname.522:
5511 addl2 r3,r3
5512 bicl2 #0,r3
5513 addl2 r3,r8
5514 bicl2 #0,r8
5515 cmpl r8,r3
5516 bgequ noname.523
5517 incl r2
5518 bicl3 #0,r2,r0
5519 bneq noname.523
5520 incl r9
5521noname.523:
5522 addl2 r2,r7
5523 bicl2 #0,r7
5524 cmpl r7,r2
5525 bgequ noname.524
5526 incl r9
5527noname.524:
5528
5529 movl 8(ap),r0
5530 bicl3 #-65536,28(r0),r3
5531 movzwl 30(r0),r1
5532 bicl2 #-65536,r1
5533 bicl3 #-65536,12(r0),r2
5534 movzwl 14(r0),r0
5535 bicl2 #-65536,r0
5536 movl r3,r5
5537 movl r1,r4
5538 mull3 r0,r5,-380(fp)
5539 mull2 r2,r5
5540 mull3 r2,r4,-384(fp)
5541 mull2 r0,r4
5542 addl3 -380(fp),-384(fp),r0
5543 bicl3 #0,r0,-380(fp)
5544 cmpl -380(fp),-384(fp)
5545 bgequ noname.525
5546 addl2 #65536,r4
5547noname.525:
5548 movzwl -378(fp),r0
5549 bicl2 #-65536,r0
5550 addl2 r0,r4
5551 bicl3 #-65536,-380(fp),r0
5552 ashl #16,r0,-384(fp)
5553 addl2 -384(fp),r5
5554 bicl2 #0,r5
5555 cmpl r5,-384(fp)
5556 bgequ noname.526
5557 incl r4
5558noname.526:
5559 movl r5,r3
5560 movl r4,r2
5561 bbc #31,r2,noname.527
5562 incl r9
5563noname.527:
5564 addl2 r2,r2
5565 bicl2 #0,r2
5566 bbc #31,r3,noname.528
5567 incl r2
5568noname.528:
5569 addl2 r3,r3
5570 bicl2 #0,r3
5571 addl2 r3,r8
5572 bicl2 #0,r8
5573 cmpl r8,r3
5574 bgequ noname.529
5575 incl r2
5576 bicl3 #0,r2,r0
5577 bneq noname.529
5578 incl r9
5579noname.529:
5580 addl2 r2,r7
5581 bicl2 #0,r7
5582 cmpl r7,r2
5583 bgequ noname.530
5584 incl r9
5585noname.530:
5586 movl 4(ap),r0
5587 movl r8,40(r0)
5588
5589 clrl r8
5590
5591 movl 8(ap),r0
5592 bicl3 #-65536,28(r0),r3
5593 movzwl 30(r0),r1
5594 bicl2 #-65536,r1
5595 bicl3 #-65536,16(r0),r2
5596 movzwl 18(r0),r0
5597 bicl2 #-65536,r0
5598 movl r3,r5
5599 movl r1,r4
5600 mull3 r0,r5,-388(fp)
5601 mull2 r2,r5
5602 mull3 r2,r4,-392(fp)
5603 mull2 r0,r4
5604 addl3 -388(fp),-392(fp),r0
5605 bicl3 #0,r0,-388(fp)
5606 cmpl -388(fp),-392(fp)
5607 bgequ noname.531
5608 addl2 #65536,r4
5609noname.531:
5610 movzwl -386(fp),r0
5611 bicl2 #-65536,r0
5612 addl2 r0,r4
5613 bicl3 #-65536,-388(fp),r0
5614 ashl #16,r0,-392(fp)
5615 addl2 -392(fp),r5
5616 bicl2 #0,r5
5617 cmpl r5,-392(fp)
5618 bgequ noname.532
5619 incl r4
5620noname.532:
5621 movl r5,r3
5622 movl r4,r2
5623 bbc #31,r2,noname.533
5624 incl r8
5625noname.533:
5626 addl2 r2,r2
5627 bicl2 #0,r2
5628 bbc #31,r3,noname.534
5629 incl r2
5630noname.534:
5631 addl2 r3,r3
5632 bicl2 #0,r3
5633 addl2 r3,r7
5634 bicl2 #0,r7
5635 cmpl r7,r3
5636 bgequ noname.535
5637 incl r2
5638 bicl3 #0,r2,r0
5639 bneq noname.535
5640 incl r8
5641noname.535:
5642 addl2 r2,r9
5643 bicl2 #0,r9
5644 cmpl r9,r2
5645 bgequ noname.536
5646 incl r8
5647noname.536:
5648
5649 movl 8(ap),r0
5650 bicl3 #-65536,24(r0),r3
5651 movzwl 26(r0),r1
5652 bicl2 #-65536,r1
5653 bicl3 #-65536,20(r0),r2
5654 movzwl 22(r0),r0
5655 bicl2 #-65536,r0
5656 movl r3,r5
5657 movl r1,r4
5658 mull3 r0,r5,-396(fp)
5659 mull2 r2,r5
5660 mull3 r2,r4,-400(fp)
5661 mull2 r0,r4
5662 addl3 -396(fp),-400(fp),r0
5663 bicl3 #0,r0,-396(fp)
5664 cmpl -396(fp),-400(fp)
5665 bgequ noname.537
5666 addl2 #65536,r4
5667noname.537:
5668 movzwl -394(fp),r0
5669 bicl2 #-65536,r0
5670 addl2 r0,r4
5671 bicl3 #-65536,-396(fp),r0
5672 ashl #16,r0,-400(fp)
5673 addl2 -400(fp),r5
5674 bicl2 #0,r5
5675 cmpl r5,-400(fp)
5676 bgequ noname.538
5677 incl r4
5678noname.538:
5679 movl r5,r3
5680 movl r4,r2
5681 bbc #31,r2,noname.539
5682 incl r8
5683noname.539:
5684 addl2 r2,r2
5685 bicl2 #0,r2
5686 bbc #31,r3,noname.540
5687 incl r2
5688noname.540:
5689 addl2 r3,r3
5690 bicl2 #0,r3
5691 addl2 r3,r7
5692 bicl2 #0,r7
5693 cmpl r7,r3
5694 bgequ noname.541
5695 incl r2
5696 bicl3 #0,r2,r0
5697 bneq noname.541
5698 incl r8
5699noname.541:
5700 addl2 r2,r9
5701 bicl2 #0,r9
5702 cmpl r9,r2
5703 bgequ noname.542
5704 incl r8
5705noname.542:
5706
5707 movl 4(ap),r0
5708 movl r7,44(r0)
5709
5710 clrl r7
5711
5712 movl 8(ap),r3
5713 movl 24(r3),r4
5714 bicl3 #-65536,r4,r5
5715 extzv #16,#16,r4,r0
5716 bicl3 #-65536,r0,r4
5717 mull3 r5,r4,-404(fp)
5718 mull2 r5,r5
5719 mull2 r4,r4
5720 bicl3 #32767,-404(fp),r0
5721 extzv #15,#17,r0,r0
5722 addl2 r0,r4
5723 bicl3 #-65536,-404(fp),r0
5724 ashl #17,r0,-404(fp)
5725 addl2 -404(fp),r5
5726 bicl2 #0,r5
5727 cmpl r5,-404(fp)
5728 bgequ noname.543
5729 incl r4
5730noname.543:
5731 movl r5,r1
5732 movl r4,r2
5733 addl2 r1,r9
5734 bicl2 #0,r9
5735 cmpl r9,r1
5736 bgequ noname.544
5737 incl r2
5738noname.544:
5739 addl2 r2,r8
5740 bicl2 #0,r8
5741 cmpl r8,r2
5742 bgequ noname.545
5743 incl r7
5744noname.545:
5745
5746 movzwl 30(r3),r2
5747 bicl3 #-65536,20(r3),r1
5748 movzwl 22(r3),r0
5749 bicl2 #-65536,r0
5750 bicl3 #-65536,28(r3),-416(fp)
5751 bicl3 #-65536,r2,-420(fp)
5752 mull3 r0,-416(fp),-408(fp)
5753 mull2 r1,-416(fp)
5754 mull3 r1,-420(fp),-412(fp)
5755 mull2 r0,-420(fp)
5756 addl3 -408(fp),-412(fp),r0
5757 bicl3 #0,r0,-408(fp)
5758 cmpl -408(fp),-412(fp)
5759 bgequ noname.546
5760 addl2 #65536,-420(fp)
5761noname.546:
5762 movzwl -406(fp),r0
5763 bicl2 #-65536,r0
5764 addl2 r0,-420(fp)
5765 bicl3 #-65536,-408(fp),r0
5766 ashl #16,r0,-412(fp)
5767 addl3 -412(fp),-416(fp),r0
5768 bicl3 #0,r0,-416(fp)
5769 cmpl -416(fp),-412(fp)
5770 bgequ noname.547
5771 incl -420(fp)
5772noname.547:
5773 movl -416(fp),r3
5774 movl -420(fp),r2
5775 bbc #31,r2,noname.548
5776 incl r7
5777noname.548:
5778 addl2 r2,r2
5779 bicl2 #0,r2
5780 bbc #31,r3,noname.549
5781 incl r2
5782noname.549:
5783 addl2 r3,r3
5784 bicl2 #0,r3
5785 addl2 r3,r9
5786 bicl2 #0,r9
5787 cmpl r9,r3
5788 bgequ noname.550
5789 incl r2
5790 bicl3 #0,r2,r0
5791 bneq noname.550
5792 incl r7
5793noname.550:
5794 addl2 r2,r8
5795 bicl2 #0,r8
5796 cmpl r8,r2
5797 bgequ noname.551
5798 incl r7
5799noname.551:
5800
5801 movl 4(ap),r0
5802 movl r9,48(r0)
5803
5804 clrl r9
5805
5806 movl 8(ap),r0
5807 movzwl 30(r0),r2
5808 bicl3 #-65536,24(r0),r3
5809 movzwl 26(r0),r1
5810 bicl2 #-65536,r1
5811 bicl3 #-65536,28(r0),-432(fp)
5812 bicl3 #-65536,r2,-436(fp)
5813 mull3 r1,-432(fp),-424(fp)
5814 mull2 r3,-432(fp)
5815 mull3 r3,-436(fp),-428(fp)
5816 mull2 r1,-436(fp)
5817 addl3 -424(fp),-428(fp),r0
5818 bicl3 #0,r0,-424(fp)
5819 cmpl -424(fp),-428(fp)
5820 bgequ noname.552
5821 addl2 #65536,-436(fp)
5822noname.552:
5823 movzwl -422(fp),r0
5824 bicl2 #-65536,r0
5825 addl2 r0,-436(fp)
5826 bicl3 #-65536,-424(fp),r0
5827 ashl #16,r0,-428(fp)
5828 addl3 -428(fp),-432(fp),r0
5829 bicl3 #0,r0,-432(fp)
5830 cmpl -432(fp),-428(fp)
5831 bgequ noname.553
5832 incl -436(fp)
5833noname.553:
5834 movl -432(fp),r3
5835 movl -436(fp),r2
5836 bbc #31,r2,noname.554
5837 incl r9
5838noname.554:
5839 addl2 r2,r2
5840 bicl2 #0,r2
5841 bbc #31,r3,noname.555
5842 incl r2
5843noname.555:
5844 addl2 r3,r3
5845 bicl2 #0,r3
5846 addl2 r3,r8
5847 bicl2 #0,r8
5848 cmpl r8,r3
5849 bgequ noname.556
5850 incl r2
5851 bicl3 #0,r2,r0
5852 bneq noname.556
5853 incl r9
5854noname.556:
5855 addl2 r2,r7
5856 bicl2 #0,r7
5857 cmpl r7,r2
5858 bgequ noname.557
5859 incl r9
5860noname.557:
5861
5862 movl 4(ap),r4
5863 movl r8,52(r4)
5864
5865 clrl r8
5866
5867 movl 8(ap),r0
5868 movl 28(r0),r3
5869 bicl3 #-65536,r3,-440(fp)
5870 extzv #16,#16,r3,r0
5871 bicl3 #-65536,r0,r3
5872 movl -440(fp),r0
5873 mull3 r0,r3,-444(fp)
5874 mull3 r0,r0,-440(fp)
5875 mull2 r3,r3
5876 bicl3 #32767,-444(fp),r0
5877 extzv #15,#17,r0,r0
5878 addl2 r0,r3
5879 bicl3 #-65536,-444(fp),r0
5880 ashl #17,r0,-444(fp)
5881 addl3 -440(fp),-444(fp),r0
5882 bicl3 #0,r0,-440(fp)
5883 cmpl -440(fp),-444(fp)
5884 bgequ noname.558
5885 incl r3
5886noname.558:
5887 movl -440(fp),r1
5888 movl r3,r2
5889 addl2 r1,r7
5890 bicl2 #0,r7
5891 cmpl r7,r1
5892 bgequ noname.559
5893 incl r2
5894noname.559:
5895 addl2 r2,r9
5896 bicl2 #0,r9
5897 cmpl r9,r2
5898 bgequ noname.560
5899 incl r8
5900noname.560:
5901
5902 movl r7,56(r4)
5903
5904 movl r9,60(r4)
5905
5906 ret
5907
5908
5909
5910;r=4 ;(AP)
5911;a=8 ;(AP)
5912;b=12 ;(AP)
5913;n=16 ;(AP) n by value (input)
5914
5915 .psect code,nowrt
5916
5917.entry BN_SQR_COMBA4,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10>
5918 subl2 #44,sp
5919
5920 clrq r8
5921
5922 clrl r10
5923
5924 movl 8(ap),r5
5925 movl (r5),r3
5926 bicl3 #-65536,r3,r4
5927 extzv #16,#16,r3,r0
5928 bicl3 #-65536,r0,r3
5929 mull3 r4,r3,-4(fp)
5930 mull2 r4,r4
5931 mull2 r3,r3
5932 bicl3 #32767,-4(fp),r0
5933 extzv #15,#17,r0,r0
5934 addl2 r0,r3
5935 bicl3 #-65536,-4(fp),r0
5936 ashl #17,r0,-4(fp)
5937 addl2 -4(fp),r4
5938 bicl2 #0,r4
5939 cmpl r4,-4(fp)
5940 bgequ noname.563
5941 incl r3
5942noname.563:
5943 movl r4,r1
5944 movl r3,r2
5945 addl2 r1,r9
5946 bicl2 #0,r9
5947 cmpl r9,r1
5948 bgequ noname.564
5949 incl r2
5950noname.564:
5951 addl2 r2,r8
5952 bicl2 #0,r8
5953 cmpl r8,r2
5954 bgequ noname.565
5955 incl r10
5956noname.565:
5957
5958 movl r9,@4(ap)
5959
5960 clrl r9
5961
5962 bicl3 #-65536,4(r5),r3
5963 movzwl 6(r5),r1
5964 bicl2 #-65536,r1
5965 bicl3 #-65536,(r5),r2
5966 movzwl 2(r5),r0
5967 bicl2 #-65536,r0
5968 movl r3,r6
5969 movl r1,r4
5970 mull3 r0,r6,-8(fp)
5971 mull2 r2,r6
5972 mull2 r4,r2
5973 mull2 r0,r4
5974 addl3 -8(fp),r2,r0
5975 bicl3 #0,r0,-8(fp)
5976 cmpl -8(fp),r2
5977 bgequ noname.566
5978 addl2 #65536,r4
5979noname.566:
5980 movzwl -6(fp),r0
5981 bicl2 #-65536,r0
5982 addl2 r0,r4
5983 bicl3 #-65536,-8(fp),r0
5984 ashl #16,r0,r1
5985 addl2 r1,r6
5986 bicl2 #0,r6
5987 cmpl r6,r1
5988 bgequ noname.567
5989 incl r4
5990noname.567:
5991 movl r6,r3
5992 movl r4,r2
5993 bbc #31,r2,noname.568
5994 incl r9
5995noname.568:
5996 addl2 r2,r2
5997 bicl2 #0,r2
5998 bbc #31,r3,noname.569
5999 incl r2
6000noname.569:
6001 addl2 r3,r3
6002 bicl2 #0,r3
6003 addl2 r3,r8
6004 bicl2 #0,r8
6005 cmpl r8,r3
6006 bgequ noname.570
6007 incl r2
6008 bicl3 #0,r2,r0
6009 bneq noname.570
6010 incl r9
6011noname.570:
6012 addl2 r2,r10
6013 bicl2 #0,r10
6014 cmpl r10,r2
6015 bgequ noname.571
6016 incl r9
6017noname.571:
6018
6019 movl 4(ap),r0
6020 movl r8,4(r0)
6021
6022 clrl r8
6023
6024 movl 8(ap),r4
6025 movl 4(r4),r3
6026 bicl3 #-65536,r3,r5
6027 extzv #16,#16,r3,r0
6028 bicl3 #-65536,r0,r3
6029 mull3 r5,r3,r1
6030 mull2 r5,r5
6031 mull2 r3,r3
6032 bicl3 #32767,r1,r0
6033 extzv #15,#17,r0,r0
6034 addl2 r0,r3
6035 bicl2 #-65536,r1
6036 ashl #17,r1,r1
6037 addl2 r1,r5
6038 bicl2 #0,r5
6039 cmpl r5,r1
6040 bgequ noname.572
6041 incl r3
6042noname.572:
6043 movl r5,r1
6044 movl r3,r2
6045 addl2 r1,r10
6046 bicl2 #0,r10
6047 cmpl r10,r1
6048 bgequ noname.573
6049 incl r2
6050noname.573:
6051 addl2 r2,r9
6052 bicl2 #0,r9
6053 cmpl r9,r2
6054 bgequ noname.574
6055 incl r8
6056noname.574:
6057
6058 bicl3 #-65536,8(r4),r3
6059 movzwl 10(r4),r1
6060 bicl2 #-65536,r1
6061 bicl3 #-65536,(r4),r2
6062 movzwl 2(r4),r0
6063 bicl2 #-65536,r0
6064 movl r3,r6
6065 movl r1,r5
6066 mull3 r0,r6,r7
6067 mull2 r2,r6
6068 mull2 r5,r2
6069 mull2 r0,r5
6070 addl2 r2,r7
6071 bicl2 #0,r7
6072 cmpl r7,r2
6073 bgequ noname.575
6074 addl2 #65536,r5
6075noname.575:
6076 extzv #16,#16,r7,r0
6077 bicl2 #-65536,r0
6078 addl2 r0,r5
6079 bicl3 #-65536,r7,r0
6080 ashl #16,r0,r1
6081 addl2 r1,r6
6082 bicl2 #0,r6
6083 cmpl r6,r1
6084 bgequ noname.576
6085 incl r5
6086noname.576:
6087 movl r6,r3
6088 movl r5,r2
6089 bbc #31,r2,noname.577
6090 incl r8
6091noname.577:
6092 addl2 r2,r2
6093 bicl2 #0,r2
6094 bbc #31,r3,noname.578
6095 incl r2
6096noname.578:
6097 addl2 r3,r3
6098 bicl2 #0,r3
6099 addl2 r3,r10
6100 bicl2 #0,r10
6101 cmpl r10,r3
6102 bgequ noname.579
6103 incl r2
6104 bicl3 #0,r2,r0
6105 bneq noname.579
6106 incl r8
6107noname.579:
6108 addl2 r2,r9
6109 bicl2 #0,r9
6110 cmpl r9,r2
6111 bgequ noname.580
6112 incl r8
6113noname.580:
6114
6115 movl 4(ap),r0
6116 movl r10,8(r0)
6117
6118 clrl r10
6119
6120 movl 8(ap),r0
6121 bicl3 #-65536,12(r0),r3
6122 movzwl 14(r0),r1
6123 bicl2 #-65536,r1
6124 bicl3 #-65536,(r0),r2
6125 movzwl 2(r0),r0
6126 bicl2 #-65536,r0
6127 movl r3,r5
6128 movl r1,r4
6129 mull3 r0,r5,r6
6130 mull2 r2,r5
6131 mull3 r2,r4,-12(fp)
6132 mull2 r0,r4
6133 addl2 -12(fp),r6
6134 bicl2 #0,r6
6135 cmpl r6,-12(fp)
6136 bgequ noname.581
6137 addl2 #65536,r4
6138noname.581:
6139 extzv #16,#16,r6,r0
6140 bicl2 #-65536,r0
6141 addl2 r0,r4
6142 bicl3 #-65536,r6,r0
6143 ashl #16,r0,-12(fp)
6144 addl2 -12(fp),r5
6145 bicl2 #0,r5
6146 cmpl r5,-12(fp)
6147 bgequ noname.582
6148 incl r4
6149noname.582:
6150 movl r5,r3
6151 movl r4,r2
6152 bbc #31,r2,noname.583
6153 incl r10
6154noname.583:
6155 addl2 r2,r2
6156 bicl2 #0,r2
6157 bbc #31,r3,noname.584
6158 incl r2
6159noname.584:
6160 addl2 r3,r3
6161 bicl2 #0,r3
6162 addl2 r3,r9
6163 bicl2 #0,r9
6164 cmpl r9,r3
6165 bgequ noname.585
6166 incl r2
6167 bicl3 #0,r2,r0
6168 bneq noname.585
6169 incl r10
6170noname.585:
6171 addl2 r2,r8
6172 bicl2 #0,r8
6173 cmpl r8,r2
6174 bgequ noname.586
6175 incl r10
6176noname.586:
6177
6178 movl 8(ap),r0
6179 bicl3 #-65536,8(r0),r3
6180 movzwl 10(r0),r1
6181 bicl2 #-65536,r1
6182 bicl3 #-65536,4(r0),r2
6183 movzwl 6(r0),r0
6184 bicl2 #-65536,r0
6185 movl r3,r5
6186 movl r1,r4
6187 mull3 r0,r5,-16(fp)
6188 mull2 r2,r5
6189 mull3 r2,r4,-20(fp)
6190 mull2 r0,r4
6191 addl3 -16(fp),-20(fp),r0
6192 bicl3 #0,r0,-16(fp)
6193 cmpl -16(fp),-20(fp)
6194 bgequ noname.587
6195 addl2 #65536,r4
6196noname.587:
6197 movzwl -14(fp),r0
6198 bicl2 #-65536,r0
6199 addl2 r0,r4
6200 bicl3 #-65536,-16(fp),r0
6201 ashl #16,r0,-20(fp)
6202 addl2 -20(fp),r5
6203 bicl2 #0,r5
6204 cmpl r5,-20(fp)
6205 bgequ noname.588
6206 incl r4
6207noname.588:
6208 movl r5,r3
6209 movl r4,r2
6210 bbc #31,r2,noname.589
6211 incl r10
6212noname.589:
6213 addl2 r2,r2
6214 bicl2 #0,r2
6215 bbc #31,r3,noname.590
6216 incl r2
6217noname.590:
6218 addl2 r3,r3
6219 bicl2 #0,r3
6220 addl2 r3,r9
6221 bicl2 #0,r9
6222 cmpl r9,r3
6223 bgequ noname.591
6224 incl r2
6225 bicl3 #0,r2,r0
6226 bneq noname.591
6227 incl r10
6228noname.591:
6229 addl2 r2,r8
6230 bicl2 #0,r8
6231 cmpl r8,r2
6232 bgequ noname.592
6233 incl r10
6234noname.592:
6235 movl 4(ap),r0
6236 movl r9,12(r0)
6237
6238 clrl r9
6239
6240 movl 8(ap),r3
6241 movl 8(r3),r4
6242 bicl3 #-65536,r4,r5
6243 extzv #16,#16,r4,r0
6244 bicl3 #-65536,r0,r4
6245 mull3 r5,r4,-24(fp)
6246 mull2 r5,r5
6247 mull2 r4,r4
6248 bicl3 #32767,-24(fp),r0
6249 extzv #15,#17,r0,r0
6250 addl2 r0,r4
6251 bicl3 #-65536,-24(fp),r0
6252 ashl #17,r0,-24(fp)
6253 addl2 -24(fp),r5
6254 bicl2 #0,r5
6255 cmpl r5,-24(fp)
6256 bgequ noname.593
6257 incl r4
6258noname.593:
6259 movl r5,r1
6260 movl r4,r2
6261 addl2 r1,r8
6262 bicl2 #0,r8
6263 cmpl r8,r1
6264 bgequ noname.594
6265 incl r2
6266noname.594:
6267 addl2 r2,r10
6268 bicl2 #0,r10
6269 cmpl r10,r2
6270 bgequ noname.595
6271 incl r9
6272noname.595:
6273
6274 bicl3 #-65536,12(r3),r4
6275 movzwl 14(r3),r1
6276 bicl2 #-65536,r1
6277 bicl3 #-65536,4(r3),r2
6278 movzwl 6(r3),r0
6279 bicl2 #-65536,r0
6280 movl r4,r6
6281 movl r1,r5
6282 mull3 r0,r6,-28(fp)
6283 mull2 r2,r6
6284 mull3 r2,r5,-32(fp)
6285 mull2 r0,r5
6286 addl3 -28(fp),-32(fp),r0
6287 bicl3 #0,r0,-28(fp)
6288 cmpl -28(fp),-32(fp)
6289 bgequ noname.596
6290 addl2 #65536,r5
6291noname.596:
6292 movzwl -26(fp),r0
6293 bicl2 #-65536,r0
6294 addl2 r0,r5
6295 bicl3 #-65536,-28(fp),r0
6296 ashl #16,r0,-32(fp)
6297 addl2 -32(fp),r6
6298 bicl2 #0,r6
6299 cmpl r6,-32(fp)
6300 bgequ noname.597
6301 incl r5
6302noname.597:
6303 movl r6,r3
6304 movl r5,r2
6305 bbc #31,r2,noname.598
6306 incl r9
6307noname.598:
6308 addl2 r2,r2
6309 bicl2 #0,r2
6310 bbc #31,r3,noname.599
6311 incl r2
6312noname.599:
6313 addl2 r3,r3
6314 bicl2 #0,r3
6315 addl2 r3,r8
6316 bicl2 #0,r8
6317 cmpl r8,r3
6318 bgequ noname.600
6319 incl r2
6320 bicl3 #0,r2,r0
6321 bneq noname.600
6322 incl r9
6323noname.600:
6324 addl2 r2,r10
6325 bicl2 #0,r10
6326 cmpl r10,r2
6327 bgequ noname.601
6328 incl r9
6329noname.601:
6330
6331 movl 4(ap),r0
6332 movl r8,16(r0)
6333
6334 clrl r8
6335
6336 movl 8(ap),r0
6337 bicl3 #-65536,12(r0),r3
6338 movzwl 14(r0),r1
6339 bicl2 #-65536,r1
6340 bicl3 #-65536,8(r0),r2
6341 movzwl 10(r0),r0
6342 bicl2 #-65536,r0
6343 movl r3,r5
6344 movl r1,r4
6345 mull3 r0,r5,-36(fp)
6346 mull2 r2,r5
6347 mull3 r2,r4,-40(fp)
6348 mull2 r0,r4
6349 addl3 -36(fp),-40(fp),r0
6350 bicl3 #0,r0,-36(fp)
6351 cmpl -36(fp),-40(fp)
6352 bgequ noname.602
6353 addl2 #65536,r4
6354noname.602:
6355 movzwl -34(fp),r0
6356 bicl2 #-65536,r0
6357 addl2 r0,r4
6358 bicl3 #-65536,-36(fp),r0
6359 ashl #16,r0,-40(fp)
6360 addl2 -40(fp),r5
6361 bicl2 #0,r5
6362 cmpl r5,-40(fp)
6363 bgequ noname.603
6364 incl r4
6365noname.603:
6366 movl r5,r3
6367 movl r4,r2
6368 bbc #31,r2,noname.604
6369 incl r8
6370noname.604:
6371 addl2 r2,r2
6372 bicl2 #0,r2
6373 bbc #31,r3,noname.605
6374 incl r2
6375noname.605:
6376 addl2 r3,r3
6377 bicl2 #0,r3
6378 addl2 r3,r10
6379 bicl2 #0,r10
6380 cmpl r10,r3
6381 bgequ noname.606
6382 incl r2
6383 bicl3 #0,r2,r0
6384 bneq noname.606
6385 incl r8
6386noname.606:
6387 addl2 r2,r9
6388 bicl2 #0,r9
6389 cmpl r9,r2
6390 bgequ noname.607
6391 incl r8
6392noname.607:
6393
6394 movl 4(ap),r4
6395 movl r10,20(r4)
6396
6397 clrl r10
6398
6399 movl 8(ap),r0
6400 movl 12(r0),r3
6401 bicl3 #-65536,r3,r5
6402 extzv #16,#16,r3,r0
6403 bicl3 #-65536,r0,r3
6404 mull3 r5,r3,-44(fp)
6405 mull2 r5,r5
6406 mull2 r3,r3
6407 bicl3 #32767,-44(fp),r0
6408 extzv #15,#17,r0,r0
6409 addl2 r0,r3
6410 bicl3 #-65536,-44(fp),r0
6411 ashl #17,r0,-44(fp)
6412 addl2 -44(fp),r5
6413 bicl2 #0,r5
6414 cmpl r5,-44(fp)
6415 bgequ noname.608
6416 incl r3
6417noname.608:
6418 movl r5,r1
6419 movl r3,r2
6420 addl2 r1,r9
6421 bicl2 #0,r9
6422 cmpl r9,r1
6423 bgequ noname.609
6424 incl r2
6425noname.609:
6426 addl2 r2,r8
6427 bicl2 #0,r8
6428 cmpl r8,r2
6429 bgequ noname.610
6430 incl r10
6431noname.610:
6432
6433 movl r9,24(r4)
6434
6435 movl r8,28(r4)
6436
6437 ret
6438
6439; For now, the code below doesn't work, so I end this prematurely.
6440.end
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl
deleted file mode 100755
index e8f6b05084..0000000000
--- a/src/lib/libcrypto/bn/asm/x86-mont.pl
+++ /dev/null
@@ -1,593 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005
11#
12# This is a "teaser" code, as it can be improved in several ways...
13# First of all non-SSE2 path should be implemented (yes, for now it
14# performs Montgomery multiplication/convolution only on SSE2-capable
15# CPUs such as P4, others fall down to original code). Then inner loop
16# can be unrolled and modulo-scheduled to improve ILP and possibly
17# moved to 128-bit XMM register bank (though it would require input
18# rearrangement and/or increase bus bandwidth utilization). Dedicated
19# squaring procedure should give further performance improvement...
20# Yet, for being draft, the code improves rsa512 *sign* benchmark by
21# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-)
22
23# December 2006
24#
25# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
26# Integer-only code [being equipped with dedicated squaring procedure]
27# gives ~40% on rsa512 sign benchmark...
28
29$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
30push(@INC,"${dir}","${dir}../../perlasm");
31require "x86asm.pl";
32
33&asm_init($ARGV[0],$0);
34
35$sse2=0;
36for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
37
38&external_label("OPENSSL_ia32cap_P") if ($sse2);
39
40&function_begin("bn_mul_mont");
41
42$i="edx";
43$j="ecx";
44$ap="esi"; $tp="esi"; # overlapping variables!!!
45$rp="edi"; $bp="edi"; # overlapping variables!!!
46$np="ebp";
47$num="ebx";
48
49$_num=&DWP(4*0,"esp"); # stack top layout
50$_rp=&DWP(4*1,"esp");
51$_ap=&DWP(4*2,"esp");
52$_bp=&DWP(4*3,"esp");
53$_np=&DWP(4*4,"esp");
54$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp");
55$_sp=&DWP(4*6,"esp");
56$_bpend=&DWP(4*7,"esp");
57$frame=32; # size of above frame rounded up to 16n
58
59 &xor ("eax","eax");
60 &mov ("edi",&wparam(5)); # int num
61 &cmp ("edi",4);
62 &jl (&label("just_leave"));
63
64 &lea ("esi",&wparam(0)); # put aside pointer to argument block
65 &lea ("edx",&wparam(1)); # load ap
66 &mov ("ebp","esp"); # saved stack pointer!
67 &add ("edi",2); # extra two words on top of tp
68 &neg ("edi");
69 &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
70 &neg ("edi");
71
72 # minimize cache contention by arraning 2K window between stack
73 # pointer and ap argument [np is also position sensitive vector,
74 # but it's assumed to be near ap, as it's allocated at ~same
75 # time].
76 &mov ("eax","esp");
77 &sub ("eax","edx");
78 &and ("eax",2047);
79 &sub ("esp","eax"); # this aligns sp and ap modulo 2048
80
81 &xor ("edx","esp");
82 &and ("edx",2048);
83 &xor ("edx",2048);
84 &sub ("esp","edx"); # this splits them apart modulo 4096
85
86 &and ("esp",-64); # align to cache line
87
88 ################################# load argument block...
89 &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
90 &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
91 &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
92 &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
93 &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
94 #&mov ("edi",&DWP(5*4,"esi"));# int num
95
96 &mov ("esi",&DWP(0,"esi")); # pull n0[0]
97 &mov ($_rp,"eax"); # ... save a copy of argument block
98 &mov ($_ap,"ebx");
99 &mov ($_bp,"ecx");
100 &mov ($_np,"edx");
101 &mov ($_n0,"esi");
102 &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
103 #&mov ($_num,$num); # redundant as $num is not reused
104 &mov ($_sp,"ebp"); # saved stack pointer!
105
106if($sse2) {
107$acc0="mm0"; # mmx register bank layout
108$acc1="mm1";
109$car0="mm2";
110$car1="mm3";
111$mul0="mm4";
112$mul1="mm5";
113$temp="mm6";
114$mask="mm7";
115
116 &picmeup("eax","OPENSSL_ia32cap_P");
117 &bt (&DWP(0,"eax"),26);
118 &jnc (&label("non_sse2"));
119
120 &mov ("eax",-1);
121 &movd ($mask,"eax"); # mask 32 lower bits
122
123 &mov ($ap,$_ap); # load input pointers
124 &mov ($bp,$_bp);
125 &mov ($np,$_np);
126
127 &xor ($i,$i); # i=0
128 &xor ($j,$j); # j=0
129
130 &movd ($mul0,&DWP(0,$bp)); # bp[0]
131 &movd ($mul1,&DWP(0,$ap)); # ap[0]
132 &movd ($car1,&DWP(0,$np)); # np[0]
133
134 &pmuludq($mul1,$mul0); # ap[0]*bp[0]
135 &movq ($car0,$mul1);
136 &movq ($acc0,$mul1); # I wish movd worked for
137 &pand ($acc0,$mask); # inter-register transfers
138
139 &pmuludq($mul1,$_n0q); # *=n0
140
141 &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
142 &paddq ($car1,$acc0);
143
144 &movd ($acc1,&DWP(4,$np)); # np[1]
145 &movd ($acc0,&DWP(4,$ap)); # ap[1]
146
147 &psrlq ($car0,32);
148 &psrlq ($car1,32);
149
150 &inc ($j); # j++
151&set_label("1st",16);
152 &pmuludq($acc0,$mul0); # ap[j]*bp[0]
153 &pmuludq($acc1,$mul1); # np[j]*m1
154 &paddq ($car0,$acc0); # +=c0
155 &paddq ($car1,$acc1); # +=c1
156
157 &movq ($acc0,$car0);
158 &pand ($acc0,$mask);
159 &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
160 &paddq ($car1,$acc0); # +=ap[j]*bp[0];
161 &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
162 &psrlq ($car0,32);
163 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
164 &psrlq ($car1,32);
165
166 &lea ($j,&DWP(1,$j));
167 &cmp ($j,$num);
168 &jl (&label("1st"));
169
170 &pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
171 &pmuludq($acc1,$mul1); # np[num-1]*m1
172 &paddq ($car0,$acc0); # +=c0
173 &paddq ($car1,$acc1); # +=c1
174
175 &movq ($acc0,$car0);
176 &pand ($acc0,$mask);
177 &paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
178 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
179
180 &psrlq ($car0,32);
181 &psrlq ($car1,32);
182
183 &paddq ($car1,$car0);
184 &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
185
186 &inc ($i); # i++
187&set_label("outer");
188 &xor ($j,$j); # j=0
189
190 &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
191 &movd ($mul1,&DWP(0,$ap)); # ap[0]
192 &movd ($temp,&DWP($frame,"esp")); # tp[0]
193 &movd ($car1,&DWP(0,$np)); # np[0]
194 &pmuludq($mul1,$mul0); # ap[0]*bp[i]
195
196 &paddq ($mul1,$temp); # +=tp[0]
197 &movq ($acc0,$mul1);
198 &movq ($car0,$mul1);
199 &pand ($acc0,$mask);
200
201 &pmuludq($mul1,$_n0q); # *=n0
202
203 &pmuludq($car1,$mul1);
204 &paddq ($car1,$acc0);
205
206 &movd ($temp,&DWP($frame+4,"esp")); # tp[1]
207 &movd ($acc1,&DWP(4,$np)); # np[1]
208 &movd ($acc0,&DWP(4,$ap)); # ap[1]
209
210 &psrlq ($car0,32);
211 &psrlq ($car1,32);
212 &paddq ($car0,$temp); # +=tp[1]
213
214 &inc ($j); # j++
215 &dec ($num);
216&set_label("inner");
217 &pmuludq($acc0,$mul0); # ap[j]*bp[i]
218 &pmuludq($acc1,$mul1); # np[j]*m1
219 &paddq ($car0,$acc0); # +=c0
220 &paddq ($car1,$acc1); # +=c1
221
222 &movq ($acc0,$car0);
223 &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
224 &pand ($acc0,$mask);
225 &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
226 &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
227 &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
228 &psrlq ($car0,32);
229 &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
230 &psrlq ($car1,32);
231 &paddq ($car0,$temp); # +=tp[j+1]
232
233 &dec ($num);
234 &lea ($j,&DWP(1,$j)); # j++
235 &jnz (&label("inner"));
236
237 &mov ($num,$j);
238 &pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
239 &pmuludq($acc1,$mul1); # np[num-1]*m1
240 &paddq ($car0,$acc0); # +=c0
241 &paddq ($car1,$acc1); # +=c1
242
243 &movq ($acc0,$car0);
244 &pand ($acc0,$mask);
245 &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
246 &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
247 &psrlq ($car0,32);
248 &psrlq ($car1,32);
249
250 &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
251 &paddq ($car1,$car0);
252 &paddq ($car1,$temp);
253 &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
254
255 &lea ($i,&DWP(1,$i)); # i++
256 &cmp ($i,$num);
257 &jle (&label("outer"));
258
259 &emms (); # done with mmx bank
260 &jmp (&label("common_tail"));
261
262&set_label("non_sse2",16);
263}
264
265if (0) {
266 &mov ("esp",$_sp);
267 &xor ("eax","eax"); # signal "not fast enough [yet]"
268 &jmp (&label("just_leave"));
269 # While the below code provides competitive performance for
270 # all key lengthes on modern Intel cores, it's still more
271 # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
272 # means compared to the original integer-only assembler.
273 # 512-bit RSA sign is better by ~40%, but that's about all
274 # one can say about all CPUs...
275} else {
276$inp="esi"; # integer path uses these registers differently
277$word="edi";
278$carry="ebp";
279
280 &mov ($inp,$_ap);
281 &lea ($carry,&DWP(1,$num));
282 &mov ($word,$_bp);
283 &xor ($j,$j); # j=0
284 &mov ("edx",$inp);
285 &and ($carry,1); # see if num is even
286 &sub ("edx",$word); # see if ap==bp
287 &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
288 &or ($carry,"edx");
289 &mov ($word,&DWP(0,$word)); # bp[0]
290 &jz (&label("bn_sqr_mont"));
291 &mov ($_bpend,"eax");
292 &mov ("eax",&DWP(0,$inp));
293 &xor ("edx","edx");
294
295&set_label("mull",16);
296 &mov ($carry,"edx");
297 &mul ($word); # ap[j]*bp[0]
298 &add ($carry,"eax");
299 &lea ($j,&DWP(1,$j));
300 &adc ("edx",0);
301 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
302 &cmp ($j,$num);
303 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
304 &jl (&label("mull"));
305
306 &mov ($carry,"edx");
307 &mul ($word); # ap[num-1]*bp[0]
308 &mov ($word,$_n0);
309 &add ("eax",$carry);
310 &mov ($inp,$_np);
311 &adc ("edx",0);
312 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
313
314 &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
315 &xor ($j,$j);
316 &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
317 &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
318
319 &mov ("eax",&DWP(0,$inp)); # np[0]
320 &mul ($word); # np[0]*m
321 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
322 &mov ("eax",&DWP(4,$inp)); # np[1]
323 &adc ("edx",0);
324 &inc ($j);
325
326 &jmp (&label("2ndmadd"));
327
328&set_label("1stmadd",16);
329 &mov ($carry,"edx");
330 &mul ($word); # ap[j]*bp[i]
331 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
332 &lea ($j,&DWP(1,$j));
333 &adc ("edx",0);
334 &add ($carry,"eax");
335 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
336 &adc ("edx",0);
337 &cmp ($j,$num);
338 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
339 &jl (&label("1stmadd"));
340
341 &mov ($carry,"edx");
342 &mul ($word); # ap[num-1]*bp[i]
343 &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
344 &mov ($word,$_n0);
345 &adc ("edx",0);
346 &mov ($inp,$_np);
347 &add ($carry,"eax");
348 &adc ("edx",0);
349 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
350
351 &xor ($j,$j);
352 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
353 &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
354 &adc ($j,0);
355 &mov ("eax",&DWP(0,$inp)); # np[0]
356 &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
357 &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
358
359 &mul ($word); # np[0]*m
360 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
361 &mov ("eax",&DWP(4,$inp)); # np[1]
362 &adc ("edx",0);
363 &mov ($j,1);
364
365&set_label("2ndmadd",16);
366 &mov ($carry,"edx");
367 &mul ($word); # np[j]*m
368 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
369 &lea ($j,&DWP(1,$j));
370 &adc ("edx",0);
371 &add ($carry,"eax");
372 &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
373 &adc ("edx",0);
374 &cmp ($j,$num);
375 &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
376 &jl (&label("2ndmadd"));
377
378 &mov ($carry,"edx");
379 &mul ($word); # np[j]*m
380 &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
381 &adc ("edx",0);
382 &add ($carry,"eax");
383 &adc ("edx",0);
384 &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
385
386 &xor ("eax","eax");
387 &mov ($j,$_bp); # &bp[i]
388 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
389 &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
390 &lea ($j,&DWP(4,$j));
391 &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
392 &cmp ($j,$_bpend);
393 &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
394 &je (&label("common_tail"));
395
396 &mov ($word,&DWP(0,$j)); # bp[i+1]
397 &mov ($inp,$_ap);
398 &mov ($_bp,$j); # &bp[++i]
399 &xor ($j,$j);
400 &xor ("edx","edx");
401 &mov ("eax",&DWP(0,$inp));
402 &jmp (&label("1stmadd"));
403
404&set_label("bn_sqr_mont",16);
405$sbit=$num;
406 &mov ($_num,$num);
407 &mov ($_bp,$j); # i=0
408
409 &mov ("eax",$word); # ap[0]
410 &mul ($word); # ap[0]*ap[0]
411 &mov (&DWP($frame,"esp"),"eax"); # tp[0]=
412 &mov ($sbit,"edx");
413 &shr ("edx",1);
414 &and ($sbit,1);
415 &inc ($j);
416&set_label("sqr",16);
417 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
418 &mov ($carry,"edx");
419 &mul ($word); # ap[j]*ap[0]
420 &add ("eax",$carry);
421 &lea ($j,&DWP(1,$j));
422 &adc ("edx",0);
423 &lea ($carry,&DWP(0,$sbit,"eax",2));
424 &shr ("eax",31);
425 &cmp ($j,$_num);
426 &mov ($sbit,"eax");
427 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
428 &jl (&label("sqr"));
429
430 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
431 &mov ($carry,"edx");
432 &mul ($word); # ap[num-1]*ap[0]
433 &add ("eax",$carry);
434 &mov ($word,$_n0);
435 &adc ("edx",0);
436 &mov ($inp,$_np);
437 &lea ($carry,&DWP(0,$sbit,"eax",2));
438 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
439 &shr ("eax",31);
440 &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
441
442 &lea ($carry,&DWP(0,"eax","edx",2));
443 &mov ("eax",&DWP(0,$inp)); # np[0]
444 &shr ("edx",31);
445 &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
446 &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
447
448 &mul ($word); # np[0]*m
449 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
450 &mov ($num,$j);
451 &adc ("edx",0);
452 &mov ("eax",&DWP(4,$inp)); # np[1]
453 &mov ($j,1);
454
455&set_label("3rdmadd",16);
456 &mov ($carry,"edx");
457 &mul ($word); # np[j]*m
458 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
459 &adc ("edx",0);
460 &add ($carry,"eax");
461 &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
462 &adc ("edx",0);
463 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
464
465 &mov ($carry,"edx");
466 &mul ($word); # np[j+1]*m
467 &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
468 &lea ($j,&DWP(2,$j));
469 &adc ("edx",0);
470 &add ($carry,"eax");
471 &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
472 &adc ("edx",0);
473 &cmp ($j,$num);
474 &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
475 &jl (&label("3rdmadd"));
476
477 &mov ($carry,"edx");
478 &mul ($word); # np[j]*m
479 &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
480 &adc ("edx",0);
481 &add ($carry,"eax");
482 &adc ("edx",0);
483 &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
484
485 &mov ($j,$_bp); # i
486 &xor ("eax","eax");
487 &mov ($inp,$_ap);
488 &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
489 &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
490 &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
491 &cmp ($j,$num);
492 &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
493 &je (&label("common_tail"));
494
495 &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
496 &lea ($j,&DWP(1,$j));
497 &mov ("eax",$word);
498 &mov ($_bp,$j); # ++i
499 &mul ($word); # ap[i]*ap[i]
500 &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
501 &adc ("edx",0);
502 &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
503 &xor ($carry,$carry);
504 &cmp ($j,$num);
505 &lea ($j,&DWP(1,$j));
506 &je (&label("sqrlast"));
507
508 &mov ($sbit,"edx"); # zaps $num
509 &shr ("edx",1);
510 &and ($sbit,1);
511&set_label("sqradd",16);
512 &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
513 &mov ($carry,"edx");
514 &mul ($word); # ap[j]*ap[i]
515 &add ("eax",$carry);
516 &lea ($carry,&DWP(0,"eax","eax"));
517 &adc ("edx",0);
518 &shr ("eax",31);
519 &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
520 &lea ($j,&DWP(1,$j));
521 &adc ("eax",0);
522 &add ($carry,$sbit);
523 &adc ("eax",0);
524 &cmp ($j,$_num);
525 &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
526 &mov ($sbit,"eax");
527 &jle (&label("sqradd"));
528
529 &mov ($carry,"edx");
530 &add ("edx","edx");
531 &shr ($carry,31);
532 &add ("edx",$sbit);
533 &adc ($carry,0);
534&set_label("sqrlast");
535 &mov ($word,$_n0);
536 &mov ($inp,$_np);
537 &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
538
539 &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
540 &mov ("eax",&DWP(0,$inp)); # np[0]
541 &adc ($carry,0);
542 &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
543 &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
544
545 &mul ($word); # np[0]*m
546 &add ("eax",&DWP($frame,"esp")); # +=tp[0]
547 &lea ($num,&DWP(-1,$j));
548 &adc ("edx",0);
549 &mov ($j,1);
550 &mov ("eax",&DWP(4,$inp)); # np[1]
551
552 &jmp (&label("3rdmadd"));
553}
554
555&set_label("common_tail",16);
556 &mov ($np,$_np); # load modulus pointer
557 &mov ($rp,$_rp); # load result pointer
558 &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
559
560 &mov ("eax",&DWP(0,$tp)); # tp[0]
561 &mov ($j,$num); # j=num-1
562 &xor ($i,$i); # i=0 and clear CF!
563
564&set_label("sub",16);
565 &sbb ("eax",&DWP(0,$np,$i,4));
566 &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
567 &dec ($j); # doesn't affect CF!
568 &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
569 &lea ($i,&DWP(1,$i)); # i++
570 &jge (&label("sub"));
571
572 &sbb ("eax",0); # handle upmost overflow bit
573 &and ($tp,"eax");
574 &not ("eax");
575 &mov ($np,$rp);
576 &and ($np,"eax");
577 &or ($tp,$np); # tp=carry?tp:rp
578
579&set_label("copy",16); # copy or in-place refresh
580 &mov ("eax",&DWP(0,$tp,$num,4));
581 &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
582 &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
583 &dec ($num);
584 &jge (&label("copy"));
585
586 &mov ("esp",$_sp); # pull saved stack pointer
587 &mov ("eax",1);
588&set_label("just_leave");
589&function_end("bn_mul_mont");
590
591&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
592
593&asm_finish();
diff --git a/src/lib/libcrypto/bn/asm/x86.pl b/src/lib/libcrypto/bn/asm/x86.pl
deleted file mode 100644
index 1bc4f1bb27..0000000000
--- a/src/lib/libcrypto/bn/asm/x86.pl
+++ /dev/null
@@ -1,28 +0,0 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
6require("x86/mul_add.pl");
7require("x86/mul.pl");
8require("x86/sqr.pl");
9require("x86/div.pl");
10require("x86/add.pl");
11require("x86/sub.pl");
12require("x86/comba.pl");
13
14&asm_init($ARGV[0],$0);
15
16&bn_mul_add_words("bn_mul_add_words");
17&bn_mul_words("bn_mul_words");
18&bn_sqr_words("bn_sqr_words");
19&bn_div_words("bn_div_words");
20&bn_add_words("bn_add_words");
21&bn_sub_words("bn_sub_words");
22&bn_mul_comba("bn_mul_comba8",8);
23&bn_mul_comba("bn_mul_comba4",4);
24&bn_sqr_comba("bn_sqr_comba8",8);
25&bn_sqr_comba("bn_sqr_comba4",4);
26
27&asm_finish();
28
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl
deleted file mode 100644
index 0b5cf583e3..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/add.pl
+++ /dev/null
@@ -1,76 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_add_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $a="esi";
12 $b="edi";
13 $c="eax";
14 $r="ebx";
15 $tmp1="ecx";
16 $tmp2="edx";
17 $num="ebp";
18
19 &mov($r,&wparam(0)); # get r
20 &mov($a,&wparam(1)); # get a
21 &mov($b,&wparam(2)); # get b
22 &mov($num,&wparam(3)); # get num
23 &xor($c,$c); # clear carry
24 &and($num,0xfffffff8); # num / 8
25
26 &jz(&label("aw_finish"));
27
28 &set_label("aw_loop",0);
29 for ($i=0; $i<8; $i++)
30 {
31 &comment("Round $i");
32
33 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
34 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
35 &add($tmp1,$c);
36 &mov($c,0);
37 &adc($c,$c);
38 &add($tmp1,$tmp2);
39 &adc($c,0);
40 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
41 }
42
43 &comment("");
44 &add($a,32);
45 &add($b,32);
46 &add($r,32);
47 &sub($num,8);
48 &jnz(&label("aw_loop"));
49
50 &set_label("aw_finish",0);
51 &mov($num,&wparam(3)); # get num
52 &and($num,7);
53 &jz(&label("aw_end"));
54
55 for ($i=0; $i<7; $i++)
56 {
57 &comment("Tail Round $i");
58 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
59 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
60 &add($tmp1,$c);
61 &mov($c,0);
62 &adc($c,$c);
63 &add($tmp1,$tmp2);
64 &adc($c,0);
65 &dec($num) if ($i != 6);
66 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
67 &jz(&label("aw_end")) if ($i != 6);
68 }
69 &set_label("aw_end",0);
70
71# &mov("eax",$c); # $c is "eax"
72
73 &function_end($name);
74 }
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl
deleted file mode 100644
index 2291253629..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/comba.pl
+++ /dev/null
@@ -1,277 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub mul_add_c
5 {
6 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
7
8 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
9 # words, and 1 if load return value
10
11 &comment("mul a[$ai]*b[$bi]");
12
13 # "eax" and "edx" will always be pre-loaded.
14 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
15 # &mov("edx",&DWP($bi*4,$b,"",0));
16
17 &mul("edx");
18 &add($c0,"eax");
19 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a
20 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
21 ###
22 &adc($c1,"edx");
23 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b
24 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
25 ###
26 &adc($c2,0);
27 # is pos > 1, it means it is the last loop
28 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
29 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
30 }
31
32sub sqr_add_c
33 {
34 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
35
36 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
37 # words, and 1 if load return value
38
39 &comment("sqr a[$ai]*a[$bi]");
40
41 # "eax" and "edx" will always be pre-loaded.
42 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
43 # &mov("edx",&DWP($bi*4,$b,"",0));
44
45 if ($ai == $bi)
46 { &mul("eax");}
47 else
48 { &mul("edx");}
49 &add($c0,"eax");
50 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
51 ###
52 &adc($c1,"edx");
53 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
54 ###
55 &adc($c2,0);
56 # is pos > 1, it means it is the last loop
57 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
58 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
59 }
60
61sub sqr_add_c2
62 {
63 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
64
65 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
66 # words, and 1 if load return value
67
68 &comment("sqr a[$ai]*a[$bi]");
69
70 # "eax" and "edx" will always be pre-loaded.
71 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
72 # &mov("edx",&DWP($bi*4,$a,"",0));
73
74 if ($ai == $bi)
75 { &mul("eax");}
76 else
77 { &mul("edx");}
78 &add("eax","eax");
79 ###
80 &adc("edx","edx");
81 ###
82 &adc($c2,0);
83 &add($c0,"eax");
84 &adc($c1,"edx");
85 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
86 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
87 &adc($c2,0);
88 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
89 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
90 ###
91 }
92
93sub bn_mul_comba
94 {
95 local($name,$num)=@_;
96 local($a,$b,$c0,$c1,$c2);
97 local($i,$as,$ae,$bs,$be,$ai,$bi);
98 local($tot,$end);
99
100 &function_begin_B($name,"");
101
102 $c0="ebx";
103 $c1="ecx";
104 $c2="ebp";
105 $a="esi";
106 $b="edi";
107
108 $as=0;
109 $ae=0;
110 $bs=0;
111 $be=0;
112 $tot=$num+$num-1;
113
114 &push("esi");
115 &mov($a,&wparam(1));
116 &push("edi");
117 &mov($b,&wparam(2));
118 &push("ebp");
119 &push("ebx");
120
121 &xor($c0,$c0);
122 &mov("eax",&DWP(0,$a,"",0)); # load the first word
123 &xor($c1,$c1);
124 &mov("edx",&DWP(0,$b,"",0)); # load the first second
125
126 for ($i=0; $i<$tot; $i++)
127 {
128 $ai=$as;
129 $bi=$bs;
130 $end=$be+1;
131
132 &comment("################## Calculate word $i");
133
134 for ($j=$bs; $j<$end; $j++)
135 {
136 &xor($c2,$c2) if ($j == $bs);
137 if (($j+1) == $end)
138 {
139 $v=1;
140 $v=2 if (($i+1) == $tot);
141 }
142 else
143 { $v=0; }
144 if (($j+1) != $end)
145 {
146 $na=($ai-1);
147 $nb=($bi+1);
148 }
149 else
150 {
151 $na=$as+($i < ($num-1));
152 $nb=$bs+($i >= ($num-1));
153 }
154#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
155 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
156 if ($v)
157 {
158 &comment("saved r[$i]");
159 # &mov("eax",&wparam(0));
160 # &mov(&DWP($i*4,"eax","",0),$c0);
161 ($c0,$c1,$c2)=($c1,$c2,$c0);
162 }
163 $ai--;
164 $bi++;
165 }
166 $as++ if ($i < ($num-1));
167 $ae++ if ($i >= ($num-1));
168
169 $bs++ if ($i >= ($num-1));
170 $be++ if ($i < ($num-1));
171 }
172 &comment("save r[$i]");
173 # &mov("eax",&wparam(0));
174 &mov(&DWP($i*4,"eax","",0),$c0);
175
176 &pop("ebx");
177 &pop("ebp");
178 &pop("edi");
179 &pop("esi");
180 &ret();
181 &function_end_B($name);
182 }
183
184sub bn_sqr_comba
185 {
186 local($name,$num)=@_;
187 local($r,$a,$c0,$c1,$c2)=@_;
188 local($i,$as,$ae,$bs,$be,$ai,$bi);
189 local($b,$tot,$end,$half);
190
191 &function_begin_B($name,"");
192
193 $c0="ebx";
194 $c1="ecx";
195 $c2="ebp";
196 $a="esi";
197 $r="edi";
198
199 &push("esi");
200 &push("edi");
201 &push("ebp");
202 &push("ebx");
203 &mov($r,&wparam(0));
204 &mov($a,&wparam(1));
205 &xor($c0,$c0);
206 &xor($c1,$c1);
207 &mov("eax",&DWP(0,$a,"",0)); # load the first word
208
209 $as=0;
210 $ae=0;
211 $bs=0;
212 $be=0;
213 $tot=$num+$num-1;
214
215 for ($i=0; $i<$tot; $i++)
216 {
217 $ai=$as;
218 $bi=$bs;
219 $end=$be+1;
220
221 &comment("############### Calculate word $i");
222 for ($j=$bs; $j<$end; $j++)
223 {
224 &xor($c2,$c2) if ($j == $bs);
225 if (($ai-1) < ($bi+1))
226 {
227 $v=1;
228 $v=2 if ($i+1) == $tot;
229 }
230 else
231 { $v=0; }
232 if (!$v)
233 {
234 $na=$ai-1;
235 $nb=$bi+1;
236 }
237 else
238 {
239 $na=$as+($i < ($num-1));
240 $nb=$bs+($i >= ($num-1));
241 }
242 if ($ai == $bi)
243 {
244 &sqr_add_c($r,$a,$ai,$bi,
245 $c0,$c1,$c2,$v,$i,$na,$nb);
246 }
247 else
248 {
249 &sqr_add_c2($r,$a,$ai,$bi,
250 $c0,$c1,$c2,$v,$i,$na,$nb);
251 }
252 if ($v)
253 {
254 &comment("saved r[$i]");
255 #&mov(&DWP($i*4,$r,"",0),$c0);
256 ($c0,$c1,$c2)=($c1,$c2,$c0);
257 last;
258 }
259 $ai--;
260 $bi++;
261 }
262 $as++ if ($i < ($num-1));
263 $ae++ if ($i >= ($num-1));
264
265 $bs++ if ($i >= ($num-1));
266 $be++ if ($i < ($num-1));
267 }
268 &mov(&DWP($i*4,$r,"",0),$c0);
269 &pop("ebx");
270 &pop("ebp");
271 &pop("edi");
272 &pop("esi");
273 &ret();
274 &function_end_B($name);
275 }
276
2771;
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl
deleted file mode 100644
index 0e90152caa..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/div.pl
+++ /dev/null
@@ -1,15 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_div_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9 &mov("edx",&wparam(0)); #
10 &mov("eax",&wparam(1)); #
11 &mov("ebx",&wparam(2)); #
12 &div("ebx");
13 &function_end($name);
14 }
151;
diff --git a/src/lib/libcrypto/bn/asm/x86/f b/src/lib/libcrypto/bn/asm/x86/f
deleted file mode 100644
index 22e4112224..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/f
+++ /dev/null
@@ -1,3 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl
deleted file mode 100644
index 674cb9b055..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/mul.pl
+++ /dev/null
@@ -1,77 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_mul_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $Low="eax";
12 $High="edx";
13 $a="ebx";
14 $w="ecx";
15 $r="edi";
16 $c="esi";
17 $num="ebp";
18
19 &xor($c,$c); # clear carry
20 &mov($r,&wparam(0)); #
21 &mov($a,&wparam(1)); #
22 &mov($num,&wparam(2)); #
23 &mov($w,&wparam(3)); #
24
25 &and($num,0xfffffff8); # num / 8
26 &jz(&label("mw_finish"));
27
28 &set_label("mw_loop",0);
29 for ($i=0; $i<32; $i+=4)
30 {
31 &comment("Round $i");
32
33 &mov("eax",&DWP($i,$a,"",0)); # *a
34 &mul($w); # *a * w
35 &add("eax",$c); # L(t)+=c
36 # XXX
37
38 &adc("edx",0); # H(t)+=carry
39 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
40
41 &mov($c,"edx"); # c= H(t);
42 }
43
44 &comment("");
45 &add($a,32);
46 &add($r,32);
47 &sub($num,8);
48 &jz(&label("mw_finish"));
49 &jmp(&label("mw_loop"));
50
51 &set_label("mw_finish",0);
52 &mov($num,&wparam(2)); # get num
53 &and($num,7);
54 &jnz(&label("mw_finish2"));
55 &jmp(&label("mw_end"));
56
57 &set_label("mw_finish2",1);
58 for ($i=0; $i<7; $i++)
59 {
60 &comment("Tail Round $i");
61 &mov("eax",&DWP($i*4,$a,"",0));# *a
62 &mul($w); # *a * w
63 &add("eax",$c); # L(t)+=c
64 # XXX
65 &adc("edx",0); # H(t)+=carry
66 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
67 &mov($c,"edx"); # c= H(t);
68 &dec($num) if ($i != 7-1);
69 &jz(&label("mw_end")) if ($i != 7-1);
70 }
71 &set_label("mw_end",0);
72 &mov("eax",$c);
73
74 &function_end($name);
75 }
76
771;
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl
deleted file mode 100644
index 61830d3a90..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/mul_add.pl
+++ /dev/null
@@ -1,87 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_mul_add_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $Low="eax";
12 $High="edx";
13 $a="ebx";
14 $w="ebp";
15 $r="edi";
16 $c="esi";
17
18 &xor($c,$c); # clear carry
19 &mov($r,&wparam(0)); #
20
21 &mov("ecx",&wparam(2)); #
22 &mov($a,&wparam(1)); #
23
24 &and("ecx",0xfffffff8); # num / 8
25 &mov($w,&wparam(3)); #
26
27 &push("ecx"); # Up the stack for a tmp variable
28
29 &jz(&label("maw_finish"));
30
31 &set_label("maw_loop",0);
32
33 &mov(&swtmp(0),"ecx"); #
34
35 for ($i=0; $i<32; $i+=4)
36 {
37 &comment("Round $i");
38
39 &mov("eax",&DWP($i,$a,"",0)); # *a
40 &mul($w); # *a * w
41 &add("eax",$c); # L(t)+= *r
42 &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
43 &adc("edx",0); # H(t)+=carry
44 &add("eax",$c); # L(t)+=c
45 &adc("edx",0); # H(t)+=carry
46 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
47 &mov($c,"edx"); # c= H(t);
48 }
49
50 &comment("");
51 &mov("ecx",&swtmp(0)); #
52 &add($a,32);
53 &add($r,32);
54 &sub("ecx",8);
55 &jnz(&label("maw_loop"));
56
57 &set_label("maw_finish",0);
58 &mov("ecx",&wparam(2)); # get num
59 &and("ecx",7);
60 &jnz(&label("maw_finish2")); # helps branch prediction
61 &jmp(&label("maw_end"));
62
63 &set_label("maw_finish2",1);
64 for ($i=0; $i<7; $i++)
65 {
66 &comment("Tail Round $i");
67 &mov("eax",&DWP($i*4,$a,"",0));# *a
68 &mul($w); # *a * w
69 &add("eax",$c); # L(t)+=c
70 &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
71 &adc("edx",0); # H(t)+=carry
72 &add("eax",$c);
73 &adc("edx",0); # H(t)+=carry
74 &dec("ecx") if ($i != 7-1);
75 &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
76 &mov($c,"edx"); # c= H(t);
77 &jz(&label("maw_end")) if ($i != 7-1);
78 }
79 &set_label("maw_end",0);
80 &mov("eax",$c);
81
82 &pop("ecx"); # clear variable from
83
84 &function_end($name);
85 }
86
871;
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl
deleted file mode 100644
index 1f90993cf6..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/sqr.pl
+++ /dev/null
@@ -1,60 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_sqr_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $r="esi";
12 $a="edi";
13 $num="ebx";
14
15 &mov($r,&wparam(0)); #
16 &mov($a,&wparam(1)); #
17 &mov($num,&wparam(2)); #
18
19 &and($num,0xfffffff8); # num / 8
20 &jz(&label("sw_finish"));
21
22 &set_label("sw_loop",0);
23 for ($i=0; $i<32; $i+=4)
24 {
25 &comment("Round $i");
26 &mov("eax",&DWP($i,$a,"",0)); # *a
27 # XXX
28 &mul("eax"); # *a * *a
29 &mov(&DWP($i*2,$r,"",0),"eax"); #
30 &mov(&DWP($i*2+4,$r,"",0),"edx");#
31 }
32
33 &comment("");
34 &add($a,32);
35 &add($r,64);
36 &sub($num,8);
37 &jnz(&label("sw_loop"));
38
39 &set_label("sw_finish",0);
40 &mov($num,&wparam(2)); # get num
41 &and($num,7);
42 &jz(&label("sw_end"));
43
44 for ($i=0; $i<7; $i++)
45 {
46 &comment("Tail Round $i");
47 &mov("eax",&DWP($i*4,$a,"",0)); # *a
48 # XXX
49 &mul("eax"); # *a * *a
50 &mov(&DWP($i*8,$r,"",0),"eax"); #
51 &dec($num) if ($i != 7-1);
52 &mov(&DWP($i*8+4,$r,"",0),"edx");
53 &jz(&label("sw_end")) if ($i != 7-1);
54 }
55 &set_label("sw_end",0);
56
57 &function_end($name);
58 }
59
601;
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl
deleted file mode 100644
index 837b0e1b07..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/sub.pl
+++ /dev/null
@@ -1,76 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_sub_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $a="esi";
12 $b="edi";
13 $c="eax";
14 $r="ebx";
15 $tmp1="ecx";
16 $tmp2="edx";
17 $num="ebp";
18
19 &mov($r,&wparam(0)); # get r
20 &mov($a,&wparam(1)); # get a
21 &mov($b,&wparam(2)); # get b
22 &mov($num,&wparam(3)); # get num
23 &xor($c,$c); # clear carry
24 &and($num,0xfffffff8); # num / 8
25
26 &jz(&label("aw_finish"));
27
28 &set_label("aw_loop",0);
29 for ($i=0; $i<8; $i++)
30 {
31 &comment("Round $i");
32
33 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
34 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
35 &sub($tmp1,$c);
36 &mov($c,0);
37 &adc($c,$c);
38 &sub($tmp1,$tmp2);
39 &adc($c,0);
40 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
41 }
42
43 &comment("");
44 &add($a,32);
45 &add($b,32);
46 &add($r,32);
47 &sub($num,8);
48 &jnz(&label("aw_loop"));
49
50 &set_label("aw_finish",0);
51 &mov($num,&wparam(3)); # get num
52 &and($num,7);
53 &jz(&label("aw_end"));
54
55 for ($i=0; $i<7; $i++)
56 {
57 &comment("Tail Round $i");
58 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
59 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
60 &sub($tmp1,$c);
61 &mov($c,0);
62 &adc($c,$c);
63 &sub($tmp1,$tmp2);
64 &adc($c,0);
65 &dec($num) if ($i != 6);
66 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
67 &jz(&label("aw_end")) if ($i != 6);
68 }
69 &set_label("aw_end",0);
70
71# &mov("eax",$c); # $c is "eax"
72
73 &function_end($name);
74 }
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
deleted file mode 100644
index acb0b40118..0000000000
--- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c
+++ /dev/null
@@ -1,606 +0,0 @@
1#include "../bn_lcl.h"
2#if !(defined(__GNUC__) && __GNUC__>=2)
3# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
4#else
5/*
6 * x86_64 BIGNUM accelerator version 0.1, December 2002.
7 *
8 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
9 * project.
10 *
11 * Rights for redistribution and usage in source and binary forms are
12 * granted according to the OpenSSL license. Warranty of any kind is
13 * disclaimed.
14 *
15 * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
16 * versions, like 1.0...
17 * A. Well, that's because this code is basically a quick-n-dirty
18 * proof-of-concept hack. As you can see it's implemented with
19 * inline assembler, which means that you're bound to GCC and that
20 * there might be enough room for further improvement.
21 *
22 * Q. Why inline assembler?
23 * A. x86_64 features own ABI which I'm not familiar with. This is
24 * why I decided to let the compiler take care of subroutine
25 * prologue/epilogue as well as register allocation. For reference.
26 * Win64 implements different ABI for AMD64, different from Linux.
27 *
28 * Q. How much faster does it get?
29 * A. 'apps/openssl speed rsa dsa' output with no-asm:
30 *
31 * sign verify sign/s verify/s
32 * rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
33 * rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
34 * rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
35 * rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
36 * sign verify sign/s verify/s
37 * dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
38 * dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
39 * dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
40 *
41 * 'apps/openssl speed rsa dsa' output with this module:
42 *
43 * sign verify sign/s verify/s
44 * rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
45 * rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
46 * rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
47 * rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
48 * sign verify sign/s verify/s
49 * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
50 * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
51 * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
52 *
53 * For the reference. IA-32 assembler implementation performs
54 * very much like 64-bit code compiled with no-asm on the same
55 * machine.
56 */
57
58#ifdef _WIN64
59#define BN_ULONG unsigned long long
60#else
61#define BN_ULONG unsigned long
62#endif
63
64#undef mul
65#undef mul_add
66#undef sqr
67
68/*
69 * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
70 * "g"(0) let the compiler to decide where does it
71 * want to keep the value of zero;
72 */
73#define mul_add(r,a,word,carry) do { \
74 register BN_ULONG high,low; \
75 asm ("mulq %3" \
76 : "=a"(low),"=d"(high) \
77 : "a"(word),"m"(a) \
78 : "cc"); \
79 asm ("addq %2,%0; adcq %3,%1" \
80 : "+r"(carry),"+d"(high)\
81 : "a"(low),"g"(0) \
82 : "cc"); \
83 asm ("addq %2,%0; adcq %3,%1" \
84 : "+m"(r),"+d"(high) \
85 : "r"(carry),"g"(0) \
86 : "cc"); \
87 carry=high; \
88 } while (0)
89
90#define mul(r,a,word,carry) do { \
91 register BN_ULONG high,low; \
92 asm ("mulq %3" \
93 : "=a"(low),"=d"(high) \
94 : "a"(word),"g"(a) \
95 : "cc"); \
96 asm ("addq %2,%0; adcq %3,%1" \
97 : "+r"(carry),"+d"(high)\
98 : "a"(low),"g"(0) \
99 : "cc"); \
100 (r)=carry, carry=high; \
101 } while (0)
102
103#define sqr(r0,r1,a) \
104 asm ("mulq %2" \
105 : "=a"(r0),"=d"(r1) \
106 : "a"(a) \
107 : "cc");
108
109BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
110 {
111 BN_ULONG c1=0;
112
113 if (num <= 0) return(c1);
114
115 while (num&~3)
116 {
117 mul_add(rp[0],ap[0],w,c1);
118 mul_add(rp[1],ap[1],w,c1);
119 mul_add(rp[2],ap[2],w,c1);
120 mul_add(rp[3],ap[3],w,c1);
121 ap+=4; rp+=4; num-=4;
122 }
123 if (num)
124 {
125 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
126 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
127 mul_add(rp[2],ap[2],w,c1); return c1;
128 }
129
130 return(c1);
131 }
132
133BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
134 {
135 BN_ULONG c1=0;
136
137 if (num <= 0) return(c1);
138
139 while (num&~3)
140 {
141 mul(rp[0],ap[0],w,c1);
142 mul(rp[1],ap[1],w,c1);
143 mul(rp[2],ap[2],w,c1);
144 mul(rp[3],ap[3],w,c1);
145 ap+=4; rp+=4; num-=4;
146 }
147 if (num)
148 {
149 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
150 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
151 mul(rp[2],ap[2],w,c1);
152 }
153 return(c1);
154 }
155
156void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
157 {
158 if (n <= 0) return;
159
160 while (n&~3)
161 {
162 sqr(r[0],r[1],a[0]);
163 sqr(r[2],r[3],a[1]);
164 sqr(r[4],r[5],a[2]);
165 sqr(r[6],r[7],a[3]);
166 a+=4; r+=8; n-=4;
167 }
168 if (n)
169 {
170 sqr(r[0],r[1],a[0]); if (--n == 0) return;
171 sqr(r[2],r[3],a[1]); if (--n == 0) return;
172 sqr(r[4],r[5],a[2]);
173 }
174 }
175
176BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
177{ BN_ULONG ret,waste;
178
179 asm ("divq %4"
180 : "=a"(ret),"=d"(waste)
181 : "a"(l),"d"(h),"g"(d)
182 : "cc");
183
184 return ret;
185}
186
187BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
188{ BN_ULONG ret=0,i=0;
189
190 if (n <= 0) return 0;
191
192 asm (
193 " subq %2,%2 \n"
194 ".p2align 4 \n"
195 "1: movq (%4,%2,8),%0 \n"
196 " adcq (%5,%2,8),%0 \n"
197 " movq %0,(%3,%2,8) \n"
198 " leaq 1(%2),%2 \n"
199 " loop 1b \n"
200 " sbbq %0,%0 \n"
201 : "=&a"(ret),"+c"(n),"=&r"(i)
202 : "r"(rp),"r"(ap),"r"(bp)
203 : "cc"
204 );
205
206 return ret&1;
207}
208
209#ifndef SIMICS
210BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
211{ BN_ULONG ret=0,i=0;
212
213 if (n <= 0) return 0;
214
215 asm (
216 " subq %2,%2 \n"
217 ".p2align 4 \n"
218 "1: movq (%4,%2,8),%0 \n"
219 " sbbq (%5,%2,8),%0 \n"
220 " movq %0,(%3,%2,8) \n"
221 " leaq 1(%2),%2 \n"
222 " loop 1b \n"
223 " sbbq %0,%0 \n"
224 : "=&a"(ret),"+c"(n),"=&r"(i)
225 : "r"(rp),"r"(ap),"r"(bp)
226 : "cc"
227 );
228
229 return ret&1;
230}
231#else
232/* Simics 1.4<7 has buggy sbbq:-( */
233#define BN_MASK2 0xffffffffffffffffL
234BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
235 {
236 BN_ULONG t1,t2;
237 int c=0;
238
239 if (n <= 0) return((BN_ULONG)0);
240
241 for (;;)
242 {
243 t1=a[0]; t2=b[0];
244 r[0]=(t1-t2-c)&BN_MASK2;
245 if (t1 != t2) c=(t1 < t2);
246 if (--n <= 0) break;
247
248 t1=a[1]; t2=b[1];
249 r[1]=(t1-t2-c)&BN_MASK2;
250 if (t1 != t2) c=(t1 < t2);
251 if (--n <= 0) break;
252
253 t1=a[2]; t2=b[2];
254 r[2]=(t1-t2-c)&BN_MASK2;
255 if (t1 != t2) c=(t1 < t2);
256 if (--n <= 0) break;
257
258 t1=a[3]; t2=b[3];
259 r[3]=(t1-t2-c)&BN_MASK2;
260 if (t1 != t2) c=(t1 < t2);
261 if (--n <= 0) break;
262
263 a+=4;
264 b+=4;
265 r+=4;
266 }
267 return(c);
268 }
269#endif
270
271/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
272/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
273/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
274/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
275
276#if 0
277/* original macros are kept for reference purposes */
278#define mul_add_c(a,b,c0,c1,c2) { \
279 BN_ULONG ta=(a),tb=(b); \
280 t1 = ta * tb; \
281 t2 = BN_UMULT_HIGH(ta,tb); \
282 c0 += t1; t2 += (c0<t1)?1:0; \
283 c1 += t2; c2 += (c1<t2)?1:0; \
284 }
285
286#define mul_add_c2(a,b,c0,c1,c2) { \
287 BN_ULONG ta=(a),tb=(b),t0; \
288 t1 = BN_UMULT_HIGH(ta,tb); \
289 t0 = ta * tb; \
290 t2 = t1+t1; c2 += (t2<t1)?1:0; \
291 t1 = t0+t0; t2 += (t1<t0)?1:0; \
292 c0 += t1; t2 += (c0<t1)?1:0; \
293 c1 += t2; c2 += (c1<t2)?1:0; \
294 }
295#else
296#define mul_add_c(a,b,c0,c1,c2) do { \
297 asm ("mulq %3" \
298 : "=a"(t1),"=d"(t2) \
299 : "a"(a),"m"(b) \
300 : "cc"); \
301 asm ("addq %2,%0; adcq %3,%1" \
302 : "+r"(c0),"+d"(t2) \
303 : "a"(t1),"g"(0) \
304 : "cc"); \
305 asm ("addq %2,%0; adcq %3,%1" \
306 : "+r"(c1),"+r"(c2) \
307 : "d"(t2),"g"(0) \
308 : "cc"); \
309 } while (0)
310
311#define sqr_add_c(a,i,c0,c1,c2) do { \
312 asm ("mulq %2" \
313 : "=a"(t1),"=d"(t2) \
314 : "a"(a[i]) \
315 : "cc"); \
316 asm ("addq %2,%0; adcq %3,%1" \
317 : "+r"(c0),"+d"(t2) \
318 : "a"(t1),"g"(0) \
319 : "cc"); \
320 asm ("addq %2,%0; adcq %3,%1" \
321 : "+r"(c1),"+r"(c2) \
322 : "d"(t2),"g"(0) \
323 : "cc"); \
324 } while (0)
325
326#define mul_add_c2(a,b,c0,c1,c2) do { \
327 asm ("mulq %3" \
328 : "=a"(t1),"=d"(t2) \
329 : "a"(a),"m"(b) \
330 : "cc"); \
331 asm ("addq %0,%0; adcq %2,%1" \
332 : "+d"(t2),"+r"(c2) \
333 : "g"(0) \
334 : "cc"); \
335 asm ("addq %0,%0; adcq %2,%1" \
336 : "+a"(t1),"+d"(t2) \
337 : "g"(0) \
338 : "cc"); \
339 asm ("addq %2,%0; adcq %3,%1" \
340 : "+r"(c0),"+d"(t2) \
341 : "a"(t1),"g"(0) \
342 : "cc"); \
343 asm ("addq %2,%0; adcq %3,%1" \
344 : "+r"(c1),"+r"(c2) \
345 : "d"(t2),"g"(0) \
346 : "cc"); \
347 } while (0)
348#endif
349
350#define sqr_add_c2(a,i,j,c0,c1,c2) \
351 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
352
353void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
354 {
355 BN_ULONG t1,t2;
356 BN_ULONG c1,c2,c3;
357
358 c1=0;
359 c2=0;
360 c3=0;
361 mul_add_c(a[0],b[0],c1,c2,c3);
362 r[0]=c1;
363 c1=0;
364 mul_add_c(a[0],b[1],c2,c3,c1);
365 mul_add_c(a[1],b[0],c2,c3,c1);
366 r[1]=c2;
367 c2=0;
368 mul_add_c(a[2],b[0],c3,c1,c2);
369 mul_add_c(a[1],b[1],c3,c1,c2);
370 mul_add_c(a[0],b[2],c3,c1,c2);
371 r[2]=c3;
372 c3=0;
373 mul_add_c(a[0],b[3],c1,c2,c3);
374 mul_add_c(a[1],b[2],c1,c2,c3);
375 mul_add_c(a[2],b[1],c1,c2,c3);
376 mul_add_c(a[3],b[0],c1,c2,c3);
377 r[3]=c1;
378 c1=0;
379 mul_add_c(a[4],b[0],c2,c3,c1);
380 mul_add_c(a[3],b[1],c2,c3,c1);
381 mul_add_c(a[2],b[2],c2,c3,c1);
382 mul_add_c(a[1],b[3],c2,c3,c1);
383 mul_add_c(a[0],b[4],c2,c3,c1);
384 r[4]=c2;
385 c2=0;
386 mul_add_c(a[0],b[5],c3,c1,c2);
387 mul_add_c(a[1],b[4],c3,c1,c2);
388 mul_add_c(a[2],b[3],c3,c1,c2);
389 mul_add_c(a[3],b[2],c3,c1,c2);
390 mul_add_c(a[4],b[1],c3,c1,c2);
391 mul_add_c(a[5],b[0],c3,c1,c2);
392 r[5]=c3;
393 c3=0;
394 mul_add_c(a[6],b[0],c1,c2,c3);
395 mul_add_c(a[5],b[1],c1,c2,c3);
396 mul_add_c(a[4],b[2],c1,c2,c3);
397 mul_add_c(a[3],b[3],c1,c2,c3);
398 mul_add_c(a[2],b[4],c1,c2,c3);
399 mul_add_c(a[1],b[5],c1,c2,c3);
400 mul_add_c(a[0],b[6],c1,c2,c3);
401 r[6]=c1;
402 c1=0;
403 mul_add_c(a[0],b[7],c2,c3,c1);
404 mul_add_c(a[1],b[6],c2,c3,c1);
405 mul_add_c(a[2],b[5],c2,c3,c1);
406 mul_add_c(a[3],b[4],c2,c3,c1);
407 mul_add_c(a[4],b[3],c2,c3,c1);
408 mul_add_c(a[5],b[2],c2,c3,c1);
409 mul_add_c(a[6],b[1],c2,c3,c1);
410 mul_add_c(a[7],b[0],c2,c3,c1);
411 r[7]=c2;
412 c2=0;
413 mul_add_c(a[7],b[1],c3,c1,c2);
414 mul_add_c(a[6],b[2],c3,c1,c2);
415 mul_add_c(a[5],b[3],c3,c1,c2);
416 mul_add_c(a[4],b[4],c3,c1,c2);
417 mul_add_c(a[3],b[5],c3,c1,c2);
418 mul_add_c(a[2],b[6],c3,c1,c2);
419 mul_add_c(a[1],b[7],c3,c1,c2);
420 r[8]=c3;
421 c3=0;
422 mul_add_c(a[2],b[7],c1,c2,c3);
423 mul_add_c(a[3],b[6],c1,c2,c3);
424 mul_add_c(a[4],b[5],c1,c2,c3);
425 mul_add_c(a[5],b[4],c1,c2,c3);
426 mul_add_c(a[6],b[3],c1,c2,c3);
427 mul_add_c(a[7],b[2],c1,c2,c3);
428 r[9]=c1;
429 c1=0;
430 mul_add_c(a[7],b[3],c2,c3,c1);
431 mul_add_c(a[6],b[4],c2,c3,c1);
432 mul_add_c(a[5],b[5],c2,c3,c1);
433 mul_add_c(a[4],b[6],c2,c3,c1);
434 mul_add_c(a[3],b[7],c2,c3,c1);
435 r[10]=c2;
436 c2=0;
437 mul_add_c(a[4],b[7],c3,c1,c2);
438 mul_add_c(a[5],b[6],c3,c1,c2);
439 mul_add_c(a[6],b[5],c3,c1,c2);
440 mul_add_c(a[7],b[4],c3,c1,c2);
441 r[11]=c3;
442 c3=0;
443 mul_add_c(a[7],b[5],c1,c2,c3);
444 mul_add_c(a[6],b[6],c1,c2,c3);
445 mul_add_c(a[5],b[7],c1,c2,c3);
446 r[12]=c1;
447 c1=0;
448 mul_add_c(a[6],b[7],c2,c3,c1);
449 mul_add_c(a[7],b[6],c2,c3,c1);
450 r[13]=c2;
451 c2=0;
452 mul_add_c(a[7],b[7],c3,c1,c2);
453 r[14]=c3;
454 r[15]=c1;
455 }
456
457void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
458 {
459 BN_ULONG t1,t2;
460 BN_ULONG c1,c2,c3;
461
462 c1=0;
463 c2=0;
464 c3=0;
465 mul_add_c(a[0],b[0],c1,c2,c3);
466 r[0]=c1;
467 c1=0;
468 mul_add_c(a[0],b[1],c2,c3,c1);
469 mul_add_c(a[1],b[0],c2,c3,c1);
470 r[1]=c2;
471 c2=0;
472 mul_add_c(a[2],b[0],c3,c1,c2);
473 mul_add_c(a[1],b[1],c3,c1,c2);
474 mul_add_c(a[0],b[2],c3,c1,c2);
475 r[2]=c3;
476 c3=0;
477 mul_add_c(a[0],b[3],c1,c2,c3);
478 mul_add_c(a[1],b[2],c1,c2,c3);
479 mul_add_c(a[2],b[1],c1,c2,c3);
480 mul_add_c(a[3],b[0],c1,c2,c3);
481 r[3]=c1;
482 c1=0;
483 mul_add_c(a[3],b[1],c2,c3,c1);
484 mul_add_c(a[2],b[2],c2,c3,c1);
485 mul_add_c(a[1],b[3],c2,c3,c1);
486 r[4]=c2;
487 c2=0;
488 mul_add_c(a[2],b[3],c3,c1,c2);
489 mul_add_c(a[3],b[2],c3,c1,c2);
490 r[5]=c3;
491 c3=0;
492 mul_add_c(a[3],b[3],c1,c2,c3);
493 r[6]=c1;
494 r[7]=c2;
495 }
496
497void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
498 {
499 BN_ULONG t1,t2;
500 BN_ULONG c1,c2,c3;
501
502 c1=0;
503 c2=0;
504 c3=0;
505 sqr_add_c(a,0,c1,c2,c3);
506 r[0]=c1;
507 c1=0;
508 sqr_add_c2(a,1,0,c2,c3,c1);
509 r[1]=c2;
510 c2=0;
511 sqr_add_c(a,1,c3,c1,c2);
512 sqr_add_c2(a,2,0,c3,c1,c2);
513 r[2]=c3;
514 c3=0;
515 sqr_add_c2(a,3,0,c1,c2,c3);
516 sqr_add_c2(a,2,1,c1,c2,c3);
517 r[3]=c1;
518 c1=0;
519 sqr_add_c(a,2,c2,c3,c1);
520 sqr_add_c2(a,3,1,c2,c3,c1);
521 sqr_add_c2(a,4,0,c2,c3,c1);
522 r[4]=c2;
523 c2=0;
524 sqr_add_c2(a,5,0,c3,c1,c2);
525 sqr_add_c2(a,4,1,c3,c1,c2);
526 sqr_add_c2(a,3,2,c3,c1,c2);
527 r[5]=c3;
528 c3=0;
529 sqr_add_c(a,3,c1,c2,c3);
530 sqr_add_c2(a,4,2,c1,c2,c3);
531 sqr_add_c2(a,5,1,c1,c2,c3);
532 sqr_add_c2(a,6,0,c1,c2,c3);
533 r[6]=c1;
534 c1=0;
535 sqr_add_c2(a,7,0,c2,c3,c1);
536 sqr_add_c2(a,6,1,c2,c3,c1);
537 sqr_add_c2(a,5,2,c2,c3,c1);
538 sqr_add_c2(a,4,3,c2,c3,c1);
539 r[7]=c2;
540 c2=0;
541 sqr_add_c(a,4,c3,c1,c2);
542 sqr_add_c2(a,5,3,c3,c1,c2);
543 sqr_add_c2(a,6,2,c3,c1,c2);
544 sqr_add_c2(a,7,1,c3,c1,c2);
545 r[8]=c3;
546 c3=0;
547 sqr_add_c2(a,7,2,c1,c2,c3);
548 sqr_add_c2(a,6,3,c1,c2,c3);
549 sqr_add_c2(a,5,4,c1,c2,c3);
550 r[9]=c1;
551 c1=0;
552 sqr_add_c(a,5,c2,c3,c1);
553 sqr_add_c2(a,6,4,c2,c3,c1);
554 sqr_add_c2(a,7,3,c2,c3,c1);
555 r[10]=c2;
556 c2=0;
557 sqr_add_c2(a,7,4,c3,c1,c2);
558 sqr_add_c2(a,6,5,c3,c1,c2);
559 r[11]=c3;
560 c3=0;
561 sqr_add_c(a,6,c1,c2,c3);
562 sqr_add_c2(a,7,5,c1,c2,c3);
563 r[12]=c1;
564 c1=0;
565 sqr_add_c2(a,7,6,c2,c3,c1);
566 r[13]=c2;
567 c2=0;
568 sqr_add_c(a,7,c3,c1,c2);
569 r[14]=c3;
570 r[15]=c1;
571 }
572
573void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
574 {
575 BN_ULONG t1,t2;
576 BN_ULONG c1,c2,c3;
577
578 c1=0;
579 c2=0;
580 c3=0;
581 sqr_add_c(a,0,c1,c2,c3);
582 r[0]=c1;
583 c1=0;
584 sqr_add_c2(a,1,0,c2,c3,c1);
585 r[1]=c2;
586 c2=0;
587 sqr_add_c(a,1,c3,c1,c2);
588 sqr_add_c2(a,2,0,c3,c1,c2);
589 r[2]=c3;
590 c3=0;
591 sqr_add_c2(a,3,0,c1,c2,c3);
592 sqr_add_c2(a,2,1,c1,c2,c3);
593 r[3]=c1;
594 c1=0;
595 sqr_add_c(a,2,c2,c3,c1);
596 sqr_add_c2(a,3,1,c2,c3,c1);
597 r[4]=c2;
598 c2=0;
599 sqr_add_c2(a,3,2,c3,c1,c2);
600 r[5]=c3;
601 c3=0;
602 sqr_add_c(a,3,c1,c2,c3);
603 r[6]=c1;
604 r[7]=c2;
605 }
606#endif
diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont.pl b/src/lib/libcrypto/bn/asm/x86_64-mont.pl
deleted file mode 100755
index 3b7a6f243f..0000000000
--- a/src/lib/libcrypto/bn/asm/x86_64-mont.pl
+++ /dev/null
@@ -1,330 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# October 2005.
11#
12# Montgomery multiplication routine for x86_64. While it gives modest
13# 9% improvement of rsa4096 sign on Opteron, rsa512 sign runs more
14# than twice, >2x, as fast. Most common rsa1024 sign is improved by
15# respectful 50%. It remains to be seen if loop unrolling and
16# dedicated squaring routine can provide further improvement...
17
18$flavour = shift;
19$output = shift;
20if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
21
22$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
23
24$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
25( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
26( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
27die "can't locate x86_64-xlate.pl";
28
29open STDOUT,"| $^X $xlate $flavour $output";
30
31# int bn_mul_mont(
32$rp="%rdi"; # BN_ULONG *rp,
33$ap="%rsi"; # const BN_ULONG *ap,
34$bp="%rdx"; # const BN_ULONG *bp,
35$np="%rcx"; # const BN_ULONG *np,
36$n0="%r8"; # const BN_ULONG *n0,
37$num="%r9"; # int num);
38$lo0="%r10";
39$hi0="%r11";
40$bp="%r12"; # reassign $bp
41$hi1="%r13";
42$i="%r14";
43$j="%r15";
44$m0="%rbx";
45$m1="%rbp";
46
47$code=<<___;
48.text
49
50.globl bn_mul_mont
51.type bn_mul_mont,\@function,6
52.align 16
53bn_mul_mont:
54 push %rbx
55 push %rbp
56 push %r12
57 push %r13
58 push %r14
59 push %r15
60
61 mov ${num}d,${num}d
62 lea 2($num),%r10
63 mov %rsp,%r11
64 neg %r10
65 lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+2))
66 and \$-1024,%rsp # minimize TLB usage
67
68 mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
69.Lprologue:
70 mov %rdx,$bp # $bp reassigned, remember?
71
72 mov ($n0),$n0 # pull n0[0] value
73
74 xor $i,$i # i=0
75 xor $j,$j # j=0
76
77 mov ($bp),$m0 # m0=bp[0]
78 mov ($ap),%rax
79 mulq $m0 # ap[0]*bp[0]
80 mov %rax,$lo0
81 mov %rdx,$hi0
82
83 imulq $n0,%rax # "tp[0]"*n0
84 mov %rax,$m1
85
86 mulq ($np) # np[0]*m1
87 add $lo0,%rax # discarded
88 adc \$0,%rdx
89 mov %rdx,$hi1
90
91 lea 1($j),$j # j++
92.L1st:
93 mov ($ap,$j,8),%rax
94 mulq $m0 # ap[j]*bp[0]
95 add $hi0,%rax
96 adc \$0,%rdx
97 mov %rax,$lo0
98 mov ($np,$j,8),%rax
99 mov %rdx,$hi0
100
101 mulq $m1 # np[j]*m1
102 add $hi1,%rax
103 lea 1($j),$j # j++
104 adc \$0,%rdx
105 add $lo0,%rax # np[j]*m1+ap[j]*bp[0]
106 adc \$0,%rdx
107 mov %rax,-16(%rsp,$j,8) # tp[j-1]
108 cmp $num,$j
109 mov %rdx,$hi1
110 jl .L1st
111
112 xor %rdx,%rdx
113 add $hi0,$hi1
114 adc \$0,%rdx
115 mov $hi1,-8(%rsp,$num,8)
116 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
117
118 lea 1($i),$i # i++
119.align 4
120.Louter:
121 xor $j,$j # j=0
122
123 mov ($bp,$i,8),$m0 # m0=bp[i]
124 mov ($ap),%rax # ap[0]
125 mulq $m0 # ap[0]*bp[i]
126 add (%rsp),%rax # ap[0]*bp[i]+tp[0]
127 adc \$0,%rdx
128 mov %rax,$lo0
129 mov %rdx,$hi0
130
131 imulq $n0,%rax # tp[0]*n0
132 mov %rax,$m1
133
134 mulq ($np,$j,8) # np[0]*m1
135 add $lo0,%rax # discarded
136 mov 8(%rsp),$lo0 # tp[1]
137 adc \$0,%rdx
138 mov %rdx,$hi1
139
140 lea 1($j),$j # j++
141.align 4
142.Linner:
143 mov ($ap,$j,8),%rax
144 mulq $m0 # ap[j]*bp[i]
145 add $hi0,%rax
146 adc \$0,%rdx
147 add %rax,$lo0 # ap[j]*bp[i]+tp[j]
148 mov ($np,$j,8),%rax
149 adc \$0,%rdx
150 mov %rdx,$hi0
151
152 mulq $m1 # np[j]*m1
153 add $hi1,%rax
154 lea 1($j),$j # j++
155 adc \$0,%rdx
156 add $lo0,%rax # np[j]*m1+ap[j]*bp[i]+tp[j]
157 adc \$0,%rdx
158 mov (%rsp,$j,8),$lo0
159 cmp $num,$j
160 mov %rax,-16(%rsp,$j,8) # tp[j-1]
161 mov %rdx,$hi1
162 jl .Linner
163
164 xor %rdx,%rdx
165 add $hi0,$hi1
166 adc \$0,%rdx
167 add $lo0,$hi1 # pull upmost overflow bit
168 adc \$0,%rdx
169 mov $hi1,-8(%rsp,$num,8)
170 mov %rdx,(%rsp,$num,8) # store upmost overflow bit
171
172 lea 1($i),$i # i++
173 cmp $num,$i
174 jl .Louter
175
176 lea (%rsp),$ap # borrow ap for tp
177 lea -1($num),$j # j=num-1
178
179 mov ($ap),%rax # tp[0]
180 xor $i,$i # i=0 and clear CF!
181 jmp .Lsub
182.align 16
183.Lsub: sbb ($np,$i,8),%rax
184 mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
185 dec $j # doesn't affect CF!
186 mov 8($ap,$i,8),%rax # tp[i+1]
187 lea 1($i),$i # i++
188 jge .Lsub
189
190 sbb \$0,%rax # handle upmost overflow bit
191 and %rax,$ap
192 not %rax
193 mov $rp,$np
194 and %rax,$np
195 lea -1($num),$j
196 or $np,$ap # ap=borrow?tp:rp
197.align 16
198.Lcopy: # copy or in-place refresh
199 mov ($ap,$j,8),%rax
200 mov %rax,($rp,$j,8) # rp[i]=tp[i]
201 mov $i,(%rsp,$j,8) # zap temporary vector
202 dec $j
203 jge .Lcopy
204
205 mov 8(%rsp,$num,8),%rsi # restore %rsp
206 mov \$1,%rax
207 mov (%rsi),%r15
208 mov 8(%rsi),%r14
209 mov 16(%rsi),%r13
210 mov 24(%rsi),%r12
211 mov 32(%rsi),%rbp
212 mov 40(%rsi),%rbx
213 lea 48(%rsi),%rsp
214.Lepilogue:
215 ret
216.size bn_mul_mont,.-bn_mul_mont
217.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
218.align 16
219___
220
221# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
222# CONTEXT *context,DISPATCHER_CONTEXT *disp)
223if ($win64) {
224$rec="%rcx";
225$frame="%rdx";
226$context="%r8";
227$disp="%r9";
228
229$code.=<<___;
230.extern __imp_RtlVirtualUnwind
231.type se_handler,\@abi-omnipotent
232.align 16
233se_handler:
234 push %rsi
235 push %rdi
236 push %rbx
237 push %rbp
238 push %r12
239 push %r13
240 push %r14
241 push %r15
242 pushfq
243 sub \$64,%rsp
244
245 mov 120($context),%rax # pull context->Rax
246 mov 248($context),%rbx # pull context->Rip
247
248 lea .Lprologue(%rip),%r10
249 cmp %r10,%rbx # context->Rip<.Lprologue
250 jb .Lin_prologue
251
252 mov 152($context),%rax # pull context->Rsp
253
254 lea .Lepilogue(%rip),%r10
255 cmp %r10,%rbx # context->Rip>=.Lepilogue
256 jae .Lin_prologue
257
258 mov 192($context),%r10 # pull $num
259 mov 8(%rax,%r10,8),%rax # pull saved stack pointer
260 lea 48(%rax),%rax
261
262 mov -8(%rax),%rbx
263 mov -16(%rax),%rbp
264 mov -24(%rax),%r12
265 mov -32(%rax),%r13
266 mov -40(%rax),%r14
267 mov -48(%rax),%r15
268 mov %rbx,144($context) # restore context->Rbx
269 mov %rbp,160($context) # restore context->Rbp
270 mov %r12,216($context) # restore context->R12
271 mov %r13,224($context) # restore context->R13
272 mov %r14,232($context) # restore context->R14
273 mov %r15,240($context) # restore context->R15
274
275.Lin_prologue:
276 mov 8(%rax),%rdi
277 mov 16(%rax),%rsi
278 mov %rax,152($context) # restore context->Rsp
279 mov %rsi,168($context) # restore context->Rsi
280 mov %rdi,176($context) # restore context->Rdi
281
282 mov 40($disp),%rdi # disp->ContextRecord
283 mov $context,%rsi # context
284 mov \$154,%ecx # sizeof(CONTEXT)
285 .long 0xa548f3fc # cld; rep movsq
286
287 mov $disp,%rsi
288 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
289 mov 8(%rsi),%rdx # arg2, disp->ImageBase
290 mov 0(%rsi),%r8 # arg3, disp->ControlPc
291 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
292 mov 40(%rsi),%r10 # disp->ContextRecord
293 lea 56(%rsi),%r11 # &disp->HandlerData
294 lea 24(%rsi),%r12 # &disp->EstablisherFrame
295 mov %r10,32(%rsp) # arg5
296 mov %r11,40(%rsp) # arg6
297 mov %r12,48(%rsp) # arg7
298 mov %rcx,56(%rsp) # arg8, (NULL)
299 call *__imp_RtlVirtualUnwind(%rip)
300
301 mov \$1,%eax # ExceptionContinueSearch
302 add \$64,%rsp
303 popfq
304 pop %r15
305 pop %r14
306 pop %r13
307 pop %r12
308 pop %rbp
309 pop %rbx
310 pop %rdi
311 pop %rsi
312 ret
313.size se_handler,.-se_handler
314
315.section .pdata
316.align 4
317 .rva .LSEH_begin_bn_mul_mont
318 .rva .LSEH_end_bn_mul_mont
319 .rva .LSEH_info_bn_mul_mont
320
321.section .xdata
322.align 8
323.LSEH_info_bn_mul_mont:
324 .byte 9,0,0,0
325 .rva se_handler
326___
327}
328
329print $code;
330close STDOUT;
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
deleted file mode 100644
index a0bc47837d..0000000000
--- a/src/lib/libcrypto/bn/bn.h
+++ /dev/null
@@ -1,876 +0,0 @@
1/* crypto/bn/bn.h */
2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111/* ====================================================================
112 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
113 *
114 * Portions of the attached software ("Contribution") are developed by
115 * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project.
116 *
117 * The Contribution is licensed pursuant to the Eric Young open source
118 * license provided above.
119 *
120 * The binary polynomial arithmetic software is originally written by
121 * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories.
122 *
123 */
124
125#ifndef HEADER_BN_H
126#define HEADER_BN_H
127
128#include <openssl/e_os2.h>
129#ifndef OPENSSL_NO_FP_API
130#include <stdio.h> /* FILE */
131#endif
132#include <openssl/ossl_typ.h>
133#include <openssl/crypto.h>
134
135#ifdef __cplusplus
136extern "C" {
137#endif
138
139/* These preprocessor symbols control various aspects of the bignum headers and
140 * library code. They're not defined by any "normal" configuration, as they are
141 * intended for development and testing purposes. NB: defining all three can be
142 * useful for debugging application code as well as openssl itself.
143 *
144 * BN_DEBUG - turn on various debugging alterations to the bignum code
145 * BN_DEBUG_RAND - uses random poisoning of unused words to trip up
146 * mismanagement of bignum internals. You must also define BN_DEBUG.
147 */
148/* #define BN_DEBUG */
149/* #define BN_DEBUG_RAND */
150
151#ifndef OPENSSL_SMALL_FOOTPRINT
152#define BN_MUL_COMBA
153#define BN_SQR_COMBA
154#define BN_RECURSION
155#endif
156
157/* This next option uses the C libraries (2 word)/(1 word) function.
158 * If it is not defined, I use my C version (which is slower).
159 * The reason for this flag is that when the particular C compiler
160 * library routine is used, and the library is linked with a different
161 * compiler, the library is missing. This mostly happens when the
162 * library is built with gcc and then linked using normal cc. This would
163 * be a common occurrence because gcc normally produces code that is
164 * 2 times faster than system compilers for the big number stuff.
165 * For machines with only one compiler (or shared libraries), this should
166 * be on. Again this in only really a problem on machines
167 * using "long long's", are 32bit, and are not using my assembler code. */
168#if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \
169 defined(OPENSSL_SYS_WIN32) || defined(linux)
170# ifndef BN_DIV2W
171# define BN_DIV2W
172# endif
173#endif
174
175/* assuming long is 64bit - this is the DEC Alpha
176 * unsigned long long is only 64 bits :-(, don't define
177 * BN_LLONG for the DEC Alpha */
178#ifdef SIXTY_FOUR_BIT_LONG
179#define BN_ULLONG unsigned long long
180#define BN_ULONG unsigned long
181#define BN_LONG long
182#define BN_BITS 128
183#define BN_BYTES 8
184#define BN_BITS2 64
185#define BN_BITS4 32
186#define BN_MASK (0xffffffffffffffffffffffffffffffffLL)
187#define BN_MASK2 (0xffffffffffffffffL)
188#define BN_MASK2l (0xffffffffL)
189#define BN_MASK2h (0xffffffff00000000L)
190#define BN_MASK2h1 (0xffffffff80000000L)
191#define BN_TBIT (0x8000000000000000L)
192#define BN_DEC_CONV (10000000000000000000UL)
193#define BN_DEC_FMT1 "%lu"
194#define BN_DEC_FMT2 "%019lu"
195#define BN_DEC_NUM 19
196#define BN_HEX_FMT1 "%lX"
197#define BN_HEX_FMT2 "%016lX"
198#endif
199
200/* This is where the long long data type is 64 bits, but long is 32.
201 * For machines where there are 64bit registers, this is the mode to use.
202 * IRIX, on R4000 and above should use this mode, along with the relevant
203 * assembler code :-). Do NOT define BN_LLONG.
204 */
205#ifdef SIXTY_FOUR_BIT
206#undef BN_LLONG
207#undef BN_ULLONG
208#define BN_ULONG unsigned long long
209#define BN_LONG long long
210#define BN_BITS 128
211#define BN_BYTES 8
212#define BN_BITS2 64
213#define BN_BITS4 32
214#define BN_MASK2 (0xffffffffffffffffLL)
215#define BN_MASK2l (0xffffffffL)
216#define BN_MASK2h (0xffffffff00000000LL)
217#define BN_MASK2h1 (0xffffffff80000000LL)
218#define BN_TBIT (0x8000000000000000LL)
219#define BN_DEC_CONV (10000000000000000000ULL)
220#define BN_DEC_FMT1 "%llu"
221#define BN_DEC_FMT2 "%019llu"
222#define BN_DEC_NUM 19
223#define BN_HEX_FMT1 "%llX"
224#define BN_HEX_FMT2 "%016llX"
225#endif
226
227#ifdef THIRTY_TWO_BIT
228#ifdef BN_LLONG
229# if defined(_WIN32) && !defined(__GNUC__)
230# define BN_ULLONG unsigned __int64
231# define BN_MASK (0xffffffffffffffffI64)
232# else
233# define BN_ULLONG unsigned long long
234# define BN_MASK (0xffffffffffffffffLL)
235# endif
236#endif
237#define BN_ULONG unsigned int
238#define BN_LONG int
239#define BN_BITS 64
240#define BN_BYTES 4
241#define BN_BITS2 32
242#define BN_BITS4 16
243#define BN_MASK2 (0xffffffffL)
244#define BN_MASK2l (0xffff)
245#define BN_MASK2h1 (0xffff8000L)
246#define BN_MASK2h (0xffff0000L)
247#define BN_TBIT (0x80000000L)
248#define BN_DEC_CONV (1000000000L)
249#define BN_DEC_FMT1 "%u"
250#define BN_DEC_FMT2 "%09u"
251#define BN_DEC_NUM 9
252#define BN_HEX_FMT1 "%X"
253#define BN_HEX_FMT2 "%08X"
254#endif
255
256/* 2011-02-22 SMS.
257 * In various places, a size_t variable or a type cast to size_t was
258 * used to perform integer-only operations on pointers. This failed on
259 * VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t is
260 * still only 32 bits. What's needed in these cases is an integer type
261 * with the same size as a pointer, which size_t is not certain to be.
262 * The only fix here is VMS-specific.
263 */
264#if defined(OPENSSL_SYS_VMS)
265# if __INITIAL_POINTER_SIZE == 64
266# define PTR_SIZE_INT long long
267# else /* __INITIAL_POINTER_SIZE == 64 */
268# define PTR_SIZE_INT int
269# endif /* __INITIAL_POINTER_SIZE == 64 [else] */
270#else /* defined(OPENSSL_SYS_VMS) */
271# define PTR_SIZE_INT size_t
272#endif /* defined(OPENSSL_SYS_VMS) [else] */
273
274#define BN_DEFAULT_BITS 1280
275
276#define BN_FLG_MALLOCED 0x01
277#define BN_FLG_STATIC_DATA 0x02
278#define BN_FLG_CONSTTIME 0x04 /* avoid leaking exponent information through timing,
279 * BN_mod_exp_mont() will call BN_mod_exp_mont_consttime,
280 * BN_div() will call BN_div_no_branch,
281 * BN_mod_inverse() will call BN_mod_inverse_no_branch.
282 */
283
284#ifndef OPENSSL_NO_DEPRECATED
285#define BN_FLG_EXP_CONSTTIME BN_FLG_CONSTTIME /* deprecated name for the flag */
286 /* avoid leaking exponent information through timings
287 * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */
288#endif
289
290#ifndef OPENSSL_NO_DEPRECATED
291#define BN_FLG_FREE 0x8000 /* used for debuging */
292#endif
293#define BN_set_flags(b,n) ((b)->flags|=(n))
294#define BN_get_flags(b,n) ((b)->flags&(n))
295
296/* get a clone of a BIGNUM with changed flags, for *temporary* use only
297 * (the two BIGNUMs cannot not be used in parallel!) */
298#define BN_with_flags(dest,b,n) ((dest)->d=(b)->d, \
299 (dest)->top=(b)->top, \
300 (dest)->dmax=(b)->dmax, \
301 (dest)->neg=(b)->neg, \
302 (dest)->flags=(((dest)->flags & BN_FLG_MALLOCED) \
303 | ((b)->flags & ~BN_FLG_MALLOCED) \
304 | BN_FLG_STATIC_DATA \
305 | (n)))
306
307/* Already declared in ossl_typ.h */
308#if 0
309typedef struct bignum_st BIGNUM;
310/* Used for temp variables (declaration hidden in bn_lcl.h) */
311typedef struct bignum_ctx BN_CTX;
312typedef struct bn_blinding_st BN_BLINDING;
313typedef struct bn_mont_ctx_st BN_MONT_CTX;
314typedef struct bn_recp_ctx_st BN_RECP_CTX;
315typedef struct bn_gencb_st BN_GENCB;
316#endif
317
318struct bignum_st
319 {
320 BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
321 int top; /* Index of last used d +1. */
322 /* The next are internal book keeping for bn_expand. */
323 int dmax; /* Size of the d array. */
324 int neg; /* one if the number is negative */
325 int flags;
326 };
327
328/* Used for montgomery multiplication */
329struct bn_mont_ctx_st
330 {
331 int ri; /* number of bits in R */
332 BIGNUM RR; /* used to convert to montgomery form */
333 BIGNUM N; /* The modulus */
334 BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1
335 * (Ni is only stored for bignum algorithm) */
336 BN_ULONG n0[2];/* least significant word(s) of Ni;
337 (type changed with 0.9.9, was "BN_ULONG n0;" before) */
338 int flags;
339 };
340
341/* Used for reciprocal division/mod functions
342 * It cannot be shared between threads
343 */
344struct bn_recp_ctx_st
345 {
346 BIGNUM N; /* the divisor */
347 BIGNUM Nr; /* the reciprocal */
348 int num_bits;
349 int shift;
350 int flags;
351 };
352
353/* Used for slow "generation" functions. */
354struct bn_gencb_st
355 {
356 unsigned int ver; /* To handle binary (in)compatibility */
357 void *arg; /* callback-specific data */
358 union
359 {
360 /* if(ver==1) - handles old style callbacks */
361 void (*cb_1)(int, int, void *);
362 /* if(ver==2) - new callback style */
363 int (*cb_2)(int, int, BN_GENCB *);
364 } cb;
365 };
366/* Wrapper function to make using BN_GENCB easier, */
367int BN_GENCB_call(BN_GENCB *cb, int a, int b);
368/* Macro to populate a BN_GENCB structure with an "old"-style callback */
369#define BN_GENCB_set_old(gencb, callback, cb_arg) { \
370 BN_GENCB *tmp_gencb = (gencb); \
371 tmp_gencb->ver = 1; \
372 tmp_gencb->arg = (cb_arg); \
373 tmp_gencb->cb.cb_1 = (callback); }
374/* Macro to populate a BN_GENCB structure with a "new"-style callback */
375#define BN_GENCB_set(gencb, callback, cb_arg) { \
376 BN_GENCB *tmp_gencb = (gencb); \
377 tmp_gencb->ver = 2; \
378 tmp_gencb->arg = (cb_arg); \
379 tmp_gencb->cb.cb_2 = (callback); }
380
381#define BN_prime_checks 0 /* default: select number of iterations
382 based on the size of the number */
383
384/* number of Miller-Rabin iterations for an error rate of less than 2^-80
385 * for random 'b'-bit input, b >= 100 (taken from table 4.4 in the Handbook
386 * of Applied Cryptography [Menezes, van Oorschot, Vanstone; CRC Press 1996];
387 * original paper: Damgaard, Landrock, Pomerance: Average case error estimates
388 * for the strong probable prime test. -- Math. Comp. 61 (1993) 177-194) */
389#define BN_prime_checks_for_size(b) ((b) >= 1300 ? 2 : \
390 (b) >= 850 ? 3 : \
391 (b) >= 650 ? 4 : \
392 (b) >= 550 ? 5 : \
393 (b) >= 450 ? 6 : \
394 (b) >= 400 ? 7 : \
395 (b) >= 350 ? 8 : \
396 (b) >= 300 ? 9 : \
397 (b) >= 250 ? 12 : \
398 (b) >= 200 ? 15 : \
399 (b) >= 150 ? 18 : \
400 /* b >= 100 */ 27)
401
402#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
403
404/* Note that BN_abs_is_word didn't work reliably for w == 0 until 0.9.8 */
405#define BN_abs_is_word(a,w) ((((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w))) || \
406 (((w) == 0) && ((a)->top == 0)))
407#define BN_is_zero(a) ((a)->top == 0)
408#define BN_is_one(a) (BN_abs_is_word((a),1) && !(a)->neg)
409#define BN_is_word(a,w) (BN_abs_is_word((a),(w)) && (!(w) || !(a)->neg))
410#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
411
412#define BN_one(a) (BN_set_word((a),1))
413#define BN_zero_ex(a) \
414 do { \
415 BIGNUM *_tmp_bn = (a); \
416 _tmp_bn->top = 0; \
417 _tmp_bn->neg = 0; \
418 } while(0)
419#ifdef OPENSSL_NO_DEPRECATED
420#define BN_zero(a) BN_zero_ex(a)
421#else
422#define BN_zero(a) (BN_set_word((a),0))
423#endif
424
425const BIGNUM *BN_value_one(void);
426char * BN_options(void);
427BN_CTX *BN_CTX_new(void);
428#ifndef OPENSSL_NO_DEPRECATED
429void BN_CTX_init(BN_CTX *c);
430#endif
431void BN_CTX_free(BN_CTX *c);
432void BN_CTX_start(BN_CTX *ctx);
433BIGNUM *BN_CTX_get(BN_CTX *ctx);
434void BN_CTX_end(BN_CTX *ctx);
435int BN_rand(BIGNUM *rnd, int bits, int top,int bottom);
436int BN_pseudo_rand(BIGNUM *rnd, int bits, int top,int bottom);
437int BN_rand_range(BIGNUM *rnd, const BIGNUM *range);
438int BN_pseudo_rand_range(BIGNUM *rnd, const BIGNUM *range);
439int BN_num_bits(const BIGNUM *a);
440int BN_num_bits_word(BN_ULONG);
441BIGNUM *BN_new(void);
442void BN_init(BIGNUM *);
443void BN_clear_free(BIGNUM *a);
444BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b);
445void BN_swap(BIGNUM *a, BIGNUM *b);
446BIGNUM *BN_bin2bn(const unsigned char *s,int len,BIGNUM *ret);
447int BN_bn2bin(const BIGNUM *a, unsigned char *to);
448BIGNUM *BN_mpi2bn(const unsigned char *s,int len,BIGNUM *ret);
449int BN_bn2mpi(const BIGNUM *a, unsigned char *to);
450int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
451int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
452int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
453int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
454int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
455int BN_sqr(BIGNUM *r, const BIGNUM *a,BN_CTX *ctx);
456/** BN_set_negative sets sign of a BIGNUM
457 * \param b pointer to the BIGNUM object
458 * \param n 0 if the BIGNUM b should be positive and a value != 0 otherwise
459 */
460void BN_set_negative(BIGNUM *b, int n);
461/** BN_is_negative returns 1 if the BIGNUM is negative
462 * \param a pointer to the BIGNUM object
463 * \return 1 if a < 0 and 0 otherwise
464 */
465#define BN_is_negative(a) ((a)->neg != 0)
466
467int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
468 BN_CTX *ctx);
469#define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
470int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx);
471int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
472int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
473int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
474int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
475int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
476 const BIGNUM *m, BN_CTX *ctx);
477int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
478int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
479int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m);
480int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx);
481int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m);
482
483BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w);
484BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w);
485int BN_mul_word(BIGNUM *a, BN_ULONG w);
486int BN_add_word(BIGNUM *a, BN_ULONG w);
487int BN_sub_word(BIGNUM *a, BN_ULONG w);
488int BN_set_word(BIGNUM *a, BN_ULONG w);
489BN_ULONG BN_get_word(const BIGNUM *a);
490
491int BN_cmp(const BIGNUM *a, const BIGNUM *b);
492void BN_free(BIGNUM *a);
493int BN_is_bit_set(const BIGNUM *a, int n);
494int BN_lshift(BIGNUM *r, const BIGNUM *a, int n);
495int BN_lshift1(BIGNUM *r, const BIGNUM *a);
496int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,BN_CTX *ctx);
497
498int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
499 const BIGNUM *m,BN_CTX *ctx);
500int BN_mod_exp_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
501 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
502int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
503 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont);
504int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p,
505 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
506int BN_mod_exp2_mont(BIGNUM *r, const BIGNUM *a1, const BIGNUM *p1,
507 const BIGNUM *a2, const BIGNUM *p2,const BIGNUM *m,
508 BN_CTX *ctx,BN_MONT_CTX *m_ctx);
509int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
510 const BIGNUM *m,BN_CTX *ctx);
511
512int BN_mask_bits(BIGNUM *a,int n);
513#ifndef OPENSSL_NO_FP_API
514int BN_print_fp(FILE *fp, const BIGNUM *a);
515#endif
516#ifdef HEADER_BIO_H
517int BN_print(BIO *fp, const BIGNUM *a);
518#else
519int BN_print(void *fp, const BIGNUM *a);
520#endif
521int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx);
522int BN_rshift(BIGNUM *r, const BIGNUM *a, int n);
523int BN_rshift1(BIGNUM *r, const BIGNUM *a);
524void BN_clear(BIGNUM *a);
525BIGNUM *BN_dup(const BIGNUM *a);
526int BN_ucmp(const BIGNUM *a, const BIGNUM *b);
527int BN_set_bit(BIGNUM *a, int n);
528int BN_clear_bit(BIGNUM *a, int n);
529char * BN_bn2hex(const BIGNUM *a);
530char * BN_bn2dec(const BIGNUM *a);
531int BN_hex2bn(BIGNUM **a, const char *str);
532int BN_dec2bn(BIGNUM **a, const char *str);
533int BN_asc2bn(BIGNUM **a, const char *str);
534int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx);
535int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */
536BIGNUM *BN_mod_inverse(BIGNUM *ret,
537 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
538BIGNUM *BN_mod_sqrt(BIGNUM *ret,
539 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
540
541/* Deprecated versions */
542#ifndef OPENSSL_NO_DEPRECATED
543BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe,
544 const BIGNUM *add, const BIGNUM *rem,
545 void (*callback)(int,int,void *),void *cb_arg);
546int BN_is_prime(const BIGNUM *p,int nchecks,
547 void (*callback)(int,int,void *),
548 BN_CTX *ctx,void *cb_arg);
549int BN_is_prime_fasttest(const BIGNUM *p,int nchecks,
550 void (*callback)(int,int,void *),BN_CTX *ctx,void *cb_arg,
551 int do_trial_division);
552#endif /* !defined(OPENSSL_NO_DEPRECATED) */
553
554/* Newer versions */
555int BN_generate_prime_ex(BIGNUM *ret,int bits,int safe, const BIGNUM *add,
556 const BIGNUM *rem, BN_GENCB *cb);
557int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb);
558int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx,
559 int do_trial_division, BN_GENCB *cb);
560
561BN_MONT_CTX *BN_MONT_CTX_new(void );
562void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
563int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,
564 BN_MONT_CTX *mont, BN_CTX *ctx);
565#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\
566 (r),(a),&((mont)->RR),(mont),(ctx))
567int BN_from_montgomery(BIGNUM *r,const BIGNUM *a,
568 BN_MONT_CTX *mont, BN_CTX *ctx);
569void BN_MONT_CTX_free(BN_MONT_CTX *mont);
570int BN_MONT_CTX_set(BN_MONT_CTX *mont,const BIGNUM *mod,BN_CTX *ctx);
571BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from);
572BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
573 const BIGNUM *mod, BN_CTX *ctx);
574
575/* BN_BLINDING flags */
576#define BN_BLINDING_NO_UPDATE 0x00000001
577#define BN_BLINDING_NO_RECREATE 0x00000002
578
579BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod);
580void BN_BLINDING_free(BN_BLINDING *b);
581int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
582int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
583int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
584int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *);
585int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *);
586#ifndef OPENSSL_NO_DEPRECATED
587unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *);
588void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long);
589#endif
590CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *);
591unsigned long BN_BLINDING_get_flags(const BN_BLINDING *);
592void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long);
593BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
594 const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
595 int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
596 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
597 BN_MONT_CTX *m_ctx);
598
599#ifndef OPENSSL_NO_DEPRECATED
600void BN_set_params(int mul,int high,int low,int mont);
601int BN_get_params(int which); /* 0, mul, 1 high, 2 low, 3 mont */
602#endif
603
604void BN_RECP_CTX_init(BN_RECP_CTX *recp);
605BN_RECP_CTX *BN_RECP_CTX_new(void);
606void BN_RECP_CTX_free(BN_RECP_CTX *recp);
607int BN_RECP_CTX_set(BN_RECP_CTX *recp,const BIGNUM *rdiv,BN_CTX *ctx);
608int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
609 BN_RECP_CTX *recp,BN_CTX *ctx);
610int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
611 const BIGNUM *m, BN_CTX *ctx);
612int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
613 BN_RECP_CTX *recp, BN_CTX *ctx);
614
615/* Functions for arithmetic over binary polynomials represented by BIGNUMs.
616 *
617 * The BIGNUM::neg property of BIGNUMs representing binary polynomials is
618 * ignored.
619 *
620 * Note that input arguments are not const so that their bit arrays can
621 * be expanded to the appropriate size if needed.
622 */
623
624int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); /*r = a + b*/
625#define BN_GF2m_sub(r, a, b) BN_GF2m_add(r, a, b)
626int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p); /*r=a mod p*/
627int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
628 const BIGNUM *p, BN_CTX *ctx); /* r = (a * b) mod p */
629int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
630 BN_CTX *ctx); /* r = (a * a) mod p */
631int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *b, const BIGNUM *p,
632 BN_CTX *ctx); /* r = (1 / b) mod p */
633int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
634 const BIGNUM *p, BN_CTX *ctx); /* r = (a / b) mod p */
635int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
636 const BIGNUM *p, BN_CTX *ctx); /* r = (a ^ b) mod p */
637int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
638 BN_CTX *ctx); /* r = sqrt(a) mod p */
639int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
640 BN_CTX *ctx); /* r^2 + r = a mod p */
641#define BN_GF2m_cmp(a, b) BN_ucmp((a), (b))
642/* Some functions allow for representation of the irreducible polynomials
643 * as an unsigned int[], say p. The irreducible f(t) is then of the form:
644 * t^p[0] + t^p[1] + ... + t^p[k]
645 * where m = p[0] > p[1] > ... > p[k] = 0.
646 */
647int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]);
648 /* r = a mod p */
649int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
650 const int p[], BN_CTX *ctx); /* r = (a * b) mod p */
651int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[],
652 BN_CTX *ctx); /* r = (a * a) mod p */
653int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const int p[],
654 BN_CTX *ctx); /* r = (1 / b) mod p */
655int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
656 const int p[], BN_CTX *ctx); /* r = (a / b) mod p */
657int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
658 const int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */
659int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a,
660 const int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */
661int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a,
662 const int p[], BN_CTX *ctx); /* r^2 + r = a mod p */
663int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max);
664int BN_GF2m_arr2poly(const int p[], BIGNUM *a);
665
666/* faster mod functions for the 'NIST primes'
667 * 0 <= a < p^2 */
668int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
669int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
670int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
671int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
672int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
673
674const BIGNUM *BN_get0_nist_prime_192(void);
675const BIGNUM *BN_get0_nist_prime_224(void);
676const BIGNUM *BN_get0_nist_prime_256(void);
677const BIGNUM *BN_get0_nist_prime_384(void);
678const BIGNUM *BN_get0_nist_prime_521(void);
679
680/* library internal functions */
681
682#define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\
683 (a):bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2))
684#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words)))
685BIGNUM *bn_expand2(BIGNUM *a, int words);
686#ifndef OPENSSL_NO_DEPRECATED
687BIGNUM *bn_dup_expand(const BIGNUM *a, int words); /* unused */
688#endif
689
690/* Bignum consistency macros
691 * There is one "API" macro, bn_fix_top(), for stripping leading zeroes from
692 * bignum data after direct manipulations on the data. There is also an
693 * "internal" macro, bn_check_top(), for verifying that there are no leading
694 * zeroes. Unfortunately, some auditing is required due to the fact that
695 * bn_fix_top() has become an overabused duct-tape because bignum data is
696 * occasionally passed around in an inconsistent state. So the following
697 * changes have been made to sort this out;
698 * - bn_fix_top()s implementation has been moved to bn_correct_top()
699 * - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and
700 * bn_check_top() is as before.
701 * - if BN_DEBUG *is* defined;
702 * - bn_check_top() tries to pollute unused words even if the bignum 'top' is
703 * consistent. (ed: only if BN_DEBUG_RAND is defined)
704 * - bn_fix_top() maps to bn_check_top() rather than "fixing" anything.
705 * The idea is to have debug builds flag up inconsistent bignums when they
706 * occur. If that occurs in a bn_fix_top(), we examine the code in question; if
707 * the use of bn_fix_top() was appropriate (ie. it follows directly after code
708 * that manipulates the bignum) it is converted to bn_correct_top(), and if it
709 * was not appropriate, we convert it permanently to bn_check_top() and track
710 * down the cause of the bug. Eventually, no internal code should be using the
711 * bn_fix_top() macro. External applications and libraries should try this with
712 * their own code too, both in terms of building against the openssl headers
713 * with BN_DEBUG defined *and* linking with a version of OpenSSL built with it
714 * defined. This not only improves external code, it provides more test
715 * coverage for openssl's own code.
716 */
717
718#ifdef BN_DEBUG
719
720/* We only need assert() when debugging */
721#include <assert.h>
722
723#ifdef BN_DEBUG_RAND
724/* To avoid "make update" cvs wars due to BN_DEBUG, use some tricks */
725#ifndef RAND_pseudo_bytes
726int RAND_pseudo_bytes(unsigned char *buf,int num);
727#define BN_DEBUG_TRIX
728#endif
729#define bn_pollute(a) \
730 do { \
731 const BIGNUM *_bnum1 = (a); \
732 if(_bnum1->top < _bnum1->dmax) { \
733 unsigned char _tmp_char; \
734 /* We cast away const without the compiler knowing, any \
735 * *genuinely* constant variables that aren't mutable \
736 * wouldn't be constructed with top!=dmax. */ \
737 BN_ULONG *_not_const; \
738 memcpy(&_not_const, &_bnum1->d, sizeof(BN_ULONG*)); \
739 RAND_pseudo_bytes(&_tmp_char, 1); \
740 memset((unsigned char *)(_not_const + _bnum1->top), _tmp_char, \
741 (_bnum1->dmax - _bnum1->top) * sizeof(BN_ULONG)); \
742 } \
743 } while(0)
744#ifdef BN_DEBUG_TRIX
745#undef RAND_pseudo_bytes
746#endif
747#else
748#define bn_pollute(a)
749#endif
750#define bn_check_top(a) \
751 do { \
752 const BIGNUM *_bnum2 = (a); \
753 if (_bnum2 != NULL) { \
754 assert((_bnum2->top == 0) || \
755 (_bnum2->d[_bnum2->top - 1] != 0)); \
756 bn_pollute(_bnum2); \
757 } \
758 } while(0)
759
760#define bn_fix_top(a) bn_check_top(a)
761
762#else /* !BN_DEBUG */
763
764#define bn_pollute(a)
765#define bn_check_top(a)
766#define bn_fix_top(a) bn_correct_top(a)
767
768#endif
769
770#define bn_correct_top(a) \
771 { \
772 BN_ULONG *ftl; \
773 int tmp_top = (a)->top; \
774 if (tmp_top > 0) \
775 { \
776 for (ftl= &((a)->d[tmp_top-1]); tmp_top > 0; tmp_top--) \
777 if (*(ftl--)) break; \
778 (a)->top = tmp_top; \
779 } \
780 bn_pollute(a); \
781 }
782
783BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
784BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
785void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
786BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
787BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
788BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
789
790/* Primes from RFC 2409 */
791BIGNUM *get_rfc2409_prime_768(BIGNUM *bn);
792BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn);
793
794/* Primes from RFC 3526 */
795BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn);
796BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn);
797BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn);
798BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn);
799BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn);
800BIGNUM *get_rfc3526_prime_8192(BIGNUM *bn);
801
802int BN_bntest_rand(BIGNUM *rnd, int bits, int top,int bottom);
803
804/* BEGIN ERROR CODES */
805/* The following lines are auto generated by the script mkerr.pl. Any changes
806 * made after this point may be overwritten when the script is next run.
807 */
808void ERR_load_BN_strings(void);
809
810/* Error codes for the BN functions. */
811
812/* Function codes. */
813#define BN_F_BNRAND 127
814#define BN_F_BN_BLINDING_CONVERT_EX 100
815#define BN_F_BN_BLINDING_CREATE_PARAM 128
816#define BN_F_BN_BLINDING_INVERT_EX 101
817#define BN_F_BN_BLINDING_NEW 102
818#define BN_F_BN_BLINDING_UPDATE 103
819#define BN_F_BN_BN2DEC 104
820#define BN_F_BN_BN2HEX 105
821#define BN_F_BN_CTX_GET 116
822#define BN_F_BN_CTX_NEW 106
823#define BN_F_BN_CTX_START 129
824#define BN_F_BN_DIV 107
825#define BN_F_BN_DIV_NO_BRANCH 138
826#define BN_F_BN_DIV_RECP 130
827#define BN_F_BN_EXP 123
828#define BN_F_BN_EXPAND2 108
829#define BN_F_BN_EXPAND_INTERNAL 120
830#define BN_F_BN_GF2M_MOD 131
831#define BN_F_BN_GF2M_MOD_EXP 132
832#define BN_F_BN_GF2M_MOD_MUL 133
833#define BN_F_BN_GF2M_MOD_SOLVE_QUAD 134
834#define BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR 135
835#define BN_F_BN_GF2M_MOD_SQR 136
836#define BN_F_BN_GF2M_MOD_SQRT 137
837#define BN_F_BN_MOD_EXP2_MONT 118
838#define BN_F_BN_MOD_EXP_MONT 109
839#define BN_F_BN_MOD_EXP_MONT_CONSTTIME 124
840#define BN_F_BN_MOD_EXP_MONT_WORD 117
841#define BN_F_BN_MOD_EXP_RECP 125
842#define BN_F_BN_MOD_EXP_SIMPLE 126
843#define BN_F_BN_MOD_INVERSE 110
844#define BN_F_BN_MOD_INVERSE_NO_BRANCH 139
845#define BN_F_BN_MOD_LSHIFT_QUICK 119
846#define BN_F_BN_MOD_MUL_RECIPROCAL 111
847#define BN_F_BN_MOD_SQRT 121
848#define BN_F_BN_MPI2BN 112
849#define BN_F_BN_NEW 113
850#define BN_F_BN_RAND 114
851#define BN_F_BN_RAND_RANGE 122
852#define BN_F_BN_USUB 115
853
854/* Reason codes. */
855#define BN_R_ARG2_LT_ARG3 100
856#define BN_R_BAD_RECIPROCAL 101
857#define BN_R_BIGNUM_TOO_LONG 114
858#define BN_R_CALLED_WITH_EVEN_MODULUS 102
859#define BN_R_DIV_BY_ZERO 103
860#define BN_R_ENCODING_ERROR 104
861#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
862#define BN_R_INPUT_NOT_REDUCED 110
863#define BN_R_INVALID_LENGTH 106
864#define BN_R_INVALID_RANGE 115
865#define BN_R_NOT_A_SQUARE 111
866#define BN_R_NOT_INITIALIZED 107
867#define BN_R_NO_INVERSE 108
868#define BN_R_NO_SOLUTION 116
869#define BN_R_P_IS_NOT_PRIME 112
870#define BN_R_TOO_MANY_ITERATIONS 113
871#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109
872
873#ifdef __cplusplus
874}
875#endif
876#endif
diff --git a/src/lib/libcrypto/bn/bn.mul b/src/lib/libcrypto/bn/bn.mul
deleted file mode 100644
index 9728870d38..0000000000
--- a/src/lib/libcrypto/bn/bn.mul
+++ /dev/null
@@ -1,19 +0,0 @@
1We need
2
3* bn_mul_comba8
4* bn_mul_comba4
5* bn_mul_normal
6* bn_mul_recursive
7
8* bn_sqr_comba8
9* bn_sqr_comba4
10bn_sqr_normal -> BN_sqr
11* bn_sqr_recursive
12
13* bn_mul_low_recursive
14* bn_mul_low_normal
15* bn_mul_high
16
17* bn_mul_part_recursive # symetric but not power of 2
18
19bn_mul_asymetric_recursive # uneven, but do the chop up.
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c
deleted file mode 100644
index 9405163706..0000000000
--- a/src/lib/libcrypto/bn/bn_add.c
+++ /dev/null
@@ -1,313 +0,0 @@
1/* crypto/bn/bn_add.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* r can == a or b */
64int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
65 {
66 const BIGNUM *tmp;
67 int a_neg = a->neg, ret;
68
69 bn_check_top(a);
70 bn_check_top(b);
71
72 /* a + b a+b
73 * a + -b a-b
74 * -a + b b-a
75 * -a + -b -(a+b)
76 */
77 if (a_neg ^ b->neg)
78 {
79 /* only one is negative */
80 if (a_neg)
81 { tmp=a; a=b; b=tmp; }
82
83 /* we are now a - b */
84
85 if (BN_ucmp(a,b) < 0)
86 {
87 if (!BN_usub(r,b,a)) return(0);
88 r->neg=1;
89 }
90 else
91 {
92 if (!BN_usub(r,a,b)) return(0);
93 r->neg=0;
94 }
95 return(1);
96 }
97
98 ret = BN_uadd(r,a,b);
99 r->neg = a_neg;
100 bn_check_top(r);
101 return ret;
102 }
103
104/* unsigned add of b to a */
105int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
106 {
107 int max,min,dif;
108 BN_ULONG *ap,*bp,*rp,carry,t1,t2;
109 const BIGNUM *tmp;
110
111 bn_check_top(a);
112 bn_check_top(b);
113
114 if (a->top < b->top)
115 { tmp=a; a=b; b=tmp; }
116 max = a->top;
117 min = b->top;
118 dif = max - min;
119
120 if (bn_wexpand(r,max+1) == NULL)
121 return 0;
122
123 r->top=max;
124
125
126 ap=a->d;
127 bp=b->d;
128 rp=r->d;
129
130 carry=bn_add_words(rp,ap,bp,min);
131 rp+=min;
132 ap+=min;
133 bp+=min;
134
135 if (carry)
136 {
137 while (dif)
138 {
139 dif--;
140 t1 = *(ap++);
141 t2 = (t1+1) & BN_MASK2;
142 *(rp++) = t2;
143 if (t2)
144 {
145 carry=0;
146 break;
147 }
148 }
149 if (carry)
150 {
151 /* carry != 0 => dif == 0 */
152 *rp = 1;
153 r->top++;
154 }
155 }
156 if (dif && rp != ap)
157 while (dif--)
158 /* copy remaining words if ap != rp */
159 *(rp++) = *(ap++);
160 r->neg = 0;
161 bn_check_top(r);
162 return 1;
163 }
164
165/* unsigned subtraction of b from a, a must be larger than b. */
166int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
167 {
168 int max,min,dif;
169 register BN_ULONG t1,t2,*ap,*bp,*rp;
170 int i,carry;
171#if defined(IRIX_CC_BUG) && !defined(LINT)
172 int dummy;
173#endif
174
175 bn_check_top(a);
176 bn_check_top(b);
177
178 max = a->top;
179 min = b->top;
180 dif = max - min;
181
182 if (dif < 0) /* hmm... should not be happening */
183 {
184 BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3);
185 return(0);
186 }
187
188 if (bn_wexpand(r,max) == NULL) return(0);
189
190 ap=a->d;
191 bp=b->d;
192 rp=r->d;
193
194#if 1
195 carry=0;
196 for (i = min; i != 0; i--)
197 {
198 t1= *(ap++);
199 t2= *(bp++);
200 if (carry)
201 {
202 carry=(t1 <= t2);
203 t1=(t1-t2-1)&BN_MASK2;
204 }
205 else
206 {
207 carry=(t1 < t2);
208 t1=(t1-t2)&BN_MASK2;
209 }
210#if defined(IRIX_CC_BUG) && !defined(LINT)
211 dummy=t1;
212#endif
213 *(rp++)=t1&BN_MASK2;
214 }
215#else
216 carry=bn_sub_words(rp,ap,bp,min);
217 ap+=min;
218 bp+=min;
219 rp+=min;
220#endif
221 if (carry) /* subtracted */
222 {
223 if (!dif)
224 /* error: a < b */
225 return 0;
226 while (dif)
227 {
228 dif--;
229 t1 = *(ap++);
230 t2 = (t1-1)&BN_MASK2;
231 *(rp++) = t2;
232 if (t1)
233 break;
234 }
235 }
236#if 0
237 memcpy(rp,ap,sizeof(*rp)*(max-i));
238#else
239 if (rp != ap)
240 {
241 for (;;)
242 {
243 if (!dif--) break;
244 rp[0]=ap[0];
245 if (!dif--) break;
246 rp[1]=ap[1];
247 if (!dif--) break;
248 rp[2]=ap[2];
249 if (!dif--) break;
250 rp[3]=ap[3];
251 rp+=4;
252 ap+=4;
253 }
254 }
255#endif
256
257 r->top=max;
258 r->neg=0;
259 bn_correct_top(r);
260 return(1);
261 }
262
263int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
264 {
265 int max;
266 int add=0,neg=0;
267 const BIGNUM *tmp;
268
269 bn_check_top(a);
270 bn_check_top(b);
271
272 /* a - b a-b
273 * a - -b a+b
274 * -a - b -(a+b)
275 * -a - -b b-a
276 */
277 if (a->neg)
278 {
279 if (b->neg)
280 { tmp=a; a=b; b=tmp; }
281 else
282 { add=1; neg=1; }
283 }
284 else
285 {
286 if (b->neg) { add=1; neg=0; }
287 }
288
289 if (add)
290 {
291 if (!BN_uadd(r,a,b)) return(0);
292 r->neg=neg;
293 return(1);
294 }
295
296 /* We are actually doing a - b :-) */
297
298 max=(a->top > b->top)?a->top:b->top;
299 if (bn_wexpand(r,max) == NULL) return(0);
300 if (BN_ucmp(a,b) < 0)
301 {
302 if (!BN_usub(r,b,a)) return(0);
303 r->neg=1;
304 }
305 else
306 {
307 if (!BN_usub(r,a,b)) return(0);
308 r->neg=0;
309 }
310 bn_check_top(r);
311 return(1);
312 }
313
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
deleted file mode 100644
index c43c91cc09..0000000000
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ /dev/null
@@ -1,1030 +0,0 @@
1/* crypto/bn/bn_asm.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
70
71BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
72 {
73 BN_ULONG c1=0;
74
75 assert(num >= 0);
76 if (num <= 0) return(c1);
77
78#ifndef OPENSSL_SMALL_FOOTPRINT
79 while (num&~3)
80 {
81 mul_add(rp[0],ap[0],w,c1);
82 mul_add(rp[1],ap[1],w,c1);
83 mul_add(rp[2],ap[2],w,c1);
84 mul_add(rp[3],ap[3],w,c1);
85 ap+=4; rp+=4; num-=4;
86 }
87#endif
88 while (num)
89 {
90 mul_add(rp[0],ap[0],w,c1);
91 ap++; rp++; num--;
92 }
93
94 return(c1);
95 }
96
97BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
98 {
99 BN_ULONG c1=0;
100
101 assert(num >= 0);
102 if (num <= 0) return(c1);
103
104#ifndef OPENSSL_SMALL_FOOTPRINT
105 while (num&~3)
106 {
107 mul(rp[0],ap[0],w,c1);
108 mul(rp[1],ap[1],w,c1);
109 mul(rp[2],ap[2],w,c1);
110 mul(rp[3],ap[3],w,c1);
111 ap+=4; rp+=4; num-=4;
112 }
113#endif
114 while (num)
115 {
116 mul(rp[0],ap[0],w,c1);
117 ap++; rp++; num--;
118 }
119 return(c1);
120 }
121
122void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
123 {
124 assert(n >= 0);
125 if (n <= 0) return;
126
127#ifndef OPENSSL_SMALL_FOOTPRINT
128 while (n&~3)
129 {
130 sqr(r[0],r[1],a[0]);
131 sqr(r[2],r[3],a[1]);
132 sqr(r[4],r[5],a[2]);
133 sqr(r[6],r[7],a[3]);
134 a+=4; r+=8; n-=4;
135 }
136#endif
137 while (n)
138 {
139 sqr(r[0],r[1],a[0]);
140 a++; r+=2; n--;
141 }
142 }
143
144#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
145
146BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
147 {
148 BN_ULONG c=0;
149 BN_ULONG bl,bh;
150
151 assert(num >= 0);
152 if (num <= 0) return((BN_ULONG)0);
153
154 bl=LBITS(w);
155 bh=HBITS(w);
156
157#ifndef OPENSSL_SMALL_FOOTPRINT
158 while (num&~3)
159 {
160 mul_add(rp[0],ap[0],bl,bh,c);
161 mul_add(rp[1],ap[1],bl,bh,c);
162 mul_add(rp[2],ap[2],bl,bh,c);
163 mul_add(rp[3],ap[3],bl,bh,c);
164 ap+=4; rp+=4; num-=4;
165 }
166#endif
167 while (num)
168 {
169 mul_add(rp[0],ap[0],bl,bh,c);
170 ap++; rp++; num--;
171 }
172 return(c);
173 }
174
175BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
176 {
177 BN_ULONG carry=0;
178 BN_ULONG bl,bh;
179
180 assert(num >= 0);
181 if (num <= 0) return((BN_ULONG)0);
182
183 bl=LBITS(w);
184 bh=HBITS(w);
185
186#ifndef OPENSSL_SMALL_FOOTPRINT
187 while (num&~3)
188 {
189 mul(rp[0],ap[0],bl,bh,carry);
190 mul(rp[1],ap[1],bl,bh,carry);
191 mul(rp[2],ap[2],bl,bh,carry);
192 mul(rp[3],ap[3],bl,bh,carry);
193 ap+=4; rp+=4; num-=4;
194 }
195#endif
196 while (num)
197 {
198 mul(rp[0],ap[0],bl,bh,carry);
199 ap++; rp++; num--;
200 }
201 return(carry);
202 }
203
204void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
205 {
206 assert(n >= 0);
207 if (n <= 0) return;
208
209#ifndef OPENSSL_SMALL_FOOTPRINT
210 while (n&~3)
211 {
212 sqr64(r[0],r[1],a[0]);
213 sqr64(r[2],r[3],a[1]);
214 sqr64(r[4],r[5],a[2]);
215 sqr64(r[6],r[7],a[3]);
216 a+=4; r+=8; n-=4;
217 }
218#endif
219 while (n)
220 {
221 sqr64(r[0],r[1],a[0]);
222 a++; r+=2; n--;
223 }
224 }
225
226#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
227
228#if defined(BN_LLONG) && defined(BN_DIV2W)
229
230BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
231 {
232 return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
233 }
234
235#else
236
237/* Divide h,l by d and return the result. */
238/* I need to test this some more :-( */
239BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
240 {
241 BN_ULONG dh,dl,q,ret=0,th,tl,t;
242 int i,count=2;
243
244 if (d == 0) return(BN_MASK2);
245
246 i=BN_num_bits_word(d);
247 assert((i == BN_BITS2) || (h <= (BN_ULONG)1<<i));
248
249 i=BN_BITS2-i;
250 if (h >= d) h-=d;
251
252 if (i)
253 {
254 d<<=i;
255 h=(h<<i)|(l>>(BN_BITS2-i));
256 l<<=i;
257 }
258 dh=(d&BN_MASK2h)>>BN_BITS4;
259 dl=(d&BN_MASK2l);
260 for (;;)
261 {
262 if ((h>>BN_BITS4) == dh)
263 q=BN_MASK2l;
264 else
265 q=h/dh;
266
267 th=q*dh;
268 tl=dl*q;
269 for (;;)
270 {
271 t=h-th;
272 if ((t&BN_MASK2h) ||
273 ((tl) <= (
274 (t<<BN_BITS4)|
275 ((l&BN_MASK2h)>>BN_BITS4))))
276 break;
277 q--;
278 th-=dh;
279 tl-=dl;
280 }
281 t=(tl>>BN_BITS4);
282 tl=(tl<<BN_BITS4)&BN_MASK2h;
283 th+=t;
284
285 if (l < tl) th++;
286 l-=tl;
287 if (h < th)
288 {
289 h+=d;
290 q--;
291 }
292 h-=th;
293
294 if (--count == 0) break;
295
296 ret=q<<BN_BITS4;
297 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
298 l=(l&BN_MASK2l)<<BN_BITS4;
299 }
300 ret|=q;
301 return(ret);
302 }
303#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */
304
305#ifdef BN_LLONG
306BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
307 {
308 BN_ULLONG ll=0;
309
310 assert(n >= 0);
311 if (n <= 0) return((BN_ULONG)0);
312
313#ifndef OPENSSL_SMALL_FOOTPRINT
314 while (n&~3)
315 {
316 ll+=(BN_ULLONG)a[0]+b[0];
317 r[0]=(BN_ULONG)ll&BN_MASK2;
318 ll>>=BN_BITS2;
319 ll+=(BN_ULLONG)a[1]+b[1];
320 r[1]=(BN_ULONG)ll&BN_MASK2;
321 ll>>=BN_BITS2;
322 ll+=(BN_ULLONG)a[2]+b[2];
323 r[2]=(BN_ULONG)ll&BN_MASK2;
324 ll>>=BN_BITS2;
325 ll+=(BN_ULLONG)a[3]+b[3];
326 r[3]=(BN_ULONG)ll&BN_MASK2;
327 ll>>=BN_BITS2;
328 a+=4; b+=4; r+=4; n-=4;
329 }
330#endif
331 while (n)
332 {
333 ll+=(BN_ULLONG)a[0]+b[0];
334 r[0]=(BN_ULONG)ll&BN_MASK2;
335 ll>>=BN_BITS2;
336 a++; b++; r++; n--;
337 }
338 return((BN_ULONG)ll);
339 }
340#else /* !BN_LLONG */
341BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
342 {
343 BN_ULONG c,l,t;
344
345 assert(n >= 0);
346 if (n <= 0) return((BN_ULONG)0);
347
348 c=0;
349#ifndef OPENSSL_SMALL_FOOTPRINT
350 while (n&~3)
351 {
352 t=a[0];
353 t=(t+c)&BN_MASK2;
354 c=(t < c);
355 l=(t+b[0])&BN_MASK2;
356 c+=(l < t);
357 r[0]=l;
358 t=a[1];
359 t=(t+c)&BN_MASK2;
360 c=(t < c);
361 l=(t+b[1])&BN_MASK2;
362 c+=(l < t);
363 r[1]=l;
364 t=a[2];
365 t=(t+c)&BN_MASK2;
366 c=(t < c);
367 l=(t+b[2])&BN_MASK2;
368 c+=(l < t);
369 r[2]=l;
370 t=a[3];
371 t=(t+c)&BN_MASK2;
372 c=(t < c);
373 l=(t+b[3])&BN_MASK2;
374 c+=(l < t);
375 r[3]=l;
376 a+=4; b+=4; r+=4; n-=4;
377 }
378#endif
379 while(n)
380 {
381 t=a[0];
382 t=(t+c)&BN_MASK2;
383 c=(t < c);
384 l=(t+b[0])&BN_MASK2;
385 c+=(l < t);
386 r[0]=l;
387 a++; b++; r++; n--;
388 }
389 return((BN_ULONG)c);
390 }
391#endif /* !BN_LLONG */
392
393BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
394 {
395 BN_ULONG t1,t2;
396 int c=0;
397
398 assert(n >= 0);
399 if (n <= 0) return((BN_ULONG)0);
400
401#ifndef OPENSSL_SMALL_FOOTPRINT
402 while (n&~3)
403 {
404 t1=a[0]; t2=b[0];
405 r[0]=(t1-t2-c)&BN_MASK2;
406 if (t1 != t2) c=(t1 < t2);
407 t1=a[1]; t2=b[1];
408 r[1]=(t1-t2-c)&BN_MASK2;
409 if (t1 != t2) c=(t1 < t2);
410 t1=a[2]; t2=b[2];
411 r[2]=(t1-t2-c)&BN_MASK2;
412 if (t1 != t2) c=(t1 < t2);
413 t1=a[3]; t2=b[3];
414 r[3]=(t1-t2-c)&BN_MASK2;
415 if (t1 != t2) c=(t1 < t2);
416 a+=4; b+=4; r+=4; n-=4;
417 }
418#endif
419 while (n)
420 {
421 t1=a[0]; t2=b[0];
422 r[0]=(t1-t2-c)&BN_MASK2;
423 if (t1 != t2) c=(t1 < t2);
424 a++; b++; r++; n--;
425 }
426 return(c);
427 }
428
429#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
430
431#undef bn_mul_comba8
432#undef bn_mul_comba4
433#undef bn_sqr_comba8
434#undef bn_sqr_comba4
435
436/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
437/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
438/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
439/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
440
441#ifdef BN_LLONG
442#define mul_add_c(a,b,c0,c1,c2) \
443 t=(BN_ULLONG)a*b; \
444 t1=(BN_ULONG)Lw(t); \
445 t2=(BN_ULONG)Hw(t); \
446 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
447 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
448
449#define mul_add_c2(a,b,c0,c1,c2) \
450 t=(BN_ULLONG)a*b; \
451 tt=(t+t)&BN_MASK; \
452 if (tt < t) c2++; \
453 t1=(BN_ULONG)Lw(tt); \
454 t2=(BN_ULONG)Hw(tt); \
455 c0=(c0+t1)&BN_MASK2; \
456 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
457 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
458
459#define sqr_add_c(a,i,c0,c1,c2) \
460 t=(BN_ULLONG)a[i]*a[i]; \
461 t1=(BN_ULONG)Lw(t); \
462 t2=(BN_ULONG)Hw(t); \
463 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
464 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
465
466#define sqr_add_c2(a,i,j,c0,c1,c2) \
467 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
468
469#elif defined(BN_UMULT_LOHI)
470
471#define mul_add_c(a,b,c0,c1,c2) { \
472 BN_ULONG ta=(a),tb=(b); \
473 BN_UMULT_LOHI(t1,t2,ta,tb); \
474 c0 += t1; t2 += (c0<t1)?1:0; \
475 c1 += t2; c2 += (c1<t2)?1:0; \
476 }
477
478#define mul_add_c2(a,b,c0,c1,c2) { \
479 BN_ULONG ta=(a),tb=(b),t0; \
480 BN_UMULT_LOHI(t0,t1,ta,tb); \
481 t2 = t1+t1; c2 += (t2<t1)?1:0; \
482 t1 = t0+t0; t2 += (t1<t0)?1:0; \
483 c0 += t1; t2 += (c0<t1)?1:0; \
484 c1 += t2; c2 += (c1<t2)?1:0; \
485 }
486
487#define sqr_add_c(a,i,c0,c1,c2) { \
488 BN_ULONG ta=(a)[i]; \
489 BN_UMULT_LOHI(t1,t2,ta,ta); \
490 c0 += t1; t2 += (c0<t1)?1:0; \
491 c1 += t2; c2 += (c1<t2)?1:0; \
492 }
493
494#define sqr_add_c2(a,i,j,c0,c1,c2) \
495 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
496
497#elif defined(BN_UMULT_HIGH)
498
499#define mul_add_c(a,b,c0,c1,c2) { \
500 BN_ULONG ta=(a),tb=(b); \
501 t1 = ta * tb; \
502 t2 = BN_UMULT_HIGH(ta,tb); \
503 c0 += t1; t2 += (c0<t1)?1:0; \
504 c1 += t2; c2 += (c1<t2)?1:0; \
505 }
506
507#define mul_add_c2(a,b,c0,c1,c2) { \
508 BN_ULONG ta=(a),tb=(b),t0; \
509 t1 = BN_UMULT_HIGH(ta,tb); \
510 t0 = ta * tb; \
511 t2 = t1+t1; c2 += (t2<t1)?1:0; \
512 t1 = t0+t0; t2 += (t1<t0)?1:0; \
513 c0 += t1; t2 += (c0<t1)?1:0; \
514 c1 += t2; c2 += (c1<t2)?1:0; \
515 }
516
517#define sqr_add_c(a,i,c0,c1,c2) { \
518 BN_ULONG ta=(a)[i]; \
519 t1 = ta * ta; \
520 t2 = BN_UMULT_HIGH(ta,ta); \
521 c0 += t1; t2 += (c0<t1)?1:0; \
522 c1 += t2; c2 += (c1<t2)?1:0; \
523 }
524
525#define sqr_add_c2(a,i,j,c0,c1,c2) \
526 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
527
528#else /* !BN_LLONG */
529#define mul_add_c(a,b,c0,c1,c2) \
530 t1=LBITS(a); t2=HBITS(a); \
531 bl=LBITS(b); bh=HBITS(b); \
532 mul64(t1,t2,bl,bh); \
533 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
534 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
535
536#define mul_add_c2(a,b,c0,c1,c2) \
537 t1=LBITS(a); t2=HBITS(a); \
538 bl=LBITS(b); bh=HBITS(b); \
539 mul64(t1,t2,bl,bh); \
540 if (t2 & BN_TBIT) c2++; \
541 t2=(t2+t2)&BN_MASK2; \
542 if (t1 & BN_TBIT) t2++; \
543 t1=(t1+t1)&BN_MASK2; \
544 c0=(c0+t1)&BN_MASK2; \
545 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
546 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
547
548#define sqr_add_c(a,i,c0,c1,c2) \
549 sqr64(t1,t2,(a)[i]); \
550 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
551 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
552
553#define sqr_add_c2(a,i,j,c0,c1,c2) \
554 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
555#endif /* !BN_LLONG */
556
557void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
558 {
559#ifdef BN_LLONG
560 BN_ULLONG t;
561#else
562 BN_ULONG bl,bh;
563#endif
564 BN_ULONG t1,t2;
565 BN_ULONG c1,c2,c3;
566
567 c1=0;
568 c2=0;
569 c3=0;
570 mul_add_c(a[0],b[0],c1,c2,c3);
571 r[0]=c1;
572 c1=0;
573 mul_add_c(a[0],b[1],c2,c3,c1);
574 mul_add_c(a[1],b[0],c2,c3,c1);
575 r[1]=c2;
576 c2=0;
577 mul_add_c(a[2],b[0],c3,c1,c2);
578 mul_add_c(a[1],b[1],c3,c1,c2);
579 mul_add_c(a[0],b[2],c3,c1,c2);
580 r[2]=c3;
581 c3=0;
582 mul_add_c(a[0],b[3],c1,c2,c3);
583 mul_add_c(a[1],b[2],c1,c2,c3);
584 mul_add_c(a[2],b[1],c1,c2,c3);
585 mul_add_c(a[3],b[0],c1,c2,c3);
586 r[3]=c1;
587 c1=0;
588 mul_add_c(a[4],b[0],c2,c3,c1);
589 mul_add_c(a[3],b[1],c2,c3,c1);
590 mul_add_c(a[2],b[2],c2,c3,c1);
591 mul_add_c(a[1],b[3],c2,c3,c1);
592 mul_add_c(a[0],b[4],c2,c3,c1);
593 r[4]=c2;
594 c2=0;
595 mul_add_c(a[0],b[5],c3,c1,c2);
596 mul_add_c(a[1],b[4],c3,c1,c2);
597 mul_add_c(a[2],b[3],c3,c1,c2);
598 mul_add_c(a[3],b[2],c3,c1,c2);
599 mul_add_c(a[4],b[1],c3,c1,c2);
600 mul_add_c(a[5],b[0],c3,c1,c2);
601 r[5]=c3;
602 c3=0;
603 mul_add_c(a[6],b[0],c1,c2,c3);
604 mul_add_c(a[5],b[1],c1,c2,c3);
605 mul_add_c(a[4],b[2],c1,c2,c3);
606 mul_add_c(a[3],b[3],c1,c2,c3);
607 mul_add_c(a[2],b[4],c1,c2,c3);
608 mul_add_c(a[1],b[5],c1,c2,c3);
609 mul_add_c(a[0],b[6],c1,c2,c3);
610 r[6]=c1;
611 c1=0;
612 mul_add_c(a[0],b[7],c2,c3,c1);
613 mul_add_c(a[1],b[6],c2,c3,c1);
614 mul_add_c(a[2],b[5],c2,c3,c1);
615 mul_add_c(a[3],b[4],c2,c3,c1);
616 mul_add_c(a[4],b[3],c2,c3,c1);
617 mul_add_c(a[5],b[2],c2,c3,c1);
618 mul_add_c(a[6],b[1],c2,c3,c1);
619 mul_add_c(a[7],b[0],c2,c3,c1);
620 r[7]=c2;
621 c2=0;
622 mul_add_c(a[7],b[1],c3,c1,c2);
623 mul_add_c(a[6],b[2],c3,c1,c2);
624 mul_add_c(a[5],b[3],c3,c1,c2);
625 mul_add_c(a[4],b[4],c3,c1,c2);
626 mul_add_c(a[3],b[5],c3,c1,c2);
627 mul_add_c(a[2],b[6],c3,c1,c2);
628 mul_add_c(a[1],b[7],c3,c1,c2);
629 r[8]=c3;
630 c3=0;
631 mul_add_c(a[2],b[7],c1,c2,c3);
632 mul_add_c(a[3],b[6],c1,c2,c3);
633 mul_add_c(a[4],b[5],c1,c2,c3);
634 mul_add_c(a[5],b[4],c1,c2,c3);
635 mul_add_c(a[6],b[3],c1,c2,c3);
636 mul_add_c(a[7],b[2],c1,c2,c3);
637 r[9]=c1;
638 c1=0;
639 mul_add_c(a[7],b[3],c2,c3,c1);
640 mul_add_c(a[6],b[4],c2,c3,c1);
641 mul_add_c(a[5],b[5],c2,c3,c1);
642 mul_add_c(a[4],b[6],c2,c3,c1);
643 mul_add_c(a[3],b[7],c2,c3,c1);
644 r[10]=c2;
645 c2=0;
646 mul_add_c(a[4],b[7],c3,c1,c2);
647 mul_add_c(a[5],b[6],c3,c1,c2);
648 mul_add_c(a[6],b[5],c3,c1,c2);
649 mul_add_c(a[7],b[4],c3,c1,c2);
650 r[11]=c3;
651 c3=0;
652 mul_add_c(a[7],b[5],c1,c2,c3);
653 mul_add_c(a[6],b[6],c1,c2,c3);
654 mul_add_c(a[5],b[7],c1,c2,c3);
655 r[12]=c1;
656 c1=0;
657 mul_add_c(a[6],b[7],c2,c3,c1);
658 mul_add_c(a[7],b[6],c2,c3,c1);
659 r[13]=c2;
660 c2=0;
661 mul_add_c(a[7],b[7],c3,c1,c2);
662 r[14]=c3;
663 r[15]=c1;
664 }
665
666void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
667 {
668#ifdef BN_LLONG
669 BN_ULLONG t;
670#else
671 BN_ULONG bl,bh;
672#endif
673 BN_ULONG t1,t2;
674 BN_ULONG c1,c2,c3;
675
676 c1=0;
677 c2=0;
678 c3=0;
679 mul_add_c(a[0],b[0],c1,c2,c3);
680 r[0]=c1;
681 c1=0;
682 mul_add_c(a[0],b[1],c2,c3,c1);
683 mul_add_c(a[1],b[0],c2,c3,c1);
684 r[1]=c2;
685 c2=0;
686 mul_add_c(a[2],b[0],c3,c1,c2);
687 mul_add_c(a[1],b[1],c3,c1,c2);
688 mul_add_c(a[0],b[2],c3,c1,c2);
689 r[2]=c3;
690 c3=0;
691 mul_add_c(a[0],b[3],c1,c2,c3);
692 mul_add_c(a[1],b[2],c1,c2,c3);
693 mul_add_c(a[2],b[1],c1,c2,c3);
694 mul_add_c(a[3],b[0],c1,c2,c3);
695 r[3]=c1;
696 c1=0;
697 mul_add_c(a[3],b[1],c2,c3,c1);
698 mul_add_c(a[2],b[2],c2,c3,c1);
699 mul_add_c(a[1],b[3],c2,c3,c1);
700 r[4]=c2;
701 c2=0;
702 mul_add_c(a[2],b[3],c3,c1,c2);
703 mul_add_c(a[3],b[2],c3,c1,c2);
704 r[5]=c3;
705 c3=0;
706 mul_add_c(a[3],b[3],c1,c2,c3);
707 r[6]=c1;
708 r[7]=c2;
709 }
710
711void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
712 {
713#ifdef BN_LLONG
714 BN_ULLONG t,tt;
715#else
716 BN_ULONG bl,bh;
717#endif
718 BN_ULONG t1,t2;
719 BN_ULONG c1,c2,c3;
720
721 c1=0;
722 c2=0;
723 c3=0;
724 sqr_add_c(a,0,c1,c2,c3);
725 r[0]=c1;
726 c1=0;
727 sqr_add_c2(a,1,0,c2,c3,c1);
728 r[1]=c2;
729 c2=0;
730 sqr_add_c(a,1,c3,c1,c2);
731 sqr_add_c2(a,2,0,c3,c1,c2);
732 r[2]=c3;
733 c3=0;
734 sqr_add_c2(a,3,0,c1,c2,c3);
735 sqr_add_c2(a,2,1,c1,c2,c3);
736 r[3]=c1;
737 c1=0;
738 sqr_add_c(a,2,c2,c3,c1);
739 sqr_add_c2(a,3,1,c2,c3,c1);
740 sqr_add_c2(a,4,0,c2,c3,c1);
741 r[4]=c2;
742 c2=0;
743 sqr_add_c2(a,5,0,c3,c1,c2);
744 sqr_add_c2(a,4,1,c3,c1,c2);
745 sqr_add_c2(a,3,2,c3,c1,c2);
746 r[5]=c3;
747 c3=0;
748 sqr_add_c(a,3,c1,c2,c3);
749 sqr_add_c2(a,4,2,c1,c2,c3);
750 sqr_add_c2(a,5,1,c1,c2,c3);
751 sqr_add_c2(a,6,0,c1,c2,c3);
752 r[6]=c1;
753 c1=0;
754 sqr_add_c2(a,7,0,c2,c3,c1);
755 sqr_add_c2(a,6,1,c2,c3,c1);
756 sqr_add_c2(a,5,2,c2,c3,c1);
757 sqr_add_c2(a,4,3,c2,c3,c1);
758 r[7]=c2;
759 c2=0;
760 sqr_add_c(a,4,c3,c1,c2);
761 sqr_add_c2(a,5,3,c3,c1,c2);
762 sqr_add_c2(a,6,2,c3,c1,c2);
763 sqr_add_c2(a,7,1,c3,c1,c2);
764 r[8]=c3;
765 c3=0;
766 sqr_add_c2(a,7,2,c1,c2,c3);
767 sqr_add_c2(a,6,3,c1,c2,c3);
768 sqr_add_c2(a,5,4,c1,c2,c3);
769 r[9]=c1;
770 c1=0;
771 sqr_add_c(a,5,c2,c3,c1);
772 sqr_add_c2(a,6,4,c2,c3,c1);
773 sqr_add_c2(a,7,3,c2,c3,c1);
774 r[10]=c2;
775 c2=0;
776 sqr_add_c2(a,7,4,c3,c1,c2);
777 sqr_add_c2(a,6,5,c3,c1,c2);
778 r[11]=c3;
779 c3=0;
780 sqr_add_c(a,6,c1,c2,c3);
781 sqr_add_c2(a,7,5,c1,c2,c3);
782 r[12]=c1;
783 c1=0;
784 sqr_add_c2(a,7,6,c2,c3,c1);
785 r[13]=c2;
786 c2=0;
787 sqr_add_c(a,7,c3,c1,c2);
788 r[14]=c3;
789 r[15]=c1;
790 }
791
792void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
793 {
794#ifdef BN_LLONG
795 BN_ULLONG t,tt;
796#else
797 BN_ULONG bl,bh;
798#endif
799 BN_ULONG t1,t2;
800 BN_ULONG c1,c2,c3;
801
802 c1=0;
803 c2=0;
804 c3=0;
805 sqr_add_c(a,0,c1,c2,c3);
806 r[0]=c1;
807 c1=0;
808 sqr_add_c2(a,1,0,c2,c3,c1);
809 r[1]=c2;
810 c2=0;
811 sqr_add_c(a,1,c3,c1,c2);
812 sqr_add_c2(a,2,0,c3,c1,c2);
813 r[2]=c3;
814 c3=0;
815 sqr_add_c2(a,3,0,c1,c2,c3);
816 sqr_add_c2(a,2,1,c1,c2,c3);
817 r[3]=c1;
818 c1=0;
819 sqr_add_c(a,2,c2,c3,c1);
820 sqr_add_c2(a,3,1,c2,c3,c1);
821 r[4]=c2;
822 c2=0;
823 sqr_add_c2(a,3,2,c3,c1,c2);
824 r[5]=c3;
825 c3=0;
826 sqr_add_c(a,3,c1,c2,c3);
827 r[6]=c1;
828 r[7]=c2;
829 }
830
831#ifdef OPENSSL_NO_ASM
832#ifdef OPENSSL_BN_ASM_MONT
833#include <alloca.h>
834/*
835 * This is essentially reference implementation, which may or may not
836 * result in performance improvement. E.g. on IA-32 this routine was
837 * observed to give 40% faster rsa1024 private key operations and 10%
838 * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
839 * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
840 * reference implementation, one to be used as starting point for
841 * platform-specific assembler. Mentioned numbers apply to compiler
842 * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
843 * can vary not only from platform to platform, but even for compiler
844 * versions. Assembler vs. assembler improvement coefficients can
845 * [and are known to] differ and are to be documented elsewhere.
846 */
847int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
848 {
849 BN_ULONG c0,c1,ml,*tp,n0;
850#ifdef mul64
851 BN_ULONG mh;
852#endif
853 volatile BN_ULONG *vp;
854 int i=0,j;
855
856#if 0 /* template for platform-specific implementation */
857 if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num);
858#endif
859 vp = tp = alloca((num+2)*sizeof(BN_ULONG));
860
861 n0 = *n0p;
862
863 c0 = 0;
864 ml = bp[0];
865#ifdef mul64
866 mh = HBITS(ml);
867 ml = LBITS(ml);
868 for (j=0;j<num;++j)
869 mul(tp[j],ap[j],ml,mh,c0);
870#else
871 for (j=0;j<num;++j)
872 mul(tp[j],ap[j],ml,c0);
873#endif
874
875 tp[num] = c0;
876 tp[num+1] = 0;
877 goto enter;
878
879 for(i=0;i<num;i++)
880 {
881 c0 = 0;
882 ml = bp[i];
883#ifdef mul64
884 mh = HBITS(ml);
885 ml = LBITS(ml);
886 for (j=0;j<num;++j)
887 mul_add(tp[j],ap[j],ml,mh,c0);
888#else
889 for (j=0;j<num;++j)
890 mul_add(tp[j],ap[j],ml,c0);
891#endif
892 c1 = (tp[num] + c0)&BN_MASK2;
893 tp[num] = c1;
894 tp[num+1] = (c1<c0?1:0);
895 enter:
896 c1 = tp[0];
897 ml = (c1*n0)&BN_MASK2;
898 c0 = 0;
899#ifdef mul64
900 mh = HBITS(ml);
901 ml = LBITS(ml);
902 mul_add(c1,np[0],ml,mh,c0);
903#else
904 mul_add(c1,ml,np[0],c0);
905#endif
906 for(j=1;j<num;j++)
907 {
908 c1 = tp[j];
909#ifdef mul64
910 mul_add(c1,np[j],ml,mh,c0);
911#else
912 mul_add(c1,ml,np[j],c0);
913#endif
914 tp[j-1] = c1&BN_MASK2;
915 }
916 c1 = (tp[num] + c0)&BN_MASK2;
917 tp[num-1] = c1;
918 tp[num] = tp[num+1] + (c1<c0?1:0);
919 }
920
921 if (tp[num]!=0 || tp[num-1]>=np[num-1])
922 {
923 c0 = bn_sub_words(rp,tp,np,num);
924 if (tp[num]!=0 || c0==0)
925 {
926 for(i=0;i<num+2;i++) vp[i] = 0;
927 return 1;
928 }
929 }
930 for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;
931 vp[num] = 0;
932 vp[num+1] = 0;
933 return 1;
934 }
935#else
936/*
937 * Return value of 0 indicates that multiplication/convolution was not
938 * performed to signal the caller to fall down to alternative/original
939 * code-path.
940 */
941int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
942{ return 0; }
943#endif /* OPENSSL_BN_ASM_MONT */
944#endif
945
946#else /* !BN_MUL_COMBA */
947
948/* hmm... is it faster just to do a multiply? */
949#undef bn_sqr_comba4
950void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
951 {
952 BN_ULONG t[8];
953 bn_sqr_normal(r,a,4,t);
954 }
955
956#undef bn_sqr_comba8
957void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
958 {
959 BN_ULONG t[16];
960 bn_sqr_normal(r,a,8,t);
961 }
962
963void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
964 {
965 r[4]=bn_mul_words( &(r[0]),a,4,b[0]);
966 r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
967 r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
968 r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
969 }
970
971void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
972 {
973 r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
974 r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
975 r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
976 r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
977 r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
978 r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
979 r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
980 r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
981 }
982
983#ifdef OPENSSL_NO_ASM
984#ifdef OPENSSL_BN_ASM_MONT
985#include <alloca.h>
986int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
987 {
988 BN_ULONG c0,c1,*tp,n0=*n0p;
989 volatile BN_ULONG *vp;
990 int i=0,j;
991
992 vp = tp = alloca((num+2)*sizeof(BN_ULONG));
993
994 for(i=0;i<=num;i++) tp[i]=0;
995
996 for(i=0;i<num;i++)
997 {
998 c0 = bn_mul_add_words(tp,ap,num,bp[i]);
999 c1 = (tp[num] + c0)&BN_MASK2;
1000 tp[num] = c1;
1001 tp[num+1] = (c1<c0?1:0);
1002
1003 c0 = bn_mul_add_words(tp,np,num,tp[0]*n0);
1004 c1 = (tp[num] + c0)&BN_MASK2;
1005 tp[num] = c1;
1006 tp[num+1] += (c1<c0?1:0);
1007 for(j=0;j<=num;j++) tp[j]=tp[j+1];
1008 }
1009
1010 if (tp[num]!=0 || tp[num-1]>=np[num-1])
1011 {
1012 c0 = bn_sub_words(rp,tp,np,num);
1013 if (tp[num]!=0 || c0==0)
1014 {
1015 for(i=0;i<num+2;i++) vp[i] = 0;
1016 return 1;
1017 }
1018 }
1019 for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;
1020 vp[num] = 0;
1021 vp[num+1] = 0;
1022 return 1;
1023 }
1024#else
1025int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
1026{ return 0; }
1027#endif /* OPENSSL_BN_ASM_MONT */
1028#endif
1029
1030#endif /* !BN_MUL_COMBA */
diff --git a/src/lib/libcrypto/bn/bn_blind.c b/src/lib/libcrypto/bn/bn_blind.c
deleted file mode 100644
index 9ed8bc2b40..0000000000
--- a/src/lib/libcrypto/bn/bn_blind.c
+++ /dev/null
@@ -1,385 +0,0 @@
1/* crypto/bn/bn_blind.c */
2/* ====================================================================
3 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
56 * All rights reserved.
57 *
58 * This package is an SSL implementation written
59 * by Eric Young (eay@cryptsoft.com).
60 * The implementation was written so as to conform with Netscapes SSL.
61 *
62 * This library is free for commercial and non-commercial use as long as
63 * the following conditions are aheared to. The following conditions
64 * apply to all code found in this distribution, be it the RC4, RSA,
65 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
66 * included with this distribution is covered by the same copyright terms
67 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
68 *
69 * Copyright remains Eric Young's, and as such any Copyright notices in
70 * the code are not to be removed.
71 * If this package is used in a product, Eric Young should be given attribution
72 * as the author of the parts of the library used.
73 * This can be in the form of a textual message at program startup or
74 * in documentation (online or textual) provided with the package.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * "This product includes cryptographic software written by
87 * Eric Young (eay@cryptsoft.com)"
88 * The word 'cryptographic' can be left out if the rouines from the library
89 * being used are not cryptographic related :-).
90 * 4. If you include any Windows specific code (or a derivative thereof) from
91 * the apps directory (application code) you must include an acknowledgement:
92 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
93 *
94 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
95 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
96 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
97 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
98 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
99 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
100 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
101 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
102 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
103 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
104 * SUCH DAMAGE.
105 *
106 * The licence and distribution terms for any publically available version or
107 * derivative of this code cannot be changed. i.e. this code cannot simply be
108 * copied and put under another distribution licence
109 * [including the GNU Public Licence.]
110 */
111
112#include <stdio.h>
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116#define BN_BLINDING_COUNTER 32
117
118struct bn_blinding_st
119 {
120 BIGNUM *A;
121 BIGNUM *Ai;
122 BIGNUM *e;
123 BIGNUM *mod; /* just a reference */
124#ifndef OPENSSL_NO_DEPRECATED
125 unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b;
126 * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */
127#endif
128 CRYPTO_THREADID tid;
129 int counter;
130 unsigned long flags;
131 BN_MONT_CTX *m_ctx;
132 int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
133 const BIGNUM *m, BN_CTX *ctx,
134 BN_MONT_CTX *m_ctx);
135 };
136
137BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
138 {
139 BN_BLINDING *ret=NULL;
140
141 bn_check_top(mod);
142
143 if ((ret=(BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL)
144 {
145 BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE);
146 return(NULL);
147 }
148 memset(ret,0,sizeof(BN_BLINDING));
149 if (A != NULL)
150 {
151 if ((ret->A = BN_dup(A)) == NULL) goto err;
152 }
153 if (Ai != NULL)
154 {
155 if ((ret->Ai = BN_dup(Ai)) == NULL) goto err;
156 }
157
158 /* save a copy of mod in the BN_BLINDING structure */
159 if ((ret->mod = BN_dup(mod)) == NULL) goto err;
160 if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
161 BN_set_flags(ret->mod, BN_FLG_CONSTTIME);
162
163 /* Set the counter to the special value -1
164 * to indicate that this is never-used fresh blinding
165 * that does not need updating before first use. */
166 ret->counter = -1;
167 CRYPTO_THREADID_current(&ret->tid);
168 return(ret);
169err:
170 if (ret != NULL) BN_BLINDING_free(ret);
171 return(NULL);
172 }
173
174void BN_BLINDING_free(BN_BLINDING *r)
175 {
176 if(r == NULL)
177 return;
178
179 if (r->A != NULL) BN_free(r->A );
180 if (r->Ai != NULL) BN_free(r->Ai);
181 if (r->e != NULL) BN_free(r->e );
182 if (r->mod != NULL) BN_free(r->mod);
183 OPENSSL_free(r);
184 }
185
186int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
187 {
188 int ret=0;
189
190 if ((b->A == NULL) || (b->Ai == NULL))
191 {
192 BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITIALIZED);
193 goto err;
194 }
195
196 if (b->counter == -1)
197 b->counter = 0;
198
199 if (++b->counter == BN_BLINDING_COUNTER && b->e != NULL &&
200 !(b->flags & BN_BLINDING_NO_RECREATE))
201 {
202 /* re-create blinding parameters */
203 if (!BN_BLINDING_create_param(b, NULL, NULL, ctx, NULL, NULL))
204 goto err;
205 }
206 else if (!(b->flags & BN_BLINDING_NO_UPDATE))
207 {
208 if (!BN_mod_mul(b->A,b->A,b->A,b->mod,ctx)) goto err;
209 if (!BN_mod_mul(b->Ai,b->Ai,b->Ai,b->mod,ctx)) goto err;
210 }
211
212 ret=1;
213err:
214 if (b->counter == BN_BLINDING_COUNTER)
215 b->counter = 0;
216 return(ret);
217 }
218
219int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
220 {
221 return BN_BLINDING_convert_ex(n, NULL, b, ctx);
222 }
223
224int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
225 {
226 int ret = 1;
227
228 bn_check_top(n);
229
230 if ((b->A == NULL) || (b->Ai == NULL))
231 {
232 BNerr(BN_F_BN_BLINDING_CONVERT_EX,BN_R_NOT_INITIALIZED);
233 return(0);
234 }
235
236 if (b->counter == -1)
237 /* Fresh blinding, doesn't need updating. */
238 b->counter = 0;
239 else if (!BN_BLINDING_update(b,ctx))
240 return(0);
241
242 if (r != NULL)
243 {
244 if (!BN_copy(r, b->Ai)) ret=0;
245 }
246
247 if (!BN_mod_mul(n,n,b->A,b->mod,ctx)) ret=0;
248
249 return ret;
250 }
251
252int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
253 {
254 return BN_BLINDING_invert_ex(n, NULL, b, ctx);
255 }
256
257int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
258 {
259 int ret;
260
261 bn_check_top(n);
262
263 if (r != NULL)
264 ret = BN_mod_mul(n, n, r, b->mod, ctx);
265 else
266 {
267 if (b->Ai == NULL)
268 {
269 BNerr(BN_F_BN_BLINDING_INVERT_EX,BN_R_NOT_INITIALIZED);
270 return(0);
271 }
272 ret = BN_mod_mul(n, n, b->Ai, b->mod, ctx);
273 }
274
275 bn_check_top(n);
276 return(ret);
277 }
278
279#ifndef OPENSSL_NO_DEPRECATED
280unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b)
281 {
282 return b->thread_id;
283 }
284
285void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n)
286 {
287 b->thread_id = n;
288 }
289#endif
290
291CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *b)
292 {
293 return &b->tid;
294 }
295
296unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b)
297 {
298 return b->flags;
299 }
300
301void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags)
302 {
303 b->flags = flags;
304 }
305
306BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
307 const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
308 int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
309 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
310 BN_MONT_CTX *m_ctx)
311{
312 int retry_counter = 32;
313 BN_BLINDING *ret = NULL;
314
315 if (b == NULL)
316 ret = BN_BLINDING_new(NULL, NULL, m);
317 else
318 ret = b;
319
320 if (ret == NULL)
321 goto err;
322
323 if (ret->A == NULL && (ret->A = BN_new()) == NULL)
324 goto err;
325 if (ret->Ai == NULL && (ret->Ai = BN_new()) == NULL)
326 goto err;
327
328 if (e != NULL)
329 {
330 if (ret->e != NULL)
331 BN_free(ret->e);
332 ret->e = BN_dup(e);
333 }
334 if (ret->e == NULL)
335 goto err;
336
337 if (bn_mod_exp != NULL)
338 ret->bn_mod_exp = bn_mod_exp;
339 if (m_ctx != NULL)
340 ret->m_ctx = m_ctx;
341
342 do {
343 if (!BN_rand_range(ret->A, ret->mod)) goto err;
344 if (BN_mod_inverse(ret->Ai, ret->A, ret->mod, ctx) == NULL)
345 {
346 /* this should almost never happen for good RSA keys */
347 unsigned long error = ERR_peek_last_error();
348 if (ERR_GET_REASON(error) == BN_R_NO_INVERSE)
349 {
350 if (retry_counter-- == 0)
351 {
352 BNerr(BN_F_BN_BLINDING_CREATE_PARAM,
353 BN_R_TOO_MANY_ITERATIONS);
354 goto err;
355 }
356 ERR_clear_error();
357 }
358 else
359 goto err;
360 }
361 else
362 break;
363 } while (1);
364
365 if (ret->bn_mod_exp != NULL && ret->m_ctx != NULL)
366 {
367 if (!ret->bn_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx))
368 goto err;
369 }
370 else
371 {
372 if (!BN_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx))
373 goto err;
374 }
375
376 return ret;
377err:
378 if (b == NULL && ret != NULL)
379 {
380 BN_BLINDING_free(ret);
381 ret = NULL;
382 }
383
384 return ret;
385}
diff --git a/src/lib/libcrypto/bn/bn_const.c b/src/lib/libcrypto/bn/bn_const.c
deleted file mode 100644
index eb60a25b3c..0000000000
--- a/src/lib/libcrypto/bn/bn_const.c
+++ /dev/null
@@ -1,402 +0,0 @@
1/* crypto/bn/knownprimes.c */
2/* Insert boilerplate */
3
4#include "bn.h"
5
6/* "First Oakley Default Group" from RFC2409, section 6.1.
7 *
8 * The prime is: 2^768 - 2 ^704 - 1 + 2^64 * { [2^638 pi] + 149686 }
9 *
10 * RFC2409 specifies a generator of 2.
11 * RFC2412 specifies a generator of of 22.
12 */
13
14BIGNUM *get_rfc2409_prime_768(BIGNUM *bn)
15 {
16 static const unsigned char RFC2409_PRIME_768[]={
17 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
18 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
19 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
20 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
21 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
22 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
23 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
24 0xA6,0x3A,0x36,0x20,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
25 };
26 return BN_bin2bn(RFC2409_PRIME_768,sizeof(RFC2409_PRIME_768),bn);
27 }
28
29/* "Second Oakley Default Group" from RFC2409, section 6.2.
30 *
31 * The prime is: 2^1024 - 2^960 - 1 + 2^64 * { [2^894 pi] + 129093 }.
32 *
33 * RFC2409 specifies a generator of 2.
34 * RFC2412 specifies a generator of 22.
35 */
36
37BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn)
38 {
39 static const unsigned char RFC2409_PRIME_1024[]={
40 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
41 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
42 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
43 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
44 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
45 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
46 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
47 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
48 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
49 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE6,0x53,0x81,
50 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
51 };
52 return BN_bin2bn(RFC2409_PRIME_1024,sizeof(RFC2409_PRIME_1024),bn);
53 }
54
55/* "1536-bit MODP Group" from RFC3526, Section 2.
56 *
57 * The prime is: 2^1536 - 2^1472 - 1 + 2^64 * { [2^1406 pi] + 741804 }
58 *
59 * RFC3526 specifies a generator of 2.
60 * RFC2312 specifies a generator of 22.
61 */
62
63BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn)
64 {
65 static const unsigned char RFC3526_PRIME_1536[]={
66 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
67 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
68 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
69 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
70 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
71 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
72 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
73 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
74 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
75 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
76 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
77 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
78 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
79 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
80 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
81 0xCA,0x23,0x73,0x27,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
82 };
83 return BN_bin2bn(RFC3526_PRIME_1536,sizeof(RFC3526_PRIME_1536),bn);
84 }
85
86/* "2048-bit MODP Group" from RFC3526, Section 3.
87 *
88 * The prime is: 2^2048 - 2^1984 - 1 + 2^64 * { [2^1918 pi] + 124476 }
89 *
90 * RFC3526 specifies a generator of 2.
91 */
92
93BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn)
94 {
95 static const unsigned char RFC3526_PRIME_2048[]={
96 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
97 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
98 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
99 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
100 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
101 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
102 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
103 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
104 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
105 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
106 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
107 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
108 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
109 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
110 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
111 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
112 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
113 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
114 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
115 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
116 0x15,0x72,0x8E,0x5A,0x8A,0xAC,0xAA,0x68,0xFF,0xFF,0xFF,0xFF,
117 0xFF,0xFF,0xFF,0xFF,
118 };
119 return BN_bin2bn(RFC3526_PRIME_2048,sizeof(RFC3526_PRIME_2048),bn);
120 }
121
122/* "3072-bit MODP Group" from RFC3526, Section 4.
123 *
124 * The prime is: 2^3072 - 2^3008 - 1 + 2^64 * { [2^2942 pi] + 1690314 }
125 *
126 * RFC3526 specifies a generator of 2.
127 */
128
129BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn)
130 {
131 static const unsigned char RFC3526_PRIME_3072[]={
132 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
133 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
134 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
135 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
136 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
137 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
138 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
139 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
140 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
141 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
142 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
143 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
144 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
145 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
146 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
147 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
148 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
149 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
150 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
151 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
152 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
153 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
154 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
155 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
156 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
157 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
158 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
159 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
160 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
161 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
162 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
163 0xA9,0x3A,0xD2,0xCA,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
164 };
165 return BN_bin2bn(RFC3526_PRIME_3072,sizeof(RFC3526_PRIME_3072),bn);
166 }
167
168/* "4096-bit MODP Group" from RFC3526, Section 5.
169 *
170 * The prime is: 2^4096 - 2^4032 - 1 + 2^64 * { [2^3966 pi] + 240904 }
171 *
172 * RFC3526 specifies a generator of 2.
173 */
174
175BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn)
176 {
177 static const unsigned char RFC3526_PRIME_4096[]={
178 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
179 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
180 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
181 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
182 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
183 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
184 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
185 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
186 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
187 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
188 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
189 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
190 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
191 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
192 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
193 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
194 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
195 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
196 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
197 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
198 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
199 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
200 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
201 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
202 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
203 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
204 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
205 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
206 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
207 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
208 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
209 0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7,
210 0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18,
211 0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA,
212 0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB,
213 0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6,
214 0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F,
215 0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED,
216 0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76,
217 0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9,
218 0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC,
219 0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x06,0x31,0x99,
220 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
221 };
222 return BN_bin2bn(RFC3526_PRIME_4096,sizeof(RFC3526_PRIME_4096),bn);
223 }
224
225/* "6144-bit MODP Group" from RFC3526, Section 6.
226 *
227 * The prime is: 2^6144 - 2^6080 - 1 + 2^64 * { [2^6014 pi] + 929484 }
228 *
229 * RFC3526 specifies a generator of 2.
230 */
231
232BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn)
233 {
234 static const unsigned char RFC3526_PRIME_6144[]={
235 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
236 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
237 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
238 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
239 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
240 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
241 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
242 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
243 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
244 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
245 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
246 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
247 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
248 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
249 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
250 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
251 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
252 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
253 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
254 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
255 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
256 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
257 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
258 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
259 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
260 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
261 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
262 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
263 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
264 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
265 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
266 0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7,
267 0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18,
268 0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA,
269 0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB,
270 0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6,
271 0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F,
272 0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED,
273 0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76,
274 0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9,
275 0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC,
276 0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x02,0x84,0x92,
277 0x36,0xC3,0xFA,0xB4,0xD2,0x7C,0x70,0x26,0xC1,0xD4,0xDC,0xB2,
278 0x60,0x26,0x46,0xDE,0xC9,0x75,0x1E,0x76,0x3D,0xBA,0x37,0xBD,
279 0xF8,0xFF,0x94,0x06,0xAD,0x9E,0x53,0x0E,0xE5,0xDB,0x38,0x2F,
280 0x41,0x30,0x01,0xAE,0xB0,0x6A,0x53,0xED,0x90,0x27,0xD8,0x31,
281 0x17,0x97,0x27,0xB0,0x86,0x5A,0x89,0x18,0xDA,0x3E,0xDB,0xEB,
282 0xCF,0x9B,0x14,0xED,0x44,0xCE,0x6C,0xBA,0xCE,0xD4,0xBB,0x1B,
283 0xDB,0x7F,0x14,0x47,0xE6,0xCC,0x25,0x4B,0x33,0x20,0x51,0x51,
284 0x2B,0xD7,0xAF,0x42,0x6F,0xB8,0xF4,0x01,0x37,0x8C,0xD2,0xBF,
285 0x59,0x83,0xCA,0x01,0xC6,0x4B,0x92,0xEC,0xF0,0x32,0xEA,0x15,
286 0xD1,0x72,0x1D,0x03,0xF4,0x82,0xD7,0xCE,0x6E,0x74,0xFE,0xF6,
287 0xD5,0x5E,0x70,0x2F,0x46,0x98,0x0C,0x82,0xB5,0xA8,0x40,0x31,
288 0x90,0x0B,0x1C,0x9E,0x59,0xE7,0xC9,0x7F,0xBE,0xC7,0xE8,0xF3,
289 0x23,0xA9,0x7A,0x7E,0x36,0xCC,0x88,0xBE,0x0F,0x1D,0x45,0xB7,
290 0xFF,0x58,0x5A,0xC5,0x4B,0xD4,0x07,0xB2,0x2B,0x41,0x54,0xAA,
291 0xCC,0x8F,0x6D,0x7E,0xBF,0x48,0xE1,0xD8,0x14,0xCC,0x5E,0xD2,
292 0x0F,0x80,0x37,0xE0,0xA7,0x97,0x15,0xEE,0xF2,0x9B,0xE3,0x28,
293 0x06,0xA1,0xD5,0x8B,0xB7,0xC5,0xDA,0x76,0xF5,0x50,0xAA,0x3D,
294 0x8A,0x1F,0xBF,0xF0,0xEB,0x19,0xCC,0xB1,0xA3,0x13,0xD5,0x5C,
295 0xDA,0x56,0xC9,0xEC,0x2E,0xF2,0x96,0x32,0x38,0x7F,0xE8,0xD7,
296 0x6E,0x3C,0x04,0x68,0x04,0x3E,0x8F,0x66,0x3F,0x48,0x60,0xEE,
297 0x12,0xBF,0x2D,0x5B,0x0B,0x74,0x74,0xD6,0xE6,0x94,0xF9,0x1E,
298 0x6D,0xCC,0x40,0x24,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
299 };
300 return BN_bin2bn(RFC3526_PRIME_6144,sizeof(RFC3526_PRIME_6144),bn);
301 }
302
303/* "8192-bit MODP Group" from RFC3526, Section 7.
304 *
305 * The prime is: 2^8192 - 2^8128 - 1 + 2^64 * { [2^8062 pi] + 4743158 }
306 *
307 * RFC3526 specifies a generator of 2.
308 */
309
310BIGNUM *get_rfc3526_prime_8192(BIGNUM *bn)
311 {
312 static const unsigned char RFC3526_PRIME_8192[]={
313 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
314 0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
315 0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
316 0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
317 0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
318 0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
319 0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
320 0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
321 0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
322 0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
323 0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
324 0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
325 0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
326 0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
327 0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
328 0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
329 0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
330 0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
331 0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
332 0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
333 0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
334 0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
335 0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
336 0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
337 0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
338 0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
339 0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
340 0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
341 0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
342 0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
343 0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
344 0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7,
345 0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18,
346 0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA,
347 0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB,
348 0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6,
349 0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F,
350 0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED,
351 0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76,
352 0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9,
353 0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC,
354 0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x02,0x84,0x92,
355 0x36,0xC3,0xFA,0xB4,0xD2,0x7C,0x70,0x26,0xC1,0xD4,0xDC,0xB2,
356 0x60,0x26,0x46,0xDE,0xC9,0x75,0x1E,0x76,0x3D,0xBA,0x37,0xBD,
357 0xF8,0xFF,0x94,0x06,0xAD,0x9E,0x53,0x0E,0xE5,0xDB,0x38,0x2F,
358 0x41,0x30,0x01,0xAE,0xB0,0x6A,0x53,0xED,0x90,0x27,0xD8,0x31,
359 0x17,0x97,0x27,0xB0,0x86,0x5A,0x89,0x18,0xDA,0x3E,0xDB,0xEB,
360 0xCF,0x9B,0x14,0xED,0x44,0xCE,0x6C,0xBA,0xCE,0xD4,0xBB,0x1B,
361 0xDB,0x7F,0x14,0x47,0xE6,0xCC,0x25,0x4B,0x33,0x20,0x51,0x51,
362 0x2B,0xD7,0xAF,0x42,0x6F,0xB8,0xF4,0x01,0x37,0x8C,0xD2,0xBF,
363 0x59,0x83,0xCA,0x01,0xC6,0x4B,0x92,0xEC,0xF0,0x32,0xEA,0x15,
364 0xD1,0x72,0x1D,0x03,0xF4,0x82,0xD7,0xCE,0x6E,0x74,0xFE,0xF6,
365 0xD5,0x5E,0x70,0x2F,0x46,0x98,0x0C,0x82,0xB5,0xA8,0x40,0x31,
366 0x90,0x0B,0x1C,0x9E,0x59,0xE7,0xC9,0x7F,0xBE,0xC7,0xE8,0xF3,
367 0x23,0xA9,0x7A,0x7E,0x36,0xCC,0x88,0xBE,0x0F,0x1D,0x45,0xB7,
368 0xFF,0x58,0x5A,0xC5,0x4B,0xD4,0x07,0xB2,0x2B,0x41,0x54,0xAA,
369 0xCC,0x8F,0x6D,0x7E,0xBF,0x48,0xE1,0xD8,0x14,0xCC,0x5E,0xD2,
370 0x0F,0x80,0x37,0xE0,0xA7,0x97,0x15,0xEE,0xF2,0x9B,0xE3,0x28,
371 0x06,0xA1,0xD5,0x8B,0xB7,0xC5,0xDA,0x76,0xF5,0x50,0xAA,0x3D,
372 0x8A,0x1F,0xBF,0xF0,0xEB,0x19,0xCC,0xB1,0xA3,0x13,0xD5,0x5C,
373 0xDA,0x56,0xC9,0xEC,0x2E,0xF2,0x96,0x32,0x38,0x7F,0xE8,0xD7,
374 0x6E,0x3C,0x04,0x68,0x04,0x3E,0x8F,0x66,0x3F,0x48,0x60,0xEE,
375 0x12,0xBF,0x2D,0x5B,0x0B,0x74,0x74,0xD6,0xE6,0x94,0xF9,0x1E,
376 0x6D,0xBE,0x11,0x59,0x74,0xA3,0x92,0x6F,0x12,0xFE,0xE5,0xE4,
377 0x38,0x77,0x7C,0xB6,0xA9,0x32,0xDF,0x8C,0xD8,0xBE,0xC4,0xD0,
378 0x73,0xB9,0x31,0xBA,0x3B,0xC8,0x32,0xB6,0x8D,0x9D,0xD3,0x00,
379 0x74,0x1F,0xA7,0xBF,0x8A,0xFC,0x47,0xED,0x25,0x76,0xF6,0x93,
380 0x6B,0xA4,0x24,0x66,0x3A,0xAB,0x63,0x9C,0x5A,0xE4,0xF5,0x68,
381 0x34,0x23,0xB4,0x74,0x2B,0xF1,0xC9,0x78,0x23,0x8F,0x16,0xCB,
382 0xE3,0x9D,0x65,0x2D,0xE3,0xFD,0xB8,0xBE,0xFC,0x84,0x8A,0xD9,
383 0x22,0x22,0x2E,0x04,0xA4,0x03,0x7C,0x07,0x13,0xEB,0x57,0xA8,
384 0x1A,0x23,0xF0,0xC7,0x34,0x73,0xFC,0x64,0x6C,0xEA,0x30,0x6B,
385 0x4B,0xCB,0xC8,0x86,0x2F,0x83,0x85,0xDD,0xFA,0x9D,0x4B,0x7F,
386 0xA2,0xC0,0x87,0xE8,0x79,0x68,0x33,0x03,0xED,0x5B,0xDD,0x3A,
387 0x06,0x2B,0x3C,0xF5,0xB3,0xA2,0x78,0xA6,0x6D,0x2A,0x13,0xF8,
388 0x3F,0x44,0xF8,0x2D,0xDF,0x31,0x0E,0xE0,0x74,0xAB,0x6A,0x36,
389 0x45,0x97,0xE8,0x99,0xA0,0x25,0x5D,0xC1,0x64,0xF3,0x1C,0xC5,
390 0x08,0x46,0x85,0x1D,0xF9,0xAB,0x48,0x19,0x5D,0xED,0x7E,0xA1,
391 0xB1,0xD5,0x10,0xBD,0x7E,0xE7,0x4D,0x73,0xFA,0xF3,0x6B,0xC3,
392 0x1E,0xCF,0xA2,0x68,0x35,0x90,0x46,0xF4,0xEB,0x87,0x9F,0x92,
393 0x40,0x09,0x43,0x8B,0x48,0x1C,0x6C,0xD7,0x88,0x9A,0x00,0x2E,
394 0xD5,0xEE,0x38,0x2B,0xC9,0x19,0x0D,0xA6,0xFC,0x02,0x6E,0x47,
395 0x95,0x58,0xE4,0x47,0x56,0x77,0xE9,0xAA,0x9E,0x30,0x50,0xE2,
396 0x76,0x56,0x94,0xDF,0xC8,0x1F,0x56,0xE8,0x80,0xB9,0x6E,0x71,
397 0x60,0xC9,0x80,0xDD,0x98,0xED,0xD3,0xDF,0xFF,0xFF,0xFF,0xFF,
398 0xFF,0xFF,0xFF,0xFF,
399 };
400 return BN_bin2bn(RFC3526_PRIME_8192,sizeof(RFC3526_PRIME_8192),bn);
401 }
402
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c
deleted file mode 100644
index 3f2256f675..0000000000
--- a/src/lib/libcrypto/bn/bn_ctx.c
+++ /dev/null
@@ -1,454 +0,0 @@
1/* crypto/bn/bn_ctx.c */
2/* Written by Ulf Moeller for the OpenSSL project. */
3/* ====================================================================
4 * Copyright (c) 1998-2004 The OpenSSL Project. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * 3. All advertising materials mentioning features or use of this
19 * software must display the following acknowledgment:
20 * "This product includes software developed by the OpenSSL Project
21 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
22 *
23 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
24 * endorse or promote products derived from this software without
25 * prior written permission. For written permission, please contact
26 * openssl-core@openssl.org.
27 *
28 * 5. Products derived from this software may not be called "OpenSSL"
29 * nor may "OpenSSL" appear in their names without prior written
30 * permission of the OpenSSL Project.
31 *
32 * 6. Redistributions of any form whatsoever must retain the following
33 * acknowledgment:
34 * "This product includes software developed by the OpenSSL Project
35 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
38 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
40 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
43 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
44 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
46 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
47 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
48 * OF THE POSSIBILITY OF SUCH DAMAGE.
49 * ====================================================================
50 *
51 * This product includes cryptographic software written by Eric Young
52 * (eay@cryptsoft.com). This product includes software written by Tim
53 * Hudson (tjh@cryptsoft.com).
54 *
55 */
56
57#if !defined(BN_CTX_DEBUG) && !defined(BN_DEBUG)
58#ifndef NDEBUG
59#define NDEBUG
60#endif
61#endif
62
63#include <stdio.h>
64#include <assert.h>
65
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69/* TODO list
70 *
71 * 1. Check a bunch of "(words+1)" type hacks in various bignum functions and
72 * check they can be safely removed.
73 * - Check +1 and other ugliness in BN_from_montgomery()
74 *
75 * 2. Consider allowing a BN_new_ex() that, at least, lets you specify an
76 * appropriate 'block' size that will be honoured by bn_expand_internal() to
77 * prevent piddly little reallocations. OTOH, profiling bignum expansions in
78 * BN_CTX doesn't show this to be a big issue.
79 */
80
81/* How many bignums are in each "pool item"; */
82#define BN_CTX_POOL_SIZE 16
83/* The stack frame info is resizing, set a first-time expansion size; */
84#define BN_CTX_START_FRAMES 32
85
86/***********/
87/* BN_POOL */
88/***********/
89
90/* A bundle of bignums that can be linked with other bundles */
91typedef struct bignum_pool_item
92 {
93 /* The bignum values */
94 BIGNUM vals[BN_CTX_POOL_SIZE];
95 /* Linked-list admin */
96 struct bignum_pool_item *prev, *next;
97 } BN_POOL_ITEM;
98/* A linked-list of bignums grouped in bundles */
99typedef struct bignum_pool
100 {
101 /* Linked-list admin */
102 BN_POOL_ITEM *head, *current, *tail;
103 /* Stack depth and allocation size */
104 unsigned used, size;
105 } BN_POOL;
106static void BN_POOL_init(BN_POOL *);
107static void BN_POOL_finish(BN_POOL *);
108#ifndef OPENSSL_NO_DEPRECATED
109static void BN_POOL_reset(BN_POOL *);
110#endif
111static BIGNUM * BN_POOL_get(BN_POOL *);
112static void BN_POOL_release(BN_POOL *, unsigned int);
113
114/************/
115/* BN_STACK */
116/************/
117
118/* A wrapper to manage the "stack frames" */
119typedef struct bignum_ctx_stack
120 {
121 /* Array of indexes into the bignum stack */
122 unsigned int *indexes;
123 /* Number of stack frames, and the size of the allocated array */
124 unsigned int depth, size;
125 } BN_STACK;
126static void BN_STACK_init(BN_STACK *);
127static void BN_STACK_finish(BN_STACK *);
128#ifndef OPENSSL_NO_DEPRECATED
129static void BN_STACK_reset(BN_STACK *);
130#endif
131static int BN_STACK_push(BN_STACK *, unsigned int);
132static unsigned int BN_STACK_pop(BN_STACK *);
133
134/**********/
135/* BN_CTX */
136/**********/
137
138/* The opaque BN_CTX type */
139struct bignum_ctx
140 {
141 /* The bignum bundles */
142 BN_POOL pool;
143 /* The "stack frames", if you will */
144 BN_STACK stack;
145 /* The number of bignums currently assigned */
146 unsigned int used;
147 /* Depth of stack overflow */
148 int err_stack;
149 /* Block "gets" until an "end" (compatibility behaviour) */
150 int too_many;
151 };
152
153/* Enable this to find BN_CTX bugs */
154#ifdef BN_CTX_DEBUG
155static const char *ctxdbg_cur = NULL;
156static void ctxdbg(BN_CTX *ctx)
157 {
158 unsigned int bnidx = 0, fpidx = 0;
159 BN_POOL_ITEM *item = ctx->pool.head;
160 BN_STACK *stack = &ctx->stack;
161 fprintf(stderr,"(%08x): ", (unsigned int)ctx);
162 while(bnidx < ctx->used)
163 {
164 fprintf(stderr,"%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
165 if(!(bnidx % BN_CTX_POOL_SIZE))
166 item = item->next;
167 }
168 fprintf(stderr,"\n");
169 bnidx = 0;
170 fprintf(stderr," : ");
171 while(fpidx < stack->depth)
172 {
173 while(bnidx++ < stack->indexes[fpidx])
174 fprintf(stderr," ");
175 fprintf(stderr,"^^^ ");
176 bnidx++;
177 fpidx++;
178 }
179 fprintf(stderr,"\n");
180 }
181#define CTXDBG_ENTRY(str, ctx) do { \
182 ctxdbg_cur = (str); \
183 fprintf(stderr,"Starting %s\n", ctxdbg_cur); \
184 ctxdbg(ctx); \
185 } while(0)
186#define CTXDBG_EXIT(ctx) do { \
187 fprintf(stderr,"Ending %s\n", ctxdbg_cur); \
188 ctxdbg(ctx); \
189 } while(0)
190#define CTXDBG_RET(ctx,ret)
191#else
192#define CTXDBG_ENTRY(str, ctx)
193#define CTXDBG_EXIT(ctx)
194#define CTXDBG_RET(ctx,ret)
195#endif
196
197/* This function is an evil legacy and should not be used. This implementation
198 * is WYSIWYG, though I've done my best. */
199#ifndef OPENSSL_NO_DEPRECATED
200void BN_CTX_init(BN_CTX *ctx)
201 {
202 /* Assume the caller obtained the context via BN_CTX_new() and so is
203 * trying to reset it for use. Nothing else makes sense, least of all
204 * binary compatibility from a time when they could declare a static
205 * variable. */
206 BN_POOL_reset(&ctx->pool);
207 BN_STACK_reset(&ctx->stack);
208 ctx->used = 0;
209 ctx->err_stack = 0;
210 ctx->too_many = 0;
211 }
212#endif
213
214BN_CTX *BN_CTX_new(void)
215 {
216 BN_CTX *ret = OPENSSL_malloc(sizeof(BN_CTX));
217 if(!ret)
218 {
219 BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
220 return NULL;
221 }
222 /* Initialise the structure */
223 BN_POOL_init(&ret->pool);
224 BN_STACK_init(&ret->stack);
225 ret->used = 0;
226 ret->err_stack = 0;
227 ret->too_many = 0;
228 return ret;
229 }
230
231void BN_CTX_free(BN_CTX *ctx)
232 {
233 if (ctx == NULL)
234 return;
235#ifdef BN_CTX_DEBUG
236 {
237 BN_POOL_ITEM *pool = ctx->pool.head;
238 fprintf(stderr,"BN_CTX_free, stack-size=%d, pool-bignums=%d\n",
239 ctx->stack.size, ctx->pool.size);
240 fprintf(stderr,"dmaxs: ");
241 while(pool) {
242 unsigned loop = 0;
243 while(loop < BN_CTX_POOL_SIZE)
244 fprintf(stderr,"%02x ", pool->vals[loop++].dmax);
245 pool = pool->next;
246 }
247 fprintf(stderr,"\n");
248 }
249#endif
250 BN_STACK_finish(&ctx->stack);
251 BN_POOL_finish(&ctx->pool);
252 OPENSSL_free(ctx);
253 }
254
255void BN_CTX_start(BN_CTX *ctx)
256 {
257 CTXDBG_ENTRY("BN_CTX_start", ctx);
258 /* If we're already overflowing ... */
259 if(ctx->err_stack || ctx->too_many)
260 ctx->err_stack++;
261 /* (Try to) get a new frame pointer */
262 else if(!BN_STACK_push(&ctx->stack, ctx->used))
263 {
264 BNerr(BN_F_BN_CTX_START,BN_R_TOO_MANY_TEMPORARY_VARIABLES);
265 ctx->err_stack++;
266 }
267 CTXDBG_EXIT(ctx);
268 }
269
270void BN_CTX_end(BN_CTX *ctx)
271 {
272 CTXDBG_ENTRY("BN_CTX_end", ctx);
273 if(ctx->err_stack)
274 ctx->err_stack--;
275 else
276 {
277 unsigned int fp = BN_STACK_pop(&ctx->stack);
278 /* Does this stack frame have anything to release? */
279 if(fp < ctx->used)
280 BN_POOL_release(&ctx->pool, ctx->used - fp);
281 ctx->used = fp;
282 /* Unjam "too_many" in case "get" had failed */
283 ctx->too_many = 0;
284 }
285 CTXDBG_EXIT(ctx);
286 }
287
288BIGNUM *BN_CTX_get(BN_CTX *ctx)
289 {
290 BIGNUM *ret;
291 CTXDBG_ENTRY("BN_CTX_get", ctx);
292 if(ctx->err_stack || ctx->too_many) return NULL;
293 if((ret = BN_POOL_get(&ctx->pool)) == NULL)
294 {
295 /* Setting too_many prevents repeated "get" attempts from
296 * cluttering the error stack. */
297 ctx->too_many = 1;
298 BNerr(BN_F_BN_CTX_GET,BN_R_TOO_MANY_TEMPORARY_VARIABLES);
299 return NULL;
300 }
301 /* OK, make sure the returned bignum is "zero" */
302 BN_zero(ret);
303 ctx->used++;
304 CTXDBG_RET(ctx, ret);
305 return ret;
306 }
307
308/************/
309/* BN_STACK */
310/************/
311
312static void BN_STACK_init(BN_STACK *st)
313 {
314 st->indexes = NULL;
315 st->depth = st->size = 0;
316 }
317
318static void BN_STACK_finish(BN_STACK *st)
319 {
320 if(st->size) OPENSSL_free(st->indexes);
321 }
322
323#ifndef OPENSSL_NO_DEPRECATED
324static void BN_STACK_reset(BN_STACK *st)
325 {
326 st->depth = 0;
327 }
328#endif
329
330static int BN_STACK_push(BN_STACK *st, unsigned int idx)
331 {
332 if(st->depth == st->size)
333 /* Need to expand */
334 {
335 unsigned int newsize = (st->size ?
336 (st->size * 3 / 2) : BN_CTX_START_FRAMES);
337 unsigned int *newitems = OPENSSL_malloc(newsize *
338 sizeof(unsigned int));
339 if(!newitems) return 0;
340 if(st->depth)
341 memcpy(newitems, st->indexes, st->depth *
342 sizeof(unsigned int));
343 if(st->size) OPENSSL_free(st->indexes);
344 st->indexes = newitems;
345 st->size = newsize;
346 }
347 st->indexes[(st->depth)++] = idx;
348 return 1;
349 }
350
351static unsigned int BN_STACK_pop(BN_STACK *st)
352 {
353 return st->indexes[--(st->depth)];
354 }
355
356/***********/
357/* BN_POOL */
358/***********/
359
360static void BN_POOL_init(BN_POOL *p)
361 {
362 p->head = p->current = p->tail = NULL;
363 p->used = p->size = 0;
364 }
365
366static void BN_POOL_finish(BN_POOL *p)
367 {
368 while(p->head)
369 {
370 unsigned int loop = 0;
371 BIGNUM *bn = p->head->vals;
372 while(loop++ < BN_CTX_POOL_SIZE)
373 {
374 if(bn->d) BN_clear_free(bn);
375 bn++;
376 }
377 p->current = p->head->next;
378 OPENSSL_free(p->head);
379 p->head = p->current;
380 }
381 }
382
383#ifndef OPENSSL_NO_DEPRECATED
384static void BN_POOL_reset(BN_POOL *p)
385 {
386 BN_POOL_ITEM *item = p->head;
387 while(item)
388 {
389 unsigned int loop = 0;
390 BIGNUM *bn = item->vals;
391 while(loop++ < BN_CTX_POOL_SIZE)
392 {
393 if(bn->d) BN_clear(bn);
394 bn++;
395 }
396 item = item->next;
397 }
398 p->current = p->head;
399 p->used = 0;
400 }
401#endif
402
403static BIGNUM *BN_POOL_get(BN_POOL *p)
404 {
405 if(p->used == p->size)
406 {
407 BIGNUM *bn;
408 unsigned int loop = 0;
409 BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(BN_POOL_ITEM));
410 if(!item) return NULL;
411 /* Initialise the structure */
412 bn = item->vals;
413 while(loop++ < BN_CTX_POOL_SIZE)
414 BN_init(bn++);
415 item->prev = p->tail;
416 item->next = NULL;
417 /* Link it in */
418 if(!p->head)
419 p->head = p->current = p->tail = item;
420 else
421 {
422 p->tail->next = item;
423 p->tail = item;
424 p->current = item;
425 }
426 p->size += BN_CTX_POOL_SIZE;
427 p->used++;
428 /* Return the first bignum from the new pool */
429 return item->vals;
430 }
431 if(!p->used)
432 p->current = p->head;
433 else if((p->used % BN_CTX_POOL_SIZE) == 0)
434 p->current = p->current->next;
435 return p->current->vals + ((p->used++) % BN_CTX_POOL_SIZE);
436 }
437
438static void BN_POOL_release(BN_POOL *p, unsigned int num)
439 {
440 unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;
441 p->used -= num;
442 while(num--)
443 {
444 bn_check_top(p->current->vals + offset);
445 if(!offset)
446 {
447 offset = BN_CTX_POOL_SIZE - 1;
448 p->current = p->current->prev;
449 }
450 else
451 offset--;
452 }
453 }
454
diff --git a/src/lib/libcrypto/bn/bn_depr.c b/src/lib/libcrypto/bn/bn_depr.c
deleted file mode 100644
index 27535e4fca..0000000000
--- a/src/lib/libcrypto/bn/bn_depr.c
+++ /dev/null
@@ -1,112 +0,0 @@
1/* crypto/bn/bn_depr.c */
2/* ====================================================================
3 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56/* Support for deprecated functions goes here - static linkage will only slurp
57 * this code if applications are using them directly. */
58
59#include <stdio.h>
60#include <time.h>
61#include "cryptlib.h"
62#include "bn_lcl.h"
63#include <openssl/rand.h>
64
65static void *dummy=&dummy;
66
67#ifndef OPENSSL_NO_DEPRECATED
68BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe,
69 const BIGNUM *add, const BIGNUM *rem,
70 void (*callback)(int,int,void *), void *cb_arg)
71 {
72 BN_GENCB cb;
73 BIGNUM *rnd=NULL;
74 int found = 0;
75
76 BN_GENCB_set_old(&cb, callback, cb_arg);
77
78 if (ret == NULL)
79 {
80 if ((rnd=BN_new()) == NULL) goto err;
81 }
82 else
83 rnd=ret;
84 if(!BN_generate_prime_ex(rnd, bits, safe, add, rem, &cb))
85 goto err;
86
87 /* we have a prime :-) */
88 found = 1;
89err:
90 if (!found && (ret == NULL) && (rnd != NULL)) BN_free(rnd);
91 return(found ? rnd : NULL);
92 }
93
94int BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int,int,void *),
95 BN_CTX *ctx_passed, void *cb_arg)
96 {
97 BN_GENCB cb;
98 BN_GENCB_set_old(&cb, callback, cb_arg);
99 return BN_is_prime_ex(a, checks, ctx_passed, &cb);
100 }
101
102int BN_is_prime_fasttest(const BIGNUM *a, int checks,
103 void (*callback)(int,int,void *),
104 BN_CTX *ctx_passed, void *cb_arg,
105 int do_trial_division)
106 {
107 BN_GENCB cb;
108 BN_GENCB_set_old(&cb, callback, cb_arg);
109 return BN_is_prime_fasttest_ex(a, checks, ctx_passed,
110 do_trial_division, &cb);
111 }
112#endif
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
deleted file mode 100644
index 802a43d642..0000000000
--- a/src/lib/libcrypto/bn/bn_div.c
+++ /dev/null
@@ -1,650 +0,0 @@
1/* crypto/bn/bn_div.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <openssl/bn.h>
61#include "cryptlib.h"
62#include "bn_lcl.h"
63
64
65/* The old slow way */
66#if 0
67int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
68 BN_CTX *ctx)
69 {
70 int i,nm,nd;
71 int ret = 0;
72 BIGNUM *D;
73
74 bn_check_top(m);
75 bn_check_top(d);
76 if (BN_is_zero(d))
77 {
78 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
79 return(0);
80 }
81
82 if (BN_ucmp(m,d) < 0)
83 {
84 if (rem != NULL)
85 { if (BN_copy(rem,m) == NULL) return(0); }
86 if (dv != NULL) BN_zero(dv);
87 return(1);
88 }
89
90 BN_CTX_start(ctx);
91 D = BN_CTX_get(ctx);
92 if (dv == NULL) dv = BN_CTX_get(ctx);
93 if (rem == NULL) rem = BN_CTX_get(ctx);
94 if (D == NULL || dv == NULL || rem == NULL)
95 goto end;
96
97 nd=BN_num_bits(d);
98 nm=BN_num_bits(m);
99 if (BN_copy(D,d) == NULL) goto end;
100 if (BN_copy(rem,m) == NULL) goto end;
101
102 /* The next 2 are needed so we can do a dv->d[0]|=1 later
103 * since BN_lshift1 will only work once there is a value :-) */
104 BN_zero(dv);
105 if(bn_wexpand(dv,1) == NULL) goto end;
106 dv->top=1;
107
108 if (!BN_lshift(D,D,nm-nd)) goto end;
109 for (i=nm-nd; i>=0; i--)
110 {
111 if (!BN_lshift1(dv,dv)) goto end;
112 if (BN_ucmp(rem,D) >= 0)
113 {
114 dv->d[0]|=1;
115 if (!BN_usub(rem,rem,D)) goto end;
116 }
117/* CAN IMPROVE (and have now :=) */
118 if (!BN_rshift1(D,D)) goto end;
119 }
120 rem->neg=BN_is_zero(rem)?0:m->neg;
121 dv->neg=m->neg^d->neg;
122 ret = 1;
123 end:
124 BN_CTX_end(ctx);
125 return(ret);
126 }
127
128#else
129
130#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
131 && !defined(PEDANTIC) && !defined(BN_DIV3W)
132# if defined(__GNUC__) && __GNUC__>=2
133# if defined(__i386) || defined (__i386__)
134 /*
135 * There were two reasons for implementing this template:
136 * - GNU C generates a call to a function (__udivdi3 to be exact)
137 * in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
138 * understand why...);
139 * - divl doesn't only calculate quotient, but also leaves
140 * remainder in %edx which we can definitely use here:-)
141 *
142 * <appro@fy.chalmers.se>
143 */
144# define bn_div_words(n0,n1,d0) \
145 ({ asm volatile ( \
146 "divl %4" \
147 : "=a"(q), "=d"(rem) \
148 : "a"(n1), "d"(n0), "g"(d0) \
149 : "cc"); \
150 q; \
151 })
152# define REMAINDER_IS_ALREADY_CALCULATED
153# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
154 /*
155 * Same story here, but it's 128-bit by 64-bit division. Wow!
156 * <appro@fy.chalmers.se>
157 */
158# define bn_div_words(n0,n1,d0) \
159 ({ asm volatile ( \
160 "divq %4" \
161 : "=a"(q), "=d"(rem) \
162 : "a"(n1), "d"(n0), "g"(d0) \
163 : "cc"); \
164 q; \
165 })
166# define REMAINDER_IS_ALREADY_CALCULATED
167# endif /* __<cpu> */
168# endif /* __GNUC__ */
169#endif /* OPENSSL_NO_ASM */
170
171
172/* BN_div[_no_branch] computes dv := num / divisor, rounding towards
173 * zero, and sets up rm such that dv*divisor + rm = num holds.
174 * Thus:
175 * dv->neg == num->neg ^ divisor->neg (unless the result is zero)
176 * rm->neg == num->neg (unless the remainder is zero)
177 * If 'dv' or 'rm' is NULL, the respective value is not returned.
178 */
179static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
180 const BIGNUM *divisor, BN_CTX *ctx);
181int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
182 BN_CTX *ctx)
183 {
184 int norm_shift,i,loop;
185 BIGNUM *tmp,wnum,*snum,*sdiv,*res;
186 BN_ULONG *resp,*wnump;
187 BN_ULONG d0,d1;
188 int num_n,div_n;
189
190 /* Invalid zero-padding would have particularly bad consequences
191 * in the case of 'num', so don't just rely on bn_check_top() for this one
192 * (bn_check_top() works only for BN_DEBUG builds) */
193 if (num->top > 0 && num->d[num->top - 1] == 0)
194 {
195 BNerr(BN_F_BN_DIV,BN_R_NOT_INITIALIZED);
196 return 0;
197 }
198
199 bn_check_top(num);
200
201 if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0))
202 {
203 return BN_div_no_branch(dv, rm, num, divisor, ctx);
204 }
205
206 bn_check_top(dv);
207 bn_check_top(rm);
208 /* bn_check_top(num); */ /* 'num' has been checked already */
209 bn_check_top(divisor);
210
211 if (BN_is_zero(divisor))
212 {
213 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
214 return(0);
215 }
216
217 if (BN_ucmp(num,divisor) < 0)
218 {
219 if (rm != NULL)
220 { if (BN_copy(rm,num) == NULL) return(0); }
221 if (dv != NULL) BN_zero(dv);
222 return(1);
223 }
224
225 BN_CTX_start(ctx);
226 tmp=BN_CTX_get(ctx);
227 snum=BN_CTX_get(ctx);
228 sdiv=BN_CTX_get(ctx);
229 if (dv == NULL)
230 res=BN_CTX_get(ctx);
231 else res=dv;
232 if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL)
233 goto err;
234
235 /* First we normalise the numbers */
236 norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
237 if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
238 sdiv->neg=0;
239 norm_shift+=BN_BITS2;
240 if (!(BN_lshift(snum,num,norm_shift))) goto err;
241 snum->neg=0;
242 div_n=sdiv->top;
243 num_n=snum->top;
244 loop=num_n-div_n;
245 /* Lets setup a 'window' into snum
246 * This is the part that corresponds to the current
247 * 'area' being divided */
248 wnum.neg = 0;
249 wnum.d = &(snum->d[loop]);
250 wnum.top = div_n;
251 /* only needed when BN_ucmp messes up the values between top and max */
252 wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
253
254 /* Get the top 2 words of sdiv */
255 /* div_n=sdiv->top; */
256 d0=sdiv->d[div_n-1];
257 d1=(div_n == 1)?0:sdiv->d[div_n-2];
258
259 /* pointer to the 'top' of snum */
260 wnump= &(snum->d[num_n-1]);
261
262 /* Setup to 'res' */
263 res->neg= (num->neg^divisor->neg);
264 if (!bn_wexpand(res,(loop+1))) goto err;
265 res->top=loop;
266 resp= &(res->d[loop-1]);
267
268 /* space for temp */
269 if (!bn_wexpand(tmp,(div_n+1))) goto err;
270
271 if (BN_ucmp(&wnum,sdiv) >= 0)
272 {
273 /* If BN_DEBUG_RAND is defined BN_ucmp changes (via
274 * bn_pollute) the const bignum arguments =>
275 * clean the values between top and max again */
276 bn_clear_top2max(&wnum);
277 bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
278 *resp=1;
279 }
280 else
281 res->top--;
282 /* if res->top == 0 then clear the neg value otherwise decrease
283 * the resp pointer */
284 if (res->top == 0)
285 res->neg = 0;
286 else
287 resp--;
288
289 for (i=0; i<loop-1; i++, wnump--, resp--)
290 {
291 BN_ULONG q,l0;
292 /* the first part of the loop uses the top two words of
293 * snum and sdiv to calculate a BN_ULONG q such that
294 * | wnum - sdiv * q | < sdiv */
295#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
296 BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
297 q=bn_div_3_words(wnump,d1,d0);
298#else
299 BN_ULONG n0,n1,rem=0;
300
301 n0=wnump[0];
302 n1=wnump[-1];
303 if (n0 == d0)
304 q=BN_MASK2;
305 else /* n0 < d0 */
306 {
307#ifdef BN_LLONG
308 BN_ULLONG t2;
309
310#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
311 q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
312#else
313 q=bn_div_words(n0,n1,d0);
314#ifdef BN_DEBUG_LEVITTE
315 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
316X) -> 0x%08X\n",
317 n0, n1, d0, q);
318#endif
319#endif
320
321#ifndef REMAINDER_IS_ALREADY_CALCULATED
322 /*
323 * rem doesn't have to be BN_ULLONG. The least we
324 * know it's less that d0, isn't it?
325 */
326 rem=(n1-q*d0)&BN_MASK2;
327#endif
328 t2=(BN_ULLONG)d1*q;
329
330 for (;;)
331 {
332 if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
333 break;
334 q--;
335 rem += d0;
336 if (rem < d0) break; /* don't let rem overflow */
337 t2 -= d1;
338 }
339#else /* !BN_LLONG */
340 BN_ULONG t2l,t2h;
341
342 q=bn_div_words(n0,n1,d0);
343#ifdef BN_DEBUG_LEVITTE
344 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
345X) -> 0x%08X\n",
346 n0, n1, d0, q);
347#endif
348#ifndef REMAINDER_IS_ALREADY_CALCULATED
349 rem=(n1-q*d0)&BN_MASK2;
350#endif
351
352#if defined(BN_UMULT_LOHI)
353 BN_UMULT_LOHI(t2l,t2h,d1,q);
354#elif defined(BN_UMULT_HIGH)
355 t2l = d1 * q;
356 t2h = BN_UMULT_HIGH(d1,q);
357#else
358 {
359 BN_ULONG ql, qh;
360 t2l=LBITS(d1); t2h=HBITS(d1);
361 ql =LBITS(q); qh =HBITS(q);
362 mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
363 }
364#endif
365
366 for (;;)
367 {
368 if ((t2h < rem) ||
369 ((t2h == rem) && (t2l <= wnump[-2])))
370 break;
371 q--;
372 rem += d0;
373 if (rem < d0) break; /* don't let rem overflow */
374 if (t2l < d1) t2h--; t2l -= d1;
375 }
376#endif /* !BN_LLONG */
377 }
378#endif /* !BN_DIV3W */
379
380 l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
381 tmp->d[div_n]=l0;
382 wnum.d--;
383 /* ingore top values of the bignums just sub the two
384 * BN_ULONG arrays with bn_sub_words */
385 if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
386 {
387 /* Note: As we have considered only the leading
388 * two BN_ULONGs in the calculation of q, sdiv * q
389 * might be greater than wnum (but then (q-1) * sdiv
390 * is less or equal than wnum)
391 */
392 q--;
393 if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
394 /* we can't have an overflow here (assuming
395 * that q != 0, but if q == 0 then tmp is
396 * zero anyway) */
397 (*wnump)++;
398 }
399 /* store part of the result */
400 *resp = q;
401 }
402 bn_correct_top(snum);
403 if (rm != NULL)
404 {
405 /* Keep a copy of the neg flag in num because if rm==num
406 * BN_rshift() will overwrite it.
407 */
408 int neg = num->neg;
409 BN_rshift(rm,snum,norm_shift);
410 if (!BN_is_zero(rm))
411 rm->neg = neg;
412 bn_check_top(rm);
413 }
414 BN_CTX_end(ctx);
415 return(1);
416err:
417 bn_check_top(rm);
418 BN_CTX_end(ctx);
419 return(0);
420 }
421
422
423/* BN_div_no_branch is a special version of BN_div. It does not contain
424 * branches that may leak sensitive information.
425 */
426static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
427 const BIGNUM *divisor, BN_CTX *ctx)
428 {
429 int norm_shift,i,loop;
430 BIGNUM *tmp,wnum,*snum,*sdiv,*res;
431 BN_ULONG *resp,*wnump;
432 BN_ULONG d0,d1;
433 int num_n,div_n;
434
435 bn_check_top(dv);
436 bn_check_top(rm);
437 /* bn_check_top(num); */ /* 'num' has been checked in BN_div() */
438 bn_check_top(divisor);
439
440 if (BN_is_zero(divisor))
441 {
442 BNerr(BN_F_BN_DIV_NO_BRANCH,BN_R_DIV_BY_ZERO);
443 return(0);
444 }
445
446 BN_CTX_start(ctx);
447 tmp=BN_CTX_get(ctx);
448 snum=BN_CTX_get(ctx);
449 sdiv=BN_CTX_get(ctx);
450 if (dv == NULL)
451 res=BN_CTX_get(ctx);
452 else res=dv;
453 if (sdiv == NULL || res == NULL) goto err;
454
455 /* First we normalise the numbers */
456 norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
457 if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
458 sdiv->neg=0;
459 norm_shift+=BN_BITS2;
460 if (!(BN_lshift(snum,num,norm_shift))) goto err;
461 snum->neg=0;
462
463 /* Since we don't know whether snum is larger than sdiv,
464 * we pad snum with enough zeroes without changing its
465 * value.
466 */
467 if (snum->top <= sdiv->top+1)
468 {
469 if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
470 for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
471 snum->top = sdiv->top + 2;
472 }
473 else
474 {
475 if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
476 snum->d[snum->top] = 0;
477 snum->top ++;
478 }
479
480 div_n=sdiv->top;
481 num_n=snum->top;
482 loop=num_n-div_n;
483 /* Lets setup a 'window' into snum
484 * This is the part that corresponds to the current
485 * 'area' being divided */
486 wnum.neg = 0;
487 wnum.d = &(snum->d[loop]);
488 wnum.top = div_n;
489 /* only needed when BN_ucmp messes up the values between top and max */
490 wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
491
492 /* Get the top 2 words of sdiv */
493 /* div_n=sdiv->top; */
494 d0=sdiv->d[div_n-1];
495 d1=(div_n == 1)?0:sdiv->d[div_n-2];
496
497 /* pointer to the 'top' of snum */
498 wnump= &(snum->d[num_n-1]);
499
500 /* Setup to 'res' */
501 res->neg= (num->neg^divisor->neg);
502 if (!bn_wexpand(res,(loop+1))) goto err;
503 res->top=loop-1;
504 resp= &(res->d[loop-1]);
505
506 /* space for temp */
507 if (!bn_wexpand(tmp,(div_n+1))) goto err;
508
509 /* if res->top == 0 then clear the neg value otherwise decrease
510 * the resp pointer */
511 if (res->top == 0)
512 res->neg = 0;
513 else
514 resp--;
515
516 for (i=0; i<loop-1; i++, wnump--, resp--)
517 {
518 BN_ULONG q,l0;
519 /* the first part of the loop uses the top two words of
520 * snum and sdiv to calculate a BN_ULONG q such that
521 * | wnum - sdiv * q | < sdiv */
522#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
523 BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
524 q=bn_div_3_words(wnump,d1,d0);
525#else
526 BN_ULONG n0,n1,rem=0;
527
528 n0=wnump[0];
529 n1=wnump[-1];
530 if (n0 == d0)
531 q=BN_MASK2;
532 else /* n0 < d0 */
533 {
534#ifdef BN_LLONG
535 BN_ULLONG t2;
536
537#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
538 q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
539#else
540 q=bn_div_words(n0,n1,d0);
541#ifdef BN_DEBUG_LEVITTE
542 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
543X) -> 0x%08X\n",
544 n0, n1, d0, q);
545#endif
546#endif
547
548#ifndef REMAINDER_IS_ALREADY_CALCULATED
549 /*
550 * rem doesn't have to be BN_ULLONG. The least we
551 * know it's less that d0, isn't it?
552 */
553 rem=(n1-q*d0)&BN_MASK2;
554#endif
555 t2=(BN_ULLONG)d1*q;
556
557 for (;;)
558 {
559 if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
560 break;
561 q--;
562 rem += d0;
563 if (rem < d0) break; /* don't let rem overflow */
564 t2 -= d1;
565 }
566#else /* !BN_LLONG */
567 BN_ULONG t2l,t2h;
568
569 q=bn_div_words(n0,n1,d0);
570#ifdef BN_DEBUG_LEVITTE
571 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
572X) -> 0x%08X\n",
573 n0, n1, d0, q);
574#endif
575#ifndef REMAINDER_IS_ALREADY_CALCULATED
576 rem=(n1-q*d0)&BN_MASK2;
577#endif
578
579#if defined(BN_UMULT_LOHI)
580 BN_UMULT_LOHI(t2l,t2h,d1,q);
581#elif defined(BN_UMULT_HIGH)
582 t2l = d1 * q;
583 t2h = BN_UMULT_HIGH(d1,q);
584#else
585 {
586 BN_ULONG ql, qh;
587 t2l=LBITS(d1); t2h=HBITS(d1);
588 ql =LBITS(q); qh =HBITS(q);
589 mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
590 }
591#endif
592
593 for (;;)
594 {
595 if ((t2h < rem) ||
596 ((t2h == rem) && (t2l <= wnump[-2])))
597 break;
598 q--;
599 rem += d0;
600 if (rem < d0) break; /* don't let rem overflow */
601 if (t2l < d1) t2h--; t2l -= d1;
602 }
603#endif /* !BN_LLONG */
604 }
605#endif /* !BN_DIV3W */
606
607 l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
608 tmp->d[div_n]=l0;
609 wnum.d--;
610 /* ingore top values of the bignums just sub the two
611 * BN_ULONG arrays with bn_sub_words */
612 if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
613 {
614 /* Note: As we have considered only the leading
615 * two BN_ULONGs in the calculation of q, sdiv * q
616 * might be greater than wnum (but then (q-1) * sdiv
617 * is less or equal than wnum)
618 */
619 q--;
620 if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
621 /* we can't have an overflow here (assuming
622 * that q != 0, but if q == 0 then tmp is
623 * zero anyway) */
624 (*wnump)++;
625 }
626 /* store part of the result */
627 *resp = q;
628 }
629 bn_correct_top(snum);
630 if (rm != NULL)
631 {
632 /* Keep a copy of the neg flag in num because if rm==num
633 * BN_rshift() will overwrite it.
634 */
635 int neg = num->neg;
636 BN_rshift(rm,snum,norm_shift);
637 if (!BN_is_zero(rm))
638 rm->neg = neg;
639 bn_check_top(rm);
640 }
641 bn_correct_top(res);
642 BN_CTX_end(ctx);
643 return(1);
644err:
645 bn_check_top(rm);
646 BN_CTX_end(ctx);
647 return(0);
648 }
649
650#endif
diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c
deleted file mode 100644
index cfe2eb94a0..0000000000
--- a/src/lib/libcrypto/bn/bn_err.c
+++ /dev/null
@@ -1,150 +0,0 @@
1/* crypto/bn/bn_err.c */
2/* ====================================================================
3 * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@OpenSSL.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56/* NOTE: this file was auto generated by the mkerr.pl script: any changes
57 * made to it will be overwritten when the script next updates this file,
58 * only reason strings will be preserved.
59 */
60
61#include <stdio.h>
62#include <openssl/err.h>
63#include <openssl/bn.h>
64
65/* BEGIN ERROR CODES */
66#ifndef OPENSSL_NO_ERR
67
68#define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0)
69#define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason)
70
71static ERR_STRING_DATA BN_str_functs[]=
72 {
73{ERR_FUNC(BN_F_BNRAND), "BNRAND"},
74{ERR_FUNC(BN_F_BN_BLINDING_CONVERT_EX), "BN_BLINDING_convert_ex"},
75{ERR_FUNC(BN_F_BN_BLINDING_CREATE_PARAM), "BN_BLINDING_create_param"},
76{ERR_FUNC(BN_F_BN_BLINDING_INVERT_EX), "BN_BLINDING_invert_ex"},
77{ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"},
78{ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"},
79{ERR_FUNC(BN_F_BN_BN2DEC), "BN_bn2dec"},
80{ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"},
81{ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"},
82{ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"},
83{ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"},
84{ERR_FUNC(BN_F_BN_DIV), "BN_div"},
85{ERR_FUNC(BN_F_BN_DIV_NO_BRANCH), "BN_div_no_branch"},
86{ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"},
87{ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
88{ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"},
89{ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"},
90{ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"},
91{ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"},
92{ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"},
93{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD), "BN_GF2m_mod_solve_quad"},
94{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"},
95{ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"},
96{ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"},
97{ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"},
98{ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"},
99{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"},
100{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"},
101{ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"},
102{ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"},
103{ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"},
104{ERR_FUNC(BN_F_BN_MOD_INVERSE_NO_BRANCH), "BN_mod_inverse_no_branch"},
105{ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"},
106{ERR_FUNC(BN_F_BN_MOD_MUL_RECIPROCAL), "BN_mod_mul_reciprocal"},
107{ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"},
108{ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"},
109{ERR_FUNC(BN_F_BN_NEW), "BN_new"},
110{ERR_FUNC(BN_F_BN_RAND), "BN_rand"},
111{ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"},
112{ERR_FUNC(BN_F_BN_USUB), "BN_usub"},
113{0,NULL}
114 };
115
116static ERR_STRING_DATA BN_str_reasons[]=
117 {
118{ERR_REASON(BN_R_ARG2_LT_ARG3) ,"arg2 lt arg3"},
119{ERR_REASON(BN_R_BAD_RECIPROCAL) ,"bad reciprocal"},
120{ERR_REASON(BN_R_BIGNUM_TOO_LONG) ,"bignum too long"},
121{ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS),"called with even modulus"},
122{ERR_REASON(BN_R_DIV_BY_ZERO) ,"div by zero"},
123{ERR_REASON(BN_R_ENCODING_ERROR) ,"encoding error"},
124{ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),"expand on static bignum data"},
125{ERR_REASON(BN_R_INPUT_NOT_REDUCED) ,"input not reduced"},
126{ERR_REASON(BN_R_INVALID_LENGTH) ,"invalid length"},
127{ERR_REASON(BN_R_INVALID_RANGE) ,"invalid range"},
128{ERR_REASON(BN_R_NOT_A_SQUARE) ,"not a square"},
129{ERR_REASON(BN_R_NOT_INITIALIZED) ,"not initialized"},
130{ERR_REASON(BN_R_NO_INVERSE) ,"no inverse"},
131{ERR_REASON(BN_R_NO_SOLUTION) ,"no solution"},
132{ERR_REASON(BN_R_P_IS_NOT_PRIME) ,"p is not prime"},
133{ERR_REASON(BN_R_TOO_MANY_ITERATIONS) ,"too many iterations"},
134{ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),"too many temporary variables"},
135{0,NULL}
136 };
137
138#endif
139
140void ERR_load_BN_strings(void)
141 {
142#ifndef OPENSSL_NO_ERR
143
144 if (ERR_func_error_string(BN_str_functs[0].error) == NULL)
145 {
146 ERR_load_strings(0,BN_str_functs);
147 ERR_load_strings(0,BN_str_reasons);
148 }
149#endif
150 }
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
deleted file mode 100644
index d9b6c737fc..0000000000
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ /dev/null
@@ -1,991 +0,0 @@
1/* crypto/bn/bn_exp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116/* maximum precomputation table size for *variable* sliding windows */
117#define TABLE_SIZE 32
118
119/* this one works - simple but works */
120int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
121 {
122 int i,bits,ret=0;
123 BIGNUM *v,*rr;
124
125 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
126 {
127 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
128 BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
129 return -1;
130 }
131
132 BN_CTX_start(ctx);
133 if ((r == a) || (r == p))
134 rr = BN_CTX_get(ctx);
135 else
136 rr = r;
137 v = BN_CTX_get(ctx);
138 if (rr == NULL || v == NULL) goto err;
139
140 if (BN_copy(v,a) == NULL) goto err;
141 bits=BN_num_bits(p);
142
143 if (BN_is_odd(p))
144 { if (BN_copy(rr,a) == NULL) goto err; }
145 else { if (!BN_one(rr)) goto err; }
146
147 for (i=1; i<bits; i++)
148 {
149 if (!BN_sqr(v,v,ctx)) goto err;
150 if (BN_is_bit_set(p,i))
151 {
152 if (!BN_mul(rr,rr,v,ctx)) goto err;
153 }
154 }
155 ret=1;
156err:
157 if (r != rr) BN_copy(r,rr);
158 BN_CTX_end(ctx);
159 bn_check_top(r);
160 return(ret);
161 }
162
163
164int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
165 BN_CTX *ctx)
166 {
167 int ret;
168
169 bn_check_top(a);
170 bn_check_top(p);
171 bn_check_top(m);
172
173 /* For even modulus m = 2^k*m_odd, it might make sense to compute
174 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
175 * exponentiation for the odd part), using appropriate exponent
176 * reductions, and combine the results using the CRT.
177 *
178 * For now, we use Montgomery only if the modulus is odd; otherwise,
179 * exponentiation using the reciprocal-based quick remaindering
180 * algorithm is used.
181 *
182 * (Timing obtained with expspeed.c [computations a^p mod m
183 * where a, p, m are of the same length: 256, 512, 1024, 2048,
184 * 4096, 8192 bits], compared to the running time of the
185 * standard algorithm:
186 *
187 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
188 * 55 .. 77 % [UltraSparc processor, but
189 * debug-solaris-sparcv8-gcc conf.]
190 *
191 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
192 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
193 *
194 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
195 * at 2048 and more bits, but at 512 and 1024 bits, it was
196 * slower even than the standard algorithm!
197 *
198 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
199 * should be obtained when the new Montgomery reduction code
200 * has been integrated into OpenSSL.)
201 */
202
203#define MONT_MUL_MOD
204#define MONT_EXP_WORD
205#define RECP_MUL_MOD
206
207#ifdef MONT_MUL_MOD
208 /* I have finally been able to take out this pre-condition of
209 * the top bit being set. It was caused by an error in BN_div
210 * with negatives. There was also another problem when for a^b%m
211 * a >= m. eay 07-May-97 */
212/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
213
214 if (BN_is_odd(m))
215 {
216# ifdef MONT_EXP_WORD
217 if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
218 {
219 BN_ULONG A = a->d[0];
220 ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
221 }
222 else
223# endif
224 ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
225 }
226 else
227#endif
228#ifdef RECP_MUL_MOD
229 { ret=BN_mod_exp_recp(r,a,p,m,ctx); }
230#else
231 { ret=BN_mod_exp_simple(r,a,p,m,ctx); }
232#endif
233
234 bn_check_top(r);
235 return(ret);
236 }
237
238
239int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
240 const BIGNUM *m, BN_CTX *ctx)
241 {
242 int i,j,bits,ret=0,wstart,wend,window,wvalue;
243 int start=1;
244 BIGNUM *aa;
245 /* Table of variables obtained from 'ctx' */
246 BIGNUM *val[TABLE_SIZE];
247 BN_RECP_CTX recp;
248
249 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
250 {
251 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
252 BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
253 return -1;
254 }
255
256 bits=BN_num_bits(p);
257
258 if (bits == 0)
259 {
260 ret = BN_one(r);
261 return ret;
262 }
263
264 BN_CTX_start(ctx);
265 aa = BN_CTX_get(ctx);
266 val[0] = BN_CTX_get(ctx);
267 if(!aa || !val[0]) goto err;
268
269 BN_RECP_CTX_init(&recp);
270 if (m->neg)
271 {
272 /* ignore sign of 'm' */
273 if (!BN_copy(aa, m)) goto err;
274 aa->neg = 0;
275 if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
276 }
277 else
278 {
279 if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
280 }
281
282 if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
283 if (BN_is_zero(val[0]))
284 {
285 BN_zero(r);
286 ret = 1;
287 goto err;
288 }
289
290 window = BN_window_bits_for_exponent_size(bits);
291 if (window > 1)
292 {
293 if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
294 goto err; /* 2 */
295 j=1<<(window-1);
296 for (i=1; i<j; i++)
297 {
298 if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
299 !BN_mod_mul_reciprocal(val[i],val[i-1],
300 aa,&recp,ctx))
301 goto err;
302 }
303 }
304
305 start=1; /* This is used to avoid multiplication etc
306 * when there is only the value '1' in the
307 * buffer. */
308 wvalue=0; /* The 'value' of the window */
309 wstart=bits-1; /* The top bit of the window */
310 wend=0; /* The bottom bit of the window */
311
312 if (!BN_one(r)) goto err;
313
314 for (;;)
315 {
316 if (BN_is_bit_set(p,wstart) == 0)
317 {
318 if (!start)
319 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
320 goto err;
321 if (wstart == 0) break;
322 wstart--;
323 continue;
324 }
325 /* We now have wstart on a 'set' bit, we now need to work out
326 * how bit a window to do. To do this we need to scan
327 * forward until the last set bit before the end of the
328 * window */
329 j=wstart;
330 wvalue=1;
331 wend=0;
332 for (i=1; i<window; i++)
333 {
334 if (wstart-i < 0) break;
335 if (BN_is_bit_set(p,wstart-i))
336 {
337 wvalue<<=(i-wend);
338 wvalue|=1;
339 wend=i;
340 }
341 }
342
343 /* wend is the size of the current window */
344 j=wend+1;
345 /* add the 'bytes above' */
346 if (!start)
347 for (i=0; i<j; i++)
348 {
349 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
350 goto err;
351 }
352
353 /* wvalue will be an odd number < 2^window */
354 if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
355 goto err;
356
357 /* move the 'window' down further */
358 wstart-=wend+1;
359 wvalue=0;
360 start=0;
361 if (wstart < 0) break;
362 }
363 ret=1;
364err:
365 BN_CTX_end(ctx);
366 BN_RECP_CTX_free(&recp);
367 bn_check_top(r);
368 return(ret);
369 }
370
371
372int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
373 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
374 {
375 int i,j,bits,ret=0,wstart,wend,window,wvalue;
376 int start=1;
377 BIGNUM *d,*r;
378 const BIGNUM *aa;
379 /* Table of variables obtained from 'ctx' */
380 BIGNUM *val[TABLE_SIZE];
381 BN_MONT_CTX *mont=NULL;
382
383 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
384 {
385 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
386 }
387
388 bn_check_top(a);
389 bn_check_top(p);
390 bn_check_top(m);
391
392 if (!BN_is_odd(m))
393 {
394 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
395 return(0);
396 }
397 bits=BN_num_bits(p);
398 if (bits == 0)
399 {
400 ret = BN_one(rr);
401 return ret;
402 }
403
404 BN_CTX_start(ctx);
405 d = BN_CTX_get(ctx);
406 r = BN_CTX_get(ctx);
407 val[0] = BN_CTX_get(ctx);
408 if (!d || !r || !val[0]) goto err;
409
410 /* If this is not done, things will break in the montgomery
411 * part */
412
413 if (in_mont != NULL)
414 mont=in_mont;
415 else
416 {
417 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
418 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
419 }
420
421 if (a->neg || BN_ucmp(a,m) >= 0)
422 {
423 if (!BN_nnmod(val[0],a,m,ctx))
424 goto err;
425 aa= val[0];
426 }
427 else
428 aa=a;
429 if (BN_is_zero(aa))
430 {
431 BN_zero(rr);
432 ret = 1;
433 goto err;
434 }
435 if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
436
437 window = BN_window_bits_for_exponent_size(bits);
438 if (window > 1)
439 {
440 if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
441 j=1<<(window-1);
442 for (i=1; i<j; i++)
443 {
444 if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
445 !BN_mod_mul_montgomery(val[i],val[i-1],
446 d,mont,ctx))
447 goto err;
448 }
449 }
450
451 start=1; /* This is used to avoid multiplication etc
452 * when there is only the value '1' in the
453 * buffer. */
454 wvalue=0; /* The 'value' of the window */
455 wstart=bits-1; /* The top bit of the window */
456 wend=0; /* The bottom bit of the window */
457
458 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
459 for (;;)
460 {
461 if (BN_is_bit_set(p,wstart) == 0)
462 {
463 if (!start)
464 {
465 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
466 goto err;
467 }
468 if (wstart == 0) break;
469 wstart--;
470 continue;
471 }
472 /* We now have wstart on a 'set' bit, we now need to work out
473 * how bit a window to do. To do this we need to scan
474 * forward until the last set bit before the end of the
475 * window */
476 j=wstart;
477 wvalue=1;
478 wend=0;
479 for (i=1; i<window; i++)
480 {
481 if (wstart-i < 0) break;
482 if (BN_is_bit_set(p,wstart-i))
483 {
484 wvalue<<=(i-wend);
485 wvalue|=1;
486 wend=i;
487 }
488 }
489
490 /* wend is the size of the current window */
491 j=wend+1;
492 /* add the 'bytes above' */
493 if (!start)
494 for (i=0; i<j; i++)
495 {
496 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
497 goto err;
498 }
499
500 /* wvalue will be an odd number < 2^window */
501 if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
502 goto err;
503
504 /* move the 'window' down further */
505 wstart-=wend+1;
506 wvalue=0;
507 start=0;
508 if (wstart < 0) break;
509 }
510 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
511 ret=1;
512err:
513 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
514 BN_CTX_end(ctx);
515 bn_check_top(rr);
516 return(ret);
517 }
518
519
520/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
521 * so that accessing any of these table values shows the same access pattern as far
522 * as cache lines are concerned. The following functions are used to transfer a BIGNUM
523 * from/to that table. */
524
525static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
526 {
527 size_t i, j;
528
529 if (bn_wexpand(b, top) == NULL)
530 return 0;
531 while (b->top < top)
532 {
533 b->d[b->top++] = 0;
534 }
535
536 for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
537 {
538 buf[j] = ((unsigned char*)b->d)[i];
539 }
540
541 bn_correct_top(b);
542 return 1;
543 }
544
545static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
546 {
547 size_t i, j;
548
549 if (bn_wexpand(b, top) == NULL)
550 return 0;
551
552 for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
553 {
554 ((unsigned char*)b->d)[i] = buf[j];
555 }
556
557 b->top = top;
558 bn_correct_top(b);
559 return 1;
560 }
561
562/* Given a pointer value, compute the next address that is a cache line multiple. */
563#define MOD_EXP_CTIME_ALIGN(x_) \
564 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
565
566/* This variant of BN_mod_exp_mont() uses fixed windows and the special
567 * precomputation memory layout to limit data-dependency to a minimum
568 * to protect secret exponents (cf. the hyper-threading timing attacks
569 * pointed out by Colin Percival,
570 * http://www.daemonology.net/hyperthreading-considered-harmful/)
571 */
572int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
573 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
574 {
575 int i,bits,ret=0,idx,window,wvalue;
576 int top;
577 BIGNUM *r;
578 const BIGNUM *aa;
579 BN_MONT_CTX *mont=NULL;
580
581 int numPowers;
582 unsigned char *powerbufFree=NULL;
583 int powerbufLen = 0;
584 unsigned char *powerbuf=NULL;
585 BIGNUM *computeTemp=NULL, *am=NULL;
586
587 bn_check_top(a);
588 bn_check_top(p);
589 bn_check_top(m);
590
591 top = m->top;
592
593 if (!(m->d[0] & 1))
594 {
595 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
596 return(0);
597 }
598 bits=BN_num_bits(p);
599 if (bits == 0)
600 {
601 ret = BN_one(rr);
602 return ret;
603 }
604
605 /* Initialize BIGNUM context and allocate intermediate result */
606 BN_CTX_start(ctx);
607 r = BN_CTX_get(ctx);
608 if (r == NULL) goto err;
609
610 /* Allocate a montgomery context if it was not supplied by the caller.
611 * If this is not done, things will break in the montgomery part.
612 */
613 if (in_mont != NULL)
614 mont=in_mont;
615 else
616 {
617 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
618 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
619 }
620
621 /* Get the window size to use with size of p. */
622 window = BN_window_bits_for_ctime_exponent_size(bits);
623
624 /* Allocate a buffer large enough to hold all of the pre-computed
625 * powers of a.
626 */
627 numPowers = 1 << window;
628 powerbufLen = sizeof(m->d[0])*top*numPowers;
629 if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
630 goto err;
631
632 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
633 memset(powerbuf, 0, powerbufLen);
634
635 /* Initialize the intermediate result. Do this early to save double conversion,
636 * once each for a^0 and intermediate result.
637 */
638 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
639 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
640
641 /* Initialize computeTemp as a^1 with montgomery precalcs */
642 computeTemp = BN_CTX_get(ctx);
643 am = BN_CTX_get(ctx);
644 if (computeTemp==NULL || am==NULL) goto err;
645
646 if (a->neg || BN_ucmp(a,m) >= 0)
647 {
648 if (!BN_mod(am,a,m,ctx))
649 goto err;
650 aa= am;
651 }
652 else
653 aa=a;
654 if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
655 if (!BN_copy(computeTemp, am)) goto err;
656 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
657
658 /* If the window size is greater than 1, then calculate
659 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
660 * (even powers could instead be computed as (a^(i/2))^2
661 * to use the slight performance advantage of sqr over mul).
662 */
663 if (window > 1)
664 {
665 for (i=2; i<numPowers; i++)
666 {
667 /* Calculate a^i = a^(i-1) * a */
668 if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
669 goto err;
670 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
671 }
672 }
673
674 /* Adjust the number of bits up to a multiple of the window size.
675 * If the exponent length is not a multiple of the window size, then
676 * this pads the most significant bits with zeros to normalize the
677 * scanning loop to there's no special cases.
678 *
679 * * NOTE: Making the window size a power of two less than the native
680 * * word size ensures that the padded bits won't go past the last
681 * * word in the internal BIGNUM structure. Going past the end will
682 * * still produce the correct result, but causes a different branch
683 * * to be taken in the BN_is_bit_set function.
684 */
685 bits = ((bits+window-1)/window)*window;
686 idx=bits-1; /* The top bit of the window */
687
688 /* Scan the exponent one window at a time starting from the most
689 * significant bits.
690 */
691 while (idx >= 0)
692 {
693 wvalue=0; /* The 'value' of the window */
694
695 /* Scan the window, squaring the result as we go */
696 for (i=0; i<window; i++,idx--)
697 {
698 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err;
699 wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
700 }
701
702 /* Fetch the appropriate pre-computed value from the pre-buf */
703 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
704
705 /* Multiply the result into the intermediate result */
706 if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
707 }
708
709 /* Convert the final result from montgomery to standard format */
710 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
711 ret=1;
712err:
713 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
714 if (powerbuf!=NULL)
715 {
716 OPENSSL_cleanse(powerbuf,powerbufLen);
717 OPENSSL_free(powerbufFree);
718 }
719 if (am!=NULL) BN_clear(am);
720 if (computeTemp!=NULL) BN_clear(computeTemp);
721 BN_CTX_end(ctx);
722 return(ret);
723 }
724
725int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
726 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
727 {
728 BN_MONT_CTX *mont = NULL;
729 int b, bits, ret=0;
730 int r_is_one;
731 BN_ULONG w, next_w;
732 BIGNUM *d, *r, *t;
733 BIGNUM *swap_tmp;
734#define BN_MOD_MUL_WORD(r, w, m) \
735 (BN_mul_word(r, (w)) && \
736 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
737 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
738 /* BN_MOD_MUL_WORD is only used with 'w' large,
739 * so the BN_ucmp test is probably more overhead
740 * than always using BN_mod (which uses BN_copy if
741 * a similar test returns true). */
742 /* We can use BN_mod and do not need BN_nnmod because our
743 * accumulator is never negative (the result of BN_mod does
744 * not depend on the sign of the modulus).
745 */
746#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
747 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
748
749 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
750 {
751 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
752 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
753 return -1;
754 }
755
756 bn_check_top(p);
757 bn_check_top(m);
758
759 if (!BN_is_odd(m))
760 {
761 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
762 return(0);
763 }
764 if (m->top == 1)
765 a %= m->d[0]; /* make sure that 'a' is reduced */
766
767 bits = BN_num_bits(p);
768 if (bits == 0)
769 {
770 ret = BN_one(rr);
771 return ret;
772 }
773 if (a == 0)
774 {
775 BN_zero(rr);
776 ret = 1;
777 return ret;
778 }
779
780 BN_CTX_start(ctx);
781 d = BN_CTX_get(ctx);
782 r = BN_CTX_get(ctx);
783 t = BN_CTX_get(ctx);
784 if (d == NULL || r == NULL || t == NULL) goto err;
785
786 if (in_mont != NULL)
787 mont=in_mont;
788 else
789 {
790 if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
791 if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
792 }
793
794 r_is_one = 1; /* except for Montgomery factor */
795
796 /* bits-1 >= 0 */
797
798 /* The result is accumulated in the product r*w. */
799 w = a; /* bit 'bits-1' of 'p' is always set */
800 for (b = bits-2; b >= 0; b--)
801 {
802 /* First, square r*w. */
803 next_w = w*w;
804 if ((next_w/w) != w) /* overflow */
805 {
806 if (r_is_one)
807 {
808 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
809 r_is_one = 0;
810 }
811 else
812 {
813 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
814 }
815 next_w = 1;
816 }
817 w = next_w;
818 if (!r_is_one)
819 {
820 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
821 }
822
823 /* Second, multiply r*w by 'a' if exponent bit is set. */
824 if (BN_is_bit_set(p, b))
825 {
826 next_w = w*a;
827 if ((next_w/a) != w) /* overflow */
828 {
829 if (r_is_one)
830 {
831 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
832 r_is_one = 0;
833 }
834 else
835 {
836 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
837 }
838 next_w = a;
839 }
840 w = next_w;
841 }
842 }
843
844 /* Finally, set r:=r*w. */
845 if (w != 1)
846 {
847 if (r_is_one)
848 {
849 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
850 r_is_one = 0;
851 }
852 else
853 {
854 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
855 }
856 }
857
858 if (r_is_one) /* can happen only if a == 1*/
859 {
860 if (!BN_one(rr)) goto err;
861 }
862 else
863 {
864 if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
865 }
866 ret = 1;
867err:
868 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
869 BN_CTX_end(ctx);
870 bn_check_top(rr);
871 return(ret);
872 }
873
874
875/* The old fallback, simple version :-) */
876int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
877 const BIGNUM *m, BN_CTX *ctx)
878 {
879 int i,j,bits,ret=0,wstart,wend,window,wvalue;
880 int start=1;
881 BIGNUM *d;
882 /* Table of variables obtained from 'ctx' */
883 BIGNUM *val[TABLE_SIZE];
884
885 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
886 {
887 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
888 BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
889 return -1;
890 }
891
892 bits=BN_num_bits(p);
893
894 if (bits == 0)
895 {
896 ret = BN_one(r);
897 return ret;
898 }
899
900 BN_CTX_start(ctx);
901 d = BN_CTX_get(ctx);
902 val[0] = BN_CTX_get(ctx);
903 if(!d || !val[0]) goto err;
904
905 if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
906 if (BN_is_zero(val[0]))
907 {
908 BN_zero(r);
909 ret = 1;
910 goto err;
911 }
912
913 window = BN_window_bits_for_exponent_size(bits);
914 if (window > 1)
915 {
916 if (!BN_mod_mul(d,val[0],val[0],m,ctx))
917 goto err; /* 2 */
918 j=1<<(window-1);
919 for (i=1; i<j; i++)
920 {
921 if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
922 !BN_mod_mul(val[i],val[i-1],d,m,ctx))
923 goto err;
924 }
925 }
926
927 start=1; /* This is used to avoid multiplication etc
928 * when there is only the value '1' in the
929 * buffer. */
930 wvalue=0; /* The 'value' of the window */
931 wstart=bits-1; /* The top bit of the window */
932 wend=0; /* The bottom bit of the window */
933
934 if (!BN_one(r)) goto err;
935
936 for (;;)
937 {
938 if (BN_is_bit_set(p,wstart) == 0)
939 {
940 if (!start)
941 if (!BN_mod_mul(r,r,r,m,ctx))
942 goto err;
943 if (wstart == 0) break;
944 wstart--;
945 continue;
946 }
947 /* We now have wstart on a 'set' bit, we now need to work out
948 * how bit a window to do. To do this we need to scan
949 * forward until the last set bit before the end of the
950 * window */
951 j=wstart;
952 wvalue=1;
953 wend=0;
954 for (i=1; i<window; i++)
955 {
956 if (wstart-i < 0) break;
957 if (BN_is_bit_set(p,wstart-i))
958 {
959 wvalue<<=(i-wend);
960 wvalue|=1;
961 wend=i;
962 }
963 }
964
965 /* wend is the size of the current window */
966 j=wend+1;
967 /* add the 'bytes above' */
968 if (!start)
969 for (i=0; i<j; i++)
970 {
971 if (!BN_mod_mul(r,r,r,m,ctx))
972 goto err;
973 }
974
975 /* wvalue will be an odd number < 2^window */
976 if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
977 goto err;
978
979 /* move the 'window' down further */
980 wstart-=wend+1;
981 wvalue=0;
982 start=0;
983 if (wstart < 0) break;
984 }
985 ret=1;
986err:
987 BN_CTX_end(ctx);
988 bn_check_top(r);
989 return(ret);
990 }
991
diff --git a/src/lib/libcrypto/bn/bn_exp2.c b/src/lib/libcrypto/bn/bn_exp2.c
deleted file mode 100644
index bd0c34b91b..0000000000
--- a/src/lib/libcrypto/bn/bn_exp2.c
+++ /dev/null
@@ -1,312 +0,0 @@
1/* crypto/bn/bn_exp2.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116#define TABLE_SIZE 32
117
118int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
119 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
120 BN_CTX *ctx, BN_MONT_CTX *in_mont)
121 {
122 int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2;
123 int r_is_one=1;
124 BIGNUM *d,*r;
125 const BIGNUM *a_mod_m;
126 /* Tables of variables obtained from 'ctx' */
127 BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
128 BN_MONT_CTX *mont=NULL;
129
130 bn_check_top(a1);
131 bn_check_top(p1);
132 bn_check_top(a2);
133 bn_check_top(p2);
134 bn_check_top(m);
135
136 if (!(m->d[0] & 1))
137 {
138 BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
139 return(0);
140 }
141 bits1=BN_num_bits(p1);
142 bits2=BN_num_bits(p2);
143 if ((bits1 == 0) && (bits2 == 0))
144 {
145 ret = BN_one(rr);
146 return ret;
147 }
148
149 bits=(bits1 > bits2)?bits1:bits2;
150
151 BN_CTX_start(ctx);
152 d = BN_CTX_get(ctx);
153 r = BN_CTX_get(ctx);
154 val1[0] = BN_CTX_get(ctx);
155 val2[0] = BN_CTX_get(ctx);
156 if(!d || !r || !val1[0] || !val2[0]) goto err;
157
158 if (in_mont != NULL)
159 mont=in_mont;
160 else
161 {
162 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
163 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
164 }
165
166 window1 = BN_window_bits_for_exponent_size(bits1);
167 window2 = BN_window_bits_for_exponent_size(bits2);
168
169 /*
170 * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)
171 */
172 if (a1->neg || BN_ucmp(a1,m) >= 0)
173 {
174 if (!BN_mod(val1[0],a1,m,ctx))
175 goto err;
176 a_mod_m = val1[0];
177 }
178 else
179 a_mod_m = a1;
180 if (BN_is_zero(a_mod_m))
181 {
182 BN_zero(rr);
183 ret = 1;
184 goto err;
185 }
186
187 if (!BN_to_montgomery(val1[0],a_mod_m,mont,ctx)) goto err;
188 if (window1 > 1)
189 {
190 if (!BN_mod_mul_montgomery(d,val1[0],val1[0],mont,ctx)) goto err;
191
192 j=1<<(window1-1);
193 for (i=1; i<j; i++)
194 {
195 if(((val1[i] = BN_CTX_get(ctx)) == NULL) ||
196 !BN_mod_mul_montgomery(val1[i],val1[i-1],
197 d,mont,ctx))
198 goto err;
199 }
200 }
201
202
203 /*
204 * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)
205 */
206 if (a2->neg || BN_ucmp(a2,m) >= 0)
207 {
208 if (!BN_mod(val2[0],a2,m,ctx))
209 goto err;
210 a_mod_m = val2[0];
211 }
212 else
213 a_mod_m = a2;
214 if (BN_is_zero(a_mod_m))
215 {
216 BN_zero(rr);
217 ret = 1;
218 goto err;
219 }
220 if (!BN_to_montgomery(val2[0],a_mod_m,mont,ctx)) goto err;
221 if (window2 > 1)
222 {
223 if (!BN_mod_mul_montgomery(d,val2[0],val2[0],mont,ctx)) goto err;
224
225 j=1<<(window2-1);
226 for (i=1; i<j; i++)
227 {
228 if(((val2[i] = BN_CTX_get(ctx)) == NULL) ||
229 !BN_mod_mul_montgomery(val2[i],val2[i-1],
230 d,mont,ctx))
231 goto err;
232 }
233 }
234
235
236 /* Now compute the power product, using independent windows. */
237 r_is_one=1;
238 wvalue1=0; /* The 'value' of the first window */
239 wvalue2=0; /* The 'value' of the second window */
240 wpos1=0; /* If wvalue1 > 0, the bottom bit of the first window */
241 wpos2=0; /* If wvalue2 > 0, the bottom bit of the second window */
242
243 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
244 for (b=bits-1; b>=0; b--)
245 {
246 if (!r_is_one)
247 {
248 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
249 goto err;
250 }
251
252 if (!wvalue1)
253 if (BN_is_bit_set(p1, b))
254 {
255 /* consider bits b-window1+1 .. b for this window */
256 i = b-window1+1;
257 while (!BN_is_bit_set(p1, i)) /* works for i<0 */
258 i++;
259 wpos1 = i;
260 wvalue1 = 1;
261 for (i = b-1; i >= wpos1; i--)
262 {
263 wvalue1 <<= 1;
264 if (BN_is_bit_set(p1, i))
265 wvalue1++;
266 }
267 }
268
269 if (!wvalue2)
270 if (BN_is_bit_set(p2, b))
271 {
272 /* consider bits b-window2+1 .. b for this window */
273 i = b-window2+1;
274 while (!BN_is_bit_set(p2, i))
275 i++;
276 wpos2 = i;
277 wvalue2 = 1;
278 for (i = b-1; i >= wpos2; i--)
279 {
280 wvalue2 <<= 1;
281 if (BN_is_bit_set(p2, i))
282 wvalue2++;
283 }
284 }
285
286 if (wvalue1 && b == wpos1)
287 {
288 /* wvalue1 is odd and < 2^window1 */
289 if (!BN_mod_mul_montgomery(r,r,val1[wvalue1>>1],mont,ctx))
290 goto err;
291 wvalue1 = 0;
292 r_is_one = 0;
293 }
294
295 if (wvalue2 && b == wpos2)
296 {
297 /* wvalue2 is odd and < 2^window2 */
298 if (!BN_mod_mul_montgomery(r,r,val2[wvalue2>>1],mont,ctx))
299 goto err;
300 wvalue2 = 0;
301 r_is_one = 0;
302 }
303 }
304 if (!BN_from_montgomery(rr,r,mont,ctx))
305 goto err;
306 ret=1;
307err:
308 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
309 BN_CTX_end(ctx);
310 bn_check_top(rr);
311 return(ret);
312 }
diff --git a/src/lib/libcrypto/bn/bn_gcd.c b/src/lib/libcrypto/bn/bn_gcd.c
deleted file mode 100644
index 4a352119ba..0000000000
--- a/src/lib/libcrypto/bn/bn_gcd.c
+++ /dev/null
@@ -1,654 +0,0 @@
1/* crypto/bn/bn_gcd.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include "cryptlib.h"
113#include "bn_lcl.h"
114
115static BIGNUM *euclid(BIGNUM *a, BIGNUM *b);
116
117int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
118 {
119 BIGNUM *a,*b,*t;
120 int ret=0;
121
122 bn_check_top(in_a);
123 bn_check_top(in_b);
124
125 BN_CTX_start(ctx);
126 a = BN_CTX_get(ctx);
127 b = BN_CTX_get(ctx);
128 if (a == NULL || b == NULL) goto err;
129
130 if (BN_copy(a,in_a) == NULL) goto err;
131 if (BN_copy(b,in_b) == NULL) goto err;
132 a->neg = 0;
133 b->neg = 0;
134
135 if (BN_cmp(a,b) < 0) { t=a; a=b; b=t; }
136 t=euclid(a,b);
137 if (t == NULL) goto err;
138
139 if (BN_copy(r,t) == NULL) goto err;
140 ret=1;
141err:
142 BN_CTX_end(ctx);
143 bn_check_top(r);
144 return(ret);
145 }
146
147static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
148 {
149 BIGNUM *t;
150 int shifts=0;
151
152 bn_check_top(a);
153 bn_check_top(b);
154
155 /* 0 <= b <= a */
156 while (!BN_is_zero(b))
157 {
158 /* 0 < b <= a */
159
160 if (BN_is_odd(a))
161 {
162 if (BN_is_odd(b))
163 {
164 if (!BN_sub(a,a,b)) goto err;
165 if (!BN_rshift1(a,a)) goto err;
166 if (BN_cmp(a,b) < 0)
167 { t=a; a=b; b=t; }
168 }
169 else /* a odd - b even */
170 {
171 if (!BN_rshift1(b,b)) goto err;
172 if (BN_cmp(a,b) < 0)
173 { t=a; a=b; b=t; }
174 }
175 }
176 else /* a is even */
177 {
178 if (BN_is_odd(b))
179 {
180 if (!BN_rshift1(a,a)) goto err;
181 if (BN_cmp(a,b) < 0)
182 { t=a; a=b; b=t; }
183 }
184 else /* a even - b even */
185 {
186 if (!BN_rshift1(a,a)) goto err;
187 if (!BN_rshift1(b,b)) goto err;
188 shifts++;
189 }
190 }
191 /* 0 <= b <= a */
192 }
193
194 if (shifts)
195 {
196 if (!BN_lshift(a,a,shifts)) goto err;
197 }
198 bn_check_top(a);
199 return(a);
200err:
201 return(NULL);
202 }
203
204
205/* solves ax == 1 (mod n) */
206static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
207 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx);
208BIGNUM *BN_mod_inverse(BIGNUM *in,
209 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
210 {
211 BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
212 BIGNUM *ret=NULL;
213 int sign;
214
215 if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0))
216 {
217 return BN_mod_inverse_no_branch(in, a, n, ctx);
218 }
219
220 bn_check_top(a);
221 bn_check_top(n);
222
223 BN_CTX_start(ctx);
224 A = BN_CTX_get(ctx);
225 B = BN_CTX_get(ctx);
226 X = BN_CTX_get(ctx);
227 D = BN_CTX_get(ctx);
228 M = BN_CTX_get(ctx);
229 Y = BN_CTX_get(ctx);
230 T = BN_CTX_get(ctx);
231 if (T == NULL) goto err;
232
233 if (in == NULL)
234 R=BN_new();
235 else
236 R=in;
237 if (R == NULL) goto err;
238
239 BN_one(X);
240 BN_zero(Y);
241 if (BN_copy(B,a) == NULL) goto err;
242 if (BN_copy(A,n) == NULL) goto err;
243 A->neg = 0;
244 if (B->neg || (BN_ucmp(B, A) >= 0))
245 {
246 if (!BN_nnmod(B, B, A, ctx)) goto err;
247 }
248 sign = -1;
249 /* From B = a mod |n|, A = |n| it follows that
250 *
251 * 0 <= B < A,
252 * -sign*X*a == B (mod |n|),
253 * sign*Y*a == A (mod |n|).
254 */
255
256 if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048)))
257 {
258 /* Binary inversion algorithm; requires odd modulus.
259 * This is faster than the general algorithm if the modulus
260 * is sufficiently small (about 400 .. 500 bits on 32-bit
261 * sytems, but much more on 64-bit systems) */
262 int shift;
263
264 while (!BN_is_zero(B))
265 {
266 /*
267 * 0 < B < |n|,
268 * 0 < A <= |n|,
269 * (1) -sign*X*a == B (mod |n|),
270 * (2) sign*Y*a == A (mod |n|)
271 */
272
273 /* Now divide B by the maximum possible power of two in the integers,
274 * and divide X by the same value mod |n|.
275 * When we're done, (1) still holds. */
276 shift = 0;
277 while (!BN_is_bit_set(B, shift)) /* note that 0 < B */
278 {
279 shift++;
280
281 if (BN_is_odd(X))
282 {
283 if (!BN_uadd(X, X, n)) goto err;
284 }
285 /* now X is even, so we can easily divide it by two */
286 if (!BN_rshift1(X, X)) goto err;
287 }
288 if (shift > 0)
289 {
290 if (!BN_rshift(B, B, shift)) goto err;
291 }
292
293
294 /* Same for A and Y. Afterwards, (2) still holds. */
295 shift = 0;
296 while (!BN_is_bit_set(A, shift)) /* note that 0 < A */
297 {
298 shift++;
299
300 if (BN_is_odd(Y))
301 {
302 if (!BN_uadd(Y, Y, n)) goto err;
303 }
304 /* now Y is even */
305 if (!BN_rshift1(Y, Y)) goto err;
306 }
307 if (shift > 0)
308 {
309 if (!BN_rshift(A, A, shift)) goto err;
310 }
311
312
313 /* We still have (1) and (2).
314 * Both A and B are odd.
315 * The following computations ensure that
316 *
317 * 0 <= B < |n|,
318 * 0 < A < |n|,
319 * (1) -sign*X*a == B (mod |n|),
320 * (2) sign*Y*a == A (mod |n|),
321 *
322 * and that either A or B is even in the next iteration.
323 */
324 if (BN_ucmp(B, A) >= 0)
325 {
326 /* -sign*(X + Y)*a == B - A (mod |n|) */
327 if (!BN_uadd(X, X, Y)) goto err;
328 /* NB: we could use BN_mod_add_quick(X, X, Y, n), but that
329 * actually makes the algorithm slower */
330 if (!BN_usub(B, B, A)) goto err;
331 }
332 else
333 {
334 /* sign*(X + Y)*a == A - B (mod |n|) */
335 if (!BN_uadd(Y, Y, X)) goto err;
336 /* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */
337 if (!BN_usub(A, A, B)) goto err;
338 }
339 }
340 }
341 else
342 {
343 /* general inversion algorithm */
344
345 while (!BN_is_zero(B))
346 {
347 BIGNUM *tmp;
348
349 /*
350 * 0 < B < A,
351 * (*) -sign*X*a == B (mod |n|),
352 * sign*Y*a == A (mod |n|)
353 */
354
355 /* (D, M) := (A/B, A%B) ... */
356 if (BN_num_bits(A) == BN_num_bits(B))
357 {
358 if (!BN_one(D)) goto err;
359 if (!BN_sub(M,A,B)) goto err;
360 }
361 else if (BN_num_bits(A) == BN_num_bits(B) + 1)
362 {
363 /* A/B is 1, 2, or 3 */
364 if (!BN_lshift1(T,B)) goto err;
365 if (BN_ucmp(A,T) < 0)
366 {
367 /* A < 2*B, so D=1 */
368 if (!BN_one(D)) goto err;
369 if (!BN_sub(M,A,B)) goto err;
370 }
371 else
372 {
373 /* A >= 2*B, so D=2 or D=3 */
374 if (!BN_sub(M,A,T)) goto err;
375 if (!BN_add(D,T,B)) goto err; /* use D (:= 3*B) as temp */
376 if (BN_ucmp(A,D) < 0)
377 {
378 /* A < 3*B, so D=2 */
379 if (!BN_set_word(D,2)) goto err;
380 /* M (= A - 2*B) already has the correct value */
381 }
382 else
383 {
384 /* only D=3 remains */
385 if (!BN_set_word(D,3)) goto err;
386 /* currently M = A - 2*B, but we need M = A - 3*B */
387 if (!BN_sub(M,M,B)) goto err;
388 }
389 }
390 }
391 else
392 {
393 if (!BN_div(D,M,A,B,ctx)) goto err;
394 }
395
396 /* Now
397 * A = D*B + M;
398 * thus we have
399 * (**) sign*Y*a == D*B + M (mod |n|).
400 */
401
402 tmp=A; /* keep the BIGNUM object, the value does not matter */
403
404 /* (A, B) := (B, A mod B) ... */
405 A=B;
406 B=M;
407 /* ... so we have 0 <= B < A again */
408
409 /* Since the former M is now B and the former B is now A,
410 * (**) translates into
411 * sign*Y*a == D*A + B (mod |n|),
412 * i.e.
413 * sign*Y*a - D*A == B (mod |n|).
414 * Similarly, (*) translates into
415 * -sign*X*a == A (mod |n|).
416 *
417 * Thus,
418 * sign*Y*a + D*sign*X*a == B (mod |n|),
419 * i.e.
420 * sign*(Y + D*X)*a == B (mod |n|).
421 *
422 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
423 * -sign*X*a == B (mod |n|),
424 * sign*Y*a == A (mod |n|).
425 * Note that X and Y stay non-negative all the time.
426 */
427
428 /* most of the time D is very small, so we can optimize tmp := D*X+Y */
429 if (BN_is_one(D))
430 {
431 if (!BN_add(tmp,X,Y)) goto err;
432 }
433 else
434 {
435 if (BN_is_word(D,2))
436 {
437 if (!BN_lshift1(tmp,X)) goto err;
438 }
439 else if (BN_is_word(D,4))
440 {
441 if (!BN_lshift(tmp,X,2)) goto err;
442 }
443 else if (D->top == 1)
444 {
445 if (!BN_copy(tmp,X)) goto err;
446 if (!BN_mul_word(tmp,D->d[0])) goto err;
447 }
448 else
449 {
450 if (!BN_mul(tmp,D,X,ctx)) goto err;
451 }
452 if (!BN_add(tmp,tmp,Y)) goto err;
453 }
454
455 M=Y; /* keep the BIGNUM object, the value does not matter */
456 Y=X;
457 X=tmp;
458 sign = -sign;
459 }
460 }
461
462 /*
463 * The while loop (Euclid's algorithm) ends when
464 * A == gcd(a,n);
465 * we have
466 * sign*Y*a == A (mod |n|),
467 * where Y is non-negative.
468 */
469
470 if (sign < 0)
471 {
472 if (!BN_sub(Y,n,Y)) goto err;
473 }
474 /* Now Y*a == A (mod |n|). */
475
476
477 if (BN_is_one(A))
478 {
479 /* Y*a == 1 (mod |n|) */
480 if (!Y->neg && BN_ucmp(Y,n) < 0)
481 {
482 if (!BN_copy(R,Y)) goto err;
483 }
484 else
485 {
486 if (!BN_nnmod(R,Y,n,ctx)) goto err;
487 }
488 }
489 else
490 {
491 BNerr(BN_F_BN_MOD_INVERSE,BN_R_NO_INVERSE);
492 goto err;
493 }
494 ret=R;
495err:
496 if ((ret == NULL) && (in == NULL)) BN_free(R);
497 BN_CTX_end(ctx);
498 bn_check_top(ret);
499 return(ret);
500 }
501
502
503/* BN_mod_inverse_no_branch is a special version of BN_mod_inverse.
504 * It does not contain branches that may leak sensitive information.
505 */
506static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
507 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
508 {
509 BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
510 BIGNUM local_A, local_B;
511 BIGNUM *pA, *pB;
512 BIGNUM *ret=NULL;
513 int sign;
514
515 bn_check_top(a);
516 bn_check_top(n);
517
518 BN_CTX_start(ctx);
519 A = BN_CTX_get(ctx);
520 B = BN_CTX_get(ctx);
521 X = BN_CTX_get(ctx);
522 D = BN_CTX_get(ctx);
523 M = BN_CTX_get(ctx);
524 Y = BN_CTX_get(ctx);
525 T = BN_CTX_get(ctx);
526 if (T == NULL) goto err;
527
528 if (in == NULL)
529 R=BN_new();
530 else
531 R=in;
532 if (R == NULL) goto err;
533
534 BN_one(X);
535 BN_zero(Y);
536 if (BN_copy(B,a) == NULL) goto err;
537 if (BN_copy(A,n) == NULL) goto err;
538 A->neg = 0;
539
540 if (B->neg || (BN_ucmp(B, A) >= 0))
541 {
542 /* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
543 * BN_div_no_branch will be called eventually.
544 */
545 pB = &local_B;
546 BN_with_flags(pB, B, BN_FLG_CONSTTIME);
547 if (!BN_nnmod(B, pB, A, ctx)) goto err;
548 }
549 sign = -1;
550 /* From B = a mod |n|, A = |n| it follows that
551 *
552 * 0 <= B < A,
553 * -sign*X*a == B (mod |n|),
554 * sign*Y*a == A (mod |n|).
555 */
556
557 while (!BN_is_zero(B))
558 {
559 BIGNUM *tmp;
560
561 /*
562 * 0 < B < A,
563 * (*) -sign*X*a == B (mod |n|),
564 * sign*Y*a == A (mod |n|)
565 */
566
567 /* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
568 * BN_div_no_branch will be called eventually.
569 */
570 pA = &local_A;
571 BN_with_flags(pA, A, BN_FLG_CONSTTIME);
572
573 /* (D, M) := (A/B, A%B) ... */
574 if (!BN_div(D,M,pA,B,ctx)) goto err;
575
576 /* Now
577 * A = D*B + M;
578 * thus we have
579 * (**) sign*Y*a == D*B + M (mod |n|).
580 */
581
582 tmp=A; /* keep the BIGNUM object, the value does not matter */
583
584 /* (A, B) := (B, A mod B) ... */
585 A=B;
586 B=M;
587 /* ... so we have 0 <= B < A again */
588
589 /* Since the former M is now B and the former B is now A,
590 * (**) translates into
591 * sign*Y*a == D*A + B (mod |n|),
592 * i.e.
593 * sign*Y*a - D*A == B (mod |n|).
594 * Similarly, (*) translates into
595 * -sign*X*a == A (mod |n|).
596 *
597 * Thus,
598 * sign*Y*a + D*sign*X*a == B (mod |n|),
599 * i.e.
600 * sign*(Y + D*X)*a == B (mod |n|).
601 *
602 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
603 * -sign*X*a == B (mod |n|),
604 * sign*Y*a == A (mod |n|).
605 * Note that X and Y stay non-negative all the time.
606 */
607
608 if (!BN_mul(tmp,D,X,ctx)) goto err;
609 if (!BN_add(tmp,tmp,Y)) goto err;
610
611 M=Y; /* keep the BIGNUM object, the value does not matter */
612 Y=X;
613 X=tmp;
614 sign = -sign;
615 }
616
617 /*
618 * The while loop (Euclid's algorithm) ends when
619 * A == gcd(a,n);
620 * we have
621 * sign*Y*a == A (mod |n|),
622 * where Y is non-negative.
623 */
624
625 if (sign < 0)
626 {
627 if (!BN_sub(Y,n,Y)) goto err;
628 }
629 /* Now Y*a == A (mod |n|). */
630
631 if (BN_is_one(A))
632 {
633 /* Y*a == 1 (mod |n|) */
634 if (!Y->neg && BN_ucmp(Y,n) < 0)
635 {
636 if (!BN_copy(R,Y)) goto err;
637 }
638 else
639 {
640 if (!BN_nnmod(R,Y,n,ctx)) goto err;
641 }
642 }
643 else
644 {
645 BNerr(BN_F_BN_MOD_INVERSE_NO_BRANCH,BN_R_NO_INVERSE);
646 goto err;
647 }
648 ret=R;
649err:
650 if ((ret == NULL) && (in == NULL)) BN_free(R);
651 BN_CTX_end(ctx);
652 bn_check_top(ret);
653 return(ret);
654 }
diff --git a/src/lib/libcrypto/bn/bn_gf2m.c b/src/lib/libcrypto/bn/bn_gf2m.c
deleted file mode 100644
index 432a3aa338..0000000000
--- a/src/lib/libcrypto/bn/bn_gf2m.c
+++ /dev/null
@@ -1,1035 +0,0 @@
1/* crypto/bn/bn_gf2m.c */
2/* ====================================================================
3 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
4 *
5 * The Elliptic Curve Public-Key Crypto Library (ECC Code) included
6 * herein is developed by SUN MICROSYSTEMS, INC., and is contributed
7 * to the OpenSSL project.
8 *
9 * The ECC Code is licensed pursuant to the OpenSSL open source
10 * license provided below.
11 *
12 * In addition, Sun covenants to all licensees who provide a reciprocal
13 * covenant with respect to their own patents if any, not to sue under
14 * current and future patent claims necessarily infringed by the making,
15 * using, practicing, selling, offering for sale and/or otherwise
16 * disposing of the ECC Code as delivered hereunder (or portions thereof),
17 * provided that such covenant shall not apply:
18 * 1) for code that a licensee deletes from the ECC Code;
19 * 2) separates from the ECC Code; or
20 * 3) for infringements caused by:
21 * i) the modification of the ECC Code or
22 * ii) the combination of the ECC Code with other software or
23 * devices where such combination causes the infringement.
24 *
25 * The software is originally written by Sheueling Chang Shantz and
26 * Douglas Stebila of Sun Microsystems Laboratories.
27 *
28 */
29
30/* NOTE: This file is licensed pursuant to the OpenSSL license below
31 * and may be modified; but after modifications, the above covenant
32 * may no longer apply! In such cases, the corresponding paragraph
33 * ["In addition, Sun covenants ... causes the infringement."] and
34 * this note can be edited out; but please keep the Sun copyright
35 * notice and attribution. */
36
37/* ====================================================================
38 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 *
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 *
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in
49 * the documentation and/or other materials provided with the
50 * distribution.
51 *
52 * 3. All advertising materials mentioning features or use of this
53 * software must display the following acknowledgment:
54 * "This product includes software developed by the OpenSSL Project
55 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
56 *
57 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
58 * endorse or promote products derived from this software without
59 * prior written permission. For written permission, please contact
60 * openssl-core@openssl.org.
61 *
62 * 5. Products derived from this software may not be called "OpenSSL"
63 * nor may "OpenSSL" appear in their names without prior written
64 * permission of the OpenSSL Project.
65 *
66 * 6. Redistributions of any form whatsoever must retain the following
67 * acknowledgment:
68 * "This product includes software developed by the OpenSSL Project
69 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
70 *
71 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
72 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
73 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
74 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
75 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
76 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
77 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
78 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
79 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
80 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
81 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
82 * OF THE POSSIBILITY OF SUCH DAMAGE.
83 * ====================================================================
84 *
85 * This product includes cryptographic software written by Eric Young
86 * (eay@cryptsoft.com). This product includes software written by Tim
87 * Hudson (tjh@cryptsoft.com).
88 *
89 */
90
91#include <assert.h>
92#include <limits.h>
93#include <stdio.h>
94#include "cryptlib.h"
95#include "bn_lcl.h"
96
97/* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */
98#define MAX_ITERATIONS 50
99
100static const BN_ULONG SQR_tb[16] =
101 { 0, 1, 4, 5, 16, 17, 20, 21,
102 64, 65, 68, 69, 80, 81, 84, 85 };
103/* Platform-specific macros to accelerate squaring. */
104#if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
105#define SQR1(w) \
106 SQR_tb[(w) >> 60 & 0xF] << 56 | SQR_tb[(w) >> 56 & 0xF] << 48 | \
107 SQR_tb[(w) >> 52 & 0xF] << 40 | SQR_tb[(w) >> 48 & 0xF] << 32 | \
108 SQR_tb[(w) >> 44 & 0xF] << 24 | SQR_tb[(w) >> 40 & 0xF] << 16 | \
109 SQR_tb[(w) >> 36 & 0xF] << 8 | SQR_tb[(w) >> 32 & 0xF]
110#define SQR0(w) \
111 SQR_tb[(w) >> 28 & 0xF] << 56 | SQR_tb[(w) >> 24 & 0xF] << 48 | \
112 SQR_tb[(w) >> 20 & 0xF] << 40 | SQR_tb[(w) >> 16 & 0xF] << 32 | \
113 SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \
114 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
115#endif
116#ifdef THIRTY_TWO_BIT
117#define SQR1(w) \
118 SQR_tb[(w) >> 28 & 0xF] << 24 | SQR_tb[(w) >> 24 & 0xF] << 16 | \
119 SQR_tb[(w) >> 20 & 0xF] << 8 | SQR_tb[(w) >> 16 & 0xF]
120#define SQR0(w) \
121 SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \
122 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
123#endif
124
125/* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
126 * result is a polynomial r with degree < 2 * BN_BITS - 1
127 * The caller MUST ensure that the variables have the right amount
128 * of space allocated.
129 */
130#ifdef THIRTY_TWO_BIT
131static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
132 {
133 register BN_ULONG h, l, s;
134 BN_ULONG tab[8], top2b = a >> 30;
135 register BN_ULONG a1, a2, a4;
136
137 a1 = a & (0x3FFFFFFF); a2 = a1 << 1; a4 = a2 << 1;
138
139 tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
140 tab[4] = a4; tab[5] = a1^a4; tab[6] = a2^a4; tab[7] = a1^a2^a4;
141
142 s = tab[b & 0x7]; l = s;
143 s = tab[b >> 3 & 0x7]; l ^= s << 3; h = s >> 29;
144 s = tab[b >> 6 & 0x7]; l ^= s << 6; h ^= s >> 26;
145 s = tab[b >> 9 & 0x7]; l ^= s << 9; h ^= s >> 23;
146 s = tab[b >> 12 & 0x7]; l ^= s << 12; h ^= s >> 20;
147 s = tab[b >> 15 & 0x7]; l ^= s << 15; h ^= s >> 17;
148 s = tab[b >> 18 & 0x7]; l ^= s << 18; h ^= s >> 14;
149 s = tab[b >> 21 & 0x7]; l ^= s << 21; h ^= s >> 11;
150 s = tab[b >> 24 & 0x7]; l ^= s << 24; h ^= s >> 8;
151 s = tab[b >> 27 & 0x7]; l ^= s << 27; h ^= s >> 5;
152 s = tab[b >> 30 ]; l ^= s << 30; h ^= s >> 2;
153
154 /* compensate for the top two bits of a */
155
156 if (top2b & 01) { l ^= b << 30; h ^= b >> 2; }
157 if (top2b & 02) { l ^= b << 31; h ^= b >> 1; }
158
159 *r1 = h; *r0 = l;
160 }
161#endif
162#if defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
163static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
164 {
165 register BN_ULONG h, l, s;
166 BN_ULONG tab[16], top3b = a >> 61;
167 register BN_ULONG a1, a2, a4, a8;
168
169 a1 = a & (0x1FFFFFFFFFFFFFFFULL); a2 = a1 << 1; a4 = a2 << 1; a8 = a4 << 1;
170
171 tab[ 0] = 0; tab[ 1] = a1; tab[ 2] = a2; tab[ 3] = a1^a2;
172 tab[ 4] = a4; tab[ 5] = a1^a4; tab[ 6] = a2^a4; tab[ 7] = a1^a2^a4;
173 tab[ 8] = a8; tab[ 9] = a1^a8; tab[10] = a2^a8; tab[11] = a1^a2^a8;
174 tab[12] = a4^a8; tab[13] = a1^a4^a8; tab[14] = a2^a4^a8; tab[15] = a1^a2^a4^a8;
175
176 s = tab[b & 0xF]; l = s;
177 s = tab[b >> 4 & 0xF]; l ^= s << 4; h = s >> 60;
178 s = tab[b >> 8 & 0xF]; l ^= s << 8; h ^= s >> 56;
179 s = tab[b >> 12 & 0xF]; l ^= s << 12; h ^= s >> 52;
180 s = tab[b >> 16 & 0xF]; l ^= s << 16; h ^= s >> 48;
181 s = tab[b >> 20 & 0xF]; l ^= s << 20; h ^= s >> 44;
182 s = tab[b >> 24 & 0xF]; l ^= s << 24; h ^= s >> 40;
183 s = tab[b >> 28 & 0xF]; l ^= s << 28; h ^= s >> 36;
184 s = tab[b >> 32 & 0xF]; l ^= s << 32; h ^= s >> 32;
185 s = tab[b >> 36 & 0xF]; l ^= s << 36; h ^= s >> 28;
186 s = tab[b >> 40 & 0xF]; l ^= s << 40; h ^= s >> 24;
187 s = tab[b >> 44 & 0xF]; l ^= s << 44; h ^= s >> 20;
188 s = tab[b >> 48 & 0xF]; l ^= s << 48; h ^= s >> 16;
189 s = tab[b >> 52 & 0xF]; l ^= s << 52; h ^= s >> 12;
190 s = tab[b >> 56 & 0xF]; l ^= s << 56; h ^= s >> 8;
191 s = tab[b >> 60 ]; l ^= s << 60; h ^= s >> 4;
192
193 /* compensate for the top three bits of a */
194
195 if (top3b & 01) { l ^= b << 61; h ^= b >> 3; }
196 if (top3b & 02) { l ^= b << 62; h ^= b >> 2; }
197 if (top3b & 04) { l ^= b << 63; h ^= b >> 1; }
198
199 *r1 = h; *r0 = l;
200 }
201#endif
202
203/* Product of two polynomials a, b each with degree < 2 * BN_BITS2 - 1,
204 * result is a polynomial r with degree < 4 * BN_BITS2 - 1
205 * The caller MUST ensure that the variables have the right amount
206 * of space allocated.
207 */
208static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, const BN_ULONG b1, const BN_ULONG b0)
209 {
210 BN_ULONG m1, m0;
211 /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
212 bn_GF2m_mul_1x1(r+3, r+2, a1, b1);
213 bn_GF2m_mul_1x1(r+1, r, a0, b0);
214 bn_GF2m_mul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1);
215 /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */
216 r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */
217 r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
218 }
219
220
221/* Add polynomials a and b and store result in r; r could be a or b, a and b
222 * could be equal; r is the bitwise XOR of a and b.
223 */
224int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
225 {
226 int i;
227 const BIGNUM *at, *bt;
228
229 bn_check_top(a);
230 bn_check_top(b);
231
232 if (a->top < b->top) { at = b; bt = a; }
233 else { at = a; bt = b; }
234
235 if(bn_wexpand(r, at->top) == NULL)
236 return 0;
237
238 for (i = 0; i < bt->top; i++)
239 {
240 r->d[i] = at->d[i] ^ bt->d[i];
241 }
242 for (; i < at->top; i++)
243 {
244 r->d[i] = at->d[i];
245 }
246
247 r->top = at->top;
248 bn_correct_top(r);
249
250 return 1;
251 }
252
253
254/* Some functions allow for representation of the irreducible polynomials
255 * as an int[], say p. The irreducible f(t) is then of the form:
256 * t^p[0] + t^p[1] + ... + t^p[k]
257 * where m = p[0] > p[1] > ... > p[k] = 0.
258 */
259
260
261/* Performs modular reduction of a and store result in r. r could be a. */
262int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
263 {
264 int j, k;
265 int n, dN, d0, d1;
266 BN_ULONG zz, *z;
267
268 bn_check_top(a);
269
270 if (!p[0])
271 {
272 /* reduction mod 1 => return 0 */
273 BN_zero(r);
274 return 1;
275 }
276
277 /* Since the algorithm does reduction in the r value, if a != r, copy
278 * the contents of a into r so we can do reduction in r.
279 */
280 if (a != r)
281 {
282 if (!bn_wexpand(r, a->top)) return 0;
283 for (j = 0; j < a->top; j++)
284 {
285 r->d[j] = a->d[j];
286 }
287 r->top = a->top;
288 }
289 z = r->d;
290
291 /* start reduction */
292 dN = p[0] / BN_BITS2;
293 for (j = r->top - 1; j > dN;)
294 {
295 zz = z[j];
296 if (z[j] == 0) { j--; continue; }
297 z[j] = 0;
298
299 for (k = 1; p[k] != 0; k++)
300 {
301 /* reducing component t^p[k] */
302 n = p[0] - p[k];
303 d0 = n % BN_BITS2; d1 = BN_BITS2 - d0;
304 n /= BN_BITS2;
305 z[j-n] ^= (zz>>d0);
306 if (d0) z[j-n-1] ^= (zz<<d1);
307 }
308
309 /* reducing component t^0 */
310 n = dN;
311 d0 = p[0] % BN_BITS2;
312 d1 = BN_BITS2 - d0;
313 z[j-n] ^= (zz >> d0);
314 if (d0) z[j-n-1] ^= (zz << d1);
315 }
316
317 /* final round of reduction */
318 while (j == dN)
319 {
320
321 d0 = p[0] % BN_BITS2;
322 zz = z[dN] >> d0;
323 if (zz == 0) break;
324 d1 = BN_BITS2 - d0;
325
326 /* clear up the top d1 bits */
327 if (d0)
328 z[dN] = (z[dN] << d1) >> d1;
329 else
330 z[dN] = 0;
331 z[0] ^= zz; /* reduction t^0 component */
332
333 for (k = 1; p[k] != 0; k++)
334 {
335 BN_ULONG tmp_ulong;
336
337 /* reducing component t^p[k]*/
338 n = p[k] / BN_BITS2;
339 d0 = p[k] % BN_BITS2;
340 d1 = BN_BITS2 - d0;
341 z[n] ^= (zz << d0);
342 tmp_ulong = zz >> d1;
343 if (d0 && tmp_ulong)
344 z[n+1] ^= tmp_ulong;
345 }
346
347
348 }
349
350 bn_correct_top(r);
351 return 1;
352 }
353
354/* Performs modular reduction of a by p and store result in r. r could be a.
355 *
356 * This function calls down to the BN_GF2m_mod_arr implementation; this wrapper
357 * function is only provided for convenience; for best performance, use the
358 * BN_GF2m_mod_arr function.
359 */
360int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
361 {
362 int ret = 0;
363 const int max = BN_num_bits(p) + 1;
364 int *arr=NULL;
365 bn_check_top(a);
366 bn_check_top(p);
367 if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
368 ret = BN_GF2m_poly2arr(p, arr, max);
369 if (!ret || ret > max)
370 {
371 BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH);
372 goto err;
373 }
374 ret = BN_GF2m_mod_arr(r, a, arr);
375 bn_check_top(r);
376err:
377 if (arr) OPENSSL_free(arr);
378 return ret;
379 }
380
381
382/* Compute the product of two polynomials a and b, reduce modulo p, and store
383 * the result in r. r could be a or b; a could be b.
384 */
385int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
386 {
387 int zlen, i, j, k, ret = 0;
388 BIGNUM *s;
389 BN_ULONG x1, x0, y1, y0, zz[4];
390
391 bn_check_top(a);
392 bn_check_top(b);
393
394 if (a == b)
395 {
396 return BN_GF2m_mod_sqr_arr(r, a, p, ctx);
397 }
398
399 BN_CTX_start(ctx);
400 if ((s = BN_CTX_get(ctx)) == NULL) goto err;
401
402 zlen = a->top + b->top + 4;
403 if (!bn_wexpand(s, zlen)) goto err;
404 s->top = zlen;
405
406 for (i = 0; i < zlen; i++) s->d[i] = 0;
407
408 for (j = 0; j < b->top; j += 2)
409 {
410 y0 = b->d[j];
411 y1 = ((j+1) == b->top) ? 0 : b->d[j+1];
412 for (i = 0; i < a->top; i += 2)
413 {
414 x0 = a->d[i];
415 x1 = ((i+1) == a->top) ? 0 : a->d[i+1];
416 bn_GF2m_mul_2x2(zz, x1, x0, y1, y0);
417 for (k = 0; k < 4; k++) s->d[i+j+k] ^= zz[k];
418 }
419 }
420
421 bn_correct_top(s);
422 if (BN_GF2m_mod_arr(r, s, p))
423 ret = 1;
424 bn_check_top(r);
425
426err:
427 BN_CTX_end(ctx);
428 return ret;
429 }
430
431/* Compute the product of two polynomials a and b, reduce modulo p, and store
432 * the result in r. r could be a or b; a could equal b.
433 *
434 * This function calls down to the BN_GF2m_mod_mul_arr implementation; this wrapper
435 * function is only provided for convenience; for best performance, use the
436 * BN_GF2m_mod_mul_arr function.
437 */
438int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
439 {
440 int ret = 0;
441 const int max = BN_num_bits(p) + 1;
442 int *arr=NULL;
443 bn_check_top(a);
444 bn_check_top(b);
445 bn_check_top(p);
446 if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
447 ret = BN_GF2m_poly2arr(p, arr, max);
448 if (!ret || ret > max)
449 {
450 BNerr(BN_F_BN_GF2M_MOD_MUL,BN_R_INVALID_LENGTH);
451 goto err;
452 }
453 ret = BN_GF2m_mod_mul_arr(r, a, b, arr, ctx);
454 bn_check_top(r);
455err:
456 if (arr) OPENSSL_free(arr);
457 return ret;
458 }
459
460
461/* Square a, reduce the result mod p, and store it in a. r could be a. */
462int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
463 {
464 int i, ret = 0;
465 BIGNUM *s;
466
467 bn_check_top(a);
468 BN_CTX_start(ctx);
469 if ((s = BN_CTX_get(ctx)) == NULL) return 0;
470 if (!bn_wexpand(s, 2 * a->top)) goto err;
471
472 for (i = a->top - 1; i >= 0; i--)
473 {
474 s->d[2*i+1] = SQR1(a->d[i]);
475 s->d[2*i ] = SQR0(a->d[i]);
476 }
477
478 s->top = 2 * a->top;
479 bn_correct_top(s);
480 if (!BN_GF2m_mod_arr(r, s, p)) goto err;
481 bn_check_top(r);
482 ret = 1;
483err:
484 BN_CTX_end(ctx);
485 return ret;
486 }
487
488/* Square a, reduce the result mod p, and store it in a. r could be a.
489 *
490 * This function calls down to the BN_GF2m_mod_sqr_arr implementation; this wrapper
491 * function is only provided for convenience; for best performance, use the
492 * BN_GF2m_mod_sqr_arr function.
493 */
494int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
495 {
496 int ret = 0;
497 const int max = BN_num_bits(p) + 1;
498 int *arr=NULL;
499
500 bn_check_top(a);
501 bn_check_top(p);
502 if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
503 ret = BN_GF2m_poly2arr(p, arr, max);
504 if (!ret || ret > max)
505 {
506 BNerr(BN_F_BN_GF2M_MOD_SQR,BN_R_INVALID_LENGTH);
507 goto err;
508 }
509 ret = BN_GF2m_mod_sqr_arr(r, a, arr, ctx);
510 bn_check_top(r);
511err:
512 if (arr) OPENSSL_free(arr);
513 return ret;
514 }
515
516
517/* Invert a, reduce modulo p, and store the result in r. r could be a.
518 * Uses Modified Almost Inverse Algorithm (Algorithm 10) from
519 * Hankerson, D., Hernandez, J.L., and Menezes, A. "Software Implementation
520 * of Elliptic Curve Cryptography Over Binary Fields".
521 */
522int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
523 {
524 BIGNUM *b, *c, *u, *v, *tmp;
525 int ret = 0;
526
527 bn_check_top(a);
528 bn_check_top(p);
529
530 BN_CTX_start(ctx);
531
532 b = BN_CTX_get(ctx);
533 c = BN_CTX_get(ctx);
534 u = BN_CTX_get(ctx);
535 v = BN_CTX_get(ctx);
536 if (v == NULL) goto err;
537
538 if (!BN_one(b)) goto err;
539 if (!BN_GF2m_mod(u, a, p)) goto err;
540 if (!BN_copy(v, p)) goto err;
541
542 if (BN_is_zero(u)) goto err;
543
544 while (1)
545 {
546 while (!BN_is_odd(u))
547 {
548 if (BN_is_zero(u)) goto err;
549 if (!BN_rshift1(u, u)) goto err;
550 if (BN_is_odd(b))
551 {
552 if (!BN_GF2m_add(b, b, p)) goto err;
553 }
554 if (!BN_rshift1(b, b)) goto err;
555 }
556
557 if (BN_abs_is_word(u, 1)) break;
558
559 if (BN_num_bits(u) < BN_num_bits(v))
560 {
561 tmp = u; u = v; v = tmp;
562 tmp = b; b = c; c = tmp;
563 }
564
565 if (!BN_GF2m_add(u, u, v)) goto err;
566 if (!BN_GF2m_add(b, b, c)) goto err;
567 }
568
569
570 if (!BN_copy(r, b)) goto err;
571 bn_check_top(r);
572 ret = 1;
573
574err:
575 BN_CTX_end(ctx);
576 return ret;
577 }
578
579/* Invert xx, reduce modulo p, and store the result in r. r could be xx.
580 *
581 * This function calls down to the BN_GF2m_mod_inv implementation; this wrapper
582 * function is only provided for convenience; for best performance, use the
583 * BN_GF2m_mod_inv function.
584 */
585int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx)
586 {
587 BIGNUM *field;
588 int ret = 0;
589
590 bn_check_top(xx);
591 BN_CTX_start(ctx);
592 if ((field = BN_CTX_get(ctx)) == NULL) goto err;
593 if (!BN_GF2m_arr2poly(p, field)) goto err;
594
595 ret = BN_GF2m_mod_inv(r, xx, field, ctx);
596 bn_check_top(r);
597
598err:
599 BN_CTX_end(ctx);
600 return ret;
601 }
602
603
604#ifndef OPENSSL_SUN_GF2M_DIV
605/* Divide y by x, reduce modulo p, and store the result in r. r could be x
606 * or y, x could equal y.
607 */
608int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p, BN_CTX *ctx)
609 {
610 BIGNUM *xinv = NULL;
611 int ret = 0;
612
613 bn_check_top(y);
614 bn_check_top(x);
615 bn_check_top(p);
616
617 BN_CTX_start(ctx);
618 xinv = BN_CTX_get(ctx);
619 if (xinv == NULL) goto err;
620
621 if (!BN_GF2m_mod_inv(xinv, x, p, ctx)) goto err;
622 if (!BN_GF2m_mod_mul(r, y, xinv, p, ctx)) goto err;
623 bn_check_top(r);
624 ret = 1;
625
626err:
627 BN_CTX_end(ctx);
628 return ret;
629 }
630#else
631/* Divide y by x, reduce modulo p, and store the result in r. r could be x
632 * or y, x could equal y.
633 * Uses algorithm Modular_Division_GF(2^m) from
634 * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to
635 * the Great Divide".
636 */
637int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p, BN_CTX *ctx)
638 {
639 BIGNUM *a, *b, *u, *v;
640 int ret = 0;
641
642 bn_check_top(y);
643 bn_check_top(x);
644 bn_check_top(p);
645
646 BN_CTX_start(ctx);
647
648 a = BN_CTX_get(ctx);
649 b = BN_CTX_get(ctx);
650 u = BN_CTX_get(ctx);
651 v = BN_CTX_get(ctx);
652 if (v == NULL) goto err;
653
654 /* reduce x and y mod p */
655 if (!BN_GF2m_mod(u, y, p)) goto err;
656 if (!BN_GF2m_mod(a, x, p)) goto err;
657 if (!BN_copy(b, p)) goto err;
658
659 while (!BN_is_odd(a))
660 {
661 if (!BN_rshift1(a, a)) goto err;
662 if (BN_is_odd(u)) if (!BN_GF2m_add(u, u, p)) goto err;
663 if (!BN_rshift1(u, u)) goto err;
664 }
665
666 do
667 {
668 if (BN_GF2m_cmp(b, a) > 0)
669 {
670 if (!BN_GF2m_add(b, b, a)) goto err;
671 if (!BN_GF2m_add(v, v, u)) goto err;
672 do
673 {
674 if (!BN_rshift1(b, b)) goto err;
675 if (BN_is_odd(v)) if (!BN_GF2m_add(v, v, p)) goto err;
676 if (!BN_rshift1(v, v)) goto err;
677 } while (!BN_is_odd(b));
678 }
679 else if (BN_abs_is_word(a, 1))
680 break;
681 else
682 {
683 if (!BN_GF2m_add(a, a, b)) goto err;
684 if (!BN_GF2m_add(u, u, v)) goto err;
685 do
686 {
687 if (!BN_rshift1(a, a)) goto err;
688 if (BN_is_odd(u)) if (!BN_GF2m_add(u, u, p)) goto err;
689 if (!BN_rshift1(u, u)) goto err;
690 } while (!BN_is_odd(a));
691 }
692 } while (1);
693
694 if (!BN_copy(r, u)) goto err;
695 bn_check_top(r);
696 ret = 1;
697
698err:
699 BN_CTX_end(ctx);
700 return ret;
701 }
702#endif
703
704/* Divide yy by xx, reduce modulo p, and store the result in r. r could be xx
705 * or yy, xx could equal yy.
706 *
707 * This function calls down to the BN_GF2m_mod_div implementation; this wrapper
708 * function is only provided for convenience; for best performance, use the
709 * BN_GF2m_mod_div function.
710 */
711int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const int p[], BN_CTX *ctx)
712 {
713 BIGNUM *field;
714 int ret = 0;
715
716 bn_check_top(yy);
717 bn_check_top(xx);
718
719 BN_CTX_start(ctx);
720 if ((field = BN_CTX_get(ctx)) == NULL) goto err;
721 if (!BN_GF2m_arr2poly(p, field)) goto err;
722
723 ret = BN_GF2m_mod_div(r, yy, xx, field, ctx);
724 bn_check_top(r);
725
726err:
727 BN_CTX_end(ctx);
728 return ret;
729 }
730
731
732/* Compute the bth power of a, reduce modulo p, and store
733 * the result in r. r could be a.
734 * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363.
735 */
736int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
737 {
738 int ret = 0, i, n;
739 BIGNUM *u;
740
741 bn_check_top(a);
742 bn_check_top(b);
743
744 if (BN_is_zero(b))
745 return(BN_one(r));
746
747 if (BN_abs_is_word(b, 1))
748 return (BN_copy(r, a) != NULL);
749
750 BN_CTX_start(ctx);
751 if ((u = BN_CTX_get(ctx)) == NULL) goto err;
752
753 if (!BN_GF2m_mod_arr(u, a, p)) goto err;
754
755 n = BN_num_bits(b) - 1;
756 for (i = n - 1; i >= 0; i--)
757 {
758 if (!BN_GF2m_mod_sqr_arr(u, u, p, ctx)) goto err;
759 if (BN_is_bit_set(b, i))
760 {
761 if (!BN_GF2m_mod_mul_arr(u, u, a, p, ctx)) goto err;
762 }
763 }
764 if (!BN_copy(r, u)) goto err;
765 bn_check_top(r);
766 ret = 1;
767err:
768 BN_CTX_end(ctx);
769 return ret;
770 }
771
772/* Compute the bth power of a, reduce modulo p, and store
773 * the result in r. r could be a.
774 *
775 * This function calls down to the BN_GF2m_mod_exp_arr implementation; this wrapper
776 * function is only provided for convenience; for best performance, use the
777 * BN_GF2m_mod_exp_arr function.
778 */
779int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
780 {
781 int ret = 0;
782 const int max = BN_num_bits(p) + 1;
783 int *arr=NULL;
784 bn_check_top(a);
785 bn_check_top(b);
786 bn_check_top(p);
787 if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
788 ret = BN_GF2m_poly2arr(p, arr, max);
789 if (!ret || ret > max)
790 {
791 BNerr(BN_F_BN_GF2M_MOD_EXP,BN_R_INVALID_LENGTH);
792 goto err;
793 }
794 ret = BN_GF2m_mod_exp_arr(r, a, b, arr, ctx);
795 bn_check_top(r);
796err:
797 if (arr) OPENSSL_free(arr);
798 return ret;
799 }
800
801/* Compute the square root of a, reduce modulo p, and store
802 * the result in r. r could be a.
803 * Uses exponentiation as in algorithm A.4.1 from IEEE P1363.
804 */
805int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
806 {
807 int ret = 0;
808 BIGNUM *u;
809
810 bn_check_top(a);
811
812 if (!p[0])
813 {
814 /* reduction mod 1 => return 0 */
815 BN_zero(r);
816 return 1;
817 }
818
819 BN_CTX_start(ctx);
820 if ((u = BN_CTX_get(ctx)) == NULL) goto err;
821
822 if (!BN_set_bit(u, p[0] - 1)) goto err;
823 ret = BN_GF2m_mod_exp_arr(r, a, u, p, ctx);
824 bn_check_top(r);
825
826err:
827 BN_CTX_end(ctx);
828 return ret;
829 }
830
831/* Compute the square root of a, reduce modulo p, and store
832 * the result in r. r could be a.
833 *
834 * This function calls down to the BN_GF2m_mod_sqrt_arr implementation; this wrapper
835 * function is only provided for convenience; for best performance, use the
836 * BN_GF2m_mod_sqrt_arr function.
837 */
838int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
839 {
840 int ret = 0;
841 const int max = BN_num_bits(p) + 1;
842 int *arr=NULL;
843 bn_check_top(a);
844 bn_check_top(p);
845 if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
846 ret = BN_GF2m_poly2arr(p, arr, max);
847 if (!ret || ret > max)
848 {
849 BNerr(BN_F_BN_GF2M_MOD_SQRT,BN_R_INVALID_LENGTH);
850 goto err;
851 }
852 ret = BN_GF2m_mod_sqrt_arr(r, a, arr, ctx);
853 bn_check_top(r);
854err:
855 if (arr) OPENSSL_free(arr);
856 return ret;
857 }
858
859/* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0.
860 * Uses algorithms A.4.7 and A.4.6 from IEEE P1363.
861 */
862int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], BN_CTX *ctx)
863 {
864 int ret = 0, count = 0, j;
865 BIGNUM *a, *z, *rho, *w, *w2, *tmp;
866
867 bn_check_top(a_);
868
869 if (!p[0])
870 {
871 /* reduction mod 1 => return 0 */
872 BN_zero(r);
873 return 1;
874 }
875
876 BN_CTX_start(ctx);
877 a = BN_CTX_get(ctx);
878 z = BN_CTX_get(ctx);
879 w = BN_CTX_get(ctx);
880 if (w == NULL) goto err;
881
882 if (!BN_GF2m_mod_arr(a, a_, p)) goto err;
883
884 if (BN_is_zero(a))
885 {
886 BN_zero(r);
887 ret = 1;
888 goto err;
889 }
890
891 if (p[0] & 0x1) /* m is odd */
892 {
893 /* compute half-trace of a */
894 if (!BN_copy(z, a)) goto err;
895 for (j = 1; j <= (p[0] - 1) / 2; j++)
896 {
897 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) goto err;
898 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) goto err;
899 if (!BN_GF2m_add(z, z, a)) goto err;
900 }
901
902 }
903 else /* m is even */
904 {
905 rho = BN_CTX_get(ctx);
906 w2 = BN_CTX_get(ctx);
907 tmp = BN_CTX_get(ctx);
908 if (tmp == NULL) goto err;
909 do
910 {
911 if (!BN_rand(rho, p[0], 0, 0)) goto err;
912 if (!BN_GF2m_mod_arr(rho, rho, p)) goto err;
913 BN_zero(z);
914 if (!BN_copy(w, rho)) goto err;
915 for (j = 1; j <= p[0] - 1; j++)
916 {
917 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) goto err;
918 if (!BN_GF2m_mod_sqr_arr(w2, w, p, ctx)) goto err;
919 if (!BN_GF2m_mod_mul_arr(tmp, w2, a, p, ctx)) goto err;
920 if (!BN_GF2m_add(z, z, tmp)) goto err;
921 if (!BN_GF2m_add(w, w2, rho)) goto err;
922 }
923 count++;
924 } while (BN_is_zero(w) && (count < MAX_ITERATIONS));
925 if (BN_is_zero(w))
926 {
927 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR,BN_R_TOO_MANY_ITERATIONS);
928 goto err;
929 }
930 }
931
932 if (!BN_GF2m_mod_sqr_arr(w, z, p, ctx)) goto err;
933 if (!BN_GF2m_add(w, z, w)) goto err;
934 if (BN_GF2m_cmp(w, a))
935 {
936 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, BN_R_NO_SOLUTION);
937 goto err;
938 }
939
940 if (!BN_copy(r, z)) goto err;
941 bn_check_top(r);
942
943 ret = 1;
944
945err:
946 BN_CTX_end(ctx);
947 return ret;
948 }
949
950/* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0.
951 *
952 * This function calls down to the BN_GF2m_mod_solve_quad_arr implementation; this wrapper
953 * function is only provided for convenience; for best performance, use the
954 * BN_GF2m_mod_solve_quad_arr function.
955 */
956int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
957 {
958 int ret = 0;
959 const int max = BN_num_bits(p) + 1;
960 int *arr=NULL;
961 bn_check_top(a);
962 bn_check_top(p);
963 if ((arr = (int *)OPENSSL_malloc(sizeof(int) *
964 max)) == NULL) goto err;
965 ret = BN_GF2m_poly2arr(p, arr, max);
966 if (!ret || ret > max)
967 {
968 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD,BN_R_INVALID_LENGTH);
969 goto err;
970 }
971 ret = BN_GF2m_mod_solve_quad_arr(r, a, arr, ctx);
972 bn_check_top(r);
973err:
974 if (arr) OPENSSL_free(arr);
975 return ret;
976 }
977
978/* Convert the bit-string representation of a polynomial
979 * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding
980 * to the bits with non-zero coefficient. Array is terminated with -1.
981 * Up to max elements of the array will be filled. Return value is total
982 * number of array elements that would be filled if array was large enough.
983 */
984int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max)
985 {
986 int i, j, k = 0;
987 BN_ULONG mask;
988
989 if (BN_is_zero(a))
990 return 0;
991
992 for (i = a->top - 1; i >= 0; i--)
993 {
994 if (!a->d[i])
995 /* skip word if a->d[i] == 0 */
996 continue;
997 mask = BN_TBIT;
998 for (j = BN_BITS2 - 1; j >= 0; j--)
999 {
1000 if (a->d[i] & mask)
1001 {
1002 if (k < max) p[k] = BN_BITS2 * i + j;
1003 k++;
1004 }
1005 mask >>= 1;
1006 }
1007 }
1008
1009 if (k < max) {
1010 p[k] = -1;
1011 k++;
1012 }
1013
1014 return k;
1015 }
1016
1017/* Convert the coefficient array representation of a polynomial to a
1018 * bit-string. The array must be terminated by -1.
1019 */
1020int BN_GF2m_arr2poly(const int p[], BIGNUM *a)
1021 {
1022 int i;
1023
1024 bn_check_top(a);
1025 BN_zero(a);
1026 for (i = 0; p[i] != -1; i++)
1027 {
1028 if (BN_set_bit(a, p[i]) == 0)
1029 return 0;
1030 }
1031 bn_check_top(a);
1032
1033 return 1;
1034 }
1035
diff --git a/src/lib/libcrypto/bn/bn_kron.c b/src/lib/libcrypto/bn/bn_kron.c
deleted file mode 100644
index 740359b752..0000000000
--- a/src/lib/libcrypto/bn/bn_kron.c
+++ /dev/null
@@ -1,184 +0,0 @@
1/* crypto/bn/bn_kron.c */
2/* ====================================================================
3 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include "cryptlib.h"
57#include "bn_lcl.h"
58
59/* least significant word */
60#define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
61
62/* Returns -2 for errors because both -1 and 0 are valid results. */
63int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
64 {
65 int i;
66 int ret = -2; /* avoid 'uninitialized' warning */
67 int err = 0;
68 BIGNUM *A, *B, *tmp;
69 /* In 'tab', only odd-indexed entries are relevant:
70 * For any odd BIGNUM n,
71 * tab[BN_lsw(n) & 7]
72 * is $(-1)^{(n^2-1)/8}$ (using TeX notation).
73 * Note that the sign of n does not matter.
74 */
75 static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};
76
77 bn_check_top(a);
78 bn_check_top(b);
79
80 BN_CTX_start(ctx);
81 A = BN_CTX_get(ctx);
82 B = BN_CTX_get(ctx);
83 if (B == NULL) goto end;
84
85 err = !BN_copy(A, a);
86 if (err) goto end;
87 err = !BN_copy(B, b);
88 if (err) goto end;
89
90 /*
91 * Kronecker symbol, imlemented according to Henri Cohen,
92 * "A Course in Computational Algebraic Number Theory"
93 * (algorithm 1.4.10).
94 */
95
96 /* Cohen's step 1: */
97
98 if (BN_is_zero(B))
99 {
100 ret = BN_abs_is_word(A, 1);
101 goto end;
102 }
103
104 /* Cohen's step 2: */
105
106 if (!BN_is_odd(A) && !BN_is_odd(B))
107 {
108 ret = 0;
109 goto end;
110 }
111
112 /* now B is non-zero */
113 i = 0;
114 while (!BN_is_bit_set(B, i))
115 i++;
116 err = !BN_rshift(B, B, i);
117 if (err) goto end;
118 if (i & 1)
119 {
120 /* i is odd */
121 /* (thus B was even, thus A must be odd!) */
122
123 /* set 'ret' to $(-1)^{(A^2-1)/8}$ */
124 ret = tab[BN_lsw(A) & 7];
125 }
126 else
127 {
128 /* i is even */
129 ret = 1;
130 }
131
132 if (B->neg)
133 {
134 B->neg = 0;
135 if (A->neg)
136 ret = -ret;
137 }
138
139 /* now B is positive and odd, so what remains to be done is
140 * to compute the Jacobi symbol (A/B) and multiply it by 'ret' */
141
142 while (1)
143 {
144 /* Cohen's step 3: */
145
146 /* B is positive and odd */
147
148 if (BN_is_zero(A))
149 {
150 ret = BN_is_one(B) ? ret : 0;
151 goto end;
152 }
153
154 /* now A is non-zero */
155 i = 0;
156 while (!BN_is_bit_set(A, i))
157 i++;
158 err = !BN_rshift(A, A, i);
159 if (err) goto end;
160 if (i & 1)
161 {
162 /* i is odd */
163 /* multiply 'ret' by $(-1)^{(B^2-1)/8}$ */
164 ret = ret * tab[BN_lsw(B) & 7];
165 }
166
167 /* Cohen's step 4: */
168 /* multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$ */
169 if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2)
170 ret = -ret;
171
172 /* (A, B) := (B mod |A|, |A|) */
173 err = !BN_nnmod(B, B, A, ctx);
174 if (err) goto end;
175 tmp = A; A = B; B = tmp;
176 tmp->neg = 0;
177 }
178end:
179 BN_CTX_end(ctx);
180 if (err)
181 return -2;
182 else
183 return ret;
184 }
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h
deleted file mode 100644
index 8e5e98e3f2..0000000000
--- a/src/lib/libcrypto/bn/bn_lcl.h
+++ /dev/null
@@ -1,491 +0,0 @@
1/* crypto/bn/bn_lcl.h */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#ifndef HEADER_BN_LCL_H
113#define HEADER_BN_LCL_H
114
115#include <openssl/bn.h>
116
117#ifdef __cplusplus
118extern "C" {
119#endif
120
121
122/*
123 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
124 *
125 *
126 * For window size 'w' (w >= 2) and a random 'b' bits exponent,
127 * the number of multiplications is a constant plus on average
128 *
129 * 2^(w-1) + (b-w)/(w+1);
130 *
131 * here 2^(w-1) is for precomputing the table (we actually need
132 * entries only for windows that have the lowest bit set), and
133 * (b-w)/(w+1) is an approximation for the expected number of
134 * w-bit windows, not counting the first one.
135 *
136 * Thus we should use
137 *
138 * w >= 6 if b > 671
139 * w = 5 if 671 > b > 239
140 * w = 4 if 239 > b > 79
141 * w = 3 if 79 > b > 23
142 * w <= 2 if 23 > b
143 *
144 * (with draws in between). Very small exponents are often selected
145 * with low Hamming weight, so we use w = 1 for b <= 23.
146 */
147#if 1
148#define BN_window_bits_for_exponent_size(b) \
149 ((b) > 671 ? 6 : \
150 (b) > 239 ? 5 : \
151 (b) > 79 ? 4 : \
152 (b) > 23 ? 3 : 1)
153#else
154/* Old SSLeay/OpenSSL table.
155 * Maximum window size was 5, so this table differs for b==1024;
156 * but it coincides for other interesting values (b==160, b==512).
157 */
158#define BN_window_bits_for_exponent_size(b) \
159 ((b) > 255 ? 5 : \
160 (b) > 127 ? 4 : \
161 (b) > 17 ? 3 : 1)
162#endif
163
164
165
166/* BN_mod_exp_mont_conttime is based on the assumption that the
167 * L1 data cache line width of the target processor is at least
168 * the following value.
169 */
170#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 )
171#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
172
173/* Window sizes optimized for fixed window size modular exponentiation
174 * algorithm (BN_mod_exp_mont_consttime).
175 *
176 * To achieve the security goals of BN_mode_exp_mont_consttime, the
177 * maximum size of the window must not exceed
178 * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
179 *
180 * Window size thresholds are defined for cache line sizes of 32 and 64,
181 * cache line sizes where log_2(32)=5 and log_2(64)=6 respectively. A
182 * window size of 7 should only be used on processors that have a 128
183 * byte or greater cache line size.
184 */
185#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
186
187# define BN_window_bits_for_ctime_exponent_size(b) \
188 ((b) > 937 ? 6 : \
189 (b) > 306 ? 5 : \
190 (b) > 89 ? 4 : \
191 (b) > 22 ? 3 : 1)
192# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
193
194#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
195
196# define BN_window_bits_for_ctime_exponent_size(b) \
197 ((b) > 306 ? 5 : \
198 (b) > 89 ? 4 : \
199 (b) > 22 ? 3 : 1)
200# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
201
202#endif
203
204
205/* Pentium pro 16,16,16,32,64 */
206/* Alpha 16,16,16,16.64 */
207#define BN_MULL_SIZE_NORMAL (16) /* 32 */
208#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */
209#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */
210#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */
211#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */
212
213#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
214/*
215 * BN_UMULT_HIGH section.
216 *
217 * No, I'm not trying to overwhelm you when stating that the
218 * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
219 * you to be impressed when I say that if the compiler doesn't
220 * support 2*N integer type, then you have to replace every N*N
221 * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
222 * and additions which unavoidably results in severe performance
223 * penalties. Of course provided that the hardware is capable of
224 * producing 2*N result... That's when you normally start
225 * considering assembler implementation. However! It should be
226 * pointed out that some CPUs (most notably Alpha, PowerPC and
227 * upcoming IA-64 family:-) provide *separate* instruction
228 * calculating the upper half of the product placing the result
229 * into a general purpose register. Now *if* the compiler supports
230 * inline assembler, then it's not impossible to implement the
231 * "bignum" routines (and have the compiler optimize 'em)
232 * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
233 * macro is about:-)
234 *
235 * <appro@fy.chalmers.se>
236 */
237# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
238# if defined(__DECC)
239# include <c_asm.h>
240# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
241# elif defined(__GNUC__)
242# define BN_UMULT_HIGH(a,b) ({ \
243 register BN_ULONG ret; \
244 asm ("umulh %1,%2,%0" \
245 : "=r"(ret) \
246 : "r"(a), "r"(b)); \
247 ret; })
248# endif /* compiler */
249# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
250# if defined(__GNUC__)
251# define BN_UMULT_HIGH(a,b) ({ \
252 register BN_ULONG ret; \
253 asm ("mulhdu %0,%1,%2" \
254 : "=r"(ret) \
255 : "r"(a), "r"(b)); \
256 ret; })
257# endif /* compiler */
258# elif (defined(__x86_64) || defined(__x86_64__)) && \
259 (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
260# if defined(__GNUC__)
261# define BN_UMULT_HIGH(a,b) ({ \
262 register BN_ULONG ret,discard; \
263 asm ("mulq %3" \
264 : "=a"(discard),"=d"(ret) \
265 : "a"(a), "g"(b) \
266 : "cc"); \
267 ret; })
268# define BN_UMULT_LOHI(low,high,a,b) \
269 asm ("mulq %3" \
270 : "=a"(low),"=d"(high) \
271 : "a"(a),"g"(b) \
272 : "cc");
273# endif
274# elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
275# if defined(_MSC_VER) && _MSC_VER>=1400
276 unsigned __int64 __umulh (unsigned __int64 a,unsigned __int64 b);
277 unsigned __int64 _umul128 (unsigned __int64 a,unsigned __int64 b,
278 unsigned __int64 *h);
279# pragma intrinsic(__umulh,_umul128)
280# define BN_UMULT_HIGH(a,b) __umulh((a),(b))
281# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high)))
282# endif
283# endif /* cpu */
284#endif /* OPENSSL_NO_ASM */
285
286/*************************************************************
287 * Using the long long type
288 */
289#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
290#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
291
292#ifdef BN_DEBUG_RAND
293#define bn_clear_top2max(a) \
294 { \
295 int ind = (a)->dmax - (a)->top; \
296 BN_ULONG *ftl = &(a)->d[(a)->top-1]; \
297 for (; ind != 0; ind--) \
298 *(++ftl) = 0x0; \
299 }
300#else
301#define bn_clear_top2max(a)
302#endif
303
304#ifdef BN_LLONG
305#define mul_add(r,a,w,c) { \
306 BN_ULLONG t; \
307 t=(BN_ULLONG)w * (a) + (r) + (c); \
308 (r)= Lw(t); \
309 (c)= Hw(t); \
310 }
311
312#define mul(r,a,w,c) { \
313 BN_ULLONG t; \
314 t=(BN_ULLONG)w * (a) + (c); \
315 (r)= Lw(t); \
316 (c)= Hw(t); \
317 }
318
319#define sqr(r0,r1,a) { \
320 BN_ULLONG t; \
321 t=(BN_ULLONG)(a)*(a); \
322 (r0)=Lw(t); \
323 (r1)=Hw(t); \
324 }
325
326#elif defined(BN_UMULT_LOHI)
327#define mul_add(r,a,w,c) { \
328 BN_ULONG high,low,ret,tmp=(a); \
329 ret = (r); \
330 BN_UMULT_LOHI(low,high,w,tmp); \
331 ret += (c); \
332 (c) = (ret<(c))?1:0; \
333 (c) += high; \
334 ret += low; \
335 (c) += (ret<low)?1:0; \
336 (r) = ret; \
337 }
338
339#define mul(r,a,w,c) { \
340 BN_ULONG high,low,ret,ta=(a); \
341 BN_UMULT_LOHI(low,high,w,ta); \
342 ret = low + (c); \
343 (c) = high; \
344 (c) += (ret<low)?1:0; \
345 (r) = ret; \
346 }
347
348#define sqr(r0,r1,a) { \
349 BN_ULONG tmp=(a); \
350 BN_UMULT_LOHI(r0,r1,tmp,tmp); \
351 }
352
353#elif defined(BN_UMULT_HIGH)
354#define mul_add(r,a,w,c) { \
355 BN_ULONG high,low,ret,tmp=(a); \
356 ret = (r); \
357 high= BN_UMULT_HIGH(w,tmp); \
358 ret += (c); \
359 low = (w) * tmp; \
360 (c) = (ret<(c))?1:0; \
361 (c) += high; \
362 ret += low; \
363 (c) += (ret<low)?1:0; \
364 (r) = ret; \
365 }
366
367#define mul(r,a,w,c) { \
368 BN_ULONG high,low,ret,ta=(a); \
369 low = (w) * ta; \
370 high= BN_UMULT_HIGH(w,ta); \
371 ret = low + (c); \
372 (c) = high; \
373 (c) += (ret<low)?1:0; \
374 (r) = ret; \
375 }
376
377#define sqr(r0,r1,a) { \
378 BN_ULONG tmp=(a); \
379 (r0) = tmp * tmp; \
380 (r1) = BN_UMULT_HIGH(tmp,tmp); \
381 }
382
383#else
384/*************************************************************
385 * No long long type
386 */
387
388#define LBITS(a) ((a)&BN_MASK2l)
389#define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
390#define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
391
392#define LLBITS(a) ((a)&BN_MASKl)
393#define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
394#define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
395
396#define mul64(l,h,bl,bh) \
397 { \
398 BN_ULONG m,m1,lt,ht; \
399 \
400 lt=l; \
401 ht=h; \
402 m =(bh)*(lt); \
403 lt=(bl)*(lt); \
404 m1=(bl)*(ht); \
405 ht =(bh)*(ht); \
406 m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
407 ht+=HBITS(m); \
408 m1=L2HBITS(m); \
409 lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
410 (l)=lt; \
411 (h)=ht; \
412 }
413
414#define sqr64(lo,ho,in) \
415 { \
416 BN_ULONG l,h,m; \
417 \
418 h=(in); \
419 l=LBITS(h); \
420 h=HBITS(h); \
421 m =(l)*(h); \
422 l*=l; \
423 h*=h; \
424 h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
425 m =(m&BN_MASK2l)<<(BN_BITS4+1); \
426 l=(l+m)&BN_MASK2; if (l < m) h++; \
427 (lo)=l; \
428 (ho)=h; \
429 }
430
431#define mul_add(r,a,bl,bh,c) { \
432 BN_ULONG l,h; \
433 \
434 h= (a); \
435 l=LBITS(h); \
436 h=HBITS(h); \
437 mul64(l,h,(bl),(bh)); \
438 \
439 /* non-multiply part */ \
440 l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
441 (c)=(r); \
442 l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
443 (c)=h&BN_MASK2; \
444 (r)=l; \
445 }
446
447#define mul(r,a,bl,bh,c) { \
448 BN_ULONG l,h; \
449 \
450 h= (a); \
451 l=LBITS(h); \
452 h=HBITS(h); \
453 mul64(l,h,(bl),(bh)); \
454 \
455 /* non-multiply part */ \
456 l+=(c); if ((l&BN_MASK2) < (c)) h++; \
457 (c)=h&BN_MASK2; \
458 (r)=l&BN_MASK2; \
459 }
460#endif /* !BN_LLONG */
461
462void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
463void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
464void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
465void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
466void bn_sqr_comba8(BN_ULONG *r,const BN_ULONG *a);
467void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a);
468int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n);
469int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
470 int cl, int dl);
471void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
472 int dna,int dnb,BN_ULONG *t);
473void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,
474 int n,int tna,int tnb,BN_ULONG *t);
475void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t);
476void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
477void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
478 BN_ULONG *t);
479void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2,
480 BN_ULONG *t);
481BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
482 int cl, int dl);
483BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
484 int cl, int dl);
485int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
486
487#ifdef __cplusplus
488}
489#endif
490
491#endif
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
deleted file mode 100644
index 5470fbe6ef..0000000000
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ /dev/null
@@ -1,845 +0,0 @@
1/* crypto/bn/bn_lib.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <assert.h>
65#include <limits.h>
66#include <stdio.h>
67#include "cryptlib.h"
68#include "bn_lcl.h"
69
70const char BN_version[]="Big Number" OPENSSL_VERSION_PTEXT;
71
72/* This stuff appears to be completely unused, so is deprecated */
73#ifndef OPENSSL_NO_DEPRECATED
74/* For a 32 bit machine
75 * 2 - 4 == 128
76 * 3 - 8 == 256
77 * 4 - 16 == 512
78 * 5 - 32 == 1024
79 * 6 - 64 == 2048
80 * 7 - 128 == 4096
81 * 8 - 256 == 8192
82 */
83static int bn_limit_bits=0;
84static int bn_limit_num=8; /* (1<<bn_limit_bits) */
85static int bn_limit_bits_low=0;
86static int bn_limit_num_low=8; /* (1<<bn_limit_bits_low) */
87static int bn_limit_bits_high=0;
88static int bn_limit_num_high=8; /* (1<<bn_limit_bits_high) */
89static int bn_limit_bits_mont=0;
90static int bn_limit_num_mont=8; /* (1<<bn_limit_bits_mont) */
91
92void BN_set_params(int mult, int high, int low, int mont)
93 {
94 if (mult >= 0)
95 {
96 if (mult > (int)(sizeof(int)*8)-1)
97 mult=sizeof(int)*8-1;
98 bn_limit_bits=mult;
99 bn_limit_num=1<<mult;
100 }
101 if (high >= 0)
102 {
103 if (high > (int)(sizeof(int)*8)-1)
104 high=sizeof(int)*8-1;
105 bn_limit_bits_high=high;
106 bn_limit_num_high=1<<high;
107 }
108 if (low >= 0)
109 {
110 if (low > (int)(sizeof(int)*8)-1)
111 low=sizeof(int)*8-1;
112 bn_limit_bits_low=low;
113 bn_limit_num_low=1<<low;
114 }
115 if (mont >= 0)
116 {
117 if (mont > (int)(sizeof(int)*8)-1)
118 mont=sizeof(int)*8-1;
119 bn_limit_bits_mont=mont;
120 bn_limit_num_mont=1<<mont;
121 }
122 }
123
124int BN_get_params(int which)
125 {
126 if (which == 0) return(bn_limit_bits);
127 else if (which == 1) return(bn_limit_bits_high);
128 else if (which == 2) return(bn_limit_bits_low);
129 else if (which == 3) return(bn_limit_bits_mont);
130 else return(0);
131 }
132#endif
133
134const BIGNUM *BN_value_one(void)
135 {
136 static const BN_ULONG data_one=1L;
137 static const BIGNUM const_one={(BN_ULONG *)&data_one,1,1,0,BN_FLG_STATIC_DATA};
138
139 return(&const_one);
140 }
141
142char *BN_options(void)
143 {
144 static int init=0;
145 static char data[16];
146
147 if (!init)
148 {
149 init++;
150#ifdef BN_LLONG
151 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
152 (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8);
153#else
154 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
155 (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);
156#endif
157 }
158 return(data);
159 }
160
161int BN_num_bits_word(BN_ULONG l)
162 {
163 static const unsigned char bits[256]={
164 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
165 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
166 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
167 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
168 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
169 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
170 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
171 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
172 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
173 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
174 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
175 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
176 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
177 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
178 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
179 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
180 };
181
182#if defined(SIXTY_FOUR_BIT_LONG)
183 if (l & 0xffffffff00000000L)
184 {
185 if (l & 0xffff000000000000L)
186 {
187 if (l & 0xff00000000000000L)
188 {
189 return(bits[(int)(l>>56)]+56);
190 }
191 else return(bits[(int)(l>>48)]+48);
192 }
193 else
194 {
195 if (l & 0x0000ff0000000000L)
196 {
197 return(bits[(int)(l>>40)]+40);
198 }
199 else return(bits[(int)(l>>32)]+32);
200 }
201 }
202 else
203#else
204#ifdef SIXTY_FOUR_BIT
205 if (l & 0xffffffff00000000LL)
206 {
207 if (l & 0xffff000000000000LL)
208 {
209 if (l & 0xff00000000000000LL)
210 {
211 return(bits[(int)(l>>56)]+56);
212 }
213 else return(bits[(int)(l>>48)]+48);
214 }
215 else
216 {
217 if (l & 0x0000ff0000000000LL)
218 {
219 return(bits[(int)(l>>40)]+40);
220 }
221 else return(bits[(int)(l>>32)]+32);
222 }
223 }
224 else
225#endif
226#endif
227 {
228#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
229 if (l & 0xffff0000L)
230 {
231 if (l & 0xff000000L)
232 return(bits[(int)(l>>24L)]+24);
233 else return(bits[(int)(l>>16L)]+16);
234 }
235 else
236#endif
237 {
238#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
239 if (l & 0xff00L)
240 return(bits[(int)(l>>8)]+8);
241 else
242#endif
243 return(bits[(int)(l )] );
244 }
245 }
246 }
247
248int BN_num_bits(const BIGNUM *a)
249 {
250 int i = a->top - 1;
251 bn_check_top(a);
252
253 if (BN_is_zero(a)) return 0;
254 return ((i*BN_BITS2) + BN_num_bits_word(a->d[i]));
255 }
256
257void BN_clear_free(BIGNUM *a)
258 {
259 int i;
260
261 if (a == NULL) return;
262 bn_check_top(a);
263 if (a->d != NULL)
264 {
265 OPENSSL_cleanse(a->d,a->dmax*sizeof(a->d[0]));
266 if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
267 OPENSSL_free(a->d);
268 }
269 i=BN_get_flags(a,BN_FLG_MALLOCED);
270 OPENSSL_cleanse(a,sizeof(BIGNUM));
271 if (i)
272 OPENSSL_free(a);
273 }
274
275void BN_free(BIGNUM *a)
276 {
277 if (a == NULL) return;
278 bn_check_top(a);
279 if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
280 OPENSSL_free(a->d);
281 if (a->flags & BN_FLG_MALLOCED)
282 OPENSSL_free(a);
283 else
284 {
285#ifndef OPENSSL_NO_DEPRECATED
286 a->flags|=BN_FLG_FREE;
287#endif
288 a->d = NULL;
289 }
290 }
291
292void BN_init(BIGNUM *a)
293 {
294 memset(a,0,sizeof(BIGNUM));
295 bn_check_top(a);
296 }
297
298BIGNUM *BN_new(void)
299 {
300 BIGNUM *ret;
301
302 if ((ret=(BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL)
303 {
304 BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
305 return(NULL);
306 }
307 ret->flags=BN_FLG_MALLOCED;
308 ret->top=0;
309 ret->neg=0;
310 ret->dmax=0;
311 ret->d=NULL;
312 bn_check_top(ret);
313 return(ret);
314 }
315
316/* This is used both by bn_expand2() and bn_dup_expand() */
317/* The caller MUST check that words > b->dmax before calling this */
318static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
319 {
320 BN_ULONG *A,*a = NULL;
321 const BN_ULONG *B;
322 int i;
323
324 bn_check_top(b);
325
326 if (words > (INT_MAX/(4*BN_BITS2)))
327 {
328 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_BIGNUM_TOO_LONG);
329 return NULL;
330 }
331 if (BN_get_flags(b,BN_FLG_STATIC_DATA))
332 {
333 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
334 return(NULL);
335 }
336 a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*words);
337 if (A == NULL)
338 {
339 BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE);
340 return(NULL);
341 }
342#if 1
343 B=b->d;
344 /* Check if the previous number needs to be copied */
345 if (B != NULL)
346 {
347 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
348 {
349 /*
350 * The fact that the loop is unrolled
351 * 4-wise is a tribute to Intel. It's
352 * the one that doesn't have enough
353 * registers to accomodate more data.
354 * I'd unroll it 8-wise otherwise:-)
355 *
356 * <appro@fy.chalmers.se>
357 */
358 BN_ULONG a0,a1,a2,a3;
359 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
360 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
361 }
362 switch (b->top&3)
363 {
364 case 3: A[2]=B[2];
365 case 2: A[1]=B[1];
366 case 1: A[0]=B[0];
367 case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does
368 * the switch table by doing a=top&3; a--; goto jump_table[a];
369 * which fails for top== 0 */
370 ;
371 }
372 }
373
374#else
375 memset(A,0,sizeof(BN_ULONG)*words);
376 memcpy(A,b->d,sizeof(b->d[0])*b->top);
377#endif
378
379 return(a);
380 }
381
382/* This is an internal function that can be used instead of bn_expand2()
383 * when there is a need to copy BIGNUMs instead of only expanding the
384 * data part, while still expanding them.
385 * Especially useful when needing to expand BIGNUMs that are declared
386 * 'const' and should therefore not be changed.
387 * The reason to use this instead of a BN_dup() followed by a bn_expand2()
388 * is memory allocation overhead. A BN_dup() followed by a bn_expand2()
389 * will allocate new memory for the BIGNUM data twice, and free it once,
390 * while bn_dup_expand() makes sure allocation is made only once.
391 */
392
393#ifndef OPENSSL_NO_DEPRECATED
394BIGNUM *bn_dup_expand(const BIGNUM *b, int words)
395 {
396 BIGNUM *r = NULL;
397
398 bn_check_top(b);
399
400 /* This function does not work if
401 * words <= b->dmax && top < words
402 * because BN_dup() does not preserve 'dmax'!
403 * (But bn_dup_expand() is not used anywhere yet.)
404 */
405
406 if (words > b->dmax)
407 {
408 BN_ULONG *a = bn_expand_internal(b, words);
409
410 if (a)
411 {
412 r = BN_new();
413 if (r)
414 {
415 r->top = b->top;
416 r->dmax = words;
417 r->neg = b->neg;
418 r->d = a;
419 }
420 else
421 {
422 /* r == NULL, BN_new failure */
423 OPENSSL_free(a);
424 }
425 }
426 /* If a == NULL, there was an error in allocation in
427 bn_expand_internal(), and NULL should be returned */
428 }
429 else
430 {
431 r = BN_dup(b);
432 }
433
434 bn_check_top(r);
435 return r;
436 }
437#endif
438
439/* This is an internal function that should not be used in applications.
440 * It ensures that 'b' has enough room for a 'words' word number
441 * and initialises any unused part of b->d with leading zeros.
442 * It is mostly used by the various BIGNUM routines. If there is an error,
443 * NULL is returned. If not, 'b' is returned. */
444
445BIGNUM *bn_expand2(BIGNUM *b, int words)
446 {
447 bn_check_top(b);
448
449 if (words > b->dmax)
450 {
451 BN_ULONG *a = bn_expand_internal(b, words);
452 if(!a) return NULL;
453 if(b->d) OPENSSL_free(b->d);
454 b->d=a;
455 b->dmax=words;
456 }
457
458/* None of this should be necessary because of what b->top means! */
459#if 0
460 /* NB: bn_wexpand() calls this only if the BIGNUM really has to grow */
461 if (b->top < b->dmax)
462 {
463 int i;
464 BN_ULONG *A = &(b->d[b->top]);
465 for (i=(b->dmax - b->top)>>3; i>0; i--,A+=8)
466 {
467 A[0]=0; A[1]=0; A[2]=0; A[3]=0;
468 A[4]=0; A[5]=0; A[6]=0; A[7]=0;
469 }
470 for (i=(b->dmax - b->top)&7; i>0; i--,A++)
471 A[0]=0;
472 assert(A == &(b->d[b->dmax]));
473 }
474#endif
475 bn_check_top(b);
476 return b;
477 }
478
479BIGNUM *BN_dup(const BIGNUM *a)
480 {
481 BIGNUM *t;
482
483 if (a == NULL) return NULL;
484 bn_check_top(a);
485
486 t = BN_new();
487 if (t == NULL) return NULL;
488 if(!BN_copy(t, a))
489 {
490 BN_free(t);
491 return NULL;
492 }
493 bn_check_top(t);
494 return t;
495 }
496
497BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
498 {
499 int i;
500 BN_ULONG *A;
501 const BN_ULONG *B;
502
503 bn_check_top(b);
504
505 if (a == b) return(a);
506 if (bn_wexpand(a,b->top) == NULL) return(NULL);
507
508#if 1
509 A=a->d;
510 B=b->d;
511 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
512 {
513 BN_ULONG a0,a1,a2,a3;
514 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
515 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
516 }
517 switch (b->top&3)
518 {
519 case 3: A[2]=B[2];
520 case 2: A[1]=B[1];
521 case 1: A[0]=B[0];
522 case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
523 }
524#else
525 memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
526#endif
527
528 a->top=b->top;
529 a->neg=b->neg;
530 bn_check_top(a);
531 return(a);
532 }
533
534void BN_swap(BIGNUM *a, BIGNUM *b)
535 {
536 int flags_old_a, flags_old_b;
537 BN_ULONG *tmp_d;
538 int tmp_top, tmp_dmax, tmp_neg;
539
540 bn_check_top(a);
541 bn_check_top(b);
542
543 flags_old_a = a->flags;
544 flags_old_b = b->flags;
545
546 tmp_d = a->d;
547 tmp_top = a->top;
548 tmp_dmax = a->dmax;
549 tmp_neg = a->neg;
550
551 a->d = b->d;
552 a->top = b->top;
553 a->dmax = b->dmax;
554 a->neg = b->neg;
555
556 b->d = tmp_d;
557 b->top = tmp_top;
558 b->dmax = tmp_dmax;
559 b->neg = tmp_neg;
560
561 a->flags = (flags_old_a & BN_FLG_MALLOCED) | (flags_old_b & BN_FLG_STATIC_DATA);
562 b->flags = (flags_old_b & BN_FLG_MALLOCED) | (flags_old_a & BN_FLG_STATIC_DATA);
563 bn_check_top(a);
564 bn_check_top(b);
565 }
566
567void BN_clear(BIGNUM *a)
568 {
569 bn_check_top(a);
570 if (a->d != NULL)
571 memset(a->d,0,a->dmax*sizeof(a->d[0]));
572 a->top=0;
573 a->neg=0;
574 }
575
576BN_ULONG BN_get_word(const BIGNUM *a)
577 {
578 if (a->top > 1)
579 return BN_MASK2;
580 else if (a->top == 1)
581 return a->d[0];
582 /* a->top == 0 */
583 return 0;
584 }
585
586int BN_set_word(BIGNUM *a, BN_ULONG w)
587 {
588 bn_check_top(a);
589 if (bn_expand(a,(int)sizeof(BN_ULONG)*8) == NULL) return(0);
590 a->neg = 0;
591 a->d[0] = w;
592 a->top = (w ? 1 : 0);
593 bn_check_top(a);
594 return(1);
595 }
596
597BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
598 {
599 unsigned int i,m;
600 unsigned int n;
601 BN_ULONG l;
602 BIGNUM *bn = NULL;
603
604 if (ret == NULL)
605 ret = bn = BN_new();
606 if (ret == NULL) return(NULL);
607 bn_check_top(ret);
608 l=0;
609 n=len;
610 if (n == 0)
611 {
612 ret->top=0;
613 return(ret);
614 }
615 i=((n-1)/BN_BYTES)+1;
616 m=((n-1)%(BN_BYTES));
617 if (bn_wexpand(ret, (int)i) == NULL)
618 {
619 if (bn) BN_free(bn);
620 return NULL;
621 }
622 ret->top=i;
623 ret->neg=0;
624 while (n--)
625 {
626 l=(l<<8L)| *(s++);
627 if (m-- == 0)
628 {
629 ret->d[--i]=l;
630 l=0;
631 m=BN_BYTES-1;
632 }
633 }
634 /* need to call this due to clear byte at top if avoiding
635 * having the top bit set (-ve number) */
636 bn_correct_top(ret);
637 return(ret);
638 }
639
640/* ignore negative */
641int BN_bn2bin(const BIGNUM *a, unsigned char *to)
642 {
643 int n,i;
644 BN_ULONG l;
645
646 bn_check_top(a);
647 n=i=BN_num_bytes(a);
648 while (i--)
649 {
650 l=a->d[i/BN_BYTES];
651 *(to++)=(unsigned char)(l>>(8*(i%BN_BYTES)))&0xff;
652 }
653 return(n);
654 }
655
656int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
657 {
658 int i;
659 BN_ULONG t1,t2,*ap,*bp;
660
661 bn_check_top(a);
662 bn_check_top(b);
663
664 i=a->top-b->top;
665 if (i != 0) return(i);
666 ap=a->d;
667 bp=b->d;
668 for (i=a->top-1; i>=0; i--)
669 {
670 t1= ap[i];
671 t2= bp[i];
672 if (t1 != t2)
673 return((t1 > t2) ? 1 : -1);
674 }
675 return(0);
676 }
677
678int BN_cmp(const BIGNUM *a, const BIGNUM *b)
679 {
680 int i;
681 int gt,lt;
682 BN_ULONG t1,t2;
683
684 if ((a == NULL) || (b == NULL))
685 {
686 if (a != NULL)
687 return(-1);
688 else if (b != NULL)
689 return(1);
690 else
691 return(0);
692 }
693
694 bn_check_top(a);
695 bn_check_top(b);
696
697 if (a->neg != b->neg)
698 {
699 if (a->neg)
700 return(-1);
701 else return(1);
702 }
703 if (a->neg == 0)
704 { gt=1; lt= -1; }
705 else { gt= -1; lt=1; }
706
707 if (a->top > b->top) return(gt);
708 if (a->top < b->top) return(lt);
709 for (i=a->top-1; i>=0; i--)
710 {
711 t1=a->d[i];
712 t2=b->d[i];
713 if (t1 > t2) return(gt);
714 if (t1 < t2) return(lt);
715 }
716 return(0);
717 }
718
719int BN_set_bit(BIGNUM *a, int n)
720 {
721 int i,j,k;
722
723 if (n < 0)
724 return 0;
725
726 i=n/BN_BITS2;
727 j=n%BN_BITS2;
728 if (a->top <= i)
729 {
730 if (bn_wexpand(a,i+1) == NULL) return(0);
731 for(k=a->top; k<i+1; k++)
732 a->d[k]=0;
733 a->top=i+1;
734 }
735
736 a->d[i]|=(((BN_ULONG)1)<<j);
737 bn_check_top(a);
738 return(1);
739 }
740
741int BN_clear_bit(BIGNUM *a, int n)
742 {
743 int i,j;
744
745 bn_check_top(a);
746 if (n < 0) return 0;
747
748 i=n/BN_BITS2;
749 j=n%BN_BITS2;
750 if (a->top <= i) return(0);
751
752 a->d[i]&=(~(((BN_ULONG)1)<<j));
753 bn_correct_top(a);
754 return(1);
755 }
756
757int BN_is_bit_set(const BIGNUM *a, int n)
758 {
759 int i,j;
760
761 bn_check_top(a);
762 if (n < 0) return 0;
763 i=n/BN_BITS2;
764 j=n%BN_BITS2;
765 if (a->top <= i) return 0;
766 return (int)(((a->d[i])>>j)&((BN_ULONG)1));
767 }
768
769int BN_mask_bits(BIGNUM *a, int n)
770 {
771 int b,w;
772
773 bn_check_top(a);
774 if (n < 0) return 0;
775
776 w=n/BN_BITS2;
777 b=n%BN_BITS2;
778 if (w >= a->top) return 0;
779 if (b == 0)
780 a->top=w;
781 else
782 {
783 a->top=w+1;
784 a->d[w]&= ~(BN_MASK2<<b);
785 }
786 bn_correct_top(a);
787 return(1);
788 }
789
790void BN_set_negative(BIGNUM *a, int b)
791 {
792 if (b && !BN_is_zero(a))
793 a->neg = 1;
794 else
795 a->neg = 0;
796 }
797
798int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
799 {
800 int i;
801 BN_ULONG aa,bb;
802
803 aa=a[n-1];
804 bb=b[n-1];
805 if (aa != bb) return((aa > bb)?1:-1);
806 for (i=n-2; i>=0; i--)
807 {
808 aa=a[i];
809 bb=b[i];
810 if (aa != bb) return((aa > bb)?1:-1);
811 }
812 return(0);
813 }
814
815/* Here follows a specialised variants of bn_cmp_words(). It has the
816 property of performing the operation on arrays of different sizes.
817 The sizes of those arrays is expressed through cl, which is the
818 common length ( basicall, min(len(a),len(b)) ), and dl, which is the
819 delta between the two lengths, calculated as len(a)-len(b).
820 All lengths are the number of BN_ULONGs... */
821
822int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
823 int cl, int dl)
824 {
825 int n,i;
826 n = cl-1;
827
828 if (dl < 0)
829 {
830 for (i=dl; i<0; i++)
831 {
832 if (b[n-i] != 0)
833 return -1; /* a < b */
834 }
835 }
836 if (dl > 0)
837 {
838 for (i=dl; i>0; i--)
839 {
840 if (a[n+i] != 0)
841 return 1; /* a > b */
842 }
843 }
844 return bn_cmp_words(a,b,cl);
845 }
diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c
deleted file mode 100644
index 77d6ddb91a..0000000000
--- a/src/lib/libcrypto/bn/bn_mod.c
+++ /dev/null
@@ -1,301 +0,0 @@
1/* crypto/bn/bn_mod.c */
2/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
58 * All rights reserved.
59 *
60 * This package is an SSL implementation written
61 * by Eric Young (eay@cryptsoft.com).
62 * The implementation was written so as to conform with Netscapes SSL.
63 *
64 * This library is free for commercial and non-commercial use as long as
65 * the following conditions are aheared to. The following conditions
66 * apply to all code found in this distribution, be it the RC4, RSA,
67 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
68 * included with this distribution is covered by the same copyright terms
69 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
70 *
71 * Copyright remains Eric Young's, and as such any Copyright notices in
72 * the code are not to be removed.
73 * If this package is used in a product, Eric Young should be given attribution
74 * as the author of the parts of the library used.
75 * This can be in the form of a textual message at program startup or
76 * in documentation (online or textual) provided with the package.
77 *
78 * Redistribution and use in source and binary forms, with or without
79 * modification, are permitted provided that the following conditions
80 * are met:
81 * 1. Redistributions of source code must retain the copyright
82 * notice, this list of conditions and the following disclaimer.
83 * 2. Redistributions in binary form must reproduce the above copyright
84 * notice, this list of conditions and the following disclaimer in the
85 * documentation and/or other materials provided with the distribution.
86 * 3. All advertising materials mentioning features or use of this software
87 * must display the following acknowledgement:
88 * "This product includes cryptographic software written by
89 * Eric Young (eay@cryptsoft.com)"
90 * The word 'cryptographic' can be left out if the rouines from the library
91 * being used are not cryptographic related :-).
92 * 4. If you include any Windows specific code (or a derivative thereof) from
93 * the apps directory (application code) you must include an acknowledgement:
94 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
95 *
96 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
97 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
98 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
99 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
100 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
101 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
102 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
103 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
104 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
105 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
106 * SUCH DAMAGE.
107 *
108 * The licence and distribution terms for any publically available version or
109 * derivative of this code cannot be changed. i.e. this code cannot simply be
110 * copied and put under another distribution licence
111 * [including the GNU Public Licence.]
112 */
113
114#include "cryptlib.h"
115#include "bn_lcl.h"
116
117
118#if 0 /* now just a #define */
119int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
120 {
121 return(BN_div(NULL,rem,m,d,ctx));
122 /* note that rem->neg == m->neg (unless the remainder is zero) */
123 }
124#endif
125
126
127int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
128 {
129 /* like BN_mod, but returns non-negative remainder
130 * (i.e., 0 <= r < |d| always holds) */
131
132 if (!(BN_mod(r,m,d,ctx)))
133 return 0;
134 if (!r->neg)
135 return 1;
136 /* now -|d| < r < 0, so we have to set r := r + |d| */
137 return (d->neg ? BN_sub : BN_add)(r, r, d);
138}
139
140
141int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
142 {
143 if (!BN_add(r, a, b)) return 0;
144 return BN_nnmod(r, r, m, ctx);
145 }
146
147
148/* BN_mod_add variant that may be used if both a and b are non-negative
149 * and less than m */
150int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
151 {
152 if (!BN_uadd(r, a, b)) return 0;
153 if (BN_ucmp(r, m) >= 0)
154 return BN_usub(r, r, m);
155 return 1;
156 }
157
158
159int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
160 {
161 if (!BN_sub(r, a, b)) return 0;
162 return BN_nnmod(r, r, m, ctx);
163 }
164
165
166/* BN_mod_sub variant that may be used if both a and b are non-negative
167 * and less than m */
168int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
169 {
170 if (!BN_sub(r, a, b)) return 0;
171 if (r->neg)
172 return BN_add(r, r, m);
173 return 1;
174 }
175
176
177/* slow but works */
178int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
179 BN_CTX *ctx)
180 {
181 BIGNUM *t;
182 int ret=0;
183
184 bn_check_top(a);
185 bn_check_top(b);
186 bn_check_top(m);
187
188 BN_CTX_start(ctx);
189 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
190 if (a == b)
191 { if (!BN_sqr(t,a,ctx)) goto err; }
192 else
193 { if (!BN_mul(t,a,b,ctx)) goto err; }
194 if (!BN_nnmod(r,t,m,ctx)) goto err;
195 bn_check_top(r);
196 ret=1;
197err:
198 BN_CTX_end(ctx);
199 return(ret);
200 }
201
202
203int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
204 {
205 if (!BN_sqr(r, a, ctx)) return 0;
206 /* r->neg == 0, thus we don't need BN_nnmod */
207 return BN_mod(r, r, m, ctx);
208 }
209
210
211int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
212 {
213 if (!BN_lshift1(r, a)) return 0;
214 bn_check_top(r);
215 return BN_nnmod(r, r, m, ctx);
216 }
217
218
219/* BN_mod_lshift1 variant that may be used if a is non-negative
220 * and less than m */
221int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m)
222 {
223 if (!BN_lshift1(r, a)) return 0;
224 bn_check_top(r);
225 if (BN_cmp(r, m) >= 0)
226 return BN_sub(r, r, m);
227 return 1;
228 }
229
230
231int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx)
232 {
233 BIGNUM *abs_m = NULL;
234 int ret;
235
236 if (!BN_nnmod(r, a, m, ctx)) return 0;
237
238 if (m->neg)
239 {
240 abs_m = BN_dup(m);
241 if (abs_m == NULL) return 0;
242 abs_m->neg = 0;
243 }
244
245 ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m));
246 bn_check_top(r);
247
248 if (abs_m)
249 BN_free(abs_m);
250 return ret;
251 }
252
253
254/* BN_mod_lshift variant that may be used if a is non-negative
255 * and less than m */
256int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m)
257 {
258 if (r != a)
259 {
260 if (BN_copy(r, a) == NULL) return 0;
261 }
262
263 while (n > 0)
264 {
265 int max_shift;
266
267 /* 0 < r < m */
268 max_shift = BN_num_bits(m) - BN_num_bits(r);
269 /* max_shift >= 0 */
270
271 if (max_shift < 0)
272 {
273 BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED);
274 return 0;
275 }
276
277 if (max_shift > n)
278 max_shift = n;
279
280 if (max_shift)
281 {
282 if (!BN_lshift(r, r, max_shift)) return 0;
283 n -= max_shift;
284 }
285 else
286 {
287 if (!BN_lshift1(r, r)) return 0;
288 --n;
289 }
290
291 /* BN_num_bits(r) <= BN_num_bits(m) */
292
293 if (BN_cmp(r, m) >= 0)
294 {
295 if (!BN_sub(r, r, m)) return 0;
296 }
297 }
298 bn_check_top(r);
299
300 return 1;
301 }
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
deleted file mode 100644
index 1a866880f5..0000000000
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ /dev/null
@@ -1,567 +0,0 @@
1/* crypto/bn/bn_mont.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112/*
113 * Details about Montgomery multiplication algorithms can be found at
114 * http://security.ece.orst.edu/publications.html, e.g.
115 * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and
116 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
117 */
118
119#include <stdio.h>
120#include "cryptlib.h"
121#include "bn_lcl.h"
122
123#define MONT_WORD /* use the faster word-based algorithm */
124
125#ifdef MONT_WORD
126static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
127#endif
128
129int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
130 BN_MONT_CTX *mont, BN_CTX *ctx)
131 {
132 BIGNUM *tmp;
133 int ret=0;
134#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
135 int num = mont->N.top;
136
137 if (num>1 && a->top==num && b->top==num)
138 {
139 if (bn_wexpand(r,num) == NULL) return(0);
140 if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num))
141 {
142 r->neg = a->neg^b->neg;
143 r->top = num;
144 bn_correct_top(r);
145 return(1);
146 }
147 }
148#endif
149
150 BN_CTX_start(ctx);
151 tmp = BN_CTX_get(ctx);
152 if (tmp == NULL) goto err;
153
154 bn_check_top(tmp);
155 if (a == b)
156 {
157 if (!BN_sqr(tmp,a,ctx)) goto err;
158 }
159 else
160 {
161 if (!BN_mul(tmp,a,b,ctx)) goto err;
162 }
163 /* reduce from aRR to aR */
164#ifdef MONT_WORD
165 if (!BN_from_montgomery_word(r,tmp,mont)) goto err;
166#else
167 if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
168#endif
169 bn_check_top(r);
170 ret=1;
171err:
172 BN_CTX_end(ctx);
173 return(ret);
174 }
175
176#ifdef MONT_WORD
177static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
178 {
179 BIGNUM *n;
180 BN_ULONG *ap,*np,*rp,n0,v,*nrp;
181 int al,nl,max,i,x,ri;
182
183 n= &(mont->N);
184 /* mont->ri is the size of mont->N in bits (rounded up
185 to the word size) */
186 al=ri=mont->ri/BN_BITS2;
187
188 nl=n->top;
189 if ((al == 0) || (nl == 0)) { ret->top=0; return(1); }
190
191 max=(nl+al+1); /* allow for overflow (no?) XXX */
192 if (bn_wexpand(r,max) == NULL) return(0);
193
194 r->neg^=n->neg;
195 np=n->d;
196 rp=r->d;
197 nrp= &(r->d[nl]);
198
199 /* clear the top words of T */
200#if 1
201 for (i=r->top; i<max; i++) /* memset? XXX */
202 r->d[i]=0;
203#else
204 memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
205#endif
206
207 r->top=max;
208 n0=mont->n0[0];
209
210#ifdef BN_COUNT
211 fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
212#endif
213 for (i=0; i<nl; i++)
214 {
215#ifdef __TANDEM
216 {
217 long long t1;
218 long long t2;
219 long long t3;
220 t1 = rp[0] * (n0 & 0177777);
221 t2 = 037777600000l;
222 t2 = n0 & t2;
223 t3 = rp[0] & 0177777;
224 t2 = (t3 * t2) & BN_MASK2;
225 t1 = t1 + t2;
226 v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
227 }
228#else
229 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
230#endif
231 nrp++;
232 rp++;
233 if (((nrp[-1]+=v)&BN_MASK2) >= v)
234 continue;
235 else
236 {
237 if (((++nrp[0])&BN_MASK2) != 0) continue;
238 if (((++nrp[1])&BN_MASK2) != 0) continue;
239 for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
240 }
241 }
242 bn_correct_top(r);
243
244 /* mont->ri will be a multiple of the word size and below code
245 * is kind of BN_rshift(ret,r,mont->ri) equivalent */
246 if (r->top <= ri)
247 {
248 ret->top=0;
249 return(1);
250 }
251 al=r->top-ri;
252
253#define BRANCH_FREE 1
254#if BRANCH_FREE
255 if (bn_wexpand(ret,ri) == NULL) return(0);
256 x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
257 ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
258 ret->neg=r->neg;
259
260 rp=ret->d;
261 ap=&(r->d[ri]);
262
263 {
264 size_t m1,m2;
265
266 v=bn_sub_words(rp,ap,np,ri);
267 /* this ----------------^^ works even in al<ri case
268 * thanks to zealous zeroing of top of the vector in the
269 * beginning. */
270
271 /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
272 /* in other words if subtraction result is real, then
273 * trick unconditional memcpy below to perform in-place
274 * "refresh" instead of actual copy. */
275 m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
276 m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
277 m1|=m2; /* (al!=ri) */
278 m1|=(0-(size_t)v); /* (al!=ri || v) */
279 m1&=~m2; /* (al!=ri || v) && !al>ri */
280 nrp=(BN_ULONG *)(((PTR_SIZE_INT)rp&~m1)|((PTR_SIZE_INT)ap&m1));
281 }
282
283 /* 'i<ri' is chosen to eliminate dependency on input data, even
284 * though it results in redundant copy in al<ri case. */
285 for (i=0,ri-=4; i<ri; i+=4)
286 {
287 BN_ULONG t1,t2,t3,t4;
288
289 t1=nrp[i+0];
290 t2=nrp[i+1];
291 t3=nrp[i+2]; ap[i+0]=0;
292 t4=nrp[i+3]; ap[i+1]=0;
293 rp[i+0]=t1; ap[i+2]=0;
294 rp[i+1]=t2; ap[i+3]=0;
295 rp[i+2]=t3;
296 rp[i+3]=t4;
297 }
298 for (ri+=4; i<ri; i++)
299 rp[i]=nrp[i], ap[i]=0;
300 bn_correct_top(r);
301 bn_correct_top(ret);
302#else
303 if (bn_wexpand(ret,al) == NULL) return(0);
304 ret->top=al;
305 ret->neg=r->neg;
306
307 rp=ret->d;
308 ap=&(r->d[ri]);
309 al-=4;
310 for (i=0; i<al; i+=4)
311 {
312 BN_ULONG t1,t2,t3,t4;
313
314 t1=ap[i+0];
315 t2=ap[i+1];
316 t3=ap[i+2];
317 t4=ap[i+3];
318 rp[i+0]=t1;
319 rp[i+1]=t2;
320 rp[i+2]=t3;
321 rp[i+3]=t4;
322 }
323 al+=4;
324 for (; i<al; i++)
325 rp[i]=ap[i];
326
327 if (BN_ucmp(ret, &(mont->N)) >= 0)
328 {
329 if (!BN_usub(ret,ret,&(mont->N))) return(0);
330 }
331#endif
332 bn_check_top(ret);
333
334 return(1);
335 }
336#endif /* MONT_WORD */
337
338int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
339 BN_CTX *ctx)
340 {
341 int retn=0;
342#ifdef MONT_WORD
343 BIGNUM *t;
344
345 BN_CTX_start(ctx);
346 if ((t = BN_CTX_get(ctx)) && BN_copy(t,a))
347 retn = BN_from_montgomery_word(ret,t,mont);
348 BN_CTX_end(ctx);
349#else /* !MONT_WORD */
350 BIGNUM *t1,*t2;
351
352 BN_CTX_start(ctx);
353 t1 = BN_CTX_get(ctx);
354 t2 = BN_CTX_get(ctx);
355 if (t1 == NULL || t2 == NULL) goto err;
356
357 if (!BN_copy(t1,a)) goto err;
358 BN_mask_bits(t1,mont->ri);
359
360 if (!BN_mul(t2,t1,&mont->Ni,ctx)) goto err;
361 BN_mask_bits(t2,mont->ri);
362
363 if (!BN_mul(t1,t2,&mont->N,ctx)) goto err;
364 if (!BN_add(t2,a,t1)) goto err;
365 if (!BN_rshift(ret,t2,mont->ri)) goto err;
366
367 if (BN_ucmp(ret, &(mont->N)) >= 0)
368 {
369 if (!BN_usub(ret,ret,&(mont->N))) goto err;
370 }
371 retn=1;
372 bn_check_top(ret);
373 err:
374 BN_CTX_end(ctx);
375#endif /* MONT_WORD */
376 return(retn);
377 }
378
379BN_MONT_CTX *BN_MONT_CTX_new(void)
380 {
381 BN_MONT_CTX *ret;
382
383 if ((ret=(BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL)
384 return(NULL);
385
386 BN_MONT_CTX_init(ret);
387 ret->flags=BN_FLG_MALLOCED;
388 return(ret);
389 }
390
391void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
392 {
393 ctx->ri=0;
394 BN_init(&(ctx->RR));
395 BN_init(&(ctx->N));
396 BN_init(&(ctx->Ni));
397 ctx->n0[0] = ctx->n0[1] = 0;
398 ctx->flags=0;
399 }
400
401void BN_MONT_CTX_free(BN_MONT_CTX *mont)
402 {
403 if(mont == NULL)
404 return;
405
406 BN_free(&(mont->RR));
407 BN_free(&(mont->N));
408 BN_free(&(mont->Ni));
409 if (mont->flags & BN_FLG_MALLOCED)
410 OPENSSL_free(mont);
411 }
412
413int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
414 {
415 int ret = 0;
416 BIGNUM *Ri,*R;
417
418 BN_CTX_start(ctx);
419 if((Ri = BN_CTX_get(ctx)) == NULL) goto err;
420 R= &(mont->RR); /* grab RR as a temp */
421 if (!BN_copy(&(mont->N),mod)) goto err; /* Set N */
422 mont->N.neg = 0;
423
424#ifdef MONT_WORD
425 {
426 BIGNUM tmod;
427 BN_ULONG buf[2];
428
429 BN_init(&tmod);
430 tmod.d=buf;
431 tmod.dmax=2;
432 tmod.neg=0;
433
434 mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
435
436#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
437 /* Only certain BN_BITS2<=32 platforms actually make use of
438 * n0[1], and we could use the #else case (with a shorter R
439 * value) for the others. However, currently only the assembler
440 * files do know which is which. */
441
442 BN_zero(R);
443 if (!(BN_set_bit(R,2*BN_BITS2))) goto err;
444
445 tmod.top=0;
446 if ((buf[0] = mod->d[0])) tmod.top=1;
447 if ((buf[1] = mod->top>1 ? mod->d[1] : 0)) tmod.top=2;
448
449 if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
450 goto err;
451 if (!BN_lshift(Ri,Ri,2*BN_BITS2)) goto err; /* R*Ri */
452 if (!BN_is_zero(Ri))
453 {
454 if (!BN_sub_word(Ri,1)) goto err;
455 }
456 else /* if N mod word size == 1 */
457 {
458 if (bn_expand(Ri,(int)sizeof(BN_ULONG)*2) == NULL)
459 goto err;
460 /* Ri-- (mod double word size) */
461 Ri->neg=0;
462 Ri->d[0]=BN_MASK2;
463 Ri->d[1]=BN_MASK2;
464 Ri->top=2;
465 }
466 if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
467 /* Ni = (R*Ri-1)/N,
468 * keep only couple of least significant words: */
469 mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
470 mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
471#else
472 BN_zero(R);
473 if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
474
475 buf[0]=mod->d[0]; /* tmod = N mod word size */
476 buf[1]=0;
477 tmod.top = buf[0] != 0 ? 1 : 0;
478 /* Ri = R^-1 mod N*/
479 if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
480 goto err;
481 if (!BN_lshift(Ri,Ri,BN_BITS2)) goto err; /* R*Ri */
482 if (!BN_is_zero(Ri))
483 {
484 if (!BN_sub_word(Ri,1)) goto err;
485 }
486 else /* if N mod word size == 1 */
487 {
488 if (!BN_set_word(Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */
489 }
490 if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
491 /* Ni = (R*Ri-1)/N,
492 * keep only least significant word: */
493 mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
494 mont->n0[1] = 0;
495#endif
496 }
497#else /* !MONT_WORD */
498 { /* bignum version */
499 mont->ri=BN_num_bits(&mont->N);
500 BN_zero(R);
501 if (!BN_set_bit(R,mont->ri)) goto err; /* R = 2^ri */
502 /* Ri = R^-1 mod N*/
503 if ((BN_mod_inverse(Ri,R,&mont->N,ctx)) == NULL)
504 goto err;
505 if (!BN_lshift(Ri,Ri,mont->ri)) goto err; /* R*Ri */
506 if (!BN_sub_word(Ri,1)) goto err;
507 /* Ni = (R*Ri-1) / N */
508 if (!BN_div(&(mont->Ni),NULL,Ri,&mont->N,ctx)) goto err;
509 }
510#endif
511
512 /* setup RR for conversions */
513 BN_zero(&(mont->RR));
514 if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err;
515 if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err;
516
517 ret = 1;
518err:
519 BN_CTX_end(ctx);
520 return ret;
521 }
522
523BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
524 {
525 if (to == from) return(to);
526
527 if (!BN_copy(&(to->RR),&(from->RR))) return NULL;
528 if (!BN_copy(&(to->N),&(from->N))) return NULL;
529 if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
530 to->ri=from->ri;
531 to->n0[0]=from->n0[0];
532 to->n0[1]=from->n0[1];
533 return(to);
534 }
535
536BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
537 const BIGNUM *mod, BN_CTX *ctx)
538 {
539 int got_write_lock = 0;
540 BN_MONT_CTX *ret;
541
542 CRYPTO_r_lock(lock);
543 if (!*pmont)
544 {
545 CRYPTO_r_unlock(lock);
546 CRYPTO_w_lock(lock);
547 got_write_lock = 1;
548
549 if (!*pmont)
550 {
551 ret = BN_MONT_CTX_new();
552 if (ret && !BN_MONT_CTX_set(ret, mod, ctx))
553 BN_MONT_CTX_free(ret);
554 else
555 *pmont = ret;
556 }
557 }
558
559 ret = *pmont;
560
561 if (got_write_lock)
562 CRYPTO_w_unlock(lock);
563 else
564 CRYPTO_r_unlock(lock);
565
566 return ret;
567 }
diff --git a/src/lib/libcrypto/bn/bn_mpi.c b/src/lib/libcrypto/bn/bn_mpi.c
deleted file mode 100644
index a054d21aed..0000000000
--- a/src/lib/libcrypto/bn/bn_mpi.c
+++ /dev/null
@@ -1,130 +0,0 @@
1/* crypto/bn/bn_mpi.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63int BN_bn2mpi(const BIGNUM *a, unsigned char *d)
64 {
65 int bits;
66 int num=0;
67 int ext=0;
68 long l;
69
70 bits=BN_num_bits(a);
71 num=(bits+7)/8;
72 if (bits > 0)
73 {
74 ext=((bits & 0x07) == 0);
75 }
76 if (d == NULL)
77 return(num+4+ext);
78
79 l=num+ext;
80 d[0]=(unsigned char)(l>>24)&0xff;
81 d[1]=(unsigned char)(l>>16)&0xff;
82 d[2]=(unsigned char)(l>> 8)&0xff;
83 d[3]=(unsigned char)(l )&0xff;
84 if (ext) d[4]=0;
85 num=BN_bn2bin(a,&(d[4+ext]));
86 if (a->neg)
87 d[4]|=0x80;
88 return(num+4+ext);
89 }
90
91BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *a)
92 {
93 long len;
94 int neg=0;
95
96 if (n < 4)
97 {
98 BNerr(BN_F_BN_MPI2BN,BN_R_INVALID_LENGTH);
99 return(NULL);
100 }
101 len=((long)d[0]<<24)|((long)d[1]<<16)|((int)d[2]<<8)|(int)d[3];
102 if ((len+4) != n)
103 {
104 BNerr(BN_F_BN_MPI2BN,BN_R_ENCODING_ERROR);
105 return(NULL);
106 }
107
108 if (a == NULL) a=BN_new();
109 if (a == NULL) return(NULL);
110
111 if (len == 0)
112 {
113 a->neg=0;
114 a->top=0;
115 return(a);
116 }
117 d+=4;
118 if ((*d) & 0x80)
119 neg=1;
120 if (BN_bin2bn(d,(int)len,a) == NULL)
121 return(NULL);
122 a->neg=neg;
123 if (neg)
124 {
125 BN_clear_bit(a,BN_num_bits(a)-1);
126 }
127 bn_check_top(a);
128 return(a);
129 }
130
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
deleted file mode 100644
index 12e5be80eb..0000000000
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ /dev/null
@@ -1,1166 +0,0 @@
1/* crypto/bn/bn_mul.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
70/* Here follows specialised variants of bn_add_words() and
71 bn_sub_words(). They have the property performing operations on
72 arrays of different sizes. The sizes of those arrays is expressed through
73 cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
74 which is the delta between the two lengths, calculated as len(a)-len(b).
75 All lengths are the number of BN_ULONGs... For the operations that require
76 a result array as parameter, it must have the length cl+abs(dl).
77 These functions should probably end up in bn_asm.c as soon as there are
78 assembler counterparts for the systems that use assembler files. */
79
80BN_ULONG bn_sub_part_words(BN_ULONG *r,
81 const BN_ULONG *a, const BN_ULONG *b,
82 int cl, int dl)
83 {
84 BN_ULONG c, t;
85
86 assert(cl >= 0);
87 c = bn_sub_words(r, a, b, cl);
88
89 if (dl == 0)
90 return c;
91
92 r += cl;
93 a += cl;
94 b += cl;
95
96 if (dl < 0)
97 {
98#ifdef BN_COUNT
99 fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
100#endif
101 for (;;)
102 {
103 t = b[0];
104 r[0] = (0-t-c)&BN_MASK2;
105 if (t != 0) c=1;
106 if (++dl >= 0) break;
107
108 t = b[1];
109 r[1] = (0-t-c)&BN_MASK2;
110 if (t != 0) c=1;
111 if (++dl >= 0) break;
112
113 t = b[2];
114 r[2] = (0-t-c)&BN_MASK2;
115 if (t != 0) c=1;
116 if (++dl >= 0) break;
117
118 t = b[3];
119 r[3] = (0-t-c)&BN_MASK2;
120 if (t != 0) c=1;
121 if (++dl >= 0) break;
122
123 b += 4;
124 r += 4;
125 }
126 }
127 else
128 {
129 int save_dl = dl;
130#ifdef BN_COUNT
131 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
132#endif
133 while(c)
134 {
135 t = a[0];
136 r[0] = (t-c)&BN_MASK2;
137 if (t != 0) c=0;
138 if (--dl <= 0) break;
139
140 t = a[1];
141 r[1] = (t-c)&BN_MASK2;
142 if (t != 0) c=0;
143 if (--dl <= 0) break;
144
145 t = a[2];
146 r[2] = (t-c)&BN_MASK2;
147 if (t != 0) c=0;
148 if (--dl <= 0) break;
149
150 t = a[3];
151 r[3] = (t-c)&BN_MASK2;
152 if (t != 0) c=0;
153 if (--dl <= 0) break;
154
155 save_dl = dl;
156 a += 4;
157 r += 4;
158 }
159 if (dl > 0)
160 {
161#ifdef BN_COUNT
162 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
163#endif
164 if (save_dl > dl)
165 {
166 switch (save_dl - dl)
167 {
168 case 1:
169 r[1] = a[1];
170 if (--dl <= 0) break;
171 case 2:
172 r[2] = a[2];
173 if (--dl <= 0) break;
174 case 3:
175 r[3] = a[3];
176 if (--dl <= 0) break;
177 }
178 a += 4;
179 r += 4;
180 }
181 }
182 if (dl > 0)
183 {
184#ifdef BN_COUNT
185 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
186#endif
187 for(;;)
188 {
189 r[0] = a[0];
190 if (--dl <= 0) break;
191 r[1] = a[1];
192 if (--dl <= 0) break;
193 r[2] = a[2];
194 if (--dl <= 0) break;
195 r[3] = a[3];
196 if (--dl <= 0) break;
197
198 a += 4;
199 r += 4;
200 }
201 }
202 }
203 return c;
204 }
205#endif
206
207BN_ULONG bn_add_part_words(BN_ULONG *r,
208 const BN_ULONG *a, const BN_ULONG *b,
209 int cl, int dl)
210 {
211 BN_ULONG c, l, t;
212
213 assert(cl >= 0);
214 c = bn_add_words(r, a, b, cl);
215
216 if (dl == 0)
217 return c;
218
219 r += cl;
220 a += cl;
221 b += cl;
222
223 if (dl < 0)
224 {
225 int save_dl = dl;
226#ifdef BN_COUNT
227 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
228#endif
229 while (c)
230 {
231 l=(c+b[0])&BN_MASK2;
232 c=(l < c);
233 r[0]=l;
234 if (++dl >= 0) break;
235
236 l=(c+b[1])&BN_MASK2;
237 c=(l < c);
238 r[1]=l;
239 if (++dl >= 0) break;
240
241 l=(c+b[2])&BN_MASK2;
242 c=(l < c);
243 r[2]=l;
244 if (++dl >= 0) break;
245
246 l=(c+b[3])&BN_MASK2;
247 c=(l < c);
248 r[3]=l;
249 if (++dl >= 0) break;
250
251 save_dl = dl;
252 b+=4;
253 r+=4;
254 }
255 if (dl < 0)
256 {
257#ifdef BN_COUNT
258 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
259#endif
260 if (save_dl < dl)
261 {
262 switch (dl - save_dl)
263 {
264 case 1:
265 r[1] = b[1];
266 if (++dl >= 0) break;
267 case 2:
268 r[2] = b[2];
269 if (++dl >= 0) break;
270 case 3:
271 r[3] = b[3];
272 if (++dl >= 0) break;
273 }
274 b += 4;
275 r += 4;
276 }
277 }
278 if (dl < 0)
279 {
280#ifdef BN_COUNT
281 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
282#endif
283 for(;;)
284 {
285 r[0] = b[0];
286 if (++dl >= 0) break;
287 r[1] = b[1];
288 if (++dl >= 0) break;
289 r[2] = b[2];
290 if (++dl >= 0) break;
291 r[3] = b[3];
292 if (++dl >= 0) break;
293
294 b += 4;
295 r += 4;
296 }
297 }
298 }
299 else
300 {
301 int save_dl = dl;
302#ifdef BN_COUNT
303 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
304#endif
305 while (c)
306 {
307 t=(a[0]+c)&BN_MASK2;
308 c=(t < c);
309 r[0]=t;
310 if (--dl <= 0) break;
311
312 t=(a[1]+c)&BN_MASK2;
313 c=(t < c);
314 r[1]=t;
315 if (--dl <= 0) break;
316
317 t=(a[2]+c)&BN_MASK2;
318 c=(t < c);
319 r[2]=t;
320 if (--dl <= 0) break;
321
322 t=(a[3]+c)&BN_MASK2;
323 c=(t < c);
324 r[3]=t;
325 if (--dl <= 0) break;
326
327 save_dl = dl;
328 a+=4;
329 r+=4;
330 }
331#ifdef BN_COUNT
332 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
333#endif
334 if (dl > 0)
335 {
336 if (save_dl > dl)
337 {
338 switch (save_dl - dl)
339 {
340 case 1:
341 r[1] = a[1];
342 if (--dl <= 0) break;
343 case 2:
344 r[2] = a[2];
345 if (--dl <= 0) break;
346 case 3:
347 r[3] = a[3];
348 if (--dl <= 0) break;
349 }
350 a += 4;
351 r += 4;
352 }
353 }
354 if (dl > 0)
355 {
356#ifdef BN_COUNT
357 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
358#endif
359 for(;;)
360 {
361 r[0] = a[0];
362 if (--dl <= 0) break;
363 r[1] = a[1];
364 if (--dl <= 0) break;
365 r[2] = a[2];
366 if (--dl <= 0) break;
367 r[3] = a[3];
368 if (--dl <= 0) break;
369
370 a += 4;
371 r += 4;
372 }
373 }
374 }
375 return c;
376 }
377
378#ifdef BN_RECURSION
379/* Karatsuba recursive multiplication algorithm
380 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
381
382/* r is 2*n2 words in size,
383 * a and b are both n2 words in size.
384 * n2 must be a power of 2.
385 * We multiply and return the result.
386 * t must be 2*n2 words in size
387 * We calculate
388 * a[0]*b[0]
389 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
390 * a[1]*b[1]
391 */
392/* dnX may not be positive, but n2/2+dnX has to be */
393void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
394 int dna, int dnb, BN_ULONG *t)
395 {
396 int n=n2/2,c1,c2;
397 int tna=n+dna, tnb=n+dnb;
398 unsigned int neg,zero;
399 BN_ULONG ln,lo,*p;
400
401# ifdef BN_COUNT
402 fprintf(stderr," bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
403# endif
404# ifdef BN_MUL_COMBA
405# if 0
406 if (n2 == 4)
407 {
408 bn_mul_comba4(r,a,b);
409 return;
410 }
411# endif
412 /* Only call bn_mul_comba 8 if n2 == 8 and the
413 * two arrays are complete [steve]
414 */
415 if (n2 == 8 && dna == 0 && dnb == 0)
416 {
417 bn_mul_comba8(r,a,b);
418 return;
419 }
420# endif /* BN_MUL_COMBA */
421 /* Else do normal multiply */
422 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
423 {
424 bn_mul_normal(r,a,n2+dna,b,n2+dnb);
425 if ((dna + dnb) < 0)
426 memset(&r[2*n2 + dna + dnb], 0,
427 sizeof(BN_ULONG) * -(dna + dnb));
428 return;
429 }
430 /* r=(a[0]-a[1])*(b[1]-b[0]) */
431 c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
432 c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
433 zero=neg=0;
434 switch (c1*3+c2)
435 {
436 case -4:
437 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
438 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
439 break;
440 case -3:
441 zero=1;
442 break;
443 case -2:
444 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
445 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
446 neg=1;
447 break;
448 case -1:
449 case 0:
450 case 1:
451 zero=1;
452 break;
453 case 2:
454 bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
455 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
456 neg=1;
457 break;
458 case 3:
459 zero=1;
460 break;
461 case 4:
462 bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
463 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
464 break;
465 }
466
467# ifdef BN_MUL_COMBA
468 if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
469 extra args to do this well */
470 {
471 if (!zero)
472 bn_mul_comba4(&(t[n2]),t,&(t[n]));
473 else
474 memset(&(t[n2]),0,8*sizeof(BN_ULONG));
475
476 bn_mul_comba4(r,a,b);
477 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
478 }
479 else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
480 take extra args to do this
481 well */
482 {
483 if (!zero)
484 bn_mul_comba8(&(t[n2]),t,&(t[n]));
485 else
486 memset(&(t[n2]),0,16*sizeof(BN_ULONG));
487
488 bn_mul_comba8(r,a,b);
489 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
490 }
491 else
492# endif /* BN_MUL_COMBA */
493 {
494 p= &(t[n2*2]);
495 if (!zero)
496 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
497 else
498 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
499 bn_mul_recursive(r,a,b,n,0,0,p);
500 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
501 }
502
503 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
504 * r[10] holds (a[0]*b[0])
505 * r[32] holds (b[1]*b[1])
506 */
507
508 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
509
510 if (neg) /* if t[32] is negative */
511 {
512 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
513 }
514 else
515 {
516 /* Might have a carry */
517 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
518 }
519
520 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
521 * r[10] holds (a[0]*b[0])
522 * r[32] holds (b[1]*b[1])
523 * c1 holds the carry bits
524 */
525 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
526 if (c1)
527 {
528 p= &(r[n+n2]);
529 lo= *p;
530 ln=(lo+c1)&BN_MASK2;
531 *p=ln;
532
533 /* The overflow will stop before we over write
534 * words we should not overwrite */
535 if (ln < (BN_ULONG)c1)
536 {
537 do {
538 p++;
539 lo= *p;
540 ln=(lo+1)&BN_MASK2;
541 *p=ln;
542 } while (ln == 0);
543 }
544 }
545 }
546
547/* n+tn is the word length
548 * t needs to be n*4 is size, as does r */
549/* tnX may not be negative but less than n */
550void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
551 int tna, int tnb, BN_ULONG *t)
552 {
553 int i,j,n2=n*2;
554 int c1,c2,neg;
555 BN_ULONG ln,lo,*p;
556
557# ifdef BN_COUNT
558 fprintf(stderr," bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
559 n, tna, n, tnb);
560# endif
561 if (n < 8)
562 {
563 bn_mul_normal(r,a,n+tna,b,n+tnb);
564 return;
565 }
566
567 /* r=(a[0]-a[1])*(b[1]-b[0]) */
568 c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
569 c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
570 neg=0;
571 switch (c1*3+c2)
572 {
573 case -4:
574 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
575 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
576 break;
577 case -3:
578 /* break; */
579 case -2:
580 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
581 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
582 neg=1;
583 break;
584 case -1:
585 case 0:
586 case 1:
587 /* break; */
588 case 2:
589 bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
590 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
591 neg=1;
592 break;
593 case 3:
594 /* break; */
595 case 4:
596 bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
597 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
598 break;
599 }
600 /* The zero case isn't yet implemented here. The speedup
601 would probably be negligible. */
602# if 0
603 if (n == 4)
604 {
605 bn_mul_comba4(&(t[n2]),t,&(t[n]));
606 bn_mul_comba4(r,a,b);
607 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
608 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
609 }
610 else
611# endif
612 if (n == 8)
613 {
614 bn_mul_comba8(&(t[n2]),t,&(t[n]));
615 bn_mul_comba8(r,a,b);
616 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
617 memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
618 }
619 else
620 {
621 p= &(t[n2*2]);
622 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
623 bn_mul_recursive(r,a,b,n,0,0,p);
624 i=n/2;
625 /* If there is only a bottom half to the number,
626 * just do it */
627 if (tna > tnb)
628 j = tna - i;
629 else
630 j = tnb - i;
631 if (j == 0)
632 {
633 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
634 i,tna-i,tnb-i,p);
635 memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
636 }
637 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
638 {
639 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
640 i,tna-i,tnb-i,p);
641 memset(&(r[n2+tna+tnb]),0,
642 sizeof(BN_ULONG)*(n2-tna-tnb));
643 }
644 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
645 {
646 memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
647 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
648 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
649 {
650 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
651 }
652 else
653 {
654 for (;;)
655 {
656 i/=2;
657 /* these simplified conditions work
658 * exclusively because difference
659 * between tna and tnb is 1 or 0 */
660 if (i < tna || i < tnb)
661 {
662 bn_mul_part_recursive(&(r[n2]),
663 &(a[n]),&(b[n]),
664 i,tna-i,tnb-i,p);
665 break;
666 }
667 else if (i == tna || i == tnb)
668 {
669 bn_mul_recursive(&(r[n2]),
670 &(a[n]),&(b[n]),
671 i,tna-i,tnb-i,p);
672 break;
673 }
674 }
675 }
676 }
677 }
678
679 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
680 * r[10] holds (a[0]*b[0])
681 * r[32] holds (b[1]*b[1])
682 */
683
684 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
685
686 if (neg) /* if t[32] is negative */
687 {
688 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
689 }
690 else
691 {
692 /* Might have a carry */
693 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
694 }
695
696 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
697 * r[10] holds (a[0]*b[0])
698 * r[32] holds (b[1]*b[1])
699 * c1 holds the carry bits
700 */
701 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
702 if (c1)
703 {
704 p= &(r[n+n2]);
705 lo= *p;
706 ln=(lo+c1)&BN_MASK2;
707 *p=ln;
708
709 /* The overflow will stop before we over write
710 * words we should not overwrite */
711 if (ln < (BN_ULONG)c1)
712 {
713 do {
714 p++;
715 lo= *p;
716 ln=(lo+1)&BN_MASK2;
717 *p=ln;
718 } while (ln == 0);
719 }
720 }
721 }
722
723/* a and b must be the same size, which is n2.
724 * r needs to be n2 words and t needs to be n2*2
725 */
726void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
727 BN_ULONG *t)
728 {
729 int n=n2/2;
730
731# ifdef BN_COUNT
732 fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
733# endif
734
735 bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
736 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
737 {
738 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
739 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
740 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
741 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
742 }
743 else
744 {
745 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
746 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
747 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
748 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
749 }
750 }
751
752/* a and b must be the same size, which is n2.
753 * r needs to be n2 words and t needs to be n2*2
754 * l is the low words of the output.
755 * t needs to be n2*3
756 */
757void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
758 BN_ULONG *t)
759 {
760 int i,n;
761 int c1,c2;
762 int neg,oneg,zero;
763 BN_ULONG ll,lc,*lp,*mp;
764
765# ifdef BN_COUNT
766 fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
767# endif
768 n=n2/2;
769
770 /* Calculate (al-ah)*(bh-bl) */
771 neg=zero=0;
772 c1=bn_cmp_words(&(a[0]),&(a[n]),n);
773 c2=bn_cmp_words(&(b[n]),&(b[0]),n);
774 switch (c1*3+c2)
775 {
776 case -4:
777 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
778 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
779 break;
780 case -3:
781 zero=1;
782 break;
783 case -2:
784 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
785 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
786 neg=1;
787 break;
788 case -1:
789 case 0:
790 case 1:
791 zero=1;
792 break;
793 case 2:
794 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
795 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
796 neg=1;
797 break;
798 case 3:
799 zero=1;
800 break;
801 case 4:
802 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
803 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
804 break;
805 }
806
807 oneg=neg;
808 /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
809 /* r[10] = (a[1]*b[1]) */
810# ifdef BN_MUL_COMBA
811 if (n == 8)
812 {
813 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
814 bn_mul_comba8(r,&(a[n]),&(b[n]));
815 }
816 else
817# endif
818 {
819 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
820 bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
821 }
822
823 /* s0 == low(al*bl)
824 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
825 * We know s0 and s1 so the only unknown is high(al*bl)
826 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
827 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
828 */
829 if (l != NULL)
830 {
831 lp= &(t[n2+n]);
832 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
833 }
834 else
835 {
836 c1=0;
837 lp= &(r[0]);
838 }
839
840 if (neg)
841 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
842 else
843 {
844 bn_add_words(&(t[n2]),lp,&(t[0]),n);
845 neg=0;
846 }
847
848 if (l != NULL)
849 {
850 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
851 }
852 else
853 {
854 lp= &(t[n2+n]);
855 mp= &(t[n2]);
856 for (i=0; i<n; i++)
857 lp[i]=((~mp[i])+1)&BN_MASK2;
858 }
859
860 /* s[0] = low(al*bl)
861 * t[3] = high(al*bl)
862 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
863 * r[10] = (a[1]*b[1])
864 */
865 /* R[10] = al*bl
866 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
867 * R[32] = ah*bh
868 */
869 /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
870 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
871 * R[3]=r[1]+(carry/borrow)
872 */
873 if (l != NULL)
874 {
875 lp= &(t[n2]);
876 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
877 }
878 else
879 {
880 lp= &(t[n2+n]);
881 c1=0;
882 }
883 c1+=(int)(bn_add_words(&(t[n2]),lp, &(r[0]),n));
884 if (oneg)
885 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
886 else
887 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
888
889 c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
890 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
891 if (oneg)
892 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
893 else
894 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
895
896 if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
897 {
898 i=0;
899 if (c1 > 0)
900 {
901 lc=c1;
902 do {
903 ll=(r[i]+lc)&BN_MASK2;
904 r[i++]=ll;
905 lc=(lc > ll);
906 } while (lc);
907 }
908 else
909 {
910 lc= -c1;
911 do {
912 ll=r[i];
913 r[i++]=(ll-lc)&BN_MASK2;
914 lc=(lc > ll);
915 } while (lc);
916 }
917 }
918 if (c2 != 0) /* Add starting at r[1] */
919 {
920 i=n;
921 if (c2 > 0)
922 {
923 lc=c2;
924 do {
925 ll=(r[i]+lc)&BN_MASK2;
926 r[i++]=ll;
927 lc=(lc > ll);
928 } while (lc);
929 }
930 else
931 {
932 lc= -c2;
933 do {
934 ll=r[i];
935 r[i++]=(ll-lc)&BN_MASK2;
936 lc=(lc > ll);
937 } while (lc);
938 }
939 }
940 }
941#endif /* BN_RECURSION */
942
943int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
944 {
945 int ret=0;
946 int top,al,bl;
947 BIGNUM *rr;
948#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
949 int i;
950#endif
951#ifdef BN_RECURSION
952 BIGNUM *t=NULL;
953 int j=0,k;
954#endif
955
956#ifdef BN_COUNT
957 fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
958#endif
959
960 bn_check_top(a);
961 bn_check_top(b);
962 bn_check_top(r);
963
964 al=a->top;
965 bl=b->top;
966
967 if ((al == 0) || (bl == 0))
968 {
969 BN_zero(r);
970 return(1);
971 }
972 top=al+bl;
973
974 BN_CTX_start(ctx);
975 if ((r == a) || (r == b))
976 {
977 if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
978 }
979 else
980 rr = r;
981 rr->neg=a->neg^b->neg;
982
983#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
984 i = al-bl;
985#endif
986#ifdef BN_MUL_COMBA
987 if (i == 0)
988 {
989# if 0
990 if (al == 4)
991 {
992 if (bn_wexpand(rr,8) == NULL) goto err;
993 rr->top=8;
994 bn_mul_comba4(rr->d,a->d,b->d);
995 goto end;
996 }
997# endif
998 if (al == 8)
999 {
1000 if (bn_wexpand(rr,16) == NULL) goto err;
1001 rr->top=16;
1002 bn_mul_comba8(rr->d,a->d,b->d);
1003 goto end;
1004 }
1005 }
1006#endif /* BN_MUL_COMBA */
1007#ifdef BN_RECURSION
1008 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
1009 {
1010 if (i >= -1 && i <= 1)
1011 {
1012 /* Find out the power of two lower or equal
1013 to the longest of the two numbers */
1014 if (i >= 0)
1015 {
1016 j = BN_num_bits_word((BN_ULONG)al);
1017 }
1018 if (i == -1)
1019 {
1020 j = BN_num_bits_word((BN_ULONG)bl);
1021 }
1022 j = 1<<(j-1);
1023 assert(j <= al || j <= bl);
1024 k = j+j;
1025 t = BN_CTX_get(ctx);
1026 if (t == NULL)
1027 goto err;
1028 if (al > j || bl > j)
1029 {
1030 if (bn_wexpand(t,k*4) == NULL) goto err;
1031 if (bn_wexpand(rr,k*4) == NULL) goto err;
1032 bn_mul_part_recursive(rr->d,a->d,b->d,
1033 j,al-j,bl-j,t->d);
1034 }
1035 else /* al <= j || bl <= j */
1036 {
1037 if (bn_wexpand(t,k*2) == NULL) goto err;
1038 if (bn_wexpand(rr,k*2) == NULL) goto err;
1039 bn_mul_recursive(rr->d,a->d,b->d,
1040 j,al-j,bl-j,t->d);
1041 }
1042 rr->top=top;
1043 goto end;
1044 }
1045#if 0
1046 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
1047 {
1048 BIGNUM *tmp_bn = (BIGNUM *)b;
1049 if (bn_wexpand(tmp_bn,al) == NULL) goto err;
1050 tmp_bn->d[bl]=0;
1051 bl++;
1052 i--;
1053 }
1054 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1055 {
1056 BIGNUM *tmp_bn = (BIGNUM *)a;
1057 if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
1058 tmp_bn->d[al]=0;
1059 al++;
1060 i++;
1061 }
1062 if (i == 0)
1063 {
1064 /* symmetric and > 4 */
1065 /* 16 or larger */
1066 j=BN_num_bits_word((BN_ULONG)al);
1067 j=1<<(j-1);
1068 k=j+j;
1069 t = BN_CTX_get(ctx);
1070 if (al == j) /* exact multiple */
1071 {
1072 if (bn_wexpand(t,k*2) == NULL) goto err;
1073 if (bn_wexpand(rr,k*2) == NULL) goto err;
1074 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1075 }
1076 else
1077 {
1078 if (bn_wexpand(t,k*4) == NULL) goto err;
1079 if (bn_wexpand(rr,k*4) == NULL) goto err;
1080 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1081 }
1082 rr->top=top;
1083 goto end;
1084 }
1085#endif
1086 }
1087#endif /* BN_RECURSION */
1088 if (bn_wexpand(rr,top) == NULL) goto err;
1089 rr->top=top;
1090 bn_mul_normal(rr->d,a->d,al,b->d,bl);
1091
1092#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1093end:
1094#endif
1095 bn_correct_top(rr);
1096 if (r != rr) BN_copy(r,rr);
1097 ret=1;
1098err:
1099 bn_check_top(r);
1100 BN_CTX_end(ctx);
1101 return(ret);
1102 }
1103
1104void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1105 {
1106 BN_ULONG *rr;
1107
1108#ifdef BN_COUNT
1109 fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
1110#endif
1111
1112 if (na < nb)
1113 {
1114 int itmp;
1115 BN_ULONG *ltmp;
1116
1117 itmp=na; na=nb; nb=itmp;
1118 ltmp=a; a=b; b=ltmp;
1119
1120 }
1121 rr= &(r[na]);
1122 if (nb <= 0)
1123 {
1124 (void)bn_mul_words(r,a,na,0);
1125 return;
1126 }
1127 else
1128 rr[0]=bn_mul_words(r,a,na,b[0]);
1129
1130 for (;;)
1131 {
1132 if (--nb <= 0) return;
1133 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1134 if (--nb <= 0) return;
1135 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1136 if (--nb <= 0) return;
1137 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1138 if (--nb <= 0) return;
1139 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1140 rr+=4;
1141 r+=4;
1142 b+=4;
1143 }
1144 }
1145
1146void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1147 {
1148#ifdef BN_COUNT
1149 fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
1150#endif
1151 bn_mul_words(r,a,n,b[0]);
1152
1153 for (;;)
1154 {
1155 if (--n <= 0) return;
1156 bn_mul_add_words(&(r[1]),a,n,b[1]);
1157 if (--n <= 0) return;
1158 bn_mul_add_words(&(r[2]),a,n,b[2]);
1159 if (--n <= 0) return;
1160 bn_mul_add_words(&(r[3]),a,n,b[3]);
1161 if (--n <= 0) return;
1162 bn_mul_add_words(&(r[4]),a,n,b[4]);
1163 r+=4;
1164 b+=4;
1165 }
1166 }
diff --git a/src/lib/libcrypto/bn/bn_nist.c b/src/lib/libcrypto/bn/bn_nist.c
deleted file mode 100644
index c6de032696..0000000000
--- a/src/lib/libcrypto/bn/bn_nist.c
+++ /dev/null
@@ -1,844 +0,0 @@
1/* crypto/bn/bn_nist.c */
2/*
3 * Written by Nils Larsch for the OpenSSL project
4 */
5/* ====================================================================
6 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. All advertising materials mentioning features or use of this
21 * software must display the following acknowledgment:
22 * "This product includes software developed by the OpenSSL Project
23 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
24 *
25 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26 * endorse or promote products derived from this software without
27 * prior written permission. For written permission, please contact
28 * openssl-core@openssl.org.
29 *
30 * 5. Products derived from this software may not be called "OpenSSL"
31 * nor may "OpenSSL" appear in their names without prior written
32 * permission of the OpenSSL Project.
33 *
34 * 6. Redistributions of any form whatsoever must retain the following
35 * acknowledgment:
36 * "This product includes software developed by the OpenSSL Project
37 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
38 *
39 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
43 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50 * OF THE POSSIBILITY OF SUCH DAMAGE.
51 * ====================================================================
52 *
53 * This product includes cryptographic software written by Eric Young
54 * (eay@cryptsoft.com). This product includes software written by Tim
55 * Hudson (tjh@cryptsoft.com).
56 *
57 */
58
59#include "bn_lcl.h"
60#include "cryptlib.h"
61
62
63#define BN_NIST_192_TOP (192+BN_BITS2-1)/BN_BITS2
64#define BN_NIST_224_TOP (224+BN_BITS2-1)/BN_BITS2
65#define BN_NIST_256_TOP (256+BN_BITS2-1)/BN_BITS2
66#define BN_NIST_384_TOP (384+BN_BITS2-1)/BN_BITS2
67#define BN_NIST_521_TOP (521+BN_BITS2-1)/BN_BITS2
68
69/* pre-computed tables are "carry-less" values of modulus*(i+1) */
70#if BN_BITS2 == 64
71static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
72 {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFFULL},
73 {0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL},
74 {0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFCULL,0xFFFFFFFFFFFFFFFFULL}
75 };
76static const BN_ULONG _nist_p_192_sqr[] = {
77 0x0000000000000001ULL,0x0000000000000002ULL,0x0000000000000001ULL,
78 0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL
79 };
80static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
81 {0x0000000000000001ULL,0xFFFFFFFF00000000ULL,
82 0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL},
83 {0x0000000000000002ULL,0xFFFFFFFE00000000ULL,
84 0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFFULL} /* this one is "carry-full" */
85 };
86static const BN_ULONG _nist_p_224_sqr[] = {
87 0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
88 0xFFFFFFFFFFFFFFFFULL,0x0000000200000000ULL,
89 0x0000000000000000ULL,0xFFFFFFFFFFFFFFFEULL,
90 0xFFFFFFFFFFFFFFFFULL
91 };
92static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
93 {0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL,
94 0x0000000000000000ULL,0xFFFFFFFF00000001ULL},
95 {0xFFFFFFFFFFFFFFFEULL,0x00000001FFFFFFFFULL,
96 0x0000000000000000ULL,0xFFFFFFFE00000002ULL},
97 {0xFFFFFFFFFFFFFFFDULL,0x00000002FFFFFFFFULL,
98 0x0000000000000000ULL,0xFFFFFFFD00000003ULL},
99 {0xFFFFFFFFFFFFFFFCULL,0x00000003FFFFFFFFULL,
100 0x0000000000000000ULL,0xFFFFFFFC00000004ULL},
101 {0xFFFFFFFFFFFFFFFBULL,0x00000004FFFFFFFFULL,
102 0x0000000000000000ULL,0xFFFFFFFB00000005ULL},
103 };
104static const BN_ULONG _nist_p_256_sqr[] = {
105 0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
106 0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFEULL,
107 0x00000001FFFFFFFEULL,0x00000001FFFFFFFEULL,
108 0xFFFFFFFE00000001ULL,0xFFFFFFFE00000002ULL
109 };
110static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
111 {0x00000000FFFFFFFFULL,0xFFFFFFFF00000000ULL,0xFFFFFFFFFFFFFFFEULL,
112 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
113 {0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
114 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
115 {0x00000002FFFFFFFDULL,0xFFFFFFFD00000000ULL,0xFFFFFFFFFFFFFFFCULL,
116 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
117 {0x00000003FFFFFFFCULL,0xFFFFFFFC00000000ULL,0xFFFFFFFFFFFFFFFBULL,
118 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
119 {0x00000004FFFFFFFBULL,0xFFFFFFFB00000000ULL,0xFFFFFFFFFFFFFFFAULL,
120 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
121 };
122static const BN_ULONG _nist_p_384_sqr[] = {
123 0xFFFFFFFE00000001ULL,0x0000000200000000ULL,0xFFFFFFFE00000000ULL,
124 0x0000000200000000ULL,0x0000000000000001ULL,0x0000000000000000ULL,
125 0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
126 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL
127 };
128static const BN_ULONG _nist_p_521[] =
129 {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
130 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
131 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
132 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
133 0x00000000000001FFULL};
134static const BN_ULONG _nist_p_521_sqr[] = {
135 0x0000000000000001ULL,0x0000000000000000ULL,0x0000000000000000ULL,
136 0x0000000000000000ULL,0x0000000000000000ULL,0x0000000000000000ULL,
137 0x0000000000000000ULL,0x0000000000000000ULL,0xFFFFFFFFFFFFFC00ULL,
138 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
139 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
140 0xFFFFFFFFFFFFFFFFULL,0x000000000003FFFFULL
141 };
142#elif BN_BITS2 == 32
143static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
144 {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
145 {0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
146 {0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
147 };
148static const BN_ULONG _nist_p_192_sqr[] = {
149 0x00000001,0x00000000,0x00000002,0x00000000,0x00000001,0x00000000,
150 0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
151 };
152static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
153 {0x00000001,0x00000000,0x00000000,0xFFFFFFFF,
154 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
155 {0x00000002,0x00000000,0x00000000,0xFFFFFFFE,
156 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
157 };
158static const BN_ULONG _nist_p_224_sqr[] = {
159 0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
160 0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000002,
161 0x00000000,0x00000000,0xFFFFFFFE,0xFFFFFFFF,
162 0xFFFFFFFF,0xFFFFFFFF
163 };
164static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
165 {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0x00000000,
166 0x00000000,0x00000000,0x00000001,0xFFFFFFFF},
167 {0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0x00000001,
168 0x00000000,0x00000000,0x00000002,0xFFFFFFFE},
169 {0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0x00000002,
170 0x00000000,0x00000000,0x00000003,0xFFFFFFFD},
171 {0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0x00000003,
172 0x00000000,0x00000000,0x00000004,0xFFFFFFFC},
173 {0xFFFFFFFB,0xFFFFFFFF,0xFFFFFFFF,0x00000004,
174 0x00000000,0x00000000,0x00000005,0xFFFFFFFB},
175 };
176static const BN_ULONG _nist_p_256_sqr[] = {
177 0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
178 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0x00000001,
179 0xFFFFFFFE,0x00000001,0xFFFFFFFE,0x00000001,
180 0x00000001,0xFFFFFFFE,0x00000002,0xFFFFFFFE
181 };
182static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
183 {0xFFFFFFFF,0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,
184 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
185 {0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
186 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
187 {0xFFFFFFFD,0x00000002,0x00000000,0xFFFFFFFD,0xFFFFFFFC,0xFFFFFFFF,
188 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
189 {0xFFFFFFFC,0x00000003,0x00000000,0xFFFFFFFC,0xFFFFFFFB,0xFFFFFFFF,
190 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
191 {0xFFFFFFFB,0x00000004,0x00000000,0xFFFFFFFB,0xFFFFFFFA,0xFFFFFFFF,
192 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
193 };
194static const BN_ULONG _nist_p_384_sqr[] = {
195 0x00000001,0xFFFFFFFE,0x00000000,0x00000002,0x00000000,0xFFFFFFFE,
196 0x00000000,0x00000002,0x00000001,0x00000000,0x00000000,0x00000000,
197 0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
198 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
199 };
200static const BN_ULONG _nist_p_521[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
201 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
202 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
203 0xFFFFFFFF,0x000001FF};
204static const BN_ULONG _nist_p_521_sqr[] = {
205 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
206 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
207 0x00000000,0x00000000,0x00000000,0x00000000,0xFFFFFC00,0xFFFFFFFF,
208 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
209 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
210 0xFFFFFFFF,0xFFFFFFFF,0x0003FFFF
211 };
212#else
213#error "unsupported BN_BITS2"
214#endif
215
216
217static const BIGNUM _bignum_nist_p_192 =
218 {
219 (BN_ULONG *)_nist_p_192[0],
220 BN_NIST_192_TOP,
221 BN_NIST_192_TOP,
222 0,
223 BN_FLG_STATIC_DATA
224 };
225
226static const BIGNUM _bignum_nist_p_224 =
227 {
228 (BN_ULONG *)_nist_p_224[0],
229 BN_NIST_224_TOP,
230 BN_NIST_224_TOP,
231 0,
232 BN_FLG_STATIC_DATA
233 };
234
235static const BIGNUM _bignum_nist_p_256 =
236 {
237 (BN_ULONG *)_nist_p_256[0],
238 BN_NIST_256_TOP,
239 BN_NIST_256_TOP,
240 0,
241 BN_FLG_STATIC_DATA
242 };
243
244static const BIGNUM _bignum_nist_p_384 =
245 {
246 (BN_ULONG *)_nist_p_384[0],
247 BN_NIST_384_TOP,
248 BN_NIST_384_TOP,
249 0,
250 BN_FLG_STATIC_DATA
251 };
252
253static const BIGNUM _bignum_nist_p_521 =
254 {
255 (BN_ULONG *)_nist_p_521,
256 BN_NIST_521_TOP,
257 BN_NIST_521_TOP,
258 0,
259 BN_FLG_STATIC_DATA
260 };
261
262
263const BIGNUM *BN_get0_nist_prime_192(void)
264 {
265 return &_bignum_nist_p_192;
266 }
267
268const BIGNUM *BN_get0_nist_prime_224(void)
269 {
270 return &_bignum_nist_p_224;
271 }
272
273const BIGNUM *BN_get0_nist_prime_256(void)
274 {
275 return &_bignum_nist_p_256;
276 }
277
278const BIGNUM *BN_get0_nist_prime_384(void)
279 {
280 return &_bignum_nist_p_384;
281 }
282
283const BIGNUM *BN_get0_nist_prime_521(void)
284 {
285 return &_bignum_nist_p_521;
286 }
287
288
289static void nist_cp_bn_0(BN_ULONG *buf, BN_ULONG *a, int top, int max)
290 {
291 int i;
292 BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
293
294#ifdef BN_DEBUG
295 OPENSSL_assert(top <= max);
296#endif
297 for (i = (top); i != 0; i--)
298 *_tmp1++ = *_tmp2++;
299 for (i = (max) - (top); i != 0; i--)
300 *_tmp1++ = (BN_ULONG) 0;
301 }
302
303static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
304 {
305 int i;
306 BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
307 for (i = (top); i != 0; i--)
308 *_tmp1++ = *_tmp2++;
309 }
310
311#if BN_BITS2 == 64
312#define bn_cp_64(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
313#define bn_64_set_0(to, n) (to)[n] = (BN_ULONG)0;
314/*
315 * two following macros are implemented under assumption that they
316 * are called in a sequence with *ascending* n, i.e. as they are...
317 */
318#define bn_cp_32_naked(to, n, from, m) (((n)&1)?(to[(n)/2]|=((m)&1)?(from[(m)/2]&BN_MASK2h):(from[(m)/2]<<32))\
319 :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l)))
320#define bn_32_set_0(to, n) (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0));
321#define bn_cp_32(to,n,from,m) ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
322#else
323#define bn_cp_64(to, n, from, m) \
324 { \
325 bn_cp_32(to, (n)*2, from, (m)*2); \
326 bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
327 }
328#define bn_64_set_0(to, n) \
329 { \
330 bn_32_set_0(to, (n)*2); \
331 bn_32_set_0(to, (n)*2+1); \
332 }
333#if BN_BITS2 == 32
334#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
335#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0;
336#endif
337#endif /* BN_BITS2 != 64 */
338
339
340#define nist_set_192(to, from, a1, a2, a3) \
341 { \
342 bn_cp_64(to, 0, from, (a3) - 3) \
343 bn_cp_64(to, 1, from, (a2) - 3) \
344 bn_cp_64(to, 2, from, (a1) - 3) \
345 }
346
347int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
348 BN_CTX *ctx)
349 {
350 int top = a->top, i;
351 int carry;
352 register BN_ULONG *r_d, *a_d = a->d;
353 BN_ULONG t_d[BN_NIST_192_TOP],
354 buf[BN_NIST_192_TOP],
355 c_d[BN_NIST_192_TOP],
356 *res;
357 PTR_SIZE_INT mask;
358 static const BIGNUM _bignum_nist_p_192_sqr = {
359 (BN_ULONG *)_nist_p_192_sqr,
360 sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
361 sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
362 0,BN_FLG_STATIC_DATA };
363
364 field = &_bignum_nist_p_192; /* just to make sure */
365
366 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_192_sqr)>=0)
367 return BN_nnmod(r, a, field, ctx);
368
369 i = BN_ucmp(field, a);
370 if (i == 0)
371 {
372 BN_zero(r);
373 return 1;
374 }
375 else if (i > 0)
376 return (r == a) ? 1 : (BN_copy(r ,a) != NULL);
377
378 if (r != a)
379 {
380 if (!bn_wexpand(r, BN_NIST_192_TOP))
381 return 0;
382 r_d = r->d;
383 nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
384 }
385 else
386 r_d = a_d;
387
388 nist_cp_bn_0(buf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);
389
390 nist_set_192(t_d, buf, 0, 3, 3);
391 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
392 nist_set_192(t_d, buf, 4, 4, 0);
393 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
394 nist_set_192(t_d, buf, 5, 5, 5)
395 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
396
397 if (carry > 0)
398 carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP);
399 else
400 carry = 1;
401
402 /*
403 * we need 'if (carry==0 || result>=modulus) result-=modulus;'
404 * as comparison implies subtraction, we can write
405 * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
406 * this is what happens below, but without explicit if:-) a.
407 */
408 mask = 0-(PTR_SIZE_INT)bn_sub_words(c_d,r_d,_nist_p_192[0],BN_NIST_192_TOP);
409 mask &= 0-(PTR_SIZE_INT)carry;
410 res = (BN_ULONG *)
411 (((PTR_SIZE_INT)c_d&~mask) | ((PTR_SIZE_INT)r_d&mask));
412 nist_cp_bn(r_d, res, BN_NIST_192_TOP);
413 r->top = BN_NIST_192_TOP;
414 bn_correct_top(r);
415
416 return 1;
417 }
418
419typedef BN_ULONG (*bn_addsub_f)(BN_ULONG *,const BN_ULONG *,const BN_ULONG *,int);
420
421#define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
422 { \
423 bn_cp_32(to, 0, from, (a7) - 7) \
424 bn_cp_32(to, 1, from, (a6) - 7) \
425 bn_cp_32(to, 2, from, (a5) - 7) \
426 bn_cp_32(to, 3, from, (a4) - 7) \
427 bn_cp_32(to, 4, from, (a3) - 7) \
428 bn_cp_32(to, 5, from, (a2) - 7) \
429 bn_cp_32(to, 6, from, (a1) - 7) \
430 }
431
432int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
433 BN_CTX *ctx)
434 {
435 int top = a->top, i;
436 int carry;
437 BN_ULONG *r_d, *a_d = a->d;
438 BN_ULONG t_d[BN_NIST_224_TOP],
439 buf[BN_NIST_224_TOP],
440 c_d[BN_NIST_224_TOP],
441 *res;
442 PTR_SIZE_INT mask;
443 union { bn_addsub_f f; PTR_SIZE_INT p; } u;
444 static const BIGNUM _bignum_nist_p_224_sqr = {
445 (BN_ULONG *)_nist_p_224_sqr,
446 sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
447 sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
448 0,BN_FLG_STATIC_DATA };
449
450
451 field = &_bignum_nist_p_224; /* just to make sure */
452
453 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_224_sqr)>=0)
454 return BN_nnmod(r, a, field, ctx);
455
456 i = BN_ucmp(field, a);
457 if (i == 0)
458 {
459 BN_zero(r);
460 return 1;
461 }
462 else if (i > 0)
463 return (r == a)? 1 : (BN_copy(r ,a) != NULL);
464
465 if (r != a)
466 {
467 if (!bn_wexpand(r, BN_NIST_224_TOP))
468 return 0;
469 r_d = r->d;
470 nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
471 }
472 else
473 r_d = a_d;
474
475#if BN_BITS2==64
476 /* copy upper 256 bits of 448 bit number ... */
477 nist_cp_bn_0(t_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
478 /* ... and right shift by 32 to obtain upper 224 bits */
479 nist_set_224(buf, t_d, 14, 13, 12, 11, 10, 9, 8);
480 /* truncate lower part to 224 bits too */
481 r_d[BN_NIST_224_TOP-1] &= BN_MASK2l;
482#else
483 nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
484#endif
485 nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0);
486 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
487 nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0);
488 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
489 nist_set_224(t_d, buf, 13, 12, 11, 10, 9, 8, 7);
490 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
491 nist_set_224(t_d, buf, 0, 0, 0, 0, 13, 12, 11);
492 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
493
494#if BN_BITS2==64
495 carry = (int)(r_d[BN_NIST_224_TOP-1]>>32);
496#endif
497 u.f = bn_sub_words;
498 if (carry > 0)
499 {
500 carry = (int)bn_sub_words(r_d,r_d,_nist_p_224[carry-1],BN_NIST_224_TOP);
501#if BN_BITS2==64
502 carry=(int)(~(r_d[BN_NIST_224_TOP-1]>>32))&1;
503#endif
504 }
505 else if (carry < 0)
506 {
507 /* it's a bit more comlicated logic in this case.
508 * if bn_add_words yields no carry, then result
509 * has to be adjusted by unconditionally *adding*
510 * the modulus. but if it does, then result has
511 * to be compared to the modulus and conditionally
512 * adjusted by *subtracting* the latter. */
513 carry = (int)bn_add_words(r_d,r_d,_nist_p_224[-carry-1],BN_NIST_224_TOP);
514 mask = 0-(PTR_SIZE_INT)carry;
515 u.p = ((PTR_SIZE_INT)bn_sub_words&mask) |
516 ((PTR_SIZE_INT)bn_add_words&~mask);
517 }
518 else
519 carry = 1;
520
521 /* otherwise it's effectively same as in BN_nist_mod_192... */
522 mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_224[0],BN_NIST_224_TOP);
523 mask &= 0-(PTR_SIZE_INT)carry;
524 res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) |
525 ((PTR_SIZE_INT)r_d&mask));
526 nist_cp_bn(r_d, res, BN_NIST_224_TOP);
527 r->top = BN_NIST_224_TOP;
528 bn_correct_top(r);
529
530 return 1;
531 }
532
533#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
534 { \
535 bn_cp_32(to, 0, from, (a8) - 8) \
536 bn_cp_32(to, 1, from, (a7) - 8) \
537 bn_cp_32(to, 2, from, (a6) - 8) \
538 bn_cp_32(to, 3, from, (a5) - 8) \
539 bn_cp_32(to, 4, from, (a4) - 8) \
540 bn_cp_32(to, 5, from, (a3) - 8) \
541 bn_cp_32(to, 6, from, (a2) - 8) \
542 bn_cp_32(to, 7, from, (a1) - 8) \
543 }
544
545int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
546 BN_CTX *ctx)
547 {
548 int i, top = a->top;
549 int carry = 0;
550 register BN_ULONG *a_d = a->d, *r_d;
551 BN_ULONG t_d[BN_NIST_256_TOP],
552 buf[BN_NIST_256_TOP],
553 c_d[BN_NIST_256_TOP],
554 *res;
555 PTR_SIZE_INT mask;
556 union { bn_addsub_f f; PTR_SIZE_INT p; } u;
557 static const BIGNUM _bignum_nist_p_256_sqr = {
558 (BN_ULONG *)_nist_p_256_sqr,
559 sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
560 sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
561 0,BN_FLG_STATIC_DATA };
562
563 field = &_bignum_nist_p_256; /* just to make sure */
564
565 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_256_sqr)>=0)
566 return BN_nnmod(r, a, field, ctx);
567
568 i = BN_ucmp(field, a);
569 if (i == 0)
570 {
571 BN_zero(r);
572 return 1;
573 }
574 else if (i > 0)
575 return (r == a)? 1 : (BN_copy(r ,a) != NULL);
576
577 if (r != a)
578 {
579 if (!bn_wexpand(r, BN_NIST_256_TOP))
580 return 0;
581 r_d = r->d;
582 nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
583 }
584 else
585 r_d = a_d;
586
587 nist_cp_bn_0(buf, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP);
588
589 /*S1*/
590 nist_set_256(t_d, buf, 15, 14, 13, 12, 11, 0, 0, 0);
591 /*S2*/
592 nist_set_256(c_d, buf, 0, 15, 14, 13, 12, 0, 0, 0);
593 carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
594 /* left shift */
595 {
596 register BN_ULONG *ap,t,c;
597 ap = t_d;
598 c=0;
599 for (i = BN_NIST_256_TOP; i != 0; --i)
600 {
601 t= *ap;
602 *(ap++)=((t<<1)|c)&BN_MASK2;
603 c=(t & BN_TBIT)?1:0;
604 }
605 carry <<= 1;
606 carry |= c;
607 }
608 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
609 /*S3*/
610 nist_set_256(t_d, buf, 15, 14, 0, 0, 0, 10, 9, 8);
611 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
612 /*S4*/
613 nist_set_256(t_d, buf, 8, 13, 15, 14, 13, 11, 10, 9);
614 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
615 /*D1*/
616 nist_set_256(t_d, buf, 10, 8, 0, 0, 0, 13, 12, 11);
617 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
618 /*D2*/
619 nist_set_256(t_d, buf, 11, 9, 0, 0, 15, 14, 13, 12);
620 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
621 /*D3*/
622 nist_set_256(t_d, buf, 12, 0, 10, 9, 8, 15, 14, 13);
623 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
624 /*D4*/
625 nist_set_256(t_d, buf, 13, 0, 11, 10, 9, 0, 15, 14);
626 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
627
628 /* see BN_nist_mod_224 for explanation */
629 u.f = bn_sub_words;
630 if (carry > 0)
631 carry = (int)bn_sub_words(r_d,r_d,_nist_p_256[carry-1],BN_NIST_256_TOP);
632 else if (carry < 0)
633 {
634 carry = (int)bn_add_words(r_d,r_d,_nist_p_256[-carry-1],BN_NIST_256_TOP);
635 mask = 0-(PTR_SIZE_INT)carry;
636 u.p = ((PTR_SIZE_INT)bn_sub_words&mask) |
637 ((PTR_SIZE_INT)bn_add_words&~mask);
638 }
639 else
640 carry = 1;
641
642 mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_256[0],BN_NIST_256_TOP);
643 mask &= 0-(PTR_SIZE_INT)carry;
644 res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) |
645 ((PTR_SIZE_INT)r_d&mask));
646 nist_cp_bn(r_d, res, BN_NIST_256_TOP);
647 r->top = BN_NIST_256_TOP;
648 bn_correct_top(r);
649
650 return 1;
651 }
652
653#define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
654 { \
655 bn_cp_32(to, 0, from, (a12) - 12) \
656 bn_cp_32(to, 1, from, (a11) - 12) \
657 bn_cp_32(to, 2, from, (a10) - 12) \
658 bn_cp_32(to, 3, from, (a9) - 12) \
659 bn_cp_32(to, 4, from, (a8) - 12) \
660 bn_cp_32(to, 5, from, (a7) - 12) \
661 bn_cp_32(to, 6, from, (a6) - 12) \
662 bn_cp_32(to, 7, from, (a5) - 12) \
663 bn_cp_32(to, 8, from, (a4) - 12) \
664 bn_cp_32(to, 9, from, (a3) - 12) \
665 bn_cp_32(to, 10, from, (a2) - 12) \
666 bn_cp_32(to, 11, from, (a1) - 12) \
667 }
668
669int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
670 BN_CTX *ctx)
671 {
672 int i, top = a->top;
673 int carry = 0;
674 register BN_ULONG *r_d, *a_d = a->d;
675 BN_ULONG t_d[BN_NIST_384_TOP],
676 buf[BN_NIST_384_TOP],
677 c_d[BN_NIST_384_TOP],
678 *res;
679 PTR_SIZE_INT mask;
680 union { bn_addsub_f f; PTR_SIZE_INT p; } u;
681 static const BIGNUM _bignum_nist_p_384_sqr = {
682 (BN_ULONG *)_nist_p_384_sqr,
683 sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
684 sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
685 0,BN_FLG_STATIC_DATA };
686
687
688 field = &_bignum_nist_p_384; /* just to make sure */
689
690 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_384_sqr)>=0)
691 return BN_nnmod(r, a, field, ctx);
692
693 i = BN_ucmp(field, a);
694 if (i == 0)
695 {
696 BN_zero(r);
697 return 1;
698 }
699 else if (i > 0)
700 return (r == a)? 1 : (BN_copy(r ,a) != NULL);
701
702 if (r != a)
703 {
704 if (!bn_wexpand(r, BN_NIST_384_TOP))
705 return 0;
706 r_d = r->d;
707 nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
708 }
709 else
710 r_d = a_d;
711
712 nist_cp_bn_0(buf, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP);
713
714 /*S1*/
715 nist_set_256(t_d, buf, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4);
716 /* left shift */
717 {
718 register BN_ULONG *ap,t,c;
719 ap = t_d;
720 c=0;
721 for (i = 3; i != 0; --i)
722 {
723 t= *ap;
724 *(ap++)=((t<<1)|c)&BN_MASK2;
725 c=(t & BN_TBIT)?1:0;
726 }
727 *ap=c;
728 }
729 carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
730 t_d, BN_NIST_256_TOP);
731 /*S2 */
732 carry += (int)bn_add_words(r_d, r_d, buf, BN_NIST_384_TOP);
733 /*S3*/
734 nist_set_384(t_d,buf,20,19,18,17,16,15,14,13,12,23,22,21);
735 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
736 /*S4*/
737 nist_set_384(t_d,buf,19,18,17,16,15,14,13,12,20,0,23,0);
738 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
739 /*S5*/
740 nist_set_384(t_d, buf,0,0,0,0,23,22,21,20,0,0,0,0);
741 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
742 /*S6*/
743 nist_set_384(t_d,buf,0,0,0,0,0,0,23,22,21,0,0,20);
744 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
745 /*D1*/
746 nist_set_384(t_d,buf,22,21,20,19,18,17,16,15,14,13,12,23);
747 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
748 /*D2*/
749 nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,22,21,20,0);
750 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
751 /*D3*/
752 nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,23,0,0,0);
753 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
754
755 /* see BN_nist_mod_224 for explanation */
756 u.f = bn_sub_words;
757 if (carry > 0)
758 carry = (int)bn_sub_words(r_d,r_d,_nist_p_384[carry-1],BN_NIST_384_TOP);
759 else if (carry < 0)
760 {
761 carry = (int)bn_add_words(r_d,r_d,_nist_p_384[-carry-1],BN_NIST_384_TOP);
762 mask = 0-(PTR_SIZE_INT)carry;
763 u.p = ((PTR_SIZE_INT)bn_sub_words&mask) |
764 ((PTR_SIZE_INT)bn_add_words&~mask);
765 }
766 else
767 carry = 1;
768
769 mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_384[0],BN_NIST_384_TOP);
770 mask &= 0-(PTR_SIZE_INT)carry;
771 res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) |
772 ((PTR_SIZE_INT)r_d&mask));
773 nist_cp_bn(r_d, res, BN_NIST_384_TOP);
774 r->top = BN_NIST_384_TOP;
775 bn_correct_top(r);
776
777 return 1;
778 }
779
780#define BN_NIST_521_RSHIFT (521%BN_BITS2)
781#define BN_NIST_521_LSHIFT (BN_BITS2-BN_NIST_521_RSHIFT)
782#define BN_NIST_521_TOP_MASK ((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
783
784int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
785 BN_CTX *ctx)
786 {
787 int top = a->top, i;
788 BN_ULONG *r_d, *a_d = a->d,
789 t_d[BN_NIST_521_TOP],
790 val,tmp,*res;
791 PTR_SIZE_INT mask;
792 static const BIGNUM _bignum_nist_p_521_sqr = {
793 (BN_ULONG *)_nist_p_521_sqr,
794 sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
795 sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
796 0,BN_FLG_STATIC_DATA };
797
798 field = &_bignum_nist_p_521; /* just to make sure */
799
800 if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_521_sqr)>=0)
801 return BN_nnmod(r, a, field, ctx);
802
803 i = BN_ucmp(field, a);
804 if (i == 0)
805 {
806 BN_zero(r);
807 return 1;
808 }
809 else if (i > 0)
810 return (r == a)? 1 : (BN_copy(r ,a) != NULL);
811
812 if (r != a)
813 {
814 if (!bn_wexpand(r,BN_NIST_521_TOP))
815 return 0;
816 r_d = r->d;
817 nist_cp_bn(r_d,a_d, BN_NIST_521_TOP);
818 }
819 else
820 r_d = a_d;
821
822 /* upper 521 bits, copy ... */
823 nist_cp_bn_0(t_d,a_d + (BN_NIST_521_TOP-1), top - (BN_NIST_521_TOP-1),BN_NIST_521_TOP);
824 /* ... and right shift */
825 for (val=t_d[0],i=0; i<BN_NIST_521_TOP-1; i++)
826 {
827 tmp = val>>BN_NIST_521_RSHIFT;
828 val = t_d[i+1];
829 t_d[i] = (tmp | val<<BN_NIST_521_LSHIFT) & BN_MASK2;
830 }
831 t_d[i] = val>>BN_NIST_521_RSHIFT;
832 /* lower 521 bits */
833 r_d[i] &= BN_NIST_521_TOP_MASK;
834
835 bn_add_words(r_d,r_d,t_d,BN_NIST_521_TOP);
836 mask = 0-(PTR_SIZE_INT)bn_sub_words(t_d,r_d,_nist_p_521,BN_NIST_521_TOP);
837 res = (BN_ULONG *)(((PTR_SIZE_INT)t_d&~mask) |
838 ((PTR_SIZE_INT)r_d&mask));
839 nist_cp_bn(r_d,res,BN_NIST_521_TOP);
840 r->top = BN_NIST_521_TOP;
841 bn_correct_top(r);
842
843 return 1;
844 }
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c
deleted file mode 100644
index 7b25979dd1..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.c
+++ /dev/null
@@ -1,494 +0,0 @@
1/* crypto/bn/bn_prime.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include <time.h>
114#include "cryptlib.h"
115#include "bn_lcl.h"
116#include <openssl/rand.h>
117
118/* NB: these functions have been "upgraded", the deprecated versions (which are
119 * compatibility wrappers using these functions) are in bn_depr.c.
120 * - Geoff
121 */
122
123/* The quick sieve algorithm approach to weeding out primes is
124 * Philip Zimmermann's, as implemented in PGP. I have had a read of
125 * his comments and implemented my own version.
126 */
127#include "bn_prime.h"
128
129static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
130 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont);
131static int probable_prime(BIGNUM *rnd, int bits);
132static int probable_prime_dh(BIGNUM *rnd, int bits,
133 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
134static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
135 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
136
137int BN_GENCB_call(BN_GENCB *cb, int a, int b)
138 {
139 /* No callback means continue */
140 if(!cb) return 1;
141 switch(cb->ver)
142 {
143 case 1:
144 /* Deprecated-style callbacks */
145 if(!cb->cb.cb_1)
146 return 1;
147 cb->cb.cb_1(a, b, cb->arg);
148 return 1;
149 case 2:
150 /* New-style callbacks */
151 return cb->cb.cb_2(a, b, cb);
152 default:
153 break;
154 }
155 /* Unrecognised callback type */
156 return 0;
157 }
158
159int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe,
160 const BIGNUM *add, const BIGNUM *rem, BN_GENCB *cb)
161 {
162 BIGNUM *t;
163 int found=0;
164 int i,j,c1=0;
165 BN_CTX *ctx;
166 int checks = BN_prime_checks_for_size(bits);
167
168 ctx=BN_CTX_new();
169 if (ctx == NULL) goto err;
170 BN_CTX_start(ctx);
171 t = BN_CTX_get(ctx);
172 if(!t) goto err;
173loop:
174 /* make a random number and set the top and bottom bits */
175 if (add == NULL)
176 {
177 if (!probable_prime(ret,bits)) goto err;
178 }
179 else
180 {
181 if (safe)
182 {
183 if (!probable_prime_dh_safe(ret,bits,add,rem,ctx))
184 goto err;
185 }
186 else
187 {
188 if (!probable_prime_dh(ret,bits,add,rem,ctx))
189 goto err;
190 }
191 }
192 /* if (BN_mod_word(ret,(BN_ULONG)3) == 1) goto loop; */
193 if(!BN_GENCB_call(cb, 0, c1++))
194 /* aborted */
195 goto err;
196
197 if (!safe)
198 {
199 i=BN_is_prime_fasttest_ex(ret,checks,ctx,0,cb);
200 if (i == -1) goto err;
201 if (i == 0) goto loop;
202 }
203 else
204 {
205 /* for "safe prime" generation,
206 * check that (p-1)/2 is prime.
207 * Since a prime is odd, We just
208 * need to divide by 2 */
209 if (!BN_rshift1(t,ret)) goto err;
210
211 for (i=0; i<checks; i++)
212 {
213 j=BN_is_prime_fasttest_ex(ret,1,ctx,0,cb);
214 if (j == -1) goto err;
215 if (j == 0) goto loop;
216
217 j=BN_is_prime_fasttest_ex(t,1,ctx,0,cb);
218 if (j == -1) goto err;
219 if (j == 0) goto loop;
220
221 if(!BN_GENCB_call(cb, 2, c1-1))
222 goto err;
223 /* We have a safe prime test pass */
224 }
225 }
226 /* we have a prime :-) */
227 found = 1;
228err:
229 if (ctx != NULL)
230 {
231 BN_CTX_end(ctx);
232 BN_CTX_free(ctx);
233 }
234 bn_check_top(ret);
235 return found;
236 }
237
238int BN_is_prime_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, BN_GENCB *cb)
239 {
240 return BN_is_prime_fasttest_ex(a, checks, ctx_passed, 0, cb);
241 }
242
243int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
244 int do_trial_division, BN_GENCB *cb)
245 {
246 int i, j, ret = -1;
247 int k;
248 BN_CTX *ctx = NULL;
249 BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
250 BN_MONT_CTX *mont = NULL;
251 const BIGNUM *A = NULL;
252
253 if (BN_cmp(a, BN_value_one()) <= 0)
254 return 0;
255
256 if (checks == BN_prime_checks)
257 checks = BN_prime_checks_for_size(BN_num_bits(a));
258
259 /* first look for small factors */
260 if (!BN_is_odd(a))
261 /* a is even => a is prime if and only if a == 2 */
262 return BN_is_word(a, 2);
263 if (do_trial_division)
264 {
265 for (i = 1; i < NUMPRIMES; i++)
266 if (BN_mod_word(a, primes[i]) == 0)
267 return 0;
268 if(!BN_GENCB_call(cb, 1, -1))
269 goto err;
270 }
271
272 if (ctx_passed != NULL)
273 ctx = ctx_passed;
274 else
275 if ((ctx=BN_CTX_new()) == NULL)
276 goto err;
277 BN_CTX_start(ctx);
278
279 /* A := abs(a) */
280 if (a->neg)
281 {
282 BIGNUM *t;
283 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
284 BN_copy(t, a);
285 t->neg = 0;
286 A = t;
287 }
288 else
289 A = a;
290 A1 = BN_CTX_get(ctx);
291 A1_odd = BN_CTX_get(ctx);
292 check = BN_CTX_get(ctx);
293 if (check == NULL) goto err;
294
295 /* compute A1 := A - 1 */
296 if (!BN_copy(A1, A))
297 goto err;
298 if (!BN_sub_word(A1, 1))
299 goto err;
300 if (BN_is_zero(A1))
301 {
302 ret = 0;
303 goto err;
304 }
305
306 /* write A1 as A1_odd * 2^k */
307 k = 1;
308 while (!BN_is_bit_set(A1, k))
309 k++;
310 if (!BN_rshift(A1_odd, A1, k))
311 goto err;
312
313 /* Montgomery setup for computations mod A */
314 mont = BN_MONT_CTX_new();
315 if (mont == NULL)
316 goto err;
317 if (!BN_MONT_CTX_set(mont, A, ctx))
318 goto err;
319
320 for (i = 0; i < checks; i++)
321 {
322 if (!BN_pseudo_rand_range(check, A1))
323 goto err;
324 if (!BN_add_word(check, 1))
325 goto err;
326 /* now 1 <= check < A */
327
328 j = witness(check, A, A1, A1_odd, k, ctx, mont);
329 if (j == -1) goto err;
330 if (j)
331 {
332 ret=0;
333 goto err;
334 }
335 if(!BN_GENCB_call(cb, 1, i))
336 goto err;
337 }
338 ret=1;
339err:
340 if (ctx != NULL)
341 {
342 BN_CTX_end(ctx);
343 if (ctx_passed == NULL)
344 BN_CTX_free(ctx);
345 }
346 if (mont != NULL)
347 BN_MONT_CTX_free(mont);
348
349 return(ret);
350 }
351
352static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
353 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont)
354 {
355 if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) /* w := w^a1_odd mod a */
356 return -1;
357 if (BN_is_one(w))
358 return 0; /* probably prime */
359 if (BN_cmp(w, a1) == 0)
360 return 0; /* w == -1 (mod a), 'a' is probably prime */
361 while (--k)
362 {
363 if (!BN_mod_mul(w, w, w, a, ctx)) /* w := w^2 mod a */
364 return -1;
365 if (BN_is_one(w))
366 return 1; /* 'a' is composite, otherwise a previous 'w' would
367 * have been == -1 (mod 'a') */
368 if (BN_cmp(w, a1) == 0)
369 return 0; /* w == -1 (mod a), 'a' is probably prime */
370 }
371 /* If we get here, 'w' is the (a-1)/2-th power of the original 'w',
372 * and it is neither -1 nor +1 -- so 'a' cannot be prime */
373 bn_check_top(w);
374 return 1;
375 }
376
377static int probable_prime(BIGNUM *rnd, int bits)
378 {
379 int i;
380 prime_t mods[NUMPRIMES];
381 BN_ULONG delta,maxdelta;
382
383again:
384 if (!BN_rand(rnd,bits,1,1)) return(0);
385 /* we now have a random number 'rand' to test. */
386 for (i=1; i<NUMPRIMES; i++)
387 mods[i]=(prime_t)BN_mod_word(rnd,(BN_ULONG)primes[i]);
388 maxdelta=BN_MASK2 - primes[NUMPRIMES-1];
389 delta=0;
390 loop: for (i=1; i<NUMPRIMES; i++)
391 {
392 /* check that rnd is not a prime and also
393 * that gcd(rnd-1,primes) == 1 (except for 2) */
394 if (((mods[i]+delta)%primes[i]) <= 1)
395 {
396 delta+=2;
397 if (delta > maxdelta) goto again;
398 goto loop;
399 }
400 }
401 if (!BN_add_word(rnd,delta)) return(0);
402 bn_check_top(rnd);
403 return(1);
404 }
405
406static int probable_prime_dh(BIGNUM *rnd, int bits,
407 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx)
408 {
409 int i,ret=0;
410 BIGNUM *t1;
411
412 BN_CTX_start(ctx);
413 if ((t1 = BN_CTX_get(ctx)) == NULL) goto err;
414
415 if (!BN_rand(rnd,bits,0,1)) goto err;
416
417 /* we need ((rnd-rem) % add) == 0 */
418
419 if (!BN_mod(t1,rnd,add,ctx)) goto err;
420 if (!BN_sub(rnd,rnd,t1)) goto err;
421 if (rem == NULL)
422 { if (!BN_add_word(rnd,1)) goto err; }
423 else
424 { if (!BN_add(rnd,rnd,rem)) goto err; }
425
426 /* we now have a random number 'rand' to test. */
427
428 loop: for (i=1; i<NUMPRIMES; i++)
429 {
430 /* check that rnd is a prime */
431 if (BN_mod_word(rnd,(BN_ULONG)primes[i]) <= 1)
432 {
433 if (!BN_add(rnd,rnd,add)) goto err;
434 goto loop;
435 }
436 }
437 ret=1;
438err:
439 BN_CTX_end(ctx);
440 bn_check_top(rnd);
441 return(ret);
442 }
443
444static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
445 const BIGNUM *rem, BN_CTX *ctx)
446 {
447 int i,ret=0;
448 BIGNUM *t1,*qadd,*q;
449
450 bits--;
451 BN_CTX_start(ctx);
452 t1 = BN_CTX_get(ctx);
453 q = BN_CTX_get(ctx);
454 qadd = BN_CTX_get(ctx);
455 if (qadd == NULL) goto err;
456
457 if (!BN_rshift1(qadd,padd)) goto err;
458
459 if (!BN_rand(q,bits,0,1)) goto err;
460
461 /* we need ((rnd-rem) % add) == 0 */
462 if (!BN_mod(t1,q,qadd,ctx)) goto err;
463 if (!BN_sub(q,q,t1)) goto err;
464 if (rem == NULL)
465 { if (!BN_add_word(q,1)) goto err; }
466 else
467 {
468 if (!BN_rshift1(t1,rem)) goto err;
469 if (!BN_add(q,q,t1)) goto err;
470 }
471
472 /* we now have a random number 'rand' to test. */
473 if (!BN_lshift1(p,q)) goto err;
474 if (!BN_add_word(p,1)) goto err;
475
476 loop: for (i=1; i<NUMPRIMES; i++)
477 {
478 /* check that p and q are prime */
479 /* check that for p and q
480 * gcd(p-1,primes) == 1 (except for 2) */
481 if ( (BN_mod_word(p,(BN_ULONG)primes[i]) == 0) ||
482 (BN_mod_word(q,(BN_ULONG)primes[i]) == 0))
483 {
484 if (!BN_add(p,p,padd)) goto err;
485 if (!BN_add(q,q,qadd)) goto err;
486 goto loop;
487 }
488 }
489 ret=1;
490err:
491 BN_CTX_end(ctx);
492 bn_check_top(p);
493 return(ret);
494 }
diff --git a/src/lib/libcrypto/bn/bn_prime.h b/src/lib/libcrypto/bn/bn_prime.h
deleted file mode 100644
index 51d2194feb..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.h
+++ /dev/null
@@ -1,327 +0,0 @@
1/* Auto generated by bn_prime.pl */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef EIGHT_BIT
60#define NUMPRIMES 2048
61typedef unsigned short prime_t;
62#else
63#define NUMPRIMES 54
64typedef unsigned char prime_t;
65#endif
66static const prime_t primes[NUMPRIMES]=
67 {
68 2, 3, 5, 7, 11, 13, 17, 19,
69 23, 29, 31, 37, 41, 43, 47, 53,
70 59, 61, 67, 71, 73, 79, 83, 89,
71 97, 101, 103, 107, 109, 113, 127, 131,
72 137, 139, 149, 151, 157, 163, 167, 173,
73 179, 181, 191, 193, 197, 199, 211, 223,
74 227, 229, 233, 239, 241, 251,
75#ifndef EIGHT_BIT
76 257, 263,
77 269, 271, 277, 281, 283, 293, 307, 311,
78 313, 317, 331, 337, 347, 349, 353, 359,
79 367, 373, 379, 383, 389, 397, 401, 409,
80 419, 421, 431, 433, 439, 443, 449, 457,
81 461, 463, 467, 479, 487, 491, 499, 503,
82 509, 521, 523, 541, 547, 557, 563, 569,
83 571, 577, 587, 593, 599, 601, 607, 613,
84 617, 619, 631, 641, 643, 647, 653, 659,
85 661, 673, 677, 683, 691, 701, 709, 719,
86 727, 733, 739, 743, 751, 757, 761, 769,
87 773, 787, 797, 809, 811, 821, 823, 827,
88 829, 839, 853, 857, 859, 863, 877, 881,
89 883, 887, 907, 911, 919, 929, 937, 941,
90 947, 953, 967, 971, 977, 983, 991, 997,
91 1009,1013,1019,1021,1031,1033,1039,1049,
92 1051,1061,1063,1069,1087,1091,1093,1097,
93 1103,1109,1117,1123,1129,1151,1153,1163,
94 1171,1181,1187,1193,1201,1213,1217,1223,
95 1229,1231,1237,1249,1259,1277,1279,1283,
96 1289,1291,1297,1301,1303,1307,1319,1321,
97 1327,1361,1367,1373,1381,1399,1409,1423,
98 1427,1429,1433,1439,1447,1451,1453,1459,
99 1471,1481,1483,1487,1489,1493,1499,1511,
100 1523,1531,1543,1549,1553,1559,1567,1571,
101 1579,1583,1597,1601,1607,1609,1613,1619,
102 1621,1627,1637,1657,1663,1667,1669,1693,
103 1697,1699,1709,1721,1723,1733,1741,1747,
104 1753,1759,1777,1783,1787,1789,1801,1811,
105 1823,1831,1847,1861,1867,1871,1873,1877,
106 1879,1889,1901,1907,1913,1931,1933,1949,
107 1951,1973,1979,1987,1993,1997,1999,2003,
108 2011,2017,2027,2029,2039,2053,2063,2069,
109 2081,2083,2087,2089,2099,2111,2113,2129,
110 2131,2137,2141,2143,2153,2161,2179,2203,
111 2207,2213,2221,2237,2239,2243,2251,2267,
112 2269,2273,2281,2287,2293,2297,2309,2311,
113 2333,2339,2341,2347,2351,2357,2371,2377,
114 2381,2383,2389,2393,2399,2411,2417,2423,
115 2437,2441,2447,2459,2467,2473,2477,2503,
116 2521,2531,2539,2543,2549,2551,2557,2579,
117 2591,2593,2609,2617,2621,2633,2647,2657,
118 2659,2663,2671,2677,2683,2687,2689,2693,
119 2699,2707,2711,2713,2719,2729,2731,2741,
120 2749,2753,2767,2777,2789,2791,2797,2801,
121 2803,2819,2833,2837,2843,2851,2857,2861,
122 2879,2887,2897,2903,2909,2917,2927,2939,
123 2953,2957,2963,2969,2971,2999,3001,3011,
124 3019,3023,3037,3041,3049,3061,3067,3079,
125 3083,3089,3109,3119,3121,3137,3163,3167,
126 3169,3181,3187,3191,3203,3209,3217,3221,
127 3229,3251,3253,3257,3259,3271,3299,3301,
128 3307,3313,3319,3323,3329,3331,3343,3347,
129 3359,3361,3371,3373,3389,3391,3407,3413,
130 3433,3449,3457,3461,3463,3467,3469,3491,
131 3499,3511,3517,3527,3529,3533,3539,3541,
132 3547,3557,3559,3571,3581,3583,3593,3607,
133 3613,3617,3623,3631,3637,3643,3659,3671,
134 3673,3677,3691,3697,3701,3709,3719,3727,
135 3733,3739,3761,3767,3769,3779,3793,3797,
136 3803,3821,3823,3833,3847,3851,3853,3863,
137 3877,3881,3889,3907,3911,3917,3919,3923,
138 3929,3931,3943,3947,3967,3989,4001,4003,
139 4007,4013,4019,4021,4027,4049,4051,4057,
140 4073,4079,4091,4093,4099,4111,4127,4129,
141 4133,4139,4153,4157,4159,4177,4201,4211,
142 4217,4219,4229,4231,4241,4243,4253,4259,
143 4261,4271,4273,4283,4289,4297,4327,4337,
144 4339,4349,4357,4363,4373,4391,4397,4409,
145 4421,4423,4441,4447,4451,4457,4463,4481,
146 4483,4493,4507,4513,4517,4519,4523,4547,
147 4549,4561,4567,4583,4591,4597,4603,4621,
148 4637,4639,4643,4649,4651,4657,4663,4673,
149 4679,4691,4703,4721,4723,4729,4733,4751,
150 4759,4783,4787,4789,4793,4799,4801,4813,
151 4817,4831,4861,4871,4877,4889,4903,4909,
152 4919,4931,4933,4937,4943,4951,4957,4967,
153 4969,4973,4987,4993,4999,5003,5009,5011,
154 5021,5023,5039,5051,5059,5077,5081,5087,
155 5099,5101,5107,5113,5119,5147,5153,5167,
156 5171,5179,5189,5197,5209,5227,5231,5233,
157 5237,5261,5273,5279,5281,5297,5303,5309,
158 5323,5333,5347,5351,5381,5387,5393,5399,
159 5407,5413,5417,5419,5431,5437,5441,5443,
160 5449,5471,5477,5479,5483,5501,5503,5507,
161 5519,5521,5527,5531,5557,5563,5569,5573,
162 5581,5591,5623,5639,5641,5647,5651,5653,
163 5657,5659,5669,5683,5689,5693,5701,5711,
164 5717,5737,5741,5743,5749,5779,5783,5791,
165 5801,5807,5813,5821,5827,5839,5843,5849,
166 5851,5857,5861,5867,5869,5879,5881,5897,
167 5903,5923,5927,5939,5953,5981,5987,6007,
168 6011,6029,6037,6043,6047,6053,6067,6073,
169 6079,6089,6091,6101,6113,6121,6131,6133,
170 6143,6151,6163,6173,6197,6199,6203,6211,
171 6217,6221,6229,6247,6257,6263,6269,6271,
172 6277,6287,6299,6301,6311,6317,6323,6329,
173 6337,6343,6353,6359,6361,6367,6373,6379,
174 6389,6397,6421,6427,6449,6451,6469,6473,
175 6481,6491,6521,6529,6547,6551,6553,6563,
176 6569,6571,6577,6581,6599,6607,6619,6637,
177 6653,6659,6661,6673,6679,6689,6691,6701,
178 6703,6709,6719,6733,6737,6761,6763,6779,
179 6781,6791,6793,6803,6823,6827,6829,6833,
180 6841,6857,6863,6869,6871,6883,6899,6907,
181 6911,6917,6947,6949,6959,6961,6967,6971,
182 6977,6983,6991,6997,7001,7013,7019,7027,
183 7039,7043,7057,7069,7079,7103,7109,7121,
184 7127,7129,7151,7159,7177,7187,7193,7207,
185 7211,7213,7219,7229,7237,7243,7247,7253,
186 7283,7297,7307,7309,7321,7331,7333,7349,
187 7351,7369,7393,7411,7417,7433,7451,7457,
188 7459,7477,7481,7487,7489,7499,7507,7517,
189 7523,7529,7537,7541,7547,7549,7559,7561,
190 7573,7577,7583,7589,7591,7603,7607,7621,
191 7639,7643,7649,7669,7673,7681,7687,7691,
192 7699,7703,7717,7723,7727,7741,7753,7757,
193 7759,7789,7793,7817,7823,7829,7841,7853,
194 7867,7873,7877,7879,7883,7901,7907,7919,
195 7927,7933,7937,7949,7951,7963,7993,8009,
196 8011,8017,8039,8053,8059,8069,8081,8087,
197 8089,8093,8101,8111,8117,8123,8147,8161,
198 8167,8171,8179,8191,8209,8219,8221,8231,
199 8233,8237,8243,8263,8269,8273,8287,8291,
200 8293,8297,8311,8317,8329,8353,8363,8369,
201 8377,8387,8389,8419,8423,8429,8431,8443,
202 8447,8461,8467,8501,8513,8521,8527,8537,
203 8539,8543,8563,8573,8581,8597,8599,8609,
204 8623,8627,8629,8641,8647,8663,8669,8677,
205 8681,8689,8693,8699,8707,8713,8719,8731,
206 8737,8741,8747,8753,8761,8779,8783,8803,
207 8807,8819,8821,8831,8837,8839,8849,8861,
208 8863,8867,8887,8893,8923,8929,8933,8941,
209 8951,8963,8969,8971,8999,9001,9007,9011,
210 9013,9029,9041,9043,9049,9059,9067,9091,
211 9103,9109,9127,9133,9137,9151,9157,9161,
212 9173,9181,9187,9199,9203,9209,9221,9227,
213 9239,9241,9257,9277,9281,9283,9293,9311,
214 9319,9323,9337,9341,9343,9349,9371,9377,
215 9391,9397,9403,9413,9419,9421,9431,9433,
216 9437,9439,9461,9463,9467,9473,9479,9491,
217 9497,9511,9521,9533,9539,9547,9551,9587,
218 9601,9613,9619,9623,9629,9631,9643,9649,
219 9661,9677,9679,9689,9697,9719,9721,9733,
220 9739,9743,9749,9767,9769,9781,9787,9791,
221 9803,9811,9817,9829,9833,9839,9851,9857,
222 9859,9871,9883,9887,9901,9907,9923,9929,
223 9931,9941,9949,9967,9973,10007,10009,10037,
224 10039,10061,10067,10069,10079,10091,10093,10099,
225 10103,10111,10133,10139,10141,10151,10159,10163,
226 10169,10177,10181,10193,10211,10223,10243,10247,
227 10253,10259,10267,10271,10273,10289,10301,10303,
228 10313,10321,10331,10333,10337,10343,10357,10369,
229 10391,10399,10427,10429,10433,10453,10457,10459,
230 10463,10477,10487,10499,10501,10513,10529,10531,
231 10559,10567,10589,10597,10601,10607,10613,10627,
232 10631,10639,10651,10657,10663,10667,10687,10691,
233 10709,10711,10723,10729,10733,10739,10753,10771,
234 10781,10789,10799,10831,10837,10847,10853,10859,
235 10861,10867,10883,10889,10891,10903,10909,10937,
236 10939,10949,10957,10973,10979,10987,10993,11003,
237 11027,11047,11057,11059,11069,11071,11083,11087,
238 11093,11113,11117,11119,11131,11149,11159,11161,
239 11171,11173,11177,11197,11213,11239,11243,11251,
240 11257,11261,11273,11279,11287,11299,11311,11317,
241 11321,11329,11351,11353,11369,11383,11393,11399,
242 11411,11423,11437,11443,11447,11467,11471,11483,
243 11489,11491,11497,11503,11519,11527,11549,11551,
244 11579,11587,11593,11597,11617,11621,11633,11657,
245 11677,11681,11689,11699,11701,11717,11719,11731,
246 11743,11777,11779,11783,11789,11801,11807,11813,
247 11821,11827,11831,11833,11839,11863,11867,11887,
248 11897,11903,11909,11923,11927,11933,11939,11941,
249 11953,11959,11969,11971,11981,11987,12007,12011,
250 12037,12041,12043,12049,12071,12073,12097,12101,
251 12107,12109,12113,12119,12143,12149,12157,12161,
252 12163,12197,12203,12211,12227,12239,12241,12251,
253 12253,12263,12269,12277,12281,12289,12301,12323,
254 12329,12343,12347,12373,12377,12379,12391,12401,
255 12409,12413,12421,12433,12437,12451,12457,12473,
256 12479,12487,12491,12497,12503,12511,12517,12527,
257 12539,12541,12547,12553,12569,12577,12583,12589,
258 12601,12611,12613,12619,12637,12641,12647,12653,
259 12659,12671,12689,12697,12703,12713,12721,12739,
260 12743,12757,12763,12781,12791,12799,12809,12821,
261 12823,12829,12841,12853,12889,12893,12899,12907,
262 12911,12917,12919,12923,12941,12953,12959,12967,
263 12973,12979,12983,13001,13003,13007,13009,13033,
264 13037,13043,13049,13063,13093,13099,13103,13109,
265 13121,13127,13147,13151,13159,13163,13171,13177,
266 13183,13187,13217,13219,13229,13241,13249,13259,
267 13267,13291,13297,13309,13313,13327,13331,13337,
268 13339,13367,13381,13397,13399,13411,13417,13421,
269 13441,13451,13457,13463,13469,13477,13487,13499,
270 13513,13523,13537,13553,13567,13577,13591,13597,
271 13613,13619,13627,13633,13649,13669,13679,13681,
272 13687,13691,13693,13697,13709,13711,13721,13723,
273 13729,13751,13757,13759,13763,13781,13789,13799,
274 13807,13829,13831,13841,13859,13873,13877,13879,
275 13883,13901,13903,13907,13913,13921,13931,13933,
276 13963,13967,13997,13999,14009,14011,14029,14033,
277 14051,14057,14071,14081,14083,14087,14107,14143,
278 14149,14153,14159,14173,14177,14197,14207,14221,
279 14243,14249,14251,14281,14293,14303,14321,14323,
280 14327,14341,14347,14369,14387,14389,14401,14407,
281 14411,14419,14423,14431,14437,14447,14449,14461,
282 14479,14489,14503,14519,14533,14537,14543,14549,
283 14551,14557,14561,14563,14591,14593,14621,14627,
284 14629,14633,14639,14653,14657,14669,14683,14699,
285 14713,14717,14723,14731,14737,14741,14747,14753,
286 14759,14767,14771,14779,14783,14797,14813,14821,
287 14827,14831,14843,14851,14867,14869,14879,14887,
288 14891,14897,14923,14929,14939,14947,14951,14957,
289 14969,14983,15013,15017,15031,15053,15061,15073,
290 15077,15083,15091,15101,15107,15121,15131,15137,
291 15139,15149,15161,15173,15187,15193,15199,15217,
292 15227,15233,15241,15259,15263,15269,15271,15277,
293 15287,15289,15299,15307,15313,15319,15329,15331,
294 15349,15359,15361,15373,15377,15383,15391,15401,
295 15413,15427,15439,15443,15451,15461,15467,15473,
296 15493,15497,15511,15527,15541,15551,15559,15569,
297 15581,15583,15601,15607,15619,15629,15641,15643,
298 15647,15649,15661,15667,15671,15679,15683,15727,
299 15731,15733,15737,15739,15749,15761,15767,15773,
300 15787,15791,15797,15803,15809,15817,15823,15859,
301 15877,15881,15887,15889,15901,15907,15913,15919,
302 15923,15937,15959,15971,15973,15991,16001,16007,
303 16033,16057,16061,16063,16067,16069,16073,16087,
304 16091,16097,16103,16111,16127,16139,16141,16183,
305 16187,16189,16193,16217,16223,16229,16231,16249,
306 16253,16267,16273,16301,16319,16333,16339,16349,
307 16361,16363,16369,16381,16411,16417,16421,16427,
308 16433,16447,16451,16453,16477,16481,16487,16493,
309 16519,16529,16547,16553,16561,16567,16573,16603,
310 16607,16619,16631,16633,16649,16651,16657,16661,
311 16673,16691,16693,16699,16703,16729,16741,16747,
312 16759,16763,16787,16811,16823,16829,16831,16843,
313 16871,16879,16883,16889,16901,16903,16921,16927,
314 16931,16937,16943,16963,16979,16981,16987,16993,
315 17011,17021,17027,17029,17033,17041,17047,17053,
316 17077,17093,17099,17107,17117,17123,17137,17159,
317 17167,17183,17189,17191,17203,17207,17209,17231,
318 17239,17257,17291,17293,17299,17317,17321,17327,
319 17333,17341,17351,17359,17377,17383,17387,17389,
320 17393,17401,17417,17419,17431,17443,17449,17467,
321 17471,17477,17483,17489,17491,17497,17509,17519,
322 17539,17551,17569,17573,17579,17581,17597,17599,
323 17609,17623,17627,17657,17659,17669,17681,17683,
324 17707,17713,17729,17737,17747,17749,17761,17783,
325 17789,17791,17807,17827,17837,17839,17851,17863,
326#endif
327 };
diff --git a/src/lib/libcrypto/bn/bn_prime.pl b/src/lib/libcrypto/bn/bn_prime.pl
deleted file mode 100644
index 3fafb6f3e9..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.pl
+++ /dev/null
@@ -1,119 +0,0 @@
1#!/usr/local/bin/perl
2# bn_prime.pl
3
4$num=2048;
5$num=$ARGV[0] if ($#ARGV >= 0);
6
7push(@primes,2);
8$p=1;
9loop: while ($#primes < $num-1)
10 {
11 $p+=2;
12 $s=int(sqrt($p));
13
14 for ($i=0; defined($primes[$i]) && $primes[$i]<=$s; $i++)
15 {
16 next loop if (($p%$primes[$i]) == 0);
17 }
18 push(@primes,$p);
19 }
20
21# print <<"EOF";
22# /* Auto generated by bn_prime.pl */
23# /* Copyright (C) 1995-1997 Eric Young (eay\@mincom.oz.au).
24# * All rights reserved.
25# * Copyright remains Eric Young's, and as such any Copyright notices in
26# * the code are not to be removed.
27# * See the COPYRIGHT file in the SSLeay distribution for more details.
28# */
29#
30# EOF
31
32print <<\EOF;
33/* Auto generated by bn_prime.pl */
34/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
35 * All rights reserved.
36 *
37 * This package is an SSL implementation written
38 * by Eric Young (eay@cryptsoft.com).
39 * The implementation was written so as to conform with Netscapes SSL.
40 *
41 * This library is free for commercial and non-commercial use as long as
42 * the following conditions are aheared to. The following conditions
43 * apply to all code found in this distribution, be it the RC4, RSA,
44 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
45 * included with this distribution is covered by the same copyright terms
46 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
47 *
48 * Copyright remains Eric Young's, and as such any Copyright notices in
49 * the code are not to be removed.
50 * If this package is used in a product, Eric Young should be given attribution
51 * as the author of the parts of the library used.
52 * This can be in the form of a textual message at program startup or
53 * in documentation (online or textual) provided with the package.
54 *
55 * Redistribution and use in source and binary forms, with or without
56 * modification, are permitted provided that the following conditions
57 * are met:
58 * 1. Redistributions of source code must retain the copyright
59 * notice, this list of conditions and the following disclaimer.
60 * 2. Redistributions in binary form must reproduce the above copyright
61 * notice, this list of conditions and the following disclaimer in the
62 * documentation and/or other materials provided with the distribution.
63 * 3. All advertising materials mentioning features or use of this software
64 * must display the following acknowledgement:
65 * "This product includes cryptographic software written by
66 * Eric Young (eay@cryptsoft.com)"
67 * The word 'cryptographic' can be left out if the rouines from the library
68 * being used are not cryptographic related :-).
69 * 4. If you include any Windows specific code (or a derivative thereof) from
70 * the apps directory (application code) you must include an acknowledgement:
71 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
72 *
73 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83 * SUCH DAMAGE.
84 *
85 * The licence and distribution terms for any publically available version or
86 * derivative of this code cannot be changed. i.e. this code cannot simply be
87 * copied and put under another distribution licence
88 * [including the GNU Public Licence.]
89 */
90
91EOF
92
93for ($i=0; $i <= $#primes; $i++)
94 {
95 if ($primes[$i] > 256)
96 {
97 $eight=$i;
98 last;
99 }
100 }
101
102printf "#ifndef EIGHT_BIT\n";
103printf "#define NUMPRIMES %d\n",$num;
104printf "typedef unsigned short prime_t;\n";
105printf "#else\n";
106printf "#define NUMPRIMES %d\n",$eight;
107printf "typedef unsigned char prime_t;\n";
108printf "#endif\n";
109print "static const prime_t primes[NUMPRIMES]=\n\t{\n\t";
110$init=0;
111for ($i=0; $i <= $#primes; $i++)
112 {
113 printf "\n#ifndef EIGHT_BIT\n\t" if ($primes[$i] > 256) && !($init++);
114 printf("\n\t") if (($i%8) == 0) && ($i != 0);
115 printf("%4d,",$primes[$i]);
116 }
117print "\n#endif\n\t};\n";
118
119
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
deleted file mode 100644
index bebb466d08..0000000000
--- a/src/lib/libcrypto/bn/bn_print.c
+++ /dev/null
@@ -1,359 +0,0 @@
1/* crypto/bn/bn_print.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <ctype.h>
61#include "cryptlib.h"
62#include <openssl/buffer.h>
63#include "bn_lcl.h"
64
65static const char Hex[]="0123456789ABCDEF";
66
67/* Must 'OPENSSL_free' the returned data */
68char *BN_bn2hex(const BIGNUM *a)
69 {
70 int i,j,v,z=0;
71 char *buf;
72 char *p;
73
74 buf=(char *)OPENSSL_malloc(a->top*BN_BYTES*2+2);
75 if (buf == NULL)
76 {
77 BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE);
78 goto err;
79 }
80 p=buf;
81 if (a->neg) *(p++)='-';
82 if (BN_is_zero(a)) *(p++)='0';
83 for (i=a->top-1; i >=0; i--)
84 {
85 for (j=BN_BITS2-8; j >= 0; j-=8)
86 {
87 /* strip leading zeros */
88 v=((int)(a->d[i]>>(long)j))&0xff;
89 if (z || (v != 0))
90 {
91 *(p++)=Hex[v>>4];
92 *(p++)=Hex[v&0x0f];
93 z=1;
94 }
95 }
96 }
97 *p='\0';
98err:
99 return(buf);
100 }
101
102/* Must 'OPENSSL_free' the returned data */
103char *BN_bn2dec(const BIGNUM *a)
104 {
105 int i=0,num, ok = 0;
106 char *buf=NULL;
107 char *p;
108 BIGNUM *t=NULL;
109 BN_ULONG *bn_data=NULL,*lp;
110
111 /* get an upper bound for the length of the decimal integer
112 * num <= (BN_num_bits(a) + 1) * log(2)
113 * <= 3 * BN_num_bits(a) * 0.1001 + log(2) + 1 (rounding error)
114 * <= BN_num_bits(a)/10 + BN_num_bits/1000 + 1 + 1
115 */
116 i=BN_num_bits(a)*3;
117 num=(i/10+i/1000+1)+1;
118 bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG));
119 buf=(char *)OPENSSL_malloc(num+3);
120 if ((buf == NULL) || (bn_data == NULL))
121 {
122 BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE);
123 goto err;
124 }
125 if ((t=BN_dup(a)) == NULL) goto err;
126
127#define BUF_REMAIN (num+3 - (size_t)(p - buf))
128 p=buf;
129 lp=bn_data;
130 if (BN_is_zero(t))
131 {
132 *(p++)='0';
133 *(p++)='\0';
134 }
135 else
136 {
137 if (BN_is_negative(t))
138 *p++ = '-';
139
140 i=0;
141 while (!BN_is_zero(t))
142 {
143 *lp=BN_div_word(t,BN_DEC_CONV);
144 lp++;
145 }
146 lp--;
147 /* We now have a series of blocks, BN_DEC_NUM chars
148 * in length, where the last one needs truncation.
149 * The blocks need to be reversed in order. */
150 BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT1,*lp);
151 while (*p) p++;
152 while (lp != bn_data)
153 {
154 lp--;
155 BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT2,*lp);
156 while (*p) p++;
157 }
158 }
159 ok = 1;
160err:
161 if (bn_data != NULL) OPENSSL_free(bn_data);
162 if (t != NULL) BN_free(t);
163 if (!ok && buf)
164 {
165 OPENSSL_free(buf);
166 buf = NULL;
167 }
168
169 return(buf);
170 }
171
172int BN_hex2bn(BIGNUM **bn, const char *a)
173 {
174 BIGNUM *ret=NULL;
175 BN_ULONG l=0;
176 int neg=0,h,m,i,j,k,c;
177 int num;
178
179 if ((a == NULL) || (*a == '\0')) return(0);
180
181 if (*a == '-') { neg=1; a++; }
182
183 for (i=0; isxdigit((unsigned char) a[i]); i++)
184 ;
185
186 num=i+neg;
187 if (bn == NULL) return(num);
188
189 /* a is the start of the hex digits, and it is 'i' long */
190 if (*bn == NULL)
191 {
192 if ((ret=BN_new()) == NULL) return(0);
193 }
194 else
195 {
196 ret= *bn;
197 BN_zero(ret);
198 }
199
200 /* i is the number of hex digests; */
201 if (bn_expand(ret,i*4) == NULL) goto err;
202
203 j=i; /* least significant 'hex' */
204 m=0;
205 h=0;
206 while (j > 0)
207 {
208 m=((BN_BYTES*2) <= j)?(BN_BYTES*2):j;
209 l=0;
210 for (;;)
211 {
212 c=a[j-m];
213 if ((c >= '0') && (c <= '9')) k=c-'0';
214 else if ((c >= 'a') && (c <= 'f')) k=c-'a'+10;
215 else if ((c >= 'A') && (c <= 'F')) k=c-'A'+10;
216 else k=0; /* paranoia */
217 l=(l<<4)|k;
218
219 if (--m <= 0)
220 {
221 ret->d[h++]=l;
222 break;
223 }
224 }
225 j-=(BN_BYTES*2);
226 }
227 ret->top=h;
228 bn_correct_top(ret);
229 ret->neg=neg;
230
231 *bn=ret;
232 bn_check_top(ret);
233 return(num);
234err:
235 if (*bn == NULL) BN_free(ret);
236 return(0);
237 }
238
239int BN_dec2bn(BIGNUM **bn, const char *a)
240 {
241 BIGNUM *ret=NULL;
242 BN_ULONG l=0;
243 int neg=0,i,j;
244 int num;
245
246 if ((a == NULL) || (*a == '\0')) return(0);
247 if (*a == '-') { neg=1; a++; }
248
249 for (i=0; isdigit((unsigned char) a[i]); i++)
250 ;
251
252 num=i+neg;
253 if (bn == NULL) return(num);
254
255 /* a is the start of the digits, and it is 'i' long.
256 * We chop it into BN_DEC_NUM digits at a time */
257 if (*bn == NULL)
258 {
259 if ((ret=BN_new()) == NULL) return(0);
260 }
261 else
262 {
263 ret= *bn;
264 BN_zero(ret);
265 }
266
267 /* i is the number of digests, a bit of an over expand; */
268 if (bn_expand(ret,i*4) == NULL) goto err;
269
270 j=BN_DEC_NUM-(i%BN_DEC_NUM);
271 if (j == BN_DEC_NUM) j=0;
272 l=0;
273 while (*a)
274 {
275 l*=10;
276 l+= *a-'0';
277 a++;
278 if (++j == BN_DEC_NUM)
279 {
280 BN_mul_word(ret,BN_DEC_CONV);
281 BN_add_word(ret,l);
282 l=0;
283 j=0;
284 }
285 }
286 ret->neg=neg;
287
288 bn_correct_top(ret);
289 *bn=ret;
290 bn_check_top(ret);
291 return(num);
292err:
293 if (*bn == NULL) BN_free(ret);
294 return(0);
295 }
296
297int BN_asc2bn(BIGNUM **bn, const char *a)
298 {
299 const char *p = a;
300 if (*p == '-')
301 p++;
302
303 if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x'))
304 {
305 if (!BN_hex2bn(bn, p + 2))
306 return 0;
307 }
308 else
309 {
310 if (!BN_dec2bn(bn, p))
311 return 0;
312 }
313 if (*a == '-')
314 (*bn)->neg = 1;
315 return 1;
316 }
317
318#ifndef OPENSSL_NO_BIO
319#ifndef OPENSSL_NO_FP_API
320int BN_print_fp(FILE *fp, const BIGNUM *a)
321 {
322 BIO *b;
323 int ret;
324
325 if ((b=BIO_new(BIO_s_file())) == NULL)
326 return(0);
327 BIO_set_fp(b,fp,BIO_NOCLOSE);
328 ret=BN_print(b,a);
329 BIO_free(b);
330 return(ret);
331 }
332#endif
333
334int BN_print(BIO *bp, const BIGNUM *a)
335 {
336 int i,j,v,z=0;
337 int ret=0;
338
339 if ((a->neg) && (BIO_write(bp,"-",1) != 1)) goto end;
340 if (BN_is_zero(a) && (BIO_write(bp,"0",1) != 1)) goto end;
341 for (i=a->top-1; i >=0; i--)
342 {
343 for (j=BN_BITS2-4; j >= 0; j-=4)
344 {
345 /* strip leading zeros */
346 v=((int)(a->d[i]>>(long)j))&0x0f;
347 if (z || (v != 0))
348 {
349 if (BIO_write(bp,&(Hex[v]),1) != 1)
350 goto end;
351 z=1;
352 }
353 }
354 }
355 ret=1;
356end:
357 return(ret);
358 }
359#endif
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c
deleted file mode 100644
index b376c28ff3..0000000000
--- a/src/lib/libcrypto/bn/bn_rand.c
+++ /dev/null
@@ -1,305 +0,0 @@
1/* crypto/bn/bn_rand.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include <time.h>
114#include "cryptlib.h"
115#include "bn_lcl.h"
116#include <openssl/rand.h>
117
118static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
119 {
120 unsigned char *buf=NULL;
121 int ret=0,bit,bytes,mask;
122 time_t tim;
123
124 if (bits == 0)
125 {
126 BN_zero(rnd);
127 return 1;
128 }
129
130 bytes=(bits+7)/8;
131 bit=(bits-1)%8;
132 mask=0xff<<(bit+1);
133
134 buf=(unsigned char *)OPENSSL_malloc(bytes);
135 if (buf == NULL)
136 {
137 BNerr(BN_F_BNRAND,ERR_R_MALLOC_FAILURE);
138 goto err;
139 }
140
141 /* make a random number and set the top and bottom bits */
142 time(&tim);
143 RAND_add(&tim,sizeof(tim),0.0);
144
145 if (pseudorand)
146 {
147 if (RAND_pseudo_bytes(buf, bytes) == -1)
148 goto err;
149 }
150 else
151 {
152 if (RAND_bytes(buf, bytes) <= 0)
153 goto err;
154 }
155
156#if 1
157 if (pseudorand == 2)
158 {
159 /* generate patterns that are more likely to trigger BN
160 library bugs */
161 int i;
162 unsigned char c;
163
164 for (i = 0; i < bytes; i++)
165 {
166 RAND_pseudo_bytes(&c, 1);
167 if (c >= 128 && i > 0)
168 buf[i] = buf[i-1];
169 else if (c < 42)
170 buf[i] = 0;
171 else if (c < 84)
172 buf[i] = 255;
173 }
174 }
175#endif
176
177 if (top != -1)
178 {
179 if (top)
180 {
181 if (bit == 0)
182 {
183 buf[0]=1;
184 buf[1]|=0x80;
185 }
186 else
187 {
188 buf[0]|=(3<<(bit-1));
189 }
190 }
191 else
192 {
193 buf[0]|=(1<<bit);
194 }
195 }
196 buf[0] &= ~mask;
197 if (bottom) /* set bottom bit if requested */
198 buf[bytes-1]|=1;
199 if (!BN_bin2bn(buf,bytes,rnd)) goto err;
200 ret=1;
201err:
202 if (buf != NULL)
203 {
204 OPENSSL_cleanse(buf,bytes);
205 OPENSSL_free(buf);
206 }
207 bn_check_top(rnd);
208 return(ret);
209 }
210
211int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
212 {
213 return bnrand(0, rnd, bits, top, bottom);
214 }
215
216int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
217 {
218 return bnrand(1, rnd, bits, top, bottom);
219 }
220
221#if 1
222int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
223 {
224 return bnrand(2, rnd, bits, top, bottom);
225 }
226#endif
227
228
229/* random number r: 0 <= r < range */
230static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
231 {
232 int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand;
233 int n;
234 int count = 100;
235
236 if (range->neg || BN_is_zero(range))
237 {
238 BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE);
239 return 0;
240 }
241
242 n = BN_num_bits(range); /* n > 0 */
243
244 /* BN_is_bit_set(range, n - 1) always holds */
245
246 if (n == 1)
247 BN_zero(r);
248 else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3))
249 {
250 /* range = 100..._2,
251 * so 3*range (= 11..._2) is exactly one bit longer than range */
252 do
253 {
254 if (!bn_rand(r, n + 1, -1, 0)) return 0;
255 /* If r < 3*range, use r := r MOD range
256 * (which is either r, r - range, or r - 2*range).
257 * Otherwise, iterate once more.
258 * Since 3*range = 11..._2, each iteration succeeds with
259 * probability >= .75. */
260 if (BN_cmp(r ,range) >= 0)
261 {
262 if (!BN_sub(r, r, range)) return 0;
263 if (BN_cmp(r, range) >= 0)
264 if (!BN_sub(r, r, range)) return 0;
265 }
266
267 if (!--count)
268 {
269 BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
270 return 0;
271 }
272
273 }
274 while (BN_cmp(r, range) >= 0);
275 }
276 else
277 {
278 do
279 {
280 /* range = 11..._2 or range = 101..._2 */
281 if (!bn_rand(r, n, -1, 0)) return 0;
282
283 if (!--count)
284 {
285 BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
286 return 0;
287 }
288 }
289 while (BN_cmp(r, range) >= 0);
290 }
291
292 bn_check_top(r);
293 return 1;
294 }
295
296
297int BN_rand_range(BIGNUM *r, const BIGNUM *range)
298 {
299 return bn_rand_range(0, r, range);
300 }
301
302int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
303 {
304 return bn_rand_range(1, r, range);
305 }
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c
deleted file mode 100644
index 2e8efb8dae..0000000000
--- a/src/lib/libcrypto/bn/bn_recp.c
+++ /dev/null
@@ -1,234 +0,0 @@
1/* crypto/bn/bn_recp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63void BN_RECP_CTX_init(BN_RECP_CTX *recp)
64 {
65 BN_init(&(recp->N));
66 BN_init(&(recp->Nr));
67 recp->num_bits=0;
68 recp->flags=0;
69 }
70
71BN_RECP_CTX *BN_RECP_CTX_new(void)
72 {
73 BN_RECP_CTX *ret;
74
75 if ((ret=(BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL)
76 return(NULL);
77
78 BN_RECP_CTX_init(ret);
79 ret->flags=BN_FLG_MALLOCED;
80 return(ret);
81 }
82
83void BN_RECP_CTX_free(BN_RECP_CTX *recp)
84 {
85 if(recp == NULL)
86 return;
87
88 BN_free(&(recp->N));
89 BN_free(&(recp->Nr));
90 if (recp->flags & BN_FLG_MALLOCED)
91 OPENSSL_free(recp);
92 }
93
94int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
95 {
96 if (!BN_copy(&(recp->N),d)) return 0;
97 BN_zero(&(recp->Nr));
98 recp->num_bits=BN_num_bits(d);
99 recp->shift=0;
100 return(1);
101 }
102
103int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
104 BN_RECP_CTX *recp, BN_CTX *ctx)
105 {
106 int ret=0;
107 BIGNUM *a;
108 const BIGNUM *ca;
109
110 BN_CTX_start(ctx);
111 if ((a = BN_CTX_get(ctx)) == NULL) goto err;
112 if (y != NULL)
113 {
114 if (x == y)
115 { if (!BN_sqr(a,x,ctx)) goto err; }
116 else
117 { if (!BN_mul(a,x,y,ctx)) goto err; }
118 ca = a;
119 }
120 else
121 ca=x; /* Just do the mod */
122
123 ret = BN_div_recp(NULL,r,ca,recp,ctx);
124err:
125 BN_CTX_end(ctx);
126 bn_check_top(r);
127 return(ret);
128 }
129
130int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
131 BN_RECP_CTX *recp, BN_CTX *ctx)
132 {
133 int i,j,ret=0;
134 BIGNUM *a,*b,*d,*r;
135
136 BN_CTX_start(ctx);
137 a=BN_CTX_get(ctx);
138 b=BN_CTX_get(ctx);
139 if (dv != NULL)
140 d=dv;
141 else
142 d=BN_CTX_get(ctx);
143 if (rem != NULL)
144 r=rem;
145 else
146 r=BN_CTX_get(ctx);
147 if (a == NULL || b == NULL || d == NULL || r == NULL) goto err;
148
149 if (BN_ucmp(m,&(recp->N)) < 0)
150 {
151 BN_zero(d);
152 if (!BN_copy(r,m)) return 0;
153 BN_CTX_end(ctx);
154 return(1);
155 }
156
157 /* We want the remainder
158 * Given input of ABCDEF / ab
159 * we need multiply ABCDEF by 3 digests of the reciprocal of ab
160 *
161 */
162
163 /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
164 i=BN_num_bits(m);
165 j=recp->num_bits<<1;
166 if (j>i) i=j;
167
168 /* Nr := round(2^i / N) */
169 if (i != recp->shift)
170 recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N),
171 i,ctx); /* BN_reciprocal returns i, or -1 for an error */
172 if (recp->shift == -1) goto err;
173
174 /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))|
175 * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))|
176 * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
177 * = |m/N|
178 */
179 if (!BN_rshift(a,m,recp->num_bits)) goto err;
180 if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err;
181 if (!BN_rshift(d,b,i-recp->num_bits)) goto err;
182 d->neg=0;
183
184 if (!BN_mul(b,&(recp->N),d,ctx)) goto err;
185 if (!BN_usub(r,m,b)) goto err;
186 r->neg=0;
187
188#if 1
189 j=0;
190 while (BN_ucmp(r,&(recp->N)) >= 0)
191 {
192 if (j++ > 2)
193 {
194 BNerr(BN_F_BN_DIV_RECP,BN_R_BAD_RECIPROCAL);
195 goto err;
196 }
197 if (!BN_usub(r,r,&(recp->N))) goto err;
198 if (!BN_add_word(d,1)) goto err;
199 }
200#endif
201
202 r->neg=BN_is_zero(r)?0:m->neg;
203 d->neg=m->neg^recp->N.neg;
204 ret=1;
205err:
206 BN_CTX_end(ctx);
207 bn_check_top(dv);
208 bn_check_top(rem);
209 return(ret);
210 }
211
212/* len is the expected size of the result
213 * We actually calculate with an extra word of precision, so
214 * we can do faster division if the remainder is not required.
215 */
216/* r := 2^len / m */
217int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
218 {
219 int ret= -1;
220 BIGNUM *t;
221
222 BN_CTX_start(ctx);
223 if((t = BN_CTX_get(ctx)) == NULL) goto err;
224
225 if (!BN_set_bit(t,len)) goto err;
226
227 if (!BN_div(r,NULL,t,m,ctx)) goto err;
228
229 ret=len;
230err:
231 bn_check_top(r);
232 BN_CTX_end(ctx);
233 return(ret);
234 }
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
deleted file mode 100644
index c4d301afc4..0000000000
--- a/src/lib/libcrypto/bn/bn_shift.c
+++ /dev/null
@@ -1,220 +0,0 @@
1/* crypto/bn/bn_shift.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63int BN_lshift1(BIGNUM *r, const BIGNUM *a)
64 {
65 register BN_ULONG *ap,*rp,t,c;
66 int i;
67
68 bn_check_top(r);
69 bn_check_top(a);
70
71 if (r != a)
72 {
73 r->neg=a->neg;
74 if (bn_wexpand(r,a->top+1) == NULL) return(0);
75 r->top=a->top;
76 }
77 else
78 {
79 if (bn_wexpand(r,a->top+1) == NULL) return(0);
80 }
81 ap=a->d;
82 rp=r->d;
83 c=0;
84 for (i=0; i<a->top; i++)
85 {
86 t= *(ap++);
87 *(rp++)=((t<<1)|c)&BN_MASK2;
88 c=(t & BN_TBIT)?1:0;
89 }
90 if (c)
91 {
92 *rp=1;
93 r->top++;
94 }
95 bn_check_top(r);
96 return(1);
97 }
98
99int BN_rshift1(BIGNUM *r, const BIGNUM *a)
100 {
101 BN_ULONG *ap,*rp,t,c;
102 int i;
103
104 bn_check_top(r);
105 bn_check_top(a);
106
107 if (BN_is_zero(a))
108 {
109 BN_zero(r);
110 return(1);
111 }
112 if (a != r)
113 {
114 if (bn_wexpand(r,a->top) == NULL) return(0);
115 r->top=a->top;
116 r->neg=a->neg;
117 }
118 ap=a->d;
119 rp=r->d;
120 c=0;
121 for (i=a->top-1; i>=0; i--)
122 {
123 t=ap[i];
124 rp[i]=((t>>1)&BN_MASK2)|c;
125 c=(t&1)?BN_TBIT:0;
126 }
127 bn_correct_top(r);
128 bn_check_top(r);
129 return(1);
130 }
131
132int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
133 {
134 int i,nw,lb,rb;
135 BN_ULONG *t,*f;
136 BN_ULONG l;
137
138 bn_check_top(r);
139 bn_check_top(a);
140
141 r->neg=a->neg;
142 nw=n/BN_BITS2;
143 if (bn_wexpand(r,a->top+nw+1) == NULL) return(0);
144 lb=n%BN_BITS2;
145 rb=BN_BITS2-lb;
146 f=a->d;
147 t=r->d;
148 t[a->top+nw]=0;
149 if (lb == 0)
150 for (i=a->top-1; i>=0; i--)
151 t[nw+i]=f[i];
152 else
153 for (i=a->top-1; i>=0; i--)
154 {
155 l=f[i];
156 t[nw+i+1]|=(l>>rb)&BN_MASK2;
157 t[nw+i]=(l<<lb)&BN_MASK2;
158 }
159 memset(t,0,nw*sizeof(t[0]));
160/* for (i=0; i<nw; i++)
161 t[i]=0;*/
162 r->top=a->top+nw+1;
163 bn_correct_top(r);
164 bn_check_top(r);
165 return(1);
166 }
167
168int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
169 {
170 int i,j,nw,lb,rb;
171 BN_ULONG *t,*f;
172 BN_ULONG l,tmp;
173
174 bn_check_top(r);
175 bn_check_top(a);
176
177 nw=n/BN_BITS2;
178 rb=n%BN_BITS2;
179 lb=BN_BITS2-rb;
180 if (nw >= a->top || a->top == 0)
181 {
182 BN_zero(r);
183 return(1);
184 }
185 if (r != a)
186 {
187 r->neg=a->neg;
188 if (bn_wexpand(r,a->top-nw+1) == NULL) return(0);
189 }
190 else
191 {
192 if (n == 0)
193 return 1; /* or the copying loop will go berserk */
194 }
195
196 f= &(a->d[nw]);
197 t=r->d;
198 j=a->top-nw;
199 r->top=j;
200
201 if (rb == 0)
202 {
203 for (i=j; i != 0; i--)
204 *(t++)= *(f++);
205 }
206 else
207 {
208 l= *(f++);
209 for (i=j-1; i != 0; i--)
210 {
211 tmp =(l>>rb)&BN_MASK2;
212 l= *(f++);
213 *(t++) =(tmp|(l<<lb))&BN_MASK2;
214 }
215 *(t++) =(l>>rb)&BN_MASK2;
216 }
217 bn_correct_top(r);
218 bn_check_top(r);
219 return(1);
220 }
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
deleted file mode 100644
index 270d0cd348..0000000000
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ /dev/null
@@ -1,294 +0,0 @@
1/* crypto/bn/bn_sqr.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* r must not be a */
64/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
65int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
66 {
67 int max,al;
68 int ret = 0;
69 BIGNUM *tmp,*rr;
70
71#ifdef BN_COUNT
72 fprintf(stderr,"BN_sqr %d * %d\n",a->top,a->top);
73#endif
74 bn_check_top(a);
75
76 al=a->top;
77 if (al <= 0)
78 {
79 r->top=0;
80 return 1;
81 }
82
83 BN_CTX_start(ctx);
84 rr=(a != r) ? r : BN_CTX_get(ctx);
85 tmp=BN_CTX_get(ctx);
86 if (!rr || !tmp) goto err;
87
88 max = 2 * al; /* Non-zero (from above) */
89 if (bn_wexpand(rr,max) == NULL) goto err;
90
91 if (al == 4)
92 {
93#ifndef BN_SQR_COMBA
94 BN_ULONG t[8];
95 bn_sqr_normal(rr->d,a->d,4,t);
96#else
97 bn_sqr_comba4(rr->d,a->d);
98#endif
99 }
100 else if (al == 8)
101 {
102#ifndef BN_SQR_COMBA
103 BN_ULONG t[16];
104 bn_sqr_normal(rr->d,a->d,8,t);
105#else
106 bn_sqr_comba8(rr->d,a->d);
107#endif
108 }
109 else
110 {
111#if defined(BN_RECURSION)
112 if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
113 {
114 BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
115 bn_sqr_normal(rr->d,a->d,al,t);
116 }
117 else
118 {
119 int j,k;
120
121 j=BN_num_bits_word((BN_ULONG)al);
122 j=1<<(j-1);
123 k=j+j;
124 if (al == j)
125 {
126 if (bn_wexpand(tmp,k*2) == NULL) goto err;
127 bn_sqr_recursive(rr->d,a->d,al,tmp->d);
128 }
129 else
130 {
131 if (bn_wexpand(tmp,max) == NULL) goto err;
132 bn_sqr_normal(rr->d,a->d,al,tmp->d);
133 }
134 }
135#else
136 if (bn_wexpand(tmp,max) == NULL) goto err;
137 bn_sqr_normal(rr->d,a->d,al,tmp->d);
138#endif
139 }
140
141 rr->neg=0;
142 /* If the most-significant half of the top word of 'a' is zero, then
143 * the square of 'a' will max-1 words. */
144 if(a->d[al - 1] == (a->d[al - 1] & BN_MASK2l))
145 rr->top = max - 1;
146 else
147 rr->top = max;
148 if (rr != r) BN_copy(r,rr);
149 ret = 1;
150 err:
151 bn_check_top(rr);
152 bn_check_top(tmp);
153 BN_CTX_end(ctx);
154 return(ret);
155 }
156
157/* tmp must have 2*n words */
158void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
159 {
160 int i,j,max;
161 const BN_ULONG *ap;
162 BN_ULONG *rp;
163
164 max=n*2;
165 ap=a;
166 rp=r;
167 rp[0]=rp[max-1]=0;
168 rp++;
169 j=n;
170
171 if (--j > 0)
172 {
173 ap++;
174 rp[j]=bn_mul_words(rp,ap,j,ap[-1]);
175 rp+=2;
176 }
177
178 for (i=n-2; i>0; i--)
179 {
180 j--;
181 ap++;
182 rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]);
183 rp+=2;
184 }
185
186 bn_add_words(r,r,r,max);
187
188 /* There will not be a carry */
189
190 bn_sqr_words(tmp,a,n);
191
192 bn_add_words(r,r,tmp,max);
193 }
194
195#ifdef BN_RECURSION
196/* r is 2*n words in size,
197 * a and b are both n words in size. (There's not actually a 'b' here ...)
198 * n must be a power of 2.
199 * We multiply and return the result.
200 * t must be 2*n words in size
201 * We calculate
202 * a[0]*b[0]
203 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
204 * a[1]*b[1]
205 */
206void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
207 {
208 int n=n2/2;
209 int zero,c1;
210 BN_ULONG ln,lo,*p;
211
212#ifdef BN_COUNT
213 fprintf(stderr," bn_sqr_recursive %d * %d\n",n2,n2);
214#endif
215 if (n2 == 4)
216 {
217#ifndef BN_SQR_COMBA
218 bn_sqr_normal(r,a,4,t);
219#else
220 bn_sqr_comba4(r,a);
221#endif
222 return;
223 }
224 else if (n2 == 8)
225 {
226#ifndef BN_SQR_COMBA
227 bn_sqr_normal(r,a,8,t);
228#else
229 bn_sqr_comba8(r,a);
230#endif
231 return;
232 }
233 if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
234 {
235 bn_sqr_normal(r,a,n2,t);
236 return;
237 }
238 /* r=(a[0]-a[1])*(a[1]-a[0]) */
239 c1=bn_cmp_words(a,&(a[n]),n);
240 zero=0;
241 if (c1 > 0)
242 bn_sub_words(t,a,&(a[n]),n);
243 else if (c1 < 0)
244 bn_sub_words(t,&(a[n]),a,n);
245 else
246 zero=1;
247
248 /* The result will always be negative unless it is zero */
249 p= &(t[n2*2]);
250
251 if (!zero)
252 bn_sqr_recursive(&(t[n2]),t,n,p);
253 else
254 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
255 bn_sqr_recursive(r,a,n,p);
256 bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
257
258 /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
259 * r[10] holds (a[0]*b[0])
260 * r[32] holds (b[1]*b[1])
261 */
262
263 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
264
265 /* t[32] is negative */
266 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
267
268 /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
269 * r[10] holds (a[0]*a[0])
270 * r[32] holds (a[1]*a[1])
271 * c1 holds the carry bits
272 */
273 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
274 if (c1)
275 {
276 p= &(r[n+n2]);
277 lo= *p;
278 ln=(lo+c1)&BN_MASK2;
279 *p=ln;
280
281 /* The overflow will stop before we over write
282 * words we should not overwrite */
283 if (ln < (BN_ULONG)c1)
284 {
285 do {
286 p++;
287 lo= *p;
288 ln=(lo+1)&BN_MASK2;
289 *p=ln;
290 } while (ln == 0);
291 }
292 }
293 }
294#endif
diff --git a/src/lib/libcrypto/bn/bn_sqrt.c b/src/lib/libcrypto/bn/bn_sqrt.c
deleted file mode 100644
index 6beaf9e5e5..0000000000
--- a/src/lib/libcrypto/bn/bn_sqrt.c
+++ /dev/null
@@ -1,393 +0,0 @@
1/* crypto/bn/bn_sqrt.c */
2/* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * and Bodo Moeller for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57
58#include "cryptlib.h"
59#include "bn_lcl.h"
60
61
62BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
63/* Returns 'ret' such that
64 * ret^2 == a (mod p),
65 * using the Tonelli/Shanks algorithm (cf. Henri Cohen, "A Course
66 * in Algebraic Computational Number Theory", algorithm 1.5.1).
67 * 'p' must be prime!
68 */
69 {
70 BIGNUM *ret = in;
71 int err = 1;
72 int r;
73 BIGNUM *A, *b, *q, *t, *x, *y;
74 int e, i, j;
75
76 if (!BN_is_odd(p) || BN_abs_is_word(p, 1))
77 {
78 if (BN_abs_is_word(p, 2))
79 {
80 if (ret == NULL)
81 ret = BN_new();
82 if (ret == NULL)
83 goto end;
84 if (!BN_set_word(ret, BN_is_bit_set(a, 0)))
85 {
86 if (ret != in)
87 BN_free(ret);
88 return NULL;
89 }
90 bn_check_top(ret);
91 return ret;
92 }
93
94 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
95 return(NULL);
96 }
97
98 if (BN_is_zero(a) || BN_is_one(a))
99 {
100 if (ret == NULL)
101 ret = BN_new();
102 if (ret == NULL)
103 goto end;
104 if (!BN_set_word(ret, BN_is_one(a)))
105 {
106 if (ret != in)
107 BN_free(ret);
108 return NULL;
109 }
110 bn_check_top(ret);
111 return ret;
112 }
113
114 BN_CTX_start(ctx);
115 A = BN_CTX_get(ctx);
116 b = BN_CTX_get(ctx);
117 q = BN_CTX_get(ctx);
118 t = BN_CTX_get(ctx);
119 x = BN_CTX_get(ctx);
120 y = BN_CTX_get(ctx);
121 if (y == NULL) goto end;
122
123 if (ret == NULL)
124 ret = BN_new();
125 if (ret == NULL) goto end;
126
127 /* A = a mod p */
128 if (!BN_nnmod(A, a, p, ctx)) goto end;
129
130 /* now write |p| - 1 as 2^e*q where q is odd */
131 e = 1;
132 while (!BN_is_bit_set(p, e))
133 e++;
134 /* we'll set q later (if needed) */
135
136 if (e == 1)
137 {
138 /* The easy case: (|p|-1)/2 is odd, so 2 has an inverse
139 * modulo (|p|-1)/2, and square roots can be computed
140 * directly by modular exponentiation.
141 * We have
142 * 2 * (|p|+1)/4 == 1 (mod (|p|-1)/2),
143 * so we can use exponent (|p|+1)/4, i.e. (|p|-3)/4 + 1.
144 */
145 if (!BN_rshift(q, p, 2)) goto end;
146 q->neg = 0;
147 if (!BN_add_word(q, 1)) goto end;
148 if (!BN_mod_exp(ret, A, q, p, ctx)) goto end;
149 err = 0;
150 goto vrfy;
151 }
152
153 if (e == 2)
154 {
155 /* |p| == 5 (mod 8)
156 *
157 * In this case 2 is always a non-square since
158 * Legendre(2,p) = (-1)^((p^2-1)/8) for any odd prime.
159 * So if a really is a square, then 2*a is a non-square.
160 * Thus for
161 * b := (2*a)^((|p|-5)/8),
162 * i := (2*a)*b^2
163 * we have
164 * i^2 = (2*a)^((1 + (|p|-5)/4)*2)
165 * = (2*a)^((p-1)/2)
166 * = -1;
167 * so if we set
168 * x := a*b*(i-1),
169 * then
170 * x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
171 * = a^2 * b^2 * (-2*i)
172 * = a*(-i)*(2*a*b^2)
173 * = a*(-i)*i
174 * = a.
175 *
176 * (This is due to A.O.L. Atkin,
177 * <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
178 * November 1992.)
179 */
180
181 /* t := 2*a */
182 if (!BN_mod_lshift1_quick(t, A, p)) goto end;
183
184 /* b := (2*a)^((|p|-5)/8) */
185 if (!BN_rshift(q, p, 3)) goto end;
186 q->neg = 0;
187 if (!BN_mod_exp(b, t, q, p, ctx)) goto end;
188
189 /* y := b^2 */
190 if (!BN_mod_sqr(y, b, p, ctx)) goto end;
191
192 /* t := (2*a)*b^2 - 1*/
193 if (!BN_mod_mul(t, t, y, p, ctx)) goto end;
194 if (!BN_sub_word(t, 1)) goto end;
195
196 /* x = a*b*t */
197 if (!BN_mod_mul(x, A, b, p, ctx)) goto end;
198 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
199
200 if (!BN_copy(ret, x)) goto end;
201 err = 0;
202 goto vrfy;
203 }
204
205 /* e > 2, so we really have to use the Tonelli/Shanks algorithm.
206 * First, find some y that is not a square. */
207 if (!BN_copy(q, p)) goto end; /* use 'q' as temp */
208 q->neg = 0;
209 i = 2;
210 do
211 {
212 /* For efficiency, try small numbers first;
213 * if this fails, try random numbers.
214 */
215 if (i < 22)
216 {
217 if (!BN_set_word(y, i)) goto end;
218 }
219 else
220 {
221 if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) goto end;
222 if (BN_ucmp(y, p) >= 0)
223 {
224 if (!(p->neg ? BN_add : BN_sub)(y, y, p)) goto end;
225 }
226 /* now 0 <= y < |p| */
227 if (BN_is_zero(y))
228 if (!BN_set_word(y, i)) goto end;
229 }
230
231 r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
232 if (r < -1) goto end;
233 if (r == 0)
234 {
235 /* m divides p */
236 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
237 goto end;
238 }
239 }
240 while (r == 1 && ++i < 82);
241
242 if (r != -1)
243 {
244 /* Many rounds and still no non-square -- this is more likely
245 * a bug than just bad luck.
246 * Even if p is not prime, we should have found some y
247 * such that r == -1.
248 */
249 BNerr(BN_F_BN_MOD_SQRT, BN_R_TOO_MANY_ITERATIONS);
250 goto end;
251 }
252
253 /* Here's our actual 'q': */
254 if (!BN_rshift(q, q, e)) goto end;
255
256 /* Now that we have some non-square, we can find an element
257 * of order 2^e by computing its q'th power. */
258 if (!BN_mod_exp(y, y, q, p, ctx)) goto end;
259 if (BN_is_one(y))
260 {
261 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
262 goto end;
263 }
264
265 /* Now we know that (if p is indeed prime) there is an integer
266 * k, 0 <= k < 2^e, such that
267 *
268 * a^q * y^k == 1 (mod p).
269 *
270 * As a^q is a square and y is not, k must be even.
271 * q+1 is even, too, so there is an element
272 *
273 * X := a^((q+1)/2) * y^(k/2),
274 *
275 * and it satisfies
276 *
277 * X^2 = a^q * a * y^k
278 * = a,
279 *
280 * so it is the square root that we are looking for.
281 */
282
283 /* t := (q-1)/2 (note that q is odd) */
284 if (!BN_rshift1(t, q)) goto end;
285
286 /* x := a^((q-1)/2) */
287 if (BN_is_zero(t)) /* special case: p = 2^e + 1 */
288 {
289 if (!BN_nnmod(t, A, p, ctx)) goto end;
290 if (BN_is_zero(t))
291 {
292 /* special case: a == 0 (mod p) */
293 BN_zero(ret);
294 err = 0;
295 goto end;
296 }
297 else
298 if (!BN_one(x)) goto end;
299 }
300 else
301 {
302 if (!BN_mod_exp(x, A, t, p, ctx)) goto end;
303 if (BN_is_zero(x))
304 {
305 /* special case: a == 0 (mod p) */
306 BN_zero(ret);
307 err = 0;
308 goto end;
309 }
310 }
311
312 /* b := a*x^2 (= a^q) */
313 if (!BN_mod_sqr(b, x, p, ctx)) goto end;
314 if (!BN_mod_mul(b, b, A, p, ctx)) goto end;
315
316 /* x := a*x (= a^((q+1)/2)) */
317 if (!BN_mod_mul(x, x, A, p, ctx)) goto end;
318
319 while (1)
320 {
321 /* Now b is a^q * y^k for some even k (0 <= k < 2^E
322 * where E refers to the original value of e, which we
323 * don't keep in a variable), and x is a^((q+1)/2) * y^(k/2).
324 *
325 * We have a*b = x^2,
326 * y^2^(e-1) = -1,
327 * b^2^(e-1) = 1.
328 */
329
330 if (BN_is_one(b))
331 {
332 if (!BN_copy(ret, x)) goto end;
333 err = 0;
334 goto vrfy;
335 }
336
337
338 /* find smallest i such that b^(2^i) = 1 */
339 i = 1;
340 if (!BN_mod_sqr(t, b, p, ctx)) goto end;
341 while (!BN_is_one(t))
342 {
343 i++;
344 if (i == e)
345 {
346 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
347 goto end;
348 }
349 if (!BN_mod_mul(t, t, t, p, ctx)) goto end;
350 }
351
352
353 /* t := y^2^(e - i - 1) */
354 if (!BN_copy(t, y)) goto end;
355 for (j = e - i - 1; j > 0; j--)
356 {
357 if (!BN_mod_sqr(t, t, p, ctx)) goto end;
358 }
359 if (!BN_mod_mul(y, t, t, p, ctx)) goto end;
360 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
361 if (!BN_mod_mul(b, b, y, p, ctx)) goto end;
362 e = i;
363 }
364
365 vrfy:
366 if (!err)
367 {
368 /* verify the result -- the input might have been not a square
369 * (test added in 0.9.8) */
370
371 if (!BN_mod_sqr(x, ret, p, ctx))
372 err = 1;
373
374 if (!err && 0 != BN_cmp(x, A))
375 {
376 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
377 err = 1;
378 }
379 }
380
381 end:
382 if (err)
383 {
384 if (ret != NULL && ret != in)
385 {
386 BN_clear_free(ret);
387 }
388 ret = NULL;
389 }
390 BN_CTX_end(ctx);
391 bn_check_top(ret);
392 return ret;
393 }
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c
deleted file mode 100644
index ee7b87c45c..0000000000
--- a/src/lib/libcrypto/bn/bn_word.c
+++ /dev/null
@@ -1,247 +0,0 @@
1/* crypto/bn/bn_word.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w)
64 {
65#ifndef BN_LLONG
66 BN_ULONG ret=0;
67#else
68 BN_ULLONG ret=0;
69#endif
70 int i;
71
72 if (w == 0)
73 return (BN_ULONG)-1;
74
75 bn_check_top(a);
76 w&=BN_MASK2;
77 for (i=a->top-1; i>=0; i--)
78 {
79#ifndef BN_LLONG
80 ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%w;
81 ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%w;
82#else
83 ret=(BN_ULLONG)(((ret<<(BN_ULLONG)BN_BITS2)|a->d[i])%
84 (BN_ULLONG)w);
85#endif
86 }
87 return((BN_ULONG)ret);
88 }
89
90BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
91 {
92 BN_ULONG ret = 0;
93 int i, j;
94
95 bn_check_top(a);
96 w &= BN_MASK2;
97
98 if (!w)
99 /* actually this an error (division by zero) */
100 return (BN_ULONG)-1;
101 if (a->top == 0)
102 return 0;
103
104 /* normalize input (so bn_div_words doesn't complain) */
105 j = BN_BITS2 - BN_num_bits_word(w);
106 w <<= j;
107 if (!BN_lshift(a, a, j))
108 return (BN_ULONG)-1;
109
110 for (i=a->top-1; i>=0; i--)
111 {
112 BN_ULONG l,d;
113
114 l=a->d[i];
115 d=bn_div_words(ret,l,w);
116 ret=(l-((d*w)&BN_MASK2))&BN_MASK2;
117 a->d[i]=d;
118 }
119 if ((a->top > 0) && (a->d[a->top-1] == 0))
120 a->top--;
121 ret >>= j;
122 bn_check_top(a);
123 return(ret);
124 }
125
126int BN_add_word(BIGNUM *a, BN_ULONG w)
127 {
128 BN_ULONG l;
129 int i;
130
131 bn_check_top(a);
132 w &= BN_MASK2;
133
134 /* degenerate case: w is zero */
135 if (!w) return 1;
136 /* degenerate case: a is zero */
137 if(BN_is_zero(a)) return BN_set_word(a, w);
138 /* handle 'a' when negative */
139 if (a->neg)
140 {
141 a->neg=0;
142 i=BN_sub_word(a,w);
143 if (!BN_is_zero(a))
144 a->neg=!(a->neg);
145 return(i);
146 }
147 /* Only expand (and risk failing) if it's possibly necessary */
148 if (((BN_ULONG)(a->d[a->top - 1] + 1) == 0) &&
149 (bn_wexpand(a,a->top+1) == NULL))
150 return(0);
151 i=0;
152 for (;;)
153 {
154 if (i >= a->top)
155 l=w;
156 else
157 l=(a->d[i]+w)&BN_MASK2;
158 a->d[i]=l;
159 if (w > l)
160 w=1;
161 else
162 break;
163 i++;
164 }
165 if (i >= a->top)
166 a->top++;
167 bn_check_top(a);
168 return(1);
169 }
170
171int BN_sub_word(BIGNUM *a, BN_ULONG w)
172 {
173 int i;
174
175 bn_check_top(a);
176 w &= BN_MASK2;
177
178 /* degenerate case: w is zero */
179 if (!w) return 1;
180 /* degenerate case: a is zero */
181 if(BN_is_zero(a))
182 {
183 i = BN_set_word(a,w);
184 if (i != 0)
185 BN_set_negative(a, 1);
186 return i;
187 }
188 /* handle 'a' when negative */
189 if (a->neg)
190 {
191 a->neg=0;
192 i=BN_add_word(a,w);
193 a->neg=1;
194 return(i);
195 }
196
197 if ((a->top == 1) && (a->d[0] < w))
198 {
199 a->d[0]=w-a->d[0];
200 a->neg=1;
201 return(1);
202 }
203 i=0;
204 for (;;)
205 {
206 if (a->d[i] >= w)
207 {
208 a->d[i]-=w;
209 break;
210 }
211 else
212 {
213 a->d[i]=(a->d[i]-w)&BN_MASK2;
214 i++;
215 w=1;
216 }
217 }
218 if ((a->d[i] == 0) && (i == (a->top-1)))
219 a->top--;
220 bn_check_top(a);
221 return(1);
222 }
223
224int BN_mul_word(BIGNUM *a, BN_ULONG w)
225 {
226 BN_ULONG ll;
227
228 bn_check_top(a);
229 w&=BN_MASK2;
230 if (a->top)
231 {
232 if (w == 0)
233 BN_zero(a);
234 else
235 {
236 ll=bn_mul_words(a->d,a->d,a->top,w);
237 if (ll)
238 {
239 if (bn_wexpand(a,a->top+1) == NULL) return(0);
240 a->d[a->top++]=ll;
241 }
242 }
243 }
244 bn_check_top(a);
245 return(1);
246 }
247
diff --git a/src/lib/libcrypto/bn/bnspeed.c b/src/lib/libcrypto/bn/bnspeed.c
deleted file mode 100644
index b554ac8cf8..0000000000
--- a/src/lib/libcrypto/bn/bnspeed.c
+++ /dev/null
@@ -1,233 +0,0 @@
1/* unused */
2
3/* crypto/bn/bnspeed.c */
4/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
5 * All rights reserved.
6 *
7 * This package is an SSL implementation written
8 * by Eric Young (eay@cryptsoft.com).
9 * The implementation was written so as to conform with Netscapes SSL.
10 *
11 * This library is free for commercial and non-commercial use as long as
12 * the following conditions are aheared to. The following conditions
13 * apply to all code found in this distribution, be it the RC4, RSA,
14 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
15 * included with this distribution is covered by the same copyright terms
16 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
17 *
18 * Copyright remains Eric Young's, and as such any Copyright notices in
19 * the code are not to be removed.
20 * If this package is used in a product, Eric Young should be given attribution
21 * as the author of the parts of the library used.
22 * This can be in the form of a textual message at program startup or
23 * in documentation (online or textual) provided with the package.
24 *
25 * Redistribution and use in source and binary forms, with or without
26 * modification, are permitted provided that the following conditions
27 * are met:
28 * 1. Redistributions of source code must retain the copyright
29 * notice, this list of conditions and the following disclaimer.
30 * 2. Redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution.
33 * 3. All advertising materials mentioning features or use of this software
34 * must display the following acknowledgement:
35 * "This product includes cryptographic software written by
36 * Eric Young (eay@cryptsoft.com)"
37 * The word 'cryptographic' can be left out if the rouines from the library
38 * being used are not cryptographic related :-).
39 * 4. If you include any Windows specific code (or a derivative thereof) from
40 * the apps directory (application code) you must include an acknowledgement:
41 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
42 *
43 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * The licence and distribution terms for any publically available version or
56 * derivative of this code cannot be changed. i.e. this code cannot simply be
57 * copied and put under another distribution licence
58 * [including the GNU Public Licence.]
59 */
60
61/* most of this code has been pilfered from my libdes speed.c program */
62
63#define BASENUM 1000000
64#undef PROG
65#define PROG bnspeed_main
66
67#include <stdio.h>
68#include <stdlib.h>
69#include <signal.h>
70#include <string.h>
71#include <openssl/crypto.h>
72#include <openssl/err.h>
73
74#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
75#define TIMES
76#endif
77
78#ifndef _IRIX
79#include <time.h>
80#endif
81#ifdef TIMES
82#include <sys/types.h>
83#include <sys/times.h>
84#endif
85
86/* Depending on the VMS version, the tms structure is perhaps defined.
87 The __TMS macro will show if it was. If it wasn't defined, we should
88 undefine TIMES, since that tells the rest of the program how things
89 should be handled. -- Richard Levitte */
90#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
91#undef TIMES
92#endif
93
94#ifndef TIMES
95#include <sys/timeb.h>
96#endif
97
98#if defined(sun) || defined(__ultrix)
99#define _POSIX_SOURCE
100#include <limits.h>
101#include <sys/param.h>
102#endif
103
104#include <openssl/bn.h>
105#include <openssl/x509.h>
106
107/* The following if from times(3) man page. It may need to be changed */
108#ifndef HZ
109# ifndef CLK_TCK
110# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
111# define HZ 100.0
112# else /* _BSD_CLK_TCK_ */
113# define HZ ((double)_BSD_CLK_TCK_)
114# endif
115# else /* CLK_TCK */
116# define HZ ((double)CLK_TCK)
117# endif
118#endif
119
120#undef BUFSIZE
121#define BUFSIZE ((long)1024*8)
122int run=0;
123
124static double Time_F(int s);
125#define START 0
126#define STOP 1
127
128static double Time_F(int s)
129 {
130 double ret;
131#ifdef TIMES
132 static struct tms tstart,tend;
133
134 if (s == START)
135 {
136 times(&tstart);
137 return(0);
138 }
139 else
140 {
141 times(&tend);
142 ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ;
143 return((ret < 1e-3)?1e-3:ret);
144 }
145#else /* !times() */
146 static struct timeb tstart,tend;
147 long i;
148
149 if (s == START)
150 {
151 ftime(&tstart);
152 return(0);
153 }
154 else
155 {
156 ftime(&tend);
157 i=(long)tend.millitm-(long)tstart.millitm;
158 ret=((double)(tend.time-tstart.time))+((double)i)/1000.0;
159 return((ret < 0.001)?0.001:ret);
160 }
161#endif
162 }
163
164#define NUM_SIZES 5
165static int sizes[NUM_SIZES]={128,256,512,1024,2048};
166/*static int sizes[NUM_SIZES]={59,179,299,419,539}; */
167
168void do_mul(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_CTX *ctx);
169
170int main(int argc, char **argv)
171 {
172 BN_CTX *ctx;
173 BIGNUM a,b,c;
174
175 ctx=BN_CTX_new();
176 BN_init(&a);
177 BN_init(&b);
178 BN_init(&c);
179
180 do_mul(&a,&b,&c,ctx);
181 }
182
183void do_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
184 {
185 int i,j,k;
186 double tm;
187 long num;
188
189 for (i=0; i<NUM_SIZES; i++)
190 {
191 num=BASENUM;
192 if (i) num/=(i*3);
193 BN_rand(a,sizes[i],1,0);
194 for (j=i; j<NUM_SIZES; j++)
195 {
196 BN_rand(b,sizes[j],1,0);
197 Time_F(START);
198 for (k=0; k<num; k++)
199 BN_mul(r,b,a,ctx);
200 tm=Time_F(STOP);
201 printf("mul %4d x %4d -> %8.3fms\n",sizes[i],sizes[j],tm*1000.0/num);
202 }
203 }
204
205 for (i=0; i<NUM_SIZES; i++)
206 {
207 num=BASENUM;
208 if (i) num/=(i*3);
209 BN_rand(a,sizes[i],1,0);
210 Time_F(START);
211 for (k=0; k<num; k++)
212 BN_sqr(r,a,ctx);
213 tm=Time_F(STOP);
214 printf("sqr %4d x %4d -> %8.3fms\n",sizes[i],sizes[i],tm*1000.0/num);
215 }
216
217 for (i=0; i<NUM_SIZES; i++)
218 {
219 num=BASENUM/10;
220 if (i) num/=(i*3);
221 BN_rand(a,sizes[i]-1,1,0);
222 for (j=i; j<NUM_SIZES; j++)
223 {
224 BN_rand(b,sizes[j],1,0);
225 Time_F(START);
226 for (k=0; k<100000; k++)
227 BN_div(r, NULL, b, a,ctx);
228 tm=Time_F(STOP);
229 printf("div %4d / %4d -> %8.3fms\n",sizes[j],sizes[i]-1,tm*1000.0/num);
230 }
231 }
232 }
233
diff --git a/src/lib/libcrypto/bn/bntest.c b/src/lib/libcrypto/bn/bntest.c
deleted file mode 100644
index 0cd99c5b4b..0000000000
--- a/src/lib/libcrypto/bn/bntest.c
+++ /dev/null
@@ -1,2013 +0,0 @@
1/* crypto/bn/bntest.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
60 *
61 * Portions of the attached software ("Contribution") are developed by
62 * SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project.
63 *
64 * The Contribution is licensed pursuant to the Eric Young open source
65 * license provided above.
66 *
67 * The binary polynomial arithmetic software is originally written by
68 * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems Laboratories.
69 *
70 */
71
72/* Until the key-gen callbacks are modified to use newer prototypes, we allow
73 * deprecated functions for openssl-internal code */
74#ifdef OPENSSL_NO_DEPRECATED
75#undef OPENSSL_NO_DEPRECATED
76#endif
77
78#include <stdio.h>
79#include <stdlib.h>
80#include <string.h>
81
82#include "e_os.h"
83
84#include <openssl/bio.h>
85#include <openssl/bn.h>
86#include <openssl/rand.h>
87#include <openssl/x509.h>
88#include <openssl/err.h>
89
90const int num0 = 100; /* number of tests */
91const int num1 = 50; /* additional tests for some functions */
92const int num2 = 5; /* number of tests for slow functions */
93
94int test_add(BIO *bp);
95int test_sub(BIO *bp);
96int test_lshift1(BIO *bp);
97int test_lshift(BIO *bp,BN_CTX *ctx,BIGNUM *a_);
98int test_rshift1(BIO *bp);
99int test_rshift(BIO *bp,BN_CTX *ctx);
100int test_div(BIO *bp,BN_CTX *ctx);
101int test_div_word(BIO *bp);
102int test_div_recp(BIO *bp,BN_CTX *ctx);
103int test_mul(BIO *bp);
104int test_sqr(BIO *bp,BN_CTX *ctx);
105int test_mont(BIO *bp,BN_CTX *ctx);
106int test_mod(BIO *bp,BN_CTX *ctx);
107int test_mod_mul(BIO *bp,BN_CTX *ctx);
108int test_mod_exp(BIO *bp,BN_CTX *ctx);
109int test_mod_exp_mont_consttime(BIO *bp,BN_CTX *ctx);
110int test_exp(BIO *bp,BN_CTX *ctx);
111int test_gf2m_add(BIO *bp);
112int test_gf2m_mod(BIO *bp);
113int test_gf2m_mod_mul(BIO *bp,BN_CTX *ctx);
114int test_gf2m_mod_sqr(BIO *bp,BN_CTX *ctx);
115int test_gf2m_mod_inv(BIO *bp,BN_CTX *ctx);
116int test_gf2m_mod_div(BIO *bp,BN_CTX *ctx);
117int test_gf2m_mod_exp(BIO *bp,BN_CTX *ctx);
118int test_gf2m_mod_sqrt(BIO *bp,BN_CTX *ctx);
119int test_gf2m_mod_solve_quad(BIO *bp,BN_CTX *ctx);
120int test_kron(BIO *bp,BN_CTX *ctx);
121int test_sqrt(BIO *bp,BN_CTX *ctx);
122int rand_neg(void);
123static int results=0;
124
125static unsigned char lst[]="\xC6\x4F\x43\x04\x2A\xEA\xCA\x6E\x58\x36\x80\x5B\xE8\xC9"
126"\x9B\x04\x5D\x48\x36\xC2\xFD\x16\xC9\x64\xF0";
127
128static const char rnd_seed[] = "string to make the random number generator think it has entropy";
129
130static void message(BIO *out, char *m)
131 {
132 fprintf(stderr, "test %s\n", m);
133 BIO_puts(out, "print \"test ");
134 BIO_puts(out, m);
135 BIO_puts(out, "\\n\"\n");
136 }
137
138int main(int argc, char *argv[])
139 {
140 BN_CTX *ctx;
141 BIO *out;
142 char *outfile=NULL;
143
144 results = 0;
145
146 RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_generate_prime may fail */
147
148 argc--;
149 argv++;
150 while (argc >= 1)
151 {
152 if (strcmp(*argv,"-results") == 0)
153 results=1;
154 else if (strcmp(*argv,"-out") == 0)
155 {
156 if (--argc < 1) break;
157 outfile= *(++argv);
158 }
159 argc--;
160 argv++;
161 }
162
163
164 ctx=BN_CTX_new();
165 if (ctx == NULL) EXIT(1);
166
167 out=BIO_new(BIO_s_file());
168 if (out == NULL) EXIT(1);
169 if (outfile == NULL)
170 {
171 BIO_set_fp(out,stdout,BIO_NOCLOSE);
172 }
173 else
174 {
175 if (!BIO_write_filename(out,outfile))
176 {
177 perror(outfile);
178 EXIT(1);
179 }
180 }
181
182 if (!results)
183 BIO_puts(out,"obase=16\nibase=16\n");
184
185 message(out,"BN_add");
186 if (!test_add(out)) goto err;
187 (void)BIO_flush(out);
188
189 message(out,"BN_sub");
190 if (!test_sub(out)) goto err;
191 (void)BIO_flush(out);
192
193 message(out,"BN_lshift1");
194 if (!test_lshift1(out)) goto err;
195 (void)BIO_flush(out);
196
197 message(out,"BN_lshift (fixed)");
198 if (!test_lshift(out,ctx,BN_bin2bn(lst,sizeof(lst)-1,NULL)))
199 goto err;
200 (void)BIO_flush(out);
201
202 message(out,"BN_lshift");
203 if (!test_lshift(out,ctx,NULL)) goto err;
204 (void)BIO_flush(out);
205
206 message(out,"BN_rshift1");
207 if (!test_rshift1(out)) goto err;
208 (void)BIO_flush(out);
209
210 message(out,"BN_rshift");
211 if (!test_rshift(out,ctx)) goto err;
212 (void)BIO_flush(out);
213
214 message(out,"BN_sqr");
215 if (!test_sqr(out,ctx)) goto err;
216 (void)BIO_flush(out);
217
218 message(out,"BN_mul");
219 if (!test_mul(out)) goto err;
220 (void)BIO_flush(out);
221
222 message(out,"BN_div");
223 if (!test_div(out,ctx)) goto err;
224 (void)BIO_flush(out);
225
226 message(out,"BN_div_word");
227 if (!test_div_word(out)) goto err;
228 (void)BIO_flush(out);
229
230 message(out,"BN_div_recp");
231 if (!test_div_recp(out,ctx)) goto err;
232 (void)BIO_flush(out);
233
234 message(out,"BN_mod");
235 if (!test_mod(out,ctx)) goto err;
236 (void)BIO_flush(out);
237
238 message(out,"BN_mod_mul");
239 if (!test_mod_mul(out,ctx)) goto err;
240 (void)BIO_flush(out);
241
242 message(out,"BN_mont");
243 if (!test_mont(out,ctx)) goto err;
244 (void)BIO_flush(out);
245
246 message(out,"BN_mod_exp");
247 if (!test_mod_exp(out,ctx)) goto err;
248 (void)BIO_flush(out);
249
250 message(out,"BN_mod_exp_mont_consttime");
251 if (!test_mod_exp_mont_consttime(out,ctx)) goto err;
252 (void)BIO_flush(out);
253
254 message(out,"BN_exp");
255 if (!test_exp(out,ctx)) goto err;
256 (void)BIO_flush(out);
257
258 message(out,"BN_kronecker");
259 if (!test_kron(out,ctx)) goto err;
260 (void)BIO_flush(out);
261
262 message(out,"BN_mod_sqrt");
263 if (!test_sqrt(out,ctx)) goto err;
264 (void)BIO_flush(out);
265
266 message(out,"BN_GF2m_add");
267 if (!test_gf2m_add(out)) goto err;
268 (void)BIO_flush(out);
269
270 message(out,"BN_GF2m_mod");
271 if (!test_gf2m_mod(out)) goto err;
272 (void)BIO_flush(out);
273
274 message(out,"BN_GF2m_mod_mul");
275 if (!test_gf2m_mod_mul(out,ctx)) goto err;
276 (void)BIO_flush(out);
277
278 message(out,"BN_GF2m_mod_sqr");
279 if (!test_gf2m_mod_sqr(out,ctx)) goto err;
280 (void)BIO_flush(out);
281
282 message(out,"BN_GF2m_mod_inv");
283 if (!test_gf2m_mod_inv(out,ctx)) goto err;
284 (void)BIO_flush(out);
285
286 message(out,"BN_GF2m_mod_div");
287 if (!test_gf2m_mod_div(out,ctx)) goto err;
288 (void)BIO_flush(out);
289
290 message(out,"BN_GF2m_mod_exp");
291 if (!test_gf2m_mod_exp(out,ctx)) goto err;
292 (void)BIO_flush(out);
293
294 message(out,"BN_GF2m_mod_sqrt");
295 if (!test_gf2m_mod_sqrt(out,ctx)) goto err;
296 (void)BIO_flush(out);
297
298 message(out,"BN_GF2m_mod_solve_quad");
299 if (!test_gf2m_mod_solve_quad(out,ctx)) goto err;
300 (void)BIO_flush(out);
301
302 BN_CTX_free(ctx);
303 BIO_free(out);
304
305/**/
306 EXIT(0);
307err:
308 BIO_puts(out,"1\n"); /* make sure the Perl script fed by bc notices
309 * the failure, see test_bn in test/Makefile.ssl*/
310 (void)BIO_flush(out);
311 ERR_load_crypto_strings();
312 ERR_print_errors_fp(stderr);
313 EXIT(1);
314 return(1);
315 }
316
317int test_add(BIO *bp)
318 {
319 BIGNUM a,b,c;
320 int i;
321
322 BN_init(&a);
323 BN_init(&b);
324 BN_init(&c);
325
326 BN_bntest_rand(&a,512,0,0);
327 for (i=0; i<num0; i++)
328 {
329 BN_bntest_rand(&b,450+i,0,0);
330 a.neg=rand_neg();
331 b.neg=rand_neg();
332 BN_add(&c,&a,&b);
333 if (bp != NULL)
334 {
335 if (!results)
336 {
337 BN_print(bp,&a);
338 BIO_puts(bp," + ");
339 BN_print(bp,&b);
340 BIO_puts(bp," - ");
341 }
342 BN_print(bp,&c);
343 BIO_puts(bp,"\n");
344 }
345 a.neg=!a.neg;
346 b.neg=!b.neg;
347 BN_add(&c,&c,&b);
348 BN_add(&c,&c,&a);
349 if(!BN_is_zero(&c))
350 {
351 fprintf(stderr,"Add test failed!\n");
352 return 0;
353 }
354 }
355 BN_free(&a);
356 BN_free(&b);
357 BN_free(&c);
358 return(1);
359 }
360
361int test_sub(BIO *bp)
362 {
363 BIGNUM a,b,c;
364 int i;
365
366 BN_init(&a);
367 BN_init(&b);
368 BN_init(&c);
369
370 for (i=0; i<num0+num1; i++)
371 {
372 if (i < num1)
373 {
374 BN_bntest_rand(&a,512,0,0);
375 BN_copy(&b,&a);
376 if (BN_set_bit(&a,i)==0) return(0);
377 BN_add_word(&b,i);
378 }
379 else
380 {
381 BN_bntest_rand(&b,400+i-num1,0,0);
382 a.neg=rand_neg();
383 b.neg=rand_neg();
384 }
385 BN_sub(&c,&a,&b);
386 if (bp != NULL)
387 {
388 if (!results)
389 {
390 BN_print(bp,&a);
391 BIO_puts(bp," - ");
392 BN_print(bp,&b);
393 BIO_puts(bp," - ");
394 }
395 BN_print(bp,&c);
396 BIO_puts(bp,"\n");
397 }
398 BN_add(&c,&c,&b);
399 BN_sub(&c,&c,&a);
400 if(!BN_is_zero(&c))
401 {
402 fprintf(stderr,"Subtract test failed!\n");
403 return 0;
404 }
405 }
406 BN_free(&a);
407 BN_free(&b);
408 BN_free(&c);
409 return(1);
410 }
411
412int test_div(BIO *bp, BN_CTX *ctx)
413 {
414 BIGNUM a,b,c,d,e;
415 int i;
416
417 BN_init(&a);
418 BN_init(&b);
419 BN_init(&c);
420 BN_init(&d);
421 BN_init(&e);
422
423 for (i=0; i<num0+num1; i++)
424 {
425 if (i < num1)
426 {
427 BN_bntest_rand(&a,400,0,0);
428 BN_copy(&b,&a);
429 BN_lshift(&a,&a,i);
430 BN_add_word(&a,i);
431 }
432 else
433 BN_bntest_rand(&b,50+3*(i-num1),0,0);
434 a.neg=rand_neg();
435 b.neg=rand_neg();
436 BN_div(&d,&c,&a,&b,ctx);
437 if (bp != NULL)
438 {
439 if (!results)
440 {
441 BN_print(bp,&a);
442 BIO_puts(bp," / ");
443 BN_print(bp,&b);
444 BIO_puts(bp," - ");
445 }
446 BN_print(bp,&d);
447 BIO_puts(bp,"\n");
448
449 if (!results)
450 {
451 BN_print(bp,&a);
452 BIO_puts(bp," % ");
453 BN_print(bp,&b);
454 BIO_puts(bp," - ");
455 }
456 BN_print(bp,&c);
457 BIO_puts(bp,"\n");
458 }
459 BN_mul(&e,&d,&b,ctx);
460 BN_add(&d,&e,&c);
461 BN_sub(&d,&d,&a);
462 if(!BN_is_zero(&d))
463 {
464 fprintf(stderr,"Division test failed!\n");
465 return 0;
466 }
467 }
468 BN_free(&a);
469 BN_free(&b);
470 BN_free(&c);
471 BN_free(&d);
472 BN_free(&e);
473 return(1);
474 }
475
476static void print_word(BIO *bp,BN_ULONG w)
477 {
478#ifdef SIXTY_FOUR_BIT
479 if (sizeof(w) > sizeof(unsigned long))
480 {
481 unsigned long h=(unsigned long)(w>>32),
482 l=(unsigned long)(w);
483
484 if (h) BIO_printf(bp,"%lX%08lX",h,l);
485 else BIO_printf(bp,"%lX",l);
486 return;
487 }
488#endif
489 BIO_printf(bp,BN_HEX_FMT1,w);
490 }
491
492int test_div_word(BIO *bp)
493 {
494 BIGNUM a,b;
495 BN_ULONG r,s;
496 int i;
497
498 BN_init(&a);
499 BN_init(&b);
500
501 for (i=0; i<num0; i++)
502 {
503 do {
504 BN_bntest_rand(&a,512,-1,0);
505 BN_bntest_rand(&b,BN_BITS2,-1,0);
506 s = b.d[0];
507 } while (!s);
508
509 BN_copy(&b, &a);
510 r = BN_div_word(&b, s);
511
512 if (bp != NULL)
513 {
514 if (!results)
515 {
516 BN_print(bp,&a);
517 BIO_puts(bp," / ");
518 print_word(bp,s);
519 BIO_puts(bp," - ");
520 }
521 BN_print(bp,&b);
522 BIO_puts(bp,"\n");
523
524 if (!results)
525 {
526 BN_print(bp,&a);
527 BIO_puts(bp," % ");
528 print_word(bp,s);
529 BIO_puts(bp," - ");
530 }
531 print_word(bp,r);
532 BIO_puts(bp,"\n");
533 }
534 BN_mul_word(&b,s);
535 BN_add_word(&b,r);
536 BN_sub(&b,&a,&b);
537 if(!BN_is_zero(&b))
538 {
539 fprintf(stderr,"Division (word) test failed!\n");
540 return 0;
541 }
542 }
543 BN_free(&a);
544 BN_free(&b);
545 return(1);
546 }
547
548int test_div_recp(BIO *bp, BN_CTX *ctx)
549 {
550 BIGNUM a,b,c,d,e;
551 BN_RECP_CTX recp;
552 int i;
553
554 BN_RECP_CTX_init(&recp);
555 BN_init(&a);
556 BN_init(&b);
557 BN_init(&c);
558 BN_init(&d);
559 BN_init(&e);
560
561 for (i=0; i<num0+num1; i++)
562 {
563 if (i < num1)
564 {
565 BN_bntest_rand(&a,400,0,0);
566 BN_copy(&b,&a);
567 BN_lshift(&a,&a,i);
568 BN_add_word(&a,i);
569 }
570 else
571 BN_bntest_rand(&b,50+3*(i-num1),0,0);
572 a.neg=rand_neg();
573 b.neg=rand_neg();
574 BN_RECP_CTX_set(&recp,&b,ctx);
575 BN_div_recp(&d,&c,&a,&recp,ctx);
576 if (bp != NULL)
577 {
578 if (!results)
579 {
580 BN_print(bp,&a);
581 BIO_puts(bp," / ");
582 BN_print(bp,&b);
583 BIO_puts(bp," - ");
584 }
585 BN_print(bp,&d);
586 BIO_puts(bp,"\n");
587
588 if (!results)
589 {
590 BN_print(bp,&a);
591 BIO_puts(bp," % ");
592 BN_print(bp,&b);
593 BIO_puts(bp," - ");
594 }
595 BN_print(bp,&c);
596 BIO_puts(bp,"\n");
597 }
598 BN_mul(&e,&d,&b,ctx);
599 BN_add(&d,&e,&c);
600 BN_sub(&d,&d,&a);
601 if(!BN_is_zero(&d))
602 {
603 fprintf(stderr,"Reciprocal division test failed!\n");
604 fprintf(stderr,"a=");
605 BN_print_fp(stderr,&a);
606 fprintf(stderr,"\nb=");
607 BN_print_fp(stderr,&b);
608 fprintf(stderr,"\n");
609 return 0;
610 }
611 }
612 BN_free(&a);
613 BN_free(&b);
614 BN_free(&c);
615 BN_free(&d);
616 BN_free(&e);
617 BN_RECP_CTX_free(&recp);
618 return(1);
619 }
620
621int test_mul(BIO *bp)
622 {
623 BIGNUM a,b,c,d,e;
624 int i;
625 BN_CTX *ctx;
626
627 ctx = BN_CTX_new();
628 if (ctx == NULL) EXIT(1);
629
630 BN_init(&a);
631 BN_init(&b);
632 BN_init(&c);
633 BN_init(&d);
634 BN_init(&e);
635
636 for (i=0; i<num0+num1; i++)
637 {
638 if (i <= num1)
639 {
640 BN_bntest_rand(&a,100,0,0);
641 BN_bntest_rand(&b,100,0,0);
642 }
643 else
644 BN_bntest_rand(&b,i-num1,0,0);
645 a.neg=rand_neg();
646 b.neg=rand_neg();
647 BN_mul(&c,&a,&b,ctx);
648 if (bp != NULL)
649 {
650 if (!results)
651 {
652 BN_print(bp,&a);
653 BIO_puts(bp," * ");
654 BN_print(bp,&b);
655 BIO_puts(bp," - ");
656 }
657 BN_print(bp,&c);
658 BIO_puts(bp,"\n");
659 }
660 BN_div(&d,&e,&c,&a,ctx);
661 BN_sub(&d,&d,&b);
662 if(!BN_is_zero(&d) || !BN_is_zero(&e))
663 {
664 fprintf(stderr,"Multiplication test failed!\n");
665 return 0;
666 }
667 }
668 BN_free(&a);
669 BN_free(&b);
670 BN_free(&c);
671 BN_free(&d);
672 BN_free(&e);
673 BN_CTX_free(ctx);
674 return(1);
675 }
676
677int test_sqr(BIO *bp, BN_CTX *ctx)
678 {
679 BIGNUM a,c,d,e;
680 int i;
681
682 BN_init(&a);
683 BN_init(&c);
684 BN_init(&d);
685 BN_init(&e);
686
687 for (i=0; i<num0; i++)
688 {
689 BN_bntest_rand(&a,40+i*10,0,0);
690 a.neg=rand_neg();
691 BN_sqr(&c,&a,ctx);
692 if (bp != NULL)
693 {
694 if (!results)
695 {
696 BN_print(bp,&a);
697 BIO_puts(bp," * ");
698 BN_print(bp,&a);
699 BIO_puts(bp," - ");
700 }
701 BN_print(bp,&c);
702 BIO_puts(bp,"\n");
703 }
704 BN_div(&d,&e,&c,&a,ctx);
705 BN_sub(&d,&d,&a);
706 if(!BN_is_zero(&d) || !BN_is_zero(&e))
707 {
708 fprintf(stderr,"Square test failed!\n");
709 return 0;
710 }
711 }
712 BN_free(&a);
713 BN_free(&c);
714 BN_free(&d);
715 BN_free(&e);
716 return(1);
717 }
718
719int test_mont(BIO *bp, BN_CTX *ctx)
720 {
721 BIGNUM a,b,c,d,A,B;
722 BIGNUM n;
723 int i;
724 BN_MONT_CTX *mont;
725
726 BN_init(&a);
727 BN_init(&b);
728 BN_init(&c);
729 BN_init(&d);
730 BN_init(&A);
731 BN_init(&B);
732 BN_init(&n);
733
734 mont=BN_MONT_CTX_new();
735 if (mont == NULL)
736 return 0;
737
738 BN_bntest_rand(&a,100,0,0); /**/
739 BN_bntest_rand(&b,100,0,0); /**/
740 for (i=0; i<num2; i++)
741 {
742 int bits = (200*(i+1))/num2;
743
744 if (bits == 0)
745 continue;
746 BN_bntest_rand(&n,bits,0,1);
747 BN_MONT_CTX_set(mont,&n,ctx);
748
749 BN_nnmod(&a,&a,&n,ctx);
750 BN_nnmod(&b,&b,&n,ctx);
751
752 BN_to_montgomery(&A,&a,mont,ctx);
753 BN_to_montgomery(&B,&b,mont,ctx);
754
755 BN_mod_mul_montgomery(&c,&A,&B,mont,ctx);/**/
756 BN_from_montgomery(&A,&c,mont,ctx);/**/
757 if (bp != NULL)
758 {
759 if (!results)
760 {
761#ifdef undef
762fprintf(stderr,"%d * %d %% %d\n",
763BN_num_bits(&a),
764BN_num_bits(&b),
765BN_num_bits(mont->N));
766#endif
767 BN_print(bp,&a);
768 BIO_puts(bp," * ");
769 BN_print(bp,&b);
770 BIO_puts(bp," % ");
771 BN_print(bp,&(mont->N));
772 BIO_puts(bp," - ");
773 }
774 BN_print(bp,&A);
775 BIO_puts(bp,"\n");
776 }
777 BN_mod_mul(&d,&a,&b,&n,ctx);
778 BN_sub(&d,&d,&A);
779 if(!BN_is_zero(&d))
780 {
781 fprintf(stderr,"Montgomery multiplication test failed!\n");
782 return 0;
783 }
784 }
785 BN_MONT_CTX_free(mont);
786 BN_free(&a);
787 BN_free(&b);
788 BN_free(&c);
789 BN_free(&d);
790 BN_free(&A);
791 BN_free(&B);
792 BN_free(&n);
793 return(1);
794 }
795
796int test_mod(BIO *bp, BN_CTX *ctx)
797 {
798 BIGNUM *a,*b,*c,*d,*e;
799 int i;
800
801 a=BN_new();
802 b=BN_new();
803 c=BN_new();
804 d=BN_new();
805 e=BN_new();
806
807 BN_bntest_rand(a,1024,0,0); /**/
808 for (i=0; i<num0; i++)
809 {
810 BN_bntest_rand(b,450+i*10,0,0); /**/
811 a->neg=rand_neg();
812 b->neg=rand_neg();
813 BN_mod(c,a,b,ctx);/**/
814 if (bp != NULL)
815 {
816 if (!results)
817 {
818 BN_print(bp,a);
819 BIO_puts(bp," % ");
820 BN_print(bp,b);
821 BIO_puts(bp," - ");
822 }
823 BN_print(bp,c);
824 BIO_puts(bp,"\n");
825 }
826 BN_div(d,e,a,b,ctx);
827 BN_sub(e,e,c);
828 if(!BN_is_zero(e))
829 {
830 fprintf(stderr,"Modulo test failed!\n");
831 return 0;
832 }
833 }
834 BN_free(a);
835 BN_free(b);
836 BN_free(c);
837 BN_free(d);
838 BN_free(e);
839 return(1);
840 }
841
842int test_mod_mul(BIO *bp, BN_CTX *ctx)
843 {
844 BIGNUM *a,*b,*c,*d,*e;
845 int i,j;
846
847 a=BN_new();
848 b=BN_new();
849 c=BN_new();
850 d=BN_new();
851 e=BN_new();
852
853 for (j=0; j<3; j++) {
854 BN_bntest_rand(c,1024,0,0); /**/
855 for (i=0; i<num0; i++)
856 {
857 BN_bntest_rand(a,475+i*10,0,0); /**/
858 BN_bntest_rand(b,425+i*11,0,0); /**/
859 a->neg=rand_neg();
860 b->neg=rand_neg();
861 if (!BN_mod_mul(e,a,b,c,ctx))
862 {
863 unsigned long l;
864
865 while ((l=ERR_get_error()))
866 fprintf(stderr,"ERROR:%s\n",
867 ERR_error_string(l,NULL));
868 EXIT(1);
869 }
870 if (bp != NULL)
871 {
872 if (!results)
873 {
874 BN_print(bp,a);
875 BIO_puts(bp," * ");
876 BN_print(bp,b);
877 BIO_puts(bp," % ");
878 BN_print(bp,c);
879 if ((a->neg ^ b->neg) && !BN_is_zero(e))
880 {
881 /* If (a*b) % c is negative, c must be added
882 * in order to obtain the normalized remainder
883 * (new with OpenSSL 0.9.7, previous versions of
884 * BN_mod_mul could generate negative results)
885 */
886 BIO_puts(bp," + ");
887 BN_print(bp,c);
888 }
889 BIO_puts(bp," - ");
890 }
891 BN_print(bp,e);
892 BIO_puts(bp,"\n");
893 }
894 BN_mul(d,a,b,ctx);
895 BN_sub(d,d,e);
896 BN_div(a,b,d,c,ctx);
897 if(!BN_is_zero(b))
898 {
899 fprintf(stderr,"Modulo multiply test failed!\n");
900 ERR_print_errors_fp(stderr);
901 return 0;
902 }
903 }
904 }
905 BN_free(a);
906 BN_free(b);
907 BN_free(c);
908 BN_free(d);
909 BN_free(e);
910 return(1);
911 }
912
913int test_mod_exp(BIO *bp, BN_CTX *ctx)
914 {
915 BIGNUM *a,*b,*c,*d,*e;
916 int i;
917
918 a=BN_new();
919 b=BN_new();
920 c=BN_new();
921 d=BN_new();
922 e=BN_new();
923
924 BN_bntest_rand(c,30,0,1); /* must be odd for montgomery */
925 for (i=0; i<num2; i++)
926 {
927 BN_bntest_rand(a,20+i*5,0,0); /**/
928 BN_bntest_rand(b,2+i,0,0); /**/
929
930 if (!BN_mod_exp(d,a,b,c,ctx))
931 return(0);
932
933 if (bp != NULL)
934 {
935 if (!results)
936 {
937 BN_print(bp,a);
938 BIO_puts(bp," ^ ");
939 BN_print(bp,b);
940 BIO_puts(bp," % ");
941 BN_print(bp,c);
942 BIO_puts(bp," - ");
943 }
944 BN_print(bp,d);
945 BIO_puts(bp,"\n");
946 }
947 BN_exp(e,a,b,ctx);
948 BN_sub(e,e,d);
949 BN_div(a,b,e,c,ctx);
950 if(!BN_is_zero(b))
951 {
952 fprintf(stderr,"Modulo exponentiation test failed!\n");
953 return 0;
954 }
955 }
956 BN_free(a);
957 BN_free(b);
958 BN_free(c);
959 BN_free(d);
960 BN_free(e);
961 return(1);
962 }
963
964int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx)
965 {
966 BIGNUM *a,*b,*c,*d,*e;
967 int i;
968
969 a=BN_new();
970 b=BN_new();
971 c=BN_new();
972 d=BN_new();
973 e=BN_new();
974
975 BN_bntest_rand(c,30,0,1); /* must be odd for montgomery */
976 for (i=0; i<num2; i++)
977 {
978 BN_bntest_rand(a,20+i*5,0,0); /**/
979 BN_bntest_rand(b,2+i,0,0); /**/
980
981 if (!BN_mod_exp_mont_consttime(d,a,b,c,ctx,NULL))
982 return(00);
983
984 if (bp != NULL)
985 {
986 if (!results)
987 {
988 BN_print(bp,a);
989 BIO_puts(bp," ^ ");
990 BN_print(bp,b);
991 BIO_puts(bp," % ");
992 BN_print(bp,c);
993 BIO_puts(bp," - ");
994 }
995 BN_print(bp,d);
996 BIO_puts(bp,"\n");
997 }
998 BN_exp(e,a,b,ctx);
999 BN_sub(e,e,d);
1000 BN_div(a,b,e,c,ctx);
1001 if(!BN_is_zero(b))
1002 {
1003 fprintf(stderr,"Modulo exponentiation test failed!\n");
1004 return 0;
1005 }
1006 }
1007 BN_free(a);
1008 BN_free(b);
1009 BN_free(c);
1010 BN_free(d);
1011 BN_free(e);
1012 return(1);
1013 }
1014
1015int test_exp(BIO *bp, BN_CTX *ctx)
1016 {
1017 BIGNUM *a,*b,*d,*e,*one;
1018 int i;
1019
1020 a=BN_new();
1021 b=BN_new();
1022 d=BN_new();
1023 e=BN_new();
1024 one=BN_new();
1025 BN_one(one);
1026
1027 for (i=0; i<num2; i++)
1028 {
1029 BN_bntest_rand(a,20+i*5,0,0); /**/
1030 BN_bntest_rand(b,2+i,0,0); /**/
1031
1032 if (BN_exp(d,a,b,ctx) <= 0)
1033 return(0);
1034
1035 if (bp != NULL)
1036 {
1037 if (!results)
1038 {
1039 BN_print(bp,a);
1040 BIO_puts(bp," ^ ");
1041 BN_print(bp,b);
1042 BIO_puts(bp," - ");
1043 }
1044 BN_print(bp,d);
1045 BIO_puts(bp,"\n");
1046 }
1047 BN_one(e);
1048 for( ; !BN_is_zero(b) ; BN_sub(b,b,one))
1049 BN_mul(e,e,a,ctx);
1050 BN_sub(e,e,d);
1051 if(!BN_is_zero(e))
1052 {
1053 fprintf(stderr,"Exponentiation test failed!\n");
1054 return 0;
1055 }
1056 }
1057 BN_free(a);
1058 BN_free(b);
1059 BN_free(d);
1060 BN_free(e);
1061 BN_free(one);
1062 return(1);
1063 }
1064
1065int test_gf2m_add(BIO *bp)
1066 {
1067 BIGNUM a,b,c;
1068 int i, ret = 0;
1069
1070 BN_init(&a);
1071 BN_init(&b);
1072 BN_init(&c);
1073
1074 for (i=0; i<num0; i++)
1075 {
1076 BN_rand(&a,512,0,0);
1077 BN_copy(&b, BN_value_one());
1078 a.neg=rand_neg();
1079 b.neg=rand_neg();
1080 BN_GF2m_add(&c,&a,&b);
1081#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1082 if (bp != NULL)
1083 {
1084 if (!results)
1085 {
1086 BN_print(bp,&a);
1087 BIO_puts(bp," ^ ");
1088 BN_print(bp,&b);
1089 BIO_puts(bp," = ");
1090 }
1091 BN_print(bp,&c);
1092 BIO_puts(bp,"\n");
1093 }
1094#endif
1095 /* Test that two added values have the correct parity. */
1096 if((BN_is_odd(&a) && BN_is_odd(&c)) || (!BN_is_odd(&a) && !BN_is_odd(&c)))
1097 {
1098 fprintf(stderr,"GF(2^m) addition test (a) failed!\n");
1099 goto err;
1100 }
1101 BN_GF2m_add(&c,&c,&c);
1102 /* Test that c + c = 0. */
1103 if(!BN_is_zero(&c))
1104 {
1105 fprintf(stderr,"GF(2^m) addition test (b) failed!\n");
1106 goto err;
1107 }
1108 }
1109 ret = 1;
1110 err:
1111 BN_free(&a);
1112 BN_free(&b);
1113 BN_free(&c);
1114 return ret;
1115 }
1116
1117int test_gf2m_mod(BIO *bp)
1118 {
1119 BIGNUM *a,*b[2],*c,*d,*e;
1120 int i, j, ret = 0;
1121 int p0[] = {163,7,6,3,0,-1};
1122 int p1[] = {193,15,0,-1};
1123
1124 a=BN_new();
1125 b[0]=BN_new();
1126 b[1]=BN_new();
1127 c=BN_new();
1128 d=BN_new();
1129 e=BN_new();
1130
1131 BN_GF2m_arr2poly(p0, b[0]);
1132 BN_GF2m_arr2poly(p1, b[1]);
1133
1134 for (i=0; i<num0; i++)
1135 {
1136 BN_bntest_rand(a, 1024, 0, 0);
1137 for (j=0; j < 2; j++)
1138 {
1139 BN_GF2m_mod(c, a, b[j]);
1140#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1141 if (bp != NULL)
1142 {
1143 if (!results)
1144 {
1145 BN_print(bp,a);
1146 BIO_puts(bp," % ");
1147 BN_print(bp,b[j]);
1148 BIO_puts(bp," - ");
1149 BN_print(bp,c);
1150 BIO_puts(bp,"\n");
1151 }
1152 }
1153#endif
1154 BN_GF2m_add(d, a, c);
1155 BN_GF2m_mod(e, d, b[j]);
1156 /* Test that a + (a mod p) mod p == 0. */
1157 if(!BN_is_zero(e))
1158 {
1159 fprintf(stderr,"GF(2^m) modulo test failed!\n");
1160 goto err;
1161 }
1162 }
1163 }
1164 ret = 1;
1165 err:
1166 BN_free(a);
1167 BN_free(b[0]);
1168 BN_free(b[1]);
1169 BN_free(c);
1170 BN_free(d);
1171 BN_free(e);
1172 return ret;
1173 }
1174
1175int test_gf2m_mod_mul(BIO *bp,BN_CTX *ctx)
1176 {
1177 BIGNUM *a,*b[2],*c,*d,*e,*f,*g,*h;
1178 int i, j, ret = 0;
1179 int p0[] = {163,7,6,3,0,-1};
1180 int p1[] = {193,15,0,-1};
1181
1182 a=BN_new();
1183 b[0]=BN_new();
1184 b[1]=BN_new();
1185 c=BN_new();
1186 d=BN_new();
1187 e=BN_new();
1188 f=BN_new();
1189 g=BN_new();
1190 h=BN_new();
1191
1192 BN_GF2m_arr2poly(p0, b[0]);
1193 BN_GF2m_arr2poly(p1, b[1]);
1194
1195 for (i=0; i<num0; i++)
1196 {
1197 BN_bntest_rand(a, 1024, 0, 0);
1198 BN_bntest_rand(c, 1024, 0, 0);
1199 BN_bntest_rand(d, 1024, 0, 0);
1200 for (j=0; j < 2; j++)
1201 {
1202 BN_GF2m_mod_mul(e, a, c, b[j], ctx);
1203#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1204 if (bp != NULL)
1205 {
1206 if (!results)
1207 {
1208 BN_print(bp,a);
1209 BIO_puts(bp," * ");
1210 BN_print(bp,c);
1211 BIO_puts(bp," % ");
1212 BN_print(bp,b[j]);
1213 BIO_puts(bp," - ");
1214 BN_print(bp,e);
1215 BIO_puts(bp,"\n");
1216 }
1217 }
1218#endif
1219 BN_GF2m_add(f, a, d);
1220 BN_GF2m_mod_mul(g, f, c, b[j], ctx);
1221 BN_GF2m_mod_mul(h, d, c, b[j], ctx);
1222 BN_GF2m_add(f, e, g);
1223 BN_GF2m_add(f, f, h);
1224 /* Test that (a+d)*c = a*c + d*c. */
1225 if(!BN_is_zero(f))
1226 {
1227 fprintf(stderr,"GF(2^m) modular multiplication test failed!\n");
1228 goto err;
1229 }
1230 }
1231 }
1232 ret = 1;
1233 err:
1234 BN_free(a);
1235 BN_free(b[0]);
1236 BN_free(b[1]);
1237 BN_free(c);
1238 BN_free(d);
1239 BN_free(e);
1240 BN_free(f);
1241 BN_free(g);
1242 BN_free(h);
1243 return ret;
1244 }
1245
1246int test_gf2m_mod_sqr(BIO *bp,BN_CTX *ctx)
1247 {
1248 BIGNUM *a,*b[2],*c,*d;
1249 int i, j, ret = 0;
1250 int p0[] = {163,7,6,3,0,-1};
1251 int p1[] = {193,15,0,-1};
1252
1253 a=BN_new();
1254 b[0]=BN_new();
1255 b[1]=BN_new();
1256 c=BN_new();
1257 d=BN_new();
1258
1259 BN_GF2m_arr2poly(p0, b[0]);
1260 BN_GF2m_arr2poly(p1, b[1]);
1261
1262 for (i=0; i<num0; i++)
1263 {
1264 BN_bntest_rand(a, 1024, 0, 0);
1265 for (j=0; j < 2; j++)
1266 {
1267 BN_GF2m_mod_sqr(c, a, b[j], ctx);
1268 BN_copy(d, a);
1269 BN_GF2m_mod_mul(d, a, d, b[j], ctx);
1270#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1271 if (bp != NULL)
1272 {
1273 if (!results)
1274 {
1275 BN_print(bp,a);
1276 BIO_puts(bp," ^ 2 % ");
1277 BN_print(bp,b[j]);
1278 BIO_puts(bp, " = ");
1279 BN_print(bp,c);
1280 BIO_puts(bp,"; a * a = ");
1281 BN_print(bp,d);
1282 BIO_puts(bp,"\n");
1283 }
1284 }
1285#endif
1286 BN_GF2m_add(d, c, d);
1287 /* Test that a*a = a^2. */
1288 if(!BN_is_zero(d))
1289 {
1290 fprintf(stderr,"GF(2^m) modular squaring test failed!\n");
1291 goto err;
1292 }
1293 }
1294 }
1295 ret = 1;
1296 err:
1297 BN_free(a);
1298 BN_free(b[0]);
1299 BN_free(b[1]);
1300 BN_free(c);
1301 BN_free(d);
1302 return ret;
1303 }
1304
1305int test_gf2m_mod_inv(BIO *bp,BN_CTX *ctx)
1306 {
1307 BIGNUM *a,*b[2],*c,*d;
1308 int i, j, ret = 0;
1309 int p0[] = {163,7,6,3,0,-1};
1310 int p1[] = {193,15,0,-1};
1311
1312 a=BN_new();
1313 b[0]=BN_new();
1314 b[1]=BN_new();
1315 c=BN_new();
1316 d=BN_new();
1317
1318 BN_GF2m_arr2poly(p0, b[0]);
1319 BN_GF2m_arr2poly(p1, b[1]);
1320
1321 for (i=0; i<num0; i++)
1322 {
1323 BN_bntest_rand(a, 512, 0, 0);
1324 for (j=0; j < 2; j++)
1325 {
1326 BN_GF2m_mod_inv(c, a, b[j], ctx);
1327 BN_GF2m_mod_mul(d, a, c, b[j], ctx);
1328#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1329 if (bp != NULL)
1330 {
1331 if (!results)
1332 {
1333 BN_print(bp,a);
1334 BIO_puts(bp, " * ");
1335 BN_print(bp,c);
1336 BIO_puts(bp," - 1 % ");
1337 BN_print(bp,b[j]);
1338 BIO_puts(bp,"\n");
1339 }
1340 }
1341#endif
1342 /* Test that ((1/a)*a) = 1. */
1343 if(!BN_is_one(d))
1344 {
1345 fprintf(stderr,"GF(2^m) modular inversion test failed!\n");
1346 goto err;
1347 }
1348 }
1349 }
1350 ret = 1;
1351 err:
1352 BN_free(a);
1353 BN_free(b[0]);
1354 BN_free(b[1]);
1355 BN_free(c);
1356 BN_free(d);
1357 return ret;
1358 }
1359
1360int test_gf2m_mod_div(BIO *bp,BN_CTX *ctx)
1361 {
1362 BIGNUM *a,*b[2],*c,*d,*e,*f;
1363 int i, j, ret = 0;
1364 int p0[] = {163,7,6,3,0,-1};
1365 int p1[] = {193,15,0,-1};
1366
1367 a=BN_new();
1368 b[0]=BN_new();
1369 b[1]=BN_new();
1370 c=BN_new();
1371 d=BN_new();
1372 e=BN_new();
1373 f=BN_new();
1374
1375 BN_GF2m_arr2poly(p0, b[0]);
1376 BN_GF2m_arr2poly(p1, b[1]);
1377
1378 for (i=0; i<num0; i++)
1379 {
1380 BN_bntest_rand(a, 512, 0, 0);
1381 BN_bntest_rand(c, 512, 0, 0);
1382 for (j=0; j < 2; j++)
1383 {
1384 BN_GF2m_mod_div(d, a, c, b[j], ctx);
1385 BN_GF2m_mod_mul(e, d, c, b[j], ctx);
1386 BN_GF2m_mod_div(f, a, e, b[j], ctx);
1387#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1388 if (bp != NULL)
1389 {
1390 if (!results)
1391 {
1392 BN_print(bp,a);
1393 BIO_puts(bp, " = ");
1394 BN_print(bp,c);
1395 BIO_puts(bp," * ");
1396 BN_print(bp,d);
1397 BIO_puts(bp, " % ");
1398 BN_print(bp,b[j]);
1399 BIO_puts(bp,"\n");
1400 }
1401 }
1402#endif
1403 /* Test that ((a/c)*c)/a = 1. */
1404 if(!BN_is_one(f))
1405 {
1406 fprintf(stderr,"GF(2^m) modular division test failed!\n");
1407 goto err;
1408 }
1409 }
1410 }
1411 ret = 1;
1412 err:
1413 BN_free(a);
1414 BN_free(b[0]);
1415 BN_free(b[1]);
1416 BN_free(c);
1417 BN_free(d);
1418 BN_free(e);
1419 BN_free(f);
1420 return ret;
1421 }
1422
1423int test_gf2m_mod_exp(BIO *bp,BN_CTX *ctx)
1424 {
1425 BIGNUM *a,*b[2],*c,*d,*e,*f;
1426 int i, j, ret = 0;
1427 int p0[] = {163,7,6,3,0,-1};
1428 int p1[] = {193,15,0,-1};
1429
1430 a=BN_new();
1431 b[0]=BN_new();
1432 b[1]=BN_new();
1433 c=BN_new();
1434 d=BN_new();
1435 e=BN_new();
1436 f=BN_new();
1437
1438 BN_GF2m_arr2poly(p0, b[0]);
1439 BN_GF2m_arr2poly(p1, b[1]);
1440
1441 for (i=0; i<num0; i++)
1442 {
1443 BN_bntest_rand(a, 512, 0, 0);
1444 BN_bntest_rand(c, 512, 0, 0);
1445 BN_bntest_rand(d, 512, 0, 0);
1446 for (j=0; j < 2; j++)
1447 {
1448 BN_GF2m_mod_exp(e, a, c, b[j], ctx);
1449 BN_GF2m_mod_exp(f, a, d, b[j], ctx);
1450 BN_GF2m_mod_mul(e, e, f, b[j], ctx);
1451 BN_add(f, c, d);
1452 BN_GF2m_mod_exp(f, a, f, b[j], ctx);
1453#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1454 if (bp != NULL)
1455 {
1456 if (!results)
1457 {
1458 BN_print(bp,a);
1459 BIO_puts(bp, " ^ (");
1460 BN_print(bp,c);
1461 BIO_puts(bp," + ");
1462 BN_print(bp,d);
1463 BIO_puts(bp, ") = ");
1464 BN_print(bp,e);
1465 BIO_puts(bp, "; - ");
1466 BN_print(bp,f);
1467 BIO_puts(bp, " % ");
1468 BN_print(bp,b[j]);
1469 BIO_puts(bp,"\n");
1470 }
1471 }
1472#endif
1473 BN_GF2m_add(f, e, f);
1474 /* Test that a^(c+d)=a^c*a^d. */
1475 if(!BN_is_zero(f))
1476 {
1477 fprintf(stderr,"GF(2^m) modular exponentiation test failed!\n");
1478 goto err;
1479 }
1480 }
1481 }
1482 ret = 1;
1483 err:
1484 BN_free(a);
1485 BN_free(b[0]);
1486 BN_free(b[1]);
1487 BN_free(c);
1488 BN_free(d);
1489 BN_free(e);
1490 BN_free(f);
1491 return ret;
1492 }
1493
1494int test_gf2m_mod_sqrt(BIO *bp,BN_CTX *ctx)
1495 {
1496 BIGNUM *a,*b[2],*c,*d,*e,*f;
1497 int i, j, ret = 0;
1498 int p0[] = {163,7,6,3,0,-1};
1499 int p1[] = {193,15,0,-1};
1500
1501 a=BN_new();
1502 b[0]=BN_new();
1503 b[1]=BN_new();
1504 c=BN_new();
1505 d=BN_new();
1506 e=BN_new();
1507 f=BN_new();
1508
1509 BN_GF2m_arr2poly(p0, b[0]);
1510 BN_GF2m_arr2poly(p1, b[1]);
1511
1512 for (i=0; i<num0; i++)
1513 {
1514 BN_bntest_rand(a, 512, 0, 0);
1515 for (j=0; j < 2; j++)
1516 {
1517 BN_GF2m_mod(c, a, b[j]);
1518 BN_GF2m_mod_sqrt(d, a, b[j], ctx);
1519 BN_GF2m_mod_sqr(e, d, b[j], ctx);
1520#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1521 if (bp != NULL)
1522 {
1523 if (!results)
1524 {
1525 BN_print(bp,d);
1526 BIO_puts(bp, " ^ 2 - ");
1527 BN_print(bp,a);
1528 BIO_puts(bp,"\n");
1529 }
1530 }
1531#endif
1532 BN_GF2m_add(f, c, e);
1533 /* Test that d^2 = a, where d = sqrt(a). */
1534 if(!BN_is_zero(f))
1535 {
1536 fprintf(stderr,"GF(2^m) modular square root test failed!\n");
1537 goto err;
1538 }
1539 }
1540 }
1541 ret = 1;
1542 err:
1543 BN_free(a);
1544 BN_free(b[0]);
1545 BN_free(b[1]);
1546 BN_free(c);
1547 BN_free(d);
1548 BN_free(e);
1549 BN_free(f);
1550 return ret;
1551 }
1552
1553int test_gf2m_mod_solve_quad(BIO *bp,BN_CTX *ctx)
1554 {
1555 BIGNUM *a,*b[2],*c,*d,*e;
1556 int i, j, s = 0, t, ret = 0;
1557 int p0[] = {163,7,6,3,0,-1};
1558 int p1[] = {193,15,0,-1};
1559
1560 a=BN_new();
1561 b[0]=BN_new();
1562 b[1]=BN_new();
1563 c=BN_new();
1564 d=BN_new();
1565 e=BN_new();
1566
1567 BN_GF2m_arr2poly(p0, b[0]);
1568 BN_GF2m_arr2poly(p1, b[1]);
1569
1570 for (i=0; i<num0; i++)
1571 {
1572 BN_bntest_rand(a, 512, 0, 0);
1573 for (j=0; j < 2; j++)
1574 {
1575 t = BN_GF2m_mod_solve_quad(c, a, b[j], ctx);
1576 if (t)
1577 {
1578 s++;
1579 BN_GF2m_mod_sqr(d, c, b[j], ctx);
1580 BN_GF2m_add(d, c, d);
1581 BN_GF2m_mod(e, a, b[j]);
1582#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1583 if (bp != NULL)
1584 {
1585 if (!results)
1586 {
1587 BN_print(bp,c);
1588 BIO_puts(bp, " is root of z^2 + z = ");
1589 BN_print(bp,a);
1590 BIO_puts(bp, " % ");
1591 BN_print(bp,b[j]);
1592 BIO_puts(bp, "\n");
1593 }
1594 }
1595#endif
1596 BN_GF2m_add(e, e, d);
1597 /* Test that solution of quadratic c satisfies c^2 + c = a. */
1598 if(!BN_is_zero(e))
1599 {
1600 fprintf(stderr,"GF(2^m) modular solve quadratic test failed!\n");
1601 goto err;
1602 }
1603
1604 }
1605 else
1606 {
1607#if 0 /* make test uses ouput in bc but bc can't handle GF(2^m) arithmetic */
1608 if (bp != NULL)
1609 {
1610 if (!results)
1611 {
1612 BIO_puts(bp, "There are no roots of z^2 + z = ");
1613 BN_print(bp,a);
1614 BIO_puts(bp, " % ");
1615 BN_print(bp,b[j]);
1616 BIO_puts(bp, "\n");
1617 }
1618 }
1619#endif
1620 }
1621 }
1622 }
1623 if (s == 0)
1624 {
1625 fprintf(stderr,"All %i tests of GF(2^m) modular solve quadratic resulted in no roots;\n", num0);
1626 fprintf(stderr,"this is very unlikely and probably indicates an error.\n");
1627 goto err;
1628 }
1629 ret = 1;
1630 err:
1631 BN_free(a);
1632 BN_free(b[0]);
1633 BN_free(b[1]);
1634 BN_free(c);
1635 BN_free(d);
1636 BN_free(e);
1637 return ret;
1638 }
1639
1640static int genprime_cb(int p, int n, BN_GENCB *arg)
1641 {
1642 char c='*';
1643
1644 if (p == 0) c='.';
1645 if (p == 1) c='+';
1646 if (p == 2) c='*';
1647 if (p == 3) c='\n';
1648 putc(c, stderr);
1649 fflush(stderr);
1650 return 1;
1651 }
1652
1653int test_kron(BIO *bp, BN_CTX *ctx)
1654 {
1655 BN_GENCB cb;
1656 BIGNUM *a,*b,*r,*t;
1657 int i;
1658 int legendre, kronecker;
1659 int ret = 0;
1660
1661 a = BN_new();
1662 b = BN_new();
1663 r = BN_new();
1664 t = BN_new();
1665 if (a == NULL || b == NULL || r == NULL || t == NULL) goto err;
1666
1667 BN_GENCB_set(&cb, genprime_cb, NULL);
1668
1669 /* We test BN_kronecker(a, b, ctx) just for b odd (Jacobi symbol).
1670 * In this case we know that if b is prime, then BN_kronecker(a, b, ctx)
1671 * is congruent to $a^{(b-1)/2}$, modulo $b$ (Legendre symbol).
1672 * So we generate a random prime b and compare these values
1673 * for a number of random a's. (That is, we run the Solovay-Strassen
1674 * primality test to confirm that b is prime, except that we
1675 * don't want to test whether b is prime but whether BN_kronecker
1676 * works.) */
1677
1678 if (!BN_generate_prime_ex(b, 512, 0, NULL, NULL, &cb)) goto err;
1679 b->neg = rand_neg();
1680 putc('\n', stderr);
1681
1682 for (i = 0; i < num0; i++)
1683 {
1684 if (!BN_bntest_rand(a, 512, 0, 0)) goto err;
1685 a->neg = rand_neg();
1686
1687 /* t := (|b|-1)/2 (note that b is odd) */
1688 if (!BN_copy(t, b)) goto err;
1689 t->neg = 0;
1690 if (!BN_sub_word(t, 1)) goto err;
1691 if (!BN_rshift1(t, t)) goto err;
1692 /* r := a^t mod b */
1693 b->neg=0;
1694
1695 if (!BN_mod_exp_recp(r, a, t, b, ctx)) goto err;
1696 b->neg=1;
1697
1698 if (BN_is_word(r, 1))
1699 legendre = 1;
1700 else if (BN_is_zero(r))
1701 legendre = 0;
1702 else
1703 {
1704 if (!BN_add_word(r, 1)) goto err;
1705 if (0 != BN_ucmp(r, b))
1706 {
1707 fprintf(stderr, "Legendre symbol computation failed\n");
1708 goto err;
1709 }
1710 legendre = -1;
1711 }
1712
1713 kronecker = BN_kronecker(a, b, ctx);
1714 if (kronecker < -1) goto err;
1715 /* we actually need BN_kronecker(a, |b|) */
1716 if (a->neg && b->neg)
1717 kronecker = -kronecker;
1718
1719 if (legendre != kronecker)
1720 {
1721 fprintf(stderr, "legendre != kronecker; a = ");
1722 BN_print_fp(stderr, a);
1723 fprintf(stderr, ", b = ");
1724 BN_print_fp(stderr, b);
1725 fprintf(stderr, "\n");
1726 goto err;
1727 }
1728
1729 putc('.', stderr);
1730 fflush(stderr);
1731 }
1732
1733 putc('\n', stderr);
1734 fflush(stderr);
1735 ret = 1;
1736 err:
1737 if (a != NULL) BN_free(a);
1738 if (b != NULL) BN_free(b);
1739 if (r != NULL) BN_free(r);
1740 if (t != NULL) BN_free(t);
1741 return ret;
1742 }
1743
1744int test_sqrt(BIO *bp, BN_CTX *ctx)
1745 {
1746 BN_GENCB cb;
1747 BIGNUM *a,*p,*r;
1748 int i, j;
1749 int ret = 0;
1750
1751 a = BN_new();
1752 p = BN_new();
1753 r = BN_new();
1754 if (a == NULL || p == NULL || r == NULL) goto err;
1755
1756 BN_GENCB_set(&cb, genprime_cb, NULL);
1757
1758 for (i = 0; i < 16; i++)
1759 {
1760 if (i < 8)
1761 {
1762 unsigned primes[8] = { 2, 3, 5, 7, 11, 13, 17, 19 };
1763
1764 if (!BN_set_word(p, primes[i])) goto err;
1765 }
1766 else
1767 {
1768 if (!BN_set_word(a, 32)) goto err;
1769 if (!BN_set_word(r, 2*i + 1)) goto err;
1770
1771 if (!BN_generate_prime_ex(p, 256, 0, a, r, &cb)) goto err;
1772 putc('\n', stderr);
1773 }
1774 p->neg = rand_neg();
1775
1776 for (j = 0; j < num2; j++)
1777 {
1778 /* construct 'a' such that it is a square modulo p,
1779 * but in general not a proper square and not reduced modulo p */
1780 if (!BN_bntest_rand(r, 256, 0, 3)) goto err;
1781 if (!BN_nnmod(r, r, p, ctx)) goto err;
1782 if (!BN_mod_sqr(r, r, p, ctx)) goto err;
1783 if (!BN_bntest_rand(a, 256, 0, 3)) goto err;
1784 if (!BN_nnmod(a, a, p, ctx)) goto err;
1785 if (!BN_mod_sqr(a, a, p, ctx)) goto err;
1786 if (!BN_mul(a, a, r, ctx)) goto err;
1787 if (rand_neg())
1788 if (!BN_sub(a, a, p)) goto err;
1789
1790 if (!BN_mod_sqrt(r, a, p, ctx)) goto err;
1791 if (!BN_mod_sqr(r, r, p, ctx)) goto err;
1792
1793 if (!BN_nnmod(a, a, p, ctx)) goto err;
1794
1795 if (BN_cmp(a, r) != 0)
1796 {
1797 fprintf(stderr, "BN_mod_sqrt failed: a = ");
1798 BN_print_fp(stderr, a);
1799 fprintf(stderr, ", r = ");
1800 BN_print_fp(stderr, r);
1801 fprintf(stderr, ", p = ");
1802 BN_print_fp(stderr, p);
1803 fprintf(stderr, "\n");
1804 goto err;
1805 }
1806
1807 putc('.', stderr);
1808 fflush(stderr);
1809 }
1810
1811 putc('\n', stderr);
1812 fflush(stderr);
1813 }
1814 ret = 1;
1815 err:
1816 if (a != NULL) BN_free(a);
1817 if (p != NULL) BN_free(p);
1818 if (r != NULL) BN_free(r);
1819 return ret;
1820 }
1821
1822int test_lshift(BIO *bp,BN_CTX *ctx,BIGNUM *a_)
1823 {
1824 BIGNUM *a,*b,*c,*d;
1825 int i;
1826
1827 b=BN_new();
1828 c=BN_new();
1829 d=BN_new();
1830 BN_one(c);
1831
1832 if(a_)
1833 a=a_;
1834 else
1835 {
1836 a=BN_new();
1837 BN_bntest_rand(a,200,0,0); /**/
1838 a->neg=rand_neg();
1839 }
1840 for (i=0; i<num0; i++)
1841 {
1842 BN_lshift(b,a,i+1);
1843 BN_add(c,c,c);
1844 if (bp != NULL)
1845 {
1846 if (!results)
1847 {
1848 BN_print(bp,a);
1849 BIO_puts(bp," * ");
1850 BN_print(bp,c);
1851 BIO_puts(bp," - ");
1852 }
1853 BN_print(bp,b);
1854 BIO_puts(bp,"\n");
1855 }
1856 BN_mul(d,a,c,ctx);
1857 BN_sub(d,d,b);
1858 if(!BN_is_zero(d))
1859 {
1860 fprintf(stderr,"Left shift test failed!\n");
1861 fprintf(stderr,"a=");
1862 BN_print_fp(stderr,a);
1863 fprintf(stderr,"\nb=");
1864 BN_print_fp(stderr,b);
1865 fprintf(stderr,"\nc=");
1866 BN_print_fp(stderr,c);
1867 fprintf(stderr,"\nd=");
1868 BN_print_fp(stderr,d);
1869 fprintf(stderr,"\n");
1870 return 0;
1871 }
1872 }
1873 BN_free(a);
1874 BN_free(b);
1875 BN_free(c);
1876 BN_free(d);
1877 return(1);
1878 }
1879
1880int test_lshift1(BIO *bp)
1881 {
1882 BIGNUM *a,*b,*c;
1883 int i;
1884
1885 a=BN_new();
1886 b=BN_new();
1887 c=BN_new();
1888
1889 BN_bntest_rand(a,200,0,0); /**/
1890 a->neg=rand_neg();
1891 for (i=0; i<num0; i++)
1892 {
1893 BN_lshift1(b,a);
1894 if (bp != NULL)
1895 {
1896 if (!results)
1897 {
1898 BN_print(bp,a);
1899 BIO_puts(bp," * 2");
1900 BIO_puts(bp," - ");
1901 }
1902 BN_print(bp,b);
1903 BIO_puts(bp,"\n");
1904 }
1905 BN_add(c,a,a);
1906 BN_sub(a,b,c);
1907 if(!BN_is_zero(a))
1908 {
1909 fprintf(stderr,"Left shift one test failed!\n");
1910 return 0;
1911 }
1912
1913 BN_copy(a,b);
1914 }
1915 BN_free(a);
1916 BN_free(b);
1917 BN_free(c);
1918 return(1);
1919 }
1920
1921int test_rshift(BIO *bp,BN_CTX *ctx)
1922 {
1923 BIGNUM *a,*b,*c,*d,*e;
1924 int i;
1925
1926 a=BN_new();
1927 b=BN_new();
1928 c=BN_new();
1929 d=BN_new();
1930 e=BN_new();
1931 BN_one(c);
1932
1933 BN_bntest_rand(a,200,0,0); /**/
1934 a->neg=rand_neg();
1935 for (i=0; i<num0; i++)
1936 {
1937 BN_rshift(b,a,i+1);
1938 BN_add(c,c,c);
1939 if (bp != NULL)
1940 {
1941 if (!results)
1942 {
1943 BN_print(bp,a);
1944 BIO_puts(bp," / ");
1945 BN_print(bp,c);
1946 BIO_puts(bp," - ");
1947 }
1948 BN_print(bp,b);
1949 BIO_puts(bp,"\n");
1950 }
1951 BN_div(d,e,a,c,ctx);
1952 BN_sub(d,d,b);
1953 if(!BN_is_zero(d))
1954 {
1955 fprintf(stderr,"Right shift test failed!\n");
1956 return 0;
1957 }
1958 }
1959 BN_free(a);
1960 BN_free(b);
1961 BN_free(c);
1962 BN_free(d);
1963 BN_free(e);
1964 return(1);
1965 }
1966
1967int test_rshift1(BIO *bp)
1968 {
1969 BIGNUM *a,*b,*c;
1970 int i;
1971
1972 a=BN_new();
1973 b=BN_new();
1974 c=BN_new();
1975
1976 BN_bntest_rand(a,200,0,0); /**/
1977 a->neg=rand_neg();
1978 for (i=0; i<num0; i++)
1979 {
1980 BN_rshift1(b,a);
1981 if (bp != NULL)
1982 {
1983 if (!results)
1984 {
1985 BN_print(bp,a);
1986 BIO_puts(bp," / 2");
1987 BIO_puts(bp," - ");
1988 }
1989 BN_print(bp,b);
1990 BIO_puts(bp,"\n");
1991 }
1992 BN_sub(c,a,b);
1993 BN_sub(c,c,b);
1994 if(!BN_is_zero(c) && !BN_abs_is_word(c, 1))
1995 {
1996 fprintf(stderr,"Right shift one test failed!\n");
1997 return 0;
1998 }
1999 BN_copy(a,b);
2000 }
2001 BN_free(a);
2002 BN_free(b);
2003 BN_free(c);
2004 return(1);
2005 }
2006
2007int rand_neg(void)
2008 {
2009 static unsigned int neg=0;
2010 static int sign[8]={0,0,0,1,1,0,1,1};
2011
2012 return(sign[(neg++)%8]);
2013 }
diff --git a/src/lib/libcrypto/bn/divtest.c b/src/lib/libcrypto/bn/divtest.c
deleted file mode 100644
index d3fc688f33..0000000000
--- a/src/lib/libcrypto/bn/divtest.c
+++ /dev/null
@@ -1,41 +0,0 @@
1#include <openssl/bn.h>
2#include <openssl/rand.h>
3
4static int Rand(n)
5{
6 unsigned char x[2];
7 RAND_pseudo_bytes(x,2);
8 return (x[0] + 2*x[1]);
9}
10
11static void bug(char *m, BIGNUM *a, BIGNUM *b)
12{
13 printf("%s!\na=",m);
14 BN_print_fp(stdout, a);
15 printf("\nb=");
16 BN_print_fp(stdout, b);
17 printf("\n");
18 fflush(stdout);
19}
20
21main()
22{
23 BIGNUM *a=BN_new(), *b=BN_new(), *c=BN_new(), *d=BN_new(),
24 *C=BN_new(), *D=BN_new();
25 BN_RECP_CTX *recp=BN_RECP_CTX_new();
26 BN_CTX *ctx=BN_CTX_new();
27
28 for(;;) {
29 BN_pseudo_rand(a,Rand(),0,0);
30 BN_pseudo_rand(b,Rand(),0,0);
31 if (BN_is_zero(b)) continue;
32
33 BN_RECP_CTX_set(recp,b,ctx);
34 if (BN_div(C,D,a,b,ctx) != 1)
35 bug("BN_div failed",a,b);
36 if (BN_div_recp(c,d,a,recp,ctx) != 1)
37 bug("BN_div_recp failed",a,b);
38 else if (BN_cmp(c,C) != 0 || BN_cmp(c,C) != 0)
39 bug("mismatch",a,b);
40 }
41}
diff --git a/src/lib/libcrypto/bn/exp.c b/src/lib/libcrypto/bn/exp.c
deleted file mode 100644
index 4865b0ef74..0000000000
--- a/src/lib/libcrypto/bn/exp.c
+++ /dev/null
@@ -1,62 +0,0 @@
1/* unused */
2
3#include <stdio.h>
4#include <openssl/tmdiff.h>
5#include "bn_lcl.h"
6
7#define SIZE 256
8#define NUM (8*8*8)
9#define MOD (8*8*8*8*8)
10
11main(argc,argv)
12int argc;
13char *argv[];
14 {
15 BN_CTX ctx;
16 BIGNUM a,b,c,r,rr,t,l;
17 int j,i,size=SIZE,num=NUM,mod=MOD;
18 char *start,*end;
19 BN_MONT_CTX mont;
20 double d,md;
21
22 BN_MONT_CTX_init(&mont);
23 BN_CTX_init(&ctx);
24 BN_init(&a);
25 BN_init(&b);
26 BN_init(&c);
27 BN_init(&r);
28
29 start=ms_time_new();
30 end=ms_time_new();
31 while (size <= 1024*8)
32 {
33 BN_rand(&a,size,0,0);
34 BN_rand(&b,size,1,0);
35 BN_rand(&c,size,0,1);
36
37 BN_mod(&a,&a,&c,&ctx);
38
39 ms_time_get(start);
40 for (i=0; i<10; i++)
41 BN_MONT_CTX_set(&mont,&c,&ctx);
42 ms_time_get(end);
43 md=ms_time_diff(start,end);
44
45 ms_time_get(start);
46 for (i=0; i<num; i++)
47 {
48 /* bn_mull(&r,&a,&b,&ctx); */
49 /* BN_sqr(&r,&a,&ctx); */
50 BN_mod_exp_mont(&r,&a,&b,&c,&ctx,&mont);
51 }
52 ms_time_get(end);
53 d=ms_time_diff(start,end)/* *50/33 */;
54 printf("%5d bit:%6.2f %6d %6.4f %4d m_set(%5.4f)\n",size,
55 d,num,d/num,(int)((d/num)*mod),md/10.0);
56 num/=8;
57 mod/=8;
58 if (num <= 0) num=1;
59 size*=2;
60 }
61
62 }
diff --git a/src/lib/libcrypto/bn/expspeed.c b/src/lib/libcrypto/bn/expspeed.c
deleted file mode 100644
index 4d5f221f33..0000000000
--- a/src/lib/libcrypto/bn/expspeed.c
+++ /dev/null
@@ -1,353 +0,0 @@
1/* unused */
2
3/* crypto/bn/expspeed.c */
4/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
5 * All rights reserved.
6 *
7 * This package is an SSL implementation written
8 * by Eric Young (eay@cryptsoft.com).
9 * The implementation was written so as to conform with Netscapes SSL.
10 *
11 * This library is free for commercial and non-commercial use as long as
12 * the following conditions are aheared to. The following conditions
13 * apply to all code found in this distribution, be it the RC4, RSA,
14 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
15 * included with this distribution is covered by the same copyright terms
16 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
17 *
18 * Copyright remains Eric Young's, and as such any Copyright notices in
19 * the code are not to be removed.
20 * If this package is used in a product, Eric Young should be given attribution
21 * as the author of the parts of the library used.
22 * This can be in the form of a textual message at program startup or
23 * in documentation (online or textual) provided with the package.
24 *
25 * Redistribution and use in source and binary forms, with or without
26 * modification, are permitted provided that the following conditions
27 * are met:
28 * 1. Redistributions of source code must retain the copyright
29 * notice, this list of conditions and the following disclaimer.
30 * 2. Redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution.
33 * 3. All advertising materials mentioning features or use of this software
34 * must display the following acknowledgement:
35 * "This product includes cryptographic software written by
36 * Eric Young (eay@cryptsoft.com)"
37 * The word 'cryptographic' can be left out if the rouines from the library
38 * being used are not cryptographic related :-).
39 * 4. If you include any Windows specific code (or a derivative thereof) from
40 * the apps directory (application code) you must include an acknowledgement:
41 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
42 *
43 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * The licence and distribution terms for any publically available version or
56 * derivative of this code cannot be changed. i.e. this code cannot simply be
57 * copied and put under another distribution licence
58 * [including the GNU Public Licence.]
59 */
60
61/* most of this code has been pilfered from my libdes speed.c program */
62
63#define BASENUM 5000
64#define NUM_START 0
65
66
67/* determine timings for modexp, modmul, modsqr, gcd, Kronecker symbol,
68 * modular inverse, or modular square roots */
69#define TEST_EXP
70#undef TEST_MUL
71#undef TEST_SQR
72#undef TEST_GCD
73#undef TEST_KRON
74#undef TEST_INV
75#undef TEST_SQRT
76#define P_MOD_64 9 /* least significant 6 bits for prime to be used for BN_sqrt timings */
77
78#if defined(TEST_EXP) + defined(TEST_MUL) + defined(TEST_SQR) + defined(TEST_GCD) + defined(TEST_KRON) + defined(TEST_INV) +defined(TEST_SQRT) != 1
79# error "choose one test"
80#endif
81
82#if defined(TEST_INV) || defined(TEST_SQRT)
83# define C_PRIME
84static void genprime_cb(int p, int n, void *arg);
85#endif
86
87
88
89#undef PROG
90#define PROG bnspeed_main
91
92#include <stdio.h>
93#include <stdlib.h>
94#include <signal.h>
95#include <string.h>
96#include <openssl/crypto.h>
97#include <openssl/err.h>
98#include <openssl/rand.h>
99
100#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
101#define TIMES
102#endif
103
104#ifndef _IRIX
105#include <time.h>
106#endif
107#ifdef TIMES
108#include <sys/types.h>
109#include <sys/times.h>
110#endif
111
112/* Depending on the VMS version, the tms structure is perhaps defined.
113 The __TMS macro will show if it was. If it wasn't defined, we should
114 undefine TIMES, since that tells the rest of the program how things
115 should be handled. -- Richard Levitte */
116#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
117#undef TIMES
118#endif
119
120#ifndef TIMES
121#include <sys/timeb.h>
122#endif
123
124#if defined(sun) || defined(__ultrix)
125#define _POSIX_SOURCE
126#include <limits.h>
127#include <sys/param.h>
128#endif
129
130#include <openssl/bn.h>
131#include <openssl/x509.h>
132
133/* The following if from times(3) man page. It may need to be changed */
134#ifndef HZ
135# ifndef CLK_TCK
136# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
137# define HZ 100.0
138# else /* _BSD_CLK_TCK_ */
139# define HZ ((double)_BSD_CLK_TCK_)
140# endif
141# else /* CLK_TCK */
142# define HZ ((double)CLK_TCK)
143# endif
144#endif
145
146#undef BUFSIZE
147#define BUFSIZE ((long)1024*8)
148int run=0;
149
150static double Time_F(int s);
151#define START 0
152#define STOP 1
153
154static double Time_F(int s)
155 {
156 double ret;
157#ifdef TIMES
158 static struct tms tstart,tend;
159
160 if (s == START)
161 {
162 times(&tstart);
163 return(0);
164 }
165 else
166 {
167 times(&tend);
168 ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ;
169 return((ret < 1e-3)?1e-3:ret);
170 }
171#else /* !times() */
172 static struct timeb tstart,tend;
173 long i;
174
175 if (s == START)
176 {
177 ftime(&tstart);
178 return(0);
179 }
180 else
181 {
182 ftime(&tend);
183 i=(long)tend.millitm-(long)tstart.millitm;
184 ret=((double)(tend.time-tstart.time))+((double)i)/1000.0;
185 return((ret < 0.001)?0.001:ret);
186 }
187#endif
188 }
189
190#define NUM_SIZES 7
191#if NUM_START > NUM_SIZES
192# error "NUM_START > NUM_SIZES"
193#endif
194static int sizes[NUM_SIZES]={128,256,512,1024,2048,4096,8192};
195static int mul_c[NUM_SIZES]={8*8*8*8*8*8,8*8*8*8*8,8*8*8*8,8*8*8,8*8,8,1};
196/*static int sizes[NUM_SIZES]={59,179,299,419,539}; */
197
198#define RAND_SEED(string) { const char str[] = string; RAND_seed(string, sizeof str); }
199
200void do_mul_exp(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *c,BN_CTX *ctx);
201
202int main(int argc, char **argv)
203 {
204 BN_CTX *ctx;
205 BIGNUM *a,*b,*c,*r;
206
207#if 1
208 if (!CRYPTO_set_mem_debug_functions(0,0,0,0,0))
209 abort();
210#endif
211
212 ctx=BN_CTX_new();
213 a=BN_new();
214 b=BN_new();
215 c=BN_new();
216 r=BN_new();
217
218 while (!RAND_status())
219 /* not enough bits */
220 RAND_SEED("I demand a manual recount!");
221
222 do_mul_exp(r,a,b,c,ctx);
223 return 0;
224 }
225
226void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
227 {
228 int i,k;
229 double tm;
230 long num;
231
232 num=BASENUM;
233 for (i=NUM_START; i<NUM_SIZES; i++)
234 {
235#ifdef C_PRIME
236# ifdef TEST_SQRT
237 if (!BN_set_word(a, 64)) goto err;
238 if (!BN_set_word(b, P_MOD_64)) goto err;
239# define ADD a
240# define REM b
241# else
242# define ADD NULL
243# define REM NULL
244# endif
245 if (!BN_generate_prime(c,sizes[i],0,ADD,REM,genprime_cb,NULL)) goto err;
246 putc('\n', stderr);
247 fflush(stderr);
248#endif
249
250 for (k=0; k<num; k++)
251 {
252 if (k%50 == 0) /* Average over num/50 different choices of random numbers. */
253 {
254 if (!BN_pseudo_rand(a,sizes[i],1,0)) goto err;
255
256 if (!BN_pseudo_rand(b,sizes[i],1,0)) goto err;
257
258#ifndef C_PRIME
259 if (!BN_pseudo_rand(c,sizes[i],1,1)) goto err;
260#endif
261
262#ifdef TEST_SQRT
263 if (!BN_mod_sqr(a,a,c,ctx)) goto err;
264 if (!BN_mod_sqr(b,b,c,ctx)) goto err;
265#else
266 if (!BN_nnmod(a,a,c,ctx)) goto err;
267 if (!BN_nnmod(b,b,c,ctx)) goto err;
268#endif
269
270 if (k == 0)
271 Time_F(START);
272 }
273
274#if defined(TEST_EXP)
275 if (!BN_mod_exp(r,a,b,c,ctx)) goto err;
276#elif defined(TEST_MUL)
277 {
278 int i = 0;
279 for (i = 0; i < 50; i++)
280 if (!BN_mod_mul(r,a,b,c,ctx)) goto err;
281 }
282#elif defined(TEST_SQR)
283 {
284 int i = 0;
285 for (i = 0; i < 50; i++)
286 {
287 if (!BN_mod_sqr(r,a,c,ctx)) goto err;
288 if (!BN_mod_sqr(r,b,c,ctx)) goto err;
289 }
290 }
291#elif defined(TEST_GCD)
292 if (!BN_gcd(r,a,b,ctx)) goto err;
293 if (!BN_gcd(r,b,c,ctx)) goto err;
294 if (!BN_gcd(r,c,a,ctx)) goto err;
295#elif defined(TEST_KRON)
296 if (-2 == BN_kronecker(a,b,ctx)) goto err;
297 if (-2 == BN_kronecker(b,c,ctx)) goto err;
298 if (-2 == BN_kronecker(c,a,ctx)) goto err;
299#elif defined(TEST_INV)
300 if (!BN_mod_inverse(r,a,c,ctx)) goto err;
301 if (!BN_mod_inverse(r,b,c,ctx)) goto err;
302#else /* TEST_SQRT */
303 if (!BN_mod_sqrt(r,a,c,ctx)) goto err;
304 if (!BN_mod_sqrt(r,b,c,ctx)) goto err;
305#endif
306 }
307 tm=Time_F(STOP);
308 printf(
309#if defined(TEST_EXP)
310 "modexp %4d ^ %4d %% %4d"
311#elif defined(TEST_MUL)
312 "50*modmul %4d %4d %4d"
313#elif defined(TEST_SQR)
314 "100*modsqr %4d %4d %4d"
315#elif defined(TEST_GCD)
316 "3*gcd %4d %4d %4d"
317#elif defined(TEST_KRON)
318 "3*kronecker %4d %4d %4d"
319#elif defined(TEST_INV)
320 "2*inv %4d %4d mod %4d"
321#else /* TEST_SQRT */
322 "2*sqrt [prime == %d (mod 64)] %4d %4d mod %4d"
323#endif
324 " -> %8.6fms %5.1f (%ld)\n",
325#ifdef TEST_SQRT
326 P_MOD_64,
327#endif
328 sizes[i],sizes[i],sizes[i],tm*1000.0/num,tm*mul_c[i]/num, num);
329 num/=7;
330 if (num <= 0) num=1;
331 }
332 return;
333
334 err:
335 ERR_print_errors_fp(stderr);
336 }
337
338
339#ifdef C_PRIME
340static void genprime_cb(int p, int n, void *arg)
341 {
342 char c='*';
343
344 if (p == 0) c='.';
345 if (p == 1) c='+';
346 if (p == 2) c='*';
347 if (p == 3) c='\n';
348 putc(c, stderr);
349 fflush(stderr);
350 (void)n;
351 (void)arg;
352 }
353#endif
diff --git a/src/lib/libcrypto/bn/exptest.c b/src/lib/libcrypto/bn/exptest.c
deleted file mode 100644
index 074a8e882a..0000000000
--- a/src/lib/libcrypto/bn/exptest.c
+++ /dev/null
@@ -1,204 +0,0 @@
1/* crypto/bn/exptest.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <stdlib.h>
61#include <string.h>
62
63#include "../e_os.h"
64
65#include <openssl/bio.h>
66#include <openssl/bn.h>
67#include <openssl/rand.h>
68#include <openssl/err.h>
69
70#define NUM_BITS (BN_BITS*2)
71
72static const char rnd_seed[] = "string to make the random number generator think it has entropy";
73
74int main(int argc, char *argv[])
75 {
76 BN_CTX *ctx;
77 BIO *out=NULL;
78 int i,ret;
79 unsigned char c;
80 BIGNUM *r_mont,*r_mont_const,*r_recp,*r_simple,*a,*b,*m;
81
82 RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_rand may fail, and we don't
83 * even check its return value
84 * (which we should) */
85
86 ERR_load_BN_strings();
87
88 ctx=BN_CTX_new();
89 if (ctx == NULL) EXIT(1);
90 r_mont=BN_new();
91 r_mont_const=BN_new();
92 r_recp=BN_new();
93 r_simple=BN_new();
94 a=BN_new();
95 b=BN_new();
96 m=BN_new();
97 if ( (r_mont == NULL) || (r_recp == NULL) ||
98 (a == NULL) || (b == NULL))
99 goto err;
100
101 out=BIO_new(BIO_s_file());
102
103 if (out == NULL) EXIT(1);
104 BIO_set_fp(out,stdout,BIO_NOCLOSE);
105
106 for (i=0; i<200; i++)
107 {
108 RAND_bytes(&c,1);
109 c=(c%BN_BITS)-BN_BITS2;
110 BN_rand(a,NUM_BITS+c,0,0);
111
112 RAND_bytes(&c,1);
113 c=(c%BN_BITS)-BN_BITS2;
114 BN_rand(b,NUM_BITS+c,0,0);
115
116 RAND_bytes(&c,1);
117 c=(c%BN_BITS)-BN_BITS2;
118 BN_rand(m,NUM_BITS+c,0,1);
119
120 BN_mod(a,a,m,ctx);
121 BN_mod(b,b,m,ctx);
122
123 ret=BN_mod_exp_mont(r_mont,a,b,m,ctx,NULL);
124 if (ret <= 0)
125 {
126 printf("BN_mod_exp_mont() problems\n");
127 ERR_print_errors(out);
128 EXIT(1);
129 }
130
131 ret=BN_mod_exp_recp(r_recp,a,b,m,ctx);
132 if (ret <= 0)
133 {
134 printf("BN_mod_exp_recp() problems\n");
135 ERR_print_errors(out);
136 EXIT(1);
137 }
138
139 ret=BN_mod_exp_simple(r_simple,a,b,m,ctx);
140 if (ret <= 0)
141 {
142 printf("BN_mod_exp_simple() problems\n");
143 ERR_print_errors(out);
144 EXIT(1);
145 }
146
147 ret=BN_mod_exp_mont_consttime(r_mont_const,a,b,m,ctx,NULL);
148 if (ret <= 0)
149 {
150 printf("BN_mod_exp_mont_consttime() problems\n");
151 ERR_print_errors(out);
152 EXIT(1);
153 }
154
155 if (BN_cmp(r_simple, r_mont) == 0
156 && BN_cmp(r_simple,r_recp) == 0
157 && BN_cmp(r_simple,r_mont_const) == 0)
158 {
159 printf(".");
160 fflush(stdout);
161 }
162 else
163 {
164 if (BN_cmp(r_simple,r_mont) != 0)
165 printf("\nsimple and mont results differ\n");
166 if (BN_cmp(r_simple,r_mont_const) != 0)
167 printf("\nsimple and mont const time results differ\n");
168 if (BN_cmp(r_simple,r_recp) != 0)
169 printf("\nsimple and recp results differ\n");
170
171 printf("a (%3d) = ",BN_num_bits(a)); BN_print(out,a);
172 printf("\nb (%3d) = ",BN_num_bits(b)); BN_print(out,b);
173 printf("\nm (%3d) = ",BN_num_bits(m)); BN_print(out,m);
174 printf("\nsimple ="); BN_print(out,r_simple);
175 printf("\nrecp ="); BN_print(out,r_recp);
176 printf("\nmont ="); BN_print(out,r_mont);
177 printf("\nmont_ct ="); BN_print(out,r_mont_const);
178 printf("\n");
179 EXIT(1);
180 }
181 }
182 BN_free(r_mont);
183 BN_free(r_mont_const);
184 BN_free(r_recp);
185 BN_free(r_simple);
186 BN_free(a);
187 BN_free(b);
188 BN_free(m);
189 BN_CTX_free(ctx);
190 ERR_remove_thread_state(NULL);
191 CRYPTO_mem_leaks(out);
192 BIO_free(out);
193 printf(" done\n");
194 EXIT(0);
195err:
196 ERR_load_crypto_strings();
197 ERR_print_errors(out);
198#ifdef OPENSSL_SYS_NETWARE
199 printf("ERROR\n");
200#endif
201 EXIT(1);
202 return(1);
203 }
204
diff --git a/src/lib/libcrypto/bn/todo b/src/lib/libcrypto/bn/todo
deleted file mode 100644
index e47e381aea..0000000000
--- a/src/lib/libcrypto/bn/todo
+++ /dev/null
@@ -1,3 +0,0 @@
1Cache RECP_CTX values
2make the result argument independant of the inputs.
3split up the _exp_ functions
diff --git a/src/lib/libcrypto/bn/vms-helper.c b/src/lib/libcrypto/bn/vms-helper.c
deleted file mode 100644
index 4b63149bf3..0000000000
--- a/src/lib/libcrypto/bn/vms-helper.c
+++ /dev/null
@@ -1,68 +0,0 @@
1/* vms-helper.c */
2/* ====================================================================
3 * Copyright (c) 1999 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@OpenSSL.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include <stdio.h>
57#include "cryptlib.h"
58#include "bn_lcl.h"
59
60bn_div_words_abort(int i)
61{
62#ifdef BN_DEBUG
63#if !defined(OPENSSL_NO_STDIO) && !defined(OPENSSL_SYS_WIN16)
64 fprintf(stderr,"Division would overflow (%d)\n",i);
65#endif
66 abort();
67#endif
68}