summary | refs | log | tree | commit | diff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r-- src/lib/libcrypto/bn/Makefile.ssl 276
-rw-r--r-- src/lib/libcrypto/bn/asm/README 4
-rw-r--r-- src/lib/libcrypto/bn/asm/bn-586.pl 213
-rw-r--r-- src/lib/libcrypto/bn/asm/ia64.S 1498
-rw-r--r-- src/lib/libcrypto/bn/asm/vms.mar 697
-rw-r--r-- src/lib/libcrypto/bn/bn.h 177
-rw-r--r-- src/lib/libcrypto/bn/bn_add.c 12
-rw-r--r-- src/lib/libcrypto/bn/bn_asm.c 22
-rw-r--r-- src/lib/libcrypto/bn/bn_ctx.c 13
-rw-r--r-- src/lib/libcrypto/bn/bn_div.c 66
-rw-r--r-- src/lib/libcrypto/bn/bn_err.c 11
-rw-r--r-- src/lib/libcrypto/bn/bn_exp.c 149
-rw-r--r-- src/lib/libcrypto/bn/bn_exp2.c 27
-rw-r--r-- src/lib/libcrypto/bn/bn_gcd.c 338
-rw-r--r-- src/lib/libcrypto/bn/bn_kron.c 182
-rw-r--r-- src/lib/libcrypto/bn/bn_lcl.h 39
-rw-r--r-- src/lib/libcrypto/bn/bn_lib.c 336
-rw-r--r-- src/lib/libcrypto/bn/bn_mod.c 296
-rw-r--r-- src/lib/libcrypto/bn/bn_mont.c 63
-rw-r--r-- src/lib/libcrypto/bn/bn_mpi.c 2
-rw-r--r-- src/lib/libcrypto/bn/bn_mul.c 503
-rw-r--r-- src/lib/libcrypto/bn/bn_prime.c 29
-rw-r--r-- src/lib/libcrypto/bn/bn_print.c 6
-rw-r--r-- src/lib/libcrypto/bn/bn_rand.c 94
-rw-r--r-- src/lib/libcrypto/bn/bn_recp.c 48
-rw-r--r-- src/lib/libcrypto/bn/bn_shift.c 8
-rw-r--r-- src/lib/libcrypto/bn/bn_sqr.c 18
-rw-r--r-- src/lib/libcrypto/bn/bn_sqrt.c 387
-rw-r--r-- src/lib/libcrypto/bn/bnspeed.c 4
-rw-r--r-- src/lib/libcrypto/bn/bntest.c 261
-rw-r--r-- src/lib/libcrypto/bn/expspeed.c 176
-rw-r--r-- src/lib/libcrypto/bn/exptest.c 2
-rw-r--r-- src/lib/libcrypto/bn/vms-helper.c 2
33 files changed, 4739 insertions, 1220 deletions
diff --git a/src/lib/libcrypto/bn/Makefile.ssl b/src/lib/libcrypto/bn/Makefile.ssl
index 526d7adb5c..eb6f0eeebd 100644
--- a/src/lib/libcrypto/bn/Makefile.ssl
+++ b/src/lib/libcrypto/bn/Makefile.ssl
@@ -6,13 +6,14 @@ DIR= bn
6TOP= ../.. 6TOP= ../..
7CC= cc 7CC= cc
8CPP= $(CC) -E 8CPP= $(CC) -E
9INCLUDES= -I.. -I../../include 9INCLUDES= -I.. -I$(TOP) -I../../include
10CFLAG=-g 10CFLAG=-g
11INSTALL_PREFIX= 11INSTALL_PREFIX=
12OPENSSLDIR= /usr/local/ssl 12OPENSSLDIR= /usr/local/ssl
13INSTALLTOP=/usr/local/ssl 13INSTALLTOP=/usr/local/ssl
14MAKE= make -f Makefile.ssl 14MAKE= make -f Makefile.ssl
15MAKEDEPEND= $(TOP)/util/domd $(TOP) 15MAKEDEPPROG= makedepend
16MAKEDEPEND= $(TOP)/util/domd $(TOP) -MD $(MAKEDEPPROG)
16MAKEFILE= Makefile.ssl 17MAKEFILE= Makefile.ssl
17AR= ar r 18AR= ar r
18 19
@@ -35,15 +36,15 @@ TEST=bntest.c exptest.c
35APPS= 36APPS=
36 37
37LIB=$(TOP)/libcrypto.a 38LIB=$(TOP)/libcrypto.a
38LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c \ 39LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
39 bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ 40 bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
40 bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c bn_recp.c bn_mont.c \ 41 bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \
41 bn_mpi.c bn_exp2.c 42 bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c
42 43
43LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o \ 44LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \
44 bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \ 45 bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
45 bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) bn_recp.o bn_mont.o \ 46 bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \
46 bn_mpi.o bn_exp2.o 47 bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o
47 48
48SRC= $(LIBSRC) 49SRC= $(LIBSRC)
49 50
@@ -68,8 +69,7 @@ bnbug: bnbug.c ../../libcrypto.a top
68 69
69lib: $(LIBOBJ) 70lib: $(LIBOBJ)
70 $(AR) $(LIB) $(LIBOBJ) 71 $(AR) $(LIB) $(LIBOBJ)
71 @echo You may get an error following this line. Please ignore. 72 $(RANLIB) $(LIB) || echo Never mind.
72 - $(RANLIB) $(LIB)
73 @touch lib 73 @touch lib
74 74
75# elf 75# elf
@@ -124,6 +124,18 @@ asm/sparcv8plus-gcc27.o: asm/sparcv8plus.S
124 $(CC) $(ASFLAGS) -E asm/sparcv8plus.S | \ 124 $(CC) $(ASFLAGS) -E asm/sparcv8plus.S | \
125 /usr/ccs/bin/as -xarch=v8plus - -o asm/sparcv8plus-gcc27.o 125 /usr/ccs/bin/as -xarch=v8plus - -o asm/sparcv8plus-gcc27.o
126 126
127
128asm/ia64.o: asm/ia64.S
129
130# Some compiler drivers (most notably HP-UX and Intel C++) don't
131# understand .S extension:-( I wish I could pipe output from cc -E,
132# but it's too compiler driver/ABI dependent to cover with a single
133# rule... <appro@fy.chalmers.se>
134asm/ia64-cpp.o: asm/ia64.S
135 $(CC) $(ASFLAGS) -E asm/ia64.S > /tmp/ia64.$$$$.s && \
136 $(CC) $(ASFLAGS) -c -o asm/ia64-cpp.o /tmp/ia64.$$$$.s; \
137 rm -f /tmp/ia64.$$$$.s
138
127files: 139files:
128 $(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO 140 $(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
129 141
@@ -168,146 +180,160 @@ clean:
168 180
169# DO NOT DELETE THIS LINE -- make depend depends on it. 181# DO NOT DELETE THIS LINE -- make depend depends on it.
170 182
171bn_add.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 183bn_add.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
172bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 184bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
173bn_add.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 185bn_add.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
174bn_add.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 186bn_add.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
175bn_add.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 187bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
176bn_add.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 188bn_add.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
177bn_add.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 189bn_add.o: ../cryptlib.h bn_add.c bn_lcl.h
178bn_asm.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 190bn_asm.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
179bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 191bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
180bn_asm.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 192bn_asm.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
181bn_asm.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 193bn_asm.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
182bn_asm.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 194bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
183bn_asm.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 195bn_asm.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
184bn_asm.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 196bn_asm.o: ../cryptlib.h bn_asm.c bn_lcl.h
185bn_blind.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 197bn_blind.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
186bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 198bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
187bn_blind.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 199bn_blind.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
188bn_blind.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 200bn_blind.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
189bn_blind.o: ../../include/openssl/opensslconf.h
190bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 201bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
191bn_blind.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h 202bn_blind.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
192bn_blind.o: ../cryptlib.h bn_lcl.h 203bn_blind.o: ../cryptlib.h bn_blind.c bn_lcl.h
193bn_ctx.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 204bn_ctx.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
194bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 205bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
195bn_ctx.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 206bn_ctx.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
196bn_ctx.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 207bn_ctx.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
197bn_ctx.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 208bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
198bn_ctx.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 209bn_ctx.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
199bn_ctx.o: ../../include/openssl/symhacks.h ../cryptlib.h 210bn_ctx.o: ../cryptlib.h bn_ctx.c bn_lcl.h
200bn_div.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 211bn_div.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
201bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 212bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
202bn_div.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 213bn_div.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
203bn_div.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 214bn_div.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
204bn_div.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 215bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
205bn_div.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 216bn_div.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
206bn_div.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 217bn_div.o: ../cryptlib.h bn_div.c bn_lcl.h
207bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 218bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
208bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/err.h 219bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
209bn_err.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h 220bn_err.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
210bn_err.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 221bn_err.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
211bn_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h 222bn_err.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
212bn_exp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 223bn_err.o: ../../include/openssl/symhacks.h bn_err.c
224bn_exp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
213bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 225bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
214bn_exp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 226bn_exp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
215bn_exp.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 227bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
216bn_exp.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 228bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
217bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 229bn_exp.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
218bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 230bn_exp.o: ../cryptlib.h bn_exp.c bn_lcl.h
219bn_exp2.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 231bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
220bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 232bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
221bn_exp2.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 233bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
222bn_exp2.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 234bn_exp2.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
223bn_exp2.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 235bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
224bn_exp2.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 236bn_exp2.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
225bn_exp2.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 237bn_exp2.o: ../cryptlib.h bn_exp2.c bn_lcl.h
226bn_gcd.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 238bn_gcd.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
227bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 239bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
228bn_gcd.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 240bn_gcd.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
229bn_gcd.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 241bn_gcd.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
230bn_gcd.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 242bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
231bn_gcd.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 243bn_gcd.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
232bn_gcd.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 244bn_gcd.o: ../cryptlib.h bn_gcd.c bn_lcl.h
233bn_lib.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 245bn_kron.o: ../../include/openssl/bn.h ../../include/openssl/e_os2.h
246bn_kron.o: ../../include/openssl/opensslconf.h bn_kron.c bn_lcl.h
247bn_lib.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
234bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 248bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
235bn_lib.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 249bn_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
236bn_lib.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 250bn_lib.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
237bn_lib.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 251bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
238bn_lib.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 252bn_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
239bn_lib.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 253bn_lib.o: ../cryptlib.h bn_lcl.h bn_lib.c
240bn_mont.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 254bn_mod.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
255bn_mod.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
256bn_mod.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
257bn_mod.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
258bn_mod.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
259bn_mod.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
260bn_mod.o: ../cryptlib.h bn_lcl.h bn_mod.c
261bn_mont.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
241bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 262bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
242bn_mont.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 263bn_mont.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
243bn_mont.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 264bn_mont.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
244bn_mont.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 265bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
245bn_mont.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 266bn_mont.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
246bn_mont.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 267bn_mont.o: ../cryptlib.h bn_lcl.h bn_mont.c
247bn_mpi.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 268bn_mpi.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
248bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 269bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
249bn_mpi.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 270bn_mpi.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
250bn_mpi.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 271bn_mpi.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
251bn_mpi.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 272bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
252bn_mpi.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 273bn_mpi.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
253bn_mpi.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 274bn_mpi.o: ../cryptlib.h bn_lcl.h bn_mpi.c
254bn_mul.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 275bn_mul.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
255bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 276bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
256bn_mul.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 277bn_mul.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
257bn_mul.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 278bn_mul.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
258bn_mul.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 279bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
259bn_mul.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 280bn_mul.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
260bn_mul.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 281bn_mul.o: ../cryptlib.h bn_lcl.h bn_mul.c
261bn_prime.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 282bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
262bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 283bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
263bn_prime.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 284bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
264bn_prime.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 285bn_prime.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
265bn_prime.o: ../../include/openssl/opensslconf.h 286bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
266bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h 287bn_prime.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h
267bn_prime.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 288bn_prime.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
268bn_prime.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_prime.h 289bn_prime.o: ../cryptlib.h bn_lcl.h bn_prime.c bn_prime.h
269bn_print.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 290bn_print.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
270bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 291bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
271bn_print.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 292bn_print.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
272bn_print.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 293bn_print.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
273bn_print.o: ../../include/openssl/opensslconf.h
274bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 294bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
275bn_print.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h 295bn_print.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
276bn_print.o: ../cryptlib.h bn_lcl.h 296bn_print.o: ../cryptlib.h bn_lcl.h bn_print.c
277bn_rand.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 297bn_rand.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
278bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 298bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
279bn_rand.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 299bn_rand.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
280bn_rand.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 300bn_rand.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
281bn_rand.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 301bn_rand.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
282bn_rand.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h 302bn_rand.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h
283bn_rand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h 303bn_rand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
284bn_rand.o: ../cryptlib.h bn_lcl.h 304bn_rand.o: ../cryptlib.h bn_lcl.h bn_rand.c
285bn_recp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 305bn_recp.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
286bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 306bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
287bn_recp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 307bn_recp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
288bn_recp.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 308bn_recp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
289bn_recp.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 309bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
290bn_recp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 310bn_recp.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
291bn_recp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 311bn_recp.o: ../cryptlib.h bn_lcl.h bn_recp.c
292bn_shift.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 312bn_shift.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
293bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 313bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
294bn_shift.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 314bn_shift.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
295bn_shift.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 315bn_shift.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
296bn_shift.o: ../../include/openssl/opensslconf.h
297bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 316bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
298bn_shift.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h 317bn_shift.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
299bn_shift.o: ../cryptlib.h bn_lcl.h 318bn_shift.o: ../cryptlib.h bn_lcl.h bn_shift.c
300bn_sqr.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 319bn_sqr.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
301bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 320bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
302bn_sqr.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 321bn_sqr.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
303bn_sqr.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 322bn_sqr.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
304bn_sqr.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 323bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
305bn_sqr.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 324bn_sqr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
306bn_sqr.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 325bn_sqr.o: ../cryptlib.h bn_lcl.h bn_sqr.c
307bn_word.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 326bn_sqrt.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
327bn_sqrt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
328bn_sqrt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
329bn_sqrt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
330bn_sqrt.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
331bn_sqrt.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
332bn_sqrt.o: ../cryptlib.h bn_lcl.h bn_sqrt.c
333bn_word.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
308bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 334bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
309bn_word.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 335bn_word.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
310bn_word.o: ../../include/openssl/err.h ../../include/openssl/lhash.h 336bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
311bn_word.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h 337bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
312bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 338bn_word.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
313bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h 339bn_word.o: ../cryptlib.h bn_lcl.h bn_word.c
diff --git a/src/lib/libcrypto/bn/asm/README b/src/lib/libcrypto/bn/asm/README
index a0fe58a677..b0f3a68a06 100644
--- a/src/lib/libcrypto/bn/asm/README
+++ b/src/lib/libcrypto/bn/asm/README
@@ -1,3 +1,5 @@
1<OBSOLETE>
2
1All assember in this directory are just version of the file 3All assember in this directory are just version of the file
2crypto/bn/bn_asm.c. 4crypto/bn/bn_asm.c.
3 5
@@ -21,3 +23,5 @@ pa-risc.s is the origional one which works fine and generated using gcc :-)
21 23
22pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations 24pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations
23by Chris Ruemmler from HP (with some help from the HP C compiler). 25by Chris Ruemmler from HP (with some help from the HP C compiler).
26
27</OBSOLETE>
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
index 5191bed273..33f6125920 100644
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ b/src/lib/libcrypto/bn/asm/bn-586.pl
@@ -11,6 +11,7 @@ require "x86asm.pl";
11&bn_div_words("bn_div_words"); 11&bn_div_words("bn_div_words");
12&bn_add_words("bn_add_words"); 12&bn_add_words("bn_add_words");
13&bn_sub_words("bn_sub_words"); 13&bn_sub_words("bn_sub_words");
14&bn_sub_part_words("bn_sub_part_words");
14 15
15&asm_finish(); 16&asm_finish();
16 17
@@ -300,7 +301,7 @@ sub bn_add_words
300 &add($tmp1,$tmp2); 301 &add($tmp1,$tmp2);
301 &adc($c,0); 302 &adc($c,0);
302 &dec($num) if ($i != 6); 303 &dec($num) if ($i != 6);
303 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a 304 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
304 &jz(&label("aw_end")) if ($i != 6); 305 &jz(&label("aw_end")) if ($i != 6);
305 } 306 }
306 &set_label("aw_end",0); 307 &set_label("aw_end",0);
@@ -372,7 +373,7 @@ sub bn_sub_words
372 &sub($tmp1,$tmp2); 373 &sub($tmp1,$tmp2);
373 &adc($c,0); 374 &adc($c,0);
374 &dec($num) if ($i != 6); 375 &dec($num) if ($i != 6);
375 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a 376 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
376 &jz(&label("aw_end")) if ($i != 6); 377 &jz(&label("aw_end")) if ($i != 6);
377 } 378 }
378 &set_label("aw_end",0); 379 &set_label("aw_end",0);
@@ -382,3 +383,211 @@ sub bn_sub_words
382 &function_end($name); 383 &function_end($name);
383 } 384 }
384 385
386sub bn_sub_part_words
387 {
388 local($name)=@_;
389
390 &function_begin($name,"");
391
392 &comment("");
393 $a="esi";
394 $b="edi";
395 $c="eax";
396 $r="ebx";
397 $tmp1="ecx";
398 $tmp2="edx";
399 $num="ebp";
400
401 &mov($r,&wparam(0)); # get r
402 &mov($a,&wparam(1)); # get a
403 &mov($b,&wparam(2)); # get b
404 &mov($num,&wparam(3)); # get num
405 &xor($c,$c); # clear carry
406 &and($num,0xfffffff8); # num / 8
407
408 &jz(&label("aw_finish"));
409
410 &set_label("aw_loop",0);
411 for ($i=0; $i<8; $i++)
412 {
413 &comment("Round $i");
414
415 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
416 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
417 &sub($tmp1,$c);
418 &mov($c,0);
419 &adc($c,$c);
420 &sub($tmp1,$tmp2);
421 &adc($c,0);
422 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
423 }
424
425 &comment("");
426 &add($a,32);
427 &add($b,32);
428 &add($r,32);
429 &sub($num,8);
430 &jnz(&label("aw_loop"));
431
432 &set_label("aw_finish",0);
433 &mov($num,&wparam(3)); # get num
434 &and($num,7);
435 &jz(&label("aw_end"));
436
437 for ($i=0; $i<7; $i++)
438 {
439 &comment("Tail Round $i");
440 &mov($tmp1,&DWP(0,$a,"",0)); # *a
441 &mov($tmp2,&DWP(0,$b,"",0));# *b
442 &sub($tmp1,$c);
443 &mov($c,0);
444 &adc($c,$c);
445 &sub($tmp1,$tmp2);
446 &adc($c,0);
447 &mov(&DWP(0,$r,"",0),$tmp1); # *r
448 &add($a, 4);
449 &add($b, 4);
450 &add($r, 4);
451 &dec($num) if ($i != 6);
452 &jz(&label("aw_end")) if ($i != 6);
453 }
454 &set_label("aw_end",0);
455
456 &cmp(&wparam(4),0);
457 &je(&label("pw_end"));
458
459 &mov($num,&wparam(4)); # get dl
460 &cmp($num,0);
461 &je(&label("pw_end"));
462 &jge(&label("pw_pos"));
463
464 &comment("pw_neg");
465 &mov($tmp2,0);
466 &sub($tmp2,$num);
467 &mov($num,$tmp2);
468 &and($num,0xfffffff8); # num / 8
469 &jz(&label("pw_neg_finish"));
470
471 &set_label("pw_neg_loop",0);
472 for ($i=0; $i<8; $i++)
473 {
474 &comment("dl<0 Round $i");
475
476 &mov($tmp1,0);
477 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
478 &sub($tmp1,$c);
479 &mov($c,0);
480 &adc($c,$c);
481 &sub($tmp1,$tmp2);
482 &adc($c,0);
483 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
484 }
485
486 &comment("");
487 &add($b,32);
488 &add($r,32);
489 &sub($num,8);
490 &jnz(&label("pw_neg_loop"));
491
492 &set_label("pw_neg_finish",0);
493 &mov($tmp2,&wparam(4)); # get dl
494 &mov($num,0);
495 &sub($num,$tmp2);
496 &and($num,7);
497 &jz(&label("pw_end"));
498
499 for ($i=0; $i<7; $i++)
500 {
501 &comment("dl<0 Tail Round $i");
502 &mov($tmp1,0);
503 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
504 &sub($tmp1,$c);
505 &mov($c,0);
506 &adc($c,$c);
507 &sub($tmp1,$tmp2);
508 &adc($c,0);
509 &dec($num) if ($i != 6);
510 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
511 &jz(&label("pw_end")) if ($i != 6);
512 }
513
514 &jmp(&label("pw_end"));
515
516 &set_label("pw_pos",0);
517
518 &and($num,0xfffffff8); # num / 8
519 &jz(&label("pw_pos_finish"));
520
521 &set_label("pw_pos_loop",0);
522
523 for ($i=0; $i<8; $i++)
524 {
525 &comment("dl>0 Round $i");
526
527 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
528 &sub($tmp1,$c);
529 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
530 &jnc(&label("pw_nc".$i));
531 }
532
533 &comment("");
534 &add($a,32);
535 &add($r,32);
536 &sub($num,8);
537 &jnz(&label("pw_pos_loop"));
538
539 &set_label("pw_pos_finish",0);
540 &mov($num,&wparam(4)); # get dl
541 &and($num,7);
542 &jz(&label("pw_end"));
543
544 for ($i=0; $i<7; $i++)
545 {
546 &comment("dl>0 Tail Round $i");
547 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
548 &sub($tmp1,$c);
549 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
550 &jnc(&label("pw_tail_nc".$i));
551 &dec($num) if ($i != 6);
552 &jz(&label("pw_end")) if ($i != 6);
553 }
554 &mov($c,1);
555 &jmp(&label("pw_end"));
556
557 &set_label("pw_nc_loop",0);
558 for ($i=0; $i<8; $i++)
559 {
560 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
561 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
562 &set_label("pw_nc".$i,0);
563 }
564
565 &comment("");
566 &add($a,32);
567 &add($r,32);
568 &sub($num,8);
569 &jnz(&label("pw_nc_loop"));
570
571 &mov($num,&wparam(4)); # get dl
572 &and($num,7);
573 &jz(&label("pw_nc_end"));
574
575 for ($i=0; $i<7; $i++)
576 {
577 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
578 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
579 &set_label("pw_tail_nc".$i,0);
580 &dec($num) if ($i != 6);
581 &jz(&label("pw_nc_end")) if ($i != 6);
582 }
583
584 &set_label("pw_nc_end",0);
585 &mov($c,0);
586
587 &set_label("pw_end",0);
588
589# &mov("eax",$c); # $c is "eax"
590
591 &function_end($name);
592 }
593
diff --git a/src/lib/libcrypto/bn/asm/ia64.S b/src/lib/libcrypto/bn/asm/ia64.S
new file mode 100644
index 0000000000..ae56066310
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/ia64.S
@@ -0,0 +1,1498 @@
1.explicit
2.text
3.ident "ia64.S, Version 1.1"
4.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
5
6//
7// ====================================================================
8// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
9// project.
10//
11// Rights for redistribution and usage in source and binary forms are
12// granted according to the OpenSSL license. Warranty of any kind is
13// disclaimed.
14// ====================================================================
15//
16
17// Q. How much faster does it get?
18// A. Here is the output from 'openssl speed rsa dsa' for vanilla
19// 0.9.6a compiled with gcc version 2.96 20000731 (Red Hat
20// Linux 7.1 2.96-81):
21//
22// sign verify sign/s verify/s
23// rsa 512 bits 0.0036s 0.0003s 275.3 2999.2
24// rsa 1024 bits 0.0203s 0.0011s 49.3 894.1
25// rsa 2048 bits 0.1331s 0.0040s 7.5 250.9
26// rsa 4096 bits 0.9270s 0.0147s 1.1 68.1
27// sign verify sign/s verify/s
28// dsa 512 bits 0.0035s 0.0043s 288.3 234.8
29// dsa 1024 bits 0.0111s 0.0135s 90.0 74.2
30//
31// And here is similar output but for this assembler
32// implementation:-)
33//
34// sign verify sign/s verify/s
35// rsa 512 bits 0.0021s 0.0001s 549.4 9638.5
36// rsa 1024 bits 0.0055s 0.0002s 183.8 4481.1
37// rsa 2048 bits 0.0244s 0.0006s 41.4 1726.3
38// rsa 4096 bits 0.1295s 0.0018s 7.7 561.5
39// sign verify sign/s verify/s
40// dsa 512 bits 0.0012s 0.0013s 891.9 756.6
41// dsa 1024 bits 0.0023s 0.0028s 440.4 376.2
42//
43// Yes, you may argue that it's not fair comparison as it's
44// possible to craft the C implementation with BN_UMULT_HIGH
45// inline assembler macro. But of course! Here is the output
46// with the macro:
47//
48// sign verify sign/s verify/s
49// rsa 512 bits 0.0020s 0.0002s 495.0 6561.0
50// rsa 1024 bits 0.0086s 0.0004s 116.2 2235.7
51// rsa 2048 bits 0.0519s 0.0015s 19.3 667.3
52// rsa 4096 bits 0.3464s 0.0053s 2.9 187.7
53// sign verify sign/s verify/s
54// dsa 512 bits 0.0016s 0.0020s 613.1 510.5
55// dsa 1024 bits 0.0045s 0.0054s 221.0 183.9
56//
57// My code is still way faster, huh:-) And I believe that even
58// higher performance can be achieved. Note that as keys get
59// longer, performance gain is larger. Why? According to the
60// profiler there is another player in the field, namely
61// BN_from_montgomery consuming larger and larger portion of CPU
62// time as keysize decreases. I therefore consider putting effort
63// to assembler implementation of the following routine:
64//
65// void bn_mul_add_mont (BN_ULONG *rp,BN_ULONG *np,int nl,BN_ULONG n0)
66// {
67// int i,j;
68// BN_ULONG v;
69//
70// for (i=0; i<nl; i++)
71// {
72// v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
73// nrp++;
74// rp++;
75// if (((nrp[-1]+=v)&BN_MASK2) < v)
76// for (j=0; ((++nrp[j])&BN_MASK2) == 0; j++) ;
77// }
78// }
79//
80// It might as well be beneficial to implement even combaX
81// variants, as it appears as it can literally unleash the
82// performance (see comment section to bn_mul_comba8 below).
83//
84// And finally for your reference the output for 0.9.6a compiled
85// with SGIcc version 0.01.0-12 (keep in mind that for the moment
86// of this writing it's not possible to convince SGIcc to use
87// BN_UMULT_HIGH inline assembler macro, yet the code is fast,
88// i.e. for a compiler generated one:-):
89//
90// sign verify sign/s verify/s
91// rsa 512 bits 0.0022s 0.0002s 452.7 5894.3
92// rsa 1024 bits 0.0097s 0.0005s 102.7 2002.9
93// rsa 2048 bits 0.0578s 0.0017s 17.3 600.2
94// rsa 4096 bits 0.3838s 0.0061s 2.6 164.5
95// sign verify sign/s verify/s
96// dsa 512 bits 0.0018s 0.0022s 547.3 459.6
97// dsa 1024 bits 0.0051s 0.0062s 196.6 161.3
98//
99// Oh! Benchmarks were performed on 733MHz Lion-class Itanium
100// system running Redhat Linux 7.1 (very special thanks to Ray
101// McCaffity of Williams Communications for providing an account).
102//
103// Q. What's the heck with 'rum 1<<5' at the end of every function?
104// A. Well, by clearing the "upper FP registers written" bit of the
105// User Mask I want to excuse the kernel from preserving upper
106// (f32-f127) FP register bank over process context switch, thus
107// minimizing bus bandwidth consumption during the switch (i.e.
108// after PKI operation completes and the program is off doing
109// something else like bulk symmetric encryption). Having said
110// this, I also want to point out that it might be good idea
111// to compile the whole toolkit (as well as majority of the
112// programs for that matter) with -mfixed-range=f32-f127 command
113// line option. No, it doesn't prevent the compiler from writing
114// to upper bank, but at least discourages to do so. If you don't
115// like the idea you have the option to compile the module with
116// -Drum=nop.m in command line.
117//
118
119#if 1
120//
121// bn_[add|sub]_words routines.
122//
123// Loops are spinning in 2*(n+5) ticks on Itanium (provided that the
124// data reside in L1 cache, i.e. 2 ticks away). It's possible to
125// compress the epilogue and get down to 2*n+6, but at the cost of
126// scalability (the neat feature of this implementation is that it
127// shall automagically spin in n+5 on "wider" IA-64 implementations:-)
128// I consider that the epilogue is short enough as it is to trade tiny
129// performance loss on Itanium for scalability.
130//
131// BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
132//
133.global bn_add_words#
134.proc bn_add_words#
// bn_add_words(rp, ap, bp, num): software-pipelined word-wise addition of
// ap[] and bp[] into rp[]. Incoming arguments are r32=rp, r33=ap, r34=bp,
// r35=num; the carry out of the last word is returned in r8 (0 or 1).
135.align 64
136.skip 32 // makes the loop body aligned at 64-byte boundary
137bn_add_words:
138 .prologue
139 .fframe 0
140 .save ar.pfs,r2
// Frame: 4 inputs, 12 locals, 0 outputs, 16 rotating registers (r32-r47).
// If num <= 0 (32-bit signed compare) return 0 straight away.
141{ .mii; alloc r2=ar.pfs,4,12,0,16
142 cmp4.le p6,p0=r35,r0 };;
143{ .mfb; mov r8=r0 // return value
144(p6) br.ret.spnt.many b0 };;
145
// r10 = num-1 becomes the loop count; the caller's ar.lc is parked in r3
// and restored before returning.
146 .save ar.lc,r3
147{ .mib; sub r10=r35,r0,1
148 mov r3=ar.lc
149 brp.loop.imp .L_bn_add_words_ctop,.L_bn_add_words_cend-16
150 }
151 .body
// The argument registers lie inside the rotating region and are overwritten
// by the loop (ld8 targets r32/r35), so the pointers are copied to static
// registers r14-r16 first. The caller's predicates are saved in r9.
152{ .mib; mov r14=r32 // rp
153 mov r9=pr };;
154{ .mii; mov r15=r33 // ap
155 mov ar.lc=r10
156 mov ar.ec=6 }
// Epilogue count 6 matches the stages p16..p21 used below; pr.rot=1<<16
// starts the pipeline with only p16 set among the rotating predicates.
157{ .mib; mov r16=r34 // bp
158 mov pr.rot=1<<16 };;
159
// Pipeline stages: p16 loads a and b, p18 forms the sum, p19 records a
// carry-out (sum < addend) in p56, p21 stores. The instructions predicated
// on p58 belong to stage p20 (see the "(p20)" tags): p58 appears to be the
// previous word's carry after two rotations; it increments the current sum
// and, if that sum was all ones, ORs a carry into p57 for the next word.
160.L_bn_add_words_ctop:
161{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
162 (p18) add r39=r37,r34
163 (p19) cmp.ltu.unc p56,p0=r40,r38 }
164{ .mfb; (p0) nop.m 0x0
165 (p0) nop.f 0x0
166 (p0) nop.b 0x0 }
167{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
168 (p58) cmp.eq.or p57,p0=-1,r41 // (p20)
169 (p58) add r41=1,r41 } // (p20)
170{ .mfb; (p21) st8 [r14]=r42,8 // *(rp++)=r
171 (p0) nop.f 0x0
172 br.ctop.sptk .L_bn_add_words_ctop };;
173.L_bn_add_words_cend:
174
// r8 is still 0 here, so a set p59 (the final carry after the pipeline has
// drained) turns the return value into 1. Then restore the caller's
// predicates and loop counter.
175{ .mii;
176(p59) add r8=1,r8 // return value
177 mov pr=r9,-1
178 mov ar.lc=r3 }
179{ .mbb; nop.b 0x0
180 br.ret.sptk.many b0 };;
181.endp bn_add_words#
182
183//
184// BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
185//
186.global bn_sub_words#
187.proc bn_sub_words#
// bn_sub_words(rp, ap, bp, num): software-pipelined word-wise subtraction
// of bp[] from ap[] into rp[]. Incoming arguments are r32=rp, r33=ap,
// r34=bp, r35=num; the borrow out of the last word is returned in r8.
// The structure mirrors bn_add_words instruction for instruction.
188.align 64
189.skip 32 // makes the loop body aligned at 64-byte boundary
190bn_sub_words:
191 .prologue
192 .fframe 0
193 .save ar.pfs,r2
// Frame: 4 inputs, 12 locals, 0 outputs, 16 rotating registers (r32-r47).
// If num <= 0 (32-bit signed compare) return 0 straight away.
194{ .mii; alloc r2=ar.pfs,4,12,0,16
195 cmp4.le p6,p0=r35,r0 };;
196{ .mfb; mov r8=r0 // return value
197(p6) br.ret.spnt.many b0 };;
198
// r10 = num-1 becomes the loop count; the caller's ar.lc is parked in r3.
199 .save ar.lc,r3
200{ .mib; sub r10=r35,r0,1
201 mov r3=ar.lc
202 brp.loop.imp .L_bn_sub_words_ctop,.L_bn_sub_words_cend-16
203 }
204 .body
// Copy the pointers out of the rotating region (the loop overwrites
// r32/r35) and save the caller's predicates in r9.
205{ .mib; mov r14=r32 // rp
206 mov r9=pr };;
207{ .mii; mov r15=r33 // ap
208 mov ar.lc=r10
209 mov ar.ec=6 }
// Epilogue count 6 matches stages p16..p21; only p16 is set initially.
210{ .mib; mov r16=r34 // bp
211 mov pr.rot=1<<16 };;
212
// Pipeline stages: p16 loads a and b, p18 forms the difference, p19 records
// a borrow (difference > minuend) in p56, p21 stores. The instructions
// predicated on p58 belong to stage p20: p58 appears to be the previous
// word's borrow after two rotations; it decrements the current difference
// and, if that difference was zero, ORs a borrow into p57 for the next word.
213.L_bn_sub_words_ctop:
214{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
215 (p18) sub r39=r37,r34
216 (p19) cmp.gtu.unc p56,p0=r40,r38 }
217{ .mfb; (p0) nop.m 0x0
218 (p0) nop.f 0x0
219 (p0) nop.b 0x0 }
220{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
221 (p58) cmp.eq.or p57,p0=0,r41 // (p20)
222 (p58) add r41=-1,r41 } // (p20)
223{ .mbb; (p21) st8 [r14]=r42,8 // *(rp++)=r
224 (p0) nop.b 0x0
225 br.ctop.sptk .L_bn_sub_words_ctop };;
226.L_bn_sub_words_cend:
227
// r8 is still 0 here, so a set p59 (the final borrow after the pipeline has
// drained) turns the return value into 1. Then restore the caller's
// predicates and loop counter.
228{ .mii;
229(p59) add r8=1,r8 // return value
230 mov pr=r9,-1
231 mov ar.lc=r3 }
232{ .mbb; nop.b 0x0
233 br.ret.sptk.many b0 };;
234.endp bn_sub_words#
235#endif
236
237#if 0
// Disabled on purpose: defining XMA_TEMPTATION makes bn_mul_words use its
// compact xma-based loop, which the commentary inside that routine says
// runs slower than the default loop because of the xma latency chain.
238#define XMA_TEMPTATION
239#endif
240
241#if 1
242//
243// BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
244//
245.global bn_mul_words#
246.proc bn_mul_words#
// bn_mul_words(rp, ap, num, w): multiplies each word of ap[] by w and
// stores the low halves, with the running carry folded in, to rp[].
// Incoming arguments are r32=rp, r33=ap, r34=num, r35=w; the final carry
// word is returned in r8. Two loop variants exist, selected by
// XMA_TEMPTATION; the #ifndef branch is the one normally built.
247.align 64
248.skip 32 // makes the loop body aligned at 64-byte boundary
249bn_mul_words:
250 .prologue
251 .fframe 0
252 .save ar.pfs,r2
// The default variant needs 4 locals with 8 rotating registers (r32-r39);
// the xma variant keeps everything in FP registers and needs none.
253#ifdef XMA_TEMPTATION
254{ .mfi; alloc r2=ar.pfs,4,0,0,0 };;
255#else
256{ .mfi; alloc r2=ar.pfs,4,4,0,8 };;
257#endif
// If num <= 0 (32-bit signed compare) return 0 straight away.
258{ .mib; mov r8=r0 // return value
259 cmp4.le p6,p0=r34,r0
260(p6) br.ret.spnt.many b0 };;
261
// r10 = num-1 is the loop count; caller's ar.lc and predicates are parked
// in r3 and r9 and restored in the common exit sequence.
262 .save ar.lc,r3
263{ .mii; sub r10=r34,r0,1
264 mov r3=ar.lc
265 mov r9=pr };;
266
267 .body
// w is moved into f8 as the FP multiplier operand. 0x400001<<16 sets two
// rotating predicates: bit 0 -> p16 (first pipeline stage) and bit 22 ->
// p38, which has rotated to p48 ("no carry") by the time the first word
// reaches stage p26.
268{ .mib; setf.sig f8=r35 // w
269 mov pr.rot=0x400001<<16
270 // ------^----- serves as (p48) at first (p26)
271 brp.loop.imp .L_bn_mul_words_ctop,.L_bn_mul_words_cend-16
272 }
273
274#ifndef XMA_TEMPTATION
275
// Pointers are copied to static registers because r32-r39 rotate inside
// the loop. r39 is pre-zeroed to act as the incoming "high word" for the
// very first addition.
276{ .mii; mov r14=r32 // rp
277 mov r15=r33 // ap
278 mov ar.lc=r10 }
279{ .mii; mov r39=0 // serves as r33 at first (p26)
280 mov ar.ec=12 };;
281
282// This loop spins in 2*(n+11) ticks. It's scheduled for data in L2
283// cache (i.e. 9 ticks away) as floating point load/store instructions
284// bypass L1 cache and L2 latency is actually best-case scenario for
285// ldf8. The loop is not scalable and shall run in 2*(n+11) even on
286// "wider" IA-64 implementations. It's a trade-off here. n+22 loop
287// would give us ~5% in *overall* performance improvement on "wider"
288// IA-64, but would hurt Itanium for about same because of longer
289// epilogue. As it's a matter of few percents in either case I've
290// chosen to trade the scalability for development time (you can see
291// this very instruction sequence in bn_mul_add_words loop which in
292// turn is scalable).
// Stages: p16 loads a word into an FP register, p21 computes the low and
// high product halves, p25/p26 move them to integer registers, p26 adds
// low + previous high (+1 when p52 signals a pending carry), p27 stores the
// result and derives the next carry pair p52/p48 from an unsigned compare.
293.L_bn_mul_words_ctop:
294{ .mfi; (p25) getf.sig r36=f49 // low
295 (p21) xmpy.lu f45=f37,f8
296 (p27) cmp.ltu p52,p48=r39,r38 }
297{ .mfi; (p16) ldf8 f32=[r15],8
298 (p21) xmpy.hu f38=f37,f8
299 (p0) nop.i 0x0 };;
300{ .mii; (p26) getf.sig r32=f43 // high
301 .pred.rel "mutex",p48,p52
302 (p48) add r38=r37,r33 // (p26)
303 (p52) add r38=r37,r33,1 } // (p26)
304{ .mfb; (p27) st8 [r14]=r39,8
305 (p0) nop.f 0x0
306 br.ctop.sptk .L_bn_mul_words_ctop };;
307.L_bn_mul_words_cend:
308
// Return value: r34 (presumably the last high product word after
// rotation), plus one if the carry predicate p53 rather than p49 is set.
309{ .mii; nop.m 0x0
310.pred.rel "mutex",p49,p53
311(p49) add r8=r34,r0
312(p53) add r8=r34,r0,1 }
313{ .mfb; nop.m 0x0
314 nop.f 0x0
315 nop.b 0x0 }
316
317#else // XMA_TEMPTATION
318
319 setf.sig f37=r0 // serves as carry at (p18) tick
320 mov ar.lc=r10
321 mov ar.ec=5;;
322
323// Most of you examining this code very likely wonder why in the name
324// of Intel the following loop is commented out? Indeed, it looks so
325// neat that you find it hard to believe that it's something wrong
326// with it, right? The catch is that every iteration depends on the
327// result from previous one and the latter isn't available instantly.
328// The loop therefore spins at the latency of xma minus 1, or in other
329// words at 6*(n+4) ticks:-( Compare to the "production" loop above
330// that runs in 2*(n+11) where the low latency problem is worked around
331// by moving the dependency to one-tick latent integer ALU. Note that
332// "distance" between ldf8 and xma is not latency of ldf8, but the
333// *difference* between xma and ldf8 latencies.
334.L_bn_mul_words_ctop:
335{ .mfi; (p16) ldf8 f32=[r33],8
336 (p18) xma.hu f38=f34,f8,f39 }
337{ .mfb; (p20) stf8 [r32]=f37,8
338 (p18) xma.lu f35=f34,f8,f39
339 br.ctop.sptk .L_bn_mul_words_ctop };;
340.L_bn_mul_words_cend:
341
342 getf.sig r8=f41 // the return value
343
344#endif // XMA_TEMPTATION
345
// Common exit for both variants: restore the caller's predicates and loop
// counter, then clear um.mfh as explained in the file header Q&A.
346{ .mii; nop.m 0x0
347 mov pr=r9,-1
348 mov ar.lc=r3 }
349{ .mfb; rum 1<<5 // clear um.mfh
350 nop.f 0x0
351 br.ret.sptk.many b0 };;
352.endp bn_mul_words#
353#endif
354
355#if 1
356//
357// BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
358//
359.global bn_mul_add_words#
360.proc bn_mul_add_words#
// bn_mul_add_words(rp, ap, num, w): multiplies each word of ap[] by w and
// adds the product to the word already stored in rp[], writing the sum
// back to rp[]. Incoming arguments are r32=rp, r33=ap, r34=num, r35=w; the
// final carry word is returned in r8. The multiply part reuses the
// instruction sequence of the default bn_mul_words loop; the accumulate
// part reuses the carry scheme of bn_add_words.
361.align 64
362//.skip 0 // makes the loop split at 64-byte boundary
363bn_mul_add_words:
364 .prologue
365 .fframe 0
366 .save ar.pfs,r2
// Frame: 4 inputs, 12 locals, 0 outputs, 16 rotating registers (r32-r47).
// If num <= 0 (32-bit signed compare) return 0 straight away.
367{ .mii; alloc r2=ar.pfs,4,12,0,16
368 cmp4.le p6,p0=r34,r0 };;
369{ .mfb; mov r8=r0 // return value
370(p6) br.ret.spnt.many b0 };;
371
// r10 = num-1 is the loop count; caller's ar.lc and predicates are parked
// in r3 and r9.
372 .save ar.lc,r3
373{ .mii; sub r10=r34,r0,1
374 mov r3=ar.lc
375 mov r9=pr };;
376
377 .body
// w goes to f8 as the FP multiplier operand. 0x400001<<16 sets p16 (first
// stage) and p38, which has rotated to p48 ("no carry") when the first
// word reaches stage p26.
378{ .mib; setf.sig f8=r35 // w
379 mov pr.rot=0x400001<<16
380 // ------^----- serves as (p48) at first (p26)
381 brp.loop.imp .L_bn_mul_add_words_ctop,.L_bn_mul_add_words_cend-16
382 }
// rp is kept twice in static registers: r18 is the read cursor (ld8 at
// stage p26) and r14 the write cursor (st8 at stage p29), since the store
// trails the load by several iterations.
383{ .mii; mov r14=r32 // rp
384 mov r15=r33 // ap
385 mov ar.lc=r10 }
386{ .mii; mov r39=0 // serves as r33 at first (p26)
387 mov r18=r32 // rp copy
388 mov ar.ec=14 };;
389
390// This loop spins in 3*(n+13) ticks on Itanium and should spin in
391// 2*(n+13) on "wider" IA-64 implementations (to be verified with new
392// µ-architecture manuals as they become available). As usual it's
393// possible to compress the epilogue, down to 10 in this case, at the
394// cost of scalability. Compressed (and therefore non-scalable) loop
395// running at 3*(n+10) would buy you ~10% on Itanium but take ~35%
396// from "wider" IA-64 so let it be scalable! Special attention was
397// paid for having the loop body split at 64-byte boundary. ld8 is
398// scheduled for L1 cache as the data is more than likely there.
399// Indeed, bn_mul_words has put it there a moment ago:-)
// Stages: p16 loads a word of ap into an FP register, p21 multiplies,
// p25/p26 move the low/high halves to integer registers and p26 also loads
// the old rp word, p27 adds the product word to it and records the carry
// in p56, the p58-predicated pair propagates that carry one word later,
// and p29 stores the result.
400.L_bn_mul_add_words_ctop:
401{ .mfi; (p25) getf.sig r36=f49 // low
402 (p21) xmpy.lu f45=f37,f8
403 (p27) cmp.ltu p52,p48=r39,r38 }
404{ .mfi; (p16) ldf8 f32=[r15],8
405 (p21) xmpy.hu f38=f37,f8
406 (p27) add r43=r43,r39 };;
407{ .mii; (p26) getf.sig r32=f43 // high
408 .pred.rel "mutex",p48,p52
409 (p48) add r38=r37,r33 // (p26)
410 (p52) add r38=r37,r33,1 } // (p26)
411{ .mfb; (p27) cmp.ltu.unc p56,p0=r43,r39
412 (p0) nop.f 0x0
413 (p0) nop.b 0x0 }
414{ .mii; (p26) ld8 r42=[r18],8
415 (p58) cmp.eq.or p57,p0=-1,r44
416 (p58) add r44=1,r44 }
417{ .mfb; (p29) st8 [r14]=r45,8
418 (p0) nop.f 0x0
419 br.ctop.sptk .L_bn_mul_add_words_ctop};;
420.L_bn_mul_add_words_cend:
421
// Return value is assembled in two steps: first r36 (presumably the last
// high product word after rotation) plus the multiply-chain carry selected
// by p51/p55, then one more if the accumulate-chain carry p59 is set.
422{ .mii; nop.m 0x0
423.pred.rel "mutex",p51,p55
424(p51) add r8=r36,r0
425(p55) add r8=r36,r0,1 }
426{ .mfb; nop.m 0x0
427 nop.f 0x0
428 nop.b 0x0 };;
429{ .mii;
430(p59) add r8=1,r8
431 mov pr=r9,-1
432 mov ar.lc=r3 }
433{ .mfb; rum 1<<5 // clear um.mfh
434 nop.f 0x0
435 br.ret.sptk.many b0 };;
436.endp bn_mul_add_words#
437#endif
438
439#if 1
440//
441// void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
442//
443.global bn_sqr_words#
444.proc bn_sqr_words#
// bn_sqr_words(rp, ap, num): squares each word of ap[] and stores the
// 128-bit result as two consecutive words in rp[] (low half at rp[2i],
// high half at rp[2i+1]). Incoming arguments are r32=rp, r33=ap, r34=num.
// No carries are involved, so the whole loop stays in FP registers.
445.align 64
446.skip 32 // makes the loop body aligned at 64-byte boundary
447bn_sqr_words:
448 .prologue
449 .fframe 0
450 .save ar.pfs,r2
// No locals or rotating general registers are allocated. num is
// sign-extended from 32 bits so the 64-bit compare below sees it correctly.
451{ .mii; alloc r2=ar.pfs,3,0,0,0
452 sxt4 r34=r34 };;
// num <= 0: nothing to do. r8 is zeroed although the prototype is void.
453{ .mii; cmp.le p6,p0=r34,r0
454 mov r8=r0 } // return value
455{ .mfb; nop.f 0x0
456(p6) br.ret.spnt.many b0 };;
457
// r10 = num-1 is the loop count; caller's ar.lc and predicates are parked
// in r3 and r9.
458 .save ar.lc,r3
459{ .mii; sub r10=r34,r0,1
460 mov r3=ar.lc
461 mov r9=pr };;
462
463 .body
464{ .mib;
465 mov pr.rot=1<<16
466 brp.loop.imp .L_bn_sqr_words_ctop,.L_bn_sqr_words_cend-16
467 }
// num has already been consumed into r10, so r34 is reused as a second
// store pointer, rp+8, for the high halves. Epilogue count 18 matches the
// stages p16..p33 used below.
468{ .mii; add r34=8,r32
469 mov ar.lc=r10
470 mov ar.ec=18 };;
471
472// 2*(n+17) on Itanium, (n+17) on "wider" IA-64 implementations. It's
473// possible to compress the epilogue (I'm getting tired to write this
474// comment over and over) and get down to 2*n+16 at the cost of
475// scalability. The decision will very likely be reconsidered after the
476// benchmark program is profiled. I.e. if performance gain on Itanium
477// will appear larger than loss on "wider" IA-64, then the loop should
478// be explicitly split and the epilogue compressed.
// Stages: p16 loads a word straight into an FP register, p25 computes the
// low and high halves of its square, p33 stores them through r32 and r34
// respectively, each pointer advancing by 16 bytes (two words) per word.
479.L_bn_sqr_words_ctop:
480{ .mfi; (p16) ldf8 f32=[r33],8
481 (p25) xmpy.lu f42=f41,f41
482 (p0) nop.i 0x0 }
483{ .mib; (p33) stf8 [r32]=f50,16
484 (p0) nop.i 0x0
485 (p0) nop.b 0x0 }
486{ .mfi; (p0) nop.m 0x0
487 (p25) xmpy.hu f52=f41,f41
488 (p0) nop.i 0x0 }
489{ .mib; (p33) stf8 [r34]=f60,16
490 (p0) nop.i 0x0
491 br.ctop.sptk .L_bn_sqr_words_ctop };;
492.L_bn_sqr_words_cend:
493
// Restore the caller's predicates and loop counter, then clear um.mfh as
// explained in the file header Q&A.
494{ .mii; nop.m 0x0
495 mov pr=r9,-1
496 mov ar.lc=r3 }
497{ .mfb; rum 1<<5 // clear um.mfh
498 nop.f 0x0
499 br.ret.sptk.many b0 };;
500.endp bn_sqr_words#
501#endif
502
503#if 1
504// Apparently we win nothing by implementing special bn_sqr_comba8.
505// Yes, it is possible to reduce the number of multiplications by
506// almost factor of two, but then the amount of additions would
507// increase by factor of two (as we would have to perform those
508// otherwise performed by xma ourselves). Normally we would trade
509// anyway as multiplications are way more expensive, but not this
510// time... Multiplication kernel is fully pipelined and as we drain
511// one 128-bit multiplication result per clock cycle multiplications
512// are effectively as inexpensive as additions. Special implementation
513// might become of interest for "wider" IA-64 implementation as you'll
514// be able to get through the multiplication phase faster (there won't
515// be any stall issues as discussed in the commentary section below and
516// you therefore will be able to employ all 4 FP units)... But these
517// Itanium days it's simply too hard to justify the effort so I just
518// drop down to bn_mul_comba8 code:-)
519//
520// void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
521//
522.global bn_sqr_comba8#
523.proc bn_sqr_comba8#
// bn_sqr_comba8(r, a): computed as bn_mul_comba8(r, a, a). This stub only
// builds the register state bn_mul_comba8 has at .L_cheat_entry_point8
// and then jumps into that routine, which performs the work and returns
// to our caller. Incoming arguments are r32=r, r33=a.
524.align 64
525bn_sqr_comba8:
526 .prologue
527 .fframe 0
528 .save ar.pfs,r2
// Two inputs plus one local: the local, r34, sits where bn_mul_comba8
// expects its third argument b, and is set to a.
529{ .mii; alloc r2=ar.pfs,2,1,0,0
530 mov r34=r33
531 add r14=8,r33 };;
532 .body
// Same address setup as the first two bundles of bn_mul_comba8:
// r14/r15/r16 = a+8/a+16/a+24 and r17/r18 = b+8/b+16.
533{ .mii; add r17=8,r34
534 add r15=16,r33
535 add r18=16,r34 }
536{ .mfb; add r16=24,r33
537 br .L_cheat_entry_point8 };;
538.endp bn_sqr_comba8#
539#endif
540
541#if 1
542// I've estimated this routine to run in ~120 ticks, but in reality
543// (i.e. according to ar.itc) it takes ~160 ticks. Are those extra
544// cycles consumed for instructions fetch? Or did I misinterpret some
545// clause in Itanium µ-architecture manual? Comments are welcomed and
546// highly appreciated.
547//
548// However! It should be noted that even 160 ticks is darn good result
549// as it's over 10 (yes, ten, spelled as t-e-n) times faster than the
550// C version (compiled with gcc with inline assembler). I really
551// kicked compiler's butt here, didn't I? Yeah! This brings us to the
552// following statement. It's damn shame that this routine isn't called
553// very often nowadays! According to the profiler most CPU time is
554// consumed by bn_mul_add_words called from BN_from_montgomery. In
555// order to estimate what we're missing, I've compared the performance
556// of this routine against "traditional" implementation, i.e. against
557// following routine:
558//
559// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
560// { r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
561// r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
562// r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
563// r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
564// r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
565// r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
566// r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
567// r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
568// }
569//
570// The one below is over 8 times faster than the one above:-( Even
571// more reasons to "combafy" bn_mul_add_mont...
572//
573// And yes, this routine really made me wish there were an optimizing
574// assembler! It also feels like it deserves a dedication.
575//
576// To my wife for being there and to my kids...
577//
578// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
579//
580#define carry1 r14
581#define carry2 r15
582#define carry3 r34
583.global bn_mul_comba8#
584.proc bn_mul_comba8#
585.align 64
586bn_mul_comba8:
587 .prologue
588 .fframe 0
589 .save ar.pfs,r2
590{ .mii; alloc r2=ar.pfs,3,0,0,0
591 add r14=8,r33
592 add r17=8,r34 }
593 .body
594{ .mii; add r15=16,r33
595 add r18=16,r34
596 add r16=24,r33 }
597.L_cheat_entry_point8:
598{ .mmi; add r19=24,r34
599
600 ldf8 f32=[r33],32 };;
601
602{ .mmi; ldf8 f120=[r34],32
603 ldf8 f121=[r17],32 }
604{ .mmi; ldf8 f122=[r18],32
605 ldf8 f123=[r19],32 };;
606{ .mmi; ldf8 f124=[r34]
607 ldf8 f125=[r17] }
608{ .mmi; ldf8 f126=[r18]
609 ldf8 f127=[r19] }
610
611{ .mmi; ldf8 f33=[r14],32
612 ldf8 f34=[r15],32 }
613{ .mmi; ldf8 f35=[r16],32;;
614 ldf8 f36=[r33] }
615{ .mmi; ldf8 f37=[r14]
616 ldf8 f38=[r15] }
617{ .mfi; ldf8 f39=[r16]
618// -------\ Entering multiplier's heaven /-------
619// ------------\ /------------
620// -----------------\ /-----------------
621// ----------------------\/----------------------
622 xma.hu f41=f32,f120,f0 }
623{ .mfi; xma.lu f40=f32,f120,f0 };; // (*)
624{ .mfi; xma.hu f51=f32,f121,f0 }
625{ .mfi; xma.lu f50=f32,f121,f0 };;
626{ .mfi; xma.hu f61=f32,f122,f0 }
627{ .mfi; xma.lu f60=f32,f122,f0 };;
628{ .mfi; xma.hu f71=f32,f123,f0 }
629{ .mfi; xma.lu f70=f32,f123,f0 };;
630{ .mfi; xma.hu f81=f32,f124,f0 }
631{ .mfi; xma.lu f80=f32,f124,f0 };;
632{ .mfi; xma.hu f91=f32,f125,f0 }
633{ .mfi; xma.lu f90=f32,f125,f0 };;
634{ .mfi; xma.hu f101=f32,f126,f0 }
635{ .mfi; xma.lu f100=f32,f126,f0 };;
636{ .mfi; xma.hu f111=f32,f127,f0 }
637{ .mfi; xma.lu f110=f32,f127,f0 };;//
638// (*) You can argue that splitting at every second bundle would
639// prevent "wider" IA-64 implementations from achieving the peak
640// performance. Well, not really... The catch is that if you
641// intend to keep 4 FP units busy by splitting at every fourth
642// bundle and thus perform these 16 multiplications in 4 ticks,
643// the first bundle *below* would stall because the result from
644// the first xma bundle *above* won't be available for another 3
645// ticks (if not more, being an optimist, I assume that "wider"
646// implementation will have same latency:-). This stall will hold
647// you back and the performance would be as if every second bundle
648// were split *anyway*...
649{ .mfi; getf.sig r16=f40
650 xma.hu f42=f33,f120,f41
651 add r33=8,r32 }
652{ .mfi; xma.lu f41=f33,f120,f41 };;
653{ .mfi; getf.sig r24=f50
654 xma.hu f52=f33,f121,f51 }
655{ .mfi; xma.lu f51=f33,f121,f51 };;
656{ .mfi; st8 [r32]=r16,16
657 xma.hu f62=f33,f122,f61 }
658{ .mfi; xma.lu f61=f33,f122,f61 };;
659{ .mfi; xma.hu f72=f33,f123,f71 }
660{ .mfi; xma.lu f71=f33,f123,f71 };;
661{ .mfi; xma.hu f82=f33,f124,f81 }
662{ .mfi; xma.lu f81=f33,f124,f81 };;
663{ .mfi; xma.hu f92=f33,f125,f91 }
664{ .mfi; xma.lu f91=f33,f125,f91 };;
665{ .mfi; xma.hu f102=f33,f126,f101 }
666{ .mfi; xma.lu f101=f33,f126,f101 };;
667{ .mfi; xma.hu f112=f33,f127,f111 }
668{ .mfi; xma.lu f111=f33,f127,f111 };;//
669//-------------------------------------------------//
670{ .mfi; getf.sig r25=f41
671 xma.hu f43=f34,f120,f42 }
672{ .mfi; xma.lu f42=f34,f120,f42 };;
673{ .mfi; getf.sig r16=f60
674 xma.hu f53=f34,f121,f52 }
675{ .mfi; xma.lu f52=f34,f121,f52 };;
676{ .mfi; getf.sig r17=f51
677 xma.hu f63=f34,f122,f62
678 add r25=r25,r24 }
679{ .mfi; xma.lu f62=f34,f122,f62
680 mov carry1=0 };;
681{ .mfi; cmp.ltu p6,p0=r25,r24
682 xma.hu f73=f34,f123,f72 }
683{ .mfi; xma.lu f72=f34,f123,f72 };;
684{ .mfi; st8 [r33]=r25,16
685 xma.hu f83=f34,f124,f82
686(p6) add carry1=1,carry1 }
687{ .mfi; xma.lu f82=f34,f124,f82 };;
688{ .mfi; xma.hu f93=f34,f125,f92 }
689{ .mfi; xma.lu f92=f34,f125,f92 };;
690{ .mfi; xma.hu f103=f34,f126,f102 }
691{ .mfi; xma.lu f102=f34,f126,f102 };;
692{ .mfi; xma.hu f113=f34,f127,f112 }
693{ .mfi; xma.lu f112=f34,f127,f112 };;//
694//-------------------------------------------------//
695{ .mfi; getf.sig r18=f42
696 xma.hu f44=f35,f120,f43
697 add r17=r17,r16 }
698{ .mfi; xma.lu f43=f35,f120,f43 };;
699{ .mfi; getf.sig r24=f70
700 xma.hu f54=f35,f121,f53 }
701{ .mfi; mov carry2=0
702 xma.lu f53=f35,f121,f53 };;
703{ .mfi; getf.sig r25=f61
704 xma.hu f64=f35,f122,f63
705 cmp.ltu p7,p0=r17,r16 }
706{ .mfi; add r18=r18,r17
707 xma.lu f63=f35,f122,f63 };;
708{ .mfi; getf.sig r26=f52
709 xma.hu f74=f35,f123,f73
710(p7) add carry2=1,carry2 }
711{ .mfi; cmp.ltu p7,p0=r18,r17
712 xma.lu f73=f35,f123,f73
713 add r18=r18,carry1 };;
714{ .mfi;
715 xma.hu f84=f35,f124,f83
716(p7) add carry2=1,carry2 }
717{ .mfi; cmp.ltu p7,p0=r18,carry1
718 xma.lu f83=f35,f124,f83 };;
719{ .mfi; st8 [r32]=r18,16
720 xma.hu f94=f35,f125,f93
721(p7) add carry2=1,carry2 }
722{ .mfi; xma.lu f93=f35,f125,f93 };;
723{ .mfi; xma.hu f104=f35,f126,f103 }
724{ .mfi; xma.lu f103=f35,f126,f103 };;
725{ .mfi; xma.hu f114=f35,f127,f113 }
726{ .mfi; mov carry1=0
727 xma.lu f113=f35,f127,f113
728 add r25=r25,r24 };;//
729//-------------------------------------------------//
730{ .mfi; getf.sig r27=f43
731 xma.hu f45=f36,f120,f44
732 cmp.ltu p6,p0=r25,r24 }
733{ .mfi; xma.lu f44=f36,f120,f44
734 add r26=r26,r25 };;
735{ .mfi; getf.sig r16=f80
736 xma.hu f55=f36,f121,f54
737(p6) add carry1=1,carry1 }
738{ .mfi; xma.lu f54=f36,f121,f54 };;
739{ .mfi; getf.sig r17=f71
740 xma.hu f65=f36,f122,f64
741 cmp.ltu p6,p0=r26,r25 }
742{ .mfi; xma.lu f64=f36,f122,f64
743 add r27=r27,r26 };;
744{ .mfi; getf.sig r18=f62
745 xma.hu f75=f36,f123,f74
746(p6) add carry1=1,carry1 }
747{ .mfi; cmp.ltu p6,p0=r27,r26
748 xma.lu f74=f36,f123,f74
749 add r27=r27,carry2 };;
750{ .mfi; getf.sig r19=f53
751 xma.hu f85=f36,f124,f84
752(p6) add carry1=1,carry1 }
753{ .mfi; xma.lu f84=f36,f124,f84
754 cmp.ltu p6,p0=r27,carry2 };;
755{ .mfi; st8 [r33]=r27,16
756 xma.hu f95=f36,f125,f94
757(p6) add carry1=1,carry1 }
758{ .mfi; xma.lu f94=f36,f125,f94 };;
759{ .mfi; xma.hu f105=f36,f126,f104 }
760{ .mfi; mov carry2=0
761 xma.lu f104=f36,f126,f104
762 add r17=r17,r16 };;
763{ .mfi; xma.hu f115=f36,f127,f114
764 cmp.ltu p7,p0=r17,r16 }
765{ .mfi; xma.lu f114=f36,f127,f114
766 add r18=r18,r17 };;//
767//-------------------------------------------------//
768{ .mfi; getf.sig r20=f44
769 xma.hu f46=f37,f120,f45
770(p7) add carry2=1,carry2 }
771{ .mfi; cmp.ltu p7,p0=r18,r17
772 xma.lu f45=f37,f120,f45
773 add r19=r19,r18 };;
774{ .mfi; getf.sig r24=f90
775 xma.hu f56=f37,f121,f55 }
776{ .mfi; xma.lu f55=f37,f121,f55 };;
777{ .mfi; getf.sig r25=f81
778 xma.hu f66=f37,f122,f65
779(p7) add carry2=1,carry2 }
780{ .mfi; cmp.ltu p7,p0=r19,r18
781 xma.lu f65=f37,f122,f65
782 add r20=r20,r19 };;
783{ .mfi; getf.sig r26=f72
784 xma.hu f76=f37,f123,f75
785(p7) add carry2=1,carry2 }
786{ .mfi; cmp.ltu p7,p0=r20,r19
787 xma.lu f75=f37,f123,f75
788 add r20=r20,carry1 };;
789{ .mfi; getf.sig r27=f63
790 xma.hu f86=f37,f124,f85
791(p7) add carry2=1,carry2 }
792{ .mfi; xma.lu f85=f37,f124,f85
793 cmp.ltu p7,p0=r20,carry1 };;
794{ .mfi; getf.sig r28=f54
795 xma.hu f96=f37,f125,f95
796(p7) add carry2=1,carry2 }
797{ .mfi; st8 [r32]=r20,16
798 xma.lu f95=f37,f125,f95 };;
799{ .mfi; xma.hu f106=f37,f126,f105 }
800{ .mfi; mov carry1=0
801 xma.lu f105=f37,f126,f105
802 add r25=r25,r24 };;
803{ .mfi; xma.hu f116=f37,f127,f115
804 cmp.ltu p6,p0=r25,r24 }
805{ .mfi; xma.lu f115=f37,f127,f115
806 add r26=r26,r25 };;//
807//-------------------------------------------------//
808{ .mfi; getf.sig r29=f45
809 xma.hu f47=f38,f120,f46
810(p6) add carry1=1,carry1 }
811{ .mfi; cmp.ltu p6,p0=r26,r25
812 xma.lu f46=f38,f120,f46
813 add r27=r27,r26 };;
814{ .mfi; getf.sig r16=f100
815 xma.hu f57=f38,f121,f56
816(p6) add carry1=1,carry1 }
817{ .mfi; cmp.ltu p6,p0=r27,r26
818 xma.lu f56=f38,f121,f56
819 add r28=r28,r27 };;
820{ .mfi; getf.sig r17=f91
821 xma.hu f67=f38,f122,f66
822(p6) add carry1=1,carry1 }
823{ .mfi; cmp.ltu p6,p0=r28,r27
824 xma.lu f66=f38,f122,f66
825 add r29=r29,r28 };;
826{ .mfi; getf.sig r18=f82
827 xma.hu f77=f38,f123,f76
828(p6) add carry1=1,carry1 }
829{ .mfi; cmp.ltu p6,p0=r29,r28
830 xma.lu f76=f38,f123,f76
831 add r29=r29,carry2 };;
832{ .mfi; getf.sig r19=f73
833 xma.hu f87=f38,f124,f86
834(p6) add carry1=1,carry1 }
835{ .mfi; xma.lu f86=f38,f124,f86
836 cmp.ltu p6,p0=r29,carry2 };;
837{ .mfi; getf.sig r20=f64
838 xma.hu f97=f38,f125,f96
839(p6) add carry1=1,carry1 }
840{ .mfi; st8 [r33]=r29,16
841 xma.lu f96=f38,f125,f96 };;
842{ .mfi; getf.sig r21=f55
843 xma.hu f107=f38,f126,f106 }
844{ .mfi; mov carry2=0
845 xma.lu f106=f38,f126,f106
846 add r17=r17,r16 };;
847{ .mfi; xma.hu f117=f38,f127,f116
848 cmp.ltu p7,p0=r17,r16 }
849{ .mfi; xma.lu f116=f38,f127,f116
850 add r18=r18,r17 };;//
851//-------------------------------------------------//
852{ .mfi; getf.sig r22=f46
853 xma.hu f48=f39,f120,f47
854(p7) add carry2=1,carry2 }
855{ .mfi; cmp.ltu p7,p0=r18,r17
856 xma.lu f47=f39,f120,f47
857 add r19=r19,r18 };;
858{ .mfi; getf.sig r24=f110
859 xma.hu f58=f39,f121,f57
860(p7) add carry2=1,carry2 }
861{ .mfi; cmp.ltu p7,p0=r19,r18
862 xma.lu f57=f39,f121,f57
863 add r20=r20,r19 };;
864{ .mfi; getf.sig r25=f101
865 xma.hu f68=f39,f122,f67
866(p7) add carry2=1,carry2 }
867{ .mfi; cmp.ltu p7,p0=r20,r19
868 xma.lu f67=f39,f122,f67
869 add r21=r21,r20 };;
870{ .mfi; getf.sig r26=f92
871 xma.hu f78=f39,f123,f77
872(p7) add carry2=1,carry2 }
873{ .mfi; cmp.ltu p7,p0=r21,r20
874 xma.lu f77=f39,f123,f77
875 add r22=r22,r21 };;
876{ .mfi; getf.sig r27=f83
877 xma.hu f88=f39,f124,f87
878(p7) add carry2=1,carry2 }
879{ .mfi; cmp.ltu p7,p0=r22,r21
880 xma.lu f87=f39,f124,f87
881 add r22=r22,carry1 };;
882{ .mfi; getf.sig r28=f74
883 xma.hu f98=f39,f125,f97
884(p7) add carry2=1,carry2 }
885{ .mfi; xma.lu f97=f39,f125,f97
886 cmp.ltu p7,p0=r22,carry1 };;
887{ .mfi; getf.sig r29=f65
888 xma.hu f108=f39,f126,f107
889(p7) add carry2=1,carry2 }
890{ .mfi; st8 [r32]=r22,16
891 xma.lu f107=f39,f126,f107 };;
892{ .mfi; getf.sig r30=f56
893 xma.hu f118=f39,f127,f117 }
894{ .mfi; xma.lu f117=f39,f127,f117 };;//
895//-------------------------------------------------//
896// Leaving multiplier's heaven... Quite a ride, huh?
897
898{ .mii; getf.sig r31=f47
899 add r25=r25,r24
900 mov carry1=0 };;
901{ .mii; getf.sig r16=f111
902 cmp.ltu p6,p0=r25,r24
903 add r26=r26,r25 };;
904{ .mfb; getf.sig r17=f102 }
905{ .mii;
906(p6) add carry1=1,carry1
907 cmp.ltu p6,p0=r26,r25
908 add r27=r27,r26 };;
909{ .mfb; nop.m 0x0 }
910{ .mii;
911(p6) add carry1=1,carry1
912 cmp.ltu p6,p0=r27,r26
913 add r28=r28,r27 };;
914{ .mii; getf.sig r18=f93
915 add r17=r17,r16
916 mov carry3=0 }
917{ .mii;
918(p6) add carry1=1,carry1
919 cmp.ltu p6,p0=r28,r27
920 add r29=r29,r28 };;
921{ .mii; getf.sig r19=f84
922 cmp.ltu p7,p0=r17,r16 }
923{ .mii;
924(p6) add carry1=1,carry1
925 cmp.ltu p6,p0=r29,r28
926 add r30=r30,r29 };;
927{ .mii; getf.sig r20=f75
928 add r18=r18,r17 }
929{ .mii;
930(p6) add carry1=1,carry1
931 cmp.ltu p6,p0=r30,r29
932 add r31=r31,r30 };;
933{ .mfb; getf.sig r21=f66 }
934{ .mii; (p7) add carry3=1,carry3
935 cmp.ltu p7,p0=r18,r17
936 add r19=r19,r18 }
937{ .mfb; nop.m 0x0 }
938{ .mii;
939(p6) add carry1=1,carry1
940 cmp.ltu p6,p0=r31,r30
941 add r31=r31,carry2 };;
942{ .mfb; getf.sig r22=f57 }
943{ .mii; (p7) add carry3=1,carry3
944 cmp.ltu p7,p0=r19,r18
945 add r20=r20,r19 }
946{ .mfb; nop.m 0x0 }
947{ .mii;
948(p6) add carry1=1,carry1
949 cmp.ltu p6,p0=r31,carry2 };;
950{ .mfb; getf.sig r23=f48 }
951{ .mii; (p7) add carry3=1,carry3
952 cmp.ltu p7,p0=r20,r19
953 add r21=r21,r20 }
954{ .mii;
955(p6) add carry1=1,carry1 }
956{ .mfb; st8 [r33]=r31,16 };;
957
958{ .mfb; getf.sig r24=f112 }
959{ .mii; (p7) add carry3=1,carry3
960 cmp.ltu p7,p0=r21,r20
961 add r22=r22,r21 };;
962{ .mfb; getf.sig r25=f103 }
963{ .mii; (p7) add carry3=1,carry3
964 cmp.ltu p7,p0=r22,r21
965 add r23=r23,r22 };;
966{ .mfb; getf.sig r26=f94 }
967{ .mii; (p7) add carry3=1,carry3
968 cmp.ltu p7,p0=r23,r22
969 add r23=r23,carry1 };;
970{ .mfb; getf.sig r27=f85 }
971{ .mii; (p7) add carry3=1,carry3
972 cmp.ltu p7,p8=r23,carry1};;
973{ .mii; getf.sig r28=f76
974 add r25=r25,r24
975 mov carry1=0 }
976{ .mii; st8 [r32]=r23,16
977 (p7) add carry2=1,carry3
978 (p8) add carry2=0,carry3 };;
979
980{ .mfb; nop.m 0x0 }
981{ .mii; getf.sig r29=f67
982 cmp.ltu p6,p0=r25,r24
983 add r26=r26,r25 };;
984{ .mfb; getf.sig r30=f58 }
985{ .mii;
986(p6) add carry1=1,carry1
987 cmp.ltu p6,p0=r26,r25
988 add r27=r27,r26 };;
989{ .mfb; getf.sig r16=f113 }
990{ .mii;
991(p6) add carry1=1,carry1
992 cmp.ltu p6,p0=r27,r26
993 add r28=r28,r27 };;
994{ .mfb; getf.sig r17=f104 }
995{ .mii;
996(p6) add carry1=1,carry1
997 cmp.ltu p6,p0=r28,r27
998 add r29=r29,r28 };;
999{ .mfb; getf.sig r18=f95 }
1000{ .mii;
1001(p6) add carry1=1,carry1
1002 cmp.ltu p6,p0=r29,r28
1003 add r30=r30,r29 };;
1004{ .mii; getf.sig r19=f86
1005 add r17=r17,r16
1006 mov carry3=0 }
1007{ .mii;
1008(p6) add carry1=1,carry1
1009 cmp.ltu p6,p0=r30,r29
1010 add r30=r30,carry2 };;
1011{ .mii; getf.sig r20=f77
1012 cmp.ltu p7,p0=r17,r16
1013 add r18=r18,r17 }
1014{ .mii;
1015(p6) add carry1=1,carry1
1016 cmp.ltu p6,p0=r30,carry2 };;
1017{ .mfb; getf.sig r21=f68 }
1018{ .mii; st8 [r33]=r30,16
1019(p6) add carry1=1,carry1 };;
1020
1021{ .mfb; getf.sig r24=f114 }
1022{ .mii; (p7) add carry3=1,carry3
1023 cmp.ltu p7,p0=r18,r17
1024 add r19=r19,r18 };;
1025{ .mfb; getf.sig r25=f105 }
1026{ .mii; (p7) add carry3=1,carry3
1027 cmp.ltu p7,p0=r19,r18
1028 add r20=r20,r19 };;
1029{ .mfb; getf.sig r26=f96 }
1030{ .mii; (p7) add carry3=1,carry3
1031 cmp.ltu p7,p0=r20,r19
1032 add r21=r21,r20 };;
1033{ .mfb; getf.sig r27=f87 }
1034{ .mii; (p7) add carry3=1,carry3
1035 cmp.ltu p7,p0=r21,r20
1036 add r21=r21,carry1 };;
1037{ .mib; getf.sig r28=f78
1038 add r25=r25,r24 }
1039{ .mib; (p7) add carry3=1,carry3
1040 cmp.ltu p7,p8=r21,carry1};;
1041{ .mii; st8 [r32]=r21,16
1042 (p7) add carry2=1,carry3
1043 (p8) add carry2=0,carry3 }
1044
1045{ .mii; mov carry1=0
1046 cmp.ltu p6,p0=r25,r24
1047 add r26=r26,r25 };;
1048{ .mfb; getf.sig r16=f115 }
1049{ .mii;
1050(p6) add carry1=1,carry1
1051 cmp.ltu p6,p0=r26,r25
1052 add r27=r27,r26 };;
1053{ .mfb; getf.sig r17=f106 }
1054{ .mii;
1055(p6) add carry1=1,carry1
1056 cmp.ltu p6,p0=r27,r26
1057 add r28=r28,r27 };;
1058{ .mfb; getf.sig r18=f97 }
1059{ .mii;
1060(p6) add carry1=1,carry1
1061 cmp.ltu p6,p0=r28,r27
1062 add r28=r28,carry2 };;
1063{ .mib; getf.sig r19=f88
1064 add r17=r17,r16 }
1065{ .mib;
1066(p6) add carry1=1,carry1
1067 cmp.ltu p6,p0=r28,carry2 };;
1068{ .mii; st8 [r33]=r28,16
1069(p6) add carry1=1,carry1 }
1070
1071{ .mii; mov carry2=0
1072 cmp.ltu p7,p0=r17,r16
1073 add r18=r18,r17 };;
1074{ .mfb; getf.sig r24=f116 }
1075{ .mii; (p7) add carry2=1,carry2
1076 cmp.ltu p7,p0=r18,r17
1077 add r19=r19,r18 };;
1078{ .mfb; getf.sig r25=f107 }
1079{ .mii; (p7) add carry2=1,carry2
1080 cmp.ltu p7,p0=r19,r18
1081 add r19=r19,carry1 };;
1082{ .mfb; getf.sig r26=f98 }
1083{ .mii; (p7) add carry2=1,carry2
1084 cmp.ltu p7,p0=r19,carry1};;
1085{ .mii; st8 [r32]=r19,16
1086 (p7) add carry2=1,carry2 }
1087
1088{ .mfb; add r25=r25,r24 };;
1089
1090{ .mfb; getf.sig r16=f117 }
1091{ .mii; mov carry1=0
1092 cmp.ltu p6,p0=r25,r24
1093 add r26=r26,r25 };;
1094{ .mfb; getf.sig r17=f108 }
1095{ .mii;
1096(p6) add carry1=1,carry1
1097 cmp.ltu p6,p0=r26,r25
1098 add r26=r26,carry2 };;
1099{ .mfb; nop.m 0x0 }
1100{ .mii;
1101(p6) add carry1=1,carry1
1102 cmp.ltu p6,p0=r26,carry2 };;
1103{ .mii; st8 [r33]=r26,16
1104(p6) add carry1=1,carry1 }
1105
1106{ .mfb; add r17=r17,r16 };;
1107{ .mfb; getf.sig r24=f118 }
1108{ .mii; mov carry2=0
1109 cmp.ltu p7,p0=r17,r16
1110 add r17=r17,carry1 };;
1111{ .mii; (p7) add carry2=1,carry2
1112 cmp.ltu p7,p0=r17,carry1};;
1113{ .mii; st8 [r32]=r17
1114 (p7) add carry2=1,carry2 };;
1115{ .mfb; add r24=r24,carry2 };;
1116{ .mib; st8 [r33]=r24 }
1117
1118{ .mib; rum 1<<5 // clear um.mfh
1119 br.ret.sptk.many b0 };;
1120.endp bn_mul_comba8#
1121#undef carry3
1122#undef carry2
1123#undef carry1
1124#endif
1125
1126#if 1
1127// It's possible to make it faster (see comment to bn_sqr_comba8), but
1128// I reckon it isn't worth the effort. Basically because the routine
1129// (actually both of them) is practically never called... So I just play
1130// the same trick as with bn_sqr_comba8.
1131//
1132// void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1133//
1134.global bn_sqr_comba4#
1135.proc bn_sqr_comba4#
1136.align 64
1137bn_sqr_comba4:
1138 .prologue
1139 .fframe 0
1140 .save ar.pfs,r2
1141{ .mii; alloc r2=ar.pfs,2,1,0,0
1142 mov r34=r33
1143 add r14=8,r33 };;
1144 .body
1145{ .mii; add r17=8,r34
1146 add r15=16,r33
1147 add r18=16,r34 }
1148{ .mfb; add r16=24,r33
1149 br .L_cheat_entry_point4 };;
1150.endp bn_sqr_comba4#
1151#endif
1152
1153#if 1
1154// Runs in ~115 cycles and ~4.5 times faster than C. Well, whatever...
1155//
1156// void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1157//
1158#define carry1 r14
1159#define carry2 r15
1160.global bn_mul_comba4#
1161.proc bn_mul_comba4#
1162.align 64
1163bn_mul_comba4:
1164 .prologue
1165 .fframe 0
1166 .save ar.pfs,r2
1167{ .mii; alloc r2=ar.pfs,3,0,0,0
1168 add r14=8,r33
1169 add r17=8,r34 }
1170 .body
1171{ .mii; add r15=16,r33
1172 add r18=16,r34
1173 add r16=24,r33 };;
1174.L_cheat_entry_point4:
1175{ .mmi; add r19=24,r34
1176
1177 ldf8 f32=[r33] }
1178
1179{ .mmi; ldf8 f120=[r34]
1180 ldf8 f121=[r17] };;
1181{ .mmi; ldf8 f122=[r18]
1182 ldf8 f123=[r19] }
1183
1184{ .mmi; ldf8 f33=[r14]
1185 ldf8 f34=[r15] }
1186{ .mfi; ldf8 f35=[r16]
1187
1188 xma.hu f41=f32,f120,f0 }
1189{ .mfi; xma.lu f40=f32,f120,f0 };;
1190{ .mfi; xma.hu f51=f32,f121,f0 }
1191{ .mfi; xma.lu f50=f32,f121,f0 };;
1192{ .mfi; xma.hu f61=f32,f122,f0 }
1193{ .mfi; xma.lu f60=f32,f122,f0 };;
1194{ .mfi; xma.hu f71=f32,f123,f0 }
1195{ .mfi; xma.lu f70=f32,f123,f0 };;//
1196// Major stall takes place here, and 3 more places below. Result from
1197// first xma is not available for another 3 ticks.
1198{ .mfi; getf.sig r16=f40
1199 xma.hu f42=f33,f120,f41
1200 add r33=8,r32 }
1201{ .mfi; xma.lu f41=f33,f120,f41 };;
1202{ .mfi; getf.sig r24=f50
1203 xma.hu f52=f33,f121,f51 }
1204{ .mfi; xma.lu f51=f33,f121,f51 };;
1205{ .mfi; st8 [r32]=r16,16
1206 xma.hu f62=f33,f122,f61 }
1207{ .mfi; xma.lu f61=f33,f122,f61 };;
1208{ .mfi; xma.hu f72=f33,f123,f71 }
1209{ .mfi; xma.lu f71=f33,f123,f71 };;//
1210//-------------------------------------------------//
1211{ .mfi; getf.sig r25=f41
1212 xma.hu f43=f34,f120,f42 }
1213{ .mfi; xma.lu f42=f34,f120,f42 };;
1214{ .mfi; getf.sig r16=f60
1215 xma.hu f53=f34,f121,f52 }
1216{ .mfi; xma.lu f52=f34,f121,f52 };;
1217{ .mfi; getf.sig r17=f51
1218 xma.hu f63=f34,f122,f62
1219 add r25=r25,r24 }
1220{ .mfi; mov carry1=0
1221 xma.lu f62=f34,f122,f62 };;
1222{ .mfi; st8 [r33]=r25,16
1223 xma.hu f73=f34,f123,f72
1224 cmp.ltu p6,p0=r25,r24 }
1225{ .mfi; xma.lu f72=f34,f123,f72 };;//
1226//-------------------------------------------------//
1227{ .mfi; getf.sig r18=f42
1228 xma.hu f44=f35,f120,f43
1229(p6) add carry1=1,carry1 }
1230{ .mfi; add r17=r17,r16
1231 xma.lu f43=f35,f120,f43
1232 mov carry2=0 };;
1233{ .mfi; getf.sig r24=f70
1234 xma.hu f54=f35,f121,f53
1235 cmp.ltu p7,p0=r17,r16 }
1236{ .mfi; xma.lu f53=f35,f121,f53 };;
1237{ .mfi; getf.sig r25=f61
1238 xma.hu f64=f35,f122,f63
1239 add r18=r18,r17 }
1240{ .mfi; xma.lu f63=f35,f122,f63
1241(p7) add carry2=1,carry2 };;
1242{ .mfi; getf.sig r26=f52
1243 xma.hu f74=f35,f123,f73
1244 cmp.ltu p7,p0=r18,r17 }
1245{ .mfi; xma.lu f73=f35,f123,f73
1246 add r18=r18,carry1 };;
1247//-------------------------------------------------//
1248{ .mii; st8 [r32]=r18,16
1249(p7) add carry2=1,carry2
1250 cmp.ltu p7,p0=r18,carry1 };;
1251
1252{ .mfi; getf.sig r27=f43 // last major stall
1253(p7) add carry2=1,carry2 };;
1254{ .mii; getf.sig r16=f71
1255 add r25=r25,r24
1256 mov carry1=0 };;
1257{ .mii; getf.sig r17=f62
1258 cmp.ltu p6,p0=r25,r24
1259 add r26=r26,r25 };;
1260{ .mii;
1261(p6) add carry1=1,carry1
1262 cmp.ltu p6,p0=r26,r25
1263 add r27=r27,r26 };;
1264{ .mii;
1265(p6) add carry1=1,carry1
1266 cmp.ltu p6,p0=r27,r26
1267 add r27=r27,carry2 };;
1268{ .mii; getf.sig r18=f53
1269(p6) add carry1=1,carry1
1270 cmp.ltu p6,p0=r27,carry2 };;
1271{ .mfi; st8 [r33]=r27,16
1272(p6) add carry1=1,carry1 }
1273
1274{ .mii; getf.sig r19=f44
1275 add r17=r17,r16
1276 mov carry2=0 };;
1277{ .mii; getf.sig r24=f72
1278 cmp.ltu p7,p0=r17,r16
1279 add r18=r18,r17 };;
1280{ .mii; (p7) add carry2=1,carry2
1281 cmp.ltu p7,p0=r18,r17
1282 add r19=r19,r18 };;
1283{ .mii; (p7) add carry2=1,carry2
1284 cmp.ltu p7,p0=r19,r18
1285 add r19=r19,carry1 };;
1286{ .mii; getf.sig r25=f63
1287 (p7) add carry2=1,carry2
1288 cmp.ltu p7,p0=r19,carry1};;
1289{ .mii; st8 [r32]=r19,16
1290 (p7) add carry2=1,carry2 }
1291
1292{ .mii; getf.sig r26=f54
1293 add r25=r25,r24
1294 mov carry1=0 };;
1295{ .mii; getf.sig r16=f73
1296 cmp.ltu p6,p0=r25,r24
1297 add r26=r26,r25 };;
1298{ .mii;
1299(p6) add carry1=1,carry1
1300 cmp.ltu p6,p0=r26,r25
1301 add r26=r26,carry2 };;
1302{ .mii; getf.sig r17=f64
1303(p6) add carry1=1,carry1
1304 cmp.ltu p6,p0=r26,carry2 };;
1305{ .mii; st8 [r33]=r26,16
1306(p6) add carry1=1,carry1 }
1307
1308{ .mii; getf.sig r24=f74
1309 add r17=r17,r16
1310 mov carry2=0 };;
1311{ .mii; cmp.ltu p7,p0=r17,r16
1312 add r17=r17,carry1 };;
1313
1314{ .mii; (p7) add carry2=1,carry2
1315 cmp.ltu p7,p0=r17,carry1};;
1316{ .mii; st8 [r32]=r17,16
1317 (p7) add carry2=1,carry2 };;
1318
1319{ .mii; add r24=r24,carry2 };;
1320{ .mii; st8 [r33]=r24 }
1321
1322{ .mib; rum 1<<5 // clear um.mfh
1323 br.ret.sptk.many b0 };;
1324.endp bn_mul_comba4#
1325#undef carry2
1326#undef carry1
1327#endif
1328
1329#if 1
1330//
1331// BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
1332//
1333// In a nutshell it's a port of my MIPS III/IV implementation.
1334//
1335#define AT r14
1336#define H r16
1337#define HH r20
1338#define L r17
1339#define D r18
1340#define DH r22
1341#define I r21
1342
1343#if 0
1344// Some preprocessors (most notably HP-UX) appear to be allergic to
1345// macros enclosed in parentheses, as these three will be.
1346#define cont p16
1347#define break p0 // p20
1348#define equ p24
1349#else
1350cont=p16
1351break=p0
1352equ=p24
1353#endif
1354
1355.global abort#
1356.global bn_div_words#
1357.proc bn_div_words#
1358.align 64
1359bn_div_words:
1360 .prologue
1361 .fframe 0
1362 .save ar.pfs,r2
1363 .save b0,r3
1364{ .mii; alloc r2=ar.pfs,3,5,0,8
1365 mov r3=b0
1366 mov r10=pr };;
1367{ .mmb; cmp.eq p6,p0=r34,r0
1368 mov r8=-1
1369(p6) br.ret.spnt.many b0 };;
1370
1371 .body
1372{ .mii; mov H=r32 // save h
1373 mov ar.ec=0 // don't rotate at exit
1374 mov pr.rot=0 }
1375{ .mii; mov L=r33 // save l
1376 mov r36=r0 };;
1377
1378.L_divw_shift: // -vv- note signed comparison
1379{ .mfi; (p0) cmp.lt p16,p0=r0,r34 // d
1380 (p0) shladd r33=r34,1,r0 }
1381{ .mfb; (p0) add r35=1,r36
1382 (p0) nop.f 0x0
1383(p16) br.wtop.dpnt .L_divw_shift };;
1384
1385{ .mii; mov D=r34
1386 shr.u DH=r34,32
1387 sub r35=64,r36 };;
1388{ .mii; setf.sig f7=DH
1389 shr.u AT=H,r35
1390 mov I=r36 };;
1391{ .mib; cmp.ne p6,p0=r0,AT
1392 shl H=H,r36
1393(p6) br.call.spnt.clr b0=abort };; // overflow, die...
1394
1395{ .mfi; fcvt.xuf.s1 f7=f7
1396 shr.u AT=L,r35 };;
1397{ .mii; shl L=L,r36
1398 or H=H,AT };;
1399
1400{ .mii; nop.m 0x0
1401 cmp.leu p6,p0=D,H;;
1402(p6) sub H=H,D }
1403
1404{ .mlx; setf.sig f14=D
1405 movl AT=0xffffffff };;
1406///////////////////////////////////////////////////////////
1407{ .mii; setf.sig f6=H
1408 shr.u HH=H,32;;
1409 cmp.eq p6,p7=HH,DH };;
1410{ .mfb;
1411(p6) setf.sig f8=AT
1412(p7) fcvt.xuf.s1 f6=f6
1413(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1414
1415{ .mfi; getf.sig r33=f8 // q
1416 xmpy.lu f9=f8,f14 }
1417{ .mfi; xmpy.hu f10=f8,f14
1418 shrp H=H,L,32 };;
1419
1420{ .mmi; getf.sig r35=f9 // tl
1421 getf.sig r31=f10 };; // th
1422
1423.L_divw_1st_iter:
1424{ .mii; (p0) add r32=-1,r33
1425 (p0) cmp.eq equ,cont=HH,r31 };;
1426{ .mii; (p0) cmp.ltu p8,p0=r35,D
1427 (p0) sub r34=r35,D
1428 (equ) cmp.leu break,cont=r35,H };;
1429{ .mib; (cont) cmp.leu cont,break=HH,r31
1430 (p8) add r31=-1,r31
1431(cont) br.wtop.spnt .L_divw_1st_iter };;
1432///////////////////////////////////////////////////////////
1433{ .mii; sub H=H,r35
1434 shl r8=r33,32
1435 shl L=L,32 };;
1436///////////////////////////////////////////////////////////
1437{ .mii; setf.sig f6=H
1438 shr.u HH=H,32;;
1439 cmp.eq p6,p7=HH,DH };;
1440{ .mfb;
1441(p6) setf.sig f8=AT
1442(p7) fcvt.xuf.s1 f6=f6
1443(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1444
1445{ .mfi; getf.sig r33=f8 // q
1446 xmpy.lu f9=f8,f14 }
1447{ .mfi; xmpy.hu f10=f8,f14
1448 shrp H=H,L,32 };;
1449
1450{ .mmi; getf.sig r35=f9 // tl
1451 getf.sig r31=f10 };; // th
1452
1453.L_divw_2nd_iter:
1454{ .mii; (p0) add r32=-1,r33
1455 (p0) cmp.eq equ,cont=HH,r31 };;
1456{ .mii; (p0) cmp.ltu p8,p0=r35,D
1457 (p0) sub r34=r35,D
1458 (equ) cmp.leu break,cont=r35,H };;
1459{ .mib; (cont) cmp.leu cont,break=HH,r31
1460 (p8) add r31=-1,r31
1461(cont) br.wtop.spnt .L_divw_2nd_iter };;
1462///////////////////////////////////////////////////////////
1463{ .mii; sub H=H,r35
1464 or r8=r8,r33
1465 mov ar.pfs=r2 };;
1466{ .mii; shr.u r9=H,I // remainder if anybody wants it
1467 mov pr=r10,-1 }
1468{ .mfb; br.ret.sptk.many b0 };;
1469
1470// Unsigned 64 by 32 (well, by 64 for the moment) bit integer division
1471// procedure.
1472//
1473// inputs: f6 = (double)a, f7 = (double)b
1474// output: f8 = (int)(a/b)
1475// clobbered: f8,f9,f10,f11,pred
1476pred=p15
1477// This procedure is essentially Intel code and therefore is
1478// copyrighted to Intel Corporation (I suppose...). It's slightly
1479// modified for specific needs.
1480.align 32
1481.skip 16
1482.L_udiv64_32_b6:
1483 frcpa.s1 f8,pred=f6,f7;; // [0] y0 = 1 / b
1484
1485(pred) fnma.s1 f9=f7,f8,f1 // [5] e0 = 1 - b * y0
1486(pred) fmpy.s1 f10=f6,f8;; // [5] q0 = a * y0
1487(pred) fmpy.s1 f11=f9,f9 // [10] e1 = e0 * e0
1488(pred) fma.s1 f10=f9,f10,f10;; // [10] q1 = q0 + e0 * q0
1489(pred) fma.s1 f8=f9,f8,f8 //;; // [15] y1 = y0 + e0 * y0
1490(pred) fma.s1 f9=f11,f10,f10;; // [15] q2 = q1 + e1 * q1
1491(pred) fma.s1 f8=f11,f8,f8 //;; // [20] y2 = y1 + e1 * y1
1492(pred) fnma.s1 f10=f7,f9,f6;; // [20] r2 = a - b * q2
1493(pred) fma.s1 f8=f10,f8,f9;; // [25] q3 = q2 + r2 * y2
1494
1495 fcvt.fxu.trunc.s1 f8=f8 // [30] q = trunc(q3)
1496 br.ret.sptk.many b6;;
1497.endp bn_div_words#
1498#endif
diff --git a/src/lib/libcrypto/bn/asm/vms.mar b/src/lib/libcrypto/bn/asm/vms.mar
index ac9d57d7b0..465f2774b6 100644
--- a/src/lib/libcrypto/bn/asm/vms.mar
+++ b/src/lib/libcrypto/bn/asm/vms.mar
@@ -162,442 +162,237 @@ n=12 ;(AP) n by value (input)
162 movl #1,r0 ; return SS$_NORMAL 162 movl #1,r0 ; return SS$_NORMAL
163 ret 163 ret
164 164
165 .title (generated) 165 .title vax_bn_div_words unsigned divide
166 166;
167 .psect code,nowrt 167; Richard Levitte 20-Nov-2000
168 168;
169.entry BN_DIV_WORDS,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10> 169; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
170 subl2 #4,sp 170; {
171 171; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
172 clrl r9 172; }
173 movl #2,r8 173;
174 174; Using EDIV would be very easy, if it didn't do signed calculations.
175 tstl 12(ap) 175; Therefore, some extra things have to happen around it. The way to
176 bneq noname.2 176; handle that is to shift all operands right one step (basically dividing
177 mnegl #1,r10 177; them by 2) and handle the different cases depending on what the lowest
178 brw noname.3 178; bit of each operand was.
179 tstl r0 179;
180 nop 180; To start with, let's define the following:
181noname.2: 181;
182 182; a' = l & 1
183 pushl 12(ap) 183; a2 = <h,l> >> 1 # UNSIGNED shift!
184 calls #1,BN_NUM_BITS_WORD 184; b' = d & 1
185 movl r0,r7 185; b2 = d >> 1 # UNSIGNED shift!
186 186;
187 cmpl r7,#32 187; Now, use EDIV to calculate a quotient and a remainder:
188 beql noname.4 188;
189 ashl r7,#1,r2 189; q'' = a2/b2
190 cmpl 4(ap),r2 190; r'' = a2 - q''*b2
191 blequ noname.4 191;
192 192; If b' is 0, the quotient is already correct, we just need to adjust the
193 pushl r7 193; remainder:
194 calls #1,BN_DIV_WORDS_ABORT 194;
195noname.4: 195; if (b' == 0)
196 196; {
197 subl3 r7,#32,r7 197; r = 2*r'' + a'
198 198; q = q''
199 movl 12(ap),r2 199; }
200 cmpl 4(ap),r2 200;
201 blssu noname.5 201; If b' is 1, we need to do other adjustments. The first thought is the
202 subl2 r2,4(ap) 202; following (note that r' will not always have the right value, but an
203noname.5: 203; adjustment follows further down):
204 204;
205 tstl r7 205; if (b' == 1)
206 beql noname.6 206; {
207 207; q' = q''
208 ashl r7,r2,12(ap) 208; r' = a - q'*b
209 209;
210 ashl r7,4(ap),r4 210; However, one can note the following relationship:
211 subl3 r7,#32,r3 211;
212 subl3 r3,#32,r2 212; r'' = a2 - q''*b2
213 extzv r3,r2,8(ap),r2 213; => 2*r'' = 2*a2 - 2*q''*b2
214 bisl3 r4,r2,4(ap) 214; = { a = 2*a2 + a', b = 2*b2 + b' = 2*b2 + 1,
215 215; q' = q'' }
216 ashl r7,8(ap),8(ap) 216; = a - a' - q'*(b - 1)
217noname.6: 217; = a - q'*b - a' + q'
218 218; = r' - a' + q'
219 bicl3 #65535,12(ap),r2 219; => r' = 2*r'' - q' + a'
220 extzv #16,#16,r2,r5 220;
221 221; This enables us to use r'' instead of discarding and calculating another
222 bicl3 #-65536,12(ap),r6 222; modulo:
223 223;
224noname.7: 224; if (b' == 1)
225 225; {
226 moval 4(ap),r2 226; q' = q''
227 movzwl 2(r2),r0 227; r' = (r'' << 1) - q' + a'
228 cmpl r0,r5 228;
229 bneq noname.8 229; Now, all we have to do is adjust r', because it might be < 0:
230 230;
231 movzwl #65535,r4 231; while (r' < 0)
232 brb noname.9 232; {
233noname.8: 233; r' = r' + b
234 234; q' = q' - 1
235 clrl r1 235; }
236 movl (r2),r0 236; }
237 movl r5,r2 237;
238 bgeq vcg.1 238; return q'
239 cmpl r2,r0
240 bgtru vcg.2
241 incl r1
242 brb vcg.2
243 nop
244vcg.1:
245 ediv r2,r0,r1,r0
246vcg.2:
247 movl r1,r4
248noname.9:
249
250noname.10:
251
252 mull3 r5,r4,r0
253 subl3 r0,4(ap),r3
254
255 bicl3 #65535,r3,r0
256 bneq noname.13
257 mull3 r6,r4,r2
258 ashl #16,r3,r1
259 bicl3 #65535,8(ap),r0
260 extzv #16,#16,r0,r0
261 addl2 r0,r1
262 cmpl r2,r1
263 bgtru noname.12
264noname.11:
265
266 brb noname.13
267 nop
268noname.12:
269
270 decl r4
271 brb noname.10
272noname.13:
273
274 mull3 r5,r4,r1
275
276 mull3 r6,r4,r0
277
278 extzv #16,#16,r0,r3
279
280 ashl #16,r0,r2
281 bicl3 #65535,r2,r0
282
283 addl2 r3,r1
284
285 moval 8(ap),r3
286 cmpl (r3),r0
287 bgequ noname.15
288 incl r1
289noname.15:
290
291 subl2 r0,(r3)
292
293 cmpl 4(ap),r1
294 bgequ noname.16
295
296 addl2 12(ap),4(ap)
297
298 decl r4
299noname.16:
300
301 subl2 r1,4(ap)
302
303 decl r8
304 beql noname.18
305noname.17:
306
307 ashl #16,r4,r9
308 239
309 ashl #16,4(ap),r2 240h=4 ;(AP) h by value (input)
310 movzwl 2(r3),r0 241l=8 ;(AP) l by value (input)
311 bisl2 r0,r2 242d=12 ;(AP) d by value (input)
312 bicl3 #0,r2,4(ap)
313 243
314 bicl3 #-65536,(r3),r0 244;aprim=r5
315 ashl #16,r0,(r3) 245;a2=r6
316 brw noname.7 246;a20=r6
317 nop 247;a21=r7
318noname.18: 248;bprim=r8
249;b2=r9
250;qprim=r10 ; initially used as q''
251;rprim=r11 ; initially used as r''
319 252
320 bisl2 r4,r9
321 253
322 movl r9,r10 254 .psect code,nowrt
323 255
324noname.3: 256.entry bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
257 movl l(ap),r2
258 movl h(ap),r3
259 movl d(ap),r4
260
261 movl #0,r5
262 movl #0,r8
263 movl #0,r0
264; movl #0,r1
265
266 rotl #-1,r2,r6 ; a20 = l >> 1 (almost)
267 rotl #-1,r3,r7 ; a21 = h >> 1 (almost)
268 rotl #-1,r4,r9 ; b2 = d >> 1 (almost)
269
270 tstl r6
271 bgeq 1$
272 xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 0
273 incl r5 ; a' = 1
2741$:
275 tstl r7
276 bgeq 2$
277 xorl2 #^X80000000,r6 ; fixup a20 so highest bit is 1,
278 ; since that's what was lowest in a21
279 xorl2 #^X80000000,r7 ; fixup a21 so highest bit is 0
2802$:
281 tstl r9
282 beql 666$ ; Uh-oh, the divisor is 0...
283 bgtr 3$
284 xorl2 #^X80000000,r9 ; fixup b2 so highest bit is 0
285 incl r8 ; b' = 1
2863$:
287 tstl r9
288 bneq 4$ ; if b2 is 0, we know that b' is 1
289 tstl r3
290 bneq 666$ ; if higher half isn't 0, we overflow
291 movl r2,r10 ; otherwise, we have our result
292 brb 42$ ; This is a success, really.
2934$:
294 ediv r9,r6,r10,r11
295
296 tstl r8
297 bneq 5$ ; If b' != 0, go to the other part
298; addl3 r11,r11,r1
299; addl2 r5,r1
300 brb 42$
3015$:
302 ashl #1,r11,r11
303 subl2 r10,r11
304 addl2 r5,r11
305 bgeq 7$
3066$:
307 decl r10
308 addl2 r4,r11
309 blss 6$
3107$:
311; movl r11,r1
31242$:
325 movl r10,r0 313 movl r10,r0
326 ret 314666$:
327 tstl r0 315 ret
328
329 316
330 .psect code,nowrt 317 .title vax_bn_add_words unsigned add of two arrays
331 318;
332.entry BN_ADD_WORDS,^m<r2,r3,r4,r5,r6,r7> 319; Richard Levitte 20-Nov-2000
333 320;
334 tstl 16(ap) 321; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
335 bgtr noname.21 322; ULONG c = 0;
336 clrl r7 323; int i;
337 brw noname.22 324; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
338noname.21: 325; return(c);
339 326; }
340 clrl r4
341
342 tstl r0
343noname.23:
344
345 movl 8(ap),r6
346 addl3 r4,(r6),r2
347
348 bicl2 #0,r2
349
350 clrl r0
351 cmpl r2,r4
352 bgequ vcg.3
353 incl r0
354vcg.3:
355 movl r0,r4
356
357 movl 12(ap),r5
358 addl3 (r5),r2,r1
359 bicl2 #0,r1
360
361 clrl r0
362 cmpl r1,r2
363 bgequ vcg.4
364 incl r0
365vcg.4:
366 addl2 r0,r4
367
368 movl 4(ap),r3
369 movl r1,(r3)
370
371 decl 16(ap)
372 bgtr gen.1
373 brw noname.25
374gen.1:
375noname.24:
376
377 addl3 r4,4(r6),r2
378
379 bicl2 #0,r2
380
381 clrl r0
382 cmpl r2,r4
383 bgequ vcg.5
384 incl r0
385vcg.5:
386 movl r0,r4
387
388 addl3 4(r5),r2,r1
389 bicl2 #0,r1
390
391 clrl r0
392 cmpl r1,r2
393 bgequ vcg.6
394 incl r0
395vcg.6:
396 addl2 r0,r4
397
398 movl r1,4(r3)
399
400 decl 16(ap)
401 bleq noname.25
402noname.26:
403
404 addl3 r4,8(r6),r2
405
406 bicl2 #0,r2
407
408 clrl r0
409 cmpl r2,r4
410 bgequ vcg.7
411 incl r0
412vcg.7:
413 movl r0,r4
414
415 addl3 8(r5),r2,r1
416 bicl2 #0,r1
417
418 clrl r0
419 cmpl r1,r2
420 bgequ vcg.8
421 incl r0
422vcg.8:
423 addl2 r0,r4
424
425 movl r1,8(r3)
426
427 decl 16(ap)
428 bleq noname.25
429noname.27:
430
431 addl3 r4,12(r6),r2
432
433 bicl2 #0,r2
434
435 clrl r0
436 cmpl r2,r4
437 bgequ vcg.9
438 incl r0
439vcg.9:
440 movl r0,r4
441
442 addl3 12(r5),r2,r1
443 bicl2 #0,r1
444
445 clrl r0
446 cmpl r1,r2
447 bgequ vcg.10
448 incl r0
449vcg.10:
450 addl2 r0,r4
451 327
452 movl r1,12(r3) 328r=4 ;(AP) r by reference (output)
329a=8 ;(AP) a by reference (input)
330b=12 ;(AP) b by reference (input)
331n=16 ;(AP) n by value (input)
453 332
454 decl 16(ap)
455 bleq noname.25
456noname.28:
457 333
458 addl3 #16,r6,8(ap) 334 .psect code,nowrt
459 335
460 addl3 #16,r5,12(ap) 336.entry bn_add_words,^m<r2,r3,r4,r5,r6>
461 337
462 addl3 #16,r3,4(ap) 338 moval @r(ap),r2
463 brw noname.23 339 moval @a(ap),r3
464 tstl r0 340 moval @b(ap),r4
465noname.25: 341 movl n(ap),r5 ; assumed >0 by C code
342 clrl r0 ; c
466 343
467 movl r4,r7 344 tstl r5 ; carry = 0
345 bleq 666$
468 346
469noname.22: 3470$:
470 movl r7,r0 348 movl (r3)+,r6 ; carry untouched
471 ret 349 adwc (r4)+,r6 ; carry used and touched
472 nop 350 movl r6,(r2)+ ; carry untouched
351 sobgtr r5,0$ ; carry untouched
473 352
353 adwc #0,r0
354666$:
355 ret
474 356
357 .title vax_bn_sub_words unsigned add of two arrays
358;
359; Richard Levitte 20-Nov-2000
360;
361; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
362; ULONG c = 0;
363; int i;
364; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
365; return(c);
366; }
475 367
476;r=4 ;(AP) 368r=4 ;(AP) r by reference (output)
477;a=8 ;(AP) 369a=8 ;(AP) a by reference (input)
478;b=12 ;(AP) 370b=12 ;(AP) b by reference (input)
479;n=16 ;(AP) n by value (input) 371n=16 ;(AP) n by value (input)
480 372
481 .psect code,nowrt
482 373
483.entry BN_SUB_WORDS,^m<r2,r3,r4,r5,r6,r7> 374 .psect code,nowrt
484 375
485 clrl r6 376.entry bn_sub_words,^m<r2,r3,r4,r5,r6>
486 377
487 tstl 16(ap) 378 moval @r(ap),r2
488 bgtr noname.31 379 moval @a(ap),r3
489 clrl r7 380 moval @b(ap),r4
490 brw noname.32 381 movl n(ap),r5 ; assumed >0 by C code
491 tstl r0 382 clrl r0 ; c
492noname.31:
493 383
494noname.33: 384 tstl r5 ; carry = 0
385 bleq 666$
495 386
496 movl 8(ap),r5 3870$:
497 movl (r5),r1 388 movl (r3)+,r6 ; carry untouched
498 movl 12(ap),r4 389 sbwc (r4)+,r6 ; carry used and touched
499 movl (r4),r2 390 movl r6,(r2)+ ; carry untouched
500 391 sobgtr r5,0$ ; carry untouched
501 movl 4(ap),r3
502 subl3 r2,r1,r0
503 subl2 r6,r0
504 bicl3 #0,r0,(r3)
505
506 cmpl r1,r2
507 beql noname.34
508 clrl r0
509 cmpl r1,r2
510 bgequ vcg.11
511 incl r0
512vcg.11:
513 movl r0,r6
514noname.34:
515
516 decl 16(ap)
517 bgtr gen.2
518 brw noname.36
519gen.2:
520noname.35:
521
522 movl 4(r5),r2
523 movl 4(r4),r1
524
525 subl3 r1,r2,r0
526 subl2 r6,r0
527 bicl3 #0,r0,4(r3)
528
529 cmpl r2,r1
530 beql noname.37
531 clrl r0
532 cmpl r2,r1
533 bgequ vcg.12
534 incl r0
535vcg.12:
536 movl r0,r6
537noname.37:
538
539 decl 16(ap)
540 bleq noname.36
541noname.38:
542
543 movl 8(r5),r1
544 movl 8(r4),r2
545
546 subl3 r2,r1,r0
547 subl2 r6,r0
548 bicl3 #0,r0,8(r3)
549
550 cmpl r1,r2
551 beql noname.39
552 clrl r0
553 cmpl r1,r2
554 bgequ vcg.13
555 incl r0
556vcg.13:
557 movl r0,r6
558noname.39:
559
560 decl 16(ap)
561 bleq noname.36
562noname.40:
563
564 movl 12(r5),r1
565 movl 12(r4),r2
566
567 subl3 r2,r1,r0
568 subl2 r6,r0
569 bicl3 #0,r0,12(r3)
570
571 cmpl r1,r2
572 beql noname.41
573 clrl r0
574 cmpl r1,r2
575 bgequ vcg.14
576 incl r0
577vcg.14:
578 movl r0,r6
579noname.41:
580
581 decl 16(ap)
582 bleq noname.36
583noname.42:
584
585 addl3 #16,r5,8(ap)
586
587 addl3 #16,r4,12(ap)
588
589 addl3 #16,r3,4(ap)
590 brw noname.33
591 tstl r0
592noname.36:
593
594 movl r6,r7
595
596noname.32:
597 movl r7,r0
598 ret
599 nop
600 392
393 adwc #0,r0
394666$:
395 ret
601 396
602 397
603;r=4 ;(AP) 398;r=4 ;(AP)
@@ -6615,81 +6410,3 @@ noname.610:
6615 6410
6616; For now, the code below doesn't work, so I end this prematurely. 6411; For now, the code below doesn't work, so I end this prematurely.
6617.end 6412.end
6618
6619 .title vax_bn_div64 division 64/32=>32
6620;
6621; r.l. 16-jan-1998
6622;
6623; unsigned int bn_div64(unsigned long h, unsigned long l, unsigned long d)
6624; return <h,l>/d;
6625;
6626
6627 .psect code,nowrt
6628
6629h=4 ;(AP) by value (input)
6630l=8 ;(AP) by value (input)
6631d=12 ;(AP) by value (input)
6632
6633.entry bn_div64,^m<r2,r3,r4,r5,r6,r7,r8,r9>
6634
6635 movl l(ap),r2 ; l
6636 movl h(ap),r3 ; h
6637 movl d(ap),r4 ; d
6638 clrl r5 ; q
6639 clrl r6 ; r
6640
6641 ; Treat "negative" specially
6642 tstl r3
6643 blss 30$
6644
6645 tstl r4
6646 beql 90$
6647
6648 ediv r4,r2,r5,r6
6649 bvs 666$
6650
6651 movl r5,r0
6652 ret
6653
665430$:
6655 ; The theory here is to do some harmless shifting and a little
6656 ; bit of rounding (brackets are to designate when decimals are
6657 ; cut off):
6658 ;
6659 ; result = 2 * [ ([<h,0>/2] + [d/2]) / d ] + [ l / d ]
6660
6661 movl #0,r7
6662 movl r3,r8 ; copy h
6663 ashq #-1,r7,r7 ; [<h,0>/2] => <r8,r7>
6664 bicl2 #^X80000000,r8 ; Remove "sign"
6665
6666 movl r4,r9 ; copy d
6667 ashl #-1,r9,r9 ; [d/2] => r9
6668 bicl2 #^X80000000,r9 ; Remove "sign"
6669
6670 addl2 r9,r7
6671 adwc #0,r8 ; [<h,0>/2] + [d/2] => <r8,r7>
6672
6673 ediv r4,r7,r5,r6 ; [ ([<h,0>/2] + [d/2]) / d ] => <r5,r6>
6674 bvs 666$
6675
6676 movl #0,r6
6677 ashq #1,r5,r5 ; 2 * [ ([<h,0>/2] + [d/2]) / d ] => r5
6678
6679 movl #0,r3
6680 ediv r4,r2,r8,r9 ; [ l / d ] => <r8,r9>
6681
6682 addl2 r8,r5 ;
6683 bcs 666$
6684
6685 movl r5,r0
6686 ret
6687
668890$:
6689 movl #-1,r0
6690 ret
6691
6692666$:
6693
6694
6695.end
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
index b232c2ceae..d25b49c9d8 100644
--- a/src/lib/libcrypto/bn/bn.h
+++ b/src/lib/libcrypto/bn/bn.h
@@ -59,24 +59,22 @@
59#ifndef HEADER_BN_H 59#ifndef HEADER_BN_H
60#define HEADER_BN_H 60#define HEADER_BN_H
61 61
62#ifndef NO_FP_API 62#include <openssl/e_os2.h>
63#ifndef OPENSSL_NO_FP_API
63#include <stdio.h> /* FILE */ 64#include <stdio.h> /* FILE */
64#endif 65#endif
65#include <openssl/opensslconf.h>
66 66
67#ifdef __cplusplus 67#ifdef __cplusplus
68extern "C" { 68extern "C" {
69#endif 69#endif
70 70
71#ifdef VMS 71#ifdef OPENSSL_SYS_VMS
72#undef BN_LLONG /* experimental, so far... */ 72#undef BN_LLONG /* experimental, so far... */
73#endif 73#endif
74 74
75#define BN_MUL_COMBA 75#define BN_MUL_COMBA
76#define BN_SQR_COMBA 76#define BN_SQR_COMBA
77#define BN_RECURSION 77#define BN_RECURSION
78#define RECP_MUL_MOD
79#define MONT_MUL_MOD
80 78
81/* This next option uses the C libraries (2 word)/(1 word) function. 79/* This next option uses the C libraries (2 word)/(1 word) function.
82 * If it is not defined, I use my C version (which is slower). 80 * If it is not defined, I use my C version (which is slower).
@@ -89,8 +87,11 @@ extern "C" {
89 * For machines with only one compiler (or shared libraries), this should 87 * For machines with only one compiler (or shared libraries), this should
90 * be on. Again this in only really a problem on machines 88 * be on. Again this in only really a problem on machines
91 * using "long long's", are 32bit, and are not using my assembler code. */ 89 * using "long long's", are 32bit, and are not using my assembler code. */
92#if defined(MSDOS) || defined(WINDOWS) || defined(WIN32) || defined(linux) 90#if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \
93#define BN_DIV2W 91 defined(OPENSSL_SYS_WIN32) || defined(linux)
92# ifndef BN_DIV2W
93# define BN_DIV2W
94# endif
94#endif 95#endif
95 96
96/* assuming long is 64bit - this is the DEC Alpha 97/* assuming long is 64bit - this is the DEC Alpha
@@ -142,7 +143,7 @@ extern "C" {
142#endif 143#endif
143 144
144#ifdef THIRTY_TWO_BIT 145#ifdef THIRTY_TWO_BIT
145#if defined(WIN32) && !defined(__GNUC__) 146#if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__)
146#define BN_ULLONG unsigned _int64 147#define BN_ULLONG unsigned _int64
147#else 148#else
148#define BN_ULLONG unsigned long long 149#define BN_ULLONG unsigned long long
@@ -153,7 +154,7 @@ extern "C" {
153#define BN_BYTES 4 154#define BN_BYTES 4
154#define BN_BITS2 32 155#define BN_BITS2 32
155#define BN_BITS4 16 156#define BN_BITS4 16
156#ifdef WIN32 157#ifdef OPENSSL_SYS_WIN32
157/* VC++ doesn't like the LL suffix */ 158/* VC++ doesn't like the LL suffix */
158#define BN_MASK (0xffffffffffffffffL) 159#define BN_MASK (0xffffffffffffffffL)
159#else 160#else
@@ -238,18 +239,8 @@ typedef struct bignum_st
238 int flags; 239 int flags;
239 } BIGNUM; 240 } BIGNUM;
240 241
241/* Used for temp variables */ 242/* Used for temp variables (declaration hidden in bn_lcl.h) */
242#define BN_CTX_NUM 16 243typedef struct bignum_ctx BN_CTX;
243#define BN_CTX_NUM_POS 12
244typedef struct bignum_ctx
245 {
246 int tos;
247 BIGNUM bn[BN_CTX_NUM];
248 int flags;
249 int depth;
250 int pos[BN_CTX_NUM_POS];
251 int too_many;
252 } BN_CTX;
253 244
254typedef struct bn_blinding_st 245typedef struct bn_blinding_st
255 { 246 {
@@ -283,9 +274,6 @@ typedef struct bn_recp_ctx_st
283 int flags; 274 int flags;
284 } BN_RECP_CTX; 275 } BN_RECP_CTX;
285 276
286#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\
287 r,a,&((mont)->RR),(mont),ctx)
288
289#define BN_prime_checks 0 /* default: select number of iterations 277#define BN_prime_checks 0 /* default: select number of iterations
290 based on the size of the number */ 278 based on the size of the number */
291 279
@@ -308,17 +296,22 @@ typedef struct bn_recp_ctx_st
308 /* b >= 100 */ 27) 296 /* b >= 100 */ 27)
309 297
310#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8) 298#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
311#define BN_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w))) 299
312#define BN_is_zero(a) (((a)->top == 0) || BN_is_word(a,0)) 300/* Note that BN_abs_is_word does not work reliably for w == 0 */
313#define BN_is_one(a) (BN_is_word((a),1)) 301#define BN_abs_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w)))
314#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1)) 302#define BN_is_zero(a) (((a)->top == 0) || BN_abs_is_word(a,0))
303#define BN_is_one(a) (BN_abs_is_word((a),1) && !(a)->neg)
304#define BN_is_word(a,w) ((w) ? BN_abs_is_word((a),(w)) && !(a)->neg : \
305 BN_is_zero((a)))
306#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
307
315#define BN_one(a) (BN_set_word((a),1)) 308#define BN_one(a) (BN_set_word((a),1))
316#define BN_zero(a) (BN_set_word((a),0)) 309#define BN_zero(a) (BN_set_word((a),0))
317 310
318/*#define BN_ascii2bn(a) BN_hex2bn(a) */ 311/*#define BN_ascii2bn(a) BN_hex2bn(a) */
319/*#define BN_bn2ascii(a) BN_bn2hex(a) */ 312/*#define BN_bn2ascii(a) BN_bn2hex(a) */
320 313
321BIGNUM *BN_value_one(void); 314const BIGNUM *BN_value_one(void);
322char * BN_options(void); 315char * BN_options(void);
323BN_CTX *BN_CTX_new(void); 316BN_CTX *BN_CTX_new(void);
324void BN_CTX_init(BN_CTX *c); 317void BN_CTX_init(BN_CTX *c);
@@ -329,51 +322,70 @@ void BN_CTX_end(BN_CTX *ctx);
329int BN_rand(BIGNUM *rnd, int bits, int top,int bottom); 322int BN_rand(BIGNUM *rnd, int bits, int top,int bottom);
330int BN_pseudo_rand(BIGNUM *rnd, int bits, int top,int bottom); 323int BN_pseudo_rand(BIGNUM *rnd, int bits, int top,int bottom);
331int BN_rand_range(BIGNUM *rnd, BIGNUM *range); 324int BN_rand_range(BIGNUM *rnd, BIGNUM *range);
325int BN_pseudo_rand_range(BIGNUM *rnd, BIGNUM *range);
332int BN_num_bits(const BIGNUM *a); 326int BN_num_bits(const BIGNUM *a);
333int BN_num_bits_word(BN_ULONG); 327int BN_num_bits_word(BN_ULONG);
334BIGNUM *BN_new(void); 328BIGNUM *BN_new(void);
335void BN_init(BIGNUM *); 329void BN_init(BIGNUM *);
336void BN_clear_free(BIGNUM *a); 330void BN_clear_free(BIGNUM *a);
337BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b); 331BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b);
332void BN_swap(BIGNUM *a, BIGNUM *b);
338BIGNUM *BN_bin2bn(const unsigned char *s,int len,BIGNUM *ret); 333BIGNUM *BN_bin2bn(const unsigned char *s,int len,BIGNUM *ret);
339int BN_bn2bin(const BIGNUM *a, unsigned char *to); 334int BN_bn2bin(const BIGNUM *a, unsigned char *to);
340BIGNUM *BN_mpi2bn(unsigned char *s,int len,BIGNUM *ret); 335BIGNUM *BN_mpi2bn(const unsigned char *s,int len,BIGNUM *ret);
341int BN_bn2mpi(const BIGNUM *a, unsigned char *to); 336int BN_bn2mpi(const BIGNUM *a, unsigned char *to);
342int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); 337int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
343int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); 338int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
344int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); 339int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
345int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); 340int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
346int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx); 341int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
342int BN_sqr(BIGNUM *r, const BIGNUM *a,BN_CTX *ctx);
343
347int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, 344int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
348 BN_CTX *ctx); 345 BN_CTX *ctx);
349int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); 346#define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
350int BN_sqr(BIGNUM *r, BIGNUM *a,BN_CTX *ctx); 347int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx);
348int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
349int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
350int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
351int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
352int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
353 const BIGNUM *m, BN_CTX *ctx);
354int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
355int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
356int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m);
357int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx);
358int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m);
359
351BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w); 360BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w);
352BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w); 361BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w);
353int BN_mul_word(BIGNUM *a, BN_ULONG w); 362int BN_mul_word(BIGNUM *a, BN_ULONG w);
354int BN_add_word(BIGNUM *a, BN_ULONG w); 363int BN_add_word(BIGNUM *a, BN_ULONG w);
355int BN_sub_word(BIGNUM *a, BN_ULONG w); 364int BN_sub_word(BIGNUM *a, BN_ULONG w);
356int BN_set_word(BIGNUM *a, BN_ULONG w); 365int BN_set_word(BIGNUM *a, BN_ULONG w);
357BN_ULONG BN_get_word(BIGNUM *a); 366BN_ULONG BN_get_word(const BIGNUM *a);
367
358int BN_cmp(const BIGNUM *a, const BIGNUM *b); 368int BN_cmp(const BIGNUM *a, const BIGNUM *b);
359void BN_free(BIGNUM *a); 369void BN_free(BIGNUM *a);
360int BN_is_bit_set(const BIGNUM *a, int n); 370int BN_is_bit_set(const BIGNUM *a, int n);
361int BN_lshift(BIGNUM *r, const BIGNUM *a, int n); 371int BN_lshift(BIGNUM *r, const BIGNUM *a, int n);
362int BN_lshift1(BIGNUM *r, BIGNUM *a); 372int BN_lshift1(BIGNUM *r, const BIGNUM *a);
363int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p,BN_CTX *ctx); 373int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,BN_CTX *ctx);
364int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, 374
365 const BIGNUM *m,BN_CTX *ctx); 375int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
366int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, const BIGNUM *p, 376 const BIGNUM *m,BN_CTX *ctx);
367 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); 377int BN_mod_exp_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
378 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
368int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p, 379int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p,
369 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); 380 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
370int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2, 381int BN_mod_exp2_mont(BIGNUM *r, const BIGNUM *a1, const BIGNUM *p1,
371 BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx); 382 const BIGNUM *a2, const BIGNUM *p2,const BIGNUM *m,
372int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, 383 BN_CTX *ctx,BN_MONT_CTX *m_ctx);
373 BIGNUM *m,BN_CTX *ctx); 384int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
385 const BIGNUM *m,BN_CTX *ctx);
386
374int BN_mask_bits(BIGNUM *a,int n); 387int BN_mask_bits(BIGNUM *a,int n);
375int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx); 388#ifndef OPENSSL_NO_FP_API
376#ifndef NO_FP_API
377int BN_print_fp(FILE *fp, const BIGNUM *a); 389int BN_print_fp(FILE *fp, const BIGNUM *a);
378#endif 390#endif
379#ifdef HEADER_BIO_H 391#ifdef HEADER_BIO_H
@@ -381,9 +393,9 @@ int BN_print(BIO *fp, const BIGNUM *a);
381#else 393#else
382int BN_print(void *fp, const BIGNUM *a); 394int BN_print(void *fp, const BIGNUM *a);
383#endif 395#endif
384int BN_reciprocal(BIGNUM *r, BIGNUM *m, int len, BN_CTX *ctx); 396int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx);
385int BN_rshift(BIGNUM *r, BIGNUM *a, int n); 397int BN_rshift(BIGNUM *r, const BIGNUM *a, int n);
386int BN_rshift1(BIGNUM *r, BIGNUM *a); 398int BN_rshift1(BIGNUM *r, const BIGNUM *a);
387void BN_clear(BIGNUM *a); 399void BN_clear(BIGNUM *a);
388BIGNUM *BN_dup(const BIGNUM *a); 400BIGNUM *BN_dup(const BIGNUM *a);
389int BN_ucmp(const BIGNUM *a, const BIGNUM *b); 401int BN_ucmp(const BIGNUM *a, const BIGNUM *b);
@@ -393,23 +405,30 @@ char * BN_bn2hex(const BIGNUM *a);
393char * BN_bn2dec(const BIGNUM *a); 405char * BN_bn2dec(const BIGNUM *a);
394int BN_hex2bn(BIGNUM **a, const char *str); 406int BN_hex2bn(BIGNUM **a, const char *str);
395int BN_dec2bn(BIGNUM **a, const char *str); 407int BN_dec2bn(BIGNUM **a, const char *str);
396int BN_gcd(BIGNUM *r,BIGNUM *in_a,BIGNUM *in_b,BN_CTX *ctx); 408int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx);
397BIGNUM *BN_mod_inverse(BIGNUM *ret,BIGNUM *a, const BIGNUM *n,BN_CTX *ctx); 409int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */
398BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe,BIGNUM *add, 410BIGNUM *BN_mod_inverse(BIGNUM *ret,
399 BIGNUM *rem,void (*callback)(int,int,void *),void *cb_arg); 411 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
412BIGNUM *BN_mod_sqrt(BIGNUM *ret,
413 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
414BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe,
415 const BIGNUM *add, const BIGNUM *rem,
416 void (*callback)(int,int,void *),void *cb_arg);
400int BN_is_prime(const BIGNUM *p,int nchecks, 417int BN_is_prime(const BIGNUM *p,int nchecks,
401 void (*callback)(int,int,void *), 418 void (*callback)(int,int,void *),
402 BN_CTX *ctx,void *cb_arg); 419 BN_CTX *ctx,void *cb_arg);
403int BN_is_prime_fasttest(const BIGNUM *p,int nchecks, 420int BN_is_prime_fasttest(const BIGNUM *p,int nchecks,
404 void (*callback)(int,int,void *),BN_CTX *ctx,void *cb_arg, 421 void (*callback)(int,int,void *),BN_CTX *ctx,void *cb_arg,
405 int do_trial_division); 422 int do_trial_division);
406void ERR_load_BN_strings(void );
407 423
408BN_MONT_CTX *BN_MONT_CTX_new(void ); 424BN_MONT_CTX *BN_MONT_CTX_new(void );
409void BN_MONT_CTX_init(BN_MONT_CTX *ctx); 425void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
410int BN_mod_mul_montgomery(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_MONT_CTX *mont, 426int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,
411 BN_CTX *ctx); 427 BN_MONT_CTX *mont, BN_CTX *ctx);
412int BN_from_montgomery(BIGNUM *r,BIGNUM *a,BN_MONT_CTX *mont,BN_CTX *ctx); 428#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\
429 (r),(a),&((mont)->RR),(mont),(ctx))
430int BN_from_montgomery(BIGNUM *r,const BIGNUM *a,
431 BN_MONT_CTX *mont, BN_CTX *ctx);
413void BN_MONT_CTX_free(BN_MONT_CTX *mont); 432void BN_MONT_CTX_free(BN_MONT_CTX *mont);
414int BN_MONT_CTX_set(BN_MONT_CTX *mont,const BIGNUM *modulus,BN_CTX *ctx); 433int BN_MONT_CTX_set(BN_MONT_CTX *mont,const BIGNUM *modulus,BN_CTX *ctx);
415BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from); 434BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from);
@@ -427,12 +446,12 @@ void BN_RECP_CTX_init(BN_RECP_CTX *recp);
427BN_RECP_CTX *BN_RECP_CTX_new(void); 446BN_RECP_CTX *BN_RECP_CTX_new(void);
428void BN_RECP_CTX_free(BN_RECP_CTX *recp); 447void BN_RECP_CTX_free(BN_RECP_CTX *recp);
429int BN_RECP_CTX_set(BN_RECP_CTX *recp,const BIGNUM *rdiv,BN_CTX *ctx); 448int BN_RECP_CTX_set(BN_RECP_CTX *recp,const BIGNUM *rdiv,BN_CTX *ctx);
430int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y, 449int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
431 BN_RECP_CTX *recp,BN_CTX *ctx); 450 BN_RECP_CTX *recp,BN_CTX *ctx);
432int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 451int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
433 const BIGNUM *m, BN_CTX *ctx); 452 const BIGNUM *m, BN_CTX *ctx);
434int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, 453int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
435 BN_RECP_CTX *recp, BN_CTX *ctx); 454 BN_RECP_CTX *recp, BN_CTX *ctx);
436 455
437/* library internal functions */ 456/* library internal functions */
438 457
@@ -440,6 +459,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m,
440 (a):bn_expand2((a),(bits)/BN_BITS2+1)) 459 (a):bn_expand2((a),(bits)/BN_BITS2+1))
441#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words))) 460#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words)))
442BIGNUM *bn_expand2(BIGNUM *a, int words); 461BIGNUM *bn_expand2(BIGNUM *a, int words);
462BIGNUM *bn_dup_expand(const BIGNUM *a, int words);
443 463
444#define bn_fix_top(a) \ 464#define bn_fix_top(a) \
445 { \ 465 { \
@@ -451,15 +471,15 @@ BIGNUM *bn_expand2(BIGNUM *a, int words);
451 } \ 471 } \
452 } 472 }
453 473
454BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w); 474BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
455BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w); 475BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
456void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num); 476void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
457BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); 477BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
458BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num); 478BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
459BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num); 479BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
460 480
461#ifdef BN_DEBUG 481#ifdef BN_DEBUG
462 void bn_dump1(FILE *o, const char *a, BN_ULONG *b,int n); 482void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n);
463# define bn_print(a) {fprintf(stderr, #a "="); BN_print_fp(stderr,a); \ 483# define bn_print(a) {fprintf(stderr, #a "="); BN_print_fp(stderr,a); \
464 fprintf(stderr,"\n");} 484 fprintf(stderr,"\n");}
465# define bn_dump(a,n) bn_dump1(stderr,#a,a,n); 485# define bn_dump(a,n) bn_dump1(stderr,#a,a,n);
@@ -474,6 +494,7 @@ int BN_bntest_rand(BIGNUM *rnd, int bits, int top,int bottom);
474/* The following lines are auto generated by the script mkerr.pl. Any changes 494/* The following lines are auto generated by the script mkerr.pl. Any changes
475 * made after this point may be overwritten when the script is next run. 495 * made after this point may be overwritten when the script is next run.
476 */ 496 */
497void ERR_load_BN_strings(void);
477 498
478/* Error codes for the BN functions. */ 499/* Error codes for the BN functions. */
479 500
@@ -488,11 +509,14 @@ int BN_bntest_rand(BIGNUM *rnd, int bits, int top,int bottom);
488#define BN_F_BN_CTX_NEW 106 509#define BN_F_BN_CTX_NEW 106
489#define BN_F_BN_DIV 107 510#define BN_F_BN_DIV 107
490#define BN_F_BN_EXPAND2 108 511#define BN_F_BN_EXPAND2 108
512#define BN_F_BN_EXPAND_INTERNAL 120
491#define BN_F_BN_MOD_EXP2_MONT 118 513#define BN_F_BN_MOD_EXP2_MONT 118
492#define BN_F_BN_MOD_EXP_MONT 109 514#define BN_F_BN_MOD_EXP_MONT 109
493#define BN_F_BN_MOD_EXP_MONT_WORD 117 515#define BN_F_BN_MOD_EXP_MONT_WORD 117
494#define BN_F_BN_MOD_INVERSE 110 516#define BN_F_BN_MOD_INVERSE 110
517#define BN_F_BN_MOD_LSHIFT_QUICK 119
495#define BN_F_BN_MOD_MUL_RECIPROCAL 111 518#define BN_F_BN_MOD_MUL_RECIPROCAL 111
519#define BN_F_BN_MOD_SQRT 121
496#define BN_F_BN_MPI2BN 112 520#define BN_F_BN_MPI2BN 112
497#define BN_F_BN_NEW 113 521#define BN_F_BN_NEW 113
498#define BN_F_BN_RAND 114 522#define BN_F_BN_RAND 114
@@ -507,14 +531,17 @@ int BN_bntest_rand(BIGNUM *rnd, int bits, int top,int bottom);
507#define BN_R_DIV_BY_ZERO 103 531#define BN_R_DIV_BY_ZERO 103
508#define BN_R_ENCODING_ERROR 104 532#define BN_R_ENCODING_ERROR 104
509#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105 533#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
534#define BN_R_INPUT_NOT_REDUCED 110
510#define BN_R_INVALID_LENGTH 106 535#define BN_R_INVALID_LENGTH 106
511#define BN_R_INVALID_RANGE 115 536#define BN_R_INVALID_RANGE 115
537#define BN_R_NOT_A_SQUARE 111
512#define BN_R_NOT_INITIALIZED 107 538#define BN_R_NOT_INITIALIZED 107
513#define BN_R_NO_INVERSE 108 539#define BN_R_NO_INVERSE 108
540#define BN_R_P_IS_NOT_PRIME 112
541#define BN_R_TOO_MANY_ITERATIONS 113
514#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109 542#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109
515 543
516#ifdef __cplusplus 544#ifdef __cplusplus
517} 545}
518#endif 546#endif
519#endif 547#endif
520
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c
index 5d24691233..6cba07e9f6 100644
--- a/src/lib/libcrypto/bn/bn_add.c
+++ b/src/lib/libcrypto/bn/bn_add.c
@@ -64,6 +64,7 @@
64int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 64int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
65 { 65 {
66 const BIGNUM *tmp; 66 const BIGNUM *tmp;
67 int a_neg = a->neg;
67 68
68 bn_check_top(a); 69 bn_check_top(a);
69 bn_check_top(b); 70 bn_check_top(b);
@@ -73,10 +74,10 @@ int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
73 * -a + b b-a 74 * -a + b b-a
74 * -a + -b -(a+b) 75 * -a + -b -(a+b)
75 */ 76 */
76 if (a->neg ^ b->neg) 77 if (a_neg ^ b->neg)
77 { 78 {
78 /* only one is negative */ 79 /* only one is negative */
79 if (a->neg) 80 if (a_neg)
80 { tmp=a; a=b; b=tmp; } 81 { tmp=a; a=b; b=tmp; }
81 82
82 /* we are now a - b */ 83 /* we are now a - b */
@@ -94,12 +95,11 @@ int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
94 return(1); 95 return(1);
95 } 96 }
96 97
97 if (a->neg) /* both are neg */ 98 if (!BN_uadd(r,a,b)) return(0);
99 if (a_neg) /* both are neg */
98 r->neg=1; 100 r->neg=1;
99 else 101 else
100 r->neg=0; 102 r->neg=0;
101
102 if (!BN_uadd(r,a,b)) return(0);
103 return(1); 103 return(1);
104 } 104 }
105 105
@@ -160,6 +160,7 @@ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
160 *(rp++)= *(ap++); 160 *(rp++)= *(ap++);
161 } 161 }
162 /* memcpy(rp,ap,sizeof(*ap)*(max-i));*/ 162 /* memcpy(rp,ap,sizeof(*ap)*(max-i));*/
163 r->neg = 0;
163 return(1); 164 return(1);
164 } 165 }
165 166
@@ -251,6 +252,7 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
251#endif 252#endif
252 253
253 r->top=max; 254 r->top=max;
255 r->neg=0;
254 bn_fix_top(r); 256 bn_fix_top(r);
255 return(1); 257 return(1);
256 } 258 }
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index 44e52a40db..be8aa3ffc5 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -68,7 +68,7 @@
68 68
69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH) 69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
70 70
71BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 71BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
72 { 72 {
73 BN_ULONG c1=0; 73 BN_ULONG c1=0;
74 74
@@ -93,7 +93,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
93 return(c1); 93 return(c1);
94 } 94 }
95 95
96BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 96BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
97 { 97 {
98 BN_ULONG c1=0; 98 BN_ULONG c1=0;
99 99
@@ -117,7 +117,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
117 return(c1); 117 return(c1);
118 } 118 }
119 119
120void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) 120void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
121 { 121 {
122 assert(n >= 0); 122 assert(n >= 0);
123 if (n <= 0) return; 123 if (n <= 0) return;
@@ -139,7 +139,7 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
139 139
140#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ 140#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
141 141
142BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 142BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
143 { 143 {
144 BN_ULONG c=0; 144 BN_ULONG c=0;
145 BN_ULONG bl,bh; 145 BN_ULONG bl,bh;
@@ -166,7 +166,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
166 return(c); 166 return(c);
167 } 167 }
168 168
169BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 169BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
170 { 170 {
171 BN_ULONG carry=0; 171 BN_ULONG carry=0;
172 BN_ULONG bl,bh; 172 BN_ULONG bl,bh;
@@ -193,7 +193,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
193 return(carry); 193 return(carry);
194 } 194 }
195 195
196void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) 196void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
197 { 197 {
198 assert(n >= 0); 198 assert(n >= 0);
199 if (n <= 0) return; 199 if (n <= 0) return;
@@ -296,7 +296,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
296#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ 296#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */
297 297
298#ifdef BN_LLONG 298#ifdef BN_LLONG
299BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 299BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
300 { 300 {
301 BN_ULLONG ll=0; 301 BN_ULLONG ll=0;
302 302
@@ -332,7 +332,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
332 return((BN_ULONG)ll); 332 return((BN_ULONG)ll);
333 } 333 }
334#else /* !BN_LLONG */ 334#else /* !BN_LLONG */
335BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 335BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
336 { 336 {
337 BN_ULONG c,l,t; 337 BN_ULONG c,l,t;
338 338
@@ -382,7 +382,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
382 } 382 }
383#endif /* !BN_LLONG */ 383#endif /* !BN_LLONG */
384 384
385BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 385BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
386 { 386 {
387 BN_ULONG t1,t2; 387 BN_ULONG t1,t2;
388 int c=0; 388 int c=0;
@@ -673,7 +673,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
673 r[7]=c2; 673 r[7]=c2;
674 } 674 }
675 675
676void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) 676void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
677 { 677 {
678#ifdef BN_LLONG 678#ifdef BN_LLONG
679 BN_ULLONG t,tt; 679 BN_ULLONG t,tt;
@@ -754,7 +754,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
754 r[15]=c1; 754 r[15]=c1;
755 } 755 }
756 756
757void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) 757void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
758 { 758 {
759#ifdef BN_LLONG 759#ifdef BN_LLONG
760 BN_ULLONG t,tt; 760 BN_ULLONG t,tt;
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c
index b1a8d7571e..7daf19eb84 100644
--- a/src/lib/libcrypto/bn/bn_ctx.c
+++ b/src/lib/libcrypto/bn/bn_ctx.c
@@ -61,8 +61,9 @@
61 61
62#include <stdio.h> 62#include <stdio.h>
63#include <assert.h> 63#include <assert.h>
64
64#include "cryptlib.h" 65#include "cryptlib.h"
65#include <openssl/bn.h> 66#include "bn_lcl.h"
66 67
67 68
68BN_CTX *BN_CTX_new(void) 69BN_CTX *BN_CTX_new(void)
@@ -83,6 +84,7 @@ BN_CTX *BN_CTX_new(void)
83 84
84void BN_CTX_init(BN_CTX *ctx) 85void BN_CTX_init(BN_CTX *ctx)
85 { 86 {
87#if 0 /* explicit version */
86 int i; 88 int i;
87 ctx->tos = 0; 89 ctx->tos = 0;
88 ctx->flags = 0; 90 ctx->flags = 0;
@@ -90,6 +92,9 @@ void BN_CTX_init(BN_CTX *ctx)
90 ctx->too_many = 0; 92 ctx->too_many = 0;
91 for (i = 0; i < BN_CTX_NUM; i++) 93 for (i = 0; i < BN_CTX_NUM; i++)
92 BN_init(&(ctx->bn[i])); 94 BN_init(&(ctx->bn[i]));
95#else
96 memset(ctx, 0, sizeof *ctx);
97#endif
93 } 98 }
94 99
95void BN_CTX_free(BN_CTX *ctx) 100void BN_CTX_free(BN_CTX *ctx)
@@ -112,8 +117,14 @@ void BN_CTX_start(BN_CTX *ctx)
112 ctx->depth++; 117 ctx->depth++;
113 } 118 }
114 119
120
115BIGNUM *BN_CTX_get(BN_CTX *ctx) 121BIGNUM *BN_CTX_get(BN_CTX *ctx)
116 { 122 {
123 /* Note: If BN_CTX_get is ever changed to allocate BIGNUMs dynamically,
124 * make sure that if BN_CTX_get fails once it will return NULL again
125 * until BN_CTX_end is called. (This is so that callers have to check
126 * only the last return value.)
127 */
117 if (ctx->depth > BN_CTX_NUM_POS || ctx->tos >= BN_CTX_NUM) 128 if (ctx->depth > BN_CTX_NUM_POS || ctx->tos >= BN_CTX_NUM)
118 { 129 {
119 if (!ctx->too_many) 130 if (!ctx->too_many)
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
index c328b5b411..ac1a09615a 100644
--- a/src/lib/libcrypto/bn/bn_div.c
+++ b/src/lib/libcrypto/bn/bn_div.c
@@ -61,6 +61,7 @@
61#include "cryptlib.h" 61#include "cryptlib.h"
62#include "bn_lcl.h" 62#include "bn_lcl.h"
63 63
64
64/* The old slow way */ 65/* The old slow way */
65#if 0 66#if 0
66int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, 67int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
@@ -126,9 +127,10 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
126 127
127#else 128#else
128 129
129#if !defined(NO_ASM) && !defined(NO_INLINE_ASM) && !defined(PEDANTIC) && !defined(BN_DIV3W) 130#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
131 && !defined(PEDANTIC) && !defined(BN_DIV3W)
130# if defined(__GNUC__) && __GNUC__>=2 132# if defined(__GNUC__) && __GNUC__>=2
131# if defined(__i386) 133# if defined(__i386) || defined (__i386__)
132 /* 134 /*
133 * There were two reasons for implementing this template: 135 * There were two reasons for implementing this template:
134 * - GNU C generates a call to a function (__udivdi3 to be exact) 136 * - GNU C generates a call to a function (__udivdi3 to be exact)
@@ -150,8 +152,16 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
150# define REMAINDER_IS_ALREADY_CALCULATED 152# define REMAINDER_IS_ALREADY_CALCULATED
151# endif /* __<cpu> */ 153# endif /* __<cpu> */
152# endif /* __GNUC__ */ 154# endif /* __GNUC__ */
153#endif /* NO_ASM */ 155#endif /* OPENSSL_NO_ASM */
156
154 157
158/* BN_div computes dv := num / divisor, rounding towards zero, and sets up
159 * rm such that dv*divisor + rm = num holds.
160 * Thus:
161 * dv->neg == num->neg ^ divisor->neg (unless the result is zero)
162 * rm->neg == num->neg (unless the remainder is zero)
163 * If 'dv' or 'rm' is NULL, the respective value is not returned.
164 */
155int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, 165int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
156 BN_CTX *ctx) 166 BN_CTX *ctx)
157 { 167 {
@@ -185,7 +195,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
185 if (dv == NULL) 195 if (dv == NULL)
186 res=BN_CTX_get(ctx); 196 res=BN_CTX_get(ctx);
187 else res=dv; 197 else res=dv;
188 if (sdiv==NULL || res == NULL) goto err; 198 if (sdiv == NULL || res == NULL) goto err;
189 tmp->neg=0; 199 tmp->neg=0;
190 200
191 /* First we normalise the numbers */ 201 /* First we normalise the numbers */
@@ -232,12 +242,14 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
232 } 242 }
233 else 243 else
234 res->top--; 244 res->top--;
245 if (res->top == 0)
246 res->neg = 0;
235 resp--; 247 resp--;
236 248
237 for (i=0; i<loop-1; i++) 249 for (i=0; i<loop-1; i++)
238 { 250 {
239 BN_ULONG q,l0; 251 BN_ULONG q,l0;
240#if defined(BN_DIV3W) && !defined(NO_ASM) 252#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
241 BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG); 253 BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
242 q=bn_div_3_words(wnump,d1,d0); 254 q=bn_div_3_words(wnump,d1,d0);
243#else 255#else
@@ -331,8 +343,13 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
331 } 343 }
332 if (rm != NULL) 344 if (rm != NULL)
333 { 345 {
346 /* Keep a copy of the neg flag in num because if rm==num
347 * BN_rshift() will overwrite it.
348 */
349 int neg = num->neg;
334 BN_rshift(rm,snum,norm_shift); 350 BN_rshift(rm,snum,norm_shift);
335 rm->neg=num->neg; 351 if (!BN_is_zero(rm))
352 rm->neg = neg;
336 } 353 }
337 BN_CTX_end(ctx); 354 BN_CTX_end(ctx);
338 return(1); 355 return(1);
@@ -342,40 +359,3 @@ err:
342 } 359 }
343 360
344#endif 361#endif
345
346/* rem != m */
347int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
348 {
349#if 0 /* The old slow way */
350 int i,nm,nd;
351 BIGNUM *dv;
352
353 if (BN_ucmp(m,d) < 0)
354 return((BN_copy(rem,m) == NULL)?0:1);
355
356 BN_CTX_start(ctx);
357 dv=BN_CTX_get(ctx);
358
359 if (!BN_copy(rem,m)) goto err;
360
361 nm=BN_num_bits(rem);
362 nd=BN_num_bits(d);
363 if (!BN_lshift(dv,d,nm-nd)) goto err;
364 for (i=nm-nd; i>=0; i--)
365 {
366 if (BN_cmp(rem,dv) >= 0)
367 {
368 if (!BN_sub(rem,rem,dv)) goto err;
369 }
370 if (!BN_rshift1(dv,dv)) goto err;
371 }
372 BN_CTX_end(ctx);
373 return(1);
374 err:
375 BN_CTX_end(ctx);
376 return(0);
377#else
378 return(BN_div(NULL,rem,m,d,ctx));
379#endif
380 }
381
diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c
index adc6a214fc..fb84ee96d8 100644
--- a/src/lib/libcrypto/bn/bn_err.c
+++ b/src/lib/libcrypto/bn/bn_err.c
@@ -63,7 +63,7 @@
63#include <openssl/bn.h> 63#include <openssl/bn.h>
64 64
65/* BEGIN ERROR CODES */ 65/* BEGIN ERROR CODES */
66#ifndef NO_ERR 66#ifndef OPENSSL_NO_ERR
67static ERR_STRING_DATA BN_str_functs[]= 67static ERR_STRING_DATA BN_str_functs[]=
68 { 68 {
69{ERR_PACK(0,BN_F_BN_BLINDING_CONVERT,0), "BN_BLINDING_convert"}, 69{ERR_PACK(0,BN_F_BN_BLINDING_CONVERT,0), "BN_BLINDING_convert"},
@@ -76,11 +76,14 @@ static ERR_STRING_DATA BN_str_functs[]=
76{ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"}, 76{ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"},
77{ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"}, 77{ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"},
78{ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"}, 78{ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"},
79{ERR_PACK(0,BN_F_BN_EXPAND_INTERNAL,0), "BN_EXPAND_INTERNAL"},
79{ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"}, 80{ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"},
80{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"}, 81{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"},
81{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"}, 82{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"},
82{ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"}, 83{ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"},
84{ERR_PACK(0,BN_F_BN_MOD_LSHIFT_QUICK,0), "BN_mod_lshift_quick"},
83{ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"}, 85{ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"},
86{ERR_PACK(0,BN_F_BN_MOD_SQRT,0), "BN_mod_sqrt"},
84{ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"}, 87{ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"},
85{ERR_PACK(0,BN_F_BN_NEW,0), "BN_new"}, 88{ERR_PACK(0,BN_F_BN_NEW,0), "BN_new"},
86{ERR_PACK(0,BN_F_BN_RAND,0), "BN_rand"}, 89{ERR_PACK(0,BN_F_BN_RAND,0), "BN_rand"},
@@ -98,10 +101,14 @@ static ERR_STRING_DATA BN_str_reasons[]=
98{BN_R_DIV_BY_ZERO ,"div by zero"}, 101{BN_R_DIV_BY_ZERO ,"div by zero"},
99{BN_R_ENCODING_ERROR ,"encoding error"}, 102{BN_R_ENCODING_ERROR ,"encoding error"},
100{BN_R_EXPAND_ON_STATIC_BIGNUM_DATA ,"expand on static bignum data"}, 103{BN_R_EXPAND_ON_STATIC_BIGNUM_DATA ,"expand on static bignum data"},
104{BN_R_INPUT_NOT_REDUCED ,"input not reduced"},
101{BN_R_INVALID_LENGTH ,"invalid length"}, 105{BN_R_INVALID_LENGTH ,"invalid length"},
102{BN_R_INVALID_RANGE ,"invalid range"}, 106{BN_R_INVALID_RANGE ,"invalid range"},
107{BN_R_NOT_A_SQUARE ,"not a square"},
103{BN_R_NOT_INITIALIZED ,"not initialized"}, 108{BN_R_NOT_INITIALIZED ,"not initialized"},
104{BN_R_NO_INVERSE ,"no inverse"}, 109{BN_R_NO_INVERSE ,"no inverse"},
110{BN_R_P_IS_NOT_PRIME ,"p is not prime"},
111{BN_R_TOO_MANY_ITERATIONS ,"too many iterations"},
105{BN_R_TOO_MANY_TEMPORARY_VARIABLES ,"too many temporary variables"}, 112{BN_R_TOO_MANY_TEMPORARY_VARIABLES ,"too many temporary variables"},
106{0,NULL} 113{0,NULL}
107 }; 114 };
@@ -115,7 +122,7 @@ void ERR_load_BN_strings(void)
115 if (init) 122 if (init)
116 { 123 {
117 init=0; 124 init=0;
118#ifndef NO_ERR 125#ifndef OPENSSL_NO_ERR
119 ERR_load_strings(ERR_LIB_BN,BN_str_functs); 126 ERR_load_strings(ERR_LIB_BN,BN_str_functs);
120 ERR_load_strings(ERR_LIB_BN,BN_str_reasons); 127 ERR_load_strings(ERR_LIB_BN,BN_str_reasons);
121#endif 128#endif
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
index d2c91628ac..afdfd580fb 100644
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ b/src/lib/libcrypto/bn/bn_exp.c
@@ -110,38 +110,13 @@
110 */ 110 */
111 111
112 112
113#include <stdio.h>
114#include "cryptlib.h" 113#include "cryptlib.h"
115#include "bn_lcl.h" 114#include "bn_lcl.h"
116 115
117#define TABLE_SIZE 32 116#define TABLE_SIZE 32
118 117
119/* slow but works */
120int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
121 {
122 BIGNUM *t;
123 int r=0;
124
125 bn_check_top(a);
126 bn_check_top(b);
127 bn_check_top(m);
128
129 BN_CTX_start(ctx);
130 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
131 if (a == b)
132 { if (!BN_sqr(t,a,ctx)) goto err; }
133 else
134 { if (!BN_mul(t,a,b,ctx)) goto err; }
135 if (!BN_mod(ret,t,m,ctx)) goto err;
136 r=1;
137err:
138 BN_CTX_end(ctx);
139 return(r);
140 }
141
142
143/* this one works - simple but works */ 118/* this one works - simple but works */
144int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BN_CTX *ctx) 119int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
145 { 120 {
146 int i,bits,ret=0; 121 int i,bits,ret=0;
147 BIGNUM *v,*rr; 122 BIGNUM *v,*rr;
@@ -176,7 +151,7 @@ err:
176 } 151 }
177 152
178 153
179int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 154int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
180 BN_CTX *ctx) 155 BN_CTX *ctx)
181 { 156 {
182 int ret; 157 int ret;
@@ -185,6 +160,40 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
185 bn_check_top(p); 160 bn_check_top(p);
186 bn_check_top(m); 161 bn_check_top(m);
187 162
163 /* For even modulus m = 2^k*m_odd, it might make sense to compute
164 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
165 * exponentiation for the odd part), using appropriate exponent
166 * reductions, and combine the results using the CRT.
167 *
168 * For now, we use Montgomery only if the modulus is odd; otherwise,
169 * exponentiation using the reciprocal-based quick remaindering
170 * algorithm is used.
171 *
172 * (Timing obtained with expspeed.c [computations a^p mod m
173 * where a, p, m are of the same length: 256, 512, 1024, 2048,
174 * 4096, 8192 bits], compared to the running time of the
175 * standard algorithm:
176 *
177 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
178 * 55 .. 77 % [UltraSparc processor, but
179 * debug-solaris-sparcv8-gcc conf.]
180 *
181 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
182 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
183 *
184 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
185 * at 2048 and more bits, but at 512 and 1024 bits, it was
186 * slower even than the standard algorithm!
187 *
188 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
189 * should be obtained when the new Montgomery reduction code
190 * has been integrated into OpenSSL.)
191 */
192
193#define MONT_MUL_MOD
194#define MONT_EXP_WORD
195#define RECP_MUL_MOD
196
188#ifdef MONT_MUL_MOD 197#ifdef MONT_MUL_MOD
189 /* I have finally been able to take out this pre-condition of 198 /* I have finally been able to take out this pre-condition of
190 * the top bit being set. It was caused by an error in BN_div 199 * the top bit being set. It was caused by an error in BN_div
@@ -194,12 +203,14 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
194 203
195 if (BN_is_odd(m)) 204 if (BN_is_odd(m))
196 { 205 {
197 if (a->top == 1) 206# ifdef MONT_EXP_WORD
207 if (a->top == 1 && !a->neg)
198 { 208 {
199 BN_ULONG A = a->d[0]; 209 BN_ULONG A = a->d[0];
200 ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL); 210 ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
201 } 211 }
202 else 212 else
213# endif
203 ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); 214 ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
204 } 215 }
205 else 216 else
@@ -227,20 +238,35 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
227 238
228 if (bits == 0) 239 if (bits == 0)
229 { 240 {
230 BN_one(r); 241 ret = BN_one(r);
231 return(1); 242 return ret;
232 } 243 }
233 244
234 BN_CTX_start(ctx); 245 BN_CTX_start(ctx);
235 if ((aa = BN_CTX_get(ctx)) == NULL) goto err; 246 if ((aa = BN_CTX_get(ctx)) == NULL) goto err;
236 247
237 BN_RECP_CTX_init(&recp); 248 BN_RECP_CTX_init(&recp);
238 if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err; 249 if (m->neg)
250 {
251 /* ignore sign of 'm' */
252 if (!BN_copy(aa, m)) goto err;
253 aa->neg = 0;
254 if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
255 }
256 else
257 {
258 if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
259 }
239 260
240 BN_init(&(val[0])); 261 BN_init(&(val[0]));
241 ts=1; 262 ts=1;
242 263
243 if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ 264 if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err; /* 1 */
265 if (BN_is_zero(&(val[0])))
266 {
267 ret = BN_zero(r);
268 goto err;
269 }
244 270
245 window = BN_window_bits_for_exponent_size(bits); 271 window = BN_window_bits_for_exponent_size(bits);
246 if (window > 1) 272 if (window > 1)
@@ -325,13 +351,13 @@ err:
325 } 351 }
326 352
327 353
328int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, 354int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
329 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 355 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
330 { 356 {
331 int i,j,bits,ret=0,wstart,wend,window,wvalue; 357 int i,j,bits,ret=0,wstart,wend,window,wvalue;
332 int start=1,ts=0; 358 int start=1,ts=0;
333 BIGNUM *d,*r; 359 BIGNUM *d,*r;
334 BIGNUM *aa; 360 const BIGNUM *aa;
335 BIGNUM val[TABLE_SIZE]; 361 BIGNUM val[TABLE_SIZE];
336 BN_MONT_CTX *mont=NULL; 362 BN_MONT_CTX *mont=NULL;
337 363
@@ -347,9 +373,10 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
347 bits=BN_num_bits(p); 373 bits=BN_num_bits(p);
348 if (bits == 0) 374 if (bits == 0)
349 { 375 {
350 BN_one(rr); 376 ret = BN_one(rr);
351 return(1); 377 return ret;
352 } 378 }
379
353 BN_CTX_start(ctx); 380 BN_CTX_start(ctx);
354 d = BN_CTX_get(ctx); 381 d = BN_CTX_get(ctx);
355 r = BN_CTX_get(ctx); 382 r = BN_CTX_get(ctx);
@@ -368,14 +395,19 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
368 395
369 BN_init(&val[0]); 396 BN_init(&val[0]);
370 ts=1; 397 ts=1;
371 if (BN_ucmp(a,m) >= 0) 398 if (a->neg || BN_ucmp(a,m) >= 0)
372 { 399 {
373 if (!BN_mod(&(val[0]),a,m,ctx)) 400 if (!BN_nnmod(&(val[0]),a,m,ctx))
374 goto err; 401 goto err;
375 aa= &(val[0]); 402 aa= &(val[0]);
376 } 403 }
377 else 404 else
378 aa=a; 405 aa=a;
406 if (BN_is_zero(aa))
407 {
408 ret = BN_zero(rr);
409 goto err;
410 }
379 if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */ 411 if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */
380 412
381 window = BN_window_bits_for_exponent_size(bits); 413 window = BN_window_bits_for_exponent_size(bits);
@@ -475,26 +507,39 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
475 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 507 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
476 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 508 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
477 /* BN_MOD_MUL_WORD is only used with 'w' large, 509 /* BN_MOD_MUL_WORD is only used with 'w' large,
478 * so the BN_ucmp test is probably more overhead 510 * so the BN_ucmp test is probably more overhead
479 * than always using BN_mod (which uses BN_copy if 511 * than always using BN_mod (which uses BN_copy if
480 * a similar test returns true). */ 512 * a similar test returns true). */
513 /* We can use BN_mod and do not need BN_nnmod because our
514 * accumulator is never negative (the result of BN_mod does
515 * not depend on the sign of the modulus).
516 */
481#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 517#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
482 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 518 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
483 519
484 bn_check_top(p); 520 bn_check_top(p);
485 bn_check_top(m); 521 bn_check_top(m);
486 522
487 if (!(m->d[0] & 1)) 523 if (m->top == 0 || !(m->d[0] & 1))
488 { 524 {
489 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS); 525 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
490 return(0); 526 return(0);
491 } 527 }
528 if (m->top == 1)
529 a %= m->d[0]; /* make sure that 'a' is reduced */
530
492 bits = BN_num_bits(p); 531 bits = BN_num_bits(p);
493 if (bits == 0) 532 if (bits == 0)
494 { 533 {
495 BN_one(rr); 534 ret = BN_one(rr);
496 return(1); 535 return ret;
536 }
537 if (a == 0)
538 {
539 ret = BN_zero(rr);
540 return ret;
497 } 541 }
542
498 BN_CTX_start(ctx); 543 BN_CTX_start(ctx);
499 d = BN_CTX_get(ctx); 544 d = BN_CTX_get(ctx);
500 r = BN_CTX_get(ctx); 545 r = BN_CTX_get(ctx);
@@ -590,8 +635,9 @@ err:
590 635
591 636
592/* The old fallback, simple version :-) */ 637/* The old fallback, simple version :-) */
593int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, 638int BN_mod_exp_simple(BIGNUM *r,
594 BN_CTX *ctx) 639 const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
640 BN_CTX *ctx)
595 { 641 {
596 int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0; 642 int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0;
597 int start=1; 643 int start=1;
@@ -602,8 +648,8 @@ int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,
602 648
603 if (bits == 0) 649 if (bits == 0)
604 { 650 {
605 BN_one(r); 651 ret = BN_one(r);
606 return(1); 652 return ret;
607 } 653 }
608 654
609 BN_CTX_start(ctx); 655 BN_CTX_start(ctx);
@@ -611,7 +657,12 @@ int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,
611 657
612 BN_init(&(val[0])); 658 BN_init(&(val[0]));
613 ts=1; 659 ts=1;
614 if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ 660 if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err; /* 1 */
661 if (BN_is_zero(&(val[0])))
662 {
663 ret = BN_zero(r);
664 goto err;
665 }
615 666
616 window = BN_window_bits_for_exponent_size(bits); 667 window = BN_window_bits_for_exponent_size(bits);
617 if (window > 1) 668 if (window > 1)
diff --git a/src/lib/libcrypto/bn/bn_exp2.c b/src/lib/libcrypto/bn/bn_exp2.c
index 29029f4c72..73ccd58a83 100644
--- a/src/lib/libcrypto/bn/bn_exp2.c
+++ b/src/lib/libcrypto/bn/bn_exp2.c
@@ -115,13 +115,14 @@
115 115
116#define TABLE_SIZE 32 116#define TABLE_SIZE 32
117 117
118int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, 118int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
119 BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 119 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
120 BN_CTX *ctx, BN_MONT_CTX *in_mont)
120 { 121 {
121 int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2; 122 int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2;
122 int r_is_one=1,ts1=0,ts2=0; 123 int r_is_one=1,ts1=0,ts2=0;
123 BIGNUM *d,*r; 124 BIGNUM *d,*r;
124 BIGNUM *a_mod_m; 125 const BIGNUM *a_mod_m;
125 BIGNUM val1[TABLE_SIZE], val2[TABLE_SIZE]; 126 BIGNUM val1[TABLE_SIZE], val2[TABLE_SIZE];
126 BN_MONT_CTX *mont=NULL; 127 BN_MONT_CTX *mont=NULL;
127 128
@@ -140,9 +141,10 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
140 bits2=BN_num_bits(p2); 141 bits2=BN_num_bits(p2);
141 if ((bits1 == 0) && (bits2 == 0)) 142 if ((bits1 == 0) && (bits2 == 0))
142 { 143 {
143 BN_one(rr); 144 ret = BN_one(rr);
144 return(1); 145 return ret;
145 } 146 }
147
146 bits=(bits1 > bits2)?bits1:bits2; 148 bits=(bits1 > bits2)?bits1:bits2;
147 149
148 BN_CTX_start(ctx); 150 BN_CTX_start(ctx);
@@ -166,7 +168,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
166 */ 168 */
167 BN_init(&val1[0]); 169 BN_init(&val1[0]);
168 ts1=1; 170 ts1=1;
169 if (BN_ucmp(a1,m) >= 0) 171 if (a1->neg || BN_ucmp(a1,m) >= 0)
170 { 172 {
171 if (!BN_mod(&(val1[0]),a1,m,ctx)) 173 if (!BN_mod(&(val1[0]),a1,m,ctx))
172 goto err; 174 goto err;
@@ -174,6 +176,12 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
174 } 176 }
175 else 177 else
176 a_mod_m = a1; 178 a_mod_m = a1;
179 if (BN_is_zero(a_mod_m))
180 {
181 ret = BN_zero(rr);
182 goto err;
183 }
184
177 if (!BN_to_montgomery(&(val1[0]),a_mod_m,mont,ctx)) goto err; 185 if (!BN_to_montgomery(&(val1[0]),a_mod_m,mont,ctx)) goto err;
178 if (window1 > 1) 186 if (window1 > 1)
179 { 187 {
@@ -195,7 +203,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
195 */ 203 */
196 BN_init(&val2[0]); 204 BN_init(&val2[0]);
197 ts2=1; 205 ts2=1;
198 if (BN_ucmp(a2,m) >= 0) 206 if (a2->neg || BN_ucmp(a2,m) >= 0)
199 { 207 {
200 if (!BN_mod(&(val2[0]),a2,m,ctx)) 208 if (!BN_mod(&(val2[0]),a2,m,ctx))
201 goto err; 209 goto err;
@@ -203,6 +211,11 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
203 } 211 }
204 else 212 else
205 a_mod_m = a2; 213 a_mod_m = a2;
214 if (BN_is_zero(a_mod_m))
215 {
216 ret = BN_zero(rr);
217 goto err;
218 }
206 if (!BN_to_montgomery(&(val2[0]),a_mod_m,mont,ctx)) goto err; 219 if (!BN_to_montgomery(&(val2[0]),a_mod_m,mont,ctx)) goto err;
207 if (window2 > 1) 220 if (window2 > 1)
208 { 221 {
diff --git a/src/lib/libcrypto/bn/bn_gcd.c b/src/lib/libcrypto/bn/bn_gcd.c
index 398207196b..7649f63fd2 100644
--- a/src/lib/libcrypto/bn/bn_gcd.c
+++ b/src/lib/libcrypto/bn/bn_gcd.c
@@ -55,14 +55,66 @@
55 * copied and put under another distribution licence 55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
58 111
59#include <stdio.h>
60#include "cryptlib.h" 112#include "cryptlib.h"
61#include "bn_lcl.h" 113#include "bn_lcl.h"
62 114
63static BIGNUM *euclid(BIGNUM *a, BIGNUM *b); 115static BIGNUM *euclid(BIGNUM *a, BIGNUM *b);
64 116
65int BN_gcd(BIGNUM *r, BIGNUM *in_a, BIGNUM *in_b, BN_CTX *ctx) 117int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
66 { 118 {
67 BIGNUM *a,*b,*t; 119 BIGNUM *a,*b,*t;
68 int ret=0; 120 int ret=0;
@@ -77,6 +129,8 @@ int BN_gcd(BIGNUM *r, BIGNUM *in_a, BIGNUM *in_b, BN_CTX *ctx)
77 129
78 if (BN_copy(a,in_a) == NULL) goto err; 130 if (BN_copy(a,in_a) == NULL) goto err;
79 if (BN_copy(b,in_b) == NULL) goto err; 131 if (BN_copy(b,in_b) == NULL) goto err;
132 a->neg = 0;
133 b->neg = 0;
80 134
81 if (BN_cmp(a,b) < 0) { t=a; a=b; b=t; } 135 if (BN_cmp(a,b) < 0) { t=a; a=b; b=t; }
82 t=euclid(a,b); 136 t=euclid(a,b);
@@ -97,10 +151,10 @@ static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
97 bn_check_top(a); 151 bn_check_top(a);
98 bn_check_top(b); 152 bn_check_top(b);
99 153
100 for (;;) 154 /* 0 <= b <= a */
155 while (!BN_is_zero(b))
101 { 156 {
102 if (BN_is_zero(b)) 157 /* 0 < b <= a */
103 break;
104 158
105 if (BN_is_odd(a)) 159 if (BN_is_odd(a))
106 { 160 {
@@ -133,7 +187,9 @@ static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
133 shifts++; 187 shifts++;
134 } 188 }
135 } 189 }
190 /* 0 <= b <= a */
136 } 191 }
192
137 if (shifts) 193 if (shifts)
138 { 194 {
139 if (!BN_lshift(a,a,shifts)) goto err; 195 if (!BN_lshift(a,a,shifts)) goto err;
@@ -143,11 +199,13 @@ err:
143 return(NULL); 199 return(NULL);
144 } 200 }
145 201
202
146/* solves ax == 1 (mod n) */ 203/* solves ax == 1 (mod n) */
147BIGNUM *BN_mod_inverse(BIGNUM *in, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) 204BIGNUM *BN_mod_inverse(BIGNUM *in,
205 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
148 { 206 {
149 BIGNUM *A,*B,*X,*Y,*M,*D,*R=NULL; 207 BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
150 BIGNUM *T,*ret=NULL; 208 BIGNUM *ret=NULL;
151 int sign; 209 int sign;
152 210
153 bn_check_top(a); 211 bn_check_top(a);
@@ -160,7 +218,8 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
160 D = BN_CTX_get(ctx); 218 D = BN_CTX_get(ctx);
161 M = BN_CTX_get(ctx); 219 M = BN_CTX_get(ctx);
162 Y = BN_CTX_get(ctx); 220 Y = BN_CTX_get(ctx);
163 if (Y == NULL) goto err; 221 T = BN_CTX_get(ctx);
222 if (T == NULL) goto err;
164 223
165 if (in == NULL) 224 if (in == NULL)
166 R=BN_new(); 225 R=BN_new();
@@ -168,34 +227,256 @@ BIGNUM *BN_mod_inverse(BIGNUM *in, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
168 R=in; 227 R=in;
169 if (R == NULL) goto err; 228 if (R == NULL) goto err;
170 229
171 BN_zero(X); 230 BN_one(X);
172 BN_one(Y); 231 BN_zero(Y);
173 if (BN_copy(A,a) == NULL) goto err; 232 if (BN_copy(B,a) == NULL) goto err;
174 if (BN_copy(B,n) == NULL) goto err; 233 if (BN_copy(A,n) == NULL) goto err;
175 sign=1; 234 A->neg = 0;
235 if (B->neg || (BN_ucmp(B, A) >= 0))
236 {
237 if (!BN_nnmod(B, B, A, ctx)) goto err;
238 }
239 sign = -1;
240 /* From B = a mod |n|, A = |n| it follows that
241 *
242 * 0 <= B < A,
243 * -sign*X*a == B (mod |n|),
244 * sign*Y*a == A (mod |n|).
245 */
176 246
177 while (!BN_is_zero(B)) 247 if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048)))
178 { 248 {
179 if (!BN_div(D,M,A,B,ctx)) goto err; 249 /* Binary inversion algorithm; requires odd modulus.
180 T=A; 250 * This is faster than the general algorithm if the modulus
181 A=B; 251 * is sufficiently small (about 400 .. 500 bits on 32-bit
182 B=M; 252 * systems, but much more on 64-bit systems) */
183 /* T has a struct, M does not */ 253 int shift;
184 254
185 if (!BN_mul(T,D,X,ctx)) goto err; 255 while (!BN_is_zero(B))
186 if (!BN_add(T,T,Y)) goto err; 256 {
187 M=Y; 257 /*
188 Y=X; 258 * 0 < B < |n|,
189 X=T; 259 * 0 < A <= |n|,
190 sign= -sign; 260 * (1) -sign*X*a == B (mod |n|),
261 * (2) sign*Y*a == A (mod |n|)
262 */
263
264 /* Now divide B by the maximum possible power of two in the integers,
265 * and divide X by the same value mod |n|.
266 * When we're done, (1) still holds. */
267 shift = 0;
268 while (!BN_is_bit_set(B, shift)) /* note that 0 < B */
269 {
270 shift++;
271
272 if (BN_is_odd(X))
273 {
274 if (!BN_uadd(X, X, n)) goto err;
275 }
276 /* now X is even, so we can easily divide it by two */
277 if (!BN_rshift1(X, X)) goto err;
278 }
279 if (shift > 0)
280 {
281 if (!BN_rshift(B, B, shift)) goto err;
282 }
283
284
285 /* Same for A and Y. Afterwards, (2) still holds. */
286 shift = 0;
287 while (!BN_is_bit_set(A, shift)) /* note that 0 < A */
288 {
289 shift++;
290
291 if (BN_is_odd(Y))
292 {
293 if (!BN_uadd(Y, Y, n)) goto err;
294 }
295 /* now Y is even */
296 if (!BN_rshift1(Y, Y)) goto err;
297 }
298 if (shift > 0)
299 {
300 if (!BN_rshift(A, A, shift)) goto err;
301 }
302
303
304 /* We still have (1) and (2).
305 * Both A and B are odd.
306 * The following computations ensure that
307 *
308 * 0 <= B < |n|,
309 * 0 < A < |n|,
310 * (1) -sign*X*a == B (mod |n|),
311 * (2) sign*Y*a == A (mod |n|),
312 *
313 * and that either A or B is even in the next iteration.
314 */
315 if (BN_ucmp(B, A) >= 0)
316 {
317 /* -sign*(X + Y)*a == B - A (mod |n|) */
318 if (!BN_uadd(X, X, Y)) goto err;
319 /* NB: we could use BN_mod_add_quick(X, X, Y, n), but that
320 * actually makes the algorithm slower */
321 if (!BN_usub(B, B, A)) goto err;
322 }
323 else
324 {
325 /* sign*(X + Y)*a == A - B (mod |n|) */
326 if (!BN_uadd(Y, Y, X)) goto err;
327 /* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */
328 if (!BN_usub(A, A, B)) goto err;
329 }
330 }
331 }
332 else
333 {
334 /* general inversion algorithm */
335
336 while (!BN_is_zero(B))
337 {
338 BIGNUM *tmp;
339
340 /*
341 * 0 < B < A,
342 * (*) -sign*X*a == B (mod |n|),
343 * sign*Y*a == A (mod |n|)
344 */
345
346 /* (D, M) := (A/B, A%B) ... */
347 if (BN_num_bits(A) == BN_num_bits(B))
348 {
349 if (!BN_one(D)) goto err;
350 if (!BN_sub(M,A,B)) goto err;
351 }
352 else if (BN_num_bits(A) == BN_num_bits(B) + 1)
353 {
354 /* A/B is 1, 2, or 3 */
355 if (!BN_lshift1(T,B)) goto err;
356 if (BN_ucmp(A,T) < 0)
357 {
358 /* A < 2*B, so D=1 */
359 if (!BN_one(D)) goto err;
360 if (!BN_sub(M,A,B)) goto err;
361 }
362 else
363 {
364 /* A >= 2*B, so D=2 or D=3 */
365 if (!BN_sub(M,A,T)) goto err;
366 if (!BN_add(D,T,B)) goto err; /* use D (:= 3*B) as temp */
367 if (BN_ucmp(A,D) < 0)
368 {
369 /* A < 3*B, so D=2 */
370 if (!BN_set_word(D,2)) goto err;
371 /* M (= A - 2*B) already has the correct value */
372 }
373 else
374 {
375 /* only D=3 remains */
376 if (!BN_set_word(D,3)) goto err;
377 /* currently M = A - 2*B, but we need M = A - 3*B */
378 if (!BN_sub(M,M,B)) goto err;
379 }
380 }
381 }
382 else
383 {
384 if (!BN_div(D,M,A,B,ctx)) goto err;
385 }
386
387 /* Now
388 * A = D*B + M;
389 * thus we have
390 * (**) sign*Y*a == D*B + M (mod |n|).
391 */
392
393 tmp=A; /* keep the BIGNUM object, the value does not matter */
394
395 /* (A, B) := (B, A mod B) ... */
396 A=B;
397 B=M;
398 /* ... so we have 0 <= B < A again */
399
400 /* Since the former M is now B and the former B is now A,
401 * (**) translates into
402 * sign*Y*a == D*A + B (mod |n|),
403 * i.e.
404 * sign*Y*a - D*A == B (mod |n|).
405 * Similarly, (*) translates into
406 * -sign*X*a == A (mod |n|).
407 *
408 * Thus,
409 * sign*Y*a + D*sign*X*a == B (mod |n|),
410 * i.e.
411 * sign*(Y + D*X)*a == B (mod |n|).
412 *
413 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
414 * -sign*X*a == B (mod |n|),
415 * sign*Y*a == A (mod |n|).
416 * Note that X and Y stay non-negative all the time.
417 */
418
419 /* most of the time D is very small, so we can optimize tmp := D*X+Y */
420 if (BN_is_one(D))
421 {
422 if (!BN_add(tmp,X,Y)) goto err;
423 }
424 else
425 {
426 if (BN_is_word(D,2))
427 {
428 if (!BN_lshift1(tmp,X)) goto err;
429 }
430 else if (BN_is_word(D,4))
431 {
432 if (!BN_lshift(tmp,X,2)) goto err;
433 }
434 else if (D->top == 1)
435 {
436 if (!BN_copy(tmp,X)) goto err;
437 if (!BN_mul_word(tmp,D->d[0])) goto err;
438 }
439 else
440 {
441 if (!BN_mul(tmp,D,X,ctx)) goto err;
442 }
443 if (!BN_add(tmp,tmp,Y)) goto err;
444 }
445
446 M=Y; /* keep the BIGNUM object, the value does not matter */
447 Y=X;
448 X=tmp;
449 sign = -sign;
450 }
191 } 451 }
452
453 /*
454 * The while loop (Euclid's algorithm) ends when
455 * A == gcd(a,n);
456 * we have
457 * sign*Y*a == A (mod |n|),
458 * where Y is non-negative.
459 */
460
192 if (sign < 0) 461 if (sign < 0)
193 { 462 {
194 if (!BN_sub(Y,n,Y)) goto err; 463 if (!BN_sub(Y,n,Y)) goto err;
195 } 464 }
465 /* Now Y*a == A (mod |n|). */
466
196 467
197 if (BN_is_one(A)) 468 if (BN_is_one(A))
198 { if (!BN_mod(R,Y,n,ctx)) goto err; } 469 {
470 /* Y*a == 1 (mod |n|) */
471 if (!Y->neg && BN_ucmp(Y,n) < 0)
472 {
473 if (!BN_copy(R,Y)) goto err;
474 }
475 else
476 {
477 if (!BN_nnmod(R,Y,n,ctx)) goto err;
478 }
479 }
199 else 480 else
200 { 481 {
201 BNerr(BN_F_BN_MOD_INVERSE,BN_R_NO_INVERSE); 482 BNerr(BN_F_BN_MOD_INVERSE,BN_R_NO_INVERSE);
@@ -207,4 +488,3 @@ err:
207 BN_CTX_end(ctx); 488 BN_CTX_end(ctx);
208 return(ret); 489 return(ret);
209 } 490 }
210
diff --git a/src/lib/libcrypto/bn/bn_kron.c b/src/lib/libcrypto/bn/bn_kron.c
new file mode 100644
index 0000000000..49f75594ae
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_kron.c
@@ -0,0 +1,182 @@
1/* crypto/bn/bn_kron.c */
2/* ====================================================================
3 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include "bn_lcl.h"
57
58
59/* least significant word */
60#define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
61
62/* Returns -2 for errors because both -1 and 0 are valid results. */
63int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
64 {
65 int i;
66 int ret = -2; /* avoid 'uninitialized' warning */
67 int err = 0;
68 BIGNUM *A, *B, *tmp;
69 /* In 'tab', only odd-indexed entries are relevant:
70 * For any odd BIGNUM n,
71 * tab[BN_lsw(n) & 7]
72 * is $(-1)^{(n^2-1)/8}$ (using TeX notation).
73 * Note that the sign of n does not matter.
74 */
75 static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};
76
77 BN_CTX_start(ctx);
78 A = BN_CTX_get(ctx);
79 B = BN_CTX_get(ctx);
80 if (B == NULL) goto end;
81
82 err = !BN_copy(A, a);
83 if (err) goto end;
84 err = !BN_copy(B, b);
85 if (err) goto end;
86
87 /*
88 * Kronecker symbol, implemented according to Henri Cohen,
89 * "A Course in Computational Algebraic Number Theory"
90 * (algorithm 1.4.10).
91 */
92
93 /* Cohen's step 1: */
94
95 if (BN_is_zero(B))
96 {
97 ret = BN_abs_is_word(A, 1);
98 goto end;
99 }
100
101 /* Cohen's step 2: */
102
103 if (!BN_is_odd(A) && !BN_is_odd(B))
104 {
105 ret = 0;
106 goto end;
107 }
108
109 /* now B is non-zero */
110 i = 0;
111 while (!BN_is_bit_set(B, i))
112 i++;
113 err = !BN_rshift(B, B, i);
114 if (err) goto end;
115 if (i & 1)
116 {
117 /* i is odd */
118 /* (thus B was even, thus A must be odd!) */
119
120 /* set 'ret' to $(-1)^{(A^2-1)/8}$ */
121 ret = tab[BN_lsw(A) & 7];
122 }
123 else
124 {
125 /* i is even */
126 ret = 1;
127 }
128
129 if (B->neg)
130 {
131 B->neg = 0;
132 if (A->neg)
133 ret = -ret;
134 }
135
136 /* now B is positive and odd, so what remains to be done is
137 * to compute the Jacobi symbol (A/B) and multiply it by 'ret' */
138
139 while (1)
140 {
141 /* Cohen's step 3: */
142
143 /* B is positive and odd */
144
145 if (BN_is_zero(A))
146 {
147 ret = BN_is_one(B) ? ret : 0;
148 goto end;
149 }
150
151 /* now A is non-zero */
152 i = 0;
153 while (!BN_is_bit_set(A, i))
154 i++;
155 err = !BN_rshift(A, A, i);
156 if (err) goto end;
157 if (i & 1)
158 {
159 /* i is odd */
160 /* multiply 'ret' by $(-1)^{(B^2-1)/8}$ */
161 ret = ret * tab[BN_lsw(B) & 7];
162 }
163
164 /* Cohen's step 4: */
165 /* multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$ */
166 if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2)
167 ret = -ret;
168
169 /* (A, B) := (B mod |A|, |A|) */
170 err = !BN_nnmod(B, B, A, ctx);
171 if (err) goto end;
172 tmp = A; A = B; B = tmp;
173 tmp->neg = 0;
174 }
175
176 end:
177 BN_CTX_end(ctx);
178 if (err)
179 return -2;
180 else
181 return ret;
182 }
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h
index 9c959921b4..8a4dba375a 100644
--- a/src/lib/libcrypto/bn/bn_lcl.h
+++ b/src/lib/libcrypto/bn/bn_lcl.h
@@ -119,6 +119,20 @@ extern "C" {
119#endif 119#endif
120 120
121 121
122/* Used for temp variables */
123#define BN_CTX_NUM 32
124#define BN_CTX_NUM_POS 12
125struct bignum_ctx
126 {
127 int tos;
128 BIGNUM bn[BN_CTX_NUM];
129 int flags;
130 int depth;
131 int pos[BN_CTX_NUM_POS];
132 int too_many;
133 } /* BN_CTX */;
134
135
122/* 136/*
123 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions 137 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
124 * 138 *
@@ -171,7 +185,7 @@ extern "C" {
171#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */ 185#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */
172#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */ 186#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */
173 187
174#if !defined(NO_ASM) && !defined(NO_INLINE_ASM) && !defined(PEDANTIC) 188#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
175/* 189/*
176 * BN_UMULT_HIGH section. 190 * BN_UMULT_HIGH section.
177 * 191 *
@@ -217,7 +231,7 @@ extern "C" {
217 ret; }) 231 ret; })
218# endif /* compiler */ 232# endif /* compiler */
219# endif /* cpu */ 233# endif /* cpu */
220#endif /* NO_ASM */ 234#endif /* OPENSSL_NO_ASM */
221 235
222/************************************************************* 236/*************************************************************
223 * Using the long long type 237 * Using the long long type
@@ -398,19 +412,26 @@ extern "C" {
398void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb); 412void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
399void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b); 413void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
400void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b); 414void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
401void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp); 415void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
402void bn_sqr_comba8(BN_ULONG *r,BN_ULONG *a); 416void bn_sqr_comba8(BN_ULONG *r,const BN_ULONG *a);
403void bn_sqr_comba4(BN_ULONG *r,BN_ULONG *a); 417void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a);
404int bn_cmp_words(BN_ULONG *a,BN_ULONG *b,int n); 418int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n);
405void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,BN_ULONG *t); 419int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
420 int cl, int dl);
421void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
422 int dna,int dnb,BN_ULONG *t);
406void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, 423void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,
407 int tn, int n,BN_ULONG *t); 424 int n,int tna,int tnb,BN_ULONG *t);
408void bn_sqr_recursive(BN_ULONG *r,BN_ULONG *a, int n2, BN_ULONG *t); 425void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t);
409void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n); 426void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
410void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2, 427void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
411 BN_ULONG *t); 428 BN_ULONG *t);
412void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2, 429void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2,
413 BN_ULONG *t); 430 BN_ULONG *t);
431BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
432 int cl, int dl);
433BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
434 int cl, int dl);
414 435
415#ifdef __cplusplus 436#ifdef __cplusplus
416} 437}
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
index 7767d65170..a016cb7f53 100644
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ b/src/lib/libcrypto/bn/bn_lib.c
@@ -128,7 +128,7 @@ int BN_get_params(int which)
128 else return(0); 128 else return(0);
129 } 129 }
130 130
131BIGNUM *BN_value_one(void) 131const BIGNUM *BN_value_one(void)
132 { 132 {
133 static BN_ULONG data_one=1L; 133 static BN_ULONG data_one=1L;
134 static BIGNUM const_one={&data_one,1,1,0}; 134 static BIGNUM const_one={&data_one,1,1,0};
@@ -305,172 +305,168 @@ BIGNUM *BN_new(void)
305 return(ret); 305 return(ret);
306 } 306 }
307 307
308/* This is an internal function that should not be used in applications. 308/* This is used both by bn_expand2() and bn_dup_expand() */
309 * It ensures that 'b' has enough room for a 'words' word number number. 309/* The caller MUST check that words > b->dmax before calling this */
310 * It is mostly used by the various BIGNUM routines. If there is an error, 310static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
311 * NULL is returned. If not, 'b' is returned. */
312
313BIGNUM *bn_expand2(BIGNUM *b, int words)
314 { 311 {
315 BN_ULONG *A,*a; 312 BN_ULONG *A,*a = NULL;
316 const BN_ULONG *B; 313 const BN_ULONG *B;
317 int i; 314 int i;
318 315
319 bn_check_top(b); 316 if (words > (INT_MAX/(4*BN_BITS2)))
317 {
318 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_BIGNUM_TOO_LONG);
319 return NULL;
320 }
320 321
321 if (words > b->dmax) 322 bn_check_top(b);
323 if (BN_get_flags(b,BN_FLG_STATIC_DATA))
322 { 324 {
323 if (words > (INT_MAX/(4*BN_BITS2))) 325 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
324 { 326 return(NULL);
325 BNerr(BN_F_BN_EXPAND2,BN_R_BIGNUM_TOO_LONG); 327 }
326 return NULL; 328 a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1));
327 } 329 if (A == NULL)
328 330 {
329 bn_check_top(b); 331 BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE);
330 if (BN_get_flags(b,BN_FLG_STATIC_DATA)) 332 return(NULL);
333 }
334#if 1
335 B=b->d;
336 /* Check if the previous number needs to be copied */
337 if (B != NULL)
338 {
339 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
331 { 340 {
332 BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); 341 /*
333 return(NULL); 342 * The fact that the loop is unrolled
343 * 4-wise is a tribute to Intel. It's
344 * the one that doesn't have enough
345 * registers to accomodate more data.
346 * I'd unroll it 8-wise otherwise:-)
347 *
348 * <appro@fy.chalmers.se>
349 */
350 BN_ULONG a0,a1,a2,a3;
351 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
352 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
334 } 353 }
335 a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1)); 354 switch (b->top&3)
336 if (A == NULL)
337 { 355 {
338 BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE); 356 case 3: A[2]=B[2];
339 return(NULL); 357 case 2: A[1]=B[1];
358 case 1: A[0]=B[0];
359 case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does
360 * the switch table by doing a=top&3; a--; goto jump_table[a];
361 * which fails for top== 0 */
362 ;
340 } 363 }
341#if 1 364 }
342 B=b->d; 365
343 /* Check if the previous number needs to be copied */ 366 /* Now need to zero any data between b->top and b->max */
344 if (B != NULL) 367 /* XXX Why? */
345 { 368
346#if 0 369 A= &(a[b->top]);
347 /* This lot is an unrolled loop to copy b->top 370 for (i=(words - b->top)>>3; i>0; i--,A+=8)
348 * BN_ULONGs from B to A 371 {
349 */ 372 A[0]=0; A[1]=0; A[2]=0; A[3]=0;
350/* 373 A[4]=0; A[5]=0; A[6]=0; A[7]=0;
351 * I have nothing against unrolling but it's usually done for 374 }
352 * several reasons, namely: 375 for (i=(words - b->top)&7; i>0; i--,A++)
353 * - minimize percentage of decision making code, i.e. branches; 376 A[0]=0;
354 * - avoid cache trashing;
355 * - make it possible to schedule loads earlier;
356 * Now let's examine the code below. The cornerstone of C is
357 * "programmer is always right" and that's what we love it for:-)
358 * For this very reason C compilers have to be paranoid when it
359 * comes to data aliasing and assume the worst. Yeah, but what
360 * does it mean in real life? This means that loop body below will
361 * be compiled to sequence of loads immediately followed by stores
362 * as compiler assumes the worst, something in A==B+1 style. As a
363 * result CPU pipeline is going to starve for incoming data. Secondly
364 * if A and B happen to share same cache line such code is going to
365 * cause severe cache trashing. Both factors have severe impact on
366 * performance of modern CPUs and this is the reason why this
367 * particular piece of code is #ifdefed away and replaced by more
368 * "friendly" version found in #else section below. This comment
369 * also applies to BN_copy function.
370 *
371 * <appro@fy.chalmers.se>
372 */
373 for (i=b->top&(~7); i>0; i-=8)
374 {
375 A[0]=B[0]; A[1]=B[1]; A[2]=B[2]; A[3]=B[3];
376 A[4]=B[4]; A[5]=B[5]; A[6]=B[6]; A[7]=B[7];
377 A+=8;
378 B+=8;
379 }
380 switch (b->top&7)
381 {
382 case 7:
383 A[6]=B[6];
384 case 6:
385 A[5]=B[5];
386 case 5:
387 A[4]=B[4];
388 case 4:
389 A[3]=B[3];
390 case 3:
391 A[2]=B[2];
392 case 2:
393 A[1]=B[1];
394 case 1:
395 A[0]=B[0];
396 case 0:
397 /* I need the 'case 0' entry for utrix cc.
398 * If the optimizer is turned on, it does the
399 * switch table by doing
400 * a=top&7
401 * a--;
402 * goto jump_table[a];
403 * If top is 0, this makes us jump to 0xffffffc
404 * which is rather bad :-(.
405 * eric 23-Apr-1998
406 */
407 ;
408 }
409#else 377#else
410 for (i=b->top>>2; i>0; i--,A+=4,B+=4) 378 memset(A,0,sizeof(BN_ULONG)*(words+1));
379 memcpy(A,b->d,sizeof(b->d[0])*b->top);
380#endif
381
382 return(a);
383 }
384
385/* This is an internal function that can be used instead of bn_expand2()
386 * when there is a need to copy BIGNUMs instead of only expanding the
387 * data part, while still expanding them.
388 * Especially useful when needing to expand BIGNUMs that are declared
389 * 'const' and should therefore not be changed.
390 * The reason to use this instead of a BN_dup() followed by a bn_expand2()
391 * is memory allocation overhead. A BN_dup() followed by a bn_expand2()
392 * will allocate new memory for the BIGNUM data twice, and free it once,
393 * while bn_dup_expand() makes sure allocation is made only once.
394 */
395
396BIGNUM *bn_dup_expand(const BIGNUM *b, int words)
397 {
398 BIGNUM *r = NULL;
399
400 if (words > b->dmax)
401 {
402 BN_ULONG *a = bn_expand_internal(b, words);
403
404 if (a)
405 {
406 r = BN_new();
407 if (r)
411 { 408 {
412 /* 409 r->top = b->top;
413 * The fact that the loop is unrolled 410 r->dmax = words;
414 * 4-wise is a tribute to Intel. It's 411 r->neg = b->neg;
415 * the one that doesn't have enough 412 r->d = a;
416 * registers to accomodate more data.
417 * I'd unroll it 8-wise otherwise:-)
418 *
419 * <appro@fy.chalmers.se>
420 */
421 BN_ULONG a0,a1,a2,a3;
422 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
423 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
424 } 413 }
425 switch (b->top&3) 414 else
426 { 415 {
427 case 3: A[2]=B[2]; 416 /* r == NULL, BN_new failure */
428 case 2: A[1]=B[1]; 417 OPENSSL_free(a);
429 case 1: A[0]=B[0];
430 case 0: ; /* ultrix cc workaround, see above */
431 } 418 }
432#endif
433 OPENSSL_free(b->d);
434 } 419 }
420 /* If a == NULL, there was an error in allocation in
421 bn_expand_internal(), and NULL should be returned */
422 }
423 else
424 {
425 r = BN_dup(b);
426 }
435 427
436 b->d=a; 428 return r;
437 b->dmax=words; 429 }
430
431/* This is an internal function that should not be used in applications.
432 * It ensures that 'b' has enough room for a 'words' word number.
433 * It is mostly used by the various BIGNUM routines. If there is an error,
434 * NULL is returned. If not, 'b' is returned. */
438 435
439 /* Now need to zero any data between b->top and b->max */ 436BIGNUM *bn_expand2(BIGNUM *b, int words)
437 {
438 if (words > b->dmax)
439 {
440 BN_ULONG *a = bn_expand_internal(b, words);
440 441
441 A= &(b->d[b->top]); 442 if (a)
442 for (i=(b->dmax - b->top)>>3; i>0; i--,A+=8)
443 { 443 {
444 A[0]=0; A[1]=0; A[2]=0; A[3]=0; 444 if (b->d)
445 A[4]=0; A[5]=0; A[6]=0; A[7]=0; 445 OPENSSL_free(b->d);
446 }
447 for (i=(b->dmax - b->top)&7; i>0; i--,A++)
448 A[0]=0;
449#else
450 memset(A,0,sizeof(BN_ULONG)*(words+1));
451 memcpy(A,b->d,sizeof(b->d[0])*b->top);
452 b->d=a; 446 b->d=a;
453 b->max=words; 447 b->dmax=words;
454#endif 448 }
455 449 else
456/* memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG)); */ 450 b = NULL;
457/* { int i; for (i=b->max; i<words+1; i++) p[i]=i;} */
458
459 } 451 }
460 return(b); 452 return b;
461 } 453 }
462 454
463BIGNUM *BN_dup(const BIGNUM *a) 455BIGNUM *BN_dup(const BIGNUM *a)
464 { 456 {
465 BIGNUM *r; 457 BIGNUM *r, *t;
466 458
467 if (a == NULL) return NULL; 459 if (a == NULL) return NULL;
468 460
469 bn_check_top(a); 461 bn_check_top(a);
470 462
471 r=BN_new(); 463 t = BN_new();
472 if (r == NULL) return(NULL); 464 if (t == NULL) return(NULL);
473 return((BIGNUM *)BN_copy(r,a)); 465 r = BN_copy(t, a);
466 /* now r == t || r == NULL */
467 if (r == NULL)
468 BN_free(t);
469 return r;
474 } 470 }
475 471
476BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) 472BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
@@ -498,7 +494,7 @@ BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
498 case 3: A[2]=B[2]; 494 case 3: A[2]=B[2];
499 case 2: A[1]=B[1]; 495 case 2: A[1]=B[1];
500 case 1: A[0]=B[0]; 496 case 1: A[0]=B[0];
501 case 0: ; /* ultrix cc workaround, see comments in bn_expand2 */ 497 case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
502 } 498 }
503#else 499#else
504 memcpy(a->d,b->d,sizeof(b->d[0])*b->top); 500 memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
@@ -512,6 +508,35 @@ BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
512 return(a); 508 return(a);
513 } 509 }
514 510
511void BN_swap(BIGNUM *a, BIGNUM *b)
512 {
513 int flags_old_a, flags_old_b;
514 BN_ULONG *tmp_d;
515 int tmp_top, tmp_dmax, tmp_neg;
516
517 flags_old_a = a->flags;
518 flags_old_b = b->flags;
519
520 tmp_d = a->d;
521 tmp_top = a->top;
522 tmp_dmax = a->dmax;
523 tmp_neg = a->neg;
524
525 a->d = b->d;
526 a->top = b->top;
527 a->dmax = b->dmax;
528 a->neg = b->neg;
529
530 b->d = tmp_d;
531 b->top = tmp_top;
532 b->dmax = tmp_dmax;
533 b->neg = tmp_neg;
534
535 a->flags = (flags_old_a & BN_FLG_MALLOCED) | (flags_old_b & BN_FLG_STATIC_DATA);
536 b->flags = (flags_old_b & BN_FLG_MALLOCED) | (flags_old_a & BN_FLG_STATIC_DATA);
537 }
538
539
515void BN_clear(BIGNUM *a) 540void BN_clear(BIGNUM *a)
516 { 541 {
517 if (a->d != NULL) 542 if (a->d != NULL)
@@ -520,7 +545,7 @@ void BN_clear(BIGNUM *a)
520 a->neg=0; 545 a->neg=0;
521 } 546 }
522 547
523BN_ULONG BN_get_word(BIGNUM *a) 548BN_ULONG BN_get_word(const BIGNUM *a)
524 { 549 {
525 int i,n; 550 int i,n;
526 BN_ULONG ret=0; 551 BN_ULONG ret=0;
@@ -568,7 +593,6 @@ int BN_set_word(BIGNUM *a, BN_ULONG w)
568 return(1); 593 return(1);
569 } 594 }
570 595
571/* ignore negative */
572BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) 596BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
573 { 597 {
574 unsigned int i,m; 598 unsigned int i,m;
@@ -589,6 +613,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
589 i=((n-1)/BN_BYTES)+1; 613 i=((n-1)/BN_BYTES)+1;
590 m=((n-1)%(BN_BYTES)); 614 m=((n-1)%(BN_BYTES));
591 ret->top=i; 615 ret->top=i;
616 ret->neg=0;
592 while (n-- > 0) 617 while (n-- > 0)
593 { 618 {
594 l=(l<<8L)| *(s++); 619 l=(l<<8L)| *(s++);
@@ -743,7 +768,7 @@ int BN_mask_bits(BIGNUM *a, int n)
743 return(1); 768 return(1);
744 } 769 }
745 770
746int bn_cmp_words(BN_ULONG *a, BN_ULONG *b, int n) 771int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
747 { 772 {
748 int i; 773 int i;
749 BN_ULONG aa,bb; 774 BN_ULONG aa,bb;
@@ -760,3 +785,34 @@ int bn_cmp_words(BN_ULONG *a, BN_ULONG *b, int n)
760 return(0); 785 return(0);
761 } 786 }
762 787
788/* Here follows a specialised variant of bn_cmp_words(). It has the
789 property of performing the operation on arrays of different sizes.
790 The sizes of those arrays are expressed through cl, which is the
791 common length ( basically, min(len(a),len(b)) ), and dl, which is the
792 delta between the two lengths, calculated as len(a)-len(b).
793 All lengths are the number of BN_ULONGs... */
794
795int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
796 int cl, int dl)
797 {
798 int n,i;
799 n = cl-1;
800
801 if (dl < 0)
802 {
803 for (i=dl; i<0; i++)
804 {
805 if (b[n-i] != 0)
806 return -1; /* a < b */
807 }
808 }
809 if (dl > 0)
810 {
811 for (i=dl; i>0; i--)
812 {
813 if (a[n+i] != 0)
814 return 1; /* a > b */
815 }
816 }
817 return bn_cmp_words(a,b,cl);
818 }
diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c
new file mode 100644
index 0000000000..5cf82480d7
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_mod.c
@@ -0,0 +1,296 @@
1/* crypto/bn/bn_mod.c */
2/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
58 * All rights reserved.
59 *
60 * This package is an SSL implementation written
61 * by Eric Young (eay@cryptsoft.com).
62 * The implementation was written so as to conform with Netscapes SSL.
63 *
64 * This library is free for commercial and non-commercial use as long as
65 * the following conditions are aheared to. The following conditions
66 * apply to all code found in this distribution, be it the RC4, RSA,
67 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
68 * included with this distribution is covered by the same copyright terms
69 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
70 *
71 * Copyright remains Eric Young's, and as such any Copyright notices in
72 * the code are not to be removed.
73 * If this package is used in a product, Eric Young should be given attribution
74 * as the author of the parts of the library used.
75 * This can be in the form of a textual message at program startup or
76 * in documentation (online or textual) provided with the package.
77 *
78 * Redistribution and use in source and binary forms, with or without
79 * modification, are permitted provided that the following conditions
80 * are met:
81 * 1. Redistributions of source code must retain the copyright
82 * notice, this list of conditions and the following disclaimer.
83 * 2. Redistributions in binary form must reproduce the above copyright
84 * notice, this list of conditions and the following disclaimer in the
85 * documentation and/or other materials provided with the distribution.
86 * 3. All advertising materials mentioning features or use of this software
87 * must display the following acknowledgement:
88 * "This product includes cryptographic software written by
89 * Eric Young (eay@cryptsoft.com)"
90 * The word 'cryptographic' can be left out if the rouines from the library
91 * being used are not cryptographic related :-).
92 * 4. If you include any Windows specific code (or a derivative thereof) from
93 * the apps directory (application code) you must include an acknowledgement:
94 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
95 *
96 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
97 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
98 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
99 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
100 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
101 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
102 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
103 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
104 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
105 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
106 * SUCH DAMAGE.
107 *
108 * The licence and distribution terms for any publically available version or
109 * derivative of this code cannot be changed. i.e. this code cannot simply be
110 * copied and put under another distribution licence
111 * [including the GNU Public Licence.]
112 */
113
114#include "cryptlib.h"
115#include "bn_lcl.h"
116
117
118#if 0 /* now just a #define */
119int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
120 {
121 return(BN_div(NULL,rem,m,d,ctx));
122 /* note that rem->neg == m->neg (unless the remainder is zero) */
123 }
124#endif
125
126
127int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
128 {
129 /* like BN_mod, but returns non-negative remainder
130 * (i.e., 0 <= r < |d| always holds) */
131
132 if (!(BN_mod(r,m,d,ctx)))
133 return 0;
134 if (!r->neg)
135 return 1;
136 /* now -|d| < r < 0, so we have to set r := r + |d| */
137 return (d->neg ? BN_sub : BN_add)(r, r, d);
138}
139
140
141int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
142 {
143 if (!BN_add(r, a, b)) return 0;
144 return BN_nnmod(r, r, m, ctx);
145 }
146
147
148/* BN_mod_add variant that may be used if both a and b are non-negative
149 * and less than m */
150int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
151 {
152 if (!BN_add(r, a, b)) return 0;
153 if (BN_ucmp(r, m) >= 0)
154 return BN_usub(r, r, m);
155 return 1;
156 }
157
158
159int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
160 {
161 if (!BN_sub(r, a, b)) return 0;
162 return BN_nnmod(r, r, m, ctx);
163 }
164
165
166/* BN_mod_sub variant that may be used if both a and b are non-negative
167 * and less than m */
168int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
169 {
170 if (!BN_sub(r, a, b)) return 0;
171 if (r->neg)
172 return BN_add(r, r, m);
173 return 1;
174 }
175
176
177/* slow but works */
178int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
179 BN_CTX *ctx)
180 {
181 BIGNUM *t;
182 int ret=0;
183
184 bn_check_top(a);
185 bn_check_top(b);
186 bn_check_top(m);
187
188 BN_CTX_start(ctx);
189 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
190 if (a == b)
191 { if (!BN_sqr(t,a,ctx)) goto err; }
192 else
193 { if (!BN_mul(t,a,b,ctx)) goto err; }
194 if (!BN_nnmod(r,t,m,ctx)) goto err;
195 ret=1;
196err:
197 BN_CTX_end(ctx);
198 return(ret);
199 }
200
201
202int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
203 {
204 if (!BN_sqr(r, a, ctx)) return 0;
205 /* r->neg == 0, thus we don't need BN_nnmod */
206 return BN_mod(r, r, m, ctx);
207 }
208
209
210int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
211 {
212 if (!BN_lshift1(r, a)) return 0;
213 return BN_nnmod(r, r, m, ctx);
214 }
215
216
217/* BN_mod_lshift1 variant that may be used if a is non-negative
218 * and less than m */
219int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m)
220 {
221 if (!BN_lshift1(r, a)) return 0;
222 if (BN_cmp(r, m) >= 0)
223 return BN_sub(r, r, m);
224 return 1;
225 }
226
227
228int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx)
229 {
230 BIGNUM *abs_m = NULL;
231 int ret;
232
233 if (!BN_nnmod(r, a, m, ctx)) return 0;
234
235 if (m->neg)
236 {
237 abs_m = BN_dup(m);
238 if (abs_m == NULL) return 0;
239 abs_m->neg = 0;
240 }
241
242 ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m));
243
244 if (abs_m)
245 BN_free(abs_m);
246 return ret;
247 }
248
249
250/* BN_mod_lshift variant that may be used if a is non-negative
251 * and less than m */
252int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m)
253 {
254 if (r != a)
255 {
256 if (BN_copy(r, a) == NULL) return 0;
257 }
258
259 while (n > 0)
260 {
261 int max_shift;
262
263 /* 0 < r < m */
264 max_shift = BN_num_bits(m) - BN_num_bits(r);
265 /* max_shift >= 0 */
266
267 if (max_shift < 0)
268 {
269 BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED);
270 return 0;
271 }
272
273 if (max_shift > n)
274 max_shift = n;
275
276 if (max_shift)
277 {
278 if (!BN_lshift(r, r, max_shift)) return 0;
279 n -= max_shift;
280 }
281 else
282 {
283 if (!BN_lshift1(r, r)) return 0;
284 --n;
285 }
286
287 /* BN_num_bits(r) <= BN_num_bits(m) */
288
289 if (BN_cmp(r, m) >= 0)
290 {
291 if (!BN_sub(r, r, m)) return 0;
292 }
293 }
294
295 return 1;
296 }
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
index 8cf1febacc..82942a4759 100644
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ b/src/lib/libcrypto/bn/bn_mont.c
@@ -69,20 +69,17 @@
69 69
70#define MONT_WORD /* use the faster word-based algorithm */ 70#define MONT_WORD /* use the faster word-based algorithm */
71 71
72int BN_mod_mul_montgomery(BIGNUM *r, BIGNUM *a, BIGNUM *b, 72int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
73 BN_MONT_CTX *mont, BN_CTX *ctx) 73 BN_MONT_CTX *mont, BN_CTX *ctx)
74 { 74 {
75 BIGNUM *tmp,*tmp2; 75 BIGNUM *tmp;
76 int ret=0; 76 int ret=0;
77 77
78 BN_CTX_start(ctx); 78 BN_CTX_start(ctx);
79 tmp = BN_CTX_get(ctx); 79 tmp = BN_CTX_get(ctx);
80 tmp2 = BN_CTX_get(ctx); 80 if (tmp == NULL) goto err;
81 if (tmp == NULL || tmp2 == NULL) goto err;
82 81
83 bn_check_top(tmp); 82 bn_check_top(tmp);
84 bn_check_top(tmp2);
85
86 if (a == b) 83 if (a == b)
87 { 84 {
88 if (!BN_sqr(tmp,a,ctx)) goto err; 85 if (!BN_sqr(tmp,a,ctx)) goto err;
@@ -99,7 +96,7 @@ err:
99 return(ret); 96 return(ret);
100 } 97 }
101 98
102int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont, 99int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
103 BN_CTX *ctx) 100 BN_CTX *ctx)
104 { 101 {
105 int retn=0; 102 int retn=0;
@@ -144,7 +141,7 @@ int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont,
144 n0=mont->n0; 141 n0=mont->n0;
145 142
146#ifdef BN_COUNT 143#ifdef BN_COUNT
147 printf("word BN_from_montgomery %d * %d\n",nl,nl); 144 fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl);
148#endif 145#endif
149 for (i=0; i<nl; i++) 146 for (i=0; i<nl; i++)
150 { 147 {
@@ -229,7 +226,7 @@ int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont,
229 226
230 if (BN_ucmp(ret, &(mont->N)) >= 0) 227 if (BN_ucmp(ret, &(mont->N)) >= 0)
231 { 228 {
232 BN_usub(ret,ret,&(mont->N)); 229 if (!BN_usub(ret,ret,&(mont->N))) goto err;
233 } 230 }
234 retn=1; 231 retn=1;
235 err: 232 err:
@@ -277,6 +274,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
277 BN_init(&Ri); 274 BN_init(&Ri);
278 R= &(mont->RR); /* grab RR as a temp */ 275 R= &(mont->RR); /* grab RR as a temp */
279 BN_copy(&(mont->N),mod); /* Set N */ 276 BN_copy(&(mont->N),mod); /* Set N */
277 mont->N.neg = 0;
280 278
281#ifdef MONT_WORD 279#ifdef MONT_WORD
282 { 280 {
@@ -292,40 +290,45 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
292 tmod.d=buf; 290 tmod.d=buf;
293 tmod.top=1; 291 tmod.top=1;
294 tmod.dmax=2; 292 tmod.dmax=2;
295 tmod.neg=mod->neg; 293 tmod.neg=0;
296 /* Ri = R^-1 mod N*/ 294 /* Ri = R^-1 mod N*/
297 if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL) 295 if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL)
298 goto err; 296 goto err;
299 BN_lshift(&Ri,&Ri,BN_BITS2); /* R*Ri */ 297 if (!BN_lshift(&Ri,&Ri,BN_BITS2)) goto err; /* R*Ri */
300 if (!BN_is_zero(&Ri)) 298 if (!BN_is_zero(&Ri))
301 BN_sub_word(&Ri,1); 299 {
300 if (!BN_sub_word(&Ri,1)) goto err;
301 }
302 else /* if N mod word size == 1 */ 302 else /* if N mod word size == 1 */
303 BN_set_word(&Ri,BN_MASK2); /* Ri-- (mod word size) */ 303 {
304 BN_div(&Ri,NULL,&Ri,&tmod,ctx); /* Ni = (R*Ri-1)/N, 304 if (!BN_set_word(&Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */
305 * keep only least significant word: */ 305 }
306 mont->n0=Ri.d[0]; 306 if (!BN_div(&Ri,NULL,&Ri,&tmod,ctx)) goto err;
307 /* Ni = (R*Ri-1)/N,
308 * keep only least significant word: */
309 mont->n0 = (Ri.top > 0) ? Ri.d[0] : 0;
307 BN_free(&Ri); 310 BN_free(&Ri);
308 } 311 }
309#else /* !MONT_WORD */ 312#else /* !MONT_WORD */
310 { /* bignum version */ 313 { /* bignum version */
311 mont->ri=BN_num_bits(mod); 314 mont->ri=BN_num_bits(&mont->N);
312 BN_zero(R); 315 if (!BN_zero(R)) goto err;
313 BN_set_bit(R,mont->ri); /* R = 2^ri */ 316 if (!BN_set_bit(R,mont->ri)) goto err; /* R = 2^ri */
314 /* Ri = R^-1 mod N*/ 317 /* Ri = R^-1 mod N*/
315 if ((BN_mod_inverse(&Ri,R,mod,ctx)) == NULL) 318 if ((BN_mod_inverse(&Ri,R,&mont->N,ctx)) == NULL)
316 goto err; 319 goto err;
317 BN_lshift(&Ri,&Ri,mont->ri); /* R*Ri */ 320 if (!BN_lshift(&Ri,&Ri,mont->ri)) goto err; /* R*Ri */
318 BN_sub_word(&Ri,1); 321 if (!BN_sub_word(&Ri,1)) goto err;
319 /* Ni = (R*Ri-1) / N */ 322 /* Ni = (R*Ri-1) / N */
320 BN_div(&(mont->Ni),NULL,&Ri,mod,ctx); 323 if (!BN_div(&(mont->Ni),NULL,&Ri,&mont->N,ctx)) goto err;
321 BN_free(&Ri); 324 BN_free(&Ri);
322 } 325 }
323#endif 326#endif
324 327
325 /* setup RR for conversions */ 328 /* setup RR for conversions */
326 BN_zero(&(mont->RR)); 329 if (!BN_zero(&(mont->RR))) goto err;
327 BN_set_bit(&(mont->RR),mont->ri*2); 330 if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err;
328 BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx); 331 if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err;
329 332
330 return(1); 333 return(1);
331err: 334err:
@@ -336,9 +339,9 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
336 { 339 {
337 if (to == from) return(to); 340 if (to == from) return(to);
338 341
339 BN_copy(&(to->RR),&(from->RR)); 342 if (!BN_copy(&(to->RR),&(from->RR))) return NULL;
340 BN_copy(&(to->N),&(from->N)); 343 if (!BN_copy(&(to->N),&(from->N))) return NULL;
341 BN_copy(&(to->Ni),&(from->Ni)); 344 if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
342 to->ri=from->ri; 345 to->ri=from->ri;
343 to->n0=from->n0; 346 to->n0=from->n0;
344 return(to); 347 return(to);
diff --git a/src/lib/libcrypto/bn/bn_mpi.c b/src/lib/libcrypto/bn/bn_mpi.c
index 80e1dca6b7..05fa9d1e9a 100644
--- a/src/lib/libcrypto/bn/bn_mpi.c
+++ b/src/lib/libcrypto/bn/bn_mpi.c
@@ -88,7 +88,7 @@ int BN_bn2mpi(const BIGNUM *a, unsigned char *d)
88 return(num+4+ext); 88 return(num+4+ext);
89 } 89 }
90 90
91BIGNUM *BN_mpi2bn(unsigned char *d, int n, BIGNUM *a) 91BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *a)
92 { 92 {
93 long len; 93 long len;
94 int neg=0; 94 int neg=0;
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
index 3e8d8b9567..41ea925b8d 100644
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ b/src/lib/libcrypto/bn/bn_mul.c
@@ -56,10 +56,325 @@
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58 58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
59#include <stdio.h> 64#include <stdio.h>
65#include <assert.h>
60#include "cryptlib.h" 66#include "cryptlib.h"
61#include "bn_lcl.h" 67#include "bn_lcl.h"
62 68
69#if defined(OPENSSL_NO_ASM) || !(defined(__i386) || defined(__i386__))/* Assembler implementation exists only for x86 */
70/* Here follows specialised variants of bn_add_words() and
71 bn_sub_words(). They have the property performing operations on
72 arrays of different sizes. The sizes of those arrays is expressed through
73 cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
74 which is the delta between the two lengths, calculated as len(a)-len(b).
75 All lengths are the number of BN_ULONGs... For the operations that require
76 a result array as parameter, it must have the length cl+abs(dl).
77 These functions should probably end up in bn_asm.c as soon as there are
78 assembler counterparts for the systems that use assembler files. */
79
80BN_ULONG bn_sub_part_words(BN_ULONG *r,
81 const BN_ULONG *a, const BN_ULONG *b,
82 int cl, int dl)
83 {
84 BN_ULONG c, t;
85
86 assert(cl >= 0);
87 c = bn_sub_words(r, a, b, cl);
88
89 if (dl == 0)
90 return c;
91
92 r += cl;
93 a += cl;
94 b += cl;
95
96 if (dl < 0)
97 {
98#ifdef BN_COUNT
99 fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
100#endif
101 for (;;)
102 {
103 t = b[0];
104 r[0] = (0-t-c)&BN_MASK2;
105 if (t != 0) c=1;
106 if (++dl >= 0) break;
107
108 t = b[1];
109 r[1] = (0-t-c)&BN_MASK2;
110 if (t != 0) c=1;
111 if (++dl >= 0) break;
112
113 t = b[2];
114 r[2] = (0-t-c)&BN_MASK2;
115 if (t != 0) c=1;
116 if (++dl >= 0) break;
117
118 t = b[3];
119 r[3] = (0-t-c)&BN_MASK2;
120 if (t != 0) c=1;
121 if (++dl >= 0) break;
122
123 b += 4;
124 r += 4;
125 }
126 }
127 else
128 {
129 int save_dl = dl;
130#ifdef BN_COUNT
131 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
132#endif
133 while(c)
134 {
135 t = a[0];
136 r[0] = (t-c)&BN_MASK2;
137 if (t != 0) c=0;
138 if (--dl <= 0) break;
139
140 t = a[1];
141 r[1] = (t-c)&BN_MASK2;
142 if (t != 0) c=0;
143 if (--dl <= 0) break;
144
145 t = a[2];
146 r[2] = (t-c)&BN_MASK2;
147 if (t != 0) c=0;
148 if (--dl <= 0) break;
149
150 t = a[3];
151 r[3] = (t-c)&BN_MASK2;
152 if (t != 0) c=0;
153 if (--dl <= 0) break;
154
155 save_dl = dl;
156 a += 4;
157 r += 4;
158 }
159 if (dl > 0)
160 {
161#ifdef BN_COUNT
162 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
163#endif
164 if (save_dl > dl)
165 {
166 switch (save_dl - dl)
167 {
168 case 1:
169 r[1] = a[1];
170 if (--dl <= 0) break;
171 case 2:
172 r[2] = a[2];
173 if (--dl <= 0) break;
174 case 3:
175 r[3] = a[3];
176 if (--dl <= 0) break;
177 }
178 a += 4;
179 r += 4;
180 }
181 }
182 if (dl > 0)
183 {
184#ifdef BN_COUNT
185 fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
186#endif
187 for(;;)
188 {
189 r[0] = a[0];
190 if (--dl <= 0) break;
191 r[1] = a[1];
192 if (--dl <= 0) break;
193 r[2] = a[2];
194 if (--dl <= 0) break;
195 r[3] = a[3];
196 if (--dl <= 0) break;
197
198 a += 4;
199 r += 4;
200 }
201 }
202 }
203 return c;
204 }
205#endif
206
207BN_ULONG bn_add_part_words(BN_ULONG *r,
208 const BN_ULONG *a, const BN_ULONG *b,
209 int cl, int dl)
210 {
211 BN_ULONG c, l, t;
212
213 assert(cl >= 0);
214 c = bn_add_words(r, a, b, cl);
215
216 if (dl == 0)
217 return c;
218
219 r += cl;
220 a += cl;
221 b += cl;
222
223 if (dl < 0)
224 {
225 int save_dl = dl;
226#ifdef BN_COUNT
227 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
228#endif
229 while (c)
230 {
231 l=(c+b[0])&BN_MASK2;
232 c=(l < c);
233 r[0]=l;
234 if (++dl >= 0) break;
235
236 l=(c+b[1])&BN_MASK2;
237 c=(l < c);
238 r[1]=l;
239 if (++dl >= 0) break;
240
241 l=(c+b[2])&BN_MASK2;
242 c=(l < c);
243 r[2]=l;
244 if (++dl >= 0) break;
245
246 l=(c+b[3])&BN_MASK2;
247 c=(l < c);
248 r[3]=l;
249 if (++dl >= 0) break;
250
251 save_dl = dl;
252 b+=4;
253 r+=4;
254 }
255 if (dl < 0)
256 {
257#ifdef BN_COUNT
258 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
259#endif
260 if (save_dl < dl)
261 {
262 switch (dl - save_dl)
263 {
264 case 1:
265 r[1] = b[1];
266 if (++dl >= 0) break;
267 case 2:
268 r[2] = b[2];
269 if (++dl >= 0) break;
270 case 3:
271 r[3] = b[3];
272 if (++dl >= 0) break;
273 }
274 b += 4;
275 r += 4;
276 }
277 }
278 if (dl < 0)
279 {
280#ifdef BN_COUNT
281 fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
282#endif
283 for(;;)
284 {
285 r[0] = b[0];
286 if (++dl >= 0) break;
287 r[1] = b[1];
288 if (++dl >= 0) break;
289 r[2] = b[2];
290 if (++dl >= 0) break;
291 r[3] = b[3];
292 if (++dl >= 0) break;
293
294 b += 4;
295 r += 4;
296 }
297 }
298 }
299 else
300 {
301 int save_dl = dl;
302#ifdef BN_COUNT
303 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
304#endif
305 while (c)
306 {
307 t=(a[0]+c)&BN_MASK2;
308 c=(t < c);
309 r[0]=t;
310 if (--dl <= 0) break;
311
312 t=(a[1]+c)&BN_MASK2;
313 c=(t < c);
314 r[1]=t;
315 if (--dl <= 0) break;
316
317 t=(a[2]+c)&BN_MASK2;
318 c=(t < c);
319 r[2]=t;
320 if (--dl <= 0) break;
321
322 t=(a[3]+c)&BN_MASK2;
323 c=(t < c);
324 r[3]=t;
325 if (--dl <= 0) break;
326
327 save_dl = dl;
328 a+=4;
329 r+=4;
330 }
331#ifdef BN_COUNT
332 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
333#endif
334 if (dl > 0)
335 {
336 if (save_dl > dl)
337 {
338 switch (save_dl - dl)
339 {
340 case 1:
341 r[1] = a[1];
342 if (--dl <= 0) break;
343 case 2:
344 r[2] = a[2];
345 if (--dl <= 0) break;
346 case 3:
347 r[3] = a[3];
348 if (--dl <= 0) break;
349 }
350 a += 4;
351 r += 4;
352 }
353 }
354 if (dl > 0)
355 {
356#ifdef BN_COUNT
357 fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
358#endif
359 for(;;)
360 {
361 r[0] = a[0];
362 if (--dl <= 0) break;
363 r[1] = a[1];
364 if (--dl <= 0) break;
365 r[2] = a[2];
366 if (--dl <= 0) break;
367 r[3] = a[3];
368 if (--dl <= 0) break;
369
370 a += 4;
371 r += 4;
372 }
373 }
374 }
375 return c;
376 }
377
63#ifdef BN_RECURSION 378#ifdef BN_RECURSION
64/* Karatsuba recursive multiplication algorithm 379/* Karatsuba recursive multiplication algorithm
65 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ 380 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
@@ -75,14 +390,15 @@
75 * a[1]*b[1] 390 * a[1]*b[1]
76 */ 391 */
77void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, 392void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
78 BN_ULONG *t) 393 int dna, int dnb, BN_ULONG *t)
79 { 394 {
80 int n=n2/2,c1,c2; 395 int n=n2/2,c1,c2;
396 int tna=n+dna, tnb=n+dnb;
81 unsigned int neg,zero; 397 unsigned int neg,zero;
82 BN_ULONG ln,lo,*p; 398 BN_ULONG ln,lo,*p;
83 399
84# ifdef BN_COUNT 400# ifdef BN_COUNT
85 printf(" bn_mul_recursive %d * %d\n",n2,n2); 401 fprintf(stderr," bn_mul_recursive %d * %d\n",n2,n2);
86# endif 402# endif
87# ifdef BN_MUL_COMBA 403# ifdef BN_MUL_COMBA
88# if 0 404# if 0
@@ -105,21 +421,21 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
105 return; 421 return;
106 } 422 }
107 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 423 /* r=(a[0]-a[1])*(b[1]-b[0]) */
108 c1=bn_cmp_words(a,&(a[n]),n); 424 c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
109 c2=bn_cmp_words(&(b[n]),b,n); 425 c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
110 zero=neg=0; 426 zero=neg=0;
111 switch (c1*3+c2) 427 switch (c1*3+c2)
112 { 428 {
113 case -4: 429 case -4:
114 bn_sub_words(t, &(a[n]),a, n); /* - */ 430 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
115 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ 431 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
116 break; 432 break;
117 case -3: 433 case -3:
118 zero=1; 434 zero=1;
119 break; 435 break;
120 case -2: 436 case -2:
121 bn_sub_words(t, &(a[n]),a, n); /* - */ 437 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
122 bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ 438 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
123 neg=1; 439 neg=1;
124 break; 440 break;
125 case -1: 441 case -1:
@@ -128,21 +444,22 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
128 zero=1; 444 zero=1;
129 break; 445 break;
130 case 2: 446 case 2:
131 bn_sub_words(t, a, &(a[n]),n); /* + */ 447 bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
132 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ 448 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
133 neg=1; 449 neg=1;
134 break; 450 break;
135 case 3: 451 case 3:
136 zero=1; 452 zero=1;
137 break; 453 break;
138 case 4: 454 case 4:
139 bn_sub_words(t, a, &(a[n]),n); 455 bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
140 bn_sub_words(&(t[n]),&(b[n]),b, n); 456 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
141 break; 457 break;
142 } 458 }
143 459
144# ifdef BN_MUL_COMBA 460# ifdef BN_MUL_COMBA
145 if (n == 4) 461 if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
462 extra args to do this well */
146 { 463 {
147 if (!zero) 464 if (!zero)
148 bn_mul_comba4(&(t[n2]),t,&(t[n])); 465 bn_mul_comba4(&(t[n2]),t,&(t[n]));
@@ -152,7 +469,9 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
152 bn_mul_comba4(r,a,b); 469 bn_mul_comba4(r,a,b);
153 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n])); 470 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
154 } 471 }
155 else if (n == 8) 472 else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
473 take extra args to do this
474 well */
156 { 475 {
157 if (!zero) 476 if (!zero)
158 bn_mul_comba8(&(t[n2]),t,&(t[n])); 477 bn_mul_comba8(&(t[n2]),t,&(t[n]));
@@ -167,11 +486,11 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
167 { 486 {
168 p= &(t[n2*2]); 487 p= &(t[n2*2]);
169 if (!zero) 488 if (!zero)
170 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); 489 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
171 else 490 else
172 memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); 491 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
173 bn_mul_recursive(r,a,b,n,p); 492 bn_mul_recursive(r,a,b,n,0,0,p);
174 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p); 493 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
175 } 494 }
176 495
177 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign 496 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
@@ -220,39 +539,39 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
220 539
221/* n+tn is the word length 540/* n+tn is the word length
222 * t needs to be n*4 is size, as does r */ 541 * t needs to be n*4 is size, as does r */
223void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, 542void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
224 int n, BN_ULONG *t) 543 int tna, int tnb, BN_ULONG *t)
225 { 544 {
226 int i,j,n2=n*2; 545 int i,j,n2=n*2;
227 unsigned int c1,c2,neg,zero; 546 unsigned int c1,c2,neg,zero;
228 BN_ULONG ln,lo,*p; 547 BN_ULONG ln,lo,*p;
229 548
230# ifdef BN_COUNT 549# ifdef BN_COUNT
231 printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n); 550 fprintf(stderr," bn_mul_part_recursive (%d+%d) * (%d+%d)\n",
551 tna, n, tnb, n);
232# endif 552# endif
233 if (n < 8) 553 if (n < 8)
234 { 554 {
235 i=tn+n; 555 bn_mul_normal(r,a,n+tna,b,n+tnb);
236 bn_mul_normal(r,a,i,b,i);
237 return; 556 return;
238 } 557 }
239 558
240 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 559 /* r=(a[0]-a[1])*(b[1]-b[0]) */
241 c1=bn_cmp_words(a,&(a[n]),n); 560 c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
242 c2=bn_cmp_words(&(b[n]),b,n); 561 c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
243 zero=neg=0; 562 zero=neg=0;
244 switch (c1*3+c2) 563 switch (c1*3+c2)
245 { 564 {
246 case -4: 565 case -4:
247 bn_sub_words(t, &(a[n]),a, n); /* - */ 566 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
248 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ 567 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
249 break; 568 break;
250 case -3: 569 case -3:
251 zero=1; 570 zero=1;
252 /* break; */ 571 /* break; */
253 case -2: 572 case -2:
254 bn_sub_words(t, &(a[n]),a, n); /* - */ 573 bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */
255 bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ 574 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */
256 neg=1; 575 neg=1;
257 break; 576 break;
258 case -1: 577 case -1:
@@ -261,16 +580,16 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
261 zero=1; 580 zero=1;
262 /* break; */ 581 /* break; */
263 case 2: 582 case 2:
264 bn_sub_words(t, a, &(a[n]),n); /* + */ 583 bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */
265 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ 584 bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */
266 neg=1; 585 neg=1;
267 break; 586 break;
268 case 3: 587 case 3:
269 zero=1; 588 zero=1;
270 /* break; */ 589 /* break; */
271 case 4: 590 case 4:
272 bn_sub_words(t, a, &(a[n]),n); 591 bn_sub_part_words(t, a, &(a[n]),tna,n-tna);
273 bn_sub_words(&(t[n]),&(b[n]),b, n); 592 bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n);
274 break; 593 break;
275 } 594 }
276 /* The zero case isn't yet implemented here. The speedup 595 /* The zero case isn't yet implemented here. The speedup
@@ -289,54 +608,59 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
289 { 608 {
290 bn_mul_comba8(&(t[n2]),t,&(t[n])); 609 bn_mul_comba8(&(t[n2]),t,&(t[n]));
291 bn_mul_comba8(r,a,b); 610 bn_mul_comba8(r,a,b);
292 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); 611 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
293 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2)); 612 memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
294 } 613 }
295 else 614 else
296 { 615 {
297 p= &(t[n2*2]); 616 p= &(t[n2*2]);
298 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); 617 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
299 bn_mul_recursive(r,a,b,n,p); 618 bn_mul_recursive(r,a,b,n,0,0,p);
300 i=n/2; 619 i=n/2;
301 /* If there is only a bottom half to the number, 620 /* If there is only a bottom half to the number,
302 * just do it */ 621 * just do it */
303 j=tn-i; 622 if (tna > tnb)
623 j = tna - i;
624 else
625 j = tnb - i;
304 if (j == 0) 626 if (j == 0)
305 { 627 {
306 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p); 628 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
629 i,tna-i,tnb-i,p);
307 memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); 630 memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
308 } 631 }
309 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ 632 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
310 { 633 {
311 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), 634 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
312 j,i,p); 635 i,tna-i,tnb-i,p);
313 memset(&(r[n2+tn*2]),0, 636 memset(&(r[n2+tna+tnb]),0,
314 sizeof(BN_ULONG)*(n2-tn*2)); 637 sizeof(BN_ULONG)*(n2-tna-tnb));
315 } 638 }
316 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ 639 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
317 { 640 {
318 memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); 641 memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
319 if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL) 642 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
643 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
320 { 644 {
321 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); 645 bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
322 } 646 }
323 else 647 else
324 { 648 {
325 for (;;) 649 for (;;)
326 { 650 {
327 i/=2; 651 i/=2;
328 if (i < tn) 652 if (i < tna && i < tnb)
329 { 653 {
330 bn_mul_part_recursive(&(r[n2]), 654 bn_mul_part_recursive(&(r[n2]),
331 &(a[n]),&(b[n]), 655 &(a[n]),&(b[n]),
332 tn-i,i,p); 656 i,tna-i,tnb-i,p);
333 break; 657 break;
334 } 658 }
335 else if (i == tn) 659 else if (i <= tna && i <= tnb)
336 { 660 {
337 bn_mul_recursive(&(r[n2]), 661 bn_mul_recursive(&(r[n2]),
338 &(a[n]),&(b[n]), 662 &(a[n]),&(b[n]),
339 i,p); 663 i,tna-i,tnb-i,p);
340 break; 664 break;
341 } 665 }
342 } 666 }
@@ -397,10 +721,10 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
397 int n=n2/2; 721 int n=n2/2;
398 722
399# ifdef BN_COUNT 723# ifdef BN_COUNT
400 printf(" bn_mul_low_recursive %d * %d\n",n2,n2); 724 fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
401# endif 725# endif
402 726
403 bn_mul_recursive(r,a,b,n,&(t[0])); 727 bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
404 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) 728 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
405 { 729 {
406 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); 730 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
@@ -431,7 +755,7 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
431 BN_ULONG ll,lc,*lp,*mp; 755 BN_ULONG ll,lc,*lp,*mp;
432 756
433# ifdef BN_COUNT 757# ifdef BN_COUNT
434 printf(" bn_mul_high %d * %d\n",n2,n2); 758 fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
435# endif 759# endif
436 n=n2/2; 760 n=n2/2;
437 761
@@ -484,8 +808,8 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
484 else 808 else
485# endif 809# endif
486 { 810 {
487 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2])); 811 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
488 bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2])); 812 bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
489 } 813 }
490 814
491 /* s0 == low(al*bl) 815 /* s0 == low(al*bl)
@@ -608,21 +932,21 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
608 } 932 }
609#endif /* BN_RECURSION */ 933#endif /* BN_RECURSION */
610 934
611int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx) 935int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
612 { 936 {
937 int ret=0;
613 int top,al,bl; 938 int top,al,bl;
614 BIGNUM *rr; 939 BIGNUM *rr;
615 int ret = 0;
616#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) 940#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
617 int i; 941 int i;
618#endif 942#endif
619#ifdef BN_RECURSION 943#ifdef BN_RECURSION
620 BIGNUM *t; 944 BIGNUM *t=NULL;
621 int j,k; 945 int j=0,k;
622#endif 946#endif
623 947
624#ifdef BN_COUNT 948#ifdef BN_COUNT
625 printf("BN_mul %d * %d\n",a->top,b->top); 949 fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
626#endif 950#endif
627 951
628 bn_check_top(a); 952 bn_check_top(a);
@@ -675,17 +999,55 @@ int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
675#ifdef BN_RECURSION 999#ifdef BN_RECURSION
676 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) 1000 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
677 { 1001 {
1002 if (i >= -1 && i <= 1)
1003 {
1004 int sav_j =0;
1005 /* Find out the power of two lower or equal
1006 to the longest of the two numbers */
1007 if (i >= 0)
1008 {
1009 j = BN_num_bits_word((BN_ULONG)al);
1010 }
1011 if (i == -1)
1012 {
1013 j = BN_num_bits_word((BN_ULONG)bl);
1014 }
1015 sav_j = j;
1016 j = 1<<(j-1);
1017 assert(j <= al || j <= bl);
1018 k = j+j;
1019 t = BN_CTX_get(ctx);
1020 if (al > j || bl > j)
1021 {
1022 bn_wexpand(t,k*4);
1023 bn_wexpand(rr,k*4);
1024 bn_mul_part_recursive(rr->d,a->d,b->d,
1025 j,al-j,bl-j,t->d);
1026 }
1027 else /* al <= j || bl <= j */
1028 {
1029 bn_wexpand(t,k*2);
1030 bn_wexpand(rr,k*2);
1031 bn_mul_recursive(rr->d,a->d,b->d,
1032 j,al-j,bl-j,t->d);
1033 }
1034 rr->top=top;
1035 goto end;
1036 }
1037#if 0
678 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA)) 1038 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
679 { 1039 {
680 bn_wexpand(b,al); 1040 BIGNUM *tmp_bn = (BIGNUM *)b;
681 b->d[bl]=0; 1041 bn_wexpand(tmp_bn,al);
1042 tmp_bn->d[bl]=0;
682 bl++; 1043 bl++;
683 i--; 1044 i--;
684 } 1045 }
685 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA)) 1046 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
686 { 1047 {
687 bn_wexpand(a,bl); 1048 BIGNUM *tmp_bn = (BIGNUM *)a;
688 a->d[al]=0; 1049 bn_wexpand(tmp_bn,bl);
1050 tmp_bn->d[al]=0;
689 al++; 1051 al++;
690 i++; 1052 i++;
691 } 1053 }
@@ -705,19 +1067,14 @@ int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
705 } 1067 }
706 else 1068 else
707 { 1069 {
708 bn_wexpand(a,k);
709 bn_wexpand(b,k);
710 bn_wexpand(t,k*4); 1070 bn_wexpand(t,k*4);
711 bn_wexpand(rr,k*4); 1071 bn_wexpand(rr,k*4);
712 for (i=a->top; i<k; i++)
713 a->d[i]=0;
714 for (i=b->top; i<k; i++)
715 b->d[i]=0;
716 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d); 1072 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
717 } 1073 }
718 rr->top=top; 1074 rr->top=top;
719 goto end; 1075 goto end;
720 } 1076 }
1077#endif
721 } 1078 }
722#endif /* BN_RECURSION */ 1079#endif /* BN_RECURSION */
723 if (bn_wexpand(rr,top) == NULL) goto err; 1080 if (bn_wexpand(rr,top) == NULL) goto err;
@@ -740,7 +1097,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
740 BN_ULONG *rr; 1097 BN_ULONG *rr;
741 1098
742#ifdef BN_COUNT 1099#ifdef BN_COUNT
743 printf(" bn_mul_normal %d * %d\n",na,nb); 1100 fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
744#endif 1101#endif
745 1102
746 if (na < nb) 1103 if (na < nb)
@@ -753,7 +1110,13 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
753 1110
754 } 1111 }
755 rr= &(r[na]); 1112 rr= &(r[na]);
756 rr[0]=bn_mul_words(r,a,na,b[0]); 1113 if (nb <= 0)
1114 {
1115 (void)bn_mul_words(r,a,na,0);
1116 return;
1117 }
1118 else
1119 rr[0]=bn_mul_words(r,a,na,b[0]);
757 1120
758 for (;;) 1121 for (;;)
759 { 1122 {
@@ -774,7 +1137,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
774void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 1137void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
775 { 1138 {
776#ifdef BN_COUNT 1139#ifdef BN_COUNT
777 printf(" bn_mul_low_normal %d * %d\n",n,n); 1140 fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
778#endif 1141#endif
779 bn_mul_words(r,a,n,b[0]); 1142 bn_mul_words(r,a,n,b[0]);
780 1143
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c
index a5f01b92eb..918b9237c6 100644
--- a/src/lib/libcrypto/bn/bn_prime.c
+++ b/src/lib/libcrypto/bn/bn_prime.c
@@ -56,7 +56,7 @@
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58/* ==================================================================== 58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. 59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 * 60 *
61 * Redistribution and use in source and binary forms, with or without 61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions 62 * modification, are permitted provided that the following conditions
@@ -125,12 +125,13 @@ static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
125 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont); 125 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont);
126static int probable_prime(BIGNUM *rnd, int bits); 126static int probable_prime(BIGNUM *rnd, int bits);
127static int probable_prime_dh(BIGNUM *rnd, int bits, 127static int probable_prime_dh(BIGNUM *rnd, int bits,
128 BIGNUM *add, BIGNUM *rem, BN_CTX *ctx); 128 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
129static int probable_prime_dh_safe(BIGNUM *rnd, int bits, 129static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
130 BIGNUM *add, BIGNUM *rem, BN_CTX *ctx); 130 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
131 131
132BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, BIGNUM *add, 132BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe,
133 BIGNUM *rem, void (*callback)(int,int,void *), void *cb_arg) 133 const BIGNUM *add, const BIGNUM *rem,
134 void (*callback)(int,int,void *), void *cb_arg)
134 { 135 {
135 BIGNUM *rnd=NULL; 136 BIGNUM *rnd=NULL;
136 BIGNUM t; 137 BIGNUM t;
@@ -225,12 +226,15 @@ int BN_is_prime_fasttest(const BIGNUM *a, int checks,
225 BN_MONT_CTX *mont = NULL; 226 BN_MONT_CTX *mont = NULL;
226 const BIGNUM *A = NULL; 227 const BIGNUM *A = NULL;
227 228
229 if (BN_cmp(a, BN_value_one()) <= 0)
230 return 0;
231
228 if (checks == BN_prime_checks) 232 if (checks == BN_prime_checks)
229 checks = BN_prime_checks_for_size(BN_num_bits(a)); 233 checks = BN_prime_checks_for_size(BN_num_bits(a));
230 234
231 /* first look for small factors */ 235 /* first look for small factors */
232 if (!BN_is_odd(a)) 236 if (!BN_is_odd(a))
233 return(0); 237 return 0;
234 if (do_trial_division) 238 if (do_trial_division)
235 { 239 {
236 for (i = 1; i < NUMPRIMES; i++) 240 for (i = 1; i < NUMPRIMES; i++)
@@ -289,11 +293,8 @@ int BN_is_prime_fasttest(const BIGNUM *a, int checks,
289 293
290 for (i = 0; i < checks; i++) 294 for (i = 0; i < checks; i++)
291 { 295 {
292 if (!BN_pseudo_rand(check, BN_num_bits(A1), 0, 0)) 296 if (!BN_pseudo_rand_range(check, A1))
293 goto err; 297 goto err;
294 if (BN_cmp(check, A1) >= 0)
295 if (!BN_sub(check, check, A1))
296 goto err;
297 if (!BN_add_word(check, 1)) 298 if (!BN_add_word(check, 1))
298 goto err; 299 goto err;
299 /* now 1 <= check < A */ 300 /* now 1 <= check < A */
@@ -376,8 +377,8 @@ again:
376 return(1); 377 return(1);
377 } 378 }
378 379
379static int probable_prime_dh(BIGNUM *rnd, int bits, BIGNUM *add, BIGNUM *rem, 380static int probable_prime_dh(BIGNUM *rnd, int bits,
380 BN_CTX *ctx) 381 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx)
381 { 382 {
382 int i,ret=0; 383 int i,ret=0;
383 BIGNUM *t1; 384 BIGNUM *t1;
@@ -413,8 +414,8 @@ err:
413 return(ret); 414 return(ret);
414 } 415 }
415 416
416static int probable_prime_dh_safe(BIGNUM *p, int bits, BIGNUM *padd, 417static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
417 BIGNUM *rem, BN_CTX *ctx) 418 const BIGNUM *rem, BN_CTX *ctx)
418 { 419 {
419 int i,ret=0; 420 int i,ret=0;
420 BIGNUM *t1,*qadd,*q; 421 BIGNUM *t1,*qadd,*q;
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
index 532e66bcc3..5f46b1826c 100644
--- a/src/lib/libcrypto/bn/bn_print.c
+++ b/src/lib/libcrypto/bn/bn_print.c
@@ -277,8 +277,8 @@ err:
277 return(0); 277 return(0);
278 } 278 }
279 279
280#ifndef NO_BIO 280#ifndef OPENSSL_NO_BIO
281#ifndef NO_FP_API 281#ifndef OPENSSL_NO_FP_API
282int BN_print_fp(FILE *fp, const BIGNUM *a) 282int BN_print_fp(FILE *fp, const BIGNUM *a)
283 { 283 {
284 BIO *b; 284 BIO *b;
@@ -321,7 +321,7 @@ end:
321#endif 321#endif
322 322
323#ifdef BN_DEBUG 323#ifdef BN_DEBUG
324void bn_dump1(FILE *o, const char *a, BN_ULONG *b,int n) 324void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n)
325 { 325 {
326 int i; 326 int i;
327 fprintf(o, "%s=", a); 327 fprintf(o, "%s=", a);
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c
index acd0619921..9e08ccd22e 100644
--- a/src/lib/libcrypto/bn/bn_rand.c
+++ b/src/lib/libcrypto/bn/bn_rand.c
@@ -55,6 +55,59 @@
55 * copied and put under another distribution licence 55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
58 111
59#include <stdio.h> 112#include <stdio.h>
60#include <time.h> 113#include <time.h>
@@ -171,9 +224,11 @@ int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
171 } 224 }
172#endif 225#endif
173 226
227
174/* random number r: 0 <= r < range */ 228/* random number r: 0 <= r < range */
175int BN_rand_range(BIGNUM *r, BIGNUM *range) 229static int bn_rand_range(int pseudo, BIGNUM *r, BIGNUM *range)
176 { 230 {
231 int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand;
177 int n; 232 int n;
178 233
179 if (range->neg || BN_is_zero(range)) 234 if (range->neg || BN_is_zero(range))
@@ -184,26 +239,19 @@ int BN_rand_range(BIGNUM *r, BIGNUM *range)
184 239
185 n = BN_num_bits(range); /* n > 0 */ 240 n = BN_num_bits(range); /* n > 0 */
186 241
242 /* BN_is_bit_set(range, n - 1) always holds */
243
187 if (n == 1) 244 if (n == 1)
188 { 245 {
189 if (!BN_zero(r)) return 0; 246 if (!BN_zero(r)) return 0;
190 } 247 }
191 else if (BN_is_bit_set(range, n - 2)) 248 else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3))
192 {
193 do
194 {
195 /* range = 11..._2, so each iteration succeeds with probability >= .75 */
196 if (!BN_rand(r, n, -1, 0)) return 0;
197 }
198 while (BN_cmp(r, range) >= 0);
199 }
200 else
201 { 249 {
202 /* range = 10..._2, 250 /* range = 100..._2,
203 * so 3*range (= 11..._2) is exactly one bit longer than range */ 251 * so 3*range (= 11..._2) is exactly one bit longer than range */
204 do 252 do
205 { 253 {
206 if (!BN_rand(r, n + 1, -1, 0)) return 0; 254 if (!bn_rand(r, n + 1, -1, 0)) return 0;
207 /* If r < 3*range, use r := r MOD range 255 /* If r < 3*range, use r := r MOD range
208 * (which is either r, r - range, or r - 2*range). 256 * (which is either r, r - range, or r - 2*range).
209 * Otherwise, iterate once more. 257 * Otherwise, iterate once more.
@@ -218,6 +266,26 @@ int BN_rand_range(BIGNUM *r, BIGNUM *range)
218 } 266 }
219 while (BN_cmp(r, range) >= 0); 267 while (BN_cmp(r, range) >= 0);
220 } 268 }
269 else
270 {
271 do
272 {
273 /* range = 11..._2 or range = 101..._2 */
274 if (!bn_rand(r, n, -1, 0)) return 0;
275 }
276 while (BN_cmp(r, range) >= 0);
277 }
221 278
222 return 1; 279 return 1;
223 } 280 }
281
282
283int BN_rand_range(BIGNUM *r, BIGNUM *range)
284 {
285 return bn_rand_range(0, r, range);
286 }
287
288int BN_pseudo_rand_range(BIGNUM *r, BIGNUM *range)
289 {
290 return bn_rand_range(1, r, range);
291 }
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c
index d019941d6b..ef5fdd4708 100644
--- a/src/lib/libcrypto/bn/bn_recp.c
+++ b/src/lib/libcrypto/bn/bn_recp.c
@@ -93,18 +93,19 @@ void BN_RECP_CTX_free(BN_RECP_CTX *recp)
93 93
94int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) 94int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
95 { 95 {
96 BN_copy(&(recp->N),d); 96 if (!BN_copy(&(recp->N),d)) return 0;
97 BN_zero(&(recp->Nr)); 97 if (!BN_zero(&(recp->Nr))) return 0;
98 recp->num_bits=BN_num_bits(d); 98 recp->num_bits=BN_num_bits(d);
99 recp->shift=0; 99 recp->shift=0;
100 return(1); 100 return(1);
101 } 101 }
102 102
103int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y, BN_RECP_CTX *recp, 103int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
104 BN_CTX *ctx) 104 BN_RECP_CTX *recp, BN_CTX *ctx)
105 { 105 {
106 int ret=0; 106 int ret=0;
107 BIGNUM *a; 107 BIGNUM *a;
108 const BIGNUM *ca;
108 109
109 BN_CTX_start(ctx); 110 BN_CTX_start(ctx);
110 if ((a = BN_CTX_get(ctx)) == NULL) goto err; 111 if ((a = BN_CTX_get(ctx)) == NULL) goto err;
@@ -114,19 +115,19 @@ int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y, BN_RECP_CTX *recp,
114 { if (!BN_sqr(a,x,ctx)) goto err; } 115 { if (!BN_sqr(a,x,ctx)) goto err; }
115 else 116 else
116 { if (!BN_mul(a,x,y,ctx)) goto err; } 117 { if (!BN_mul(a,x,y,ctx)) goto err; }
118 ca = a;
117 } 119 }
118 else 120 else
119 a=x; /* Just do the mod */ 121 ca=x; /* Just do the mod */
120 122
121 BN_div_recp(NULL,r,a,recp,ctx); 123 ret = BN_div_recp(NULL,r,ca,recp,ctx);
122 ret=1;
123err: 124err:
124 BN_CTX_end(ctx); 125 BN_CTX_end(ctx);
125 return(ret); 126 return(ret);
126 } 127 }
127 128
128int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BN_RECP_CTX *recp, 129int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
129 BN_CTX *ctx) 130 BN_RECP_CTX *recp, BN_CTX *ctx)
130 { 131 {
131 int i,j,ret=0; 132 int i,j,ret=0;
132 BIGNUM *a,*b,*d,*r; 133 BIGNUM *a,*b,*d,*r;
@@ -146,8 +147,8 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BN_RECP_CTX *recp,
146 147
147 if (BN_ucmp(m,&(recp->N)) < 0) 148 if (BN_ucmp(m,&(recp->N)) < 0)
148 { 149 {
149 BN_zero(d); 150 if (!BN_zero(d)) return 0;
150 BN_copy(r,m); 151 if (!BN_copy(r,m)) return 0;
151 BN_CTX_end(ctx); 152 BN_CTX_end(ctx);
152 return(1); 153 return(1);
153 } 154 }
@@ -157,20 +158,28 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BN_RECP_CTX *recp,
157 * we need multiply ABCDEF by 3 digests of the reciprocal of ab 158 * we need multiply ABCDEF by 3 digests of the reciprocal of ab
158 * 159 *
159 */ 160 */
160 i=BN_num_bits(m);
161 161
162 /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
163 i=BN_num_bits(m);
162 j=recp->num_bits<<1; 164 j=recp->num_bits<<1;
163 if (j>i) i=j; 165 if (j>i) i=j;
164 j>>=1;
165 166
167 /* Nr := round(2^i / N) */
166 if (i != recp->shift) 168 if (i != recp->shift)
167 recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N), 169 recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N),
168 i,ctx); 170 i,ctx); /* BN_reciprocal returns i, or -1 for an error */
171 if (recp->shift == -1) goto err;
169 172
170 if (!BN_rshift(a,m,j)) goto err; 173 /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))|
174 * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))|
175 * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
176 * = |m/N|
177 */
178 if (!BN_rshift(a,m,recp->num_bits)) goto err;
171 if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err; 179 if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err;
172 if (!BN_rshift(d,b,i-j)) goto err; 180 if (!BN_rshift(d,b,i-recp->num_bits)) goto err;
173 d->neg=0; 181 d->neg=0;
182
174 if (!BN_mul(b,&(recp->N),d,ctx)) goto err; 183 if (!BN_mul(b,&(recp->N),d,ctx)) goto err;
175 if (!BN_usub(r,m,b)) goto err; 184 if (!BN_usub(r,m,b)) goto err;
176 r->neg=0; 185 r->neg=0;
@@ -201,20 +210,21 @@ err:
201 * We actually calculate with an extra word of precision, so 210 * We actually calculate with an extra word of precision, so
202 * we can do faster division if the remainder is not required. 211 * we can do faster division if the remainder is not required.
203 */ 212 */
204int BN_reciprocal(BIGNUM *r, BIGNUM *m, int len, BN_CTX *ctx) 213/* r := 2^len / m */
214int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
205 { 215 {
206 int ret= -1; 216 int ret= -1;
207 BIGNUM t; 217 BIGNUM t;
208 218
209 BN_init(&t); 219 BN_init(&t);
210 220
211 BN_zero(&t); 221 if (!BN_zero(&t)) goto err;
212 if (!BN_set_bit(&t,len)) goto err; 222 if (!BN_set_bit(&t,len)) goto err;
213 223
214 if (!BN_div(r,NULL,&t,m,ctx)) goto err; 224 if (!BN_div(r,NULL,&t,m,ctx)) goto err;
225
215 ret=len; 226 ret=len;
216err: 227err:
217 BN_free(&t); 228 BN_free(&t);
218 return(ret); 229 return(ret);
219 } 230 }
220
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
index c2608f9f4a..70f785ea18 100644
--- a/src/lib/libcrypto/bn/bn_shift.c
+++ b/src/lib/libcrypto/bn/bn_shift.c
@@ -60,7 +60,7 @@
60#include "cryptlib.h" 60#include "cryptlib.h"
61#include "bn_lcl.h" 61#include "bn_lcl.h"
62 62
63int BN_lshift1(BIGNUM *r, BIGNUM *a) 63int BN_lshift1(BIGNUM *r, const BIGNUM *a)
64 { 64 {
65 register BN_ULONG *ap,*rp,t,c; 65 register BN_ULONG *ap,*rp,t,c;
66 int i; 66 int i;
@@ -92,7 +92,7 @@ int BN_lshift1(BIGNUM *r, BIGNUM *a)
92 return(1); 92 return(1);
93 } 93 }
94 94
95int BN_rshift1(BIGNUM *r, BIGNUM *a) 95int BN_rshift1(BIGNUM *r, const BIGNUM *a)
96 { 96 {
97 BN_ULONG *ap,*rp,t,c; 97 BN_ULONG *ap,*rp,t,c;
98 int i; 98 int i;
@@ -128,8 +128,8 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
128 BN_ULONG l; 128 BN_ULONG l;
129 129
130 r->neg=a->neg; 130 r->neg=a->neg;
131 if (bn_wexpand(r,a->top+(n/BN_BITS2)+1) == NULL) return(0);
132 nw=n/BN_BITS2; 131 nw=n/BN_BITS2;
132 if (bn_wexpand(r,a->top+nw+1) == NULL) return(0);
133 lb=n%BN_BITS2; 133 lb=n%BN_BITS2;
134 rb=BN_BITS2-lb; 134 rb=BN_BITS2-lb;
135 f=a->d; 135 f=a->d;
@@ -153,7 +153,7 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
153 return(1); 153 return(1);
154 } 154 }
155 155
156int BN_rshift(BIGNUM *r, BIGNUM *a, int n) 156int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
157 { 157 {
158 int i,j,nw,lb,rb; 158 int i,j,nw,lb,rb;
159 BN_ULONG *t,*f; 159 BN_ULONG *t,*f;
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
index 75f4f38392..c1d0cca438 100644
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ b/src/lib/libcrypto/bn/bn_sqr.c
@@ -62,14 +62,14 @@
62 62
63/* r must not be a */ 63/* r must not be a */
64/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */ 64/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
65int BN_sqr(BIGNUM *r, BIGNUM *a, BN_CTX *ctx) 65int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
66 { 66 {
67 int max,al; 67 int max,al;
68 int ret = 0; 68 int ret = 0;
69 BIGNUM *tmp,*rr; 69 BIGNUM *tmp,*rr;
70 70
71#ifdef BN_COUNT 71#ifdef BN_COUNT
72printf("BN_sqr %d * %d\n",a->top,a->top); 72 fprintf(stderr,"BN_sqr %d * %d\n",a->top,a->top);
73#endif 73#endif
74 bn_check_top(a); 74 bn_check_top(a);
75 75
@@ -88,7 +88,6 @@ printf("BN_sqr %d * %d\n",a->top,a->top);
88 max=(al+al); 88 max=(al+al);
89 if (bn_wexpand(rr,max+1) == NULL) goto err; 89 if (bn_wexpand(rr,max+1) == NULL) goto err;
90 90
91 r->neg=0;
92 if (al == 4) 91 if (al == 4)
93 { 92 {
94#ifndef BN_SQR_COMBA 93#ifndef BN_SQR_COMBA
@@ -124,7 +123,6 @@ printf("BN_sqr %d * %d\n",a->top,a->top);
124 k=j+j; 123 k=j+j;
125 if (al == j) 124 if (al == j)
126 { 125 {
127 if (bn_wexpand(a,k*2) == NULL) goto err;
128 if (bn_wexpand(tmp,k*2) == NULL) goto err; 126 if (bn_wexpand(tmp,k*2) == NULL) goto err;
129 bn_sqr_recursive(rr->d,a->d,al,tmp->d); 127 bn_sqr_recursive(rr->d,a->d,al,tmp->d);
130 } 128 }
@@ -141,6 +139,7 @@ printf("BN_sqr %d * %d\n",a->top,a->top);
141 } 139 }
142 140
143 rr->top=max; 141 rr->top=max;
142 rr->neg=0;
144 if ((max > 0) && (rr->d[max-1] == 0)) rr->top--; 143 if ((max > 0) && (rr->d[max-1] == 0)) rr->top--;
145 if (rr != r) BN_copy(r,rr); 144 if (rr != r) BN_copy(r,rr);
146 ret = 1; 145 ret = 1;
@@ -150,10 +149,11 @@ printf("BN_sqr %d * %d\n",a->top,a->top);
150 } 149 }
151 150
152/* tmp must have 2*n words */ 151/* tmp must have 2*n words */
153void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp) 152void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
154 { 153 {
155 int i,j,max; 154 int i,j,max;
156 BN_ULONG *ap,*rp; 155 const BN_ULONG *ap;
156 BN_ULONG *rp;
157 157
158 max=n*2; 158 max=n*2;
159 ap=a; 159 ap=a;
@@ -197,14 +197,14 @@ void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp)
197 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) 197 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
198 * a[1]*b[1] 198 * a[1]*b[1]
199 */ 199 */
200void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *t) 200void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
201 { 201 {
202 int n=n2/2; 202 int n=n2/2;
203 int zero,c1; 203 int zero,c1;
204 BN_ULONG ln,lo,*p; 204 BN_ULONG ln,lo,*p;
205 205
206#ifdef BN_COUNT 206#ifdef BN_COUNT
207printf(" bn_sqr_recursive %d * %d\n",n2,n2); 207 fprintf(stderr," bn_sqr_recursive %d * %d\n",n2,n2);
208#endif 208#endif
209 if (n2 == 4) 209 if (n2 == 4)
210 { 210 {
@@ -245,7 +245,7 @@ printf(" bn_sqr_recursive %d * %d\n",n2,n2);
245 if (!zero) 245 if (!zero)
246 bn_sqr_recursive(&(t[n2]),t,n,p); 246 bn_sqr_recursive(&(t[n2]),t,n,p);
247 else 247 else
248 memset(&(t[n2]),0,n*sizeof(BN_ULONG)); 248 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
249 bn_sqr_recursive(r,a,n,p); 249 bn_sqr_recursive(r,a,n,p);
250 bn_sqr_recursive(&(r[n2]),&(a[n]),n,p); 250 bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
251 251
diff --git a/src/lib/libcrypto/bn/bn_sqrt.c b/src/lib/libcrypto/bn/bn_sqrt.c
new file mode 100644
index 0000000000..e2a1105dc8
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_sqrt.c
@@ -0,0 +1,387 @@
1/* crypto/bn/bn_sqrt.c */
2/* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * and Bodo Moeller for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57
58#include "cryptlib.h"
59#include "bn_lcl.h"
60
61
62BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
63/* Returns 'ret' such that
64 * ret^2 == a (mod p),
65 * using the Tonelli/Shanks algorithm (cf. Henri Cohen, "A Course
66 * in Algebraic Computational Number Theory", algorithm 1.5.1).
67 * 'p' must be prime!
68 * If 'a' is not a square, this is not necessarily detected by
69 * the algorithms; a bogus result must be expected in this case.
70 */
71 {
72 BIGNUM *ret = in;
73 int err = 1;
74 int r;
75 BIGNUM *b, *q, *t, *x, *y;
76 int e, i, j;
77
78 if (!BN_is_odd(p) || BN_abs_is_word(p, 1))
79 {
80 if (BN_abs_is_word(p, 2))
81 {
82 if (ret == NULL)
83 ret = BN_new();
84 if (ret == NULL)
85 goto end;
86 if (!BN_set_word(ret, BN_is_bit_set(a, 0)))
87 {
88 BN_free(ret);
89 return NULL;
90 }
91 return ret;
92 }
93
94 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
95 return(NULL);
96 }
97
98 if (BN_is_zero(a) || BN_is_one(a))
99 {
100 if (ret == NULL)
101 ret = BN_new();
102 if (ret == NULL)
103 goto end;
104 if (!BN_set_word(ret, BN_is_one(a)))
105 {
106 BN_free(ret);
107 return NULL;
108 }
109 return ret;
110 }
111
112#if 0 /* if BN_mod_sqrt is used with correct input, this just wastes time */
113 r = BN_kronecker(a, p, ctx);
114 if (r < -1) return NULL;
115 if (r == -1)
116 {
117 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
118 return(NULL);
119 }
120#endif
121
122 BN_CTX_start(ctx);
123 b = BN_CTX_get(ctx);
124 q = BN_CTX_get(ctx);
125 t = BN_CTX_get(ctx);
126 x = BN_CTX_get(ctx);
127 y = BN_CTX_get(ctx);
128 if (y == NULL) goto end;
129
130 if (ret == NULL)
131 ret = BN_new();
132 if (ret == NULL) goto end;
133
134 /* now write |p| - 1 as 2^e*q where q is odd */
135 e = 1;
136 while (!BN_is_bit_set(p, e))
137 e++;
138 /* we'll set q later (if needed) */
139
140 if (e == 1)
141 {
142 /* The easy case: (|p|-1)/2 is odd, so 2 has an inverse
143 * modulo (|p|-1)/2, and square roots can be computed
144 * directly by modular exponentiation.
145 * We have
146 * 2 * (|p|+1)/4 == 1 (mod (|p|-1)/2),
147 * so we can use exponent (|p|+1)/4, i.e. (|p|-3)/4 + 1.
148 */
149 if (!BN_rshift(q, p, 2)) goto end;
150 q->neg = 0;
151 if (!BN_add_word(q, 1)) goto end;
152 if (!BN_mod_exp(ret, a, q, p, ctx)) goto end;
153 err = 0;
154 goto end;
155 }
156
157 if (e == 2)
158 {
159 /* |p| == 5 (mod 8)
160 *
161 * In this case 2 is always a non-square since
162 * Legendre(2,p) = (-1)^((p^2-1)/8) for any odd prime.
163 * So if a really is a square, then 2*a is a non-square.
164 * Thus for
165 * b := (2*a)^((|p|-5)/8),
166 * i := (2*a)*b^2
167 * we have
168 * i^2 = (2*a)^((1 + (|p|-5)/4)*2)
169 * = (2*a)^((p-1)/2)
170 * = -1;
171 * so if we set
172 * x := a*b*(i-1),
173 * then
174 * x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
175 * = a^2 * b^2 * (-2*i)
176 * = a*(-i)*(2*a*b^2)
177 * = a*(-i)*i
178 * = a.
179 *
180 * (This is due to A.O.L. Atkin,
181 * <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
182 * November 1992.)
183 */
184
185 /* make sure that a is reduced modulo p */
186 if (a->neg || BN_ucmp(a, p) >= 0)
187 {
188 if (!BN_nnmod(x, a, p, ctx)) goto end;
189 a = x; /* use x as temporary variable */
190 }
191
192 /* t := 2*a */
193 if (!BN_mod_lshift1_quick(t, a, p)) goto end;
194
195 /* b := (2*a)^((|p|-5)/8) */
196 if (!BN_rshift(q, p, 3)) goto end;
197 q->neg = 0;
198 if (!BN_mod_exp(b, t, q, p, ctx)) goto end;
199
200 /* y := b^2 */
201 if (!BN_mod_sqr(y, b, p, ctx)) goto end;
202
203 /* t := (2*a)*b^2 - 1*/
204 if (!BN_mod_mul(t, t, y, p, ctx)) goto end;
205 if (!BN_sub_word(t, 1)) goto end;
206
207 /* x = a*b*t */
208 if (!BN_mod_mul(x, a, b, p, ctx)) goto end;
209 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
210
211 if (!BN_copy(ret, x)) goto end;
212 err = 0;
213 goto end;
214 }
215
216 /* e > 2, so we really have to use the Tonelli/Shanks algorithm.
217 * First, find some y that is not a square. */
218 if (!BN_copy(q, p)) goto end; /* use 'q' as temp */
219 q->neg = 0;
220 i = 2;
221 do
222 {
223 /* For efficiency, try small numbers first;
224 * if this fails, try random numbers.
225 */
226 if (i < 22)
227 {
228 if (!BN_set_word(y, i)) goto end;
229 }
230 else
231 {
232 if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) goto end;
233 if (BN_ucmp(y, p) >= 0)
234 {
235 if (!(p->neg ? BN_add : BN_sub)(y, y, p)) goto end;
236 }
237 /* now 0 <= y < |p| */
238 if (BN_is_zero(y))
239 if (!BN_set_word(y, i)) goto end;
240 }
241
242 r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
243 if (r < -1) goto end;
244 if (r == 0)
245 {
246 /* m divides p */
247 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
248 goto end;
249 }
250 }
251 while (r == 1 && ++i < 82);
252
253 if (r != -1)
254 {
255 /* Many rounds and still no non-square -- this is more likely
256 * a bug than just bad luck.
257 * Even if p is not prime, we should have found some y
258 * such that r == -1.
259 */
260 BNerr(BN_F_BN_MOD_SQRT, BN_R_TOO_MANY_ITERATIONS);
261 goto end;
262 }
263
264 /* Here's our actual 'q': */
265 if (!BN_rshift(q, q, e)) goto end;
266
267 /* Now that we have some non-square, we can find an element
268 * of order 2^e by computing its q'th power. */
269 if (!BN_mod_exp(y, y, q, p, ctx)) goto end;
270 if (BN_is_one(y))
271 {
272 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
273 goto end;
274 }
275
276 /* Now we know that (if p is indeed prime) there is an integer
277 * k, 0 <= k < 2^e, such that
278 *
279 * a^q * y^k == 1 (mod p).
280 *
281 * As a^q is a square and y is not, k must be even.
282 * q+1 is even, too, so there is an element
283 *
284 * X := a^((q+1)/2) * y^(k/2),
285 *
286 * and it satisfies
287 *
288 * X^2 = a^q * a * y^k
289 * = a,
290 *
291 * so it is the square root that we are looking for.
292 */
293
294 /* t := (q-1)/2 (note that q is odd) */
295 if (!BN_rshift1(t, q)) goto end;
296
297 /* x := a^((q-1)/2) */
298 if (BN_is_zero(t)) /* special case: p = 2^e + 1 */
299 {
300 if (!BN_nnmod(t, a, p, ctx)) goto end;
301 if (BN_is_zero(t))
302 {
303 /* special case: a == 0 (mod p) */
304 if (!BN_zero(ret)) goto end;
305 err = 0;
306 goto end;
307 }
308 else
309 if (!BN_one(x)) goto end;
310 }
311 else
312 {
313 if (!BN_mod_exp(x, a, t, p, ctx)) goto end;
314 if (BN_is_zero(x))
315 {
316 /* special case: a == 0 (mod p) */
317 if (!BN_zero(ret)) goto end;
318 err = 0;
319 goto end;
320 }
321 }
322
323 /* b := a*x^2 (= a^q) */
324 if (!BN_mod_sqr(b, x, p, ctx)) goto end;
325 if (!BN_mod_mul(b, b, a, p, ctx)) goto end;
326
327 /* x := a*x (= a^((q+1)/2)) */
328 if (!BN_mod_mul(x, x, a, p, ctx)) goto end;
329
330 while (1)
331 {
332 /* Now b is a^q * y^k for some even k (0 <= k < 2^E
333 * where E refers to the original value of e, which we
334 * don't keep in a variable), and x is a^((q+1)/2) * y^(k/2).
335 *
336 * We have a*b = x^2,
337 * y^2^(e-1) = -1,
338 * b^2^(e-1) = 1.
339 */
340
341 if (BN_is_one(b))
342 {
343 if (!BN_copy(ret, x)) goto end;
344 err = 0;
345 goto end;
346 }
347
348
349 /* find smallest i such that b^(2^i) = 1 */
350 i = 1;
351 if (!BN_mod_sqr(t, b, p, ctx)) goto end;
352 while (!BN_is_one(t))
353 {
354 i++;
355 if (i == e)
356 {
357 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
358 goto end;
359 }
360 if (!BN_mod_mul(t, t, t, p, ctx)) goto end;
361 }
362
363
364 /* t := y^2^(e - i - 1) */
365 if (!BN_copy(t, y)) goto end;
366 for (j = e - i - 1; j > 0; j--)
367 {
368 if (!BN_mod_sqr(t, t, p, ctx)) goto end;
369 }
370 if (!BN_mod_mul(y, t, t, p, ctx)) goto end;
371 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
372 if (!BN_mod_mul(b, b, y, p, ctx)) goto end;
373 e = i;
374 }
375
376 end:
377 if (err)
378 {
379 if (ret != NULL && ret != in)
380 {
381 BN_clear_free(ret);
382 }
383 ret = NULL;
384 }
385 BN_CTX_end(ctx);
386 return ret;
387 }
diff --git a/src/lib/libcrypto/bn/bnspeed.c b/src/lib/libcrypto/bn/bnspeed.c
index 20fc7e08ff..b554ac8cf8 100644
--- a/src/lib/libcrypto/bn/bnspeed.c
+++ b/src/lib/libcrypto/bn/bnspeed.c
@@ -71,7 +71,7 @@
71#include <openssl/crypto.h> 71#include <openssl/crypto.h>
72#include <openssl/err.h> 72#include <openssl/err.h>
73 73
74#if !defined(MSDOS) && (!defined(VMS) || defined(__DECC)) 74#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
75#define TIMES 75#define TIMES
76#endif 76#endif
77 77
@@ -87,7 +87,7 @@
87 The __TMS macro will show if it was. If it wasn't defined, we should 87 The __TMS macro will show if it was. If it wasn't defined, we should
88 undefine TIMES, since that tells the rest of the program how things 88 undefine TIMES, since that tells the rest of the program how things
89 should be handled. -- Richard Levitte */ 89 should be handled. -- Richard Levitte */
90#if defined(VMS) && defined(__DECC) && !defined(__TMS) 90#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
91#undef TIMES 91#undef TIMES
92#endif 92#endif
93 93
diff --git a/src/lib/libcrypto/bn/bntest.c b/src/lib/libcrypto/bn/bntest.c
index af0c2629e8..443cf420e5 100644
--- a/src/lib/libcrypto/bn/bntest.c
+++ b/src/lib/libcrypto/bn/bntest.c
@@ -60,7 +60,7 @@
60#include <stdlib.h> 60#include <stdlib.h>
61#include <string.h> 61#include <string.h>
62 62
63#include "openssl/e_os.h" 63#include "e_os.h"
64 64
65#include <openssl/bio.h> 65#include <openssl/bio.h>
66#include <openssl/bn.h> 66#include <openssl/bn.h>
@@ -68,7 +68,7 @@
68#include <openssl/x509.h> 68#include <openssl/x509.h>
69#include <openssl/err.h> 69#include <openssl/err.h>
70 70
71#ifdef WINDOWS 71#ifdef OPENSSL_SYS_WINDOWS
72#include "../bio/bss_file.c" 72#include "../bio/bss_file.c"
73#endif 73#endif
74 74
@@ -91,10 +91,12 @@ int test_mod(BIO *bp,BN_CTX *ctx);
91int test_mod_mul(BIO *bp,BN_CTX *ctx); 91int test_mod_mul(BIO *bp,BN_CTX *ctx);
92int test_mod_exp(BIO *bp,BN_CTX *ctx); 92int test_mod_exp(BIO *bp,BN_CTX *ctx);
93int test_exp(BIO *bp,BN_CTX *ctx); 93int test_exp(BIO *bp,BN_CTX *ctx);
94int test_kron(BIO *bp,BN_CTX *ctx);
95int test_sqrt(BIO *bp,BN_CTX *ctx);
94int rand_neg(void); 96int rand_neg(void);
95static int results=0; 97static int results=0;
96 98
97#ifdef NO_STDIO 99#ifdef OPENSSL_NO_STDIO
98#define APPS_WIN16 100#define APPS_WIN16
99#include "bss_file.c" 101#include "bss_file.c"
100#endif 102#endif
@@ -224,6 +226,14 @@ int main(int argc, char *argv[])
224 if (!test_exp(out,ctx)) goto err; 226 if (!test_exp(out,ctx)) goto err;
225 BIO_flush(out); 227 BIO_flush(out);
226 228
229 message(out,"BN_kronecker");
230 if (!test_kron(out,ctx)) goto err;
231 BIO_flush(out);
232
233 message(out,"BN_mod_sqrt");
234 if (!test_sqrt(out,ctx)) goto err;
235 BIO_flush(out);
236
227 BN_CTX_free(ctx); 237 BN_CTX_free(ctx);
228 BIO_free(out); 238 BIO_free(out);
229 239
@@ -243,7 +253,6 @@ int test_add(BIO *bp)
243 { 253 {
244 BIGNUM a,b,c; 254 BIGNUM a,b,c;
245 int i; 255 int i;
246 int j;
247 256
248 BN_init(&a); 257 BN_init(&a);
249 BN_init(&b); 258 BN_init(&b);
@@ -255,9 +264,6 @@ int test_add(BIO *bp)
255 BN_bntest_rand(&b,450+i,0,0); 264 BN_bntest_rand(&b,450+i,0,0);
256 a.neg=rand_neg(); 265 a.neg=rand_neg();
257 b.neg=rand_neg(); 266 b.neg=rand_neg();
258 if (bp == NULL)
259 for (j=0; j<10000; j++)
260 BN_add(&c,&a,&b);
261 BN_add(&c,&a,&b); 267 BN_add(&c,&a,&b);
262 if (bp != NULL) 268 if (bp != NULL)
263 { 269 {
@@ -291,7 +297,6 @@ int test_sub(BIO *bp)
291 { 297 {
292 BIGNUM a,b,c; 298 BIGNUM a,b,c;
293 int i; 299 int i;
294 int j;
295 300
296 BN_init(&a); 301 BN_init(&a);
297 BN_init(&b); 302 BN_init(&b);
@@ -312,9 +317,6 @@ int test_sub(BIO *bp)
312 a.neg=rand_neg(); 317 a.neg=rand_neg();
313 b.neg=rand_neg(); 318 b.neg=rand_neg();
314 } 319 }
315 if (bp == NULL)
316 for (j=0; j<10000; j++)
317 BN_sub(&c,&a,&b);
318 BN_sub(&c,&a,&b); 320 BN_sub(&c,&a,&b);
319 if (bp != NULL) 321 if (bp != NULL)
320 { 322 {
@@ -346,7 +348,6 @@ int test_div(BIO *bp, BN_CTX *ctx)
346 { 348 {
347 BIGNUM a,b,c,d,e; 349 BIGNUM a,b,c,d,e;
348 int i; 350 int i;
349 int j;
350 351
351 BN_init(&a); 352 BN_init(&a);
352 BN_init(&b); 353 BN_init(&b);
@@ -367,9 +368,6 @@ int test_div(BIO *bp, BN_CTX *ctx)
367 BN_bntest_rand(&b,50+3*(i-num1),0,0); 368 BN_bntest_rand(&b,50+3*(i-num1),0,0);
368 a.neg=rand_neg(); 369 a.neg=rand_neg();
369 b.neg=rand_neg(); 370 b.neg=rand_neg();
370 if (bp == NULL)
371 for (j=0; j<100; j++)
372 BN_div(&d,&c,&a,&b,ctx);
373 BN_div(&d,&c,&a,&b,ctx); 371 BN_div(&d,&c,&a,&b,ctx);
374 if (bp != NULL) 372 if (bp != NULL)
375 { 373 {
@@ -415,7 +413,6 @@ int test_div_recp(BIO *bp, BN_CTX *ctx)
415 BIGNUM a,b,c,d,e; 413 BIGNUM a,b,c,d,e;
416 BN_RECP_CTX recp; 414 BN_RECP_CTX recp;
417 int i; 415 int i;
418 int j;
419 416
420 BN_RECP_CTX_init(&recp); 417 BN_RECP_CTX_init(&recp);
421 BN_init(&a); 418 BN_init(&a);
@@ -438,9 +435,6 @@ int test_div_recp(BIO *bp, BN_CTX *ctx)
438 a.neg=rand_neg(); 435 a.neg=rand_neg();
439 b.neg=rand_neg(); 436 b.neg=rand_neg();
440 BN_RECP_CTX_set(&recp,&b,ctx); 437 BN_RECP_CTX_set(&recp,&b,ctx);
441 if (bp == NULL)
442 for (j=0; j<100; j++)
443 BN_div_recp(&d,&c,&a,&recp,ctx);
444 BN_div_recp(&d,&c,&a,&recp,ctx); 438 BN_div_recp(&d,&c,&a,&recp,ctx);
445 if (bp != NULL) 439 if (bp != NULL)
446 { 440 {
@@ -491,10 +485,11 @@ int test_mul(BIO *bp)
491 { 485 {
492 BIGNUM a,b,c,d,e; 486 BIGNUM a,b,c,d,e;
493 int i; 487 int i;
494 int j; 488 BN_CTX *ctx;
495 BN_CTX ctx;
496 489
497 BN_CTX_init(&ctx); 490 ctx = BN_CTX_new();
491 if (ctx == NULL) exit(1);
492
498 BN_init(&a); 493 BN_init(&a);
499 BN_init(&b); 494 BN_init(&b);
500 BN_init(&c); 495 BN_init(&c);
@@ -512,10 +507,7 @@ int test_mul(BIO *bp)
512 BN_bntest_rand(&b,i-num1,0,0); 507 BN_bntest_rand(&b,i-num1,0,0);
513 a.neg=rand_neg(); 508 a.neg=rand_neg();
514 b.neg=rand_neg(); 509 b.neg=rand_neg();
515 if (bp == NULL) 510 BN_mul(&c,&a,&b,ctx);
516 for (j=0; j<100; j++)
517 BN_mul(&c,&a,&b,&ctx);
518 BN_mul(&c,&a,&b,&ctx);
519 if (bp != NULL) 511 if (bp != NULL)
520 { 512 {
521 if (!results) 513 if (!results)
@@ -528,7 +520,7 @@ int test_mul(BIO *bp)
528 BN_print(bp,&c); 520 BN_print(bp,&c);
529 BIO_puts(bp,"\n"); 521 BIO_puts(bp,"\n");
530 } 522 }
531 BN_div(&d,&e,&c,&a,&ctx); 523 BN_div(&d,&e,&c,&a,ctx);
532 BN_sub(&d,&d,&b); 524 BN_sub(&d,&d,&b);
533 if(!BN_is_zero(&d) || !BN_is_zero(&e)) 525 if(!BN_is_zero(&d) || !BN_is_zero(&e))
534 { 526 {
@@ -541,7 +533,7 @@ int test_mul(BIO *bp)
541 BN_free(&c); 533 BN_free(&c);
542 BN_free(&d); 534 BN_free(&d);
543 BN_free(&e); 535 BN_free(&e);
544 BN_CTX_free(&ctx); 536 BN_CTX_free(ctx);
545 return(1); 537 return(1);
546 } 538 }
547 539
@@ -549,7 +541,6 @@ int test_sqr(BIO *bp, BN_CTX *ctx)
549 { 541 {
550 BIGNUM a,c,d,e; 542 BIGNUM a,c,d,e;
551 int i; 543 int i;
552 int j;
553 544
554 BN_init(&a); 545 BN_init(&a);
555 BN_init(&c); 546 BN_init(&c);
@@ -560,9 +551,6 @@ int test_sqr(BIO *bp, BN_CTX *ctx)
560 { 551 {
561 BN_bntest_rand(&a,40+i*10,0,0); 552 BN_bntest_rand(&a,40+i*10,0,0);
562 a.neg=rand_neg(); 553 a.neg=rand_neg();
563 if (bp == NULL)
564 for (j=0; j<100; j++)
565 BN_sqr(&c,&a,ctx);
566 BN_sqr(&c,&a,ctx); 554 BN_sqr(&c,&a,ctx);
567 if (bp != NULL) 555 if (bp != NULL)
568 { 556 {
@@ -596,7 +584,6 @@ int test_mont(BIO *bp, BN_CTX *ctx)
596 BIGNUM a,b,c,d,A,B; 584 BIGNUM a,b,c,d,A,B;
597 BIGNUM n; 585 BIGNUM n;
598 int i; 586 int i;
599 int j;
600 BN_MONT_CTX *mont; 587 BN_MONT_CTX *mont;
601 588
602 BN_init(&a); 589 BN_init(&a);
@@ -620,12 +607,12 @@ int test_mont(BIO *bp, BN_CTX *ctx)
620 BN_bntest_rand(&n,bits,0,1); 607 BN_bntest_rand(&n,bits,0,1);
621 BN_MONT_CTX_set(mont,&n,ctx); 608 BN_MONT_CTX_set(mont,&n,ctx);
622 609
610 BN_nnmod(&a,&a,&n,ctx);
611 BN_nnmod(&b,&b,&n,ctx);
612
623 BN_to_montgomery(&A,&a,mont,ctx); 613 BN_to_montgomery(&A,&a,mont,ctx);
624 BN_to_montgomery(&B,&b,mont,ctx); 614 BN_to_montgomery(&B,&b,mont,ctx);
625 615
626 if (bp == NULL)
627 for (j=0; j<100; j++)
628 BN_mod_mul_montgomery(&c,&A,&B,mont,ctx);/**/
629 BN_mod_mul_montgomery(&c,&A,&B,mont,ctx);/**/ 616 BN_mod_mul_montgomery(&c,&A,&B,mont,ctx);/**/
630 BN_from_montgomery(&A,&c,mont,ctx);/**/ 617 BN_from_montgomery(&A,&c,mont,ctx);/**/
631 if (bp != NULL) 618 if (bp != NULL)
@@ -671,7 +658,6 @@ int test_mod(BIO *bp, BN_CTX *ctx)
671 { 658 {
672 BIGNUM *a,*b,*c,*d,*e; 659 BIGNUM *a,*b,*c,*d,*e;
673 int i; 660 int i;
674 int j;
675 661
676 a=BN_new(); 662 a=BN_new();
677 b=BN_new(); 663 b=BN_new();
@@ -685,9 +671,6 @@ int test_mod(BIO *bp, BN_CTX *ctx)
685 BN_bntest_rand(b,450+i*10,0,0); /**/ 671 BN_bntest_rand(b,450+i*10,0,0); /**/
686 a->neg=rand_neg(); 672 a->neg=rand_neg();
687 b->neg=rand_neg(); 673 b->neg=rand_neg();
688 if (bp == NULL)
689 for (j=0; j<100; j++)
690 BN_mod(c,a,b,ctx);/**/
691 BN_mod(c,a,b,ctx);/**/ 674 BN_mod(c,a,b,ctx);/**/
692 if (bp != NULL) 675 if (bp != NULL)
693 { 676 {
@@ -720,7 +703,7 @@ int test_mod(BIO *bp, BN_CTX *ctx)
720int test_mod_mul(BIO *bp, BN_CTX *ctx) 703int test_mod_mul(BIO *bp, BN_CTX *ctx)
721 { 704 {
722 BIGNUM *a,*b,*c,*d,*e; 705 BIGNUM *a,*b,*c,*d,*e;
723 int i; 706 int i,j;
724 707
725 a=BN_new(); 708 a=BN_new();
726 b=BN_new(); 709 b=BN_new();
@@ -728,6 +711,7 @@ int test_mod_mul(BIO *bp, BN_CTX *ctx)
728 d=BN_new(); 711 d=BN_new();
729 e=BN_new(); 712 e=BN_new();
730 713
714 for (j=0; j<3; j++) {
731 BN_bntest_rand(c,1024,0,0); /**/ 715 BN_bntest_rand(c,1024,0,0); /**/
732 for (i=0; i<num0; i++) 716 for (i=0; i<num0; i++)
733 { 717 {
@@ -735,10 +719,6 @@ int test_mod_mul(BIO *bp, BN_CTX *ctx)
735 BN_bntest_rand(b,425+i*11,0,0); /**/ 719 BN_bntest_rand(b,425+i*11,0,0); /**/
736 a->neg=rand_neg(); 720 a->neg=rand_neg();
737 b->neg=rand_neg(); 721 b->neg=rand_neg();
738 /* if (bp == NULL)
739 for (j=0; j<100; j++)
740 BN_mod_mul(d,a,b,c,ctx);*/ /**/
741
742 if (!BN_mod_mul(e,a,b,c,ctx)) 722 if (!BN_mod_mul(e,a,b,c,ctx))
743 { 723 {
744 unsigned long l; 724 unsigned long l;
@@ -757,6 +737,16 @@ int test_mod_mul(BIO *bp, BN_CTX *ctx)
757 BN_print(bp,b); 737 BN_print(bp,b);
758 BIO_puts(bp," % "); 738 BIO_puts(bp," % ");
759 BN_print(bp,c); 739 BN_print(bp,c);
740 if ((a->neg ^ b->neg) && !BN_is_zero(e))
741 {
742 /* If (a*b) % c is negative, c must be added
743 * in order to obtain the normalized remainder
744 * (new with OpenSSL 0.9.7, previous versions of
745 * BN_mod_mul could generate negative results)
746 */
747 BIO_puts(bp," + ");
748 BN_print(bp,c);
749 }
760 BIO_puts(bp," - "); 750 BIO_puts(bp," - ");
761 } 751 }
762 BN_print(bp,e); 752 BN_print(bp,e);
@@ -768,9 +758,11 @@ int test_mod_mul(BIO *bp, BN_CTX *ctx)
768 if(!BN_is_zero(b)) 758 if(!BN_is_zero(b))
769 { 759 {
770 fprintf(stderr,"Modulo multiply test failed!\n"); 760 fprintf(stderr,"Modulo multiply test failed!\n");
761 ERR_print_errors_fp(stderr);
771 return 0; 762 return 0;
772 } 763 }
773 } 764 }
765 }
774 BN_free(a); 766 BN_free(a);
775 BN_free(b); 767 BN_free(b);
776 BN_free(c); 768 BN_free(c);
@@ -880,6 +872,183 @@ int test_exp(BIO *bp, BN_CTX *ctx)
880 return(1); 872 return(1);
881 } 873 }
882 874
/* Progress callback handed to BN_generate_prime(): emits one marker
 * character on stderr per call and flushes it immediately.
 *   p == 0 -> '.',  p == 1 -> '+',  p == 3 -> '\n',  anything else -> '*'.
 * n and arg are accepted for the callback signature but not used. */
static void genprime_cb(int p, int n, void *arg)
	{
	char mark;

	(void)n;
	(void)arg;

	switch (p)
		{
	case 0:
		mark = '.';
		break;
	case 1:
		mark = '+';
		break;
	case 3:
		mark = '\n';
		break;
	default:	/* includes p == 2 */
		mark = '*';
		break;
		}

	putc(mark, stderr);
	fflush(stderr);
	}
888
889int test_kron(BIO *bp, BN_CTX *ctx)
890 {
891 BIGNUM *a,*b,*r,*t;
892 int i;
893 int legendre, kronecker;
894 int ret = 0;
895
896 a = BN_new();
897 b = BN_new();
898 r = BN_new();
899 t = BN_new();
900 if (a == NULL || b == NULL || r == NULL || t == NULL) goto err;
901
902 /* We test BN_kronecker(a, b, ctx) just for b odd (Jacobi symbol).
903 * In this case we know that if b is prime, then BN_kronecker(a, b, ctx)
904 * is congruent to $a^{(b-1)/2}$, modulo $b$ (Legendre symbol).
905 * So we generate a random prime b and compare these values
906 * for a number of random a's. (That is, we run the Solovay-Strassen
907 * primality test to confirm that b is prime, except that we
908 * don't want to test whether b is prime but whether BN_kronecker
909 * works.) */
910
911 if (!BN_generate_prime(b, 512, 0, NULL, NULL, genprime_cb, NULL)) goto err;
912 b->neg = rand_neg();
913 putc('\n', stderr);
914
915 for (i = 0; i < num0; i++)
916 {
917 if (!BN_bntest_rand(a, 512, 0, 0)) goto err;
918 a->neg = rand_neg();
919
920 /* t := (|b|-1)/2 (note that b is odd) */
921 if (!BN_copy(t, b)) goto err;
922 t->neg = 0;
923 if (!BN_sub_word(t, 1)) goto err;
924 if (!BN_rshift1(t, t)) goto err;
925 /* r := a^t mod b */
926 b->neg=0;
927
928 if (!BN_mod_exp_recp(r, a, t, b, ctx)) goto err; /* XXX should be BN_mod_exp_recp, but ..._recp triggers a bug that must be fixed */
929 b->neg=1;
930
931 if (BN_is_word(r, 1))
932 legendre = 1;
933 else if (BN_is_zero(r))
934 legendre = 0;
935 else
936 {
937 if (!BN_add_word(r, 1)) goto err;
938 if (0 != BN_ucmp(r, b))
939 {
940 fprintf(stderr, "Legendre symbol computation failed\n");
941 goto err;
942 }
943 legendre = -1;
944 }
945
946 kronecker = BN_kronecker(a, b, ctx);
947 if (kronecker < -1) goto err;
948 /* we actually need BN_kronecker(a, |b|) */
949 if (a->neg && b->neg)
950 kronecker = -kronecker;
951
952 if (legendre != kronecker)
953 {
954 fprintf(stderr, "legendre != kronecker; a = ");
955 BN_print_fp(stderr, a);
956 fprintf(stderr, ", b = ");
957 BN_print_fp(stderr, b);
958 fprintf(stderr, "\n");
959 goto err;
960 }
961
962 putc('.', stderr);
963 fflush(stderr);
964 }
965
966 putc('\n', stderr);
967 fflush(stderr);
968 ret = 1;
969 err:
970 if (a != NULL) BN_free(a);
971 if (b != NULL) BN_free(b);
972 if (r != NULL) BN_free(r);
973 if (t != NULL) BN_free(t);
974 return ret;
975 }
976
977int test_sqrt(BIO *bp, BN_CTX *ctx)
978 {
979 BIGNUM *a,*p,*r;
980 int i, j;
981 int ret = 0;
982
983 a = BN_new();
984 p = BN_new();
985 r = BN_new();
986 if (a == NULL || p == NULL || r == NULL) goto err;
987
988 for (i = 0; i < 16; i++)
989 {
990 if (i < 8)
991 {
992 unsigned primes[8] = { 2, 3, 5, 7, 11, 13, 17, 19 };
993
994 if (!BN_set_word(p, primes[i])) goto err;
995 }
996 else
997 {
998 if (!BN_set_word(a, 32)) goto err;
999 if (!BN_set_word(r, 2*i + 1)) goto err;
1000
1001 if (!BN_generate_prime(p, 256, 0, a, r, genprime_cb, NULL)) goto err;
1002 putc('\n', stderr);
1003 }
1004 p->neg = rand_neg();
1005
1006 for (j = 0; j < num2; j++)
1007 {
1008 /* construct 'a' such that it is a square modulo p,
1009 * but in general not a proper square and not reduced modulo p */
1010 if (!BN_bntest_rand(r, 256, 0, 3)) goto err;
1011 if (!BN_nnmod(r, r, p, ctx)) goto err;
1012 if (!BN_mod_sqr(r, r, p, ctx)) goto err;
1013 if (!BN_bntest_rand(a, 256, 0, 3)) goto err;
1014 if (!BN_nnmod(a, a, p, ctx)) goto err;
1015 if (!BN_mod_sqr(a, a, p, ctx)) goto err;
1016 if (!BN_mul(a, a, r, ctx)) goto err;
1017 if (rand_neg())
1018 if (!BN_sub(a, a, p)) goto err;
1019
1020 if (!BN_mod_sqrt(r, a, p, ctx)) goto err;
1021 if (!BN_mod_sqr(r, r, p, ctx)) goto err;
1022
1023 if (!BN_nnmod(a, a, p, ctx)) goto err;
1024
1025 if (BN_cmp(a, r) != 0)
1026 {
1027 fprintf(stderr, "BN_mod_sqrt failed: a = ");
1028 BN_print_fp(stderr, a);
1029 fprintf(stderr, ", r = ");
1030 BN_print_fp(stderr, r);
1031 fprintf(stderr, ", p = ");
1032 BN_print_fp(stderr, p);
1033 fprintf(stderr, "\n");
1034 goto err;
1035 }
1036
1037 putc('.', stderr);
1038 fflush(stderr);
1039 }
1040
1041 putc('\n', stderr);
1042 fflush(stderr);
1043 }
1044 ret = 1;
1045 err:
1046 if (a != NULL) BN_free(a);
1047 if (p != NULL) BN_free(p);
1048 if (r != NULL) BN_free(r);
1049 return ret;
1050 }
1051
883int test_lshift(BIO *bp,BN_CTX *ctx,BIGNUM *a_) 1052int test_lshift(BIO *bp,BN_CTX *ctx,BIGNUM *a_)
884 { 1053 {
885 BIGNUM *a,*b,*c,*d; 1054 BIGNUM *a,*b,*c,*d;
@@ -1052,7 +1221,7 @@ int test_rshift1(BIO *bp)
1052 } 1221 }
1053 BN_sub(c,a,b); 1222 BN_sub(c,a,b);
1054 BN_sub(c,c,b); 1223 BN_sub(c,c,b);
1055 if(!BN_is_zero(c) && !BN_is_one(c)) 1224 if(!BN_is_zero(c) && !BN_abs_is_word(c, 1))
1056 { 1225 {
1057 fprintf(stderr,"Right shift one test failed!\n"); 1226 fprintf(stderr,"Right shift one test failed!\n");
1058 return 0; 1227 return 0;
diff --git a/src/lib/libcrypto/bn/expspeed.c b/src/lib/libcrypto/bn/expspeed.c
index 2044ab9bff..07a1bcf51c 100644
--- a/src/lib/libcrypto/bn/expspeed.c
+++ b/src/lib/libcrypto/bn/expspeed.c
@@ -61,6 +61,31 @@
61/* most of this code has been pilfered from my libdes speed.c program */ 61/* most of this code has been pilfered from my libdes speed.c program */
62 62
63#define BASENUM 5000 63#define BASENUM 5000
64#define NUM_START 0
65
66
67/* determine timings for modexp, modmul, modsqr, gcd, Kronecker symbol,
68 * modular inverse, or modular square roots */
69#define TEST_EXP
70#undef TEST_MUL
71#undef TEST_SQR
72#undef TEST_GCD
73#undef TEST_KRON
74#undef TEST_INV
75#undef TEST_SQRT
76#define P_MOD_64 9 /* least significant 6 bits for prime to be used for BN_sqrt timings */
77
78#if defined(TEST_EXP) + defined(TEST_MUL) + defined(TEST_SQR) + defined(TEST_GCD) + defined(TEST_KRON) + defined(TEST_INV) +defined(TEST_SQRT) != 1
79# error "choose one test"
80#endif
81
82#if defined(TEST_INV) || defined(TEST_SQRT)
83# define C_PRIME
84static void genprime_cb(int p, int n, void *arg);
85#endif
86
87
88
64#undef PROG 89#undef PROG
65#define PROG bnspeed_main 90#define PROG bnspeed_main
66 91
@@ -70,8 +95,9 @@
70#include <string.h> 95#include <string.h>
71#include <openssl/crypto.h> 96#include <openssl/crypto.h>
72#include <openssl/err.h> 97#include <openssl/err.h>
98#include <openssl/rand.h>
73 99
74#if !defined(MSDOS) && (!defined(VMS) || defined(__DECC)) 100#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
75#define TIMES 101#define TIMES
76#endif 102#endif
77 103
@@ -87,7 +113,7 @@
87 The __TMS macro will show if it was. If it wasn't defined, we should 113 The __TMS macro will show if it was. If it wasn't defined, we should
88 undefine TIMES, since that tells the rest of the program how things 114 undefine TIMES, since that tells the rest of the program how things
89 should be handled. -- Richard Levitte */ 115 should be handled. -- Richard Levitte */
90#if defined(VMS) && defined(__DECC) && !defined(__TMS) 116#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
91#undef TIMES 117#undef TIMES
92#endif 118#endif
93 119
@@ -161,11 +187,16 @@ static double Time_F(int s)
161#endif 187#endif
162 } 188 }
163 189
164#define NUM_SIZES 6 190#define NUM_SIZES 7
165static int sizes[NUM_SIZES]={256,512,1024,2048,4096,8192}; 191#if NUM_START > NUM_SIZES
166static int mul_c[NUM_SIZES]={8*8*8*8*8,8*8*8*8,8*8*8,8*8,8,1}; 192# error "NUM_START > NUM_SIZES"
193#endif
194static int sizes[NUM_SIZES]={128,256,512,1024,2048,4096,8192};
195static int mul_c[NUM_SIZES]={8*8*8*8*8*8,8*8*8*8*8,8*8*8*8,8*8*8,8*8,8,1};
167/*static int sizes[NUM_SIZES]={59,179,299,419,539}; */ 196/*static int sizes[NUM_SIZES]={59,179,299,419,539}; */
168 197
/* Feed the bytes of a string literal (including the terminating NUL, as
 * counted by sizeof) to RAND_seed().  The body is wrapped in
 * do { } while (0) so that "RAND_SEED(...);" is exactly one statement and
 * stays safe as the body of a while loop or inside an if/else. */
#define RAND_SEED(string) do { const char str[] = string; RAND_seed(str, sizeof str); } while (0)
199
169void do_mul_exp(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *c,BN_CTX *ctx); 200void do_mul_exp(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *c,BN_CTX *ctx);
170 201
171int main(int argc, char **argv) 202int main(int argc, char **argv)
@@ -173,13 +204,23 @@ int main(int argc, char **argv)
173 BN_CTX *ctx; 204 BN_CTX *ctx;
174 BIGNUM *a,*b,*c,*r; 205 BIGNUM *a,*b,*c,*r;
175 206
207#if 1
208 if (!CRYPTO_set_mem_debug_functions(0,0,0,0,0))
209 abort();
210#endif
211
176 ctx=BN_CTX_new(); 212 ctx=BN_CTX_new();
177 a=BN_new(); 213 a=BN_new();
178 b=BN_new(); 214 b=BN_new();
179 c=BN_new(); 215 c=BN_new();
180 r=BN_new(); 216 r=BN_new();
181 217
218 while (!RAND_status())
219 /* not enough bits */
220 RAND_SEED("I demand a manual recount!");
221
182 do_mul_exp(r,a,b,c,ctx); 222 do_mul_exp(r,a,b,c,ctx);
223 return 0;
183 } 224 }
184 225
185void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx) 226void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
@@ -187,29 +228,126 @@ void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
187 int i,k; 228 int i,k;
188 double tm; 229 double tm;
189 long num; 230 long num;
190 BN_MONT_CTX m;
191
192 memset(&m,0,sizeof(m));
193 231
194 num=BASENUM; 232 num=BASENUM;
195 for (i=0; i<NUM_SIZES; i++) 233 for (i=NUM_START; i<NUM_SIZES; i++)
196 { 234 {
197 BN_rand(a,sizes[i],1,0); 235#ifdef C_PRIME
198 BN_rand(b,sizes[i],1,0); 236# ifdef TEST_SQRT
199 BN_rand(c,sizes[i],1,1); 237 if (!BN_set_word(a, 64)) goto err;
200 BN_mod(a,a,c,ctx); 238 if (!BN_set_word(b, P_MOD_64)) goto err;
201 BN_mod(b,b,c,ctx); 239# define ADD a
202 240# define REM b
203 BN_MONT_CTX_set(&m,c,ctx); 241# else
242# define ADD NULL
243# define REM NULL
244# endif
245 if (!BN_generate_prime(c,sizes[i],0,ADD,REM,genprime_cb,NULL)) goto err;
246 putc('\n', stderr);
247 fflush(stderr);
248#endif
204 249
205 Time_F(START);
206 for (k=0; k<num; k++) 250 for (k=0; k<num; k++)
207 BN_mod_exp_mont(r,a,b,c,ctx,&m); 251 {
252 if (k%50 == 0) /* Average over num/50 different choices of random numbers. */
253 {
254 if (!BN_pseudo_rand(a,sizes[i],1,0)) goto err;
255
256 if (!BN_pseudo_rand(b,sizes[i],1,0)) goto err;
257
258#ifndef C_PRIME
259 if (!BN_pseudo_rand(c,sizes[i],1,1)) goto err;
260#endif
261
262#ifdef TEST_SQRT
263 if (!BN_mod_sqr(a,a,c,ctx)) goto err;
264 if (!BN_mod_sqr(b,b,c,ctx)) goto err;
265#else
266 if (!BN_nnmod(a,a,c,ctx)) goto err;
267 if (!BN_nnmod(b,b,c,ctx)) goto err;
268#endif
269
270 if (k == 0)
271 Time_F(START);
272 }
273
274#if defined(TEST_EXP)
275 if (!BN_mod_exp(r,a,b,c,ctx)) goto err;
276#elif defined(TEST_MUL)
277 {
278 int i = 0;
279 for (i = 0; i < 50; i++)
280 if (!BN_mod_mul(r,a,b,c,ctx)) goto err;
281 }
282#elif defined(TEST_SQR)
283 {
284 int i = 0;
285 for (i = 0; i < 50; i++)
286 {
287 if (!BN_mod_sqr(r,a,c,ctx)) goto err;
288 if (!BN_mod_sqr(r,b,c,ctx)) goto err;
289 }
290 }
291#elif defined(TEST_GCD)
292 if (!BN_gcd(r,a,b,ctx)) goto err;
293 if (!BN_gcd(r,b,c,ctx)) goto err;
294 if (!BN_gcd(r,c,a,ctx)) goto err;
295#elif defined(TEST_KRON)
296 if (-2 == BN_kronecker(a,b,ctx)) goto err;
297 if (-2 == BN_kronecker(b,c,ctx)) goto err;
298 if (-2 == BN_kronecker(c,a,ctx)) goto err;
299#elif defined(TEST_INV)
300 if (!BN_mod_inverse(r,a,c,ctx)) goto err;
301 if (!BN_mod_inverse(r,b,c,ctx)) goto err;
302#else /* TEST_SQRT */
303 if (!BN_mod_sqrt(r,a,c,ctx)) goto err;
304 if (!BN_mod_sqrt(r,b,c,ctx)) goto err;
305#endif
306 }
208 tm=Time_F(STOP); 307 tm=Time_F(STOP);
209 printf("mul %4d ^ %4d %% %d -> %8.3fms %5.1f\n",sizes[i],sizes[i],sizes[i],tm*1000.0/num,tm*mul_c[i]/num); 308 printf(
309#if defined(TEST_EXP)
310 "modexp %4d ^ %4d %% %4d"
311#elif defined(TEST_MUL)
312 "50*modmul %4d %4d %4d"
313#elif defined(TEST_SQR)
314 "100*modsqr %4d %4d %4d"
315#elif defined(TEST_GCD)
316 "3*gcd %4d %4d %4d"
317#elif defined(TEST_KRON)
318 "3*kronecker %4d %4d %4d"
319#elif defined(TEST_INV)
320 "2*inv %4d %4d mod %4d"
321#else /* TEST_SQRT */
322 "2*sqrt [prime == %d (mod 64)] %4d %4d mod %4d"
323#endif
324 " -> %8.3fms %5.1f (%ld)\n",
325#ifdef TEST_SQRT
326 P_MOD_64,
327#endif
328 sizes[i],sizes[i],sizes[i],tm*1000.0/num,tm*mul_c[i]/num, num);
210 num/=7; 329 num/=7;
211 if (num <= 0) num=1; 330 if (num <= 0) num=1;
212 } 331 }
332 return;
213 333
334 err:
335 ERR_print_errors_fp(stderr);
214 } 336 }
215 337
338
339#ifdef C_PRIME
/* Progress callback for BN_generate_prime(): writes a single marker
 * character to stderr and flushes.  The marker is '.', '+', '*' or '\n'
 * for p == 0, 1, 2, 3 respectively, and '*' for any other p.
 * n and arg are unused. */
static void genprime_cb(int p, int n, void *arg)
	{
	static const char marks[4] = { '.', '+', '*', '\n' };
	char mark = '*';

	(void)n;
	(void)arg;

	if (p >= 0 && p < 4)
		mark = marks[p];

	putc(mark, stderr);
	fflush(stderr);
	}
353#endif
diff --git a/src/lib/libcrypto/bn/exptest.c b/src/lib/libcrypto/bn/exptest.c
index 3e86f2ea0e..5ca570d1a8 100644
--- a/src/lib/libcrypto/bn/exptest.c
+++ b/src/lib/libcrypto/bn/exptest.c
@@ -63,7 +63,7 @@
63#include <openssl/bn.h> 63#include <openssl/bn.h>
64#include <openssl/rand.h> 64#include <openssl/rand.h>
65#include <openssl/err.h> 65#include <openssl/err.h>
66#ifdef WINDOWS 66#ifdef OPENSSL_SYS_WINDOWS
67#include "../bio/bss_file.c" 67#include "../bio/bss_file.c"
68#endif 68#endif
69 69
diff --git a/src/lib/libcrypto/bn/vms-helper.c b/src/lib/libcrypto/bn/vms-helper.c
index 0fa79c4edb..4b63149bf3 100644
--- a/src/lib/libcrypto/bn/vms-helper.c
+++ b/src/lib/libcrypto/bn/vms-helper.c
@@ -60,7 +60,7 @@
60bn_div_words_abort(int i) 60bn_div_words_abort(int i)
61{ 61{
62#ifdef BN_DEBUG 62#ifdef BN_DEBUG
63#if !defined(NO_STDIO) && !defined(WIN16) 63#if !defined(OPENSSL_NO_STDIO) && !defined(OPENSSL_SYS_WIN16)
64 fprintf(stderr,"Division would overflow (%d)\n",i); 64 fprintf(stderr,"Division would overflow (%d)\n",i);
65#endif 65#endif
66 abort(); 66 abort();