summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r--src/lib/libcrypto/bn/Makefile.ssl135
-rw-r--r--src/lib/libcrypto/bn/asm/README12
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2.s2024
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2W.s1605
-rw-r--r--src/lib/libcrypto/bn/bn.h12
-rw-r--r--src/lib/libcrypto/bn/bn_asm.c11
-rw-r--r--src/lib/libcrypto/bn/bn_blind.c4
-rw-r--r--src/lib/libcrypto/bn/bn_ctx.c4
-rw-r--r--src/lib/libcrypto/bn/bn_div.c2
-rw-r--r--src/lib/libcrypto/bn/bn_err.c2
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c521
-rw-r--r--src/lib/libcrypto/bn/bn_exp2.c357
-rw-r--r--src/lib/libcrypto/bn/bn_lcl.h100
-rw-r--r--src/lib/libcrypto/bn/bn_lib.c42
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c31
-rw-r--r--src/lib/libcrypto/bn/bn_mul.c2
-rw-r--r--src/lib/libcrypto/bn/bn_print.c12
-rw-r--r--src/lib/libcrypto/bn/bn_rand.c10
-rw-r--r--src/lib/libcrypto/bn/bn_recp.c4
-rw-r--r--src/lib/libcrypto/bn/bn_shift.c2
-rw-r--r--src/lib/libcrypto/bn/bn_sqr.c2
-rw-r--r--src/lib/libcrypto/bn/bn_word.c17
-rw-r--r--src/lib/libcrypto/bn/vms-helper.c2
23 files changed, 3956 insertions, 957 deletions
diff --git a/src/lib/libcrypto/bn/Makefile.ssl b/src/lib/libcrypto/bn/Makefile.ssl
index beb9c1b523..17b72d577f 100644
--- a/src/lib/libcrypto/bn/Makefile.ssl
+++ b/src/lib/libcrypto/bn/Makefile.ssl
@@ -170,118 +170,143 @@ clean:
170bn_add.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 170bn_add.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
171bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 171bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
172bn_add.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 172bn_add.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
173bn_add.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 173bn_add.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
174bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 174bn_add.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
175bn_add.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 175bn_add.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
176bn_add.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
176bn_asm.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 177bn_asm.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
177bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 178bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
178bn_asm.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 179bn_asm.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
179bn_asm.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 180bn_asm.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
180bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 181bn_asm.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
181bn_asm.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 182bn_asm.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
183bn_asm.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
182bn_blind.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 184bn_blind.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
183bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 185bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
184bn_blind.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 186bn_blind.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
185bn_blind.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 187bn_blind.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
188bn_blind.o: ../../include/openssl/opensslconf.h
186bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 189bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
187bn_blind.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 190bn_blind.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
191bn_blind.o: ../cryptlib.h bn_lcl.h
188bn_ctx.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 192bn_ctx.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
189bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 193bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
190bn_ctx.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 194bn_ctx.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
191bn_ctx.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 195bn_ctx.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
192bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 196bn_ctx.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
193bn_ctx.o: ../../include/openssl/stack.h ../cryptlib.h 197bn_ctx.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
198bn_ctx.o: ../../include/openssl/symhacks.h ../cryptlib.h
194bn_div.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 199bn_div.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
195bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 200bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
196bn_div.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 201bn_div.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
197bn_div.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 202bn_div.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
198bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 203bn_div.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
199bn_div.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 204bn_div.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
200bn_err.o: ../../include/openssl/bn.h ../../include/openssl/err.h 205bn_div.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
201bn_err.o: ../../include/openssl/opensslconf.h 206bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
207bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/err.h
208bn_err.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
209bn_err.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
210bn_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
202bn_exp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 211bn_exp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
203bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 212bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
204bn_exp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 213bn_exp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
205bn_exp.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 214bn_exp.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
206bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 215bn_exp.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
207bn_exp.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 216bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
217bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
208bn_exp2.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 218bn_exp2.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
209bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 219bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
210bn_exp2.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 220bn_exp2.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
211bn_exp2.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 221bn_exp2.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
212bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 222bn_exp2.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
213bn_exp2.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 223bn_exp2.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
224bn_exp2.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
214bn_gcd.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 225bn_gcd.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
215bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 226bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
216bn_gcd.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 227bn_gcd.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
217bn_gcd.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 228bn_gcd.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
218bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 229bn_gcd.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
219bn_gcd.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 230bn_gcd.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
231bn_gcd.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
220bn_lib.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 232bn_lib.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
221bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 233bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
222bn_lib.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 234bn_lib.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
223bn_lib.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 235bn_lib.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
224bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 236bn_lib.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
225bn_lib.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 237bn_lib.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
238bn_lib.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
226bn_mont.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 239bn_mont.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
227bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 240bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
228bn_mont.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 241bn_mont.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
229bn_mont.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 242bn_mont.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
230bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 243bn_mont.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
231bn_mont.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 244bn_mont.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
245bn_mont.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
232bn_mpi.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 246bn_mpi.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
233bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 247bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
234bn_mpi.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 248bn_mpi.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
235bn_mpi.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 249bn_mpi.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
236bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 250bn_mpi.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
237bn_mpi.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 251bn_mpi.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
252bn_mpi.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
238bn_mul.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 253bn_mul.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
239bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 254bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
240bn_mul.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 255bn_mul.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
241bn_mul.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 256bn_mul.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
242bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 257bn_mul.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
243bn_mul.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 258bn_mul.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
259bn_mul.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
244bn_prime.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 260bn_prime.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
245bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 261bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
246bn_prime.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 262bn_prime.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
247bn_prime.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 263bn_prime.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
264bn_prime.o: ../../include/openssl/opensslconf.h
248bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h 265bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h
249bn_prime.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 266bn_prime.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
250bn_prime.o: ../cryptlib.h bn_lcl.h bn_prime.h 267bn_prime.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_prime.h
251bn_print.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 268bn_print.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
252bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 269bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
253bn_print.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 270bn_print.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
254bn_print.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 271bn_print.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
272bn_print.o: ../../include/openssl/opensslconf.h
255bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 273bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
256bn_print.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 274bn_print.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
275bn_print.o: ../cryptlib.h bn_lcl.h
257bn_rand.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 276bn_rand.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
258bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 277bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
259bn_rand.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 278bn_rand.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
260bn_rand.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 279bn_rand.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
261bn_rand.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h 280bn_rand.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
262bn_rand.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 281bn_rand.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h
282bn_rand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
263bn_rand.o: ../cryptlib.h bn_lcl.h 283bn_rand.o: ../cryptlib.h bn_lcl.h
264bn_recp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 284bn_recp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
265bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 285bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
266bn_recp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 286bn_recp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
267bn_recp.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 287bn_recp.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
268bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 288bn_recp.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
269bn_recp.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 289bn_recp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
290bn_recp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
270bn_shift.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 291bn_shift.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
271bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 292bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
272bn_shift.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 293bn_shift.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
273bn_shift.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 294bn_shift.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
295bn_shift.o: ../../include/openssl/opensslconf.h
274bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 296bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h
275bn_shift.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 297bn_shift.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
298bn_shift.o: ../cryptlib.h bn_lcl.h
276bn_sqr.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 299bn_sqr.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
277bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 300bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
278bn_sqr.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 301bn_sqr.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
279bn_sqr.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 302bn_sqr.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
280bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 303bn_sqr.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
281bn_sqr.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 304bn_sqr.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
305bn_sqr.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
282bn_word.o: ../../include/openssl/bio.h ../../include/openssl/bn.h 306bn_word.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
283bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h 307bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
284bn_word.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h 308bn_word.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h
285bn_word.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h 309bn_word.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
286bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h 310bn_word.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
287bn_word.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h 311bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
312bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h
diff --git a/src/lib/libcrypto/bn/asm/README b/src/lib/libcrypto/bn/asm/README
index 86bf64cfc2..a0fe58a677 100644
--- a/src/lib/libcrypto/bn/asm/README
+++ b/src/lib/libcrypto/bn/asm/README
@@ -15,9 +15,9 @@ On the 2 alpha C compilers I had access to, it was not possible to do
15were 64 bits). So the hand assember gives access to the 128 bit result and 15were 64 bits). So the hand assember gives access to the 128 bit result and
16a 2 times speedup :-). 16a 2 times speedup :-).
17 17
18There are 2 versions of assember for the HP PA-RISC. 18There are 3 versions of assember for the HP PA-RISC.
19pa-risc.s is the origional one which works fine. 19
20pa-risc2.s is a new version that often generates warnings but if the 20pa-risc.s is the origional one which works fine and generated using gcc :-)
21tests pass, it gives performance that is over 2 times faster than 21
22pa-risc.s. 22pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations
23Both were generated using gcc :-) 23by Chris Ruemmler from HP (with some help from the HP C compiler).
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s
index c2725996a4..7239aa2c76 100644
--- a/src/lib/libcrypto/bn/asm/pa-risc2.s
+++ b/src/lib/libcrypto/bn/asm/pa-risc2.s
@@ -1,416 +1,1618 @@
1 .SPACE $PRIVATE$ 1;
2 .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 2; PA-RISC 2.0 implementation of bn_asm code, based on the
3 .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 3; 64-bit version of the code. This code is effectively the
4 .SPACE $TEXT$ 4; same as the 64-bit version except the register model is
5 .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 5; slightly different given all values must be 32-bit between
6 .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY 6; function calls. Thus the 64-bit return values are returned
7 .IMPORT $global$,DATA 7; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit
8 .IMPORT $$dyncall,MILLICODE 8;
9; gcc_compiled.: 9;
10 .SPACE $TEXT$ 10; This code is approximately 2x faster than the C version
11 .SUBSPA $CODE$ 11; for RSA/DSA.
12 12;
13 .align 4 13; See http://devresource.hp.com/ for more details on the PA-RISC
14 .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR 14; architecture. Also see the book "PA-RISC 2.0 Architecture"
15; by Gerry Kane for information on the instruction set architecture.
16;
17; Code written by Chris Ruemmler (with some help from the HP C
18; compiler).
19;
20; The code compiles with HP's assembler
21;
22
23 .level 2.0N
24 .space $TEXT$
25 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
26
27;
28; Global Register definitions used for the routines.
29;
30; Some information about HP's runtime architecture for 32-bits.
31;
32; "Caller save" means the calling function must save the register
33; if it wants the register to be preserved.
34; "Callee save" means if a function uses the register, it must save
35; the value before using it.
36;
37; For the floating point registers
38;
39; "caller save" registers: fr4-fr11, fr22-fr31
40; "callee save" registers: fr12-fr21
41; "special" registers: fr0-fr3 (status and exception registers)
42;
43; For the integer registers
44; value zero : r0
45; "caller save" registers: r1,r19-r26
46; "callee save" registers: r3-r18
47; return register : r2 (rp)
48; return values ; r28,r29 (ret0,ret1)
49; Stack pointer ; r30 (sp)
50; millicode return ptr ; r31 (also a caller save register)
51
52
53;
54; Arguments to the routines
55;
56r_ptr .reg %r26
57a_ptr .reg %r25
58b_ptr .reg %r24
59num .reg %r24
60n .reg %r23
61
62;
63; Note that the "w" argument for bn_mul_add_words and bn_mul_words
64; is passed on the stack at a delta of -56 from the top of stack
65; as the routine is entered.
66;
67
68;
69; Globals used in some routines
70;
71
72top_overflow .reg %r23
73high_mask .reg %r22 ; value 0xffffffff80000000L
74
75
76;------------------------------------------------------------------------------
77;
78; bn_mul_add_words
79;
80;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
81; int num, BN_ULONG w)
82;
83; arg0 = r_ptr
84; arg1 = a_ptr
85; arg3 = num
86; -56(sp) = w
87;
88; Local register definitions
89;
90
91fm1 .reg %fr22
92fm .reg %fr23
93ht_temp .reg %fr24
94ht_temp_1 .reg %fr25
95lt_temp .reg %fr26
96lt_temp_1 .reg %fr27
97fm1_1 .reg %fr28
98fm_1 .reg %fr29
99
100fw_h .reg %fr7L
101fw_l .reg %fr7R
102fw .reg %fr7
103
104fht_0 .reg %fr8L
105flt_0 .reg %fr8R
106t_float_0 .reg %fr8
107
108fht_1 .reg %fr9L
109flt_1 .reg %fr9R
110t_float_1 .reg %fr9
111
112tmp_0 .reg %r31
113tmp_1 .reg %r21
114m_0 .reg %r20
115m_1 .reg %r19
116ht_0 .reg %r1
117ht_1 .reg %r3
118lt_0 .reg %r4
119lt_1 .reg %r5
120m1_0 .reg %r6
121m1_1 .reg %r7
122rp_val .reg %r8
123rp_val_1 .reg %r9
124
15bn_mul_add_words 125bn_mul_add_words
16 .PROC 126 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
17 .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4 127 .proc
18 .ENTRY 128 .callinfo frame=128
19 stw %r2,-20(0,%r30) 129 .entry
20 stwm %r4,64(0,%r30) 130 .align 64
21 copy %r24,%r31 131
22 stw %r3,-60(0,%r30) 132 STD %r3,0(%sp) ; save r3
23 ldi 0,%r20 133 STD %r4,8(%sp) ; save r4
24 ldo 12(%r26),%r2 134 NOP ; Needed to make the loop 16-byte aligned
25 stw %r23,-16(0,%r30) 135 NOP ; needed to make the loop 16-byte aligned
26 copy %r25,%r3 136
27 ldo 12(%r3),%r1 137 STD %r5,16(%sp) ; save r5
28 fldws -16(0,%r30),%fr8L 138 NOP
29L$0010 139 STD %r6,24(%sp) ; save r6
30 copy %r20,%r25 140 STD %r7,32(%sp) ; save r7
31 ldi 0,%r24 141
32 fldws 0(0,%r3),%fr9L 142 STD %r8,40(%sp) ; save r8
33 ldw 0(0,%r26),%r19 143 STD %r9,48(%sp) ; save r9
34 xmpyu %fr8L,%fr9L,%fr9 144 COPY %r0,%ret1 ; return 0 by default
35 fstds %fr9,-16(0,%r30) 145 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
36 copy %r19,%r23 146
37 ldw -16(0,%r30),%r28 147 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
38 ldw -12(0,%r30),%r29 148 LDO 128(%sp),%sp ; bump stack
39 ldi 0,%r22 149
40 add %r23,%r29,%r29 150 ;
41 addc %r22,%r28,%r28 151 ; The loop is unrolled twice, so if there is only 1 number
42 add %r25,%r29,%r29 152 ; then go straight to the cleanup code.
43 addc %r24,%r28,%r28 153 ;
44 copy %r28,%r21 154 CMPIB,= 1,num,bn_mul_add_words_single_top
45 ldi 0,%r20 155 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
46 copy %r21,%r20 156
47 addib,= -1,%r31,L$0011 157 ;
48 stw %r29,0(0,%r26) 158 ; This loop is unrolled 2 times (64-byte aligned as well)
49 copy %r20,%r25 159 ;
50 ldi 0,%r24 160 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
51 fldws -8(0,%r1),%fr9L 161 ; two 32-bit mutiplies can be issued per cycle.
52 ldw -8(0,%r2),%r19 162 ;
53 xmpyu %fr8L,%fr9L,%fr9 163bn_mul_add_words_unroll2
54 fstds %fr9,-16(0,%r30) 164
55 copy %r19,%r23 165 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
56 ldw -16(0,%r30),%r28 166 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
57 ldw -12(0,%r30),%r29 167 LDD 0(r_ptr),rp_val ; rp[0]
58 ldi 0,%r22 168 LDD 8(r_ptr),rp_val_1 ; rp[1]
59 add %r23,%r29,%r29 169
60 addc %r22,%r28,%r28 170 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
61 add %r25,%r29,%r29 171 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
62 addc %r24,%r28,%r28 172 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
63 copy %r28,%r21 173 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
64 ldi 0,%r20 174
65 copy %r21,%r20 175 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
66 addib,= -1,%r31,L$0011 176 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
67 stw %r29,-8(0,%r2) 177 FSTD fm,-8(%sp) ; -8(sp) = m[0]
68 copy %r20,%r25 178 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
69 ldi 0,%r24 179
70 fldws -4(0,%r1),%fr9L 180 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
71 ldw -4(0,%r2),%r19 181 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
72 xmpyu %fr8L,%fr9L,%fr9 182 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
73 fstds %fr9,-16(0,%r30) 183 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
74 copy %r19,%r23 184
75 ldw -16(0,%r30),%r28 185 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
76 ldw -12(0,%r30),%r29 186 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
77 ldi 0,%r22 187 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
78 add %r23,%r29,%r29 188 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
79 addc %r22,%r28,%r28 189
80 add %r25,%r29,%r29 190 LDD -8(%sp),m_0 ; m[0]
81 addc %r24,%r28,%r28 191 LDD -40(%sp),m_1 ; m[1]
82 copy %r28,%r21 192 LDD -16(%sp),m1_0 ; m1[0]
83 ldi 0,%r20 193 LDD -48(%sp),m1_1 ; m1[1]
84 copy %r21,%r20 194
85 addib,= -1,%r31,L$0011 195 LDD -24(%sp),ht_0 ; ht[0]
86 stw %r29,-4(0,%r2) 196 LDD -56(%sp),ht_1 ; ht[1]
87 copy %r20,%r25 197 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
88 ldi 0,%r24 198 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
89 fldws 0(0,%r1),%fr9L 199
90 ldw 0(0,%r2),%r19 200 LDD -32(%sp),lt_0
91 xmpyu %fr8L,%fr9L,%fr9 201 LDD -64(%sp),lt_1
92 fstds %fr9,-16(0,%r30) 202 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0])
93 copy %r19,%r23 203 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
94 ldw -16(0,%r30),%r28 204
95 ldw -12(0,%r30),%r29 205 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1])
96 ldi 0,%r22 206 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
97 add %r23,%r29,%r29 207 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
98 addc %r22,%r28,%r28 208 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
99 add %r25,%r29,%r29 209
100 addc %r24,%r28,%r28 210 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
101 copy %r28,%r21 211 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
102 ldi 0,%r20 212 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
103 copy %r21,%r20 213 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
104 addib,= -1,%r31,L$0011 214
105 stw %r29,0(0,%r2) 215 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
106 ldo 16(%r1),%r1 216 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
107 ldo 16(%r3),%r3 217 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
108 ldo 16(%r2),%r2 218 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
109 bl L$0010,0 219
110 ldo 16(%r26),%r26 220 ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c;
111L$0011 221 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
112 copy %r20,%r28 222 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
113 ldw -84(0,%r30),%r2 223 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
114 ldw -60(0,%r30),%r3 224
115 bv 0(%r2) 225 LDO -2(num),num ; num = num - 2;
116 ldwm -64(0,%r30),%r4 226 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
117 .EXIT 227 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
118 .PROCEND 228 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
119 .align 4 229
120 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR 230 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
231 ADD,DC ht_1,%r0,%ret1 ; ht[1]++
232 LDO 16(a_ptr),a_ptr ; a_ptr += 2
233
234 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
235 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
236 LDO 16(r_ptr),r_ptr ; r_ptr += 2
237
238 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
239
240 ;
241 ; Top of loop aligned on 64-byte boundary
242 ;
243bn_mul_add_words_single_top
244 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
245 LDD 0(r_ptr),rp_val ; rp[0]
246 LDO 8(a_ptr),a_ptr ; a_ptr++
247 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
248 FSTD fm1,-16(%sp) ; -16(sp) = m1
249 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
250 FSTD fm,-8(%sp) ; -8(sp) = m
251 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
252 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
253 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
254 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
255
256 LDD -8(%sp),m_0
257 LDD -16(%sp),m1_0 ; m1 = temp1
258 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
259 LDD -24(%sp),ht_0
260 LDD -32(%sp),lt_0
261
262 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
263 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
264
265 EXTRD,U tmp_0,31,32,m_0 ; m>>32
266 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
267
268 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
269 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
270 ADD,DC ht_0,%r0,ht_0 ; ht++
271 ADD %ret1,tmp_0,lt_0 ; lt = lt + c;
272 ADD,DC ht_0,%r0,ht_0 ; ht++
273 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
274 ADD,DC ht_0,%r0,%ret1 ; ht++
275 STD lt_0,0(r_ptr) ; rp[0] = lt
276
277bn_mul_add_words_exit
278 .EXIT
279
280 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
281 LDD -80(%sp),%r9 ; restore r9
282 LDD -88(%sp),%r8 ; restore r8
283 LDD -96(%sp),%r7 ; restore r7
284 LDD -104(%sp),%r6 ; restore r6
285 LDD -112(%sp),%r5 ; restore r5
286 LDD -120(%sp),%r4 ; restore r4
287 BVE (%rp)
288 LDD,MB -128(%sp),%r3 ; restore r3
289 .PROCEND ;in=23,24,25,26,29;out=28;
290
291;----------------------------------------------------------------------------
292;
293;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
294;
295; arg0 = rp
296; arg1 = ap
297; arg3 = num
298; w on stack at -56(sp)
299
121bn_mul_words 300bn_mul_words
122 .PROC 301 .proc
123 .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3 302 .callinfo frame=128
124 .ENTRY 303 .entry
125 stw %r2,-20(0,%r30) 304 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
126 copy %r25,%r2 305 .align 64
127 stwm %r4,64(0,%r30) 306
128 copy %r24,%r19 307 STD %r3,0(%sp) ; save r3
129 ldi 0,%r28 308 STD %r4,8(%sp) ; save r4
130 stw %r23,-16(0,%r30) 309 NOP
131 ldo 12(%r26),%r31 310 STD %r5,16(%sp) ; save r5
132 ldo 12(%r2),%r29 311
133 fldws -16(0,%r30),%fr8L 312 STD %r6,24(%sp) ; save r6
134L$0026 313 STD %r7,32(%sp) ; save r7
135 fldws 0(0,%r2),%fr9L 314 COPY %r0,%ret1 ; return 0 by default
136 xmpyu %fr8L,%fr9L,%fr9 315 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
137 fstds %fr9,-16(0,%r30) 316
138 copy %r28,%r21 317 CMPIB,>= 0,num,bn_mul_words_exit
139 ldi 0,%r20 318 LDO 128(%sp),%sp ; bump stack
140 ldw -16(0,%r30),%r24 319
141 ldw -12(0,%r30),%r25 320 ;
142 add %r21,%r25,%r25 321 ; See if only 1 word to do, thus just do cleanup
143 addc %r20,%r24,%r24 322 ;
144 copy %r24,%r23 323 CMPIB,= 1,num,bn_mul_words_single_top
145 ldi 0,%r22 324 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
146 copy %r23,%r28 325
147 addib,= -1,%r19,L$0027 326 ;
148 stw %r25,0(0,%r26) 327 ; This loop is unrolled 2 times (64-byte aligned as well)
149 fldws -8(0,%r29),%fr9L 328 ;
150 xmpyu %fr8L,%fr9L,%fr9 329 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
151 fstds %fr9,-16(0,%r30) 330 ; two 32-bit mutiplies can be issued per cycle.
152 copy %r28,%r21 331 ;
153 ldi 0,%r20 332bn_mul_words_unroll2
154 ldw -16(0,%r30),%r24 333
155 ldw -12(0,%r30),%r25 334 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
156 add %r21,%r25,%r25 335 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
157 addc %r20,%r24,%r24 336 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
158 copy %r24,%r23 337 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l
159 ldi 0,%r22 338
160 copy %r23,%r28 339 FSTD fm1,-16(%sp) ; -16(sp) = m1
161 addib,= -1,%r19,L$0027 340 FSTD fm1_1,-48(%sp) ; -48(sp) = m1
162 stw %r25,-8(0,%r31) 341 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
163 fldws -4(0,%r29),%fr9L 342 XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h
164 xmpyu %fr8L,%fr9L,%fr9 343
165 fstds %fr9,-16(0,%r30) 344 FSTD fm,-8(%sp) ; -8(sp) = m
166 copy %r28,%r21 345 FSTD fm_1,-40(%sp) ; -40(sp) = m
167 ldi 0,%r20 346 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
168 ldw -16(0,%r30),%r24 347 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h
169 ldw -12(0,%r30),%r25 348
170 add %r21,%r25,%r25 349 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
171 addc %r20,%r24,%r24 350 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht
172 copy %r24,%r23 351 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
173 ldi 0,%r22 352 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
174 copy %r23,%r28 353
175 addib,= -1,%r19,L$0027 354 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
176 stw %r25,-4(0,%r31) 355 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt
177 fldws 0(0,%r29),%fr9L 356 LDD -8(%sp),m_0
178 xmpyu %fr8L,%fr9L,%fr9 357 LDD -40(%sp),m_1
179 fstds %fr9,-16(0,%r30) 358
180 copy %r28,%r21 359 LDD -16(%sp),m1_0
181 ldi 0,%r20 360 LDD -48(%sp),m1_1
182 ldw -16(0,%r30),%r24 361 LDD -24(%sp),ht_0
183 ldw -12(0,%r30),%r25 362 LDD -56(%sp),ht_1
184 add %r21,%r25,%r25 363
185 addc %r20,%r24,%r24 364 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1;
186 copy %r24,%r23 365 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1;
187 ldi 0,%r22 366 LDD -32(%sp),lt_0
188 copy %r23,%r28 367 LDD -64(%sp),lt_1
189 addib,= -1,%r19,L$0027 368
190 stw %r25,0(0,%r31) 369 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1)
191 ldo 16(%r29),%r29 370 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
192 ldo 16(%r2),%r2 371 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1)
193 ldo 16(%r31),%r31 372 ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32)
194 bl L$0026,0 373
195 ldo 16(%r26),%r26 374 EXTRD,U tmp_0,31,32,m_0 ; m>>32
196L$0027 375 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
197 ldw -84(0,%r30),%r2 376 EXTRD,U tmp_1,31,32,m_1 ; m>>32
198 bv 0(%r2) 377 DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32
199 ldwm -64(0,%r30),%r4 378
200 .EXIT 379 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
201 .PROCEND 380 ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32)
202 .align 4 381 ADD lt_0,m1_0,lt_0 ; lt = lt+m1;
203 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR 382 ADD,DC ht_0,%r0,ht_0 ; ht++
383
384 ADD lt_1,m1_1,lt_1 ; lt = lt+m1;
385 ADD,DC ht_1,%r0,ht_1 ; ht++
386 ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1);
387 ADD,DC ht_0,%r0,ht_0 ; ht++
388
389 ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0)
390 ADD,DC ht_1,%r0,ht_1 ; ht++
391 STD lt_0,0(r_ptr) ; rp[0] = lt
392 STD lt_1,8(r_ptr) ; rp[1] = lt
393
394 COPY ht_1,%ret1 ; carry = ht
395 LDO -2(num),num ; num = num - 2;
396 LDO 16(a_ptr),a_ptr ; ap += 2
397 CMPIB,<= 2,num,bn_mul_words_unroll2
398 LDO 16(r_ptr),r_ptr ; rp++
399
400 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
401
402 ;
403 ; Top of loop aligned on 64-byte boundary
404 ;
405bn_mul_words_single_top
406 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
407
408 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
409 FSTD fm1,-16(%sp) ; -16(sp) = m1
410 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
411 FSTD fm,-8(%sp) ; -8(sp) = m
412 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
413 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
414 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
415 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
416
417 LDD -8(%sp),m_0
418 LDD -16(%sp),m1_0
419 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
420 LDD -24(%sp),ht_0
421 LDD -32(%sp),lt_0
422
423 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
424 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
425
426 EXTRD,U tmp_0,31,32,m_0 ; m>>32
427 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
428
429 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
430 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
431 ADD,DC ht_0,%r0,ht_0 ; ht++
432
433 ADD %ret1,lt_0,lt_0 ; lt = lt + c;
434 ADD,DC ht_0,%r0,ht_0 ; ht++
435
436 COPY ht_0,%ret1 ; copy carry
437 STD lt_0,0(r_ptr) ; rp[0] = lt
438
439bn_mul_words_exit
440 .EXIT
441 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
442 LDD -96(%sp),%r7 ; restore r7
443 LDD -104(%sp),%r6 ; restore r6
444 LDD -112(%sp),%r5 ; restore r5
445 LDD -120(%sp),%r4 ; restore r4
446 BVE (%rp)
447 LDD,MB -128(%sp),%r3 ; restore r3
448 .PROCEND
449
450;----------------------------------------------------------------------------
451;
452;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
453;
454; arg0 = rp
455; arg1 = ap
456; arg2 = num
457;
458
204bn_sqr_words 459bn_sqr_words
460 .proc
461 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
462 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
463 .entry
464 .align 64
465
466 STD %r3,0(%sp) ; save r3
467 STD %r4,8(%sp) ; save r4
468 NOP
469 STD %r5,16(%sp) ; save r5
470
471 CMPIB,>= 0,num,bn_sqr_words_exit
472 LDO 128(%sp),%sp ; bump stack
473
474 ;
475 ; If only 1, the goto straight to cleanup
476 ;
477 CMPIB,= 1,num,bn_sqr_words_single_top
478 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
479
480 ;
481 ; This loop is unrolled 2 times (64-byte aligned as well)
482 ;
483
484bn_sqr_words_unroll2
485 FLDD 0(a_ptr),t_float_0 ; a[0]
486 FLDD 8(a_ptr),t_float_1 ; a[1]
487 XMPYU fht_0,flt_0,fm ; m[0]
488 XMPYU fht_1,flt_1,fm_1 ; m[1]
489
490 FSTD fm,-24(%sp) ; store m[0]
491 FSTD fm_1,-56(%sp) ; store m[1]
492 XMPYU flt_0,flt_0,lt_temp ; lt[0]
493 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
494
495 FSTD lt_temp,-16(%sp) ; store lt[0]
496 FSTD lt_temp_1,-48(%sp) ; store lt[1]
497 XMPYU fht_0,fht_0,ht_temp ; ht[0]
498 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
499
500 FSTD ht_temp,-8(%sp) ; store ht[0]
501 FSTD ht_temp_1,-40(%sp) ; store ht[1]
502 LDD -24(%sp),m_0
503 LDD -56(%sp),m_1
504
505 AND m_0,high_mask,tmp_0 ; m[0] & Mask
506 AND m_1,high_mask,tmp_1 ; m[1] & Mask
507 DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
508 DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
509
510 LDD -16(%sp),lt_0
511 LDD -48(%sp),lt_1
512 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
513 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
514
515 LDD -8(%sp),ht_0
516 LDD -40(%sp),ht_1
517 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
518 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
519
520 ADD lt_0,m_0,lt_0 ; lt = lt+m
521 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
522 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
523 STD ht_0,8(r_ptr) ; rp[1] = ht[1]
524
525 ADD lt_1,m_1,lt_1 ; lt = lt+m
526 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
527 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
528 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
529
530 LDO -2(num),num ; num = num - 2;
531 LDO 16(a_ptr),a_ptr ; ap += 2
532 CMPIB,<= 2,num,bn_sqr_words_unroll2
533 LDO 32(r_ptr),r_ptr ; rp += 4
534
535 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
536
537 ;
538 ; Top of loop aligned on 64-byte boundary
539 ;
540bn_sqr_words_single_top
541 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
542
543 XMPYU fht_0,flt_0,fm ; m
544 FSTD fm,-24(%sp) ; store m
545
546 XMPYU flt_0,flt_0,lt_temp ; lt
547 FSTD lt_temp,-16(%sp) ; store lt
548
549 XMPYU fht_0,fht_0,ht_temp ; ht
550 FSTD ht_temp,-8(%sp) ; store ht
551
552 LDD -24(%sp),m_0 ; load m
553 AND m_0,high_mask,tmp_0 ; m & Mask
554 DEPD,Z m_0,30,31,m_0 ; m << 32+1
555 LDD -16(%sp),lt_0 ; lt
556
557 LDD -8(%sp),ht_0 ; ht
558 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
559 ADD m_0,lt_0,lt_0 ; lt = lt+m
560 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0
561 ADD,DC ht_0,%r0,ht_0 ; ht++
562
563 STD lt_0,0(r_ptr) ; rp[0] = lt
564 STD ht_0,8(r_ptr) ; rp[1] = ht
565
566bn_sqr_words_exit
567 .EXIT
568 LDD -112(%sp),%r5 ; restore r5
569 LDD -120(%sp),%r4 ; restore r4
570 BVE (%rp)
571 LDD,MB -128(%sp),%r3
572 .PROCEND ;in=23,24,25,26,29;out=28;
573
574
575;----------------------------------------------------------------------------
576;
577;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
578;
579; arg0 = rp
580; arg1 = ap
581; arg2 = bp
582; arg3 = n
583
584t .reg %r22
585b .reg %r21
586l .reg %r20
587
588bn_add_words
589 .proc
590 .entry
591 .callinfo
592 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
593 .align 64
594
595 CMPIB,>= 0,n,bn_add_words_exit
596 COPY %r0,%ret1 ; return 0 by default
597
598 ;
599 ; If 2 or more numbers do the loop
600 ;
601 CMPIB,= 1,n,bn_add_words_single_top
602 NOP
603
604 ;
605 ; This loop is unrolled 2 times (64-byte aligned as well)
606 ;
607bn_add_words_unroll2
608 LDD 0(a_ptr),t
609 LDD 0(b_ptr),b
610 ADD t,%ret1,t ; t = t+c;
611 ADD,DC %r0,%r0,%ret1 ; set c to carry
612 ADD t,b,l ; l = t + b[0]
613 ADD,DC %ret1,%r0,%ret1 ; c+= carry
614 STD l,0(r_ptr)
615
616 LDD 8(a_ptr),t
617 LDD 8(b_ptr),b
618 ADD t,%ret1,t ; t = t+c;
619 ADD,DC %r0,%r0,%ret1 ; set c to carry
620 ADD t,b,l ; l = t + b[0]
621 ADD,DC %ret1,%r0,%ret1 ; c+= carry
622 STD l,8(r_ptr)
623
624 LDO -2(n),n
625 LDO 16(a_ptr),a_ptr
626 LDO 16(b_ptr),b_ptr
627
628 CMPIB,<= 2,n,bn_add_words_unroll2
629 LDO 16(r_ptr),r_ptr
630
631 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
632
633bn_add_words_single_top
634 LDD 0(a_ptr),t
635 LDD 0(b_ptr),b
636
637 ADD t,%ret1,t ; t = t+c;
638 ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??)
639 ADD t,b,l ; l = t + b[0]
640 ADD,DC %ret1,%r0,%ret1 ; c+= carry
641 STD l,0(r_ptr)
642
643bn_add_words_exit
644 .EXIT
645 BVE (%rp)
646 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
647 .PROCEND ;in=23,24,25,26,29;out=28;
648
649;----------------------------------------------------------------------------
650;
651;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
652;
653; arg0 = rp
654; arg1 = ap
655; arg2 = bp
656; arg3 = n
657
658t1 .reg %r22
659t2 .reg %r21
660sub_tmp1 .reg %r20
661sub_tmp2 .reg %r19
662
663
664bn_sub_words
665 .proc
666 .callinfo
667 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
668 .entry
669 .align 64
670
671 CMPIB,>= 0,n,bn_sub_words_exit
672 COPY %r0,%ret1 ; return 0 by default
673
674 ;
675 ; If 2 or more numbers do the loop
676 ;
677 CMPIB,= 1,n,bn_sub_words_single_top
678 NOP
679
680 ;
681 ; This loop is unrolled 2 times (64-byte aligned as well)
682 ;
683bn_sub_words_unroll2
684 LDD 0(a_ptr),t1
685 LDD 0(b_ptr),t2
686 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
687 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
688
689 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
690 LDO 1(%r0),sub_tmp2
691
692 CMPCLR,*= t1,t2,%r0
693 COPY sub_tmp2,%ret1
694 STD sub_tmp1,0(r_ptr)
695
696 LDD 8(a_ptr),t1
697 LDD 8(b_ptr),t2
698 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
699 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
700 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
701 LDO 1(%r0),sub_tmp2
702
703 CMPCLR,*= t1,t2,%r0
704 COPY sub_tmp2,%ret1
705 STD sub_tmp1,8(r_ptr)
706
707 LDO -2(n),n
708 LDO 16(a_ptr),a_ptr
709 LDO 16(b_ptr),b_ptr
710
711 CMPIB,<= 2,n,bn_sub_words_unroll2
712 LDO 16(r_ptr),r_ptr
713
714 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
715
716bn_sub_words_single_top
717 LDD 0(a_ptr),t1
718 LDD 0(b_ptr),t2
719 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
720 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
721 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
722 LDO 1(%r0),sub_tmp2
723
724 CMPCLR,*= t1,t2,%r0
725 COPY sub_tmp2,%ret1
726
727 STD sub_tmp1,0(r_ptr)
728
729bn_sub_words_exit
730 .EXIT
731 BVE (%rp)
732 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
733 .PROCEND ;in=23,24,25,26,29;out=28;
734
735;------------------------------------------------------------------------------
736;
737; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
738;
739; arg0 = h
740; arg1 = l
741; arg2 = d
742;
743; This is mainly just output from the HP C compiler.
744;
745;------------------------------------------------------------------------------
746bn_div_words
205 .PROC 747 .PROC
206 .CALLINFO FRAME=0,NO_CALLS 748 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN
207 .ENTRY 749 .IMPORT BN_num_bits_word,CODE
208 ldo 28(%r26),%r19 750 .IMPORT __iob,DATA
209 ldo 12(%r25),%r28 751 .IMPORT fprintf,CODE
210L$0042 752 .IMPORT abort,CODE
211 fldws 0(0,%r25),%fr8L 753 .IMPORT $$div2U,MILLICODE
212 fldws 0(0,%r25),%fr8R 754 .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
213 xmpyu %fr8L,%fr8R,%fr8 755 .ENTRY
214 fstds %fr8,-16(0,%r30) 756 STW %r2,-20(%r30) ;offset 0x8ec
215 ldw -16(0,%r30),%r22 757 STW,MA %r3,192(%r30) ;offset 0x8f0
216 ldw -12(0,%r30),%r23 758 STW %r4,-188(%r30) ;offset 0x8f4
217 stw %r23,0(0,%r26) 759 DEPD %r5,31,32,%r6 ;offset 0x8f8
218 copy %r22,%r21 760 STD %r6,-184(%r30) ;offset 0x8fc
219 ldi 0,%r20 761 DEPD %r7,31,32,%r8 ;offset 0x900
220 addib,= -1,%r24,L$0049 762 STD %r8,-176(%r30) ;offset 0x904
221 stw %r21,-24(0,%r19) 763 STW %r9,-168(%r30) ;offset 0x908
222 fldws -8(0,%r28),%fr8L 764 LDD -248(%r30),%r3 ;offset 0x90c
223 fldws -8(0,%r28),%fr8R 765 COPY %r26,%r4 ;offset 0x910
224 xmpyu %fr8L,%fr8R,%fr8 766 COPY %r24,%r5 ;offset 0x914
225 fstds %fr8,-16(0,%r30) 767 DEPD %r25,31,32,%r4 ;offset 0x918
226 ldw -16(0,%r30),%r22 768 CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c
227 ldw -12(0,%r30),%r23 769 DEPD %r23,31,32,%r5 ;offset 0x920
228 stw %r23,-20(0,%r19) 770 MOVIB,TR -1,%r29,$00060002 ;offset 0x924
229 copy %r22,%r21 771 EXTRD,U %r29,31,32,%r28 ;offset 0x928
230 ldi 0,%r20 772$0006002A
231 addib,= -1,%r24,L$0049 773 LDO -1(%r29),%r29 ;offset 0x92c
232 stw %r21,-16(0,%r19) 774 SUB %r23,%r7,%r23 ;offset 0x930
233 fldws -4(0,%r28),%fr8L 775$00060024
234 fldws -4(0,%r28),%fr8R 776 SUB %r4,%r31,%r25 ;offset 0x934
235 xmpyu %fr8L,%fr8R,%fr8 777 AND %r25,%r19,%r26 ;offset 0x938
236 fstds %fr8,-16(0,%r30) 778 CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c
237 ldw -16(0,%r30),%r22 779 DEPD,Z %r25,31,32,%r20 ;offset 0x940
238 ldw -12(0,%r30),%r23 780 OR %r20,%r24,%r21 ;offset 0x944
239 stw %r23,-12(0,%r19) 781 CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948
240 copy %r22,%r21 782 SUB %r31,%r2,%r31 ;offset 0x94c
241 ldi 0,%r20 783$00060046
242 addib,= -1,%r24,L$0049 784$0006002E
243 stw %r21,-8(0,%r19) 785 DEPD,Z %r23,31,32,%r25 ;offset 0x950
244 fldws 0(0,%r28),%fr8L 786 EXTRD,U %r23,31,32,%r26 ;offset 0x954
245 fldws 0(0,%r28),%fr8R 787 AND %r25,%r19,%r24 ;offset 0x958
246 xmpyu %fr8L,%fr8R,%fr8 788 ADD,L %r31,%r26,%r31 ;offset 0x95c
247 fstds %fr8,-16(0,%r30) 789 CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960
248 ldw -16(0,%r30),%r22 790 LDO 1(%r31),%r31 ;offset 0x964
249 ldw -12(0,%r30),%r23 791$00060032
250 stw %r23,-4(0,%r19) 792 CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968
251 copy %r22,%r21 793 LDO -1(%r29),%r29 ;offset 0x96c
252 ldi 0,%r20 794 ADD,L %r4,%r3,%r4 ;offset 0x970
253 addib,= -1,%r24,L$0049 795$00060036
254 stw %r21,0(0,%r19) 796 ADDIB,=,N -1,%r8,$D0 ;offset 0x974
255 ldo 16(%r28),%r28 797 SUB %r5,%r24,%r28 ;offset 0x978
256 ldo 16(%r25),%r25 798$0006003A
257 ldo 32(%r19),%r19 799 SUB %r4,%r31,%r24 ;offset 0x97c
258 bl L$0042,0 800 SHRPD %r24,%r28,32,%r4 ;offset 0x980
259 ldo 32(%r26),%r26 801 DEPD,Z %r29,31,32,%r9 ;offset 0x984
260L$0049 802 DEPD,Z %r28,31,32,%r5 ;offset 0x988
261 bv,n 0(%r2) 803$0006001C
262 .EXIT 804 EXTRD,U %r4,31,32,%r31 ;offset 0x98c
263 .PROCEND 805 CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990
264 .IMPORT BN_num_bits_word,CODE 806 MOVB,TR %r6,%r29,$D1 ;offset 0x994
265 .IMPORT fprintf,CODE 807 STD %r29,-152(%r30) ;offset 0x998
266 .IMPORT __iob,DATA 808$0006000C
267 .SPACE $TEXT$ 809 EXTRD,U %r3,31,32,%r25 ;offset 0x99c
268 .SUBSPA $LIT$ 810 COPY %r3,%r26 ;offset 0x9a0
269 811 EXTRD,U %r3,31,32,%r9 ;offset 0x9a4
270 .align 4 812 EXTRD,U %r4,31,32,%r8 ;offset 0x9a8
271L$C0000 813 .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28;
272 .STRING "Division would overflow (%d)\x0a\x00" 814 B,L BN_num_bits_word,%r2 ;offset 0x9ac
273 .IMPORT abort,CODE 815 EXTRD,U %r5,31,32,%r7 ;offset 0x9b0
274 .SPACE $TEXT$ 816 LDI 64,%r20 ;offset 0x9b4
275 .SUBSPA $CODE$ 817 DEPD %r7,31,32,%r5 ;offset 0x9b8
276 818 DEPD %r8,31,32,%r4 ;offset 0x9bc
277 .align 4 819 DEPD %r9,31,32,%r3 ;offset 0x9c0
278 .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR 820 CMPB,= %r28,%r20,$00060012 ;offset 0x9c4
279bn_div64 821 COPY %r28,%r24 ;offset 0x9c8
822 MTSARCM %r24 ;offset 0x9cc
823 DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0
824 CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4
825$00060012
826 SUBI 64,%r24,%r31 ;offset 0x9d8
827 CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc
828 SUB %r4,%r3,%r4 ;offset 0x9e0
829$00060016
830 CMPB,= %r31,%r0,$0006001A ;offset 0x9e4
831 COPY %r0,%r9 ;offset 0x9e8
832 MTSARCM %r31 ;offset 0x9ec
833 DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0
834 SUBI 64,%r31,%r26 ;offset 0x9f4
835 MTSAR %r26 ;offset 0x9f8
836 SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc
837 MTSARCM %r31 ;offset 0xa00
838 DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04
839$0006001A
840 DEPDI,Z -1,31,32,%r19 ;offset 0xa08
841 AND %r3,%r19,%r29 ;offset 0xa0c
842 EXTRD,U %r29,31,32,%r2 ;offset 0xa10
843 DEPDI,Z -1,63,32,%r6 ;offset 0xa14
844 MOVIB,TR 2,%r8,$0006001C ;offset 0xa18
845 EXTRD,U %r3,63,32,%r7 ;offset 0xa1c
846$D2
847 ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20
848 LDIL LR'C$7,%r21 ;offset 0xa24
849 LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28
850 .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28;
851 B,L fprintf,%r2 ;offset 0xa2c
852 LDO RR'C$7(%r21),%r25 ;offset 0xa30
853 .CALL ;
854 B,L abort,%r2 ;offset 0xa34
855 NOP ;offset 0xa38
856 B $D3 ;offset 0xa3c
857 LDW -212(%r30),%r2 ;offset 0xa40
858$00060020
859 COPY %r4,%r26 ;offset 0xa44
860 EXTRD,U %r4,31,32,%r25 ;offset 0xa48
861 COPY %r2,%r24 ;offset 0xa4c
862 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
863 B,L $$div2U,%r31 ;offset 0xa50
864 EXTRD,U %r2,31,32,%r23 ;offset 0xa54
865 DEPD %r28,31,32,%r29 ;offset 0xa58
866$00060022
867 STD %r29,-152(%r30) ;offset 0xa5c
868$D1
869 AND %r5,%r19,%r24 ;offset 0xa60
870 EXTRD,U %r24,31,32,%r24 ;offset 0xa64
871 STW %r2,-160(%r30) ;offset 0xa68
872 STW %r7,-128(%r30) ;offset 0xa6c
873 FLDD -152(%r30),%fr4 ;offset 0xa70
874 FLDD -152(%r30),%fr7 ;offset 0xa74
875 FLDW -160(%r30),%fr8L ;offset 0xa78
876 FLDW -128(%r30),%fr5L ;offset 0xa7c
877 XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80
878 FSTD %fr10,-136(%r30) ;offset 0xa84
879 XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88
880 FSTD %fr22,-144(%r30) ;offset 0xa8c
881 XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90
882 XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94
883 FSTD %fr11,-112(%r30) ;offset 0xa98
884 FSTD %fr23,-120(%r30) ;offset 0xa9c
885 LDD -136(%r30),%r28 ;offset 0xaa0
886 DEPD,Z %r28,31,32,%r31 ;offset 0xaa4
887 LDD -144(%r30),%r20 ;offset 0xaa8
888 ADD,L %r20,%r31,%r31 ;offset 0xaac
889 LDD -112(%r30),%r22 ;offset 0xab0
890 DEPD,Z %r22,31,32,%r22 ;offset 0xab4
891 LDD -120(%r30),%r21 ;offset 0xab8
892 B $00060024 ;offset 0xabc
893 ADD,L %r21,%r22,%r23 ;offset 0xac0
894$D0
895 OR %r9,%r29,%r29 ;offset 0xac4
896$00060040
897 EXTRD,U %r29,31,32,%r28 ;offset 0xac8
898$00060002
899$L2
900 LDW -212(%r30),%r2 ;offset 0xacc
901$D3
902 LDW -168(%r30),%r9 ;offset 0xad0
903 LDD -176(%r30),%r8 ;offset 0xad4
904 EXTRD,U %r8,31,32,%r7 ;offset 0xad8
905 LDD -184(%r30),%r6 ;offset 0xadc
906 EXTRD,U %r6,31,32,%r5 ;offset 0xae0
907 LDW -188(%r30),%r4 ;offset 0xae4
908 BVE (%r2) ;offset 0xae8
909 .EXIT
910 LDW,MB -192(%r30),%r3 ;offset 0xaec
911 .PROCEND ;in=23,25;out=28,29;fpin=105,107;
912
913
914
915
916;----------------------------------------------------------------------------
917;
918; Registers to hold 64-bit values to manipulate. The "L" part
919; of the register corresponds to the upper 32-bits, while the "R"
920; part corresponds to the lower 32-bits
921;
922; Note, that when using b6 and b7, the code must save these before
923; using them because they are callee save registers
924;
925;
926; Floating point registers to use to save values that
927; are manipulated. These don't collide with ftemp1-6 and
928; are all caller save registers
929;
930a0 .reg %fr22
931a0L .reg %fr22L
932a0R .reg %fr22R
933
934a1 .reg %fr23
935a1L .reg %fr23L
936a1R .reg %fr23R
937
938a2 .reg %fr24
939a2L .reg %fr24L
940a2R .reg %fr24R
941
942a3 .reg %fr25
943a3L .reg %fr25L
944a3R .reg %fr25R
945
946a4 .reg %fr26
947a4L .reg %fr26L
948a4R .reg %fr26R
949
950a5 .reg %fr27
951a5L .reg %fr27L
952a5R .reg %fr27R
953
954a6 .reg %fr28
955a6L .reg %fr28L
956a6R .reg %fr28R
957
958a7 .reg %fr29
959a7L .reg %fr29L
960a7R .reg %fr29R
961
962b0 .reg %fr30
963b0L .reg %fr30L
964b0R .reg %fr30R
965
966b1 .reg %fr31
967b1L .reg %fr31L
968b1R .reg %fr31R
969
970;
971; Temporary floating point variables, these are all caller save
972; registers
973;
974ftemp1 .reg %fr4
975ftemp2 .reg %fr5
976ftemp3 .reg %fr6
977ftemp4 .reg %fr7
978
979;
980; The B set of registers when used.
981;
982
983b2 .reg %fr8
984b2L .reg %fr8L
985b2R .reg %fr8R
986
987b3 .reg %fr9
988b3L .reg %fr9L
989b3R .reg %fr9R
990
991b4 .reg %fr10
992b4L .reg %fr10L
993b4R .reg %fr10R
994
995b5 .reg %fr11
996b5L .reg %fr11L
997b5R .reg %fr11R
998
999b6 .reg %fr12
1000b6L .reg %fr12L
1001b6R .reg %fr12R
1002
1003b7 .reg %fr13
1004b7L .reg %fr13L
1005b7R .reg %fr13R
1006
1007c1 .reg %r21 ; only reg
1008temp1 .reg %r20 ; only reg
1009temp2 .reg %r19 ; only reg
1010temp3 .reg %r31 ; only reg
1011
1012m1 .reg %r28
1013c2 .reg %r23
1014high_one .reg %r1
1015ht .reg %r6
1016lt .reg %r5
1017m .reg %r4
1018c3 .reg %r3
1019
1020SQR_ADD_C .macro A0L,A0R,C1,C2,C3
1021 XMPYU A0L,A0R,ftemp1 ; m
1022 FSTD ftemp1,-24(%sp) ; store m
1023
1024 XMPYU A0R,A0R,ftemp2 ; lt
1025 FSTD ftemp2,-16(%sp) ; store lt
1026
1027 XMPYU A0L,A0L,ftemp3 ; ht
1028 FSTD ftemp3,-8(%sp) ; store ht
1029
1030 LDD -24(%sp),m ; load m
1031 AND m,high_mask,temp2 ; m & Mask
1032 DEPD,Z m,30,31,temp3 ; m << 32+1
1033 LDD -16(%sp),lt ; lt
1034
1035 LDD -8(%sp),ht ; ht
1036 EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
1037 ADD temp3,lt,lt ; lt = lt+m
1038 ADD,L ht,temp1,ht ; ht += temp1
1039 ADD,DC ht,%r0,ht ; ht++
1040
1041 ADD C1,lt,C1 ; c1=c1+lt
1042 ADD,DC ht,%r0,ht ; ht++
1043
1044 ADD C2,ht,C2 ; c2=c2+ht
1045 ADD,DC C3,%r0,C3 ; c3++
1046.endm
1047
1048SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
1049 XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht
1050 FSTD ftemp1,-16(%sp) ;
1051 XMPYU A0R,A1L,ftemp2 ; m = bh*lt
1052 FSTD ftemp2,-8(%sp) ;
1053 XMPYU A0R,A1R,ftemp3 ; lt = bl*lt
1054 FSTD ftemp3,-32(%sp)
1055 XMPYU A0L,A1L,ftemp4 ; ht = bh*ht
1056 FSTD ftemp4,-24(%sp) ;
1057
1058 LDD -8(%sp),m ; r21 = m
1059 LDD -16(%sp),m1 ; r19 = m1
1060 ADD,L m,m1,m ; m+m1
1061
1062 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1063 LDD -24(%sp),ht ; r24 = ht
1064
1065 CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
1066 ADD,L ht,high_one,ht ; ht+=high_one
1067
1068 EXTRD,U m,31,32,temp1 ; m >> 32
1069 LDD -32(%sp),lt ; lt
1070 ADD,L ht,temp1,ht ; ht+= m>>32
1071 ADD lt,temp3,lt ; lt = lt+m1
1072 ADD,DC ht,%r0,ht ; ht++
1073
1074 ADD ht,ht,ht ; ht=ht+ht;
1075 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1076
1077 ADD lt,lt,lt ; lt=lt+lt;
1078 ADD,DC ht,%r0,ht ; add in carry (ht++)
1079
1080 ADD C1,lt,C1 ; c1=c1+lt
1081 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1082 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1083
1084 ADD C2,ht,C2 ; c2 = c2 + ht
1085 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1086.endm
1087
1088;
1089;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1090; arg0 = r_ptr
1091; arg1 = a_ptr
1092;
1093
1094bn_sqr_comba8
280 .PROC 1095 .PROC
281 .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8 1096 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
282 .ENTRY 1097 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
283 stw %r2,-20(0,%r30) 1098 .ENTRY
284 stwm %r8,128(0,%r30) 1099 .align 64
285 stw %r7,-124(0,%r30) 1100
286 stw %r4,-112(0,%r30) 1101 STD %r3,0(%sp) ; save r3
287 stw %r3,-108(0,%r30) 1102 STD %r4,8(%sp) ; save r4
288 copy %r26,%r3 1103 STD %r5,16(%sp) ; save r5
289 copy %r25,%r4 1104 STD %r6,24(%sp) ; save r6
290 stw %r6,-120(0,%r30) 1105
291 ldi 0,%r7 1106 ;
292 stw %r5,-116(0,%r30) 1107 ; Zero out carries
293 movb,<> %r24,%r5,L$0051 1108 ;
294 ldi 2,%r6 1109 COPY %r0,c1
295 bl L$0068,0 1110 COPY %r0,c2
296 ldi -1,%r28 1111 COPY %r0,c3
297L$0051 1112
298 .CALL ARGW0=GR 1113 LDO 128(%sp),%sp ; bump stack
299 bl BN_num_bits_word,%r2 1114 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
300 copy %r5,%r26 1115 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
301 copy %r28,%r24 1116
302 ldi 32,%r19 1117 ;
303 comb,= %r19,%r24,L$0052 1118 ; Load up all of the values we are going to use
304 subi 31,%r24,%r19 1119 ;
305 mtsar %r19 1120 FLDD 0(a_ptr),a0
306 zvdepi 1,32,%r19 1121 FLDD 8(a_ptr),a1
307 comb,>>= %r19,%r3,L$0052 1122 FLDD 16(a_ptr),a2
308 addil LR'__iob-$global$+32,%r27 1123 FLDD 24(a_ptr),a3
309 ldo RR'__iob-$global$+32(%r1),%r26 1124 FLDD 32(a_ptr),a4
310 ldil LR'L$C0000,%r25 1125 FLDD 40(a_ptr),a5
311 .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR 1126 FLDD 48(a_ptr),a6
312 bl fprintf,%r2 1127 FLDD 56(a_ptr),a7
313 ldo RR'L$C0000(%r25),%r25 1128
314 .CALL 1129 SQR_ADD_C a0L,a0R,c1,c2,c3
315 bl abort,%r2 1130 STD c1,0(r_ptr) ; r[0] = c1;
316 nop 1131 COPY %r0,c1
317L$0052 1132
318 comb,>> %r5,%r3,L$0053 1133 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
319 subi 32,%r24,%r24 1134 STD c2,8(r_ptr) ; r[1] = c2;
320 sub %r3,%r5,%r3 1135 COPY %r0,c2
321L$0053 1136
322 comib,= 0,%r24,L$0054 1137 SQR_ADD_C a1L,a1R,c3,c1,c2
323 subi 31,%r24,%r19 1138 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
324 mtsar %r19 1139 STD c3,16(r_ptr) ; r[2] = c3;
325 zvdep %r5,32,%r5 1140 COPY %r0,c3
326 zvdep %r3,32,%r21 1141
327 subi 32,%r24,%r20 1142 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
328 mtsar %r20 1143 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
329 vshd 0,%r4,%r20 1144 STD c1,24(r_ptr) ; r[3] = c1;
330 or %r21,%r20,%r3 1145 COPY %r0,c1
331 mtsar %r19 1146
332 zvdep %r4,32,%r4 1147 SQR_ADD_C a2L,a2R,c2,c3,c1
333L$0054 1148 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
334 extru %r5,15,16,%r23 1149 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
335 extru %r5,31,16,%r28 1150 STD c2,32(r_ptr) ; r[4] = c2;
336L$0055 1151 COPY %r0,c2
337 extru %r3,15,16,%r19 1152
338 comb,<> %r23,%r19,L$0058 1153 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
339 copy %r3,%r26 1154 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
340 bl L$0059,0 1155 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
341 zdepi -1,31,16,%r29 1156 STD c3,40(r_ptr) ; r[5] = c3;
342L$0058 1157 COPY %r0,c3
343 .IMPORT $$divU,MILLICODE 1158
344 bl $$divU,%r31 1159 SQR_ADD_C a3L,a3R,c1,c2,c3
345 copy %r23,%r25 1160 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
346L$0059 1161 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
347 stw %r29,-16(0,%r30) 1162 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
348 fldws -16(0,%r30),%fr10L 1163 STD c1,48(r_ptr) ; r[6] = c1;
349 stw %r28,-16(0,%r30) 1164 COPY %r0,c1
350 fldws -16(0,%r30),%fr10R 1165
351 stw %r23,-16(0,%r30) 1166 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
352 xmpyu %fr10L,%fr10R,%fr8 1167 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
353 fldws -16(0,%r30),%fr10R 1168 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
354 fstws %fr8R,-16(0,%r30) 1169 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
355 xmpyu %fr10L,%fr10R,%fr9 1170 STD c2,56(r_ptr) ; r[7] = c2;
356 ldw -16(0,%r30),%r8 1171 COPY %r0,c2
357 fstws %fr9R,-16(0,%r30) 1172
358 copy %r8,%r22 1173 SQR_ADD_C a4L,a4R,c3,c1,c2
359 ldw -16(0,%r30),%r8 1174 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
360 extru %r4,15,16,%r24 1175 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
361 copy %r8,%r21 1176 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
362L$0060 1177 STD c3,64(r_ptr) ; r[8] = c3;
363 sub %r3,%r21,%r20 1178 COPY %r0,c3
364 copy %r20,%r19 1179
365 depi 0,31,16,%r19 1180 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
366 comib,<> 0,%r19,L$0061 1181 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
367 zdep %r20,15,16,%r19 1182 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
368 addl %r19,%r24,%r19 1183 STD c1,72(r_ptr) ; r[9] = c1;
369 comb,>>= %r19,%r22,L$0061 1184 COPY %r0,c1
370 sub %r22,%r28,%r22 1185
371 sub %r21,%r23,%r21 1186 SQR_ADD_C a5L,a5R,c2,c3,c1
372 bl L$0060,0 1187 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
373 ldo -1(%r29),%r29 1188 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
374L$0061 1189 STD c2,80(r_ptr) ; r[10] = c2;
375 stw %r29,-16(0,%r30) 1190 COPY %r0,c2
376 fldws -16(0,%r30),%fr10L 1191
377 stw %r28,-16(0,%r30) 1192 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
378 fldws -16(0,%r30),%fr10R 1193 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
379 xmpyu %fr10L,%fr10R,%fr8 1194 STD c3,88(r_ptr) ; r[11] = c3;
380 fstws %fr8R,-16(0,%r30) 1195 COPY %r0,c3
381 ldw -16(0,%r30),%r8 1196
382 stw %r23,-16(0,%r30) 1197 SQR_ADD_C a6L,a6R,c1,c2,c3
383 fldws -16(0,%r30),%fr10R 1198 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
384 copy %r8,%r19 1199 STD c1,96(r_ptr) ; r[12] = c1;
385 xmpyu %fr10L,%fr10R,%fr8 1200 COPY %r0,c1
386 fstws %fr8R,-16(0,%r30) 1201
387 extru %r19,15,16,%r20 1202 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
388 ldw -16(0,%r30),%r8 1203 STD c2,104(r_ptr) ; r[13] = c2;
389 zdep %r19,15,16,%r19 1204 COPY %r0,c2
390 addl %r8,%r20,%r20 1205
391 comclr,<<= %r19,%r4,0 1206 SQR_ADD_C a7L,a7R,c3,c1,c2
392 addi 1,%r20,%r20 1207 STD c3, 112(r_ptr) ; r[14] = c3
393 comb,<<= %r20,%r3,L$0066 1208 STD c1, 120(r_ptr) ; r[15] = c1
394 sub %r4,%r19,%r4 1209
395 addl %r3,%r5,%r3 1210 .EXIT
396 ldo -1(%r29),%r29 1211 LDD -104(%sp),%r6 ; restore r6
397L$0066 1212 LDD -112(%sp),%r5 ; restore r5
398 addib,= -1,%r6,L$0056 1213 LDD -120(%sp),%r4 ; restore r4
399 sub %r3,%r20,%r3 1214 BVE (%rp)
400 zdep %r29,15,16,%r7 1215 LDD,MB -128(%sp),%r3
401 shd %r3,%r4,16,%r3 1216
402 bl L$0055,0 1217 .PROCEND
403 zdep %r4,15,16,%r4 1218
404L$0056 1219;-----------------------------------------------------------------------------
405 or %r7,%r29,%r28 1220;
406L$0068 1221;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
407 ldw -148(0,%r30),%r2 1222; arg0 = r_ptr
408 ldw -124(0,%r30),%r7 1223; arg1 = a_ptr
409 ldw -120(0,%r30),%r6 1224;
410 ldw -116(0,%r30),%r5 1225
411 ldw -112(0,%r30),%r4 1226bn_sqr_comba4
412 ldw -108(0,%r30),%r3 1227 .proc
413 bv 0(%r2) 1228 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
414 ldwm -128(0,%r30),%r8 1229 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
415 .EXIT 1230 .entry
416 .PROCEND 1231 .align 64
1232 STD %r3,0(%sp) ; save r3 at the base of the new 128-byte frame
1233 STD %r4,8(%sp) ; save r4
1234 STD %r5,16(%sp) ; save r5
1235 STD %r6,24(%sp) ; save r6
1236
1237 ;
1238 ; Zero out carries (c1/c2/c3 form the rotating 3-word column accumulator)
1239 ;
1240 COPY %r0,c1
1241 COPY %r0,c2
1242 COPY %r0,c3
1243
1244 LDO 128(%sp),%sp ; bump stack
1245 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1246 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1247
1248 ;
1249 ; Load up all of the values we are going to use
1250 ;
1251 FLDD 0(a_ptr),a0
1252 FLDD 8(a_ptr),a1
1253 FLDD 16(a_ptr),a2
1254 FLDD 24(a_ptr),a3
1255 ; Only a[0]..a[3] are loaded: this is the 4-word square, so the
1256 ; input holds four 8-byte words.  Loading a4..a7 (offsets 32..56)
1257 ; would read 32 bytes past the end of a[], and none of a4..a7 is
1258 ; referenced anywhere below.
1259
1260 SQR_ADD_C a0L,a0R,c1,c2,c3 ; column 0: a0*a0
1261
1262 STD c1,0(r_ptr) ; r[0] = c1;
1263 COPY %r0,c1
1264
1265 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 ; column 1: 2*a1*a0
1266
1267 STD c2,8(r_ptr) ; r[1] = c2;
1268 COPY %r0,c2
1269
1270 SQR_ADD_C a1L,a1R,c3,c1,c2 ; column 2: a1*a1 + 2*a2*a0
1271 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1272
1273 STD c3,16(r_ptr) ; r[2] = c3;
1274 COPY %r0,c3
1275
1276 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 ; column 3: 2*a3*a0 + 2*a2*a1
1277 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1278
1279 STD c1,24(r_ptr) ; r[3] = c1;
1280 COPY %r0,c1
1281
1282 SQR_ADD_C a2L,a2R,c2,c3,c1 ; column 4: a2*a2 + 2*a3*a1
1283 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1284
1285 STD c2,32(r_ptr) ; r[4] = c2;
1286 COPY %r0,c2
1287
1288 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 ; column 5: 2*a3*a2
1289 STD c3,40(r_ptr) ; r[5] = c3;
1290 COPY %r0,c3
1291
1292 SQR_ADD_C a3L,a3R,c1,c2,c3 ; column 6: a3*a3
1293 STD c1,48(r_ptr) ; r[6] = c1;
1294 STD c2,56(r_ptr) ; r[7] = c2;
1295
1296 .EXIT
1297 LDD -104(%sp),%r6 ; restore r6
1298 LDD -112(%sp),%r5 ; restore r5
1299 LDD -120(%sp),%r4 ; restore r4
1300 BVE (%rp)
1301 LDD,MB -128(%sp),%r3 ; pop the 128-byte frame and restore r3
1302
1303 .PROCEND
1304
1305
1306;---------------------------------------------------------------------------
1307
1308MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
; Adds the 128-bit product of two 64-bit words into the three-word
; accumulator C3:C2:C1.  A0L/A0R and B0L/B0R are the two 32-bit halves of
; each operand's FP register (presumably L = high half, R = low half --
; the register definitions are outside this excerpt).  The four partial
; products go through the stack scratch slots at -8/-16/-24/-32(%sp).
1309 XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht
1310 FSTD ftemp1,-16(%sp) ; spill m1 so it can be reloaded into a general register
1311 XMPYU A0R,B0L,ftemp2 ; m = bh*lt
1312 FSTD ftemp2,-8(%sp) ; spill m
1313 XMPYU A0R,B0R,ftemp3 ; lt = bl*lt
1314 FSTD ftemp3,-32(%sp) ; spill lt
1315 XMPYU A0L,B0L,ftemp4 ; ht = bh*ht
1316 FSTD ftemp4,-24(%sp) ; spill ht
1317
1318 LDD -8(%sp),m ; r21 = m
1319 LDD -16(%sp),m1 ; r19 = m1
1320 ADD,L m,m1,m ; m = m + m1 (sum of the two cross products)
1321
1322 DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32
1323 LDD -24(%sp),ht ; r24 = ht
1324
1325 CMPCLR,*>>= m,m1,%r0 ; if (m < m1), i.e. the cross-product sum wrapped
1326 ADD,L ht,high_one,ht ; ht+=high_one
1327
1328 EXTRD,U m,31,32,temp1 ; temp1 = m >> 32
1329 LDD -32(%sp),lt ; lt
1330 ADD,L ht,temp1,ht ; ht+= m>>32
1331 ADD lt,temp3,lt ; lt = lt + (m << 32)
1332 ADD,DC ht,%r0,ht ; ht += carry out of the lt addition
1333
1334 ADD C1,lt,C1 ; c1=c1+lt
1335 ADD,DC ht,%r0,ht ; ht += carry out of c1+lt
1336
1337 ADD C2,ht,C2 ; c2 = c2 + ht
1338 ADD,DC C3,%r0,C3 ; c3 += carry out of c2+ht
1339.endm
1340
1341
1342; bn_mul_comba8: 8-word by 8-word multiply, writing the 16 words r[0]..r[15]
1343;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1344; arg0 = r_ptr
1345; arg1 = a_ptr
1346; arg2 = b_ptr
1347; Each group of MUL_ADD_C lines sums one output column (all a[i]*b[j] with
; the same i+j); c1/c2/c3 rotate as the three-word column accumulator.
1348
1349bn_mul_comba8
1350 .proc
1351 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1352 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1353 .entry
1354 .align 64
1355
1356 STD %r3,0(%sp) ; save r3
1357 STD %r4,8(%sp) ; save r4
1358 STD %r5,16(%sp) ; save r5
1359 STD %r6,24(%sp) ; save r6
1360 FSTD %fr12,32(%sp) ; save fr12
1361 FSTD %fr13,40(%sp) ; save fr13
1362
1363 ;
1364 ; Zero out carries
1365 ;
1366 COPY %r0,c1
1367 COPY %r0,c2
1368 COPY %r0,c3
1369
1370 LDO 128(%sp),%sp ; bump stack
1371 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1372
1373 ;
1374 ; Load up all of the values we are going to use
1375 ;
1376 FLDD 0(a_ptr),a0
1377 FLDD 8(a_ptr),a1
1378 FLDD 16(a_ptr),a2
1379 FLDD 24(a_ptr),a3
1380 FLDD 32(a_ptr),a4
1381 FLDD 40(a_ptr),a5
1382 FLDD 48(a_ptr),a6
1383 FLDD 56(a_ptr),a7
1384
1385 FLDD 0(b_ptr),b0
1386 FLDD 8(b_ptr),b1
1387 FLDD 16(b_ptr),b2
1388 FLDD 24(b_ptr),b3
1389 FLDD 32(b_ptr),b4
1390 FLDD 40(b_ptr),b5
1391 FLDD 48(b_ptr),b6
1392 FLDD 56(b_ptr),b7
1393
1394 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1395 STD c1,0(r_ptr) ; r[0] = c1
1396 COPY %r0,c1
1397
1398 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1399 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1400 STD c2,8(r_ptr) ; r[1] = c2
1401 COPY %r0,c2
1402
1403 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1404 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1405 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1406 STD c3,16(r_ptr) ; r[2] = c3
1407 COPY %r0,c3
1408
1409 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1410 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1411 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1412 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1413 STD c1,24(r_ptr) ; r[3] = c1
1414 COPY %r0,c1
1415
1416 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1417 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1418 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1419 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1420 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1421 STD c2,32(r_ptr) ; r[4] = c2
1422 COPY %r0,c2
1423
1424 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1425 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1426 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1427 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1428 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1429 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1430 STD c3,40(r_ptr) ; r[5] = c3
1431 COPY %r0,c3
1432
1433 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1434 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1435 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1436 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1437 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1438 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1439 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1440 STD c1,48(r_ptr) ; r[6] = c1
1441 COPY %r0,c1
1442
1443 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1444 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1445 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1446 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1447 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1448 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1449 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1450 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1451 STD c2,56(r_ptr) ; r[7] = c2
1452 COPY %r0,c2
1453
1454 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1455 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1456 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1457 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1458 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1459 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1460 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1461 STD c3,64(r_ptr) ; r[8] = c3
1462 COPY %r0,c3
1463
1464 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1465 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1466 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1467 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1468 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1469 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1470 STD c1,72(r_ptr) ; r[9] = c1
1471 COPY %r0,c1
1472
1473 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1474 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1475 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1476 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1477 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1478 STD c2,80(r_ptr) ; r[10] = c2
1479 COPY %r0,c2
1480
1481 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1482 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1483 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1484 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1485 STD c3,88(r_ptr) ; r[11] = c3
1486 COPY %r0,c3
1487
1488 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1489 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1490 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1491 STD c1,96(r_ptr) ; r[12] = c1
1492 COPY %r0,c1
1493
1494 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1495 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1496 STD c2,104(r_ptr) ; r[13] = c2
1497 COPY %r0,c2
1498
1499 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1500 STD c3,112(r_ptr) ; r[14] = c3
1501 STD c1,120(r_ptr) ; r[15] = c1
1502
1503 .EXIT
1504 FLDD -88(%sp),%fr13 ; restore fr13 (saved at 40(%sp) before the bump)
1505 FLDD -96(%sp),%fr12 ; restore fr12 (saved at 32(%sp) before the bump)
1506 LDD -104(%sp),%r6 ; restore r6
1507 LDD -112(%sp),%r5 ; restore r5
1508 LDD -120(%sp),%r4 ; restore r4
1509 BVE (%rp)
1510 LDD,MB -128(%sp),%r3 ; pop the 128-byte frame and restore r3
1511
1512 .PROCEND
1513
1514;-----------------------------------------------------------------------------
1515; bn_mul_comba4: 4-word by 4-word multiply, writing the 8 words r[0]..r[7]
1516;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1517; arg0 = r_ptr
1518; arg1 = a_ptr
1519; arg2 = b_ptr
1520; Each group of MUL_ADD_C lines sums one output column (all a[i]*b[j] with
; the same i+j); c1/c2/c3 rotate as the three-word column accumulator.
1521
1522bn_mul_comba4
1523 .proc
1524 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1525 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1526 .entry
1527 .align 64
1528
1529 STD %r3,0(%sp) ; save r3
1530 STD %r4,8(%sp) ; save r4
1531 STD %r5,16(%sp) ; save r5
1532 STD %r6,24(%sp) ; save r6
1533 FSTD %fr12,32(%sp) ; save fr12
1534 FSTD %fr13,40(%sp) ; save fr13
1535
1536 ;
1537 ; Zero out carries
1538 ;
1539 COPY %r0,c1
1540 COPY %r0,c2
1541 COPY %r0,c3
1542
1543 LDO 128(%sp),%sp ; bump stack
1544 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1545
1546 ;
1547 ; Load up all of the values we are going to use
1548 ;
1549 FLDD 0(a_ptr),a0
1550 FLDD 8(a_ptr),a1
1551 FLDD 16(a_ptr),a2
1552 FLDD 24(a_ptr),a3
1553
1554 FLDD 0(b_ptr),b0
1555 FLDD 8(b_ptr),b1
1556 FLDD 16(b_ptr),b2
1557 FLDD 24(b_ptr),b3
1558
1559 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1560 STD c1,0(r_ptr) ; r[0] = c1
1561 COPY %r0,c1
1562
1563 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1564 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1565 STD c2,8(r_ptr) ; r[1] = c2
1566 COPY %r0,c2
1567
1568 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1569 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1570 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1571 STD c3,16(r_ptr) ; r[2] = c3
1572 COPY %r0,c3
1573
1574 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1575 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1576 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1577 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1578 STD c1,24(r_ptr) ; r[3] = c1
1579 COPY %r0,c1
1580
1581 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1582 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1583 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1584 STD c2,32(r_ptr) ; r[4] = c2
1585 COPY %r0,c2
1586
1587 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1588 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1589 STD c3,40(r_ptr) ; r[5] = c3
1590 COPY %r0,c3
1591
1592 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1593 STD c1,48(r_ptr) ; r[6] = c1
1594 STD c2,56(r_ptr) ; r[7] = c2
1595
1596 .EXIT
1597 FLDD -88(%sp),%fr13 ; restore fr13 (saved at 40(%sp) before the bump)
1598 FLDD -96(%sp),%fr12 ; restore fr12 (saved at 32(%sp) before the bump)
1599 LDD -104(%sp),%r6 ; restore r6
1600 LDD -112(%sp),%r5 ; restore r5
1601 LDD -120(%sp),%r4 ; restore r4
1602 BVE (%rp)
1603 LDD,MB -128(%sp),%r3 ; pop the 128-byte frame and restore r3
1604
1605 .PROCEND
1606
1607
1608 .SPACE $TEXT$
1609 .SUBSPA $CODE$
1610 .SPACE $PRIVATE$,SORT=16
1611 .IMPORT $global$,DATA
1612 .SPACE $TEXT$
1613 .SUBSPA $CODE$
1614 .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=0x2c,SORT=16 ; literal subspace holding the string below
1615C$7
1616 .ALIGN 8
1617 .STRINGZ "Division would overflow (%d)\n" ; NOTE(review): presumably the diagnostic format used by a division routine outside this excerpt -- confirm
1618 .END
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s
new file mode 100644
index 0000000000..54b6606252
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/pa-risc2W.s
@@ -0,0 +1,1605 @@
1;
2; PA-RISC 64-bit implementation of bn_asm code
3;
4; This code is approximately 2x faster than the C version
5; for RSA/DSA.
6;
7; See http://devresource.hp.com/ for more details on the PA-RISC
8; architecture. Also see the book "PA-RISC 2.0 Architecture"
9; by Gerry Kane for information on the instruction set architecture.
10;
11; Code written by Chris Ruemmler (with some help from the HP C
12; compiler).
13;
14; The code compiles with HP's assembler
15;
16
17 .level 2.0W
18 .space $TEXT$
19 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
20
21;
22; Global Register definitions used for the routines.
23;
24; Some information about HP's runtime architecture for 64-bits.
25;
26; "Caller save" means the calling function must save the register
27; if it wants the register to be preserved.
28; "Callee save" means if a function uses the register, it must save
29; the value before using it.
30;
31; For the floating point registers
32;
33; "caller save" registers: fr4-fr11, fr22-fr31
34; "callee save" registers: fr12-fr21
35; "special" registers: fr0-fr3 (status and exception registers)
36;
37; For the integer registers
38; value zero : r0
39; "caller save" registers: r1,r19-r26
40; "callee save" registers: r3-r18
41; return register : r2 (rp)
42; return values ; r28 (ret0,ret1)
43; Stack pointer ; r30 (sp)
44; global data pointer ; r27 (dp)
45; argument pointer ; r29 (ap)
46; millicode return ptr ; r31 (also a caller save register)
47
48
49;
50; Arguments to the routines
51;
52r_ptr .reg %r26 ; arg0: result pointer
53a_ptr .reg %r25 ; arg1: first source pointer
54b_ptr .reg %r24 ; arg2 when it is a second source pointer (bn_add_words/bn_sub_words)
55num .reg %r24 ; arg2 when it is a word count (same register as b_ptr)
56w .reg %r23 ; arg3 when it is the multiplier word
57n .reg %r23 ; arg3 when it is a word count (same register as w)
58
59
60;
61; Globals used in some routines
62;
63
64top_overflow .reg %r29 ; the multiply routines set this to 1 << 32
65high_mask .reg %r22 ; value 0xffffffff80000000L
66
67
68;------------------------------------------------------------------------------
69;
70; bn_mul_add_words
71;
72;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
73; int num, BN_ULONG w)
74;
75; arg0 = r_ptr
76; arg1 = a_ptr
77; arg2 = num
78; arg3 = w
79;
80; Local register definitions
81;
82
83fm1 .reg %fr22 ; cross product m1 for word 0 (ht * fw_l)
84fm .reg %fr23 ; cross product m for word 0 (lt * fw_h); in bn_sqr_words, ht * lt
85ht_temp .reg %fr24 ; high partial product for word 0
86ht_temp_1 .reg %fr25 ; high partial product for word 1
87lt_temp .reg %fr26 ; low partial product for word 0
88lt_temp_1 .reg %fr27 ; low partial product for word 1
89fm1_1 .reg %fr28 ; cross product m1 for word 1
90fm_1 .reg %fr29 ; cross product m for word 1
91
92fw_h .reg %fr7L ; left 32-bit half of fw
93fw_l .reg %fr7R ; right 32-bit half of fw
94fw .reg %fr7 ; the multiplier w, loaded from the stack as one 64-bit value
95
96fht_0 .reg %fr8L ; ht half of source word 0
97flt_0 .reg %fr8R ; lt half of source word 0
98t_float_0 .reg %fr8 ; source word 0 as one 64-bit value
99
100fht_1 .reg %fr9L ; ht half of source word 1
101flt_1 .reg %fr9R ; lt half of source word 1
102t_float_1 .reg %fr9 ; source word 1 as one 64-bit value
103
; General-register temporaries.  r3..r9 fall in the callee-save range listed
; in the file header; each routine saves only the ones it uses.
104tmp_0 .reg %r31
105tmp_1 .reg %r21
106m_0 .reg %r20
107m_1 .reg %r19
108ht_0 .reg %r1
109ht_1 .reg %r3
110lt_0 .reg %r4
111lt_1 .reg %r5
112m1_0 .reg %r6
113m1_1 .reg %r7
114rp_val .reg %r8 ; rp[0] as loaded by bn_mul_add_words
115rp_val_1 .reg %r9 ; rp[1] as loaded by bn_mul_add_words
116
117bn_mul_add_words
; For each of the num words: rp[i] = low64(ap[i]*w + rp[i] + c) and
; c = high64 of that sum.  The final c is returned in %ret0.
; Returns 0 without touching memory when num <= 0.
118 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
119 .proc
120 .callinfo frame=128
121 .entry
122 .align 64
123
124 STD %r3,0(%sp) ; save r3
125 STD %r4,8(%sp) ; save r4
126 NOP ; Needed to make the loop 16-byte aligned
127 NOP ; Needed to make the loop 16-byte aligned
128
129 STD %r5,16(%sp) ; save r5
130 STD %r6,24(%sp) ; save r6
131 STD %r7,32(%sp) ; save r7
132 STD %r8,40(%sp) ; save r8
133
134 STD %r9,48(%sp) ; save r9
135 COPY %r0,%ret0 ; return 0 by default
136 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
137 STD w,56(%sp) ; store w on stack
138
139 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
140 LDO 128(%sp),%sp ; bump stack (delay slot: done on both paths, the exit code pops it)
141
142 ;
143 ; The loop is unrolled twice, so if there is only 1 number
144 ; then go straight to the cleanup code.
145 ;
146 CMPIB,= 1,num,bn_mul_add_words_single_top
147 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l); -72 after the bump is the slot written at 56(%sp)
148
149 ;
150 ; This loop is unrolled 2 times (64-byte aligned as well)
151 ;
152 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
153 ; two 32-bit multiplies can be issued per cycle.
154 ;
155bn_mul_add_words_unroll2
156
157 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
158 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9L) ht(L)/lt(R)
159 LDD 0(r_ptr),rp_val ; rp[0]
160 LDD 8(r_ptr),rp_val_1 ; rp[1]
161
162 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
163 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
164 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
165 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
166
167 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
168 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
169 FSTD fm,-8(%sp) ; -8(sp) = m[0]
170 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
171
172 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
173 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
174 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
175 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
176
177 XMPYU flt_0,fw_l,lt_temp ; lt_temp = flt_0*fw_l
178 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l
179 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
180 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
181
182 LDD -8(%sp),m_0 ; m[0]
183 LDD -40(%sp),m_1 ; m[1]
184 LDD -16(%sp),m1_0 ; m1[0]
185 LDD -48(%sp),m1_1 ; m1[1]
186
187 LDD -24(%sp),ht_0 ; ht[0]
188 LDD -56(%sp),ht_1 ; ht[1]
189 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
190 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
191
192 LDD -32(%sp),lt_0 ; lt[0]
193 LDD -64(%sp),lt_1 ; lt[1]
194 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0]+m1[0] < m1[0]), i.e. the sum wrapped
195 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
196
197 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1]+m1[1] < m1[1]), i.e. the sum wrapped
198 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
199 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
200 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
201
202 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
203 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
204 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
205 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
206
207 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
208 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
209 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
210 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
211
212 ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c;
213 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
214 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
215 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
216
217 LDO -2(num),num ; num = num - 2;
218 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
219 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
220 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
221
222 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
223 ADD,DC ht_1,%r0,%ret0 ; c (ret0) = ht[1] + carry
224 LDO 16(a_ptr),a_ptr ; a_ptr += 2
225
226 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
227 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
228 LDO 16(r_ptr),r_ptr ; r_ptr += 2
229
230 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
231
232 ; Cleanup path: exactly one word remains (num was 1, or was odd).
233 ; Top of loop aligned on 64-byte boundary
234 ;
235bn_mul_add_words_single_top
236 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
237 LDD 0(r_ptr),rp_val ; rp[0]
238 LDO 8(a_ptr),a_ptr ; a_ptr++
239 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
240 FSTD fm1,-16(%sp) ; -16(sp) = m1
241 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
242 FSTD fm,-8(%sp) ; -8(sp) = m
243 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
244 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
245 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
246 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
247
248 LDD -8(%sp),m_0 ; m
249 LDD -16(%sp),m1_0 ; m1 = temp1
250 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
251 LDD -24(%sp),ht_0 ; ht
252 LDD -32(%sp),lt_0 ; lt
253
254 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m+m1 < m1), i.e. the sum wrapped
255 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
256
257 EXTRD,U tmp_0,31,32,m_0 ; m>>32
258 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
259
260 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
261 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
262 ADD,DC ht_0,%r0,ht_0 ; ht += carry
263 ADD %ret0,tmp_0,lt_0 ; lt = lt + c;
264 ADD,DC ht_0,%r0,ht_0 ; ht += carry
265 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
266 ADD,DC ht_0,%r0,%ret0 ; c (ret0) = ht + carry
267 STD lt_0,0(r_ptr) ; rp[0] = lt
268
269bn_mul_add_words_exit
270 .EXIT
271 LDD -80(%sp),%r9 ; restore r9
272 LDD -88(%sp),%r8 ; restore r8
273 LDD -96(%sp),%r7 ; restore r7
274 LDD -104(%sp),%r6 ; restore r6
275 LDD -112(%sp),%r5 ; restore r5
276 LDD -120(%sp),%r4 ; restore r4
277 BVE (%rp)
278 LDD,MB -128(%sp),%r3 ; restore r3
279 .PROCEND ;in=23,24,25,26,29;out=28;
280
281;----------------------------------------------------------------------------
282;
283;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
284;
285; arg0 = rp
286; arg1 = ap
287; arg2 = num
288; arg3 = w
289
290bn_mul_words
; For each of the num words: rp[i] = low64(ap[i]*w + c) and c = high64 of
; that sum.  The final c is returned in %ret0.
; Returns 0 without touching memory when num <= 0.
291 .proc
292 .callinfo frame=128
293 .entry
294 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
295 .align 64
296
297 STD %r3,0(%sp) ; save r3
298 STD %r4,8(%sp) ; save r4
299 STD %r5,16(%sp) ; save r5
300 STD %r6,24(%sp) ; save r6
301
302 STD %r7,32(%sp) ; save r7
303 COPY %r0,%ret0 ; return 0 by default
304 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
305 STD w,56(%sp) ; w on stack
306
307 CMPIB,>= 0,num,bn_mul_words_exit ; if (num <= 0) then exit
308 LDO 128(%sp),%sp ; bump stack (delay slot: done on both paths, the exit code pops it)
309
310 ;
311 ; See if only 1 word to do, thus just do cleanup
312 ;
313 CMPIB,= 1,num,bn_mul_words_single_top
314 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l); -72 after the bump is the slot written at 56(%sp)
315
316 ;
317 ; This loop is unrolled 2 times (64-byte aligned as well)
318 ;
319 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
320 ; two 32-bit multiplies can be issued per cycle.
321 ;
322bn_mul_words_unroll2
323
324 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
325 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9L) ht(L)/lt(R)
326 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
327 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
328
329 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
330 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
331 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
332 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
333
334 FSTD fm,-8(%sp) ; -8(sp) = m[0]
335 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
336 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
337 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
338
339 FSTD ht_temp,-24(%sp) ; -24(sp) = ht[0]
340 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht[1]
341 XMPYU flt_0,fw_l,lt_temp ; lt_temp = flt_0*fw_l
342 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l
343
344 FSTD lt_temp,-32(%sp) ; -32(sp) = lt[0]
345 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt[1]
346 LDD -8(%sp),m_0 ; m[0]
347 LDD -40(%sp),m_1 ; m[1]
348
349 LDD -16(%sp),m1_0 ; m1[0]
350 LDD -48(%sp),m1_1 ; m1[1]
351 LDD -24(%sp),ht_0 ; ht[0]
352 LDD -56(%sp),ht_1 ; ht[1]
353
354 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
355 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
356 LDD -32(%sp),lt_0 ; lt[0]
357 LDD -64(%sp),lt_1 ; lt[1]
358
359 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0]+m1[0] < m1[0]), i.e. the sum wrapped
360 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
361 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1]+m1[1] < m1[1]), i.e. the sum wrapped
362 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
363
364 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
365 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
366 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
367 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
368
369 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
370 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
371 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
372 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
373
374 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
375 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
376 ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c (ret0);
377 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
378
379 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + c (ht_0)
380 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
381 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
382 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
383
384 COPY ht_1,%ret0 ; carry = ht[1]
385 LDO -2(num),num ; num = num - 2;
386 LDO 16(a_ptr),a_ptr ; ap += 2
387 CMPIB,<= 2,num,bn_mul_words_unroll2 ; go again if at least two words remain
388 LDO 16(r_ptr),r_ptr ; rp += 2
389
390 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
391
392 ; Cleanup path: exactly one word remains (num was 1, or was odd).
393 ; Top of loop aligned on 64-byte boundary
394 ;
395bn_mul_words_single_top
396 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
397
398 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
399 FSTD fm1,-16(%sp) ; -16(sp) = m1
400 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
401 FSTD fm,-8(%sp) ; -8(sp) = m
402 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
403 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
404 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
405 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
406
407 LDD -8(%sp),m_0 ; m
408 LDD -16(%sp),m1_0 ; m1
409 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
410 LDD -24(%sp),ht_0 ; ht
411 LDD -32(%sp),lt_0 ; lt
412
413 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m+m1 < m1), i.e. the sum wrapped
414 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
415
416 EXTRD,U tmp_0,31,32,m_0 ; m>>32
417 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
418
419 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
420 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
421 ADD,DC ht_0,%r0,ht_0 ; ht += carry
422
423 ADD %ret0,lt_0,lt_0 ; lt = lt + c;
424 ADD,DC ht_0,%r0,ht_0 ; ht += carry
425
426 COPY ht_0,%ret0 ; copy carry
427 STD lt_0,0(r_ptr) ; rp[0] = lt
428
429bn_mul_words_exit
430 .EXIT
431 LDD -96(%sp),%r7 ; restore r7
432 LDD -104(%sp),%r6 ; restore r6
433 LDD -112(%sp),%r5 ; restore r5
434 LDD -120(%sp),%r4 ; restore r4
435 BVE (%rp)
436 LDD,MB -128(%sp),%r3 ; restore r3
437 .PROCEND ;in=23,24,25,26,29;out=28;
438
439;----------------------------------------------------------------------------
440;
441;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
442;
443; arg0 = rp
444; arg1 = ap
445; arg2 = num
446;
447
448bn_sqr_words
; For each of the num input words, stores the 128-bit square as two words:
; rp[2*i] = low 64 bits, rp[2*i+1] = high 64 bits.  With ht/lt the two
; 32-bit halves of the input, the square is built as
; ht*ht * 2^64 + (ht*lt) * 2^33 + lt*lt.  Does nothing when num <= 0.
449 .proc
450 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
451 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
452 .entry
453 .align 64
454
455 STD %r3,0(%sp) ; save r3
456 STD %r4,8(%sp) ; save r4
457 NOP
458 STD %r5,16(%sp) ; save r5
459
460 CMPIB,>= 0,num,bn_sqr_words_exit ; if (num <= 0) then exit
461 LDO 128(%sp),%sp ; bump stack (delay slot: done on both paths, the exit code pops it)
462
463 ;
464 ; If only 1, then go straight to cleanup
465 ;
466 CMPIB,= 1,num,bn_sqr_words_single_top
467 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L (delay slot: set for both the loop and the single-word path)
468
469 ;
470 ; This loop is unrolled 2 times (64-byte aligned as well)
471 ;
472
473bn_sqr_words_unroll2
474 FLDD 0(a_ptr),t_float_0 ; a[0]
475 FLDD 8(a_ptr),t_float_1 ; a[1]
476 XMPYU fht_0,flt_0,fm ; m[0] = ht[0]*lt[0]
477 XMPYU fht_1,flt_1,fm_1 ; m[1] = ht[1]*lt[1]
478
479 FSTD fm,-24(%sp) ; store m[0]
480 FSTD fm_1,-56(%sp) ; store m[1]
481 XMPYU flt_0,flt_0,lt_temp ; lt[0] = lt*lt
482 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] = lt*lt
483
484 FSTD lt_temp,-16(%sp) ; store lt[0]
485 FSTD lt_temp_1,-48(%sp) ; store lt[1]
486 XMPYU fht_0,fht_0,ht_temp ; ht[0] = ht*ht
487 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] = ht*ht
488
489 FSTD ht_temp,-8(%sp) ; store ht[0]
490 FSTD ht_temp_1,-40(%sp) ; store ht[1]
491 LDD -24(%sp),m_0 ; m[0]
492 LDD -56(%sp),m_1 ; m[1]
493
494 AND m_0,high_mask,tmp_0 ; m[0] & Mask (keep the top 33 bits)
495 AND m_1,high_mask,tmp_1 ; m[1] & Mask (keep the top 33 bits)
496 DEPD,Z m_0,30,31,m_0 ; m[0] << (32+1): low 31 bits moved to the top
497 DEPD,Z m_1,30,31,m_1 ; m[1] << (32+1): low 31 bits moved to the top
498
499 LDD -16(%sp),lt_0 ; lt[0]
500 LDD -48(%sp),lt_1 ; lt[1]
501 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = (m[0]&Mask) >> (32-1)
502 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = (m[1]&Mask) >> (32-1)
503
504 LDD -8(%sp),ht_0 ; ht[0]
505 LDD -40(%sp),ht_1 ; ht[1]
506 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
507 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
508
509 ADD lt_0,m_0,lt_0 ; lt[0] = lt[0]+m[0]
510 ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry
511 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
512 STD ht_0,8(r_ptr) ; rp[1] = ht[0]
513
514 ADD lt_1,m_1,lt_1 ; lt[1] = lt[1]+m[1]
515 ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry
516 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
517 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
518
519 LDO -2(num),num ; num = num - 2;
520 LDO 16(a_ptr),a_ptr ; ap += 2
521 CMPIB,<= 2,num,bn_sqr_words_unroll2 ; go again if at least two words remain
522 LDO 32(r_ptr),r_ptr ; rp += 4
523
524 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
525
526 ; Cleanup path: exactly one word remains (num was 1, or was odd).
527 ; Top of loop aligned on 64-byte boundary
528 ;
529bn_sqr_words_single_top
530 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
531
532 XMPYU fht_0,flt_0,fm ; m = ht*lt
533 FSTD fm,-24(%sp) ; store m
534
535 XMPYU flt_0,flt_0,lt_temp ; lt = lt*lt
536 FSTD lt_temp,-16(%sp) ; store lt
537
538 XMPYU fht_0,fht_0,ht_temp ; ht = ht*ht
539 FSTD ht_temp,-8(%sp) ; store ht
540
541 LDD -24(%sp),m_0 ; load m
542 AND m_0,high_mask,tmp_0 ; m & Mask (keep the top 33 bits)
543 DEPD,Z m_0,30,31,m_0 ; m << (32+1): low 31 bits moved to the top
544 LDD -16(%sp),lt_0 ; lt
545
546 LDD -8(%sp),ht_0 ; ht
547 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = (m&Mask) >> (32-1)
548 ADD m_0,lt_0,lt_0 ; lt = lt+m (sets the carry consumed two lines below)
549 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 (ADD,L is used between the ADD and the ADD,DC)
550 ADD,DC ht_0,%r0,ht_0 ; ht += carry from lt+m
551
552 STD lt_0,0(r_ptr) ; rp[0] = lt
553 STD ht_0,8(r_ptr) ; rp[1] = ht
554
555bn_sqr_words_exit
556 .EXIT
557 LDD -112(%sp),%r5 ; restore r5
558 LDD -120(%sp),%r4 ; restore r4
559 BVE (%rp)
560 LDD,MB -128(%sp),%r3 ; pop the 128-byte frame and restore r3
561 .PROCEND ;in=23,24,25,26,29;out=28;
562
563
564;----------------------------------------------------------------------------
565;
566;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
567;
568; arg0 = rp
569; arg1 = ap
570; arg2 = bp
571; arg3 = n
572
573t .reg %r22 ; current word of a[], then a[] + incoming carry
574b .reg %r21 ; current word of b[]
575l .reg %r20 ; sum word written to r[]
576
577bn_add_words
; r[i] = a[i] + b[i] + c for i in 0..n-1, with c the carry out of the
; previous word (initially 0).  The final carry is returned in %ret0.
; Returns 0 without touching memory when n <= 0.  No stack frame is used.
578 .proc
579 .entry
580 .callinfo
581 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
582 .align 64
583
584 CMPIB,>= 0,n,bn_add_words_exit ; if (n <= 0) then exit
585 COPY %r0,%ret0 ; return 0 by default
586
587 ;
588 ; If 2 or more numbers do the loop
589 ;
590 CMPIB,= 1,n,bn_add_words_single_top
591 NOP
592
593 ;
594 ; This loop is unrolled 2 times (64-byte aligned as well)
595 ;
596bn_add_words_unroll2
597 LDD 0(a_ptr),t ; t = a[0]
598 LDD 0(b_ptr),b ; b = b[0]
599 ADD t,%ret0,t ; t = t+c;
600 ADD,DC %r0,%r0,%ret0 ; set c to carry
601 ADD t,b,l ; l = t + b[0]
602 ADD,DC %ret0,%r0,%ret0 ; c+= carry
603 STD l,0(r_ptr) ; r[0] = l
604
605 LDD 8(a_ptr),t ; t = a[1]
606 LDD 8(b_ptr),b ; b = b[1]
607 ADD t,%ret0,t ; t = t+c;
608 ADD,DC %r0,%r0,%ret0 ; set c to carry
609 ADD t,b,l ; l = t + b[1]
610 ADD,DC %ret0,%r0,%ret0 ; c+= carry
611 STD l,8(r_ptr) ; r[1] = l
612
613 LDO -2(n),n ; n = n - 2
614 LDO 16(a_ptr),a_ptr ; a += 2
615 LDO 16(b_ptr),b_ptr ; b += 2
616
617 CMPIB,<= 2,n,bn_add_words_unroll2 ; go again if at least two words remain
618 LDO 16(r_ptr),r_ptr ; r += 2
619
620 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
621
622bn_add_words_single_top
623 LDD 0(a_ptr),t ; t = a[0] (the one remaining word)
624 LDD 0(b_ptr),b ; b = b[0]
625
626 ADD t,%ret0,t ; t = t+c;
627 ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??)
628 ADD t,b,l ; l = t + b[0]
629 ADD,DC %ret0,%r0,%ret0 ; c+= carry
630 STD l,0(r_ptr) ; r[0] = l
631
632bn_add_words_exit
633 .EXIT
634 BVE (%rp)
635 NOP
636 .PROCEND ;in=23,24,25,26,29;out=28;
637
638;----------------------------------------------------------------------------
639;
640;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
641;
642; arg0 = rp
643; arg1 = ap
644; arg2 = bp
645; arg3 = n
646
647t1 .reg %r22 ; current word of a[]
648t2 .reg %r21 ; current word of b[]
649sub_tmp1 .reg %r20 ; difference word written to r[]
650sub_tmp2 .reg %r19 ; candidate borrow: 0 if t1 > t2, else 1
651
652
653bn_sub_words
; r[i] = a[i] - b[i] - c for i in 0..n-1, with c the borrow from the
; previous word (initially 0).  The new borrow is (a[i] < b[i]) when the
; two words differ and is left unchanged when they are equal.  The final
; borrow is returned in %ret0.  Returns 0 without touching memory when
; n <= 0.  No stack frame is used.
654 .proc
655 .callinfo
656 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
657 .entry
658 .align 64
659
660 CMPIB,>= 0,n,bn_sub_words_exit ; if (n <= 0) then exit
661 COPY %r0,%ret0 ; return 0 by default
662
663 ;
664 ; If 2 or more numbers do the loop
665 ;
666 CMPIB,= 1,n,bn_sub_words_single_top
667 NOP
668
669 ;
670 ; This loop is unrolled 2 times (64-byte aligned as well)
671 ;
672bn_sub_words_unroll2
673 LDD 0(a_ptr),t1 ; t1 = a[0]
674 LDD 0(b_ptr),t2 ; t2 = b[0]
675 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
676 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
677
678 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
679 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
680
681 CMPCLR,*= t1,t2,%r0 ; if (t1 == t2) keep the incoming borrow
682 COPY sub_tmp2,%ret0 ; else c = (t1 < t2)
683 STD sub_tmp1,0(r_ptr) ; r[0] = t3
684
685 LDD 8(a_ptr),t1 ; t1 = a[1]
686 LDD 8(b_ptr),t2 ; t2 = b[1]
687 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
688 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
689 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
690 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
691
692 CMPCLR,*= t1,t2,%r0 ; if (t1 == t2) keep the incoming borrow
693 COPY sub_tmp2,%ret0 ; else c = (t1 < t2)
694 STD sub_tmp1,8(r_ptr) ; r[1] = t3
695
696 LDO -2(n),n ; n = n - 2
697 LDO 16(a_ptr),a_ptr ; a += 2
698 LDO 16(b_ptr),b_ptr ; b += 2
699
700 CMPIB,<= 2,n,bn_sub_words_unroll2 ; go again if at least two words remain
701 LDO 16(r_ptr),r_ptr ; r += 2
702
703 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
704
705bn_sub_words_single_top
706 LDD 0(a_ptr),t1 ; t1 = a[0] (the one remaining word)
707 LDD 0(b_ptr),t2 ; t2 = b[0]
708 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
709 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
710 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
711 LDO 1(%r0),sub_tmp2 ; otherwise sub_tmp2 = 1
712
713 CMPCLR,*= t1,t2,%r0 ; if (t1 == t2) keep the incoming borrow
714 COPY sub_tmp2,%ret0 ; else c = (t1 < t2)
715
716 STD sub_tmp1,0(r_ptr) ; r[0] = t3
717
718bn_sub_words_exit
719 .EXIT
720 BVE (%rp)
721 NOP
722 .PROCEND ;in=23,24,25,26,29;out=28;
723
724;------------------------------------------------------------------------------
725; Divide the two-word value h:l (h = high word, l = low word) by d.
726; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
727;
728; arg0 = h
729; arg1 = l
730; arg2 = d
731; Returns -1 (all bits set) immediately when d == 0.
732; This is mainly just modified assembly from the compiler, thus the
733; lack of variable names.
734;
735;------------------------------------------------------------------------------
736bn_div_words
737 .proc
738 .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
739 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
740 .IMPORT BN_num_bits_word,CODE,NO_RELOCATION
741 .IMPORT __iob,DATA
742 .IMPORT fprintf,CODE,NO_RELOCATION
743 .IMPORT abort,CODE,NO_RELOCATION
744 .IMPORT $$div2U,MILLICODE
745 .entry
746 STD %r2,-16(%r30) ; save return pointer (found at -368 once sp is bumped)
747 STD,MA %r3,352(%r30) ; save r3 at old sp, then sp += 352
748 STD %r4,-344(%r30) ; save r4..r10 (reloaded at $D0)
749 STD %r5,-336(%r30)
750 STD %r6,-328(%r30)
751 STD %r7,-320(%r30)
752 STD %r8,-312(%r30)
753 STD %r9,-304(%r30)
754 STD %r10,-296(%r30)
755
756 STD %r27,-288(%r30) ; save gp
757
758 COPY %r24,%r3 ; save d
759 COPY %r26,%r4 ; save h (high 64-bits)
760 LDO -1(%r0),%ret0 ; return -1 by default
761
762 CMPB,*= %r0,%arg2,$D3 ; if (d == 0)
763 COPY %r25,%r5 ; save l (low 64-bits)
764
765 LDO -48(%r30),%r29 ; create ap
766 .CALL ;in=26,29;out=28;
767 B,L BN_num_bits_word,%r2
768 COPY %r3,%r26 ; delay slot: arg0 = d
769 LDD -288(%r30),%r27 ; restore gp
770 LDI 64,%r21
771
772 CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward)
773 COPY %ret0,%r24 ; delay slot: i = BN_num_bits_word(d)
774 MTSARCM %r24
775 DEPDI,Z -1,%sar,1,%r29 ; r29 = 1<<i (operand of the overflow test below)
776 CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward)
777
778$00000012
779 SUBI 64,%r24,%r31 ; i = 64 - i;
780 CMPCLR,*<< %r4,%r3,%r0 ; if (h >= d)
781 SUB %r4,%r3,%r4 ; h -= d
782 CMPB,= %r31,%r0,$0000001A ; if (i)
783 COPY %r0,%r10 ; ret = 0
784 MTSARCM %r31 ; i to shift
785 DEPD,Z %r3,%sar,64,%r3 ; d <<= i;
786 SUBI 64,%r31,%r19 ; 64 - i; redundant
787 MTSAR %r19 ; (64 -i) to shift
788 SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i)
789 MTSARCM %r31 ; i to shift
790 DEPD,Z %r5,%sar,64,%r5 ; l <<= i;
791
792$0000001A
793 DEPDI,Z -1,31,32,%r19 ; r19 = 0xffffffff00000000 (high-word mask)
794 EXTRD,U %r3,31,32,%r6 ; dh = d >> 32 (high 32 bits of d)
795 EXTRD,U %r3,63,32,%r8 ; dl = d & 0xffffffff (low 32 bits of d)
796 LDO 2(%r0),%r9 ; count = 2 (decremented at $00000036)
797 STD %r3,-280(%r30) ; "d" to stack
798
799$0000001C
800 DEPDI,Z -1,63,32,%r29 ; q = 0xffffffff, kept when (h>>32) == dh
801 EXTRD,U %r4,31,32,%r31 ; h >> 32
802 CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div
803 COPY %r4,%r26
804 EXTRD,U %r4,31,32,%r25
805 COPY %r6,%r24
806 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
807 B,L $$div2U,%r2
808 EXTRD,U %r6,31,32,%r23 ; delay slot of the millicode call
809 DEPD %r28,31,32,%r29
810$D2
811 STD %r29,-272(%r30) ; q
812 AND %r5,%r19,%r24 ; t & 0xffffffff00000000;
813 EXTRD,U %r24,31,32,%r24 ; r24 = l >> 32 (high 32 bits of l)
814 FLDD -272(%r30),%fr7 ; q
815 FLDD -280(%r30),%fr8 ; d
816 XMPYU %fr8L,%fr7L,%fr10 ; d.hi * q.hi
817 FSTD %fr10,-256(%r30)
818 XMPYU %fr8L,%fr7R,%fr22 ; d.hi * q.lo
819 FSTD %fr22,-264(%r30)
820 XMPYU %fr8R,%fr7L,%fr11 ; d.lo * q.hi
821 XMPYU %fr8R,%fr7R,%fr23 ; d.lo * q.lo
822 FSTD %fr11,-232(%r30)
823 FSTD %fr23,-240(%r30)
824 LDD -256(%r30),%r28
825 DEPD,Z %r28,31,32,%r2
826 LDD -264(%r30),%r20
827 ADD,L %r20,%r2,%r31
828 LDD -232(%r30),%r22
829 DEPD,Z %r22,31,32,%r22
830 LDD -240(%r30),%r21
831 B $00000024 ; enter loop
832 ADD,L %r21,%r22,%r23 ; delay slot: executed before entering the loop
833
834$0000002A
835 LDO -1(%r29),%r29 ; q--
836 SUB %r23,%r8,%r23 ; subtract dl from the low product term
837$00000024
838 SUB %r4,%r31,%r25
839 AND %r25,%r19,%r26
840 CMPB,*<>,N %r0,%r26,$00000046 ; (forward)
841 DEPD,Z %r25,31,32,%r20
842 OR %r20,%r24,%r21
843 CMPB,*<<,N %r21,%r23,$0000002A ;(backward)
844 SUB %r31,%r6,%r31
845;-------------Break path---------------------
846
847$00000046
848 DEPD,Z %r23,31,32,%r25 ;tl
849 EXTRD,U %r23,31,32,%r26 ;t
850 AND %r25,%r19,%r24 ;tl = (tl<<32)&0xffffffff00000000L
851 ADD,L %r31,%r26,%r31 ;th += t;
852 CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl)
853 LDO 1(%r31),%r31 ; th++;
854 CMPB,*<<=,N %r31,%r4,$00000036 ;if (n < th) (forward)
855 LDO -1(%r29),%r29 ;q--;
856 ADD,L %r4,%r3,%r4 ;h += d;
857$00000036
858 ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward)
859 SUB %r5,%r24,%r28 ; l -= tl;
860 SUB %r4,%r31,%r24 ; h -= th;
861 SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32));
862 DEPD,Z %r29,31,32,%r10 ; ret = q<<32
863 b $0000001C
864 DEPD,Z %r28,31,32,%r5 ; l = l << 32
865
866$D1
867 OR %r10,%r29,%r28 ; ret |= q
868$D3
869 LDD -368(%r30),%r2 ; reload the return pointer saved on entry
870$D0
871 LDD -296(%r30),%r10 ; restore r10..r4
872 LDD -304(%r30),%r9
873 LDD -312(%r30),%r8
874 LDD -320(%r30),%r7
875 LDD -328(%r30),%r6
876 LDD -336(%r30),%r5
877 LDD -344(%r30),%r4
878 BVE (%r2) ; return
879 .EXIT
880 LDD,MB -352(%r30),%r3 ; delay slot: sp -= 352, then restore r3
881
882bn_div_err_case
883 MFIA %r6
884 ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1
885 LDO R'bn_div_words-bn_div_err_case(%r1),%r6
886 ADDIL LT'__iob,%r27,%r1
887 LDD RT'__iob(%r1),%r26
888 ADDIL L'C$4-bn_div_words,%r6,%r1
889 LDO R'C$4-bn_div_words(%r1),%r25 ; arg1 = address of format string C$4
890 LDO 64(%r26),%r26 ; arg0 = __iob + 64 (presumably stderr - TODO confirm)
891 .CALL ;in=24,25,26,29;out=28;
892 B,L fprintf,%r2
893 LDO -48(%r30),%r29
894 LDD -288(%r30),%r27 ; restore gp
895 .CALL ;in=29;
896 B,L abort,%r2
897 LDO -48(%r30),%r29
898 LDD -288(%r30),%r27 ; restore gp
899 B $D0 ; fall back to the common epilogue
900 LDD -368(%r30),%r2 ; delay slot: reload the return pointer
901 .PROCEND ;in=24,25,26,29;out=28;
902
903;----------------------------------------------------------------------------
904;
905; Registers to hold 64-bit values to manipulate. The "L" part
906; of the register corresponds to the upper 32-bits, while the "R"
907; part corresponds to the lower 32-bits
908;
909; Note, that when using b6 and b7, the code must save these before
910; using them because they are callee save registers
911;
912;
913; Floating point registers to use to save values that
914; are manipulated. These don't collide with ftemp1-6 and
915; are all caller save registers
916;
917a0 .reg %fr22 ; a0..a7 receive a[0]..a[7] (FLDD n(a_ptr),aN in the comba routines)
918a0L .reg %fr22L
919a0R .reg %fr22R
920
921a1 .reg %fr23
922a1L .reg %fr23L
923a1R .reg %fr23R
924
925a2 .reg %fr24
926a2L .reg %fr24L
927a2R .reg %fr24R
928
929a3 .reg %fr25
930a3L .reg %fr25L
931a3R .reg %fr25R
932
933a4 .reg %fr26
934a4L .reg %fr26L
935a4R .reg %fr26R
936
937a5 .reg %fr27
938a5L .reg %fr27L
939a5R .reg %fr27R
940
941a6 .reg %fr28
942a6L .reg %fr28L
943a6R .reg %fr28R
944
945a7 .reg %fr29
946a7L .reg %fr29L
947a7R .reg %fr29R
948
949b0 .reg %fr30 ; b0..b7 receive b[0]..b[7] in the bn_mul_comba routines
950b0L .reg %fr30L
951b0R .reg %fr30R
952
953b1 .reg %fr31
954b1L .reg %fr31L
955b1R .reg %fr31R
956
957;
958; Temporary floating point variables, these are all caller save
959; registers
960;
961ftemp1 .reg %fr4 ; XMPYU results are staged here before being stored to the stack
962ftemp2 .reg %fr5
963ftemp3 .reg %fr6
964ftemp4 .reg %fr7
965
966;
967; The B set of registers when used.
968;
969
970b2 .reg %fr8
971b2L .reg %fr8L
972b2R .reg %fr8R
973
974b3 .reg %fr9
975b3L .reg %fr9L
976b3R .reg %fr9R
977
978b4 .reg %fr10
979b4L .reg %fr10L
980b4R .reg %fr10R
981
982b5 .reg %fr11
983b5L .reg %fr11L
984b5R .reg %fr11R
985
986b6 .reg %fr12 ; saved/restored by bn_mul_comba8 and bn_mul_comba4
987b6L .reg %fr12L
988b6R .reg %fr12R
989
990b7 .reg %fr13 ; saved/restored by bn_mul_comba8 and bn_mul_comba4
991b7L .reg %fr13L
992b7R .reg %fr13R
993
994c1 .reg %r21 ; only reg
995temp1 .reg %r20 ; only reg
996temp2 .reg %r19 ; only reg
997temp3 .reg %r31 ; only reg
998
999m1 .reg %r28 ; second cross product in SQR_ADD_C2 / MUL_ADD_C
1000c2 .reg %r23 ; column accumulator (rotates with c1 and c3)
1001high_one .reg %r1 ; constant 1 << 32, set up by each comba routine
1002ht .reg %r6 ; high partial product; r6 is saved on entry by the comba routines
1003lt .reg %r5 ; low partial product; r5 is saved on entry by the comba routines
1004m .reg %r4 ; cross product; r4 is saved on entry by the comba routines
1005c3 .reg %r3 ; column accumulator; r3 is saved on entry by the comba routines
1006 ; SQR_ADD_C: (C3,C2,C1) += A0*A0, where A0 is passed as its 32-bit halves A0L:A0R
1007SQR_ADD_C .macro A0L,A0R,C1,C2,C3
1008 XMPYU A0L,A0R,ftemp1 ; m = A0L*A0R (cross product, counted twice below)
1009 FSTD ftemp1,-24(%sp) ; store m
1010
1011 XMPYU A0R,A0R,ftemp2 ; lt = A0R*A0R
1012 FSTD ftemp2,-16(%sp) ; store lt
1013
1014 XMPYU A0L,A0L,ftemp3 ; ht = A0L*A0L
1015 FSTD ftemp3,-8(%sp) ; store ht
1016
1017 LDD -24(%sp),m ; load m
1018 AND m,high_mask,temp2 ; m & Mask
1019 DEPD,Z m,30,31,temp3 ; temp3 = m << (32+1), i.e. low part of 2*m << 32
1020 LDD -16(%sp),lt ; lt
1021
1022 LDD -8(%sp),ht ; ht
1023 EXTRD,U temp2,32,33,temp1 ; temp1 = (m & Mask) >> (32-1), i.e. high part of 2*m
1024 ADD temp3,lt,lt ; lt += low part of 2*m << 32 (sets carry)
1025 ADD,L ht,temp1,ht ; ht += high part of 2*m (ADD,L leaves the carry untouched)
1026 ADD,DC ht,%r0,ht ; ht += carry from the lt addition
1027
1028 ADD C1,lt,C1 ; c1=c1+lt
1029 ADD,DC ht,%r0,ht ; ht += carry from c1+lt
1030
1031 ADD C2,ht,C2 ; c2=c2+ht
1032 ADD,DC C3,%r0,C3 ; c3 += carry from c2+ht
1033.endm
1034 ; SQR_ADD_C2: (C3,C2,C1) += 2*A0*A1, operands passed as 32-bit halves
1035SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
1036 XMPYU A0L,A1R,ftemp1 ; m1 = A0L*A1R
1037 FSTD ftemp1,-16(%sp) ;
1038 XMPYU A0R,A1L,ftemp2 ; m = A0R*A1L
1039 FSTD ftemp2,-8(%sp) ;
1040 XMPYU A0R,A1R,ftemp3 ; lt = A0R*A1R
1041 FSTD ftemp3,-32(%sp)
1042 XMPYU A0L,A1L,ftemp4 ; ht = A0L*A1L
1043 FSTD ftemp4,-24(%sp) ;
1044
1045 LDD -8(%sp),m ; load m
1046 LDD -16(%sp),m1 ; load m1
1047 ADD,L m,m1,m ; m+m1
1048
1049 DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32
1050 LDD -24(%sp),ht ; load ht
1051
1052 CMPCLR,*>>= m,m1,%r0 ; if (m < m1), i.e. m+m1 wrapped around
1053 ADD,L ht,high_one,ht ; ht+=high_one
1054
1055 EXTRD,U m,31,32,temp1 ; m >> 32
1056 LDD -32(%sp),lt ; lt
1057 ADD,L ht,temp1,ht ; ht+= m>>32
1058 ADD lt,temp3,lt ; lt += (m+m1) << 32 (sets carry)
1059 ADD,DC ht,%r0,ht ; ht += carry from the lt addition
1060
1061 ADD ht,ht,ht ; ht=ht+ht;
1062 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1063
1064 ADD lt,lt,lt ; lt=lt+lt;
1065 ADD,DC ht,%r0,ht ; add in carry (ht++)
1066
1067 ADD C1,lt,C1 ; c1=c1+lt
1068 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1069 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1070
1071 ADD C2,ht,C2 ; c2 = c2 + ht
1072 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1073.endm
1074
1075; Comba squaring: r[0..15] = a[0..7]^2, one result word stored per column.
1076;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1077; arg0 = r_ptr
1078; arg1 = a_ptr
1079;
1080
1081bn_sqr_comba8
1082 .PROC
1083 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1084 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1085 .ENTRY
1086 .align 64
1087
1088 STD %r3,0(%sp) ; save r3
1089 STD %r4,8(%sp) ; save r4
1090 STD %r5,16(%sp) ; save r5
1091 STD %r6,24(%sp) ; save r6
1092
1093 ;
1094 ; Zero out carries
1095 ;
1096 COPY %r0,c1
1097 COPY %r0,c2
1098 COPY %r0,c3
1099
1100 LDO 128(%sp),%sp ; bump stack
1101 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1102 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1103
1104 ;
1105 ; Load up all of the values we are going to use
1106 ;
1107 FLDD 0(a_ptr),a0
1108 FLDD 8(a_ptr),a1
1109 FLDD 16(a_ptr),a2
1110 FLDD 24(a_ptr),a3
1111 FLDD 32(a_ptr),a4
1112 FLDD 40(a_ptr),a5
1113 FLDD 48(a_ptr),a6
1114 FLDD 56(a_ptr),a7
1115 ; Each group below sums one column; c1,c2,c3 rotate roles after every store.
1116 SQR_ADD_C a0L,a0R,c1,c2,c3
1117 STD c1,0(r_ptr) ; r[0] = c1;
1118 COPY %r0,c1
1119
1120 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1121 STD c2,8(r_ptr) ; r[1] = c2;
1122 COPY %r0,c2
1123
1124 SQR_ADD_C a1L,a1R,c3,c1,c2
1125 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1126 STD c3,16(r_ptr) ; r[2] = c3;
1127 COPY %r0,c3
1128
1129 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1130 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1131 STD c1,24(r_ptr) ; r[3] = c1;
1132 COPY %r0,c1
1133
1134 SQR_ADD_C a2L,a2R,c2,c3,c1
1135 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1136 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
1137 STD c2,32(r_ptr) ; r[4] = c2;
1138 COPY %r0,c2
1139
1140 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
1141 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
1142 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1143 STD c3,40(r_ptr) ; r[5] = c3;
1144 COPY %r0,c3
1145
1146 SQR_ADD_C a3L,a3R,c1,c2,c3
1147 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
1148 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
1149 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
1150 STD c1,48(r_ptr) ; r[6] = c1;
1151 COPY %r0,c1
1152
1153 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
1154 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
1155 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
1156 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
1157 STD c2,56(r_ptr) ; r[7] = c2;
1158 COPY %r0,c2
1159
1160 SQR_ADD_C a4L,a4R,c3,c1,c2
1161 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
1162 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
1163 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
1164 STD c3,64(r_ptr) ; r[8] = c3;
1165 COPY %r0,c3
1166
1167 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
1168 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
1169 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
1170 STD c1,72(r_ptr) ; r[9] = c1;
1171 COPY %r0,c1
1172
1173 SQR_ADD_C a5L,a5R,c2,c3,c1
1174 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
1175 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
1176 STD c2,80(r_ptr) ; r[10] = c2;
1177 COPY %r0,c2
1178
1179 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
1180 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
1181 STD c3,88(r_ptr) ; r[11] = c3;
1182 COPY %r0,c3
1183
1184 SQR_ADD_C a6L,a6R,c1,c2,c3
1185 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
1186 STD c1,96(r_ptr) ; r[12] = c1;
1187 COPY %r0,c1
1188
1189 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
1190 STD c2,104(r_ptr) ; r[13] = c2;
1191 COPY %r0,c2
1192
1193 SQR_ADD_C a7L,a7R,c3,c1,c2
1194 STD c3, 112(r_ptr) ; r[14] = c3
1195 STD c1, 120(r_ptr) ; r[15] = c1
1196
1197 .EXIT
1198 LDD -104(%sp),%r6 ; restore r6
1199 LDD -112(%sp),%r5 ; restore r5
1200 LDD -120(%sp),%r4 ; restore r4
1201 BVE (%rp) ; return
1202 LDD,MB -128(%sp),%r3 ; delay slot: sp -= 128, then restore r3
1203
1204 .PROCEND
1205
1206;-----------------------------------------------------------------------------
1207; Comba squaring: r[0..7] = a[0..3]^2, one result word stored per column.
1208;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1209; arg0 = r_ptr
1210; arg1 = a_ptr
1211;
1212
1213bn_sqr_comba4
1214 .proc
1215 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1216 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1217 .entry
1218 .align 64
1219 STD %r3,0(%sp) ; save r3
1220 STD %r4,8(%sp) ; save r4
1221 STD %r5,16(%sp) ; save r5
1222 STD %r6,24(%sp) ; save r6
1223
1224 ;
1225 ; Zero out carries
1226 ;
1227 COPY %r0,c1
1228 COPY %r0,c2
1229 COPY %r0,c3
1230
1231 LDO 128(%sp),%sp ; bump stack
1232 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1233 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1234
1235 ;
1236 ; Load up all of the values we are going to use
1237 ;
1238 FLDD 0(a_ptr),a0
1239 FLDD 8(a_ptr),a1
1240 FLDD 16(a_ptr),a2
1241 FLDD 24(a_ptr),a3
1242 FLDD 32(a_ptr),a4 ; NOTE(review): a4..a7 are never used below; if a has only
1243 FLDD 40(a_ptr),a5 ; 4 words (as the name suggests) these four loads read
1244 FLDD 48(a_ptr),a6 ; 32 bytes past its end - confirm and consider removing
1245 FLDD 56(a_ptr),a7
1246
1247 SQR_ADD_C a0L,a0R,c1,c2,c3
1248
1249 STD c1,0(r_ptr) ; r[0] = c1;
1250 COPY %r0,c1
1251
1252 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1253
1254 STD c2,8(r_ptr) ; r[1] = c2;
1255 COPY %r0,c2
1256
1257 SQR_ADD_C a1L,a1R,c3,c1,c2
1258 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1259
1260 STD c3,16(r_ptr) ; r[2] = c3;
1261 COPY %r0,c3
1262
1263 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1264 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1265
1266 STD c1,24(r_ptr) ; r[3] = c1;
1267 COPY %r0,c1
1268
1269 SQR_ADD_C a2L,a2R,c2,c3,c1
1270 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1271
1272 STD c2,32(r_ptr) ; r[4] = c2;
1273 COPY %r0,c2
1274
1275 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1276 STD c3,40(r_ptr) ; r[5] = c3;
1277 COPY %r0,c3
1278
1279 SQR_ADD_C a3L,a3R,c1,c2,c3
1280 STD c1,48(r_ptr) ; r[6] = c1;
1281 STD c2,56(r_ptr) ; r[7] = c2;
1282
1283 .EXIT
1284 LDD -104(%sp),%r6 ; restore r6
1285 LDD -112(%sp),%r5 ; restore r5
1286 LDD -120(%sp),%r4 ; restore r4
1287 BVE (%rp) ; return
1288 LDD,MB -128(%sp),%r3 ; delay slot: sp -= 128, then restore r3
1289
1290 .PROCEND
1291
1292
1293;---------------------------------------------------------------------------
1294 ; MUL_ADD_C: (C3,C2,C1) += A0*B0, operands passed as 32-bit halves
1295MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
1296 XMPYU A0L,B0R,ftemp1 ; m1 = A0L*B0R
1297 FSTD ftemp1,-16(%sp) ;
1298 XMPYU A0R,B0L,ftemp2 ; m = A0R*B0L
1299 FSTD ftemp2,-8(%sp) ;
1300 XMPYU A0R,B0R,ftemp3 ; lt = A0R*B0R
1301 FSTD ftemp3,-32(%sp)
1302 XMPYU A0L,B0L,ftemp4 ; ht = A0L*B0L
1303 FSTD ftemp4,-24(%sp) ;
1304
1305 LDD -8(%sp),m ; load m
1306 LDD -16(%sp),m1 ; load m1
1307 ADD,L m,m1,m ; m+m1
1308
1309 DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32
1310 LDD -24(%sp),ht ; load ht
1311
1312 CMPCLR,*>>= m,m1,%r0 ; if (m < m1), i.e. m+m1 wrapped around
1313 ADD,L ht,high_one,ht ; ht+=high_one
1314
1315 EXTRD,U m,31,32,temp1 ; m >> 32
1316 LDD -32(%sp),lt ; lt
1317 ADD,L ht,temp1,ht ; ht+= m>>32
1318 ADD lt,temp3,lt ; lt += (m+m1) << 32 (sets carry)
1319 ADD,DC ht,%r0,ht ; ht += carry from the lt addition
1320
1321 ADD C1,lt,C1 ; c1=c1+lt
1322 ADD,DC ht,%r0,ht ; ht += carry from c1+lt
1323
1324 ADD C2,ht,C2 ; c2 = c2 + ht
1325 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1326.endm
1327
1328
1329; Comba multiplication: r[0..15] = a[0..7] * b[0..7], one result word per column.
1330;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1331; arg0 = r_ptr
1332; arg1 = a_ptr
1333; arg2 = b_ptr
1334;
1335
1336bn_mul_comba8
1337 .proc
1338 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1339 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1340 .entry
1341 .align 64
1342
1343 STD %r3,0(%sp) ; save r3
1344 STD %r4,8(%sp) ; save r4
1345 STD %r5,16(%sp) ; save r5
1346 STD %r6,24(%sp) ; save r6
1347 FSTD %fr12,32(%sp) ; save fr12 (aliased as b6)
1348 FSTD %fr13,40(%sp) ; save fr13 (aliased as b7)
1349
1350 ;
1351 ; Zero out carries
1352 ;
1353 COPY %r0,c1
1354 COPY %r0,c2
1355 COPY %r0,c3
1356
1357 LDO 128(%sp),%sp ; bump stack
1358 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1359
1360 ;
1361 ; Load up all of the values we are going to use
1362 ;
1363 FLDD 0(a_ptr),a0
1364 FLDD 8(a_ptr),a1
1365 FLDD 16(a_ptr),a2
1366 FLDD 24(a_ptr),a3
1367 FLDD 32(a_ptr),a4
1368 FLDD 40(a_ptr),a5
1369 FLDD 48(a_ptr),a6
1370 FLDD 56(a_ptr),a7
1371
1372 FLDD 0(b_ptr),b0
1373 FLDD 8(b_ptr),b1
1374 FLDD 16(b_ptr),b2
1375 FLDD 24(b_ptr),b3
1376 FLDD 32(b_ptr),b4
1377 FLDD 40(b_ptr),b5
1378 FLDD 48(b_ptr),b6
1379 FLDD 56(b_ptr),b7
1380 ; Each group below sums one column; c1,c2,c3 rotate roles after every store.
1381 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1382 STD c1,0(r_ptr) ; r[0] = c1
1383 COPY %r0,c1
1384
1385 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1386 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1387 STD c2,8(r_ptr) ; r[1] = c2
1388 COPY %r0,c2
1389
1390 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1391 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1392 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1393 STD c3,16(r_ptr) ; r[2] = c3
1394 COPY %r0,c3
1395
1396 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1397 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1398 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1399 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1400 STD c1,24(r_ptr) ; r[3] = c1
1401 COPY %r0,c1
1402
1403 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1404 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1405 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1406 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1407 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1408 STD c2,32(r_ptr) ; r[4] = c2
1409 COPY %r0,c2
1410
1411 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1412 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1413 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1414 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1415 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1416 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1417 STD c3,40(r_ptr) ; r[5] = c3
1418 COPY %r0,c3
1419
1420 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1421 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1422 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1423 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1424 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1425 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1426 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1427 STD c1,48(r_ptr) ; r[6] = c1
1428 COPY %r0,c1
1429
1430 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1431 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1432 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1433 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1434 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1435 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1436 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1437 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1438 STD c2,56(r_ptr) ; r[7] = c2
1439 COPY %r0,c2
1440
1441 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1442 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1443 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1444 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1445 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1446 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1447 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1448 STD c3,64(r_ptr) ; r[8] = c3
1449 COPY %r0,c3
1450
1451 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1452 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1453 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1454 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1455 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1456 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1457 STD c1,72(r_ptr) ; r[9] = c1
1458 COPY %r0,c1
1459
1460 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1461 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1462 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1463 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1464 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1465 STD c2,80(r_ptr) ; r[10] = c2
1466 COPY %r0,c2
1467
1468 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1469 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1470 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1471 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1472 STD c3,88(r_ptr) ; r[11] = c3
1473 COPY %r0,c3
1474
1475 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1476 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1477 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1478 STD c1,96(r_ptr) ; r[12] = c1
1479 COPY %r0,c1
1480
1481 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1482 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1483 STD c2,104(r_ptr) ; r[13] = c2
1484 COPY %r0,c2
1485
1486 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1487 STD c3,112(r_ptr) ; r[14] = c3
1488 STD c1,120(r_ptr) ; r[15] = c1
1489
1490 .EXIT
1491 FLDD -88(%sp),%fr13 ; restore fr13
1492 FLDD -96(%sp),%fr12 ; restore fr12
1493 LDD -104(%sp),%r6 ; restore r6
1494 LDD -112(%sp),%r5 ; restore r5
1495 LDD -120(%sp),%r4 ; restore r4
1496 BVE (%rp) ; return
1497 LDD,MB -128(%sp),%r3 ; delay slot: sp -= 128, then restore r3
1498
1499 .PROCEND
1500
1501;-----------------------------------------------------------------------------
1502; Comba multiplication: r[0..7] = a[0..3] * b[0..3], one result word per column.
1503;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1504; arg0 = r_ptr
1505; arg1 = a_ptr
1506; arg2 = b_ptr
1507;
1508
1509bn_mul_comba4
1510 .proc
1511 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1512 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1513 .entry
1514 .align 64
1515
1516 STD %r3,0(%sp) ; save r3
1517 STD %r4,8(%sp) ; save r4
1518 STD %r5,16(%sp) ; save r5
1519 STD %r6,24(%sp) ; save r6
1520 FSTD %fr12,32(%sp) ; save fr12 (alias b6; not otherwise used in this routine)
1521 FSTD %fr13,40(%sp) ; save fr13 (alias b7; not otherwise used in this routine)
1522
1523 ;
1524 ; Zero out carries
1525 ;
1526 COPY %r0,c1
1527 COPY %r0,c2
1528 COPY %r0,c3
1529
1530 LDO 128(%sp),%sp ; bump stack
1531 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1532
1533 ;
1534 ; Load up all of the values we are going to use
1535 ;
1536 FLDD 0(a_ptr),a0
1537 FLDD 8(a_ptr),a1
1538 FLDD 16(a_ptr),a2
1539 FLDD 24(a_ptr),a3
1540
1541 FLDD 0(b_ptr),b0
1542 FLDD 8(b_ptr),b1
1543 FLDD 16(b_ptr),b2
1544 FLDD 24(b_ptr),b3
1545 ; Each group below sums one column; c1,c2,c3 rotate roles after every store.
1546 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1547 STD c1,0(r_ptr) ; r[0] = c1
1548 COPY %r0,c1
1549
1550 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1551 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1552 STD c2,8(r_ptr) ; r[1] = c2
1553 COPY %r0,c2
1554
1555 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1556 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1557 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1558 STD c3,16(r_ptr) ; r[2] = c3
1559 COPY %r0,c3
1560
1561 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1562 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1563 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1564 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1565 STD c1,24(r_ptr) ; r[3] = c1
1566 COPY %r0,c1
1567
1568 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1569 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1570 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1571 STD c2,32(r_ptr) ; r[4] = c2
1572 COPY %r0,c2
1573
1574 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1575 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1576 STD c3,40(r_ptr) ; r[5] = c3
1577 COPY %r0,c3
1578
1579 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1580 STD c1,48(r_ptr) ; r[6] = c1
1581 STD c2,56(r_ptr) ; r[7] = c2
1582
1583 .EXIT
1584 FLDD -88(%sp),%fr13 ; restore fr13
1585 FLDD -96(%sp),%fr12 ; restore fr12
1586 LDD -104(%sp),%r6 ; restore r6
1587 LDD -112(%sp),%r5 ; restore r5
1588 LDD -120(%sp),%r4 ; restore r4
1589 BVE (%rp) ; return
1590 LDD,MB -128(%sp),%r3 ; delay slot: sp -= 128, then restore r3
1591
1592 .PROCEND
1593
1594
1595 .SPACE $TEXT$
1596 .SUBSPA $CODE$
1597 .SPACE $PRIVATE$,SORT=16
1598 .IMPORT $global$,DATA
1599 .SPACE $TEXT$
1600 .SUBSPA $CODE$
1601 .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=0x2c,SORT=16
1602C$4
1603 .ALIGN 8 ; C$4 is the fprintf format string loaded by bn_div_err_case
1604 .STRINGZ "Division would overflow (%d)\n"
1605 .END
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
index 009b0eb685..1eb8395b25 100644
--- a/src/lib/libcrypto/bn/bn.h
+++ b/src/lib/libcrypto/bn/bn.h
@@ -59,7 +59,7 @@
59#ifndef HEADER_BN_H 59#ifndef HEADER_BN_H
60#define HEADER_BN_H 60#define HEADER_BN_H
61 61
62#ifndef WIN16 62#ifndef NO_FP_API
63#include <stdio.h> /* FILE */ 63#include <stdio.h> /* FILE */
64#endif 64#endif
65#include <openssl/opensslconf.h> 65#include <openssl/opensslconf.h>
@@ -233,7 +233,7 @@ typedef struct bignum_st
233 BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */ 233 BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
234 int top; /* Index of last used d +1. */ 234 int top; /* Index of last used d +1. */
235 /* The next are internal book keeping for bn_expand. */ 235 /* The next are internal book keeping for bn_expand. */
236 int max; /* Size of the d array. */ 236 int dmax; /* Size of the d array. */
237 int neg; /* one if the number is negative */ 237 int neg; /* one if the number is negative */
238 int flags; 238 int flags;
239 } BIGNUM; 239 } BIGNUM;
@@ -364,6 +364,8 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p,
364 const BIGNUM *m,BN_CTX *ctx); 364 const BIGNUM *m,BN_CTX *ctx);
365int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, const BIGNUM *p, 365int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, const BIGNUM *p,
366 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); 366 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
367int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p,
368 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
367int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2, 369int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2,
368 BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx); 370 BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx);
369int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, 371int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p,
@@ -433,9 +435,9 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m,
433 435
434/* library internal functions */ 436/* library internal functions */
435 437
436#define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->max)?\ 438#define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\
437 (a):bn_expand2((a),(bits)/BN_BITS2+1)) 439 (a):bn_expand2((a),(bits)/BN_BITS2+1))
438#define bn_wexpand(a,words) (((words) <= (a)->max)?(a):bn_expand2((a),(words))) 440#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words)))
439BIGNUM *bn_expand2(BIGNUM *a, int words); 441BIGNUM *bn_expand2(BIGNUM *a, int words);
440 442
441#define bn_fix_top(a) \ 443#define bn_fix_top(a) \
@@ -483,7 +485,9 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num);
483#define BN_F_BN_CTX_NEW 106 485#define BN_F_BN_CTX_NEW 106
484#define BN_F_BN_DIV 107 486#define BN_F_BN_DIV 107
485#define BN_F_BN_EXPAND2 108 487#define BN_F_BN_EXPAND2 108
488#define BN_F_BN_MOD_EXP2_MONT 118
486#define BN_F_BN_MOD_EXP_MONT 109 489#define BN_F_BN_MOD_EXP_MONT 109
490#define BN_F_BN_MOD_EXP_MONT_WORD 117
487#define BN_F_BN_MOD_INVERSE 110 491#define BN_F_BN_MOD_INVERSE 110
488#define BN_F_BN_MOD_MUL_RECIPROCAL 111 492#define BN_F_BN_MOD_MUL_RECIPROCAL 111
489#define BN_F_BN_MPI2BN 112 493#define BN_F_BN_MPI2BN 112
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index 3329cc18e6..44e52a40db 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -227,7 +227,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
227 227
228#else 228#else
229 229
230/* Divide h-l by d and return the result. */ 230/* Divide h,l by d and return the result. */
231/* I need to test this some more :-( */ 231/* I need to test this some more :-( */
232BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) 232BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
233 { 233 {
@@ -237,13 +237,8 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
237 if (d == 0) return(BN_MASK2); 237 if (d == 0) return(BN_MASK2);
238 238
239 i=BN_num_bits_word(d); 239 i=BN_num_bits_word(d);
240 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i)) 240 assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i));
241 { 241
242#if !defined(NO_STDIO) && !defined(WIN16)
243 fprintf(stderr,"Division would overflow (%d)\n",i);
244#endif
245 abort();
246 }
247 i=BN_BITS2-i; 242 i=BN_BITS2-i;
248 if (h >= d) h-=d; 243 if (h >= d) h-=d;
249 244
diff --git a/src/lib/libcrypto/bn/bn_blind.c b/src/lib/libcrypto/bn/bn_blind.c
index 1b1bb06046..2d287e6d1b 100644
--- a/src/lib/libcrypto/bn/bn_blind.c
+++ b/src/lib/libcrypto/bn/bn_blind.c
@@ -67,7 +67,7 @@ BN_BLINDING *BN_BLINDING_new(BIGNUM *A, BIGNUM *Ai, BIGNUM *mod)
67 bn_check_top(Ai); 67 bn_check_top(Ai);
68 bn_check_top(mod); 68 bn_check_top(mod);
69 69
70 if ((ret=(BN_BLINDING *)Malloc(sizeof(BN_BLINDING))) == NULL) 70 if ((ret=(BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL)
71 { 71 {
72 BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE); 72 BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE);
73 return(NULL); 73 return(NULL);
@@ -91,7 +91,7 @@ void BN_BLINDING_free(BN_BLINDING *r)
91 91
92 if (r->A != NULL) BN_free(r->A ); 92 if (r->A != NULL) BN_free(r->A );
93 if (r->Ai != NULL) BN_free(r->Ai); 93 if (r->Ai != NULL) BN_free(r->Ai);
94 Free(r); 94 OPENSSL_free(r);
95 } 95 }
96 96
97int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) 97int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c
index 46132fd180..b1a8d7571e 100644
--- a/src/lib/libcrypto/bn/bn_ctx.c
+++ b/src/lib/libcrypto/bn/bn_ctx.c
@@ -69,7 +69,7 @@ BN_CTX *BN_CTX_new(void)
69 { 69 {
70 BN_CTX *ret; 70 BN_CTX *ret;
71 71
72 ret=(BN_CTX *)Malloc(sizeof(BN_CTX)); 72 ret=(BN_CTX *)OPENSSL_malloc(sizeof(BN_CTX));
73 if (ret == NULL) 73 if (ret == NULL)
74 { 74 {
75 BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE); 75 BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
@@ -102,7 +102,7 @@ void BN_CTX_free(BN_CTX *ctx)
102 for (i=0; i < BN_CTX_NUM; i++) 102 for (i=0; i < BN_CTX_NUM; i++)
103 BN_clear_free(&(ctx->bn[i])); 103 BN_clear_free(&(ctx->bn[i]));
104 if (ctx->flags & BN_FLG_MALLOCED) 104 if (ctx->flags & BN_FLG_MALLOCED)
105 Free(ctx); 105 OPENSSL_free(ctx);
106 } 106 }
107 107
108void BN_CTX_start(BN_CTX *ctx) 108void BN_CTX_start(BN_CTX *ctx)
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
index 07af1d3b44..c3772c243b 100644
--- a/src/lib/libcrypto/bn/bn_div.c
+++ b/src/lib/libcrypto/bn/bn_div.c
@@ -205,7 +205,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
205 BN_init(&wnum); 205 BN_init(&wnum);
206 wnum.d= &(snum->d[loop]); 206 wnum.d= &(snum->d[loop]);
207 wnum.top= div_n; 207 wnum.top= div_n;
208 wnum.max= snum->max+1; /* a bit of a lie */ 208 wnum.dmax= snum->dmax+1; /* a bit of a lie */
209 209
210 /* Get the top 2 words of sdiv */ 210 /* Get the top 2 words of sdiv */
211 /* i=sdiv->top; */ 211 /* i=sdiv->top; */
diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c
index 988270bcf4..86550c4c21 100644
--- a/src/lib/libcrypto/bn/bn_err.c
+++ b/src/lib/libcrypto/bn/bn_err.c
@@ -76,7 +76,9 @@ static ERR_STRING_DATA BN_str_functs[]=
76{ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"}, 76{ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"},
77{ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"}, 77{ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"},
78{ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"}, 78{ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"},
79{ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"},
79{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"}, 80{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"},
81{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"},
80{ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"}, 82{ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"},
81{ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"}, 83{ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"},
82{ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"}, 84{ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"},
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
index 0c11601675..d2c91628ac 100644
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ b/src/lib/libcrypto/bn/bn_exp.c
@@ -55,18 +55,66 @@
55 * copied and put under another distribution licence 55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
58 112
59#include <stdio.h> 113#include <stdio.h>
60#include "cryptlib.h" 114#include "cryptlib.h"
61#include "bn_lcl.h" 115#include "bn_lcl.h"
62#ifdef ATALLA
63# include <alloca.h>
64# include <atasi.h>
65# include <assert.h>
66# include <dlfcn.h>
67#endif
68 116
69#define TABLE_SIZE 16 117#define TABLE_SIZE 32
70 118
71/* slow but works */ 119/* slow but works */
72int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 120int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
@@ -91,42 +139,6 @@ err:
91 return(r); 139 return(r);
92 } 140 }
93 141
94#if 0
95/* this one works - simple but works */
96int BN_mod_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, BN_CTX *ctx)
97 {
98 int i,bits,ret=0;
99 BIGNUM *v,*tmp;
100
101 BN_CTX_start(ctx);
102 v = BN_CTX_get(ctx);
103 tmp = BN_CTX_get(ctx);
104 if (v == NULL || tmp == NULL) goto err;
105
106 if (BN_copy(v,a) == NULL) goto err;
107 bits=BN_num_bits(p);
108
109 if (BN_is_odd(p))
110 { if (BN_copy(r,a) == NULL) goto err; }
111 else { if (!BN_one(r)) goto err; }
112
113 for (i=1; i<bits; i++)
114 {
115 if (!BN_sqr(tmp,v,ctx)) goto err;
116 if (!BN_mod(v,tmp,m,ctx)) goto err;
117 if (BN_is_bit_set(p,i))
118 {
119 if (!BN_mul(tmp,r,v,ctx)) goto err;
120 if (!BN_mod(r,tmp,m,ctx)) goto err;
121 }
122 }
123 ret=1;
124err:
125 BN_CTX_end(ctx);
126 return(ret);
127 }
128
129#endif
130 142
131/* this one works - simple but works */ 143/* this one works - simple but works */
132int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BN_CTX *ctx) 144int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BN_CTX *ctx)
@@ -163,172 +175,6 @@ err:
163 return(ret); 175 return(ret);
164 } 176 }
165 177
166#ifdef ATALLA
167
168/*
169 * This routine will dynamically check for the existance of an Atalla AXL-200
170 * SSL accelerator module. If one is found, the variable
171 * asi_accelerator_present is set to 1 and the function pointers
172 * ptr_ASI_xxxxxx above will be initialized to corresponding ASI API calls.
173 */
174typedef int tfnASI_GetPerformanceStatistics(int reset_flag,
175 unsigned int *ret_buf);
176typedef int tfnASI_GetHardwareConfig(long card_num, unsigned int *ret_buf);
177typedef int tfnASI_RSAPrivateKeyOpFn(RSAPrivateKey * rsaKey,
178 unsigned char *output,
179 unsigned char *input,
180 unsigned int modulus_len);
181
182static tfnASI_GetHardwareConfig *ptr_ASI_GetHardwareConfig;
183static tfnASI_RSAPrivateKeyOpFn *ptr_ASI_RSAPrivateKeyOpFn;
184static tfnASI_GetPerformanceStatistics *ptr_ASI_GetPerformanceStatistics;
185static int asi_accelerator_present;
186static int tried_atalla;
187
188void atalla_initialize_accelerator_handle(void)
189 {
190 void *dl_handle;
191 int status;
192 unsigned int config_buf[1024];
193 static int tested;
194
195 if(tested)
196 return;
197
198 tested=1;
199
200 bzero((void *)config_buf, 1024);
201
202 /*
203 * Check to see if the library is present on the system
204 */
205 dl_handle = dlopen("atasi.so", RTLD_NOW);
206 if (dl_handle == (void *) NULL)
207 {
208/* printf("atasi.so library is not present on the system\n");
209 printf("No HW acceleration available\n");*/
210 return;
211 }
212
213 /*
214 * The library is present. Now we'll check to insure that the
215 * LDM is up and running. First we'll get the address of the
216 * function in the atasi library that we need to see if the
217 * LDM is operating.
218 */
219
220 ptr_ASI_GetHardwareConfig =
221 (tfnASI_GetHardwareConfig *)dlsym(dl_handle,"ASI_GetHardwareConfig");
222
223 if (ptr_ASI_GetHardwareConfig)
224 {
225 /*
226 * We found the call, now we'll get our config
227 * status. If we get a non 0 result, the LDM is not
228 * running and we cannot use the Atalla ASI *
229 * library.
230 */
231 status = (*ptr_ASI_GetHardwareConfig)(0L, config_buf);
232 if (status != 0)
233 {
234 printf("atasi.so library is present but not initialized\n");
235 printf("No HW acceleration available\n");
236 return;
237 }
238 }
239 else
240 {
241/* printf("We found the library, but not the function. Very Strange!\n");*/
242 return ;
243 }
244
245 /*
246 * It looks like we have acceleration capabilities. Load up the
247 * pointers to our ASI API calls.
248 */
249 ptr_ASI_RSAPrivateKeyOpFn=
250 (tfnASI_RSAPrivateKeyOpFn *)dlsym(dl_handle, "ASI_RSAPrivateKeyOpFn");
251 if (ptr_ASI_RSAPrivateKeyOpFn == NULL)
252 {
253/* printf("We found the library, but no RSA function. Very Strange!\n");*/
254 return;
255 }
256
257 ptr_ASI_GetPerformanceStatistics =
258 (tfnASI_GetPerformanceStatistics *)dlsym(dl_handle, "ASI_GetPerformanceStatistics");
259 if (ptr_ASI_GetPerformanceStatistics == NULL)
260 {
261/* printf("We found the library, but no stat function. Very Strange!\n");*/
262 return;
263 }
264
265 /*
266 * Indicate that acceleration is available
267 */
268 asi_accelerator_present = 1;
269
270/* printf("This system has acceleration!\n");*/
271
272 return;
273 }
274
275/* make sure this only gets called once when bn_mod_exp calls bn_mod_exp_mont */
276int BN_mod_exp_atalla(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m)
277 {
278 unsigned char *abin;
279 unsigned char *pbin;
280 unsigned char *mbin;
281 unsigned char *rbin;
282 int an,pn,mn,ret;
283 RSAPrivateKey keydata;
284
285 atalla_initialize_accelerator_handle();
286 if(!asi_accelerator_present)
287 return 0;
288
289
290/* We should be able to run without size testing */
291# define ASIZE 128
292 an=BN_num_bytes(a);
293 pn=BN_num_bytes(p);
294 mn=BN_num_bytes(m);
295
296 if(an <= ASIZE && pn <= ASIZE && mn <= ASIZE)
297 {
298 int size=mn;
299
300 assert(an <= mn);
301 abin=alloca(size);
302 memset(abin,'\0',mn);
303 BN_bn2bin(a,abin+size-an);
304
305 pbin=alloca(pn);
306 BN_bn2bin(p,pbin);
307
308 mbin=alloca(size);
309 memset(mbin,'\0',mn);
310 BN_bn2bin(m,mbin+size-mn);
311
312 rbin=alloca(size);
313
314 memset(&keydata,'\0',sizeof keydata);
315 keydata.privateExponent.data=pbin;
316 keydata.privateExponent.len=pn;
317 keydata.modulus.data=mbin;
318 keydata.modulus.len=size;
319
320 ret=(*ptr_ASI_RSAPrivateKeyOpFn)(&keydata,rbin,abin,keydata.modulus.len);
321/*fprintf(stderr,"!%s\n",BN_bn2hex(a));*/
322 if(!ret)
323 {
324 BN_bin2bn(rbin,keydata.modulus.len,r);
325/*fprintf(stderr,"?%s\n",BN_bn2hex(r));*/
326 return 1;
327 }
328 }
329 return 0;
330 }
331#endif /* def ATALLA */
332 178
333int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 179int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
334 BN_CTX *ctx) 180 BN_CTX *ctx)
@@ -339,13 +185,6 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
339 bn_check_top(p); 185 bn_check_top(p);
340 bn_check_top(m); 186 bn_check_top(m);
341 187
342#ifdef ATALLA
343 if(BN_mod_exp_atalla(r,a,p,m))
344 return 1;
345/* If it fails, try the other methods (but don't try atalla again) */
346 tried_atalla=1;
347#endif
348
349#ifdef MONT_MUL_MOD 188#ifdef MONT_MUL_MOD
350 /* I have finally been able to take out this pre-condition of 189 /* I have finally been able to take out this pre-condition of
351 * the top bit being set. It was caused by an error in BN_div 190 * the top bit being set. It was caused by an error in BN_div
@@ -354,7 +193,15 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
354/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 193/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
355 194
356 if (BN_is_odd(m)) 195 if (BN_is_odd(m))
357 { ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); } 196 {
197 if (a->top == 1)
198 {
199 BN_ULONG A = a->d[0];
200 ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
201 }
202 else
203 ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
204 }
358 else 205 else
359#endif 206#endif
360#ifdef RECP_MUL_MOD 207#ifdef RECP_MUL_MOD
@@ -363,14 +210,10 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
363 { ret=BN_mod_exp_simple(r,a,p,m,ctx); } 210 { ret=BN_mod_exp_simple(r,a,p,m,ctx); }
364#endif 211#endif
365 212
366#ifdef ATALLA
367 tried_atalla=0;
368#endif
369
370 return(ret); 213 return(ret);
371 } 214 }
372 215
373/* #ifdef RECP_MUL_MOD */ 216
374int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 217int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
375 const BIGNUM *m, BN_CTX *ctx) 218 const BIGNUM *m, BN_CTX *ctx)
376 { 219 {
@@ -398,27 +241,22 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
398 ts=1; 241 ts=1;
399 242
400 if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ 243 if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */
401 if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx))
402 goto err; /* 2 */
403
404 if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */
405 window=1;
406 else if (bits >= 256)
407 window=5; /* max size of window */
408 else if (bits >= 128)
409 window=4;
410 else
411 window=3;
412 244
413 j=1<<(window-1); 245 window = BN_window_bits_for_exponent_size(bits);
414 for (i=1; i<j; i++) 246 if (window > 1)
415 { 247 {
416 BN_init(&val[i]); 248 if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx))
417 if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx)) 249 goto err; /* 2 */
418 goto err; 250 j=1<<(window-1);
251 for (i=1; i<j; i++)
252 {
253 BN_init(&val[i]);
254 if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx))
255 goto err;
256 }
257 ts=i;
419 } 258 }
420 ts=i; 259
421
422 start=1; /* This is used to avoid multiplication etc 260 start=1; /* This is used to avoid multiplication etc
423 * when there is only the value '1' in the 261 * when there is only the value '1' in the
424 * buffer. */ 262 * buffer. */
@@ -485,9 +323,8 @@ err:
485 BN_RECP_CTX_free(&recp); 323 BN_RECP_CTX_free(&recp);
486 return(ret); 324 return(ret);
487 } 325 }
488/* #endif */
489 326
490/* #ifdef MONT_MUL_MOD */ 327
491int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, 328int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
492 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 329 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
493 { 330 {
@@ -502,12 +339,6 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
502 bn_check_top(p); 339 bn_check_top(p);
503 bn_check_top(m); 340 bn_check_top(m);
504 341
505#ifdef ATALLA
506 if(!tried_atalla && BN_mod_exp_atalla(rr,a,p,m))
507 return 1;
508/* If it fails, try the other methods */
509#endif
510
511 if (!(m->d[0] & 1)) 342 if (!(m->d[0] & 1))
512 { 343 {
513 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); 344 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
@@ -527,11 +358,9 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
527 /* If this is not done, things will break in the montgomery 358 /* If this is not done, things will break in the montgomery
528 * part */ 359 * part */
529 360
530#if 1
531 if (in_mont != NULL) 361 if (in_mont != NULL)
532 mont=in_mont; 362 mont=in_mont;
533 else 363 else
534#endif
535 { 364 {
536 if ((mont=BN_MONT_CTX_new()) == NULL) goto err; 365 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
537 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; 366 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
@@ -541,31 +370,27 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
541 ts=1; 370 ts=1;
542 if (BN_ucmp(a,m) >= 0) 371 if (BN_ucmp(a,m) >= 0)
543 { 372 {
544 BN_mod(&(val[0]),a,m,ctx); 373 if (!BN_mod(&(val[0]),a,m,ctx))
374 goto err;
545 aa= &(val[0]); 375 aa= &(val[0]);
546 } 376 }
547 else 377 else
548 aa=a; 378 aa=a;
549 if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */ 379 if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */
550 if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */
551
552 if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */
553 window=1;
554 else if (bits >= 256)
555 window=5; /* max size of window */
556 else if (bits >= 128)
557 window=4;
558 else
559 window=3;
560 380
561 j=1<<(window-1); 381 window = BN_window_bits_for_exponent_size(bits);
562 for (i=1; i<j; i++) 382 if (window > 1)
563 { 383 {
564 BN_init(&(val[i])); 384 if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */
565 if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx)) 385 j=1<<(window-1);
566 goto err; 386 for (i=1; i<j; i++)
387 {
388 BN_init(&(val[i]));
389 if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx))
390 goto err;
391 }
392 ts=i;
567 } 393 }
568 ts=i;
569 394
570 start=1; /* This is used to avoid multiplication etc 395 start=1; /* This is used to avoid multiplication etc
571 * when there is only the value '1' in the 396 * when there is only the value '1' in the
@@ -574,7 +399,7 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
574 wstart=bits-1; /* The top bit of the window */ 399 wstart=bits-1; /* The top bit of the window */
575 wend=0; /* The bottom bit of the window */ 400 wend=0; /* The bottom bit of the window */
576 401
577 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; 402 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
578 for (;;) 403 for (;;)
579 { 404 {
580 if (BN_is_bit_set(p,wstart) == 0) 405 if (BN_is_bit_set(p,wstart) == 0)
@@ -626,7 +451,7 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p,
626 start=0; 451 start=0;
627 if (wstart < 0) break; 452 if (wstart < 0) break;
628 } 453 }
629 BN_from_montgomery(rr,r,mont,ctx); 454 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
630 ret=1; 455 ret=1;
631err: 456err:
632 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 457 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
@@ -635,7 +460,134 @@ err:
635 BN_clear_free(&(val[i])); 460 BN_clear_free(&(val[i]));
636 return(ret); 461 return(ret);
637 } 462 }
638/* #endif */ 463
464int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
465 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
466 {
467 BN_MONT_CTX *mont = NULL;
468 int b, bits, ret=0;
469 int r_is_one;
470 BN_ULONG w, next_w;
471 BIGNUM *d, *r, *t;
472 BIGNUM *swap_tmp;
473#define BN_MOD_MUL_WORD(r, w, m) \
474 (BN_mul_word(r, (w)) && \
475 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
476 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
477 /* BN_MOD_MUL_WORD is only used with 'w' large,
478 * so the BN_ucmp test is probably more overhead
479 * than always using BN_mod (which uses BN_copy if
480 * a similar test returns true). */
481#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
482 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
483
484 bn_check_top(p);
485 bn_check_top(m);
486
487 if (!(m->d[0] & 1))
488 {
489 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
490 return(0);
491 }
492 bits = BN_num_bits(p);
493 if (bits == 0)
494 {
495 BN_one(rr);
496 return(1);
497 }
498 BN_CTX_start(ctx);
499 d = BN_CTX_get(ctx);
500 r = BN_CTX_get(ctx);
501 t = BN_CTX_get(ctx);
502 if (d == NULL || r == NULL || t == NULL) goto err;
503
504 if (in_mont != NULL)
505 mont=in_mont;
506 else
507 {
508 if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
509 if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
510 }
511
512 r_is_one = 1; /* except for Montgomery factor */
513
514 /* bits-1 >= 0 */
515
516 /* The result is accumulated in the product r*w. */
517 w = a; /* bit 'bits-1' of 'p' is always set */
518 for (b = bits-2; b >= 0; b--)
519 {
520 /* First, square r*w. */
521 next_w = w*w;
522 if ((next_w/w) != w) /* overflow */
523 {
524 if (r_is_one)
525 {
526 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
527 r_is_one = 0;
528 }
529 else
530 {
531 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
532 }
533 next_w = 1;
534 }
535 w = next_w;
536 if (!r_is_one)
537 {
538 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
539 }
540
541 /* Second, multiply r*w by 'a' if exponent bit is set. */
542 if (BN_is_bit_set(p, b))
543 {
544 next_w = w*a;
545 if ((next_w/a) != w) /* overflow */
546 {
547 if (r_is_one)
548 {
549 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
550 r_is_one = 0;
551 }
552 else
553 {
554 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
555 }
556 next_w = a;
557 }
558 w = next_w;
559 }
560 }
561
562 /* Finally, set r:=r*w. */
563 if (w != 1)
564 {
565 if (r_is_one)
566 {
567 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
568 r_is_one = 0;
569 }
570 else
571 {
572 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
573 }
574 }
575
576 if (r_is_one) /* can happen only if a == 1*/
577 {
578 if (!BN_one(rr)) goto err;
579 }
580 else
581 {
582 if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
583 }
584 ret = 1;
585err:
586 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
587 BN_CTX_end(ctx);
588 return(ret);
589 }
590
639 591
640/* The old fallback, simple version :-) */ 592/* The old fallback, simple version :-) */
641int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, 593int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,
@@ -660,26 +612,21 @@ int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,
660 BN_init(&(val[0])); 612 BN_init(&(val[0]));
661 ts=1; 613 ts=1;
662 if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ 614 if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */
663 if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx))
664 goto err; /* 2 */
665
666 if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */
667 window=1;
668 else if (bits >= 256)
669 window=5; /* max size of window */
670 else if (bits >= 128)
671 window=4;
672 else
673 window=3;
674 615
675 j=1<<(window-1); 616 window = BN_window_bits_for_exponent_size(bits);
676 for (i=1; i<j; i++) 617 if (window > 1)
677 { 618 {
678 BN_init(&(val[i])); 619 if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx))
679 if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx)) 620 goto err; /* 2 */
680 goto err; 621 j=1<<(window-1);
622 for (i=1; i<j; i++)
623 {
624 BN_init(&(val[i]));
625 if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx))
626 goto err;
627 }
628 ts=i;
681 } 629 }
682 ts=i;
683 630
684 start=1; /* This is used to avoid multiplication etc 631 start=1; /* This is used to avoid multiplication etc
685 * when there is only the value '1' in the 632 * when there is only the value '1' in the
diff --git a/src/lib/libcrypto/bn/bn_exp2.c b/src/lib/libcrypto/bn/bn_exp2.c
index 4f4e9e3299..29029f4c72 100644
--- a/src/lib/libcrypto/bn/bn_exp2.c
+++ b/src/lib/libcrypto/bn/bn_exp2.c
@@ -1,27 +1,128 @@
1/* crypto/bn/bn_exp2.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
1#include <stdio.h> 112#include <stdio.h>
2#include "cryptlib.h" 113#include "cryptlib.h"
3#include "bn_lcl.h" 114#include "bn_lcl.h"
4 115
5/* I've done some timing with different table sizes. 116#define TABLE_SIZE 32
6 * The main hassle is that even with bits set at 3, this requires
7 * 63 BIGNUMs to store the pre-calculated values.
8 * 512 1024
9 * bits=1 75.4% 79.4%
10 * bits=2 61.2% 62.4%
11 * bits=3 61.3% 59.3%
12 * The lack of speed improvement is also a function of the pre-calculation
13 * which could be removed.
14 */
15#define EXP2_TABLE_BITS 2 /* 1 2 3 4 5 */
16#define EXP2_TABLE_SIZE 4 /* 2 4 8 16 32 */
17 117
18int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, 118int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
19 BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 119 BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
20 { 120 {
21 int i,j,k,bits,bits1,bits2,ret=0,wstart,wend,window,xvalue,yvalue; 121 int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2;
22 int start=1,ts=0,x,y; 122 int r_is_one=1,ts1=0,ts2=0;
23 BIGNUM *d,*aa1,*aa2,*r; 123 BIGNUM *d,*r;
24 BIGNUM val[EXP2_TABLE_SIZE][EXP2_TABLE_SIZE]; 124 BIGNUM *a_mod_m;
125 BIGNUM val1[TABLE_SIZE], val2[TABLE_SIZE];
25 BN_MONT_CTX *mont=NULL; 126 BN_MONT_CTX *mont=NULL;
26 127
27 bn_check_top(a1); 128 bn_check_top(a1);
@@ -32,7 +133,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
32 133
33 if (!(m->d[0] & 1)) 134 if (!(m->d[0] & 1))
34 { 135 {
35 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); 136 BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
36 return(0); 137 return(0);
37 } 138 }
38 bits1=BN_num_bits(p1); 139 bits1=BN_num_bits(p1);
@@ -42,17 +143,13 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
42 BN_one(rr); 143 BN_one(rr);
43 return(1); 144 return(1);
44 } 145 }
146 bits=(bits1 > bits2)?bits1:bits2;
45 147
46 BN_CTX_start(ctx); 148 BN_CTX_start(ctx);
47 d = BN_CTX_get(ctx); 149 d = BN_CTX_get(ctx);
48 r = BN_CTX_get(ctx); 150 r = BN_CTX_get(ctx);
49 if (d == NULL || r == NULL) goto err; 151 if (d == NULL || r == NULL) goto err;
50 152
51 bits=(bits1 > bits2)?bits1:bits2;
52
53 /* If this is not done, things will break in the montgomery
54 * part */
55
56 if (in_mont != NULL) 153 if (in_mont != NULL)
57 mont=in_mont; 154 mont=in_mont;
58 else 155 else
@@ -61,139 +158,143 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2,
61 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; 158 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
62 } 159 }
63 160
64 BN_init(&(val[0][0])); 161 window1 = BN_window_bits_for_exponent_size(bits1);
65 BN_init(&(val[1][1])); 162 window2 = BN_window_bits_for_exponent_size(bits2);
66 BN_init(&(val[0][1])); 163
67 BN_init(&(val[1][0])); 164 /*
68 ts=1; 165 * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1) - 1
166 */
167 BN_init(&val1[0]);
168 ts1=1;
69 if (BN_ucmp(a1,m) >= 0) 169 if (BN_ucmp(a1,m) >= 0)
70 { 170 {
71 BN_mod(&(val[1][0]),a1,m,ctx); 171 if (!BN_mod(&(val1[0]),a1,m,ctx))
72 aa1= &(val[1][0]); 172 goto err;
173 a_mod_m = &(val1[0]);
73 } 174 }
74 else 175 else
75 aa1=a1; 176 a_mod_m = a1;
177 if (!BN_to_montgomery(&(val1[0]),a_mod_m,mont,ctx)) goto err;
178 if (window1 > 1)
179 {
180 if (!BN_mod_mul_montgomery(d,&(val1[0]),&(val1[0]),mont,ctx)) goto err;
181
182 j=1<<(window1-1);
183 for (i=1; i<j; i++)
184 {
185 BN_init(&(val1[i]));
186 if (!BN_mod_mul_montgomery(&(val1[i]),&(val1[i-1]),d,mont,ctx))
187 goto err;
188 }
189 ts1=i;
190 }
191
192
193 /*
194 * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1) - 1
195 */
196 BN_init(&val2[0]);
197 ts2=1;
76 if (BN_ucmp(a2,m) >= 0) 198 if (BN_ucmp(a2,m) >= 0)
77 { 199 {
78 BN_mod(&(val[0][1]),a2,m,ctx); 200 if (!BN_mod(&(val2[0]),a2,m,ctx))
79 aa2= &(val[0][1]); 201 goto err;
202 a_mod_m = &(val2[0]);
80 } 203 }
81 else 204 else
82 aa2=a2; 205 a_mod_m = a2;
83 if (!BN_to_montgomery(&(val[1][0]),aa1,mont,ctx)) goto err; 206 if (!BN_to_montgomery(&(val2[0]),a_mod_m,mont,ctx)) goto err;
84 if (!BN_to_montgomery(&(val[0][1]),aa2,mont,ctx)) goto err; 207 if (window2 > 1)
85 if (!BN_mod_mul_montgomery(&(val[1][1]),
86 &(val[1][0]),&(val[0][1]),mont,ctx))
87 goto err;
88
89#if 0
90 if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */
91 window=1;
92 else if (bits > 250)
93 window=5; /* max size of window */
94 else if (bits >= 120)
95 window=4;
96 else
97 window=3;
98#else
99 window=EXP2_TABLE_BITS;
100#endif
101
102 k=1<<window;
103 for (x=0; x<k; x++)
104 { 208 {
105 if (x >= 2) 209 if (!BN_mod_mul_montgomery(d,&(val2[0]),&(val2[0]),mont,ctx)) goto err;
106 { 210
107 BN_init(&(val[x][0])); 211 j=1<<(window2-1);
108 BN_init(&(val[x][1])); 212 for (i=1; i<j; i++)
109 if (!BN_mod_mul_montgomery(&(val[x][0]),
110 &(val[1][0]),&(val[x-1][0]),mont,ctx)) goto err;
111 if (!BN_mod_mul_montgomery(&(val[x][1]),
112 &(val[1][0]),&(val[x-1][1]),mont,ctx)) goto err;
113 }
114 for (y=2; y<k; y++)
115 { 213 {
116 BN_init(&(val[x][y])); 214 BN_init(&(val2[i]));
117 if (!BN_mod_mul_montgomery(&(val[x][y]), 215 if (!BN_mod_mul_montgomery(&(val2[i]),&(val2[i-1]),d,mont,ctx))
118 &(val[x][y-1]),&(val[0][1]),mont,ctx))
119 goto err; 216 goto err;
120 } 217 }
218 ts2=i;
121 } 219 }
122 ts=k; 220
123 221
124 start=1; /* This is used to avoid multiplication etc 222 /* Now compute the power product, using independent windows. */
125 * when there is only the value '1' in the 223 r_is_one=1;
126 * buffer. */ 224 wvalue1=0; /* The 'value' of the first window */
127 xvalue=0; /* The 'x value' of the window */ 225 wvalue2=0; /* The 'value' of the second window */
128 yvalue=0; /* The 'y value' of the window */ 226 wpos1=0; /* If wvalue1 > 0, the bottom bit of the first window */
129 wstart=bits-1; /* The top bit of the window */ 227 wpos2=0; /* If wvalue2 > 0, the bottom bit of the second window */
130 wend=0; /* The bottom bit of the window */ 228
131 229 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
132 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; 230 for (b=bits-1; b>=0; b--)
133 for (;;)
134 { 231 {
135 xvalue=BN_is_bit_set(p1,wstart); 232 if (!r_is_one)
136 yvalue=BN_is_bit_set(p2,wstart);
137 if (!(xvalue || yvalue))
138 { 233 {
139 if (!start) 234 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
235 goto err;
236 }
237
238 if (!wvalue1)
239 if (BN_is_bit_set(p1, b))
140 { 240 {
141 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) 241 /* consider bits b-window1+1 .. b for this window */
142 goto err; 242 i = b-window1+1;
243 while (!BN_is_bit_set(p1, i)) /* works for i<0 */
244 i++;
245 wpos1 = i;
246 wvalue1 = 1;
247 for (i = b-1; i >= wpos1; i--)
248 {
249 wvalue1 <<= 1;
250 if (BN_is_bit_set(p1, i))
251 wvalue1++;
252 }
143 } 253 }
144 wstart--; 254
145 if (wstart < 0) break; 255 if (!wvalue2)
146 continue; 256 if (BN_is_bit_set(p2, b))
147 }
148 /* We now have wstart on a 'set' bit, we now need to work out
149 * how bit a window to do. To do this we need to scan
150 * forward until the last set bit before the end of the
151 * window */
152 j=wstart;
153 /* xvalue=BN_is_bit_set(p1,wstart); already set */
154 /* yvalue=BN_is_bit_set(p1,wstart); already set */
155 wend=0;
156 for (i=1; i<window; i++)
157 {
158 if (wstart-i < 0) break;
159 xvalue+=xvalue;
160 xvalue|=BN_is_bit_set(p1,wstart-i);
161 yvalue+=yvalue;
162 yvalue|=BN_is_bit_set(p2,wstart-i);
163 }
164
165 /* i is the size of the current window */
166 /* add the 'bytes above' */
167 if (!start)
168 for (j=0; j<i; j++)
169 { 257 {
170 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) 258 /* consider bits b-window2+1 .. b for this window */
171 goto err; 259 i = b-window2+1;
260 while (!BN_is_bit_set(p2, i))
261 i++;
262 wpos2 = i;
263 wvalue2 = 1;
264 for (i = b-1; i >= wpos2; i--)
265 {
266 wvalue2 <<= 1;
267 if (BN_is_bit_set(p2, i))
268 wvalue2++;
269 }
172 } 270 }
271
272 if (wvalue1 && b == wpos1)
273 {
274 /* wvalue1 is odd and < 2^window1 */
275 if (!BN_mod_mul_montgomery(r,r,&(val1[wvalue1>>1]),mont,ctx))
276 goto err;
277 wvalue1 = 0;
278 r_is_one = 0;
279 }
173 280
174 /* wvalue will be an odd number < 2^window */ 281 if (wvalue2 && b == wpos2)
175 if (xvalue || yvalue)
176 { 282 {
177 if (!BN_mod_mul_montgomery(r,r,&(val[xvalue][yvalue]), 283 /* wvalue2 is odd and < 2^window2 */
178 mont,ctx)) goto err; 284 if (!BN_mod_mul_montgomery(r,r,&(val2[wvalue2>>1]),mont,ctx))
285 goto err;
286 wvalue2 = 0;
287 r_is_one = 0;
179 } 288 }
180
181 /* move the 'window' down further */
182 wstart-=i;
183 start=0;
184 if (wstart < 0) break;
185 } 289 }
186 BN_from_montgomery(rr,r,mont,ctx); 290 BN_from_montgomery(rr,r,mont,ctx);
187 ret=1; 291 ret=1;
188err: 292err:
189 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 293 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
190 BN_CTX_end(ctx); 294 BN_CTX_end(ctx);
191 for (i=0; i<ts; i++) 295 for (i=0; i<ts1; i++)
192 { 296 BN_clear_free(&(val1[i]));
193 for (j=0; j<ts; j++) 297 for (i=0; i<ts2; i++)
194 { 298 BN_clear_free(&(val2[i]));
195 BN_clear_free(&(val[i][j]));
196 }
197 }
198 return(ret); 299 return(ret);
199 } 300 }
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h
index e36ccbc4c2..9c959921b4 100644
--- a/src/lib/libcrypto/bn/bn_lcl.h
+++ b/src/lib/libcrypto/bn/bn_lcl.h
@@ -55,6 +55,59 @@
55 * copied and put under another distribution licence 55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
58 111
59#ifndef HEADER_BN_LCL_H 112#ifndef HEADER_BN_LCL_H
60#define HEADER_BN_LCL_H 113#define HEADER_BN_LCL_H
@@ -65,6 +118,51 @@
65extern "C" { 118extern "C" {
66#endif 119#endif
67 120
121
122/*
123 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
124 *
125 *
126 * For window size 'w' (w >= 2) and a random 'b' bits exponent,
127 * the number of multiplications is a constant plus on average
128 *
129 * 2^(w-1) + (b-w)/(w+1);
130 *
131 * here 2^(w-1) is for precomputing the table (we actually need
132 * entries only for windows that have the lowest bit set), and
133 * (b-w)/(w+1) is an approximation for the expected number of
134 * w-bit windows, not counting the first one.
135 *
136 * Thus we should use
137 *
138 * w >= 6 if b > 671
139 * w = 5 if 671 > b > 239
140 * w = 4 if 239 > b > 79
141 * w = 3 if 79 > b > 23
142 * w <= 2 if 23 > b
143 *
144 * (with draws in between). Very small exponents are often selected
145 * with low Hamming weight, so we use w = 1 for b <= 23.
146 */
147#if 1
148#define BN_window_bits_for_exponent_size(b) \
149 ((b) > 671 ? 6 : \
150 (b) > 239 ? 5 : \
151 (b) > 79 ? 4 : \
152 (b) > 23 ? 3 : 1)
153#else
154/* Old SSLeay/OpenSSL table.
155 * Maximum window size was 5, so this table differs for b==1024;
156 * but it coincides for other interesting values (b==160, b==512).
157 */
158#define BN_window_bits_for_exponent_size(b) \
159 ((b) > 255 ? 5 : \
160 (b) > 127 ? 4 : \
161 (b) > 17 ? 3 : 1)
162#endif
163
164
165
68/* Pentium pro 16,16,16,32,64 */ 166/* Pentium pro 16,16,16,32,64 */
69/* Alpha 16,16,16,16.64 */ 167/* Alpha 16,16,16,16.64 */
70#define BN_MULL_SIZE_NORMAL (16) /* 32 */ 168#define BN_MULL_SIZE_NORMAL (16) /* 32 */
@@ -130,7 +228,7 @@ extern "C" {
130/* This is used for internal error checking and is not normally used */ 228/* This is used for internal error checking and is not normally used */
131#ifdef BN_DEBUG 229#ifdef BN_DEBUG
132# include <assert.h> 230# include <assert.h>
133# define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->max); 231# define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->dmax);
134#else 232#else
135# define bn_check_top(a) 233# define bn_check_top(a)
136#endif 234#endif
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
index 0e6b12d9c3..b6b0ce4b3c 100644
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ b/src/lib/libcrypto/bn/bn_lib.c
@@ -56,6 +56,12 @@
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58 58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <assert.h>
59#include <stdio.h> 65#include <stdio.h>
60#include "cryptlib.h" 66#include "cryptlib.h"
61#include "bn_lcl.h" 67#include "bn_lcl.h"
@@ -244,14 +250,8 @@ int BN_num_bits(const BIGNUM *a)
244 250
245 if (a->top == 0) return(0); 251 if (a->top == 0) return(0);
246 l=a->d[a->top-1]; 252 l=a->d[a->top-1];
253 assert(l != 0);
247 i=(a->top-1)*BN_BITS2; 254 i=(a->top-1)*BN_BITS2;
248 if (l == 0)
249 {
250#if !defined(NO_STDIO) && !defined(WIN16)
251 fprintf(stderr,"BAD TOP VALUE\n");
252#endif
253 abort();
254 }
255 return(i+BN_num_bits_word(l)); 255 return(i+BN_num_bits_word(l));
256 } 256 }
257 257
@@ -262,24 +262,24 @@ void BN_clear_free(BIGNUM *a)
262 if (a == NULL) return; 262 if (a == NULL) return;
263 if (a->d != NULL) 263 if (a->d != NULL)
264 { 264 {
265 memset(a->d,0,a->max*sizeof(a->d[0])); 265 memset(a->d,0,a->dmax*sizeof(a->d[0]));
266 if (!(BN_get_flags(a,BN_FLG_STATIC_DATA))) 266 if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
267 Free(a->d); 267 OPENSSL_free(a->d);
268 } 268 }
269 i=BN_get_flags(a,BN_FLG_MALLOCED); 269 i=BN_get_flags(a,BN_FLG_MALLOCED);
270 memset(a,0,sizeof(BIGNUM)); 270 memset(a,0,sizeof(BIGNUM));
271 if (i) 271 if (i)
272 Free(a); 272 OPENSSL_free(a);
273 } 273 }
274 274
275void BN_free(BIGNUM *a) 275void BN_free(BIGNUM *a)
276 { 276 {
277 if (a == NULL) return; 277 if (a == NULL) return;
278 if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA))) 278 if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
279 Free(a->d); 279 OPENSSL_free(a->d);
280 a->flags|=BN_FLG_FREE; /* REMOVE? */ 280 a->flags|=BN_FLG_FREE; /* REMOVE? */
281 if (a->flags & BN_FLG_MALLOCED) 281 if (a->flags & BN_FLG_MALLOCED)
282 Free(a); 282 OPENSSL_free(a);
283 } 283 }
284 284
285void BN_init(BIGNUM *a) 285void BN_init(BIGNUM *a)
@@ -291,7 +291,7 @@ BIGNUM *BN_new(void)
291 { 291 {
292 BIGNUM *ret; 292 BIGNUM *ret;
293 293
294 if ((ret=(BIGNUM *)Malloc(sizeof(BIGNUM))) == NULL) 294 if ((ret=(BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL)
295 { 295 {
296 BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE); 296 BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
297 return(NULL); 297 return(NULL);
@@ -299,7 +299,7 @@ BIGNUM *BN_new(void)
299 ret->flags=BN_FLG_MALLOCED; 299 ret->flags=BN_FLG_MALLOCED;
300 ret->top=0; 300 ret->top=0;
301 ret->neg=0; 301 ret->neg=0;
302 ret->max=0; 302 ret->dmax=0;
303 ret->d=NULL; 303 ret->d=NULL;
304 return(ret); 304 return(ret);
305 } 305 }
@@ -317,7 +317,7 @@ BIGNUM *bn_expand2(BIGNUM *b, int words)
317 317
318 bn_check_top(b); 318 bn_check_top(b);
319 319
320 if (words > b->max) 320 if (words > b->dmax)
321 { 321 {
322 bn_check_top(b); 322 bn_check_top(b);
323 if (BN_get_flags(b,BN_FLG_STATIC_DATA)) 323 if (BN_get_flags(b,BN_FLG_STATIC_DATA))
@@ -325,7 +325,7 @@ BIGNUM *bn_expand2(BIGNUM *b, int words)
325 BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); 325 BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
326 return(NULL); 326 return(NULL);
327 } 327 }
328 a=A=(BN_ULONG *)Malloc(sizeof(BN_ULONG)*(words+1)); 328 a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1));
329 if (A == NULL) 329 if (A == NULL)
330 { 330 {
331 BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE); 331 BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE);
@@ -423,21 +423,21 @@ BIGNUM *bn_expand2(BIGNUM *b, int words)
423 case 0: ; /* ultrix cc workaround, see above */ 423 case 0: ; /* ultrix cc workaround, see above */
424 } 424 }
425#endif 425#endif
426 Free(b->d); 426 OPENSSL_free(b->d);
427 } 427 }
428 428
429 b->d=a; 429 b->d=a;
430 b->max=words; 430 b->dmax=words;
431 431
432 /* Now need to zero any data between b->top and b->max */ 432 /* Now need to zero any data between b->top and b->max */
433 433
434 A= &(b->d[b->top]); 434 A= &(b->d[b->top]);
435 for (i=(b->max - b->top)>>3; i>0; i--,A+=8) 435 for (i=(b->dmax - b->top)>>3; i>0; i--,A+=8)
436 { 436 {
437 A[0]=0; A[1]=0; A[2]=0; A[3]=0; 437 A[0]=0; A[1]=0; A[2]=0; A[3]=0;
438 A[4]=0; A[5]=0; A[6]=0; A[7]=0; 438 A[4]=0; A[5]=0; A[6]=0; A[7]=0;
439 } 439 }
440 for (i=(b->max - b->top)&7; i>0; i--,A++) 440 for (i=(b->dmax - b->top)&7; i>0; i--,A++)
441 A[0]=0; 441 A[0]=0;
442#else 442#else
443 memset(A,0,sizeof(BN_ULONG)*(words+1)); 443 memset(A,0,sizeof(BN_ULONG)*(words+1));
@@ -508,7 +508,7 @@ BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
508void BN_clear(BIGNUM *a) 508void BN_clear(BIGNUM *a)
509 { 509 {
510 if (a->d != NULL) 510 if (a->d != NULL)
511 memset(a->d,0,a->max*sizeof(a->d[0])); 511 memset(a->d,0,a->dmax*sizeof(a->d[0]));
512 a->top=0; 512 a->top=0;
513 a->neg=0; 513 a->neg=0;
514 } 514 }
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
index 598fecbf0c..8cf1febacc 100644
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ b/src/lib/libcrypto/bn/bn_mont.c
@@ -85,16 +85,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, BIGNUM *a, BIGNUM *b,
85 85
86 if (a == b) 86 if (a == b)
87 { 87 {
88#if 0
89 bn_wexpand(tmp,a->top*2);
90 bn_wexpand(tmp2,a->top*4);
91 bn_sqr_recursive(tmp->d,a->d,a->top,tmp2->d);
92 tmp->top=a->top*2;
93 if (tmp->d[tmp->top-1] == 0)
94 tmp->top--;
95#else
96 if (!BN_sqr(tmp,a,ctx)) goto err; 88 if (!BN_sqr(tmp,a,ctx)) goto err;
97#endif
98 } 89 }
99 else 90 else
100 { 91 {
@@ -157,7 +148,22 @@ int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont,
157#endif 148#endif
158 for (i=0; i<nl; i++) 149 for (i=0; i<nl; i++)
159 { 150 {
151#ifdef __TANDEM
152 {
153 long long t1;
154 long long t2;
155 long long t3;
156 t1 = rp[0] * (n0 & 0177777);
157 t2 = 037777600000l;
158 t2 = n0 & t2;
159 t3 = rp[0] & 0177777;
160 t2 = (t3 * t2) & BN_MASK2;
161 t1 = t1 + t2;
162 v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
163 }
164#else
160 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2); 165 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
166#endif
161 nrp++; 167 nrp++;
162 rp++; 168 rp++;
163 if (((nrp[-1]+=v)&BN_MASK2) >= v) 169 if (((nrp[-1]+=v)&BN_MASK2) >= v)
@@ -175,6 +181,7 @@ int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont,
175#if 0 181#if 0
176 BN_rshift(ret,r,mont->ri); 182 BN_rshift(ret,r,mont->ri);
177#else 183#else
184 ret->neg = r->neg;
178 x=ri; 185 x=ri;
179 rp=ret->d; 186 rp=ret->d;
180 ap= &(r->d[x]); 187 ap= &(r->d[x]);
@@ -234,7 +241,7 @@ BN_MONT_CTX *BN_MONT_CTX_new(void)
234 { 241 {
235 BN_MONT_CTX *ret; 242 BN_MONT_CTX *ret;
236 243
237 if ((ret=(BN_MONT_CTX *)Malloc(sizeof(BN_MONT_CTX))) == NULL) 244 if ((ret=(BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL)
238 return(NULL); 245 return(NULL);
239 246
240 BN_MONT_CTX_init(ret); 247 BN_MONT_CTX_init(ret);
@@ -260,7 +267,7 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont)
260 BN_free(&(mont->N)); 267 BN_free(&(mont->N));
261 BN_free(&(mont->Ni)); 268 BN_free(&(mont->Ni));
262 if (mont->flags & BN_FLG_MALLOCED) 269 if (mont->flags & BN_FLG_MALLOCED)
263 Free(mont); 270 OPENSSL_free(mont);
264 } 271 }
265 272
266int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) 273int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
@@ -284,7 +291,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
284 buf[1]=0; 291 buf[1]=0;
285 tmod.d=buf; 292 tmod.d=buf;
286 tmod.top=1; 293 tmod.top=1;
287 tmod.max=2; 294 tmod.dmax=2;
288 tmod.neg=mod->neg; 295 tmod.neg=mod->neg;
289 /* Ri = R^-1 mod N*/ 296 /* Ri = R^-1 mod N*/
290 if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL) 297 if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL)
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
index 3e8baaad9a..3e8d8b9567 100644
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ b/src/lib/libcrypto/bn/bn_mul.c
@@ -631,7 +631,6 @@ int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
631 631
632 al=a->top; 632 al=a->top;
633 bl=b->top; 633 bl=b->top;
634 r->neg=a->neg^b->neg;
635 634
636 if ((al == 0) || (bl == 0)) 635 if ((al == 0) || (bl == 0))
637 { 636 {
@@ -647,6 +646,7 @@ int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
647 } 646 }
648 else 647 else
649 rr = r; 648 rr = r;
649 rr->neg=a->neg^b->neg;
650 650
651#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) 651#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
652 i = al-bl; 652 i = al-bl;
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
index 782a96e7e0..532e66bcc3 100644
--- a/src/lib/libcrypto/bn/bn_print.c
+++ b/src/lib/libcrypto/bn/bn_print.c
@@ -64,14 +64,14 @@
64 64
65static const char *Hex="0123456789ABCDEF"; 65static const char *Hex="0123456789ABCDEF";
66 66
67/* Must 'Free' the returned data */ 67/* Must 'OPENSSL_free' the returned data */
68char *BN_bn2hex(const BIGNUM *a) 68char *BN_bn2hex(const BIGNUM *a)
69 { 69 {
70 int i,j,v,z=0; 70 int i,j,v,z=0;
71 char *buf; 71 char *buf;
72 char *p; 72 char *p;
73 73
74 buf=(char *)Malloc(a->top*BN_BYTES*2+2); 74 buf=(char *)OPENSSL_malloc(a->top*BN_BYTES*2+2);
75 if (buf == NULL) 75 if (buf == NULL)
76 { 76 {
77 BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE); 77 BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE);
@@ -99,7 +99,7 @@ err:
99 return(buf); 99 return(buf);
100 } 100 }
101 101
102/* Must 'Free' the returned data */ 102/* Must 'OPENSSL_free' the returned data */
103char *BN_bn2dec(const BIGNUM *a) 103char *BN_bn2dec(const BIGNUM *a)
104 { 104 {
105 int i=0,num; 105 int i=0,num;
@@ -110,8 +110,8 @@ char *BN_bn2dec(const BIGNUM *a)
110 110
111 i=BN_num_bits(a)*3; 111 i=BN_num_bits(a)*3;
112 num=(i/10+i/1000+3)+1; 112 num=(i/10+i/1000+3)+1;
113 bn_data=(BN_ULONG *)Malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG)); 113 bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG));
114 buf=(char *)Malloc(num+3); 114 buf=(char *)OPENSSL_malloc(num+3);
115 if ((buf == NULL) || (bn_data == NULL)) 115 if ((buf == NULL) || (bn_data == NULL))
116 { 116 {
117 BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE); 117 BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE);
@@ -149,7 +149,7 @@ char *BN_bn2dec(const BIGNUM *a)
149 } 149 }
150 } 150 }
151err: 151err:
152 if (bn_data != NULL) Free(bn_data); 152 if (bn_data != NULL) OPENSSL_free(bn_data);
153 if (t != NULL) BN_free(t); 153 if (t != NULL) BN_free(t);
154 return(buf); 154 return(buf);
155 } 155 }
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c
index 943712c15b..21ecbc04ed 100644
--- a/src/lib/libcrypto/bn/bn_rand.c
+++ b/src/lib/libcrypto/bn/bn_rand.c
@@ -68,11 +68,17 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
68 int ret=0,bit,bytes,mask; 68 int ret=0,bit,bytes,mask;
69 time_t tim; 69 time_t tim;
70 70
71 if (bits == 0)
72 {
73 BN_zero(rnd);
74 return 1;
75 }
76
71 bytes=(bits+7)/8; 77 bytes=(bits+7)/8;
72 bit=(bits-1)%8; 78 bit=(bits-1)%8;
73 mask=0xff<<bit; 79 mask=0xff<<bit;
74 80
75 buf=(unsigned char *)Malloc(bytes); 81 buf=(unsigned char *)OPENSSL_malloc(bytes);
76 if (buf == NULL) 82 if (buf == NULL)
77 { 83 {
78 BNerr(BN_F_BN_RAND,ERR_R_MALLOC_FAILURE); 84 BNerr(BN_F_BN_RAND,ERR_R_MALLOC_FAILURE);
@@ -120,7 +126,7 @@ err:
120 if (buf != NULL) 126 if (buf != NULL)
121 { 127 {
122 memset(buf,0,bytes); 128 memset(buf,0,bytes);
123 Free(buf); 129 OPENSSL_free(buf);
124 } 130 }
125 return(ret); 131 return(ret);
126 } 132 }
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c
index a8796bd0aa..d019941d6b 100644
--- a/src/lib/libcrypto/bn/bn_recp.c
+++ b/src/lib/libcrypto/bn/bn_recp.c
@@ -72,7 +72,7 @@ BN_RECP_CTX *BN_RECP_CTX_new(void)
72 { 72 {
73 BN_RECP_CTX *ret; 73 BN_RECP_CTX *ret;
74 74
75 if ((ret=(BN_RECP_CTX *)Malloc(sizeof(BN_RECP_CTX))) == NULL) 75 if ((ret=(BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL)
76 return(NULL); 76 return(NULL);
77 77
78 BN_RECP_CTX_init(ret); 78 BN_RECP_CTX_init(ret);
@@ -88,7 +88,7 @@ void BN_RECP_CTX_free(BN_RECP_CTX *recp)
88 BN_free(&(recp->N)); 88 BN_free(&(recp->N));
89 BN_free(&(recp->Nr)); 89 BN_free(&(recp->Nr));
90 if (recp->flags & BN_FLG_MALLOCED) 90 if (recp->flags & BN_FLG_MALLOCED)
91 Free(recp); 91 OPENSSL_free(recp);
92 } 92 }
93 93
94int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) 94int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
index 61aae65a6b..0883247384 100644
--- a/src/lib/libcrypto/bn/bn_shift.c
+++ b/src/lib/libcrypto/bn/bn_shift.c
@@ -162,7 +162,7 @@ int BN_rshift(BIGNUM *r, BIGNUM *a, int n)
162 nw=n/BN_BITS2; 162 nw=n/BN_BITS2;
163 rb=n%BN_BITS2; 163 rb=n%BN_BITS2;
164 lb=BN_BITS2-rb; 164 lb=BN_BITS2-rb;
165 if (nw > a->top) 165 if (nw > a->top || a->top == 0)
166 { 166 {
167 BN_zero(r); 167 BN_zero(r);
168 return(1); 168 return(1);
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
index fe00c5f69a..75f4f38392 100644
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ b/src/lib/libcrypto/bn/bn_sqr.c
@@ -188,7 +188,7 @@ void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp)
188 188
189#ifdef BN_RECURSION 189#ifdef BN_RECURSION
190/* r is 2*n words in size, 190/* r is 2*n words in size,
191 * a and b are both n words in size. 191 * a and b are both n words in size. (There's not actually a 'b' here ...)
192 * n must be a power of 2. 192 * n must be a power of 2.
193 * We multiply and return the result. 193 * We multiply and return the result.
194 * t must be 2*n words in size 194 * t must be 2*n words in size
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c
index 73157a7d43..cd59baa2c4 100644
--- a/src/lib/libcrypto/bn/bn_word.c
+++ b/src/lib/libcrypto/bn/bn_word.c
@@ -115,7 +115,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w)
115 a->neg=0; 115 a->neg=0;
116 i=BN_sub_word(a,w); 116 i=BN_sub_word(a,w);
117 if (!BN_is_zero(a)) 117 if (!BN_is_zero(a))
118 a->neg=1; 118 a->neg=!(a->neg);
119 return(i); 119 return(i);
120 } 120 }
121 w&=BN_MASK2; 121 w&=BN_MASK2;
@@ -140,7 +140,7 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w)
140 { 140 {
141 int i; 141 int i;
142 142
143 if (a->neg) 143 if (BN_is_zero(a) || a->neg)
144 { 144 {
145 a->neg=0; 145 a->neg=0;
146 i=BN_add_word(a,w); 146 i=BN_add_word(a,w);
@@ -182,11 +182,16 @@ int BN_mul_word(BIGNUM *a, BN_ULONG w)
182 w&=BN_MASK2; 182 w&=BN_MASK2;
183 if (a->top) 183 if (a->top)
184 { 184 {
185 ll=bn_mul_words(a->d,a->d,a->top,w); 185 if (w == 0)
186 if (ll) 186 BN_zero(a);
187 else
187 { 188 {
188 if (bn_wexpand(a,a->top+1) == NULL) return(0); 189 ll=bn_mul_words(a->d,a->d,a->top,w);
189 a->d[a->top++]=ll; 190 if (ll)
191 {
192 if (bn_wexpand(a,a->top+1) == NULL) return(0);
193 a->d[a->top++]=ll;
194 }
190 } 195 }
191 } 196 }
192 return(1); 197 return(1);
diff --git a/src/lib/libcrypto/bn/vms-helper.c b/src/lib/libcrypto/bn/vms-helper.c
index 73af337069..0fa79c4edb 100644
--- a/src/lib/libcrypto/bn/vms-helper.c
+++ b/src/lib/libcrypto/bn/vms-helper.c
@@ -59,8 +59,10 @@
59 59
60bn_div_words_abort(int i) 60bn_div_words_abort(int i)
61{ 61{
62#ifdef BN_DEBUG
62#if !defined(NO_STDIO) && !defined(WIN16) 63#if !defined(NO_STDIO) && !defined(WIN16)
63 fprintf(stderr,"Division would overflow (%d)\n",i); 64 fprintf(stderr,"Division would overflow (%d)\n",i);
64#endif 65#endif
65 abort(); 66 abort();
67#endif
66} 68}