diff options
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r-- | src/lib/libcrypto/bn/Makefile.ssl | 135 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/README | 12 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/pa-risc2.s | 2024 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/pa-risc2W.s | 1605 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn.h | 12 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 11 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_blind.c | 4 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_ctx.c | 4 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_div.c | 2 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_err.c | 2 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_exp.c | 521 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_exp2.c | 357 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_lcl.h | 100 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_lib.c | 42 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_mont.c | 31 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_mul.c | 2 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_print.c | 12 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_rand.c | 10 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_recp.c | 4 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_shift.c | 2 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_sqr.c | 2 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_word.c | 17 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/vms-helper.c | 2 |
23 files changed, 3956 insertions, 957 deletions
diff --git a/src/lib/libcrypto/bn/Makefile.ssl b/src/lib/libcrypto/bn/Makefile.ssl index beb9c1b523..17b72d577f 100644 --- a/src/lib/libcrypto/bn/Makefile.ssl +++ b/src/lib/libcrypto/bn/Makefile.ssl | |||
@@ -170,118 +170,143 @@ clean: | |||
170 | bn_add.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 170 | bn_add.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
171 | bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 171 | bn_add.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
172 | bn_add.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 172 | bn_add.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
173 | bn_add.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 173 | bn_add.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
174 | bn_add.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 174 | bn_add.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
175 | bn_add.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 175 | bn_add.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
176 | bn_add.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
176 | bn_asm.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 177 | bn_asm.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
177 | bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 178 | bn_asm.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
178 | bn_asm.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 179 | bn_asm.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
179 | bn_asm.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 180 | bn_asm.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
180 | bn_asm.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 181 | bn_asm.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
181 | bn_asm.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 182 | bn_asm.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
183 | bn_asm.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
182 | bn_blind.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 184 | bn_blind.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
183 | bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 185 | bn_blind.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
184 | bn_blind.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 186 | bn_blind.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
185 | bn_blind.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 187 | bn_blind.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
188 | bn_blind.o: ../../include/openssl/opensslconf.h | ||
186 | bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 189 | bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h |
187 | bn_blind.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 190 | bn_blind.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h |
191 | bn_blind.o: ../cryptlib.h bn_lcl.h | ||
188 | bn_ctx.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 192 | bn_ctx.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
189 | bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 193 | bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
190 | bn_ctx.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 194 | bn_ctx.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
191 | bn_ctx.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 195 | bn_ctx.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
192 | bn_ctx.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 196 | bn_ctx.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
193 | bn_ctx.o: ../../include/openssl/stack.h ../cryptlib.h | 197 | bn_ctx.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
198 | bn_ctx.o: ../../include/openssl/symhacks.h ../cryptlib.h | ||
194 | bn_div.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 199 | bn_div.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
195 | bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 200 | bn_div.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
196 | bn_div.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 201 | bn_div.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
197 | bn_div.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 202 | bn_div.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
198 | bn_div.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 203 | bn_div.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
199 | bn_div.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 204 | bn_div.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
200 | bn_err.o: ../../include/openssl/bn.h ../../include/openssl/err.h | 205 | bn_div.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h |
201 | bn_err.o: ../../include/openssl/opensslconf.h | 206 | bn_err.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
207 | bn_err.o: ../../include/openssl/crypto.h ../../include/openssl/err.h | ||
208 | bn_err.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h | ||
209 | bn_err.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | ||
210 | bn_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h | ||
202 | bn_exp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 211 | bn_exp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
203 | bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 212 | bn_exp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
204 | bn_exp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 213 | bn_exp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
205 | bn_exp.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 214 | bn_exp.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
206 | bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 215 | bn_exp.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
207 | bn_exp.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 216 | bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
217 | bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
208 | bn_exp2.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 218 | bn_exp2.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
209 | bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 219 | bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
210 | bn_exp2.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 220 | bn_exp2.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
211 | bn_exp2.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 221 | bn_exp2.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
212 | bn_exp2.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 222 | bn_exp2.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
213 | bn_exp2.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 223 | bn_exp2.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
224 | bn_exp2.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
214 | bn_gcd.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 225 | bn_gcd.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
215 | bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 226 | bn_gcd.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
216 | bn_gcd.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 227 | bn_gcd.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
217 | bn_gcd.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 228 | bn_gcd.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
218 | bn_gcd.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 229 | bn_gcd.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
219 | bn_gcd.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 230 | bn_gcd.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
231 | bn_gcd.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
220 | bn_lib.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 232 | bn_lib.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
221 | bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 233 | bn_lib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
222 | bn_lib.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 234 | bn_lib.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
223 | bn_lib.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 235 | bn_lib.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
224 | bn_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 236 | bn_lib.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
225 | bn_lib.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 237 | bn_lib.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
238 | bn_lib.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
226 | bn_mont.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 239 | bn_mont.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
227 | bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 240 | bn_mont.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
228 | bn_mont.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 241 | bn_mont.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
229 | bn_mont.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 242 | bn_mont.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
230 | bn_mont.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 243 | bn_mont.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
231 | bn_mont.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 244 | bn_mont.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
245 | bn_mont.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
232 | bn_mpi.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 246 | bn_mpi.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
233 | bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 247 | bn_mpi.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
234 | bn_mpi.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 248 | bn_mpi.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
235 | bn_mpi.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 249 | bn_mpi.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
236 | bn_mpi.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 250 | bn_mpi.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
237 | bn_mpi.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 251 | bn_mpi.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
252 | bn_mpi.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
238 | bn_mul.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 253 | bn_mul.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
239 | bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 254 | bn_mul.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
240 | bn_mul.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 255 | bn_mul.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
241 | bn_mul.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 256 | bn_mul.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
242 | bn_mul.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 257 | bn_mul.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
243 | bn_mul.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 258 | bn_mul.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
259 | bn_mul.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
244 | bn_prime.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 260 | bn_prime.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
245 | bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 261 | bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
246 | bn_prime.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 262 | bn_prime.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
247 | bn_prime.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 263 | bn_prime.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
264 | bn_prime.o: ../../include/openssl/opensslconf.h | ||
248 | bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h | 265 | bn_prime.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h |
249 | bn_prime.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | 266 | bn_prime.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
250 | bn_prime.o: ../cryptlib.h bn_lcl.h bn_prime.h | 267 | bn_prime.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_prime.h |
251 | bn_print.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 268 | bn_print.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
252 | bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 269 | bn_print.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
253 | bn_print.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 270 | bn_print.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
254 | bn_print.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 271 | bn_print.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
272 | bn_print.o: ../../include/openssl/opensslconf.h | ||
255 | bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 273 | bn_print.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h |
256 | bn_print.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 274 | bn_print.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h |
275 | bn_print.o: ../cryptlib.h bn_lcl.h | ||
257 | bn_rand.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 276 | bn_rand.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
258 | bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 277 | bn_rand.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
259 | bn_rand.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 278 | bn_rand.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
260 | bn_rand.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 279 | bn_rand.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
261 | bn_rand.o: ../../include/openssl/opensslv.h ../../include/openssl/rand.h | 280 | bn_rand.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
262 | bn_rand.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | 281 | bn_rand.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h |
282 | bn_rand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h | ||
263 | bn_rand.o: ../cryptlib.h bn_lcl.h | 283 | bn_rand.o: ../cryptlib.h bn_lcl.h |
264 | bn_recp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 284 | bn_recp.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
265 | bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 285 | bn_recp.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
266 | bn_recp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 286 | bn_recp.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
267 | bn_recp.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 287 | bn_recp.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
268 | bn_recp.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 288 | bn_recp.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
269 | bn_recp.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 289 | bn_recp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
290 | bn_recp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
270 | bn_shift.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 291 | bn_shift.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
271 | bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 292 | bn_shift.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
272 | bn_shift.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 293 | bn_shift.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
273 | bn_shift.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 294 | bn_shift.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
295 | bn_shift.o: ../../include/openssl/opensslconf.h | ||
274 | bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 296 | bn_shift.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h |
275 | bn_shift.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 297 | bn_shift.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h |
298 | bn_shift.o: ../cryptlib.h bn_lcl.h | ||
276 | bn_sqr.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 299 | bn_sqr.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
277 | bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 300 | bn_sqr.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
278 | bn_sqr.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 301 | bn_sqr.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
279 | bn_sqr.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 302 | bn_sqr.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
280 | bn_sqr.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 303 | bn_sqr.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
281 | bn_sqr.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 304 | bn_sqr.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
305 | bn_sqr.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
282 | bn_word.o: ../../include/openssl/bio.h ../../include/openssl/bn.h | 306 | bn_word.o: ../../include/openssl/bio.h ../../include/openssl/bn.h |
283 | bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | 307 | bn_word.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
284 | bn_word.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h | 308 | bn_word.o: ../../include/openssl/e_os.h ../../include/openssl/e_os2.h |
285 | bn_word.o: ../../include/openssl/err.h ../../include/openssl/opensslconf.h | 309 | bn_word.o: ../../include/openssl/err.h ../../include/openssl/lhash.h |
286 | bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/safestack.h | 310 | bn_word.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
287 | bn_word.o: ../../include/openssl/stack.h ../cryptlib.h bn_lcl.h | 311 | bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
312 | bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h | ||
diff --git a/src/lib/libcrypto/bn/asm/README b/src/lib/libcrypto/bn/asm/README index 86bf64cfc2..a0fe58a677 100644 --- a/src/lib/libcrypto/bn/asm/README +++ b/src/lib/libcrypto/bn/asm/README | |||
@@ -15,9 +15,9 @@ On the 2 alpha C compilers I had access to, it was not possible to do | |||
15 | were 64 bits). So the hand assember gives access to the 128 bit result and | 15 | were 64 bits). So the hand assember gives access to the 128 bit result and |
16 | a 2 times speedup :-). | 16 | a 2 times speedup :-). |
17 | 17 | ||
18 | There are 2 versions of assember for the HP PA-RISC. | 18 | There are 3 versions of assembler for the HP PA-RISC. |
19 | pa-risc.s is the origional one which works fine. | 19 | |
20 | pa-risc2.s is a new version that often generates warnings but if the | 20 | pa-risc.s is the original one, which works fine and was generated using gcc :-) |
21 | tests pass, it gives performance that is over 2 times faster than | 21 | |
22 | pa-risc.s. | 22 | pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations |
23 | Both were generated using gcc :-) | 23 | by Chris Ruemmler from HP (with some help from the HP C compiler). |
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s index c2725996a4..7239aa2c76 100644 --- a/src/lib/libcrypto/bn/asm/pa-risc2.s +++ b/src/lib/libcrypto/bn/asm/pa-risc2.s | |||
@@ -1,416 +1,1618 @@ | |||
1 | .SPACE $PRIVATE$ | 1 | ; |
2 | .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 | 2 | ; PA-RISC 2.0 implementation of bn_asm code, based on the |
3 | .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 | 3 | ; 64-bit version of the code. This code is effectively the |
4 | .SPACE $TEXT$ | 4 | ; same as the 64-bit version except the register model is |
5 | .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 | 5 | ; slightly different given all values must be 32-bit between |
6 | .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY | 6 | ; function calls. Thus the 64-bit return values are returned |
7 | .IMPORT $global$,DATA | 7 | ; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit |
8 | .IMPORT $$dyncall,MILLICODE | 8 | ; |
9 | ; gcc_compiled.: | 9 | ; |
10 | .SPACE $TEXT$ | 10 | ; This code is approximately 2x faster than the C version |
11 | .SUBSPA $CODE$ | 11 | ; for RSA/DSA. |
12 | 12 | ; | |
13 | .align 4 | 13 | ; See http://devresource.hp.com/ for more details on the PA-RISC |
14 | .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR | 14 | ; architecture. Also see the book "PA-RISC 2.0 Architecture" |
15 | ; by Gerry Kane for information on the instruction set architecture. | ||
16 | ; | ||
17 | ; Code written by Chris Ruemmler (with some help from the HP C | ||
18 | ; compiler). | ||
19 | ; | ||
20 | ; The code compiles with HP's assembler | ||
21 | ; | ||
22 | |||
23 | .level 2.0N | ||
24 | .space $TEXT$ | ||
25 | .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY | ||
26 | |||
27 | ; | ||
28 | ; Global Register definitions used for the routines. | ||
29 | ; | ||
30 | ; Some information about HP's runtime architecture for 32-bits. | ||
31 | ; | ||
32 | ; "Caller save" means the calling function must save the register | ||
33 | ; if it wants the register to be preserved. | ||
34 | ; "Callee save" means if a function uses the register, it must save | ||
35 | ; the value before using it. | ||
36 | ; | ||
37 | ; For the floating point registers | ||
38 | ; | ||
39 | ; "caller save" registers: fr4-fr11, fr22-fr31 | ||
40 | ; "callee save" registers: fr12-fr21 | ||
41 | ; "special" registers: fr0-fr3 (status and exception registers) | ||
42 | ; | ||
43 | ; For the integer registers | ||
44 | ; value zero : r0 | ||
45 | ; "caller save" registers: r1,r19-r26 | ||
46 | ; "callee save" registers: r3-r18 | ||
47 | ; return register : r2 (rp) | ||
48 | ; return values ; r28,r29 (ret0,ret1) | ||
49 | ; Stack pointer ; r30 (sp) | ||
50 | ; millicode return ptr ; r31 (also a caller save register) | ||
51 | |||
52 | |||
53 | ; | ||
54 | ; Arguments to the routines | ||
55 | ; | ||
56 | r_ptr .reg %r26 | ||
57 | a_ptr .reg %r25 | ||
58 | b_ptr .reg %r24 | ||
59 | num .reg %r24 | ||
60 | n .reg %r23 | ||
61 | |||
62 | ; | ||
63 | ; Note that the "w" argument for bn_mul_add_words and bn_mul_words | ||
64 | ; is passed on the stack at a delta of -56 from the top of stack | ||
65 | ; as the routine is entered. | ||
66 | ; | ||
67 | |||
68 | ; | ||
69 | ; Globals used in some routines | ||
70 | ; | ||
71 | |||
72 | top_overflow .reg %r23 | ||
73 | high_mask .reg %r22 ; value 0xffffffff80000000L | ||
74 | |||
75 | |||
76 | ;------------------------------------------------------------------------------ | ||
77 | ; | ||
78 | ; bn_mul_add_words | ||
79 | ; | ||
80 | ;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, | ||
81 | ; int num, BN_ULONG w) | ||
82 | ; | ||
83 | ; arg0 = r_ptr | ||
84 | ; arg1 = a_ptr | ||
85 | ; arg2 = num | ||
86 | ; -56(sp) = w | ||
87 | ; | ||
88 | ; Local register definitions | ||
89 | ; | ||
90 | |||
91 | fm1 .reg %fr22 | ||
92 | fm .reg %fr23 | ||
93 | ht_temp .reg %fr24 | ||
94 | ht_temp_1 .reg %fr25 | ||
95 | lt_temp .reg %fr26 | ||
96 | lt_temp_1 .reg %fr27 | ||
97 | fm1_1 .reg %fr28 | ||
98 | fm_1 .reg %fr29 | ||
99 | |||
100 | fw_h .reg %fr7L | ||
101 | fw_l .reg %fr7R | ||
102 | fw .reg %fr7 | ||
103 | |||
104 | fht_0 .reg %fr8L | ||
105 | flt_0 .reg %fr8R | ||
106 | t_float_0 .reg %fr8 | ||
107 | |||
108 | fht_1 .reg %fr9L | ||
109 | flt_1 .reg %fr9R | ||
110 | t_float_1 .reg %fr9 | ||
111 | |||
112 | tmp_0 .reg %r31 | ||
113 | tmp_1 .reg %r21 | ||
114 | m_0 .reg %r20 | ||
115 | m_1 .reg %r19 | ||
116 | ht_0 .reg %r1 | ||
117 | ht_1 .reg %r3 | ||
118 | lt_0 .reg %r4 | ||
119 | lt_1 .reg %r5 | ||
120 | m1_0 .reg %r6 | ||
121 | m1_1 .reg %r7 | ||
122 | rp_val .reg %r8 | ||
123 | rp_val_1 .reg %r9 | ||
124 | |||
15 | bn_mul_add_words | 125 | bn_mul_add_words |
16 | .PROC | 126 | .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN |
17 | .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4 | 127 | .proc |
18 | .ENTRY | 128 | .callinfo frame=128 |
19 | stw %r2,-20(0,%r30) | 129 | .entry |
20 | stwm %r4,64(0,%r30) | 130 | .align 64 |
21 | copy %r24,%r31 | 131 | |
22 | stw %r3,-60(0,%r30) | 132 | STD %r3,0(%sp) ; save r3 |
23 | ldi 0,%r20 | 133 | STD %r4,8(%sp) ; save r4 |
24 | ldo 12(%r26),%r2 | 134 | NOP ; Needed to make the loop 16-byte aligned |
25 | stw %r23,-16(0,%r30) | 135 | NOP ; needed to make the loop 16-byte aligned |
26 | copy %r25,%r3 | 136 | |
27 | ldo 12(%r3),%r1 | 137 | STD %r5,16(%sp) ; save r5 |
28 | fldws -16(0,%r30),%fr8L | 138 | NOP |
29 | L$0010 | 139 | STD %r6,24(%sp) ; save r6 |
30 | copy %r20,%r25 | 140 | STD %r7,32(%sp) ; save r7 |
31 | ldi 0,%r24 | 141 | |
32 | fldws 0(0,%r3),%fr9L | 142 | STD %r8,40(%sp) ; save r8 |
33 | ldw 0(0,%r26),%r19 | 143 | STD %r9,48(%sp) ; save r9 |
34 | xmpyu %fr8L,%fr9L,%fr9 | 144 | COPY %r0,%ret1 ; return 0 by default |
35 | fstds %fr9,-16(0,%r30) | 145 | DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 |
36 | copy %r19,%r23 | 146 | |
37 | ldw -16(0,%r30),%r28 | 147 | CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit |
38 | ldw -12(0,%r30),%r29 | 148 | LDO 128(%sp),%sp ; bump stack |
39 | ldi 0,%r22 | 149 | |
40 | add %r23,%r29,%r29 | 150 | ; |
41 | addc %r22,%r28,%r28 | 151 | ; The loop is unrolled twice, so if there is only 1 number |
42 | add %r25,%r29,%r29 | 152 | ; then go straight to the cleanup code. |
43 | addc %r24,%r28,%r28 | 153 | ; |
44 | copy %r28,%r21 | 154 | CMPIB,= 1,num,bn_mul_add_words_single_top |
45 | ldi 0,%r20 | 155 | FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) |
46 | copy %r21,%r20 | 156 | |
47 | addib,= -1,%r31,L$0011 | 157 | ; |
48 | stw %r29,0(0,%r26) | 158 | ; This loop is unrolled 2 times (64-byte aligned as well) |
49 | copy %r20,%r25 | 159 | ; |
50 | ldi 0,%r24 | 160 | ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus |
51 | fldws -8(0,%r1),%fr9L | 161 | ; two 32-bit mutiplies can be issued per cycle. |
52 | ldw -8(0,%r2),%r19 | 162 | ; |
53 | xmpyu %fr8L,%fr9L,%fr9 | 163 | bn_mul_add_words_unroll2 |
54 | fstds %fr9,-16(0,%r30) | 164 | |
55 | copy %r19,%r23 | 165 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) |
56 | ldw -16(0,%r30),%r28 | 166 | FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr9) ht(L)/lt(R) |
57 | ldw -12(0,%r30),%r29 | 167 | LDD 0(r_ptr),rp_val ; rp[0] |
58 | ldi 0,%r22 | 168 | LDD 8(r_ptr),rp_val_1 ; rp[1] |
59 | add %r23,%r29,%r29 | 169 | |
60 | addc %r22,%r28,%r28 | 170 | XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l |
61 | add %r25,%r29,%r29 | 171 | XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l |
62 | addc %r24,%r28,%r28 | 172 | FSTD fm1,-16(%sp) ; -16(sp) = m1[0] |
63 | copy %r28,%r21 | 173 | FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] |
64 | ldi 0,%r20 | 174 | |
65 | copy %r21,%r20 | 175 | XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h |
66 | addib,= -1,%r31,L$0011 | 176 | XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h |
67 | stw %r29,-8(0,%r2) | 177 | FSTD fm,-8(%sp) ; -8(sp) = m[0] |
68 | copy %r20,%r25 | 178 | FSTD fm_1,-40(%sp) ; -40(sp) = m[1] |
69 | ldi 0,%r24 | 179 | |
70 | fldws -4(0,%r1),%fr9L | 180 | XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h |
71 | ldw -4(0,%r2),%r19 | 181 | XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h |
72 | xmpyu %fr8L,%fr9L,%fr9 | 182 | FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp |
73 | fstds %fr9,-16(0,%r30) | 183 | FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 |
74 | copy %r19,%r23 | 184 | |
75 | ldw -16(0,%r30),%r28 | 185 | XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l |
76 | ldw -12(0,%r30),%r29 | 186 | XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l |
77 | ldi 0,%r22 | 187 | FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp |
78 | add %r23,%r29,%r29 | 188 | FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 |
79 | addc %r22,%r28,%r28 | 189 | |
80 | add %r25,%r29,%r29 | 190 | LDD -8(%sp),m_0 ; m[0] |
81 | addc %r24,%r28,%r28 | 191 | LDD -40(%sp),m_1 ; m[1] |
82 | copy %r28,%r21 | 192 | LDD -16(%sp),m1_0 ; m1[0] |
83 | ldi 0,%r20 | 193 | LDD -48(%sp),m1_1 ; m1[1] |
84 | copy %r21,%r20 | 194 | |
85 | addib,= -1,%r31,L$0011 | 195 | LDD -24(%sp),ht_0 ; ht[0] |
86 | stw %r29,-4(0,%r2) | 196 | LDD -56(%sp),ht_1 ; ht[1] |
87 | copy %r20,%r25 | 197 | ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; |
88 | ldi 0,%r24 | 198 | ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; |
89 | fldws 0(0,%r1),%fr9L | 199 | |
90 | ldw 0(0,%r2),%r19 | 200 | LDD -32(%sp),lt_0 |
91 | xmpyu %fr8L,%fr9L,%fr9 | 201 | LDD -64(%sp),lt_1 |
92 | fstds %fr9,-16(0,%r30) | 202 | CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) |
93 | copy %r19,%r23 | 203 | ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) |
94 | ldw -16(0,%r30),%r28 | 204 | |
95 | ldw -12(0,%r30),%r29 | 205 | CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) |
96 | ldi 0,%r22 | 206 | ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) |
97 | add %r23,%r29,%r29 | 207 | EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 |
98 | addc %r22,%r28,%r28 | 208 | DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 |
99 | add %r25,%r29,%r29 | 209 | |
100 | addc %r24,%r28,%r28 | 210 | EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 |
101 | copy %r28,%r21 | 211 | DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 |
102 | ldi 0,%r20 | 212 | ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) |
103 | copy %r21,%r20 | 213 | ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) |
104 | addib,= -1,%r31,L$0011 | 214 | |
105 | stw %r29,0(0,%r2) | 215 | ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; |
106 | ldo 16(%r1),%r1 | 216 | ADD,DC ht_0,%r0,ht_0 ; ht[0]++ |
107 | ldo 16(%r3),%r3 | 217 | ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; |
108 | ldo 16(%r2),%r2 | 218 | ADD,DC ht_1,%r0,ht_1 ; ht[1]++ |
109 | bl L$0010,0 | 219 | |
110 | ldo 16(%r26),%r26 | 220 | ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c; |
111 | L$0011 | 221 | ADD,DC ht_0,%r0,ht_0 ; ht[0]++ |
112 | copy %r20,%r28 | 222 | ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] |
113 | ldw -84(0,%r30),%r2 | 223 | ADD,DC ht_0,%r0,ht_0 ; ht[0]++ |
114 | ldw -60(0,%r30),%r3 | 224 | |
115 | bv 0(%r2) | 225 | LDO -2(num),num ; num = num - 2; |
116 | ldwm -64(0,%r30),%r4 | 226 | ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); |
117 | .EXIT | 227 | ADD,DC ht_1,%r0,ht_1 ; ht[1]++ |
118 | .PROCEND | 228 | STD lt_0,0(r_ptr) ; rp[0] = lt[0] |
119 | .align 4 | 229 | |
120 | .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR | 230 | ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] |
231 | ADD,DC ht_1,%r0,%ret1 ; ht[1]++ | ||
232 | LDO 16(a_ptr),a_ptr ; a_ptr += 2 | ||
233 | |||
234 | STD lt_1,8(r_ptr) ; rp[1] = lt[1] | ||
235 | CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do | ||
236 | LDO 16(r_ptr),r_ptr ; r_ptr += 2 | ||
237 | |||
238 | CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one | ||
239 | |||
240 | ; | ||
241 | ; Top of loop aligned on 64-byte boundary | ||
242 | ; | ||
243 | bn_mul_add_words_single_top | ||
244 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
245 | LDD 0(r_ptr),rp_val ; rp[0] | ||
246 | LDO 8(a_ptr),a_ptr ; a_ptr++ | ||
247 | XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l | ||
248 | FSTD fm1,-16(%sp) ; -16(sp) = m1 | ||
249 | XMPYU flt_0,fw_h,fm ; m = lt*fw_h | ||
250 | FSTD fm,-8(%sp) ; -8(sp) = m | ||
251 | XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h | ||
252 | FSTD ht_temp,-24(%sp) ; -24(sp) = ht | ||
253 | XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l | ||
254 | FSTD lt_temp,-32(%sp) ; -32(sp) = lt | ||
255 | |||
256 | LDD -8(%sp),m_0 | ||
257 | LDD -16(%sp),m1_0 ; m1 = temp1 | ||
258 | ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; | ||
259 | LDD -24(%sp),ht_0 | ||
260 | LDD -32(%sp),lt_0 | ||
261 | |||
262 | CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) | ||
263 | ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) | ||
264 | |||
265 | EXTRD,U tmp_0,31,32,m_0 ; m>>32 | ||
266 | DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 | ||
267 | |||
268 | ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) | ||
269 | ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; | ||
270 | ADD,DC ht_0,%r0,ht_0 ; ht++ | ||
271 | ADD %ret1,tmp_0,lt_0 ; lt = lt + c; | ||
272 | ADD,DC ht_0,%r0,ht_0 ; ht++ | ||
273 | ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] | ||
274 | ADD,DC ht_0,%r0,%ret1 ; ht++ | ||
275 | STD lt_0,0(r_ptr) ; rp[0] = lt | ||
276 | |||
277 | bn_mul_add_words_exit | ||
278 | .EXIT | ||
279 | |||
280 | EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 | ||
281 | LDD -80(%sp),%r9 ; restore r9 | ||
282 | LDD -88(%sp),%r8 ; restore r8 | ||
283 | LDD -96(%sp),%r7 ; restore r7 | ||
284 | LDD -104(%sp),%r6 ; restore r6 | ||
285 | LDD -112(%sp),%r5 ; restore r5 | ||
286 | LDD -120(%sp),%r4 ; restore r4 | ||
287 | BVE (%rp) | ||
288 | LDD,MB -128(%sp),%r3 ; restore r3 | ||
289 | .PROCEND ;in=23,24,25,26,29;out=28; | ||
290 | |||
291 | ;---------------------------------------------------------------------------- | ||
292 | ; | ||
293 | ;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | ||
294 | ; | ||
295 | ; arg0 = rp | ||
296 | ; arg1 = ap | ||
297 | ; arg3 = num | ||
298 | ; w on stack at -56(sp) | ||
299 | |||
121 | bn_mul_words | 300 | bn_mul_words |
122 | .PROC | 301 | .proc |
123 | .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3 | 302 | .callinfo frame=128 |
124 | .ENTRY | 303 | .entry |
125 | stw %r2,-20(0,%r30) | 304 | .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN |
126 | copy %r25,%r2 | 305 | .align 64 |
127 | stwm %r4,64(0,%r30) | 306 | |
128 | copy %r24,%r19 | 307 | STD %r3,0(%sp) ; save r3 |
129 | ldi 0,%r28 | 308 | STD %r4,8(%sp) ; save r4 |
130 | stw %r23,-16(0,%r30) | 309 | NOP |
131 | ldo 12(%r26),%r31 | 310 | STD %r5,16(%sp) ; save r5 |
132 | ldo 12(%r2),%r29 | 311 | |
133 | fldws -16(0,%r30),%fr8L | 312 | STD %r6,24(%sp) ; save r6 |
134 | L$0026 | 313 | STD %r7,32(%sp) ; save r7 |
135 | fldws 0(0,%r2),%fr9L | 314 | COPY %r0,%ret1 ; return 0 by default |
136 | xmpyu %fr8L,%fr9L,%fr9 | 315 | DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 |
137 | fstds %fr9,-16(0,%r30) | 316 | |
138 | copy %r28,%r21 | 317 | CMPIB,>= 0,num,bn_mul_words_exit |
139 | ldi 0,%r20 | 318 | LDO 128(%sp),%sp ; bump stack |
140 | ldw -16(0,%r30),%r24 | 319 | |
141 | ldw -12(0,%r30),%r25 | 320 | ; |
142 | add %r21,%r25,%r25 | 321 | ; See if only 1 word to do, thus just do cleanup |
143 | addc %r20,%r24,%r24 | 322 | ; |
144 | copy %r24,%r23 | 323 | CMPIB,= 1,num,bn_mul_words_single_top |
145 | ldi 0,%r22 | 324 | FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) |
146 | copy %r23,%r28 | 325 | |
147 | addib,= -1,%r19,L$0027 | 326 | ; |
148 | stw %r25,0(0,%r26) | 327 | ; This loop is unrolled 2 times (64-byte aligned as well) |
149 | fldws -8(0,%r29),%fr9L | 328 | ; |
150 | xmpyu %fr8L,%fr9L,%fr9 | 329 | ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus |
151 | fstds %fr9,-16(0,%r30) | 330 | ; two 32-bit mutiplies can be issued per cycle. |
152 | copy %r28,%r21 | 331 | ; |
153 | ldi 0,%r20 | 332 | bn_mul_words_unroll2 |
154 | ldw -16(0,%r30),%r24 | 333 | |
155 | ldw -12(0,%r30),%r25 | 334 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) |
156 | add %r21,%r25,%r25 | 335 | FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) |
157 | addc %r20,%r24,%r24 | 336 | XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l |
158 | copy %r24,%r23 | 337 | XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l |
159 | ldi 0,%r22 | 338 | |
160 | copy %r23,%r28 | 339 | FSTD fm1,-16(%sp) ; -16(sp) = m1 |
161 | addib,= -1,%r19,L$0027 | 340 | FSTD fm1_1,-48(%sp) ; -48(sp) = m1 |
162 | stw %r25,-8(0,%r31) | 341 | XMPYU flt_0,fw_h,fm ; m = lt*fw_h |
163 | fldws -4(0,%r29),%fr9L | 342 | XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h |
164 | xmpyu %fr8L,%fr9L,%fr9 | 343 | |
165 | fstds %fr9,-16(0,%r30) | 344 | FSTD fm,-8(%sp) ; -8(sp) = m |
166 | copy %r28,%r21 | 345 | FSTD fm_1,-40(%sp) ; -40(sp) = m |
167 | ldi 0,%r20 | 346 | XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h |
168 | ldw -16(0,%r30),%r24 | 347 | XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h |
169 | ldw -12(0,%r30),%r25 | 348 | |
170 | add %r21,%r25,%r25 | 349 | FSTD ht_temp,-24(%sp) ; -24(sp) = ht |
171 | addc %r20,%r24,%r24 | 350 | FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht |
172 | copy %r24,%r23 | 351 | XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l |
173 | ldi 0,%r22 | 352 | XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l |
174 | copy %r23,%r28 | 353 | |
175 | addib,= -1,%r19,L$0027 | 354 | FSTD lt_temp,-32(%sp) ; -32(sp) = lt |
176 | stw %r25,-4(0,%r31) | 355 | FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt |
177 | fldws 0(0,%r29),%fr9L | 356 | LDD -8(%sp),m_0 |
178 | xmpyu %fr8L,%fr9L,%fr9 | 357 | LDD -40(%sp),m_1 |
179 | fstds %fr9,-16(0,%r30) | 358 | |
180 | copy %r28,%r21 | 359 | LDD -16(%sp),m1_0 |
181 | ldi 0,%r20 | 360 | LDD -48(%sp),m1_1 |
182 | ldw -16(0,%r30),%r24 | 361 | LDD -24(%sp),ht_0 |
183 | ldw -12(0,%r30),%r25 | 362 | LDD -56(%sp),ht_1 |
184 | add %r21,%r25,%r25 | 363 | |
185 | addc %r20,%r24,%r24 | 364 | ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; |
186 | copy %r24,%r23 | 365 | ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; |
187 | ldi 0,%r22 | 366 | LDD -32(%sp),lt_0 |
188 | copy %r23,%r28 | 367 | LDD -64(%sp),lt_1 |
189 | addib,= -1,%r19,L$0027 | 368 | |
190 | stw %r25,0(0,%r31) | 369 | CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) |
191 | ldo 16(%r29),%r29 | 370 | ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) |
192 | ldo 16(%r2),%r2 | 371 | CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) |
193 | ldo 16(%r31),%r31 | 372 | ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) |
194 | bl L$0026,0 | 373 | |
195 | ldo 16(%r26),%r26 | 374 | EXTRD,U tmp_0,31,32,m_0 ; m>>32 |
196 | L$0027 | 375 | DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 |
197 | ldw -84(0,%r30),%r2 | 376 | EXTRD,U tmp_1,31,32,m_1 ; m>>32 |
198 | bv 0(%r2) | 377 | DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 |
199 | ldwm -64(0,%r30),%r4 | 378 | |
200 | .EXIT | 379 | ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) |
201 | .PROCEND | 380 | ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) |
202 | .align 4 | 381 | ADD lt_0,m1_0,lt_0 ; lt = lt+m1; |
203 | .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR | 382 | ADD,DC ht_0,%r0,ht_0 ; ht++ |
383 | |||
384 | ADD lt_1,m1_1,lt_1 ; lt = lt+m1; | ||
385 | ADD,DC ht_1,%r0,ht_1 ; ht++ | ||
386 | ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1); | ||
387 | ADD,DC ht_0,%r0,ht_0 ; ht++ | ||
388 | |||
389 | ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) | ||
390 | ADD,DC ht_1,%r0,ht_1 ; ht++ | ||
391 | STD lt_0,0(r_ptr) ; rp[0] = lt | ||
392 | STD lt_1,8(r_ptr) ; rp[1] = lt | ||
393 | |||
394 | COPY ht_1,%ret1 ; carry = ht | ||
395 | LDO -2(num),num ; num = num - 2; | ||
396 | LDO 16(a_ptr),a_ptr ; ap += 2 | ||
397 | CMPIB,<= 2,num,bn_mul_words_unroll2 | ||
398 | LDO 16(r_ptr),r_ptr ; rp += 2 | |
399 | |||
400 | CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? | ||
401 | |||
402 | ; | ||
403 | ; Top of loop aligned on 64-byte boundary | ||
404 | ; | ||
405 | bn_mul_words_single_top | ||
406 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
407 | |||
408 | XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l | ||
409 | FSTD fm1,-16(%sp) ; -16(sp) = m1 | ||
410 | XMPYU flt_0,fw_h,fm ; m = lt*fw_h | ||
411 | FSTD fm,-8(%sp) ; -8(sp) = m | ||
412 | XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h | ||
413 | FSTD ht_temp,-24(%sp) ; -24(sp) = ht | ||
414 | XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l | ||
415 | FSTD lt_temp,-32(%sp) ; -32(sp) = lt | ||
416 | |||
417 | LDD -8(%sp),m_0 | ||
418 | LDD -16(%sp),m1_0 | ||
419 | ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; | ||
420 | LDD -24(%sp),ht_0 | ||
421 | LDD -32(%sp),lt_0 | ||
422 | |||
423 | CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) | ||
424 | ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) | ||
425 | |||
426 | EXTRD,U tmp_0,31,32,m_0 ; m>>32 | ||
427 | DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 | ||
428 | |||
429 | ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) | ||
430 | ADD lt_0,m1_0,lt_0 ; lt= lt+m1; | ||
431 | ADD,DC ht_0,%r0,ht_0 ; ht++ | ||
432 | |||
433 | ADD %ret1,lt_0,lt_0 ; lt = lt + c; | ||
434 | ADD,DC ht_0,%r0,ht_0 ; ht++ | ||
435 | |||
436 | COPY ht_0,%ret1 ; copy carry | ||
437 | STD lt_0,0(r_ptr) ; rp[0] = lt | ||
438 | |||
439 | bn_mul_words_exit | ||
440 | .EXIT | ||
441 | EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 | ||
442 | LDD -96(%sp),%r7 ; restore r7 | ||
443 | LDD -104(%sp),%r6 ; restore r6 | ||
444 | LDD -112(%sp),%r5 ; restore r5 | ||
445 | LDD -120(%sp),%r4 ; restore r4 | ||
446 | BVE (%rp) | ||
447 | LDD,MB -128(%sp),%r3 ; restore r3 | ||
448 | .PROCEND | ||
449 | |||
450 | ;---------------------------------------------------------------------------- | ||
451 | ; | ||
452 | ;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) | ||
453 | ; | ||
454 | ; arg0 = rp | ||
455 | ; arg1 = ap | ||
456 | ; arg2 = num | ||
457 | ; | ||
458 | |||
204 | bn_sqr_words | 459 | bn_sqr_words |
460 | .proc | ||
461 | .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE | ||
462 | .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
463 | .entry | ||
464 | .align 64 | ||
465 | |||
466 | STD %r3,0(%sp) ; save r3 | ||
467 | STD %r4,8(%sp) ; save r4 | ||
468 | NOP | ||
469 | STD %r5,16(%sp) ; save r5 | ||
470 | |||
471 | CMPIB,>= 0,num,bn_sqr_words_exit | ||
472 | LDO 128(%sp),%sp ; bump stack | ||
473 | |||
474 | ; | ||
475 | ; If only 1, then go straight to cleanup | |
476 | ; | ||
477 | CMPIB,= 1,num,bn_sqr_words_single_top | ||
478 | DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L | ||
479 | |||
480 | ; | ||
481 | ; This loop is unrolled 2 times (64-byte aligned as well) | ||
482 | ; | ||
483 | |||
484 | bn_sqr_words_unroll2 | ||
485 | FLDD 0(a_ptr),t_float_0 ; a[0] | ||
486 | FLDD 8(a_ptr),t_float_1 ; a[1] | ||
487 | XMPYU fht_0,flt_0,fm ; m[0] | ||
488 | XMPYU fht_1,flt_1,fm_1 ; m[1] | ||
489 | |||
490 | FSTD fm,-24(%sp) ; store m[0] | ||
491 | FSTD fm_1,-56(%sp) ; store m[1] | ||
492 | XMPYU flt_0,flt_0,lt_temp ; lt[0] | ||
493 | XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] | ||
494 | |||
495 | FSTD lt_temp,-16(%sp) ; store lt[0] | ||
496 | FSTD lt_temp_1,-48(%sp) ; store lt[1] | ||
497 | XMPYU fht_0,fht_0,ht_temp ; ht[0] | ||
498 | XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] | ||
499 | |||
500 | FSTD ht_temp,-8(%sp) ; store ht[0] | ||
501 | FSTD ht_temp_1,-40(%sp) ; store ht[1] | ||
502 | LDD -24(%sp),m_0 | ||
503 | LDD -56(%sp),m_1 | ||
504 | |||
505 | AND m_0,high_mask,tmp_0 ; m[0] & Mask | ||
506 | AND m_1,high_mask,tmp_1 ; m[1] & Mask | ||
507 | DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 | ||
508 | DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 | ||
509 | |||
510 | LDD -16(%sp),lt_0 | ||
511 | LDD -48(%sp),lt_1 | ||
512 | EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 | ||
513 | EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 | ||
514 | |||
515 | LDD -8(%sp),ht_0 | ||
516 | LDD -40(%sp),ht_1 | ||
517 | ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 | ||
518 | ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 | ||
519 | |||
520 | ADD lt_0,m_0,lt_0 ; lt = lt+m | ||
521 | ADD,DC ht_0,%r0,ht_0 ; ht[0]++ | ||
522 | STD lt_0,0(r_ptr) ; rp[0] = lt[0] | ||
523 | STD ht_0,8(r_ptr) ; rp[1] = ht[0] | |
524 | |||
525 | ADD lt_1,m_1,lt_1 ; lt = lt+m | ||
526 | ADD,DC ht_1,%r0,ht_1 ; ht[1]++ | ||
527 | STD lt_1,16(r_ptr) ; rp[2] = lt[1] | ||
528 | STD ht_1,24(r_ptr) ; rp[3] = ht[1] | ||
529 | |||
530 | LDO -2(num),num ; num = num - 2; | ||
531 | LDO 16(a_ptr),a_ptr ; ap += 2 | ||
532 | CMPIB,<= 2,num,bn_sqr_words_unroll2 | ||
533 | LDO 32(r_ptr),r_ptr ; rp += 4 | ||
534 | |||
535 | CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? | ||
536 | |||
537 | ; | ||
538 | ; Top of loop aligned on 64-byte boundary | ||
539 | ; | ||
540 | bn_sqr_words_single_top | ||
541 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
542 | |||
543 | XMPYU fht_0,flt_0,fm ; m | ||
544 | FSTD fm,-24(%sp) ; store m | ||
545 | |||
546 | XMPYU flt_0,flt_0,lt_temp ; lt | ||
547 | FSTD lt_temp,-16(%sp) ; store lt | ||
548 | |||
549 | XMPYU fht_0,fht_0,ht_temp ; ht | ||
550 | FSTD ht_temp,-8(%sp) ; store ht | ||
551 | |||
552 | LDD -24(%sp),m_0 ; load m | ||
553 | AND m_0,high_mask,tmp_0 ; m & Mask | ||
554 | DEPD,Z m_0,30,31,m_0 ; m << 32+1 | ||
555 | LDD -16(%sp),lt_0 ; lt | ||
556 | |||
557 | LDD -8(%sp),ht_0 ; ht | ||
558 | EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 | ||
559 | ADD m_0,lt_0,lt_0 ; lt = lt+m | ||
560 | ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 | ||
561 | ADD,DC ht_0,%r0,ht_0 ; ht++ | ||
562 | |||
563 | STD lt_0,0(r_ptr) ; rp[0] = lt | ||
564 | STD ht_0,8(r_ptr) ; rp[1] = ht | ||
565 | |||
566 | bn_sqr_words_exit | ||
567 | .EXIT | ||
568 | LDD -112(%sp),%r5 ; restore r5 | ||
569 | LDD -120(%sp),%r4 ; restore r4 | ||
570 | BVE (%rp) | ||
571 | LDD,MB -128(%sp),%r3 | ||
572 | .PROCEND ;in=23,24,25,26,29;out=28; | ||
573 | |||
574 | |||
575 | ;---------------------------------------------------------------------------- | ||
576 | ; | ||
577 | ;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | ||
578 | ; | ||
579 | ; arg0 = rp | ||
580 | ; arg1 = ap | ||
581 | ; arg2 = bp | ||
582 | ; arg3 = n | ||
583 | |||
584 | t .reg %r22 | ||
585 | b .reg %r21 | ||
586 | l .reg %r20 | ||
587 | |||
588 | bn_add_words | ||
589 | .proc | ||
590 | .entry | ||
591 | .callinfo | ||
592 | .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
593 | .align 64 | ||
594 | |||
595 | CMPIB,>= 0,n,bn_add_words_exit | ||
596 | COPY %r0,%ret1 ; return 0 by default | ||
597 | |||
598 | ; | ||
599 | ; If 2 or more numbers do the loop | ||
600 | ; | ||
601 | CMPIB,= 1,n,bn_add_words_single_top | ||
602 | NOP | ||
603 | |||
604 | ; | ||
605 | ; This loop is unrolled 2 times (64-byte aligned as well) | ||
606 | ; | ||
607 | bn_add_words_unroll2 | ||
608 | LDD 0(a_ptr),t | ||
609 | LDD 0(b_ptr),b | ||
610 | ADD t,%ret1,t ; t = t+c; | ||
611 | ADD,DC %r0,%r0,%ret1 ; set c to carry | ||
612 | ADD t,b,l ; l = t + b[0] | ||
613 | ADD,DC %ret1,%r0,%ret1 ; c+= carry | ||
614 | STD l,0(r_ptr) | ||
615 | |||
616 | LDD 8(a_ptr),t | ||
617 | LDD 8(b_ptr),b | ||
618 | ADD t,%ret1,t ; t = t+c; | ||
619 | ADD,DC %r0,%r0,%ret1 ; set c to carry | ||
620 | ADD t,b,l ; l = t + b[1] | |
621 | ADD,DC %ret1,%r0,%ret1 ; c+= carry | ||
622 | STD l,8(r_ptr) | ||
623 | |||
624 | LDO -2(n),n | ||
625 | LDO 16(a_ptr),a_ptr | ||
626 | LDO 16(b_ptr),b_ptr | ||
627 | |||
628 | CMPIB,<= 2,n,bn_add_words_unroll2 | ||
629 | LDO 16(r_ptr),r_ptr | ||
630 | |||
631 | CMPIB,=,N 0,n,bn_add_words_exit ; are we done? | ||
632 | |||
633 | bn_add_words_single_top | ||
634 | LDD 0(a_ptr),t | ||
635 | LDD 0(b_ptr),b | ||
636 | |||
637 | ADD t,%ret1,t ; t = t+c; | ||
638 | ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??) | ||
639 | ADD t,b,l ; l = t + b[0] | ||
640 | ADD,DC %ret1,%r0,%ret1 ; c+= carry | ||
641 | STD l,0(r_ptr) | ||
642 | |||
643 | bn_add_words_exit | ||
644 | .EXIT | ||
645 | BVE (%rp) | ||
646 | EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 | ||
647 | .PROCEND ;in=23,24,25,26,29;out=28; | ||
648 | |||
649 | ;---------------------------------------------------------------------------- | ||
650 | ; | ||
651 | ;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | ||
652 | ; | ||
653 | ; arg0 = rp | ||
654 | ; arg1 = ap | ||
655 | ; arg2 = bp | ||
656 | ; arg3 = n | ||
657 | |||
658 | t1 .reg %r22 | ||
659 | t2 .reg %r21 | ||
660 | sub_tmp1 .reg %r20 | ||
661 | sub_tmp2 .reg %r19 | ||
662 | |||
663 | |||
664 | bn_sub_words | ||
665 | .proc | ||
666 | .callinfo | ||
667 | .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
668 | .entry | ||
669 | .align 64 | ||
670 | |||
671 | CMPIB,>= 0,n,bn_sub_words_exit | ||
672 | COPY %r0,%ret1 ; return 0 by default | ||
673 | |||
674 | ; | ||
675 | ; If 2 or more numbers do the loop | ||
676 | ; | ||
677 | CMPIB,= 1,n,bn_sub_words_single_top | ||
678 | NOP | ||
679 | |||
680 | ; | ||
681 | ; This loop is unrolled 2 times (64-byte aligned as well) | ||
682 | ; | ||
683 | bn_sub_words_unroll2 | ||
684 | LDD 0(a_ptr),t1 | ||
685 | LDD 0(b_ptr),t2 | ||
686 | SUB t1,t2,sub_tmp1 ; t3 = t1-t2; | ||
687 | SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; | ||
688 | |||
689 | CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 | ||
690 | LDO 1(%r0),sub_tmp2 | ||
691 | |||
692 | CMPCLR,*= t1,t2,%r0 | ||
693 | COPY sub_tmp2,%ret1 | ||
694 | STD sub_tmp1,0(r_ptr) | ||
695 | |||
696 | LDD 8(a_ptr),t1 | ||
697 | LDD 8(b_ptr),t2 | ||
698 | SUB t1,t2,sub_tmp1 ; t3 = t1-t2; | ||
699 | SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; | ||
700 | CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 | ||
701 | LDO 1(%r0),sub_tmp2 | ||
702 | |||
703 | CMPCLR,*= t1,t2,%r0 | ||
704 | COPY sub_tmp2,%ret1 | ||
705 | STD sub_tmp1,8(r_ptr) | ||
706 | |||
707 | LDO -2(n),n | ||
708 | LDO 16(a_ptr),a_ptr | ||
709 | LDO 16(b_ptr),b_ptr | ||
710 | |||
711 | CMPIB,<= 2,n,bn_sub_words_unroll2 | ||
712 | LDO 16(r_ptr),r_ptr | ||
713 | |||
714 | CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? | ||
715 | |||
716 | bn_sub_words_single_top | ||
717 | LDD 0(a_ptr),t1 | ||
718 | LDD 0(b_ptr),t2 | ||
719 | SUB t1,t2,sub_tmp1 ; t3 = t1-t2; | ||
720 | SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; | ||
721 | CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 | ||
722 | LDO 1(%r0),sub_tmp2 | ||
723 | |||
724 | CMPCLR,*= t1,t2,%r0 | ||
725 | COPY sub_tmp2,%ret1 | ||
726 | |||
727 | STD sub_tmp1,0(r_ptr) | ||
728 | |||
729 | bn_sub_words_exit | ||
730 | .EXIT | ||
731 | BVE (%rp) | ||
732 | EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 | ||
733 | .PROCEND ;in=23,24,25,26,29;out=28; | ||
734 | |||
735 | ;------------------------------------------------------------------------------ | ||
736 | ; | ||
737 | ; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) | ||
738 | ; | ||
739 | ; arg0 = h | ||
740 | ; arg1 = l | ||
741 | ; arg2 = d | ||
742 | ; | ||
743 | ; This is mainly just output from the HP C compiler. | ||
744 | ; | ||
745 | ;------------------------------------------------------------------------------ | ||
746 | bn_div_words | ||
205 | .PROC | 747 | .PROC |
206 | .CALLINFO FRAME=0,NO_CALLS | 748 | .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN |
207 | .ENTRY | 749 | .IMPORT BN_num_bits_word,CODE |
208 | ldo 28(%r26),%r19 | 750 | .IMPORT __iob,DATA |
209 | ldo 12(%r25),%r28 | 751 | .IMPORT fprintf,CODE |
210 | L$0042 | 752 | .IMPORT abort,CODE |
211 | fldws 0(0,%r25),%fr8L | 753 | .IMPORT $$div2U,MILLICODE |
212 | fldws 0(0,%r25),%fr8R | 754 | .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE |
213 | xmpyu %fr8L,%fr8R,%fr8 | 755 | .ENTRY |
214 | fstds %fr8,-16(0,%r30) | 756 | STW %r2,-20(%r30) ;offset 0x8ec |
215 | ldw -16(0,%r30),%r22 | 757 | STW,MA %r3,192(%r30) ;offset 0x8f0 |
216 | ldw -12(0,%r30),%r23 | 758 | STW %r4,-188(%r30) ;offset 0x8f4 |
217 | stw %r23,0(0,%r26) | 759 | DEPD %r5,31,32,%r6 ;offset 0x8f8 |
218 | copy %r22,%r21 | 760 | STD %r6,-184(%r30) ;offset 0x8fc |
219 | ldi 0,%r20 | 761 | DEPD %r7,31,32,%r8 ;offset 0x900 |
220 | addib,= -1,%r24,L$0049 | 762 | STD %r8,-176(%r30) ;offset 0x904 |
221 | stw %r21,-24(0,%r19) | 763 | STW %r9,-168(%r30) ;offset 0x908 |
222 | fldws -8(0,%r28),%fr8L | 764 | LDD -248(%r30),%r3 ;offset 0x90c |
223 | fldws -8(0,%r28),%fr8R | 765 | COPY %r26,%r4 ;offset 0x910 |
224 | xmpyu %fr8L,%fr8R,%fr8 | 766 | COPY %r24,%r5 ;offset 0x914 |
225 | fstds %fr8,-16(0,%r30) | 767 | DEPD %r25,31,32,%r4 ;offset 0x918 |
226 | ldw -16(0,%r30),%r22 | 768 | CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c |
227 | ldw -12(0,%r30),%r23 | 769 | DEPD %r23,31,32,%r5 ;offset 0x920 |
228 | stw %r23,-20(0,%r19) | 770 | MOVIB,TR -1,%r29,$00060002 ;offset 0x924 |
229 | copy %r22,%r21 | 771 | EXTRD,U %r29,31,32,%r28 ;offset 0x928 |
230 | ldi 0,%r20 | 772 | $0006002A |
231 | addib,= -1,%r24,L$0049 | 773 | LDO -1(%r29),%r29 ;offset 0x92c |
232 | stw %r21,-16(0,%r19) | 774 | SUB %r23,%r7,%r23 ;offset 0x930 |
233 | fldws -4(0,%r28),%fr8L | 775 | $00060024 |
234 | fldws -4(0,%r28),%fr8R | 776 | SUB %r4,%r31,%r25 ;offset 0x934 |
235 | xmpyu %fr8L,%fr8R,%fr8 | 777 | AND %r25,%r19,%r26 ;offset 0x938 |
236 | fstds %fr8,-16(0,%r30) | 778 | CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c |
237 | ldw -16(0,%r30),%r22 | 779 | DEPD,Z %r25,31,32,%r20 ;offset 0x940 |
238 | ldw -12(0,%r30),%r23 | 780 | OR %r20,%r24,%r21 ;offset 0x944 |
239 | stw %r23,-12(0,%r19) | 781 | CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948 |
240 | copy %r22,%r21 | 782 | SUB %r31,%r2,%r31 ;offset 0x94c |
241 | ldi 0,%r20 | 783 | $00060046 |
242 | addib,= -1,%r24,L$0049 | 784 | $0006002E |
243 | stw %r21,-8(0,%r19) | 785 | DEPD,Z %r23,31,32,%r25 ;offset 0x950 |
244 | fldws 0(0,%r28),%fr8L | 786 | EXTRD,U %r23,31,32,%r26 ;offset 0x954 |
245 | fldws 0(0,%r28),%fr8R | 787 | AND %r25,%r19,%r24 ;offset 0x958 |
246 | xmpyu %fr8L,%fr8R,%fr8 | 788 | ADD,L %r31,%r26,%r31 ;offset 0x95c |
247 | fstds %fr8,-16(0,%r30) | 789 | CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960 |
248 | ldw -16(0,%r30),%r22 | 790 | LDO 1(%r31),%r31 ;offset 0x964 |
249 | ldw -12(0,%r30),%r23 | 791 | $00060032 |
250 | stw %r23,-4(0,%r19) | 792 | CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968 |
251 | copy %r22,%r21 | 793 | LDO -1(%r29),%r29 ;offset 0x96c |
252 | ldi 0,%r20 | 794 | ADD,L %r4,%r3,%r4 ;offset 0x970 |
253 | addib,= -1,%r24,L$0049 | 795 | $00060036 |
254 | stw %r21,0(0,%r19) | 796 | ADDIB,=,N -1,%r8,$D0 ;offset 0x974 |
255 | ldo 16(%r28),%r28 | 797 | SUB %r5,%r24,%r28 ;offset 0x978 |
256 | ldo 16(%r25),%r25 | 798 | $0006003A |
257 | ldo 32(%r19),%r19 | 799 | SUB %r4,%r31,%r24 ;offset 0x97c |
258 | bl L$0042,0 | 800 | SHRPD %r24,%r28,32,%r4 ;offset 0x980 |
259 | ldo 32(%r26),%r26 | 801 | DEPD,Z %r29,31,32,%r9 ;offset 0x984 |
260 | L$0049 | 802 | DEPD,Z %r28,31,32,%r5 ;offset 0x988 |
261 | bv,n 0(%r2) | 803 | $0006001C |
262 | .EXIT | 804 | EXTRD,U %r4,31,32,%r31 ;offset 0x98c |
263 | .PROCEND | 805 | CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990 |
264 | .IMPORT BN_num_bits_word,CODE | 806 | MOVB,TR %r6,%r29,$D1 ;offset 0x994 |
265 | .IMPORT fprintf,CODE | 807 | STD %r29,-152(%r30) ;offset 0x998 |
266 | .IMPORT __iob,DATA | 808 | $0006000C |
267 | .SPACE $TEXT$ | 809 | EXTRD,U %r3,31,32,%r25 ;offset 0x99c |
268 | .SUBSPA $LIT$ | 810 | COPY %r3,%r26 ;offset 0x9a0 |
269 | 811 | EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 | |
270 | .align 4 | 812 | EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 |
271 | L$C0000 | 813 | .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; |
272 | .STRING "Division would overflow (%d)\x0a\x00" | 814 | B,L BN_num_bits_word,%r2 ;offset 0x9ac |
273 | .IMPORT abort,CODE | 815 | EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 |
274 | .SPACE $TEXT$ | 816 | LDI 64,%r20 ;offset 0x9b4 |
275 | .SUBSPA $CODE$ | 817 | DEPD %r7,31,32,%r5 ;offset 0x9b8 |
276 | 818 | DEPD %r8,31,32,%r4 ;offset 0x9bc | |
277 | .align 4 | 819 | DEPD %r9,31,32,%r3 ;offset 0x9c0 |
278 | .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR | 820 | CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 |
279 | bn_div64 | 821 | COPY %r28,%r24 ;offset 0x9c8 |
822 | MTSARCM %r24 ;offset 0x9cc | ||
823 | DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 | ||
824 | CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 | ||
825 | $00060012 | ||
826 | SUBI 64,%r24,%r31 ;offset 0x9d8 | ||
827 | CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc | ||
828 | SUB %r4,%r3,%r4 ;offset 0x9e0 | ||
829 | $00060016 | ||
830 | CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 | ||
831 | COPY %r0,%r9 ;offset 0x9e8 | ||
832 | MTSARCM %r31 ;offset 0x9ec | ||
833 | DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 | ||
834 | SUBI 64,%r31,%r26 ;offset 0x9f4 | ||
835 | MTSAR %r26 ;offset 0x9f8 | ||
836 | SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc | ||
837 | MTSARCM %r31 ;offset 0xa00 | ||
838 | DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04 | ||
839 | $0006001A | ||
840 | DEPDI,Z -1,31,32,%r19 ;offset 0xa08 | ||
841 | AND %r3,%r19,%r29 ;offset 0xa0c | ||
842 | EXTRD,U %r29,31,32,%r2 ;offset 0xa10 | ||
843 | DEPDI,Z -1,63,32,%r6 ;offset 0xa14 | ||
844 | MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 | ||
845 | EXTRD,U %r3,63,32,%r7 ;offset 0xa1c | ||
846 | $D2 | ||
847 | ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 | ||
848 | LDIL LR'C$7,%r21 ;offset 0xa24 | ||
849 | LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 | ||
850 | .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; | ||
851 | B,L fprintf,%r2 ;offset 0xa2c | ||
852 | LDO RR'C$7(%r21),%r25 ;offset 0xa30 | ||
853 | .CALL ; | ||
854 | B,L abort,%r2 ;offset 0xa34 | ||
855 | NOP ;offset 0xa38 | ||
856 | B $D3 ;offset 0xa3c | ||
857 | LDW -212(%r30),%r2 ;offset 0xa40 | ||
858 | $00060020 | ||
859 | COPY %r4,%r26 ;offset 0xa44 | ||
860 | EXTRD,U %r4,31,32,%r25 ;offset 0xa48 | ||
861 | COPY %r2,%r24 ;offset 0xa4c | ||
862 | .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) | ||
863 | B,L $$div2U,%r31 ;offset 0xa50 | ||
864 | EXTRD,U %r2,31,32,%r23 ;offset 0xa54 | ||
865 | DEPD %r28,31,32,%r29 ;offset 0xa58 | ||
866 | $00060022 | ||
867 | STD %r29,-152(%r30) ;offset 0xa5c | ||
868 | $D1 | ||
869 | AND %r5,%r19,%r24 ;offset 0xa60 | ||
870 | EXTRD,U %r24,31,32,%r24 ;offset 0xa64 | ||
871 | STW %r2,-160(%r30) ;offset 0xa68 | ||
872 | STW %r7,-128(%r30) ;offset 0xa6c | ||
873 | FLDD -152(%r30),%fr4 ;offset 0xa70 | ||
874 | FLDD -152(%r30),%fr7 ;offset 0xa74 | ||
875 | FLDW -160(%r30),%fr8L ;offset 0xa78 | ||
876 | FLDW -128(%r30),%fr5L ;offset 0xa7c | ||
877 | XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 | ||
878 | FSTD %fr10,-136(%r30) ;offset 0xa84 | ||
879 | XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 | ||
880 | FSTD %fr22,-144(%r30) ;offset 0xa8c | ||
881 | XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 | ||
882 | XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 | ||
883 | FSTD %fr11,-112(%r30) ;offset 0xa98 | ||
884 | FSTD %fr23,-120(%r30) ;offset 0xa9c | ||
885 | LDD -136(%r30),%r28 ;offset 0xaa0 | ||
886 | DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 | ||
887 | LDD -144(%r30),%r20 ;offset 0xaa8 | ||
888 | ADD,L %r20,%r31,%r31 ;offset 0xaac | ||
889 | LDD -112(%r30),%r22 ;offset 0xab0 | ||
890 | DEPD,Z %r22,31,32,%r22 ;offset 0xab4 | ||
891 | LDD -120(%r30),%r21 ;offset 0xab8 | ||
892 | B $00060024 ;offset 0xabc | ||
893 | ADD,L %r21,%r22,%r23 ;offset 0xac0 | ||
894 | $D0 | ||
895 | OR %r9,%r29,%r29 ;offset 0xac4 | ||
896 | $00060040 | ||
897 | EXTRD,U %r29,31,32,%r28 ;offset 0xac8 | ||
898 | $00060002 | ||
899 | $L2 | ||
900 | LDW -212(%r30),%r2 ;offset 0xacc | ||
901 | $D3 | ||
902 | LDW -168(%r30),%r9 ;offset 0xad0 | ||
903 | LDD -176(%r30),%r8 ;offset 0xad4 | ||
904 | EXTRD,U %r8,31,32,%r7 ;offset 0xad8 | ||
905 | LDD -184(%r30),%r6 ;offset 0xadc | ||
906 | EXTRD,U %r6,31,32,%r5 ;offset 0xae0 | ||
907 | LDW -188(%r30),%r4 ;offset 0xae4 | ||
908 | BVE (%r2) ;offset 0xae8 | ||
909 | .EXIT | ||
910 | LDW,MB -192(%r30),%r3 ;offset 0xaec | ||
911 | .PROCEND ;in=23,25;out=28,29;fpin=105,107; | ||
912 | |||
913 | |||
914 | |||
915 | |||
916 | ;---------------------------------------------------------------------------- | ||
917 | ; | ||
918 | ; Registers to hold 64-bit values to manipulate. The "L" part | ||
919 | ; of the register corresponds to the upper 32-bits, while the "R" | ||
920 | ; part corresponds to the lower 32-bits | ||
921 | ; | ||
922 | ; Note that b6 and b7 are aliases for %fr12 and %fr13, which are callee save | ||
923 | ; registers: a routine that uses them must save them first and restore them on exit | ||
924 | ; | ||
925 | ; | ||
926 | ; Floating point registers to use to save values that | ||
927 | ; are manipulated. These don't collide with ftemp1-6 and | ||
928 | ; are all caller save registers | ||
929 | ; | ||
930 | a0 .reg %fr22 | ||
931 | a0L .reg %fr22L | ||
932 | a0R .reg %fr22R | ||
933 | |||
934 | a1 .reg %fr23 | ||
935 | a1L .reg %fr23L | ||
936 | a1R .reg %fr23R | ||
937 | |||
938 | a2 .reg %fr24 | ||
939 | a2L .reg %fr24L | ||
940 | a2R .reg %fr24R | ||
941 | |||
942 | a3 .reg %fr25 | ||
943 | a3L .reg %fr25L | ||
944 | a3R .reg %fr25R | ||
945 | |||
946 | a4 .reg %fr26 | ||
947 | a4L .reg %fr26L | ||
948 | a4R .reg %fr26R | ||
949 | |||
950 | a5 .reg %fr27 | ||
951 | a5L .reg %fr27L | ||
952 | a5R .reg %fr27R | ||
953 | |||
954 | a6 .reg %fr28 | ||
955 | a6L .reg %fr28L | ||
956 | a6R .reg %fr28R | ||
957 | |||
958 | a7 .reg %fr29 | ||
959 | a7L .reg %fr29L | ||
960 | a7R .reg %fr29R | ||
961 | |||
962 | b0 .reg %fr30 | ||
963 | b0L .reg %fr30L | ||
964 | b0R .reg %fr30R | ||
965 | |||
966 | b1 .reg %fr31 | ||
967 | b1L .reg %fr31L | ||
968 | b1R .reg %fr31R | ||
969 | |||
970 | ; | ||
971 | ; Temporary floating point variables, these are all caller save | ||
972 | ; registers | ||
973 | ; | ||
974 | ftemp1 .reg %fr4 | ||
975 | ftemp2 .reg %fr5 | ||
976 | ftemp3 .reg %fr6 | ||
977 | ftemp4 .reg %fr7 | ||
978 | |||
979 | ; | ||
980 | ; The remaining B operand registers, b2-b7 (b0 and b1 are defined above). | ||
981 | ; | ||
982 | |||
983 | b2 .reg %fr8 | ||
984 | b2L .reg %fr8L | ||
985 | b2R .reg %fr8R | ||
986 | |||
987 | b3 .reg %fr9 | ||
988 | b3L .reg %fr9L | ||
989 | b3R .reg %fr9R | ||
990 | |||
991 | b4 .reg %fr10 | ||
992 | b4L .reg %fr10L | ||
993 | b4R .reg %fr10R | ||
994 | |||
995 | b5 .reg %fr11 | ||
996 | b5L .reg %fr11L | ||
997 | b5R .reg %fr11R | ||
998 | |||
999 | b6 .reg %fr12 | ||
1000 | b6L .reg %fr12L | ||
1001 | b6R .reg %fr12R | ||
1002 | |||
1003 | b7 .reg %fr13 | ||
1004 | b7L .reg %fr13L | ||
1005 | b7R .reg %fr13R | ||
1006 | |||
1007 | c1 .reg %r21 ; only reg | ||
1008 | temp1 .reg %r20 ; only reg | ||
1009 | temp2 .reg %r19 ; only reg | ||
1010 | temp3 .reg %r31 ; only reg | ||
1011 | ; m, lt, ht and c3 below live in %r3-%r6, which each comba routine saves on entry and restores on exit | ||
1012 | m1 .reg %r28 ; cross product (high half * low half) loaded from -16(%sp) in SQR_ADD_C2/MUL_ADD_C | ||
1013 | c2 .reg %r23 | ||
1014 | high_one .reg %r1 ; each routine loads 1 << 32 here before using the macros | ||
1015 | ht .reg %r6 ; high*high partial product | ||
1016 | lt .reg %r5 ; low*low partial product | ||
1017 | m .reg %r4 ; cross (high*low) partial product | ||
1018 | c3 .reg %r3 | ||
1019 | ; SQR_ADD_C: (C3:C2:C1) += A0*A0 where A0 = A0L:A0R (upper:lower 32 bits). Expects high_mask to be loaded; uses -24..-8(%sp) as scratch and clobbers m, lt, ht, temp1-3. | ||
1020 | SQR_ADD_C .macro A0L,A0R,C1,C2,C3 | ||
1021 | XMPYU A0L,A0R,ftemp1 ; m = A0L*A0R (cross product, counted twice below) | ||
1022 | FSTD ftemp1,-24(%sp) ; store m | ||
1023 | |||
1024 | XMPYU A0R,A0R,ftemp2 ; lt = A0R*A0R | ||
1025 | FSTD ftemp2,-16(%sp) ; store lt | ||
1026 | |||
1027 | XMPYU A0L,A0L,ftemp3 ; ht = A0L*A0L | ||
1028 | FSTD ftemp3,-8(%sp) ; store ht | ||
1029 | |||
1030 | LDD -24(%sp),m ; load m | ||
1031 | AND m,high_mask,temp2 ; temp2 = m & Mask (upper 33 bits of m) | ||
1032 | DEPD,Z m,30,31,temp3 ; temp3 = m << 33, the low word of (2*m) << 32 | ||
1033 | LDD -16(%sp),lt ; lt | ||
1034 | |||
1035 | LDD -8(%sp),ht ; ht | ||
1036 | EXTRD,U temp2,32,33,temp1 ; temp1 = (m & Mask) >> 31, the high word of (2*m) << 32 | ||
1037 | ADD temp3,lt,lt ; lt += temp3 (produces the carry consumed two lines down) | ||
1038 | ADD,L ht,temp1,ht ; ht += temp1 (,L form: carry from the lt add is left alone) | ||
1039 | ADD,DC ht,%r0,ht ; ht += carry from the lt add | ||
1040 | |||
1041 | ADD C1,lt,C1 ; c1=c1+lt | ||
1042 | ADD,DC ht,%r0,ht ; ht += carry from the c1 add | ||
1043 | |||
1044 | ADD C2,ht,C2 ; c2=c2+ht | ||
1045 | ADD,DC C3,%r0,C3 ; c3 += carry from the c2 add | ||
1046 | .endm | ||
1047 | ; SQR_ADD_C2: (C3:C2:C1) += 2*A0*A1, each operand given as upper:lower 32-bit halves. Expects high_one (1 << 32) to be loaded; uses -32..-8(%sp) as scratch and clobbers m, m1, lt, ht, temp1, temp3. | ||
1048 | SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 | ||
1049 | XMPYU A0L,A1R,ftemp1 ; m1 = A0L*A1R | ||
1050 | FSTD ftemp1,-16(%sp) ; | ||
1051 | XMPYU A0R,A1L,ftemp2 ; m = A0R*A1L | ||
1052 | FSTD ftemp2,-8(%sp) ; | ||
1053 | XMPYU A0R,A1R,ftemp3 ; lt = A0R*A1R | ||
1054 | FSTD ftemp3,-32(%sp) | ||
1055 | XMPYU A0L,A1L,ftemp4 ; ht = A0L*A1L | ||
1056 | FSTD ftemp4,-24(%sp) ; | ||
1057 | |||
1058 | LDD -8(%sp),m ; m = A0R*A1L | ||
1059 | LDD -16(%sp),m1 ; m1 = A0L*A1R | ||
1060 | ADD,L m,m1,m ; m = m+m1 (may wrap; checked by the CMPCLR below) | ||
1061 | |||
1062 | DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32 | ||
1063 | LDD -24(%sp),ht ; ht = A0L*A1L | ||
1064 | |||
1065 | CMPCLR,*>>= m,m1,%r0 ; next instruction runs only if (m < m1), i.e. m+m1 wrapped | ||
1066 | ADD,L ht,high_one,ht ; ht+=high_one | ||
1067 | |||
1068 | EXTRD,U m,31,32,temp1 ; m >> 32 | ||
1069 | LDD -32(%sp),lt ; lt | ||
1070 | ADD,L ht,temp1,ht ; ht+= m>>32 | ||
1071 | ADD lt,temp3,lt ; lt = lt + (m << 32) | ||
1072 | ADD,DC ht,%r0,ht ; ht += carry from the lt add | ||
1073 | |||
1074 | ADD ht,ht,ht ; ht=ht+ht; | ||
1075 | ADD,DC C3,%r0,C3 ; add in carry (c3++) | ||
1076 | |||
1077 | ADD lt,lt,lt ; lt=lt+lt; | ||
1078 | ADD,DC ht,%r0,ht ; add in carry (ht++) | ||
1079 | |||
1080 | ADD C1,lt,C1 ; c1=c1+lt | ||
1081 | ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) | ||
1082 | LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise | ||
1083 | |||
1084 | ADD C2,ht,C2 ; c2 = c2 + ht | ||
1085 | ADD,DC C3,%r0,C3 ; add in carry (c3++) | ||
1086 | .endm | ||
1087 | |||
1088 | ; r[0..15] = a[0..7]^2, built one result word at a time with SQR_ADD_C/SQR_ADD_C2 while c1/c2/c3 rotate as the accumulator | ||
1089 | ;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) | ||
1090 | ; arg0 = r_ptr | ||
1091 | ; arg1 = a_ptr | ||
1092 | ; Saves/restores %r3-%r6 in a 128-byte frame and loads high_mask/high_one for the macros | ||
1093 | |||
1094 | bn_sqr_comba8 | ||
280 | .PROC | 1095 | .PROC |
281 | .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8 | 1096 | .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE |
282 | .ENTRY | 1097 | .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN |
283 | stw %r2,-20(0,%r30) | 1098 | .ENTRY |
284 | stwm %r8,128(0,%r30) | 1099 | .align 64 |
285 | stw %r7,-124(0,%r30) | 1100 | |
286 | stw %r4,-112(0,%r30) | 1101 | STD %r3,0(%sp) ; save r3 |
287 | stw %r3,-108(0,%r30) | 1102 | STD %r4,8(%sp) ; save r4 |
288 | copy %r26,%r3 | 1103 | STD %r5,16(%sp) ; save r5 |
289 | copy %r25,%r4 | 1104 | STD %r6,24(%sp) ; save r6 |
290 | stw %r6,-120(0,%r30) | 1105 | |
291 | ldi 0,%r7 | 1106 | ; |
292 | stw %r5,-116(0,%r30) | 1107 | ; Zero out carries |
293 | movb,<> %r24,%r5,L$0051 | 1108 | ; |
294 | ldi 2,%r6 | 1109 | COPY %r0,c1 |
295 | bl L$0068,0 | 1110 | COPY %r0,c2 |
296 | ldi -1,%r28 | 1111 | COPY %r0,c3 |
297 | L$0051 | 1112 | |
298 | .CALL ARGW0=GR | 1113 | LDO 128(%sp),%sp ; bump stack |
299 | bl BN_num_bits_word,%r2 | 1114 | DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L |
300 | copy %r5,%r26 | 1115 | DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 |
301 | copy %r28,%r24 | 1116 | |
302 | ldi 32,%r19 | 1117 | ; |
303 | comb,= %r19,%r24,L$0052 | 1118 | ; Load up all of the values we are going to use |
304 | subi 31,%r24,%r19 | 1119 | ; |
305 | mtsar %r19 | 1120 | FLDD 0(a_ptr),a0 |
306 | zvdepi 1,32,%r19 | 1121 | FLDD 8(a_ptr),a1 |
307 | comb,>>= %r19,%r3,L$0052 | 1122 | FLDD 16(a_ptr),a2 |
308 | addil LR'__iob-$global$+32,%r27 | 1123 | FLDD 24(a_ptr),a3 |
309 | ldo RR'__iob-$global$+32(%r1),%r26 | 1124 | FLDD 32(a_ptr),a4 |
310 | ldil LR'L$C0000,%r25 | 1125 | FLDD 40(a_ptr),a5 |
311 | .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR | 1126 | FLDD 48(a_ptr),a6 |
312 | bl fprintf,%r2 | 1127 | FLDD 56(a_ptr),a7 |
313 | ldo RR'L$C0000(%r25),%r25 | 1128 | |
314 | .CALL | 1129 | SQR_ADD_C a0L,a0R,c1,c2,c3 |
315 | bl abort,%r2 | 1130 | STD c1,0(r_ptr) ; r[0] = c1; |
316 | nop | 1131 | COPY %r0,c1 |
317 | L$0052 | 1132 | |
318 | comb,>> %r5,%r3,L$0053 | 1133 | SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 |
319 | subi 32,%r24,%r24 | 1134 | STD c2,8(r_ptr) ; r[1] = c2; |
320 | sub %r3,%r5,%r3 | 1135 | COPY %r0,c2 |
321 | L$0053 | 1136 | |
322 | comib,= 0,%r24,L$0054 | 1137 | SQR_ADD_C a1L,a1R,c3,c1,c2 |
323 | subi 31,%r24,%r19 | 1138 | SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 |
324 | mtsar %r19 | 1139 | STD c3,16(r_ptr) ; r[2] = c3; |
325 | zvdep %r5,32,%r5 | 1140 | COPY %r0,c3 |
326 | zvdep %r3,32,%r21 | 1141 | |
327 | subi 32,%r24,%r20 | 1142 | SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 |
328 | mtsar %r20 | 1143 | SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 |
329 | vshd 0,%r4,%r20 | 1144 | STD c1,24(r_ptr) ; r[3] = c1; |
330 | or %r21,%r20,%r3 | 1145 | COPY %r0,c1 |
331 | mtsar %r19 | 1146 | |
332 | zvdep %r4,32,%r4 | 1147 | SQR_ADD_C a2L,a2R,c2,c3,c1 |
333 | L$0054 | 1148 | SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 |
334 | extru %r5,15,16,%r23 | 1149 | SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 |
335 | extru %r5,31,16,%r28 | 1150 | STD c2,32(r_ptr) ; r[4] = c2; |
336 | L$0055 | 1151 | COPY %r0,c2 |
337 | extru %r3,15,16,%r19 | 1152 | |
338 | comb,<> %r23,%r19,L$0058 | 1153 | SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 |
339 | copy %r3,%r26 | 1154 | SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 |
340 | bl L$0059,0 | 1155 | SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 |
341 | zdepi -1,31,16,%r29 | 1156 | STD c3,40(r_ptr) ; r[5] = c3; |
342 | L$0058 | 1157 | COPY %r0,c3 |
343 | .IMPORT $$divU,MILLICODE | 1158 | |
344 | bl $$divU,%r31 | 1159 | SQR_ADD_C a3L,a3R,c1,c2,c3 |
345 | copy %r23,%r25 | 1160 | SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 |
346 | L$0059 | 1161 | SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 |
347 | stw %r29,-16(0,%r30) | 1162 | SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 |
348 | fldws -16(0,%r30),%fr10L | 1163 | STD c1,48(r_ptr) ; r[6] = c1; |
349 | stw %r28,-16(0,%r30) | 1164 | COPY %r0,c1 |
350 | fldws -16(0,%r30),%fr10R | 1165 | |
351 | stw %r23,-16(0,%r30) | 1166 | SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 |
352 | xmpyu %fr10L,%fr10R,%fr8 | 1167 | SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 |
353 | fldws -16(0,%r30),%fr10R | 1168 | SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 |
354 | fstws %fr8R,-16(0,%r30) | 1169 | SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 |
355 | xmpyu %fr10L,%fr10R,%fr9 | 1170 | STD c2,56(r_ptr) ; r[7] = c2; |
356 | ldw -16(0,%r30),%r8 | 1171 | COPY %r0,c2 |
357 | fstws %fr9R,-16(0,%r30) | 1172 | |
358 | copy %r8,%r22 | 1173 | SQR_ADD_C a4L,a4R,c3,c1,c2 |
359 | ldw -16(0,%r30),%r8 | 1174 | SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 |
360 | extru %r4,15,16,%r24 | 1175 | SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 |
361 | copy %r8,%r21 | 1176 | SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 |
362 | L$0060 | 1177 | STD c3,64(r_ptr) ; r[8] = c3; |
363 | sub %r3,%r21,%r20 | 1178 | COPY %r0,c3 |
364 | copy %r20,%r19 | 1179 | |
365 | depi 0,31,16,%r19 | 1180 | SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 |
366 | comib,<> 0,%r19,L$0061 | 1181 | SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 |
367 | zdep %r20,15,16,%r19 | 1182 | SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 |
368 | addl %r19,%r24,%r19 | 1183 | STD c1,72(r_ptr) ; r[9] = c1; |
369 | comb,>>= %r19,%r22,L$0061 | 1184 | COPY %r0,c1 |
370 | sub %r22,%r28,%r22 | 1185 | |
371 | sub %r21,%r23,%r21 | 1186 | SQR_ADD_C a5L,a5R,c2,c3,c1 |
372 | bl L$0060,0 | 1187 | SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 |
373 | ldo -1(%r29),%r29 | 1188 | SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 |
374 | L$0061 | 1189 | STD c2,80(r_ptr) ; r[10] = c2; |
375 | stw %r29,-16(0,%r30) | 1190 | COPY %r0,c2 |
376 | fldws -16(0,%r30),%fr10L | 1191 | |
377 | stw %r28,-16(0,%r30) | 1192 | SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 |
378 | fldws -16(0,%r30),%fr10R | 1193 | SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 |
379 | xmpyu %fr10L,%fr10R,%fr8 | 1194 | STD c3,88(r_ptr) ; r[11] = c3; |
380 | fstws %fr8R,-16(0,%r30) | 1195 | COPY %r0,c3 |
381 | ldw -16(0,%r30),%r8 | 1196 | |
382 | stw %r23,-16(0,%r30) | 1197 | SQR_ADD_C a6L,a6R,c1,c2,c3 |
383 | fldws -16(0,%r30),%fr10R | 1198 | SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 |
384 | copy %r8,%r19 | 1199 | STD c1,96(r_ptr) ; r[12] = c1; |
385 | xmpyu %fr10L,%fr10R,%fr8 | 1200 | COPY %r0,c1 |
386 | fstws %fr8R,-16(0,%r30) | 1201 | |
387 | extru %r19,15,16,%r20 | 1202 | SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 |
388 | ldw -16(0,%r30),%r8 | 1203 | STD c2,104(r_ptr) ; r[13] = c2; |
389 | zdep %r19,15,16,%r19 | 1204 | COPY %r0,c2 |
390 | addl %r8,%r20,%r20 | 1205 | |
391 | comclr,<<= %r19,%r4,0 | 1206 | SQR_ADD_C a7L,a7R,c3,c1,c2 |
392 | addi 1,%r20,%r20 | 1207 | STD c3, 112(r_ptr) ; r[14] = c3 |
393 | comb,<<= %r20,%r3,L$0066 | 1208 | STD c1, 120(r_ptr) ; r[15] = c1 |
394 | sub %r4,%r19,%r4 | 1209 | |
395 | addl %r3,%r5,%r3 | 1210 | .EXIT |
396 | ldo -1(%r29),%r29 | 1211 | LDD -104(%sp),%r6 ; restore r6 |
397 | L$0066 | 1212 | LDD -112(%sp),%r5 ; restore r5 |
398 | addib,= -1,%r6,L$0056 | 1213 | LDD -120(%sp),%r4 ; restore r4 |
399 | sub %r3,%r20,%r3 | 1214 | BVE (%rp) |
400 | zdep %r29,15,16,%r7 | 1215 | LDD,MB -128(%sp),%r3 |
401 | shd %r3,%r4,16,%r3 | 1216 | |
402 | bl L$0055,0 | 1217 | .PROCEND |
403 | zdep %r4,15,16,%r4 | 1218 | |
404 | L$0056 | 1219 | ;----------------------------------------------------------------------------- |
405 | or %r7,%r29,%r28 | 1220 | ; r[0..7] = a[0..3]^2, built one result word at a time with SQR_ADD_C/SQR_ADD_C2; saves/restores %r3-%r6 |
406 | L$0068 | 1221 | ;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) |
407 | ldw -148(0,%r30),%r2 | 1222 | ; arg0 = r_ptr |
408 | ldw -124(0,%r30),%r7 | 1223 | ; arg1 = a_ptr |
409 | ldw -120(0,%r30),%r6 | 1224 | ; |
410 | ldw -116(0,%r30),%r5 | 1225 | |
411 | ldw -112(0,%r30),%r4 | 1226 | bn_sqr_comba4 |
412 | ldw -108(0,%r30),%r3 | 1227 | .proc |
413 | bv 0(%r2) | 1228 | .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE |
414 | ldwm -128(0,%r30),%r8 | 1229 | .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN |
415 | .EXIT | 1230 | .entry |
416 | .PROCEND | 1231 | .align 64 |
1232 | STD %r3,0(%sp) ; save r3 | ||
1233 | STD %r4,8(%sp) ; save r4 | ||
1234 | STD %r5,16(%sp) ; save r5 | ||
1235 | STD %r6,24(%sp) ; save r6 | ||
1236 | |||
1237 | ; | ||
1238 | ; Zero out carries | ||
1239 | ; | ||
1240 | COPY %r0,c1 | ||
1241 | COPY %r0,c2 | ||
1242 | COPY %r0,c3 | ||
1243 | |||
1244 | LDO 128(%sp),%sp ; bump stack | ||
1245 | DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L | ||
1246 | DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 | ||
1247 | |||
1248 | ; | ||
1249 | ; Load up all of the values we are going to use | ||
1250 | ; | ||
1251 | FLDD 0(a_ptr),a0 | ||
1252 | FLDD 8(a_ptr),a1 | ||
1253 | FLDD 16(a_ptr),a2 | ||
1254 | FLDD 24(a_ptr),a3 | ||
1255 | FLDD 32(a_ptr),a4 ; NOTE(review): a4-a7 are never referenced below; presumably a holds only 4 words, so these four loads read past a[3] - confirm and drop | ||
1256 | FLDD 40(a_ptr),a5 | ||
1257 | FLDD 48(a_ptr),a6 | ||
1258 | FLDD 56(a_ptr),a7 | ||
1259 | |||
1260 | SQR_ADD_C a0L,a0R,c1,c2,c3 | ||
1261 | |||
1262 | STD c1,0(r_ptr) ; r[0] = c1; | ||
1263 | COPY %r0,c1 | ||
1264 | |||
1265 | SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 | ||
1266 | |||
1267 | STD c2,8(r_ptr) ; r[1] = c2; | ||
1268 | COPY %r0,c2 | ||
1269 | |||
1270 | SQR_ADD_C a1L,a1R,c3,c1,c2 | ||
1271 | SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 | ||
1272 | |||
1273 | STD c3,16(r_ptr) ; r[2] = c3; | ||
1274 | COPY %r0,c3 | ||
1275 | |||
1276 | SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 | ||
1277 | SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 | ||
1278 | |||
1279 | STD c1,24(r_ptr) ; r[3] = c1; | ||
1280 | COPY %r0,c1 | ||
1281 | |||
1282 | SQR_ADD_C a2L,a2R,c2,c3,c1 | ||
1283 | SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 | ||
1284 | |||
1285 | STD c2,32(r_ptr) ; r[4] = c2; | ||
1286 | COPY %r0,c2 | ||
1287 | |||
1288 | SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 | ||
1289 | STD c3,40(r_ptr) ; r[5] = c3; | ||
1290 | COPY %r0,c3 | ||
1291 | |||
1292 | SQR_ADD_C a3L,a3R,c1,c2,c3 | ||
1293 | STD c1,48(r_ptr) ; r[6] = c1; | ||
1294 | STD c2,56(r_ptr) ; r[7] = c2; | ||
1295 | |||
1296 | .EXIT | ||
1297 | LDD -104(%sp),%r6 ; restore r6 | ||
1298 | LDD -112(%sp),%r5 ; restore r5 | ||
1299 | LDD -120(%sp),%r4 ; restore r4 | ||
1300 | BVE (%rp) | ||
1301 | LDD,MB -128(%sp),%r3 | ||
1302 | |||
1303 | .PROCEND | ||
1304 | |||
1305 | |||
1306 | ;--------------------------------------------------------------------------- | ||
1307 | ; MUL_ADD_C: (C3:C2:C1) += A0*B0, each operand given as upper:lower 32-bit halves. Expects high_one (1 << 32) to be loaded; uses -32..-8(%sp) as scratch and clobbers m, m1, lt, ht, temp1, temp3. | ||
1308 | MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 | ||
1309 | XMPYU A0L,B0R,ftemp1 ; m1 = A0L*B0R | ||
1310 | FSTD ftemp1,-16(%sp) ; | ||
1311 | XMPYU A0R,B0L,ftemp2 ; m = A0R*B0L | ||
1312 | FSTD ftemp2,-8(%sp) ; | ||
1313 | XMPYU A0R,B0R,ftemp3 ; lt = A0R*B0R | ||
1314 | FSTD ftemp3,-32(%sp) | ||
1315 | XMPYU A0L,B0L,ftemp4 ; ht = A0L*B0L | ||
1316 | FSTD ftemp4,-24(%sp) ; | ||
1317 | |||
1318 | LDD -8(%sp),m ; m = A0R*B0L | ||
1319 | LDD -16(%sp),m1 ; m1 = A0L*B0R | ||
1320 | ADD,L m,m1,m ; m = m+m1 (may wrap; checked by the CMPCLR below) | ||
1321 | |||
1322 | DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32 | ||
1323 | LDD -24(%sp),ht ; ht = A0L*B0L | ||
1324 | |||
1325 | CMPCLR,*>>= m,m1,%r0 ; next instruction runs only if (m < m1), i.e. m+m1 wrapped | ||
1326 | ADD,L ht,high_one,ht ; ht+=high_one | ||
1327 | |||
1328 | EXTRD,U m,31,32,temp1 ; m >> 32 | ||
1329 | LDD -32(%sp),lt ; lt | ||
1330 | ADD,L ht,temp1,ht ; ht+= m>>32 | ||
1331 | ADD lt,temp3,lt ; lt = lt + (m << 32) | ||
1332 | ADD,DC ht,%r0,ht ; ht += carry from the lt add | ||
1333 | |||
1334 | ADD C1,lt,C1 ; c1=c1+lt | ||
1335 | ADD,DC ht,%r0,ht ; add in carry (ht++) | ||
1336 | |||
1337 | ADD C2,ht,C2 ; c2 = c2 + ht | ||
1338 | ADD,DC C3,%r0,C3 ; add in carry (c3++) | ||
1339 | .endm | ||
1340 | |||
1341 | |||
1342 | ; r[0..15] = a[0..7] * b[0..7], one MUL_ADD_C per partial product while c1/c2/c3 rotate as the accumulator | ||
1343 | ;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
1344 | ; arg0 = r_ptr | ||
1345 | ; arg1 = a_ptr | ||
1346 | ; arg2 = b_ptr | ||
1347 | ; Saves/restores %r3-%r6 and %fr12/%fr13 (b6/b7) in a 128-byte frame | ||
1348 | |||
1349 | bn_mul_comba8 | ||
1350 | .proc | ||
1351 | .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE | ||
1352 | .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
1353 | .entry | ||
1354 | .align 64 | ||
1355 | |||
1356 | STD %r3,0(%sp) ; save r3 | ||
1357 | STD %r4,8(%sp) ; save r4 | ||
1358 | STD %r5,16(%sp) ; save r5 | ||
1359 | STD %r6,24(%sp) ; save r6 | ||
1360 | FSTD %fr12,32(%sp) ; save fr12 (b6) | ||
1361 | FSTD %fr13,40(%sp) ; save fr13 (b7) | ||
1362 | |||
1363 | ; | ||
1364 | ; Zero out carries | ||
1365 | ; | ||
1366 | COPY %r0,c1 | ||
1367 | COPY %r0,c2 | ||
1368 | COPY %r0,c3 | ||
1369 | |||
1370 | LDO 128(%sp),%sp ; bump stack | ||
1371 | DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 | ||
1372 | |||
1373 | ; | ||
1374 | ; Load up all of the values we are going to use | ||
1375 | ; | ||
1376 | FLDD 0(a_ptr),a0 | ||
1377 | FLDD 8(a_ptr),a1 | ||
1378 | FLDD 16(a_ptr),a2 | ||
1379 | FLDD 24(a_ptr),a3 | ||
1380 | FLDD 32(a_ptr),a4 | ||
1381 | FLDD 40(a_ptr),a5 | ||
1382 | FLDD 48(a_ptr),a6 | ||
1383 | FLDD 56(a_ptr),a7 | ||
1384 | |||
1385 | FLDD 0(b_ptr),b0 | ||
1386 | FLDD 8(b_ptr),b1 | ||
1387 | FLDD 16(b_ptr),b2 | ||
1388 | FLDD 24(b_ptr),b3 | ||
1389 | FLDD 32(b_ptr),b4 | ||
1390 | FLDD 40(b_ptr),b5 | ||
1391 | FLDD 48(b_ptr),b6 | ||
1392 | FLDD 56(b_ptr),b7 | ||
1393 | |||
1394 | MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 | ||
1395 | STD c1,0(r_ptr) | ||
1396 | COPY %r0,c1 | ||
1397 | |||
1398 | MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 | ||
1399 | MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 | ||
1400 | STD c2,8(r_ptr) | ||
1401 | COPY %r0,c2 | ||
1402 | |||
1403 | MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 | ||
1404 | MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 | ||
1405 | MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 | ||
1406 | STD c3,16(r_ptr) | ||
1407 | COPY %r0,c3 | ||
1408 | |||
1409 | MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 | ||
1410 | MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 | ||
1411 | MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 | ||
1412 | MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 | ||
1413 | STD c1,24(r_ptr) | ||
1414 | COPY %r0,c1 | ||
1415 | |||
1416 | MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 | ||
1417 | MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 | ||
1418 | MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 | ||
1419 | MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 | ||
1420 | MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 | ||
1421 | STD c2,32(r_ptr) | ||
1422 | COPY %r0,c2 | ||
1423 | |||
1424 | MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 | ||
1425 | MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 | ||
1426 | MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 | ||
1427 | MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 | ||
1428 | MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 | ||
1429 | MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 | ||
1430 | STD c3,40(r_ptr) | ||
1431 | COPY %r0,c3 | ||
1432 | |||
1433 | MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 | ||
1434 | MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 | ||
1435 | MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 | ||
1436 | MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 | ||
1437 | MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 | ||
1438 | MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 | ||
1439 | MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 | ||
1440 | STD c1,48(r_ptr) | ||
1441 | COPY %r0,c1 | ||
1442 | |||
1443 | MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 | ||
1444 | MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 | ||
1445 | MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 | ||
1446 | MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 | ||
1447 | MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 | ||
1448 | MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 | ||
1449 | MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 | ||
1450 | MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 | ||
1451 | STD c2,56(r_ptr) | ||
1452 | COPY %r0,c2 | ||
1453 | |||
1454 | MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 | ||
1455 | MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 | ||
1456 | MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 | ||
1457 | MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 | ||
1458 | MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 | ||
1459 | MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 | ||
1460 | MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 | ||
1461 | STD c3,64(r_ptr) | ||
1462 | COPY %r0,c3 | ||
1463 | |||
1464 | MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 | ||
1465 | MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 | ||
1466 | MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 | ||
1467 | MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 | ||
1468 | MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 | ||
1469 | MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 | ||
1470 | STD c1,72(r_ptr) | ||
1471 | COPY %r0,c1 | ||
1472 | |||
1473 | MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 | ||
1474 | MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 | ||
1475 | MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 | ||
1476 | MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 | ||
1477 | MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 | ||
1478 | STD c2,80(r_ptr) | ||
1479 | COPY %r0,c2 | ||
1480 | |||
1481 | MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 | ||
1482 | MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 | ||
1483 | MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 | ||
1484 | MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 | ||
1485 | STD c3,88(r_ptr) | ||
1486 | COPY %r0,c3 | ||
1487 | |||
1488 | MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 | ||
1489 | MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 | ||
1490 | MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 | ||
1491 | STD c1,96(r_ptr) | ||
1492 | COPY %r0,c1 | ||
1493 | |||
1494 | MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 | ||
1495 | MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 | ||
1496 | STD c2,104(r_ptr) | ||
1497 | COPY %r0,c2 | ||
1498 | |||
1499 | MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 | ||
1500 | STD c3,112(r_ptr) | ||
1501 | STD c1,120(r_ptr) | ||
1502 | |||
1503 | .EXIT | ||
1504 | FLDD -88(%sp),%fr13 ; restore fr13 (b7) | ||
1505 | FLDD -96(%sp),%fr12 ; restore fr12 (b6) | ||
1506 | LDD -104(%sp),%r6 ; restore r6 | ||
1507 | LDD -112(%sp),%r5 ; restore r5 | ||
1508 | LDD -120(%sp),%r4 ; restore r4 | ||
1509 | BVE (%rp) | ||
1510 | LDD,MB -128(%sp),%r3 | ||
1511 | |||
1512 | .PROCEND | ||
1513 | |||
1514 | ;----------------------------------------------------------------------------- | ||
1515 | ; r[0..7] = a[0..3] * b[0..3], one MUL_ADD_C per partial product while c1/c2/c3 rotate as the accumulator | ||
1516 | ;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
1517 | ; arg0 = r_ptr | ||
1518 | ; arg1 = a_ptr | ||
1519 | ; arg2 = b_ptr | ||
1520 | ; NOTE(review): %fr12/%fr13 (b6/b7) are saved and restored here but never referenced by this routine | ||
1521 | |||
1522 | bn_mul_comba4 | ||
1523 | .proc | ||
1524 | .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE | ||
1525 | .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
1526 | .entry | ||
1527 | .align 64 | ||
1528 | |||
1529 | STD %r3,0(%sp) ; save r3 | ||
1530 | STD %r4,8(%sp) ; save r4 | ||
1531 | STD %r5,16(%sp) ; save r5 | ||
1532 | STD %r6,24(%sp) ; save r6 | ||
1533 | FSTD %fr12,32(%sp) ; save fr12 | ||
1534 | FSTD %fr13,40(%sp) ; save fr13 | ||
1535 | |||
1536 | ; | ||
1537 | ; Zero out carries | ||
1538 | ; | ||
1539 | COPY %r0,c1 | ||
1540 | COPY %r0,c2 | ||
1541 | COPY %r0,c3 | ||
1542 | |||
1543 | LDO 128(%sp),%sp ; bump stack | ||
1544 | DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 | ||
1545 | |||
1546 | ; | ||
1547 | ; Load up all of the values we are going to use | ||
1548 | ; | ||
1549 | FLDD 0(a_ptr),a0 | ||
1550 | FLDD 8(a_ptr),a1 | ||
1551 | FLDD 16(a_ptr),a2 | ||
1552 | FLDD 24(a_ptr),a3 | ||
1553 | |||
1554 | FLDD 0(b_ptr),b0 | ||
1555 | FLDD 8(b_ptr),b1 | ||
1556 | FLDD 16(b_ptr),b2 | ||
1557 | FLDD 24(b_ptr),b3 | ||
1558 | |||
1559 | MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 | ||
1560 | STD c1,0(r_ptr) | ||
1561 | COPY %r0,c1 | ||
1562 | |||
1563 | MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 | ||
1564 | MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 | ||
1565 | STD c2,8(r_ptr) | ||
1566 | COPY %r0,c2 | ||
1567 | |||
1568 | MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 | ||
1569 | MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 | ||
1570 | MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 | ||
1571 | STD c3,16(r_ptr) | ||
1572 | COPY %r0,c3 | ||
1573 | |||
1574 | MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 | ||
1575 | MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 | ||
1576 | MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 | ||
1577 | MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 | ||
1578 | STD c1,24(r_ptr) | ||
1579 | COPY %r0,c1 | ||
1580 | |||
1581 | MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 | ||
1582 | MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 | ||
1583 | MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 | ||
1584 | STD c2,32(r_ptr) | ||
1585 | COPY %r0,c2 | ||
1586 | |||
1587 | MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 | ||
1588 | MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 | ||
1589 | STD c3,40(r_ptr) | ||
1590 | COPY %r0,c3 | ||
1591 | |||
1592 | MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 | ||
1593 | STD c1,48(r_ptr) | ||
1594 | STD c2,56(r_ptr) | ||
1595 | |||
1596 | .EXIT | ||
1597 | FLDD -88(%sp),%fr13 ; restore fr13 | ||
1598 | FLDD -96(%sp),%fr12 ; restore fr12 | ||
1599 | LDD -104(%sp),%r6 ; restore r6 | ||
1600 | LDD -112(%sp),%r5 ; restore r5 | ||
1601 | LDD -120(%sp),%r4 ; restore r4 | ||
1602 | BVE (%rp) | ||
1603 | LDD,MB -128(%sp),%r3 | ||
1604 | |||
1605 | .PROCEND | ||
1606 | |||
1607 | |||
1608 | .SPACE $TEXT$ | ||
1609 | .SUBSPA $CODE$ | ||
1610 | .SPACE $PRIVATE$,SORT=16 | ||
1611 | .IMPORT $global$,DATA | ||
1612 | .SPACE $TEXT$ | ||
1613 | .SUBSPA $CODE$ | ||
1614 | .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=0x2c,SORT=16 | ||
1615 | C$7 | ||
1616 | .ALIGN 8 | ||
1617 | .STRINGZ "Division would overflow (%d)\n" | ||
1618 | .END | ||
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s new file mode 100644 index 0000000000..54b6606252 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/pa-risc2W.s | |||
@@ -0,0 +1,1605 @@ | |||
1 | ; | ||
2 | ; PA-RISC 64-bit implementation of bn_asm code | ||
3 | ; | ||
4 | ; This code is approximately 2x faster than the C version | ||
5 | ; for RSA/DSA. | ||
6 | ; | ||
7 | ; See http://devresource.hp.com/ for more details on the PA-RISC | ||
8 | ; architecture. Also see the book "PA-RISC 2.0 Architecture" | ||
9 | ; by Gerry Kane for information on the instruction set architecture. | ||
10 | ; | ||
11 | ; Code written by Chris Ruemmler (with some help from the HP C | ||
12 | ; compiler). | ||
13 | ; | ||
14 | ; The code compiles with HP's assembler | ||
15 | ; | ||
16 | |||
17 | .level 2.0W | ||
18 | .space $TEXT$ | ||
19 | .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY | ||
20 | |||
21 | ; | ||
22 | ; Global Register definitions used for the routines. | ||
23 | ; | ||
24 | ; Some information about HP's runtime architecture for 64-bits. | ||
25 | ; | ||
26 | ; "Caller save" means the calling function must save the register | ||
27 | ; if it wants the register to be preserved. | ||
28 | ; "Callee save" means if a function uses the register, it must save | ||
29 | ; the value before using it. | ||
30 | ; | ||
31 | ; For the floating point registers | ||
32 | ; | ||
33 | ; "caller save" registers: fr4-fr11, fr22-fr31 | ||
34 | ; "callee save" registers: fr12-fr21 | ||
35 | ; "special" registers: fr0-fr3 (status and exception registers) | ||
36 | ; | ||
37 | ; For the integer registers | ||
38 | ; value zero : r0 | ||
39 | ; "caller save" registers: r1,r19-r26 | ||
40 | ; "callee save" registers: r3-r18 | ||
41 | ; return register : r2 (rp) | ||
42 | ; return values ; r28 (ret0,ret1) | ||
43 | ; Stack pointer ; r30 (sp) | ||
44 | ; global data pointer ; r27 (dp) | ||
45 | ; argument pointer ; r29 (ap) | ||
46 | ; millicode return ptr ; r31 (also a caller save register) | ||
47 | |||
48 | |||
49 | ; | ||
50 | ; Arguments to the routines | ||
51 | ; (b_ptr and num both alias %r24, and w and n both alias %r23; which name applies depends on the routine) | ||
52 | r_ptr .reg %r26 | ||
53 | a_ptr .reg %r25 | ||
54 | b_ptr .reg %r24 | ||
55 | num .reg %r24 | ||
56 | w .reg %r23 | ||
57 | n .reg %r23 | ||
58 | |||
59 | |||
60 | ; | ||
61 | ; Globals used in some routines | ||
62 | ; | ||
63 | |||
64 | top_overflow .reg %r29 ; bn_mul_add_words loads this with 1 << 32 | ||
65 | high_mask .reg %r22 ; value 0xffffffff80000000L | ||
66 | |||
67 | |||
68 | ;------------------------------------------------------------------------------ | ||
69 | ; | ||
70 | ; bn_mul_add_words | ||
71 | ; | ||
72 | ;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, | ||
73 | ; int num, BN_ULONG w) | ||
74 | ; | ||
75 | ; arg0 = r_ptr | ||
76 | ; arg1 = a_ptr | ||
77 | ; arg2 = num | ||
78 | ; arg3 = w | ||
79 | ; | ||
80 | ; Local register definitions | ||
81 | ; | ||
82 | |||
83 | fm1 .reg %fr22 | ||
84 | fm .reg %fr23 | ||
85 | ht_temp .reg %fr24 | ||
86 | ht_temp_1 .reg %fr25 | ||
87 | lt_temp .reg %fr26 | ||
88 | lt_temp_1 .reg %fr27 | ||
89 | fm1_1 .reg %fr28 | ||
90 | fm_1 .reg %fr29 | ||
91 | |||
92 | fw_h .reg %fr7L | ||
93 | fw_l .reg %fr7R | ||
94 | fw .reg %fr7 | ||
95 | |||
96 | fht_0 .reg %fr8L | ||
97 | flt_0 .reg %fr8R | ||
98 | t_float_0 .reg %fr8 | ||
99 | |||
100 | fht_1 .reg %fr9L | ||
101 | flt_1 .reg %fr9R | ||
102 | t_float_1 .reg %fr9 | ||
103 | |||
104 | tmp_0 .reg %r31 | ||
105 | tmp_1 .reg %r21 | ||
106 | m_0 .reg %r20 | ||
107 | m_1 .reg %r19 | ||
108 | ht_0 .reg %r1 | ||
109 | ht_1 .reg %r3 | ||
110 | lt_0 .reg %r4 | ||
111 | lt_1 .reg %r5 | ||
112 | m1_0 .reg %r6 | ||
113 | m1_1 .reg %r7 | ||
114 | rp_val .reg %r8 | ||
115 | rp_val_1 .reg %r9 | ||
116 | |||
117 | bn_mul_add_words | ||
118 | .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN | ||
119 | .proc | ||
120 | .callinfo frame=128 | ||
121 | .entry | ||
122 | .align 64 | ||
123 | |||
124 | STD %r3,0(%sp) ; save r3 | ||
125 | STD %r4,8(%sp) ; save r4 | ||
126 | NOP ; Needed to make the loop 16-byte aligned | ||
127 | NOP ; Needed to make the loop 16-byte aligned | ||
128 | |||
129 | STD %r5,16(%sp) ; save r5 | ||
130 | STD %r6,24(%sp) ; save r6 | ||
131 | STD %r7,32(%sp) ; save r7 | ||
132 | STD %r8,40(%sp) ; save r8 | ||
133 | |||
134 | STD %r9,48(%sp) ; save r9 | ||
135 | COPY %r0,%ret0 ; return 0 by default | ||
136 | DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 | ||
137 | STD w,56(%sp) ; store w on stack | ||
138 | |||
139 | CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit | ||
140 | LDO 128(%sp),%sp ; bump stack | ||
141 | |||
142 | ; | ||
143 | ; The loop is unrolled twice, so if there is only 1 number | ||
144 | ; then go straight to the cleanup code. | ||
145 | ; | ||
146 | CMPIB,= 1,num,bn_mul_add_words_single_top | ||
147 | FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) | ||
148 | |||
149 | ; | ||
150 | ; This loop is unrolled 2 times (64-byte aligned as well) | ||
151 | ; | ||
152 | ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus | ||
153 | ; two 32-bit multiplies can be issued per cycle. | ||
154 | ; | ||
155 | bn_mul_add_words_unroll2 | ||
156 | |||
157 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
158 | FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
159 | LDD 0(r_ptr),rp_val ; rp[0] | ||
160 | LDD 8(r_ptr),rp_val_1 ; rp[1] | ||
161 | |||
162 | XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l | ||
163 | XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l | ||
164 | FSTD fm1,-16(%sp) ; -16(sp) = m1[0] | ||
165 | FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] | ||
166 | |||
167 | XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h | ||
168 | XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h | ||
169 | FSTD fm,-8(%sp) ; -8(sp) = m[0] | ||
170 | FSTD fm_1,-40(%sp) ; -40(sp) = m[1] | ||
171 | |||
172 | XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h | ||
173 | XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h | ||
174 | FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp | ||
175 | FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 | ||
176 | |||
177 | XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l | ||
178 | XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l | ||
179 | FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp | ||
180 | FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 | ||
181 | |||
182 | LDD -8(%sp),m_0 ; m[0] | ||
183 | LDD -40(%sp),m_1 ; m[1] | ||
184 | LDD -16(%sp),m1_0 ; m1[0] | ||
185 | LDD -48(%sp),m1_1 ; m1[1] | ||
186 | |||
187 | LDD -24(%sp),ht_0 ; ht[0] | ||
188 | LDD -56(%sp),ht_1 ; ht[1] | ||
189 | ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; | ||
190 | ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; | ||
191 | |||
192 | LDD -32(%sp),lt_0 | ||
193 | LDD -64(%sp),lt_1 | ||
194 | CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) | ||
195 | ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) | ||
196 | |||
197 | CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) | ||
198 | ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) | ||
199 | EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 | ||
200 | DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 | ||
201 | |||
202 | EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 | ||
203 | DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 | ||
204 | ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) | ||
205 | ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) | ||
206 | |||
207 | ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; | ||
208 | ADD,DC ht_0,%r0,ht_0 ; ht[0]++ | ||
209 | ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; | ||
210 | ADD,DC ht_1,%r0,ht_1 ; ht[1]++ | ||
211 | |||
212 | ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c; | ||
213 | ADD,DC ht_0,%r0,ht_0 ; ht[0]++ | ||
214 | ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] | ||
215 | ADD,DC ht_0,%r0,ht_0 ; ht[0]++ | ||
216 | |||
217 | LDO -2(num),num ; num = num - 2; | ||
218 | ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); | ||
219 | ADD,DC ht_1,%r0,ht_1 ; ht[1]++ | ||
220 | STD lt_0,0(r_ptr) ; rp[0] = lt[0] | ||
221 | |||
222 | ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] | ||
223 | ADD,DC ht_1,%r0,%ret0 ; ht[1]++ | ||
224 | LDO 16(a_ptr),a_ptr ; a_ptr += 2 | ||
225 | |||
226 | STD lt_1,8(r_ptr) ; rp[1] = lt[1] | ||
227 | CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do | ||
228 | LDO 16(r_ptr),r_ptr ; r_ptr += 2 | ||
229 | |||
230 | CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one | ||
231 | |||
232 | ; | ||
233 | ; Top of loop aligned on 64-byte boundary | ||
234 | ; | ||
235 | bn_mul_add_words_single_top | ||
236 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
237 | LDD 0(r_ptr),rp_val ; rp[0] | ||
238 | LDO 8(a_ptr),a_ptr ; a_ptr++ | ||
239 | XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l | ||
240 | FSTD fm1,-16(%sp) ; -16(sp) = m1 | ||
241 | XMPYU flt_0,fw_h,fm ; m = lt*fw_h | ||
242 | FSTD fm,-8(%sp) ; -8(sp) = m | ||
243 | XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h | ||
244 | FSTD ht_temp,-24(%sp) ; -24(sp) = ht | ||
245 | XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l | ||
246 | FSTD lt_temp,-32(%sp) ; -32(sp) = lt | ||
247 | |||
248 | LDD -8(%sp),m_0 | ||
249 | LDD -16(%sp),m1_0 ; m1 = temp1 | ||
250 | ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; | ||
251 | LDD -24(%sp),ht_0 | ||
252 | LDD -32(%sp),lt_0 | ||
253 | |||
254 | CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) | ||
255 | ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) | ||
256 | |||
257 | EXTRD,U tmp_0,31,32,m_0 ; m>>32 | ||
258 | DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 | ||
259 | |||
260 | ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) | ||
261 | ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; | ||
262 | ADD,DC ht_0,%r0,ht_0 ; ht++ | ||
263 | ADD %ret0,tmp_0,lt_0 ; lt = lt + c; | ||
264 | ADD,DC ht_0,%r0,ht_0 ; ht++ | ||
265 | ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] | ||
266 | ADD,DC ht_0,%r0,%ret0 ; ht++ | ||
267 | STD lt_0,0(r_ptr) ; rp[0] = lt | ||
268 | |||
269 | bn_mul_add_words_exit | ||
270 | .EXIT | ||
271 | LDD -80(%sp),%r9 ; restore r9 | ||
272 | LDD -88(%sp),%r8 ; restore r8 | ||
273 | LDD -96(%sp),%r7 ; restore r7 | ||
274 | LDD -104(%sp),%r6 ; restore r6 | ||
275 | LDD -112(%sp),%r5 ; restore r5 | ||
276 | LDD -120(%sp),%r4 ; restore r4 | ||
277 | BVE (%rp) | ||
278 | LDD,MB -128(%sp),%r3 ; restore r3 | ||
279 | .PROCEND ;in=23,24,25,26,29;out=28; | ||
280 | |||
281 | ;---------------------------------------------------------------------------- | ||
282 | ; | ||
283 | ;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | ||
284 | ; rp[i] = low 64 bits of (ap[i]*w + carry) for i < num; returns the final carry (0 if num <= 0) | ||
285 | ; arg0 = rp | ||
286 | ; arg1 = ap | ||
287 | ; arg2 = num | ||
288 | ; arg3 = w | ||
289 | |||
290 | bn_mul_words | ||
291 | .proc | ||
292 | .callinfo frame=128 | ||
293 | .entry | ||
294 | .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
295 | .align 64 | ||
296 | |||
297 | STD %r3,0(%sp) ; save r3 | ||
298 | STD %r4,8(%sp) ; save r4 | ||
299 | STD %r5,16(%sp) ; save r5 | ||
300 | STD %r6,24(%sp) ; save r6 | ||
301 | |||
302 | STD %r7,32(%sp) ; save r7 | ||
303 | COPY %r0,%ret0 ; return 0 by default | ||
304 | DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 | ||
305 | STD w,56(%sp) ; w on stack (at -72(%sp) once the stack is bumped) | ||
306 | |||
307 | CMPIB,>= 0,num,bn_mul_words_exit ; if (num <= 0) return 0 | ||
308 | LDO 128(%sp),%sp ; bump stack (delay slot, runs on both paths) | ||
309 | |||
310 | ; | ||
311 | ; See if only 1 word to do, thus just do cleanup | ||
312 | ; | ||
313 | CMPIB,= 1,num,bn_mul_words_single_top | ||
314 | FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) | ||
315 | |||
316 | ; | ||
317 | ; This loop is unrolled 2 times (64-byte aligned as well) | ||
318 | ; | ||
319 | ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus | ||
320 | ; two 32-bit multiplies can be issued per cycle. | ||
321 | ; | ||
322 | bn_mul_words_unroll2 | ||
323 | |||
324 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
325 | FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
326 | XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l | ||
327 | XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l | ||
328 | |||
329 | FSTD fm1,-16(%sp) ; -16(sp) = m1[0] | ||
330 | FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] | ||
331 | XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h | ||
332 | XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h | ||
333 | |||
334 | FSTD fm,-8(%sp) ; -8(sp) = m[0] | ||
335 | FSTD fm_1,-40(%sp) ; -40(sp) = m[1] | ||
336 | XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h | ||
337 | XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h | ||
338 | |||
339 | FSTD ht_temp,-24(%sp) ; -24(sp) = ht[0] | ||
340 | FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht[1] | ||
341 | XMPYU flt_0,fw_l,lt_temp ; lt_temp = flt_0*fw_l | ||
342 | XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp_1 = flt_1*fw_l | ||
343 | |||
344 | FSTD lt_temp,-32(%sp) ; -32(sp) = lt[0] | ||
345 | FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt[1] | ||
346 | LDD -8(%sp),m_0 ; m[0] | ||
347 | LDD -40(%sp),m_1 ; m[1] | ||
348 | |||
349 | LDD -16(%sp),m1_0 ; m1[0] | ||
350 | LDD -48(%sp),m1_1 ; m1[1] | ||
351 | LDD -24(%sp),ht_0 ; ht[0] | ||
352 | LDD -56(%sp),ht_1 ; ht[1] | ||
353 | |||
354 | ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; | ||
355 | ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; | ||
356 | LDD -32(%sp),lt_0 ; lt[0] | ||
357 | LDD -64(%sp),lt_1 ; lt[1] | ||
358 | |||
359 | CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (tmp_0 < m1[0]), i.e. the sum wrapped | ||
360 | ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) | ||
361 | CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (tmp_1 < m1[1]), i.e. the sum wrapped | ||
362 | ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) | ||
363 | |||
364 | EXTRD,U tmp_0,31,32,m_0 ; m[0] = tmp_0>>32 | ||
365 | DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = tmp_0<<32 | ||
366 | EXTRD,U tmp_1,31,32,m_1 ; m[1] = tmp_1>>32 | ||
367 | DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = tmp_1<<32 | ||
368 | |||
369 | ADD,L ht_0,m_0,ht_0 ; ht[0] += m[0] | ||
370 | ADD,L ht_1,m_1,ht_1 ; ht[1] += m[1] | ||
371 | ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; | ||
372 | ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry | ||
373 | |||
374 | ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; | ||
375 | ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry | ||
376 | ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c (ret0); | ||
377 | ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry | ||
378 | |||
379 | ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + c (ht_0) | ||
380 | ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry | ||
381 | STD lt_0,0(r_ptr) ; rp[0] = lt[0] | ||
382 | STD lt_1,8(r_ptr) ; rp[1] = lt[1] | ||
383 | |||
384 | COPY ht_1,%ret0 ; carry = ht[1] | ||
385 | LDO -2(num),num ; num = num - 2; | ||
386 | LDO 16(a_ptr),a_ptr ; ap += 2 | ||
387 | CMPIB,<= 2,num,bn_mul_words_unroll2 | ||
388 | LDO 16(r_ptr),r_ptr ; rp += 2 | ||
389 | |||
390 | CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? | ||
391 | |||
392 | ; | ||
393 | ; Cleanup: handle the single remaining word | ||
394 | ; | ||
395 | bn_mul_words_single_top | ||
396 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
397 | |||
398 | XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l | ||
399 | FSTD fm1,-16(%sp) ; -16(sp) = m1 | ||
400 | XMPYU flt_0,fw_h,fm ; m = lt*fw_h | ||
401 | FSTD fm,-8(%sp) ; -8(sp) = m | ||
402 | XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h | ||
403 | FSTD ht_temp,-24(%sp) ; -24(sp) = ht | ||
404 | XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l | ||
405 | FSTD lt_temp,-32(%sp) ; -32(sp) = lt | ||
406 | |||
407 | LDD -8(%sp),m_0 ; m | ||
408 | LDD -16(%sp),m1_0 ; m1 | ||
409 | ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; | ||
410 | LDD -24(%sp),ht_0 ; ht | ||
411 | LDD -32(%sp),lt_0 ; lt | ||
412 | |||
413 | CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (tmp_0 < m1), i.e. the sum wrapped | ||
414 | ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) | ||
415 | |||
416 | EXTRD,U tmp_0,31,32,m_0 ; m = tmp_0>>32 | ||
417 | DEPD,Z tmp_0,31,32,m1_0 ; m1 = tmp_0<<32 | ||
418 | |||
419 | ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) | ||
420 | ADD lt_0,m1_0,lt_0 ; lt= lt+m1; | ||
421 | ADD,DC ht_0,%r0,ht_0 ; ht += carry | ||
422 | |||
423 | ADD %ret0,lt_0,lt_0 ; lt = lt + c; | ||
424 | ADD,DC ht_0,%r0,ht_0 ; ht += carry | ||
425 | |||
426 | COPY ht_0,%ret0 ; copy carry | ||
427 | STD lt_0,0(r_ptr) ; rp[0] = lt | ||
428 | |||
429 | bn_mul_words_exit | ||
430 | .EXIT | ||
431 | LDD -96(%sp),%r7 ; restore r7 | ||
432 | LDD -104(%sp),%r6 ; restore r6 | ||
433 | LDD -112(%sp),%r5 ; restore r5 | ||
434 | LDD -120(%sp),%r4 ; restore r4 | ||
435 | BVE (%rp) | ||
436 | LDD,MB -128(%sp),%r3 ; restore r3 | ||
437 | .PROCEND ;in=23,24,25,26,29;out=28; | ||
438 | |||
439 | ;---------------------------------------------------------------------------- | ||
440 | ; | ||
441 | ;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) | ||
442 | ; For each i < num: rp[2*i] = low 64 bits of ap[i]^2, rp[2*i+1] = high 64 bits | ||
443 | ; arg0 = rp | ||
444 | ; arg1 = ap | ||
445 | ; arg2 = num | ||
446 | ; | ||
447 | |||
448 | bn_sqr_words | ||
449 | .proc | ||
450 | .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE | ||
451 | .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
452 | .entry | ||
453 | .align 64 | ||
454 | |||
455 | STD %r3,0(%sp) ; save r3 | ||
456 | STD %r4,8(%sp) ; save r4 | ||
457 | NOP | ||
458 | STD %r5,16(%sp) ; save r5 | ||
459 | |||
460 | CMPIB,>= 0,num,bn_sqr_words_exit ; if (num <= 0) nothing to do | ||
461 | LDO 128(%sp),%sp ; bump stack (delay slot, runs on both paths) | ||
462 | |||
463 | ; | ||
464 | ; If only 1, then go straight to cleanup | ||
465 | ; | ||
466 | CMPIB,= 1,num,bn_sqr_words_single_top | ||
467 | DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L | ||
468 | |||
469 | ; | ||
470 | ; This loop is unrolled 2 times (64-byte aligned as well) | ||
471 | ; | ||
472 | |||
473 | bn_sqr_words_unroll2 | ||
474 | FLDD 0(a_ptr),t_float_0 ; a[0] | ||
475 | FLDD 8(a_ptr),t_float_1 ; a[1] | ||
476 | XMPYU fht_0,flt_0,fm ; m[0] = ht*lt of a[0] | ||
477 | XMPYU fht_1,flt_1,fm_1 ; m[1] = ht*lt of a[1] | ||
478 | |||
479 | FSTD fm,-24(%sp) ; store m[0] | ||
480 | FSTD fm_1,-56(%sp) ; store m[1] | ||
481 | XMPYU flt_0,flt_0,lt_temp ; lt[0] = lt*lt of a[0] | ||
482 | XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] = lt*lt of a[1] | ||
483 | |||
484 | FSTD lt_temp,-16(%sp) ; store lt[0] | ||
485 | FSTD lt_temp_1,-48(%sp) ; store lt[1] | ||
486 | XMPYU fht_0,fht_0,ht_temp ; ht[0] = ht*ht of a[0] | ||
487 | XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] = ht*ht of a[1] | ||
488 | |||
489 | FSTD ht_temp,-8(%sp) ; store ht[0] | ||
490 | FSTD ht_temp_1,-40(%sp) ; store ht[1] | ||
491 | LDD -24(%sp),m_0 ; load m[0] | ||
492 | LDD -56(%sp),m_1 ; load m[1] | ||
493 | |||
494 | AND m_0,high_mask,tmp_0 ; m[0] & Mask | ||
495 | AND m_1,high_mask,tmp_1 ; m[1] & Mask | ||
496 | DEPD,Z m_0,30,31,m_0 ; m[0] << 33 (low word of 2*m[0]<<32) | ||
497 | DEPD,Z m_1,30,31,m_1 ; m[1] << 33 (low word of 2*m[1]<<32) | ||
498 | |||
499 | LDD -16(%sp),lt_0 ; load lt[0] | ||
500 | LDD -48(%sp),lt_1 ; load lt[1] | ||
501 | EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = (m[0]&Mask) >> 31 | ||
502 | EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = (m[1]&Mask) >> 31 | ||
503 | |||
504 | LDD -8(%sp),ht_0 ; load ht[0] | ||
505 | LDD -40(%sp),ht_1 ; load ht[1] | ||
506 | ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 | ||
507 | ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 | ||
508 | |||
509 | ADD lt_0,m_0,lt_0 ; lt[0] = lt[0]+m[0] | ||
510 | ADD,DC ht_0,%r0,ht_0 ; ht[0] += carry | ||
511 | STD lt_0,0(r_ptr) ; rp[0] = lt[0] | ||
512 | STD ht_0,8(r_ptr) ; rp[1] = ht[0] | ||
513 | |||
514 | ADD lt_1,m_1,lt_1 ; lt[1] = lt[1]+m[1] | ||
515 | ADD,DC ht_1,%r0,ht_1 ; ht[1] += carry | ||
516 | STD lt_1,16(r_ptr) ; rp[2] = lt[1] | ||
517 | STD ht_1,24(r_ptr) ; rp[3] = ht[1] | ||
518 | |||
519 | LDO -2(num),num ; num = num - 2; | ||
520 | LDO 16(a_ptr),a_ptr ; ap += 2 | ||
521 | CMPIB,<= 2,num,bn_sqr_words_unroll2 | ||
522 | LDO 32(r_ptr),r_ptr ; rp += 4 | ||
523 | |||
524 | CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? | ||
525 | |||
526 | ; | ||
527 | ; Cleanup: handle the single remaining word | ||
528 | ; | ||
529 | bn_sqr_words_single_top | ||
530 | FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) | ||
531 | |||
532 | XMPYU fht_0,flt_0,fm ; m = ht*lt | ||
533 | FSTD fm,-24(%sp) ; store m | ||
534 | |||
535 | XMPYU flt_0,flt_0,lt_temp ; lt = lt*lt | ||
536 | FSTD lt_temp,-16(%sp) ; store lt | ||
537 | |||
538 | XMPYU fht_0,fht_0,ht_temp ; ht = ht*ht | ||
539 | FSTD ht_temp,-8(%sp) ; store ht | ||
540 | |||
541 | LDD -24(%sp),m_0 ; load m | ||
542 | AND m_0,high_mask,tmp_0 ; m & Mask | ||
543 | DEPD,Z m_0,30,31,m_0 ; m << 33 (low word of 2*m<<32) | ||
544 | LDD -16(%sp),lt_0 ; lt | ||
545 | |||
546 | LDD -8(%sp),ht_0 ; ht | ||
547 | EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = (m&Mask) >> 31 | ||
548 | ADD m_0,lt_0,lt_0 ; lt = lt+m | ||
549 | ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 (ADD,L leaves the carry from lt+m intact) | ||
550 | ADD,DC ht_0,%r0,ht_0 ; ht += carry from lt+m | ||
551 | |||
552 | STD lt_0,0(r_ptr) ; rp[0] = lt | ||
553 | STD ht_0,8(r_ptr) ; rp[1] = ht | ||
554 | |||
555 | bn_sqr_words_exit | ||
556 | .EXIT | ||
557 | LDD -112(%sp),%r5 ; restore r5 | ||
558 | LDD -120(%sp),%r4 ; restore r4 | ||
559 | BVE (%rp) | ||
560 | LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame | ||
561 | .PROCEND ;in=23,24,25,26,29;out=28; | ||
562 | |||
563 | |||
564 | ;---------------------------------------------------------------------------- | ||
565 | ; | ||
566 | ;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | ||
567 | ; r[i] = a[i] + b[i] + carry for i < n; returns the final carry in %ret0 (0 if n <= 0) | ||
568 | ; arg0 = rp | ||
569 | ; arg1 = ap | ||
570 | ; arg2 = bp | ||
571 | ; arg3 = n | ||
572 | |||
573 | t .reg %r22 | ||
574 | b .reg %r21 | ||
575 | l .reg %r20 | ||
576 | |||
577 | bn_add_words | ||
578 | .proc | ||
579 | .entry | ||
580 | .callinfo | ||
581 | .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
582 | .align 64 | ||
583 | |||
584 | CMPIB,>= 0,n,bn_add_words_exit ; if (n <= 0) return 0 | ||
585 | COPY %r0,%ret0 ; return 0 by default | ||
586 | |||
587 | ; | ||
588 | ; If 2 or more numbers do the loop | ||
589 | ; | ||
590 | CMPIB,= 1,n,bn_add_words_single_top | ||
591 | NOP | ||
592 | |||
593 | ; | ||
594 | ; This loop is unrolled 2 times (64-byte aligned as well) | ||
595 | ; | ||
596 | bn_add_words_unroll2 | ||
597 | LDD 0(a_ptr),t ; t = a[0] | ||
598 | LDD 0(b_ptr),b ; b = b[0] | ||
599 | ADD t,%ret0,t ; t = t+c; | ||
600 | ADD,DC %r0,%r0,%ret0 ; set c to carry | ||
601 | ADD t,b,l ; l = t + b[0] | ||
602 | ADD,DC %ret0,%r0,%ret0 ; c+= carry | ||
603 | STD l,0(r_ptr) ; r[0] = l | ||
604 | |||
605 | LDD 8(a_ptr),t ; t = a[1] | ||
606 | LDD 8(b_ptr),b ; b = b[1] | ||
607 | ADD t,%ret0,t ; t = t+c; | ||
608 | ADD,DC %r0,%r0,%ret0 ; set c to carry | ||
609 | ADD t,b,l ; l = t + b[1] | ||
610 | ADD,DC %ret0,%r0,%ret0 ; c+= carry | ||
611 | STD l,8(r_ptr) ; r[1] = l | ||
612 | |||
613 | LDO -2(n),n ; n -= 2 | ||
614 | LDO 16(a_ptr),a_ptr ; a += 2 | ||
615 | LDO 16(b_ptr),b_ptr ; b += 2 | ||
616 | |||
617 | CMPIB,<= 2,n,bn_add_words_unroll2 | ||
618 | LDO 16(r_ptr),r_ptr ; r += 2 (delay slot) | ||
619 | |||
620 | CMPIB,=,N 0,n,bn_add_words_exit ; are we done? | ||
621 | |||
622 | bn_add_words_single_top | ||
623 | LDD 0(a_ptr),t ; t = a[0] | ||
624 | LDD 0(b_ptr),b ; b = b[0] | ||
625 | |||
626 | ADD t,%ret0,t ; t = t+c; | ||
627 | ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??) | ||
628 | ADD t,b,l ; l = t + b[0] | ||
629 | ADD,DC %ret0,%r0,%ret0 ; c+= carry | ||
630 | STD l,0(r_ptr) ; r[0] = l | ||
631 | |||
632 | bn_add_words_exit | ||
633 | .EXIT | ||
634 | BVE (%rp) | ||
635 | NOP | ||
636 | .PROCEND ;in=23,24,25,26,29;out=28; | ||
637 | |||
638 | ;---------------------------------------------------------------------------- | ||
639 | ; | ||
640 | ;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | ||
641 | ; r[i] = a[i] - b[i] - borrow for i < n; returns the final borrow in %ret0 (0 if n <= 0) | ||
642 | ; arg0 = rp | ||
643 | ; arg1 = ap | ||
644 | ; arg2 = bp | ||
645 | ; arg3 = n | ||
646 | |||
647 | t1 .reg %r22 | ||
648 | t2 .reg %r21 | ||
649 | sub_tmp1 .reg %r20 | ||
650 | sub_tmp2 .reg %r19 | ||
651 | |||
652 | |||
653 | bn_sub_words | ||
654 | .proc | ||
655 | .callinfo | ||
656 | .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
657 | .entry | ||
658 | .align 64 | ||
659 | |||
660 | CMPIB,>= 0,n,bn_sub_words_exit ; if (n <= 0) return 0 | ||
661 | COPY %r0,%ret0 ; return 0 by default | ||
662 | |||
663 | ; | ||
664 | ; If 2 or more numbers do the loop | ||
665 | ; | ||
666 | CMPIB,= 1,n,bn_sub_words_single_top | ||
667 | NOP | ||
668 | |||
669 | ; | ||
670 | ; This loop is unrolled 2 times (64-byte aligned as well) | ||
671 | ; | ||
672 | bn_sub_words_unroll2 | ||
673 | LDD 0(a_ptr),t1 ; t1 = a[0] | ||
674 | LDD 0(b_ptr),t2 ; t2 = b[0] | ||
675 | SUB t1,t2,sub_tmp1 ; t3 = t1-t2; | ||
676 | SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; | ||
677 | |||
678 | CMPCLR,*>> t1,t2,sub_tmp2 ; sub_tmp2 = 0; skip next if t1 > t2 | ||
679 | LDO 1(%r0),sub_tmp2 ; else sub_tmp2 = 1 | ||
680 | |||
681 | CMPCLR,*= t1,t2,%r0 ; if (t1 == t2) keep the incoming borrow | ||
682 | COPY sub_tmp2,%ret0 ; else c = (t1 < t2) | ||
683 | STD sub_tmp1,0(r_ptr) ; r[0] = t3 | ||
684 | |||
685 | LDD 8(a_ptr),t1 ; t1 = a[1] | ||
686 | LDD 8(b_ptr),t2 ; t2 = b[1] | ||
687 | SUB t1,t2,sub_tmp1 ; t3 = t1-t2; | ||
688 | SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; | ||
689 | CMPCLR,*>> t1,t2,sub_tmp2 ; sub_tmp2 = 0; skip next if t1 > t2 | ||
690 | LDO 1(%r0),sub_tmp2 ; else sub_tmp2 = 1 | ||
691 | |||
692 | CMPCLR,*= t1,t2,%r0 ; if (t1 == t2) keep the incoming borrow | ||
693 | COPY sub_tmp2,%ret0 ; else c = (t1 < t2) | ||
694 | STD sub_tmp1,8(r_ptr) ; r[1] = t3 | ||
695 | |||
696 | LDO -2(n),n ; n -= 2 | ||
697 | LDO 16(a_ptr),a_ptr ; a += 2 | ||
698 | LDO 16(b_ptr),b_ptr ; b += 2 | ||
699 | |||
700 | CMPIB,<= 2,n,bn_sub_words_unroll2 | ||
701 | LDO 16(r_ptr),r_ptr ; r += 2 (delay slot) | ||
702 | |||
703 | CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? | ||
704 | |||
705 | bn_sub_words_single_top | ||
706 | LDD 0(a_ptr),t1 ; t1 = a[0] | ||
707 | LDD 0(b_ptr),t2 ; t2 = b[0] | ||
708 | SUB t1,t2,sub_tmp1 ; t3 = t1-t2; | ||
709 | SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; | ||
710 | CMPCLR,*>> t1,t2,sub_tmp2 ; sub_tmp2 = 0; skip next if t1 > t2 | ||
711 | LDO 1(%r0),sub_tmp2 ; else sub_tmp2 = 1 | ||
712 | |||
713 | CMPCLR,*= t1,t2,%r0 ; if (t1 == t2) keep the incoming borrow | ||
714 | COPY sub_tmp2,%ret0 ; else c = (t1 < t2) | ||
715 | |||
716 | STD sub_tmp1,0(r_ptr) ; r[0] = t3 | ||
717 | |||
718 | bn_sub_words_exit | ||
719 | .EXIT | ||
720 | BVE (%rp) | ||
721 | NOP | ||
722 | .PROCEND ;in=23,24,25,26,29;out=28; | ||
723 | |||
724 | ;------------------------------------------------------------------------------ | ||
725 | ; | ||
726 | ; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) | ||
727 | ; Divides the two-word value (h,l) by d, producing 32 quotient bits per pass (2 passes); returns -1 if d == 0 | ||
728 | ; arg0 = h | ||
729 | ; arg1 = l | ||
730 | ; arg2 = d | ||
731 | ; | ||
732 | ; This is mainly just modified assembly from the compiler, thus the | ||
733 | ; lack of variable names. | ||
734 | ; | ||
735 | ;------------------------------------------------------------------------------ | ||
736 | bn_div_words | ||
737 | .proc | ||
738 | .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE | ||
739 | .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
740 | .IMPORT BN_num_bits_word,CODE,NO_RELOCATION | ||
741 | .IMPORT __iob,DATA | ||
742 | .IMPORT fprintf,CODE,NO_RELOCATION | ||
743 | .IMPORT abort,CODE,NO_RELOCATION | ||
744 | .IMPORT $$div2U,MILLICODE | ||
745 | .entry | ||
746 | STD %r2,-16(%r30) ; save rp (at -368(%r30) after the bump below) | ||
747 | STD,MA %r3,352(%r30) ; save r3, then bump stack by 352 | ||
748 | STD %r4,-344(%r30) ; save r4 | ||
749 | STD %r5,-336(%r30) ; save r5 | ||
750 | STD %r6,-328(%r30) ; save r6 | ||
751 | STD %r7,-320(%r30) ; save r7 | ||
752 | STD %r8,-312(%r30) ; save r8 | ||
753 | STD %r9,-304(%r30) ; save r9 | ||
754 | STD %r10,-296(%r30) ; save r10 | ||
755 | |||
756 | STD %r27,-288(%r30) ; save gp | ||
757 | |||
758 | COPY %r24,%r3 ; save d | ||
759 | COPY %r26,%r4 ; save h (high 64-bits) | ||
760 | LDO -1(%r0),%ret0 ; return -1 by default | ||
761 | |||
762 | CMPB,*= %r0,%arg2,$D3 ; if (d == 0) | ||
763 | COPY %r25,%r5 ; save l (low 64-bits) | ||
764 | |||
765 | LDO -48(%r30),%r29 ; create ap | ||
766 | .CALL ;in=26,29;out=28; | ||
767 | B,L BN_num_bits_word,%r2 | ||
768 | COPY %r3,%r26 | ||
769 | LDD -288(%r30),%r27 ; restore gp | ||
770 | LDI 64,%r21 | ||
771 | |||
772 | CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward) | ||
773 | COPY %ret0,%r24 ; i | ||
774 | MTSARCM %r24 | ||
775 | DEPDI,Z -1,%sar,1,%r29 | ||
776 | CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward) | ||
777 | |||
778 | $00000012 | ||
779 | SUBI 64,%r24,%r31 ; i = 64 - i; | ||
780 | CMPCLR,*<< %r4,%r3,%r0 ; if (h >= d) | ||
781 | SUB %r4,%r3,%r4 ; h -= d | ||
782 | CMPB,= %r31,%r0,$0000001A ; if (i) | ||
783 | COPY %r0,%r10 ; ret = 0 | ||
784 | MTSARCM %r31 ; i to shift | ||
785 | DEPD,Z %r3,%sar,64,%r3 ; d <<= i; | ||
786 | SUBI 64,%r31,%r19 ; 64 - i; redundant | ||
787 | MTSAR %r19 ; (64 -i) to shift | ||
788 | SHRPD %r4,%r5,%sar,%r4 ; h = (h<<i)|(l>>(64-i)) | ||
789 | MTSARCM %r31 ; i to shift | ||
790 | DEPD,Z %r5,%sar,64,%r5 ; l <<= i; | ||
791 | |||
792 | $0000001A | ||
793 | DEPDI,Z -1,31,32,%r19 ; r19 = 0xffffffff00000000 | ||
794 | EXTRD,U %r3,31,32,%r6 ; dh = d >> 32 (upper 32 bits) | ||
795 | EXTRD,U %r3,63,32,%r8 ; dl = d & 0xffffffff | ||
796 | LDO 2(%r0),%r9 ; count = 2 | ||
797 | STD %r3,-280(%r30) ; "d" to stack | ||
798 | |||
799 | $0000001C | ||
800 | DEPDI,Z -1,63,32,%r29 ; q = 0xffffffff | ||
801 | EXTRD,U %r4,31,32,%r31 ; h >> 32 | ||
802 | CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div | ||
803 | COPY %r4,%r26 | ||
804 | EXTRD,U %r4,31,32,%r25 | ||
805 | COPY %r6,%r24 | ||
806 | .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) | ||
807 | B,L $$div2U,%r2 | ||
808 | EXTRD,U %r6,31,32,%r23 | ||
809 | DEPD %r28,31,32,%r29 | ||
810 | $D2 | ||
811 | STD %r29,-272(%r30) ; q | ||
812 | AND %r5,%r19,%r24 ; t & 0xffffffff00000000; | ||
813 | EXTRD,U %r24,31,32,%r24 ; r24 = l >> 32 (upper 32 bits of l) | ||
814 | FLDD -272(%r30),%fr7 ; q | ||
815 | FLDD -280(%r30),%fr8 ; d | ||
816 | XMPYU %fr8L,%fr7L,%fr10 | ||
817 | FSTD %fr10,-256(%r30) | ||
818 | XMPYU %fr8L,%fr7R,%fr22 | ||
819 | FSTD %fr22,-264(%r30) | ||
820 | XMPYU %fr8R,%fr7L,%fr11 | ||
821 | XMPYU %fr8R,%fr7R,%fr23 | ||
822 | FSTD %fr11,-232(%r30) | ||
823 | FSTD %fr23,-240(%r30) | ||
824 | LDD -256(%r30),%r28 | ||
825 | DEPD,Z %r28,31,32,%r2 | ||
826 | LDD -264(%r30),%r20 | ||
827 | ADD,L %r20,%r2,%r31 | ||
828 | LDD -232(%r30),%r22 | ||
829 | DEPD,Z %r22,31,32,%r22 | ||
830 | LDD -240(%r30),%r21 | ||
831 | B $00000024 ; enter loop | ||
832 | ADD,L %r21,%r22,%r23 | ||
833 | |||
834 | $0000002A | ||
835 | LDO -1(%r29),%r29 ; q-- | ||
836 | SUB %r23,%r8,%r23 | ||
837 | $00000024 | ||
838 | SUB %r4,%r31,%r25 | ||
839 | AND %r25,%r19,%r26 | ||
840 | CMPB,*<>,N %r0,%r26,$00000046 ; (forward) | ||
841 | DEPD,Z %r25,31,32,%r20 | ||
842 | OR %r20,%r24,%r21 | ||
843 | CMPB,*<<,N %r21,%r23,$0000002A ;(backward) | ||
844 | SUB %r31,%r6,%r31 | ||
845 | ;-------------Break path--------------------- | ||
846 | |||
847 | $00000046 | ||
848 | DEPD,Z %r23,31,32,%r25 ;tl | ||
849 | EXTRD,U %r23,31,32,%r26 ;t | ||
850 | AND %r25,%r19,%r24 ;tl = (tl<<32)&0xffffffff00000000L | ||
851 | ADD,L %r31,%r26,%r31 ;th += t; | ||
852 | CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl) | ||
853 | LDO 1(%r31),%r31 ; th++; | ||
854 | CMPB,*<<=,N %r31,%r4,$00000036 ;if (h < th) (forward) | ||
855 | LDO -1(%r29),%r29 ;q--; | ||
856 | ADD,L %r4,%r3,%r4 ;h += d; | ||
857 | $00000036 | ||
858 | ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward) | ||
859 | SUB %r5,%r24,%r28 ; l -= tl; | ||
860 | SUB %r4,%r31,%r24 ; h -= th; | ||
861 | SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32)); | ||
862 | DEPD,Z %r29,31,32,%r10 ; ret = q<<32 | ||
863 | b $0000001C | ||
864 | DEPD,Z %r28,31,32,%r5 ; l = l << 32 | ||
865 | |||
866 | $D1 | ||
867 | OR %r10,%r29,%r28 ; ret |= q | ||
868 | $D3 | ||
869 | LDD -368(%r30),%r2 ; reload rp | ||
870 | $D0 | ||
871 | LDD -296(%r30),%r10 ; restore r10 | ||
872 | LDD -304(%r30),%r9 ; restore r9 | ||
873 | LDD -312(%r30),%r8 ; restore r8 | ||
874 | LDD -320(%r30),%r7 ; restore r7 | ||
875 | LDD -328(%r30),%r6 ; restore r6 | ||
876 | LDD -336(%r30),%r5 ; restore r5 | ||
877 | LDD -344(%r30),%r4 ; restore r4 | ||
878 | BVE (%r2) | ||
879 | .EXIT | ||
880 | LDD,MB -352(%r30),%r3 ; restore r3 and pop the 352-byte frame | ||
881 | |||
882 | bn_div_err_case | ||
883 | MFIA %r6 | ||
884 | ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1 | ||
885 | LDO R'bn_div_words-bn_div_err_case(%r1),%r6 | ||
886 | ADDIL LT'__iob,%r27,%r1 | ||
887 | LDD RT'__iob(%r1),%r26 | ||
888 | ADDIL L'C$4-bn_div_words,%r6,%r1 | ||
889 | LDO R'C$4-bn_div_words(%r1),%r25 | ||
890 | LDO 64(%r26),%r26 | ||
891 | .CALL ;in=24,25,26,29;out=28; | ||
892 | B,L fprintf,%r2 | ||
893 | LDO -48(%r30),%r29 | ||
894 | LDD -288(%r30),%r27 ; restore gp | ||
895 | .CALL ;in=29; | ||
896 | B,L abort,%r2 | ||
897 | LDO -48(%r30),%r29 | ||
898 | LDD -288(%r30),%r27 ; restore gp | ||
899 | B $D0 | ||
900 | LDD -368(%r30),%r2 ; reload rp (delay slot) | ||
901 | .PROCEND ;in=24,25,26,29;out=28; | ||
902 | |||
903 | ;---------------------------------------------------------------------------- | ||
904 | ; | ||
905 | ; Registers to hold 64-bit values to manipulate. The "L" part | ||
906 | ; of the register corresponds to the upper 32-bits, while the "R" | ||
907 | ; part corresponds to the lower 32-bits | ||
908 | ; | ||
909 | ; Note that code using b6 and b7 must save them before | ||
910 | ; using them, because they are callee-save registers. | ||
911 | ; | ||
912 | ; | ||
913 | ; Floating point registers to use to save values that | ||
914 | ; are manipulated. These don't collide with ftemp1-6 and | ||
915 | ; are all caller save registers | ||
916 | ; | ||
917 | a0 .reg %fr22 | ||
918 | a0L .reg %fr22L | ||
919 | a0R .reg %fr22R | ||
920 | |||
921 | a1 .reg %fr23 | ||
922 | a1L .reg %fr23L | ||
923 | a1R .reg %fr23R | ||
924 | |||
925 | a2 .reg %fr24 | ||
926 | a2L .reg %fr24L | ||
927 | a2R .reg %fr24R | ||
928 | |||
929 | a3 .reg %fr25 | ||
930 | a3L .reg %fr25L | ||
931 | a3R .reg %fr25R | ||
932 | |||
933 | a4 .reg %fr26 | ||
934 | a4L .reg %fr26L | ||
935 | a4R .reg %fr26R | ||
936 | |||
937 | a5 .reg %fr27 | ||
938 | a5L .reg %fr27L | ||
939 | a5R .reg %fr27R | ||
940 | |||
941 | a6 .reg %fr28 | ||
942 | a6L .reg %fr28L | ||
943 | a6R .reg %fr28R | ||
944 | |||
945 | a7 .reg %fr29 | ||
946 | a7L .reg %fr29L | ||
947 | a7R .reg %fr29R | ||
948 | |||
949 | b0 .reg %fr30 | ||
950 | b0L .reg %fr30L | ||
951 | b0R .reg %fr30R | ||
952 | |||
953 | b1 .reg %fr31 | ||
954 | b1L .reg %fr31L | ||
955 | b1R .reg %fr31R | ||
956 | |||
957 | ; | ||
958 | ; Temporary floating point variables, these are all caller save | ||
959 | ; registers | ||
960 | ; | ||
961 | ftemp1 .reg %fr4 | ||
962 | ftemp2 .reg %fr5 | ||
963 | ftemp3 .reg %fr6 | ||
964 | ftemp4 .reg %fr7 | ||
965 | |||
966 | ; | ||
967 | ; The B set of registers when used. | ||
968 | ; | ||
969 | |||
970 | b2 .reg %fr8 | ||
971 | b2L .reg %fr8L | ||
972 | b2R .reg %fr8R | ||
973 | |||
974 | b3 .reg %fr9 | ||
975 | b3L .reg %fr9L | ||
976 | b3R .reg %fr9R | ||
977 | |||
978 | b4 .reg %fr10 | ||
979 | b4L .reg %fr10L | ||
980 | b4R .reg %fr10R | ||
981 | |||
982 | b5 .reg %fr11 | ||
983 | b5L .reg %fr11L | ||
984 | b5R .reg %fr11R | ||
985 | |||
986 | b6 .reg %fr12 | ||
987 | b6L .reg %fr12L | ||
988 | b6R .reg %fr12R | ||
989 | |||
990 | b7 .reg %fr13 | ||
991 | b7L .reg %fr13L | ||
992 | b7R .reg %fr13R | ||
993 | |||
994 | c1 .reg %r21 ; only reg | ||
995 | temp1 .reg %r20 ; only reg | ||
996 | temp2 .reg %r19 ; only reg | ||
997 | temp3 .reg %r31 ; only reg | ||
998 | |||
999 | m1 .reg %r28 | ||
1000 | c2 .reg %r23 | ||
1001 | high_one .reg %r1 | ||
1002 | ht .reg %r6 | ||
1003 | lt .reg %r5 | ||
1004 | m .reg %r4 | ||
1005 | c3 .reg %r3 | ||
1006 | |||
1007 | SQR_ADD_C .macro A0L,A0R,C1,C2,C3 | ||
1008 | XMPYU A0L,A0R,ftemp1 ; m = ah*al (macro adds (A0L:A0R)^2 into C3:C2:C1) | ||
1009 | FSTD ftemp1,-24(%sp) ; store m | ||
1010 | |||
1011 | XMPYU A0R,A0R,ftemp2 ; lt = al*al | ||
1012 | FSTD ftemp2,-16(%sp) ; store lt | ||
1013 | |||
1014 | XMPYU A0L,A0L,ftemp3 ; ht = ah*ah | ||
1015 | FSTD ftemp3,-8(%sp) ; store ht | ||
1016 | |||
1017 | LDD -24(%sp),m ; load m | ||
1018 | AND m,high_mask,temp2 ; m & Mask | ||
1019 | DEPD,Z m,30,31,temp3 ; temp3 = m << 33 (low word of 2*m<<32) | ||
1020 | LDD -16(%sp),lt ; lt | ||
1021 | |||
1022 | LDD -8(%sp),ht ; ht | ||
1023 | EXTRD,U temp2,32,33,temp1 ; temp1 = (m&Mask) >> 31 | ||
1024 | ADD temp3,lt,lt ; lt = lt+m | ||
1025 | ADD,L ht,temp1,ht ; ht += temp1 (ADD,L leaves the carry intact) | ||
1026 | ADD,DC ht,%r0,ht ; ht += carry from lt+m | ||
1027 | |||
1028 | ADD C1,lt,C1 ; c1=c1+lt | ||
1029 | ADD,DC ht,%r0,ht ; ht += carry | ||
1030 | |||
1031 | ADD C2,ht,C2 ; c2=c2+ht | ||
1032 | ADD,DC C3,%r0,C3 ; c3 += carry | ||
1033 | .endm | ||
1034 | |||
1035 | SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 | ||
1036 | XMPYU A0L,A1R,ftemp1 ; m1 = ah*bl (macro adds 2*(A0L:A0R)*(A1L:A1R) into C3:C2:C1) | ||
1037 | FSTD ftemp1,-16(%sp) ; store m1 | ||
1038 | XMPYU A0R,A1L,ftemp2 ; m = al*bh | ||
1039 | FSTD ftemp2,-8(%sp) ; store m | ||
1040 | XMPYU A0R,A1R,ftemp3 ; lt = al*bl | ||
1041 | FSTD ftemp3,-32(%sp) | ||
1042 | XMPYU A0L,A1L,ftemp4 ; ht = ah*bh | ||
1043 | FSTD ftemp4,-24(%sp) ; store ht | ||
1044 | |||
1045 | LDD -8(%sp),m ; load m | ||
1046 | LDD -16(%sp),m1 ; load m1 | ||
1047 | ADD,L m,m1,m ; m = m+m1 | ||
1048 | |||
1049 | DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32 | ||
1050 | LDD -24(%sp),ht ; load ht | ||
1051 | |||
1052 | CMPCLR,*>>= m,m1,%r0 ; if (m < m1), i.e. m+m1 wrapped | ||
1053 | ADD,L ht,high_one,ht ; ht+=high_one | ||
1054 | |||
1055 | EXTRD,U m,31,32,temp1 ; m >> 32 | ||
1056 | LDD -32(%sp),lt ; lt | ||
1057 | ADD,L ht,temp1,ht ; ht+= m>>32 | ||
1058 | ADD lt,temp3,lt ; lt = lt + ((m+m1)<<32) | ||
1059 | ADD,DC ht,%r0,ht ; ht += carry | ||
1060 | |||
1061 | ADD ht,ht,ht ; ht=ht+ht; | ||
1062 | ADD,DC C3,%r0,C3 ; add in carry (c3++) | ||
1063 | |||
1064 | ADD lt,lt,lt ; lt=lt+lt; | ||
1065 | ADD,DC ht,%r0,ht ; add in carry (ht++) | ||
1066 | |||
1067 | ADD C1,lt,C1 ; c1=c1+lt | ||
1068 | ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) | ||
1069 | LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise | ||
1070 | |||
1071 | ADD C2,ht,C2 ; c2 = c2 + ht | ||
1072 | ADD,DC C3,%r0,C3 ; add in carry (c3++) | ||
1073 | .endm | ||
1074 | |||
1075 | ; | ||
1076 | ;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) | ||
1077 | ; arg0 = r_ptr | ||
1078 | ; arg1 = a_ptr | ||
1079 | ; r[0..15] = (a[0..7])^2, built one output word at a time in the rotating carry triple c1,c2,c3 | ||
1080 | |||
1081 | bn_sqr_comba8 | ||
1082 | .PROC | ||
1083 | .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE | ||
1084 | .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
1085 | .ENTRY | ||
1086 | .align 64 | ||
1087 | |||
1088 | STD %r3,0(%sp) ; save r3 | ||
1089 | STD %r4,8(%sp) ; save r4 | ||
1090 | STD %r5,16(%sp) ; save r5 | ||
1091 | STD %r6,24(%sp) ; save r6 | ||
1092 | |||
1093 | ; | ||
1094 | ; Zero out carries | ||
1095 | ; | ||
1096 | COPY %r0,c1 | ||
1097 | COPY %r0,c2 | ||
1098 | COPY %r0,c3 | ||
1099 | |||
1100 | LDO 128(%sp),%sp ; bump stack | ||
1101 | DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L | ||
1102 | DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 | ||
1103 | |||
1104 | ; | ||
1105 | ; Load up all of the values we are going to use | ||
1106 | ; | ||
1107 | FLDD 0(a_ptr),a0 | ||
1108 | FLDD 8(a_ptr),a1 | ||
1109 | FLDD 16(a_ptr),a2 | ||
1110 | FLDD 24(a_ptr),a3 | ||
1111 | FLDD 32(a_ptr),a4 | ||
1112 | FLDD 40(a_ptr),a5 | ||
1113 | FLDD 48(a_ptr),a6 | ||
1114 | FLDD 56(a_ptr),a7 | ||
1115 | |||
1116 | SQR_ADD_C a0L,a0R,c1,c2,c3 | ||
1117 | STD c1,0(r_ptr) ; r[0] = c1; (terms: a0^2) | ||
1118 | COPY %r0,c1 | ||
1119 | |||
1120 | SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 | ||
1121 | STD c2,8(r_ptr) ; r[1] = c2; (terms: 2*a1*a0) | ||
1122 | COPY %r0,c2 | ||
1123 | |||
1124 | SQR_ADD_C a1L,a1R,c3,c1,c2 | ||
1125 | SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 | ||
1126 | STD c3,16(r_ptr) ; r[2] = c3; (terms: a1^2 + 2*a2*a0) | ||
1127 | COPY %r0,c3 | ||
1128 | |||
1129 | SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 | ||
1130 | SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 | ||
1131 | STD c1,24(r_ptr) ; r[3] = c1; (terms: 2*(a3*a0 + a2*a1)) | ||
1132 | COPY %r0,c1 | ||
1133 | |||
1134 | SQR_ADD_C a2L,a2R,c2,c3,c1 | ||
1135 | SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 | ||
1136 | SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 | ||
1137 | STD c2,32(r_ptr) ; r[4] = c2; (terms: a2^2 + 2*(a3*a1 + a4*a0)) | ||
1138 | COPY %r0,c2 | ||
1139 | |||
1140 | SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 | ||
1141 | SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 | ||
1142 | SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 | ||
1143 | STD c3,40(r_ptr) ; r[5] = c3; (terms: 2*(a5*a0 + a4*a1 + a3*a2)) | ||
1144 | COPY %r0,c3 | ||
1145 | |||
1146 | SQR_ADD_C a3L,a3R,c1,c2,c3 | ||
1147 | SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 | ||
1148 | SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 | ||
1149 | SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 | ||
1150 | STD c1,48(r_ptr) ; r[6] = c1; (terms: a3^2 + 2*(a4*a2 + a5*a1 + a6*a0)) | ||
1151 | COPY %r0,c1 | ||
1152 | |||
1153 | SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 | ||
1154 | SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 | ||
1155 | SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 | ||
1156 | SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 | ||
1157 | STD c2,56(r_ptr) ; r[7] = c2; (terms: 2*(a7*a0 + a6*a1 + a5*a2 + a4*a3)) | ||
1158 | COPY %r0,c2 | ||
1159 | |||
1160 | SQR_ADD_C a4L,a4R,c3,c1,c2 | ||
1161 | SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 | ||
1162 | SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 | ||
1163 | SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 | ||
1164 | STD c3,64(r_ptr) ; r[8] = c3; (terms: a4^2 + 2*(a5*a3 + a6*a2 + a7*a1)) | ||
1165 | COPY %r0,c3 | ||
1166 | |||
1167 | SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 | ||
1168 | SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 | ||
1169 | SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 | ||
1170 | STD c1,72(r_ptr) ; r[9] = c1; (terms: 2*(a7*a2 + a6*a3 + a5*a4)) | ||
1171 | COPY %r0,c1 | ||
1172 | |||
1173 | SQR_ADD_C a5L,a5R,c2,c3,c1 | ||
1174 | SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 | ||
1175 | SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 | ||
1176 | STD c2,80(r_ptr) ; r[10] = c2; (terms: a5^2 + 2*(a6*a4 + a7*a3)) | ||
1177 | COPY %r0,c2 | ||
1178 | |||
1179 | SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 | ||
1180 | SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 | ||
1181 | STD c3,88(r_ptr) ; r[11] = c3; (terms: 2*(a7*a4 + a6*a5)) | ||
1182 | COPY %r0,c3 | ||
1183 | |||
1184 | SQR_ADD_C a6L,a6R,c1,c2,c3 | ||
1185 | SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 | ||
1186 | STD c1,96(r_ptr) ; r[12] = c1; (terms: a6^2 + 2*a7*a5) | ||
1187 | COPY %r0,c1 | ||
1188 | |||
1189 | SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 | ||
1190 | STD c2,104(r_ptr) ; r[13] = c2; (terms: 2*a7*a6) | ||
1191 | COPY %r0,c2 | ||
1192 | |||
1193 | SQR_ADD_C a7L,a7R,c3,c1,c2 | ||
1194 | STD c3, 112(r_ptr) ; r[14] = c3 (terms: a7^2) | ||
1195 | STD c1, 120(r_ptr) ; r[15] = c1 (remaining high word) | ||
1196 | |||
1197 | .EXIT | ||
1198 | LDD -104(%sp),%r6 ; restore r6 | ||
1199 | LDD -112(%sp),%r5 ; restore r5 | ||
1200 | LDD -120(%sp),%r4 ; restore r4 | ||
1201 | BVE (%rp) | ||
1202 | LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame | ||
1203 | |||
1204 | .PROCEND | ||
1205 | |||
1206 | ;----------------------------------------------------------------------------- | ||
1207 | ; | ||
1208 | ;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) | ||
1209 | ; arg0 = r_ptr | ||
1210 | ; arg1 = a_ptr | ||
1211 | ; | ||
1212 | |||
1213 | bn_sqr_comba4 | ||
1214 | .proc | ||
1215 | .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE | ||
1216 | .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
1217 | .entry | ||
1218 | .align 64 | ||
1219 | STD %r3,0(%sp) ; save r3 | ||
1220 | STD %r4,8(%sp) ; save r4 | ||
1221 | STD %r5,16(%sp) ; save r5 | ||
1222 | STD %r6,24(%sp) ; save r6 | ||
1223 | |||
1224 | ; Computes r[0..7] = a[0..3] squared, one result word (column) at a time. | ||
1225 | ; Zero out the three rotating accumulators first. | ||
1226 | ; | ||
1227 | COPY %r0,c1 | ||
1228 | COPY %r0,c2 | ||
1229 | COPY %r0,c3 | ||
1230 | |||
1231 | LDO 128(%sp),%sp ; bump stack | ||
1232 | DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L | ||
1233 | DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 | ||
1234 | |||
1235 | ; | ||
1236 | ; Load the four input words a[0]..a[3] | ||
1237 | ; | ||
1238 | FLDD 0(a_ptr),a0 | ||
1239 | FLDD 8(a_ptr),a1 | ||
1240 | FLDD 16(a_ptr),a2 | ||
1241 | FLDD 24(a_ptr),a3 | ||
1242 | ; a[4]..a[7] are deliberately not loaded: the input has only | ||
1243 | ; four words and a4-a7 are never referenced below, so reading | ||
1244 | ; 32..56(a_ptr) would be an out-of-bounds access. | ||
1245 | ; | ||
1246 | |||
1247 | SQR_ADD_C a0L,a0R,c1,c2,c3 | ||
1248 | |||
1249 | STD c1,0(r_ptr) ; r[0] = c1; | ||
1250 | COPY %r0,c1 | ||
1251 | |||
1252 | SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 | ||
1253 | |||
1254 | STD c2,8(r_ptr) ; r[1] = c2; | ||
1255 | COPY %r0,c2 | ||
1256 | |||
1257 | SQR_ADD_C a1L,a1R,c3,c1,c2 | ||
1258 | SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 | ||
1259 | |||
1260 | STD c3,16(r_ptr) ; r[2] = c3; | ||
1261 | COPY %r0,c3 | ||
1262 | |||
1263 | SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 | ||
1264 | SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 | ||
1265 | |||
1266 | STD c1,24(r_ptr) ; r[3] = c1; | ||
1267 | COPY %r0,c1 | ||
1268 | |||
1269 | SQR_ADD_C a2L,a2R,c2,c3,c1 | ||
1270 | SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 | ||
1271 | |||
1272 | STD c2,32(r_ptr) ; r[4] = c2; | ||
1273 | COPY %r0,c2 | ||
1274 | |||
1275 | SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 | ||
1276 | STD c3,40(r_ptr) ; r[5] = c3; | ||
1277 | COPY %r0,c3 | ||
1278 | |||
1279 | SQR_ADD_C a3L,a3R,c1,c2,c3 | ||
1280 | STD c1,48(r_ptr) ; r[6] = c1; | ||
1281 | STD c2,56(r_ptr) ; r[7] = c2; | ||
1282 | |||
1283 | .EXIT | ||
1284 | LDD -104(%sp),%r6 ; restore r6 | ||
1285 | LDD -112(%sp),%r5 ; restore r5 | ||
1286 | LDD -120(%sp),%r4 ; restore r4 | ||
1287 | BVE (%rp) | ||
1288 | LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame | ||
1289 | |||
1290 | .PROCEND | ||
1291 | |||
1292 | |||
1293 | ;--------------------------------------------------------------------------- | ||
1294 | |||
1295 | MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 | ||
1296 | XMPYU A0L,B0R,ftemp1 ; (C3,C2,C1) += a*b; first partial product m1 = bl*ht | ||
1297 | FSTD ftemp1,-16(%sp) ; spill to scratch below sp, reloaded into m1 | ||
1298 | XMPYU A0R,B0L,ftemp2 ; m = bh*lt | ||
1299 | FSTD ftemp2,-8(%sp) ; spill, reloaded into m | ||
1300 | XMPYU A0R,B0R,ftemp3 ; lt = bl*lt | ||
1301 | FSTD ftemp3,-32(%sp) | ||
1302 | XMPYU A0L,B0L,ftemp4 ; ht = bh*ht | ||
1303 | FSTD ftemp4,-24(%sp) ; spill, reloaded into ht | ||
1304 | |||
1305 | LDD -8(%sp),m ; r21 = m | ||
1306 | LDD -16(%sp),m1 ; r19 = m1 | ||
1307 | ADD,L m,m1,m ; m = m+m1 (wrap-around is detected below) | ||
1308 | |||
1309 | DEPD,Z m,31,32,temp3 ; temp3 = (m+m1) << 32 | ||
1310 | LDD -24(%sp),ht ; r24 = ht | ||
1311 | |||
1312 | CMPCLR,*>>= m,m1,%r0 ; next insn runs only if (m < m1), i.e. m+m1 wrapped | ||
1313 | ADD,L ht,high_one,ht ; ht+=high_one (carry out of m+m1) | ||
1314 | |||
1315 | EXTRD,U m,31,32,temp1 ; temp1 = m >> 32 | ||
1316 | LDD -32(%sp),lt ; lt | ||
1317 | ADD,L ht,temp1,ht ; ht+= m>>32 | ||
1318 | ADD lt,temp3,lt ; lt += (m+m1) << 32 | ||
1319 | ADD,DC ht,%r0,ht ; ht += carry out of the lt addition | ||
1320 | |||
1321 | ADD C1,lt,C1 ; c1=c1+lt | ||
1322 | ADD,DC ht,%r0,ht ; ht += carry out of c1+lt | ||
1323 | |||
1324 | ADD C2,ht,C2 ; c2 = c2 + ht | ||
1325 | ADD,DC C3,%r0,C3 ; c3 += carry out of c2+ht | ||
1326 | .endm | ||
1327 | |||
1328 | |||
1329 | ; | ||
1330 | ;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
1331 | ; arg0 = r_ptr | ||
1332 | ; arg1 = a_ptr | ||
1333 | ; arg2 = b_ptr | ||
1334 | ; | ||
1335 | |||
1336 | bn_mul_comba8 | ||
1337 | .proc | ||
1338 | .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE | ||
1339 | .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
1340 | .entry | ||
1341 | .align 64 | ||
1342 | |||
1343 | STD %r3,0(%sp) ; save r3 | ||
1344 | STD %r4,8(%sp) ; save r4 | ||
1345 | STD %r5,16(%sp) ; save r5 | ||
1346 | STD %r6,24(%sp) ; save r6 | ||
1347 | FSTD %fr12,32(%sp) ; save fr12 | ||
1348 | FSTD %fr13,40(%sp) ; save fr13 | ||
1349 | |||
1350 | ; | ||
1351 | ; Zero out carries | ||
1352 | ; | ||
1353 | COPY %r0,c1 | ||
1354 | COPY %r0,c2 | ||
1355 | COPY %r0,c3 | ||
1356 | |||
1357 | LDO 128(%sp),%sp ; bump stack | ||
1358 | DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 | ||
1359 | |||
1360 | ; | ||
1361 | ; Load up all of the values we are going to use | ||
1362 | ; | ||
1363 | FLDD 0(a_ptr),a0 | ||
1364 | FLDD 8(a_ptr),a1 | ||
1365 | FLDD 16(a_ptr),a2 | ||
1366 | FLDD 24(a_ptr),a3 | ||
1367 | FLDD 32(a_ptr),a4 | ||
1368 | FLDD 40(a_ptr),a5 | ||
1369 | FLDD 48(a_ptr),a6 | ||
1370 | FLDD 56(a_ptr),a7 | ||
1371 | |||
1372 | FLDD 0(b_ptr),b0 | ||
1373 | FLDD 8(b_ptr),b1 | ||
1374 | FLDD 16(b_ptr),b2 | ||
1375 | FLDD 24(b_ptr),b3 | ||
1376 | FLDD 32(b_ptr),b4 | ||
1377 | FLDD 40(b_ptr),b5 | ||
1378 | FLDD 48(b_ptr),b6 | ||
1379 | FLDD 56(b_ptr),b7 | ||
1380 | |||
1381 | MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 | ||
1382 | STD c1,0(r_ptr) ; r[0] = c1; | ||
1383 | COPY %r0,c1 | ||
1384 | |||
1385 | MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 | ||
1386 | MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 | ||
1387 | STD c2,8(r_ptr) ; r[1] = c2; | ||
1388 | COPY %r0,c2 | ||
1389 | |||
1390 | MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 | ||
1391 | MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 | ||
1392 | MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 | ||
1393 | STD c3,16(r_ptr) ; r[2] = c3; | ||
1394 | COPY %r0,c3 | ||
1395 | |||
1396 | MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 | ||
1397 | MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 | ||
1398 | MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 | ||
1399 | MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 | ||
1400 | STD c1,24(r_ptr) ; r[3] = c1; | ||
1401 | COPY %r0,c1 | ||
1402 | |||
1403 | MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 | ||
1404 | MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 | ||
1405 | MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 | ||
1406 | MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 | ||
1407 | MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 | ||
1408 | STD c2,32(r_ptr) ; r[4] = c2; | ||
1409 | COPY %r0,c2 | ||
1410 | |||
1411 | MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 | ||
1412 | MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 | ||
1413 | MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 | ||
1414 | MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 | ||
1415 | MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 | ||
1416 | MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 | ||
1417 | STD c3,40(r_ptr) ; r[5] = c3; | ||
1418 | COPY %r0,c3 | ||
1419 | |||
1420 | MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 | ||
1421 | MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 | ||
1422 | MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 | ||
1423 | MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 | ||
1424 | MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 | ||
1425 | MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 | ||
1426 | MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 | ||
1427 | STD c1,48(r_ptr) ; r[6] = c1; | ||
1428 | COPY %r0,c1 | ||
1429 | |||
1430 | MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 | ||
1431 | MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 | ||
1432 | MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 | ||
1433 | MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 | ||
1434 | MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 | ||
1435 | MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 | ||
1436 | MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 | ||
1437 | MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 | ||
1438 | STD c2,56(r_ptr) ; r[7] = c2; | ||
1439 | COPY %r0,c2 | ||
1440 | |||
1441 | MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 | ||
1442 | MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 | ||
1443 | MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 | ||
1444 | MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 | ||
1445 | MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 | ||
1446 | MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 | ||
1447 | MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 | ||
1448 | STD c3,64(r_ptr) ; r[8] = c3; | ||
1449 | COPY %r0,c3 | ||
1450 | |||
1451 | MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 | ||
1452 | MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 | ||
1453 | MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 | ||
1454 | MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 | ||
1455 | MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 | ||
1456 | MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 | ||
1457 | STD c1,72(r_ptr) ; r[9] = c1; | ||
1458 | COPY %r0,c1 | ||
1459 | |||
1460 | MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 | ||
1461 | MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 | ||
1462 | MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 | ||
1463 | MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 | ||
1464 | MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 | ||
1465 | STD c2,80(r_ptr) ; r[10] = c2; | ||
1466 | COPY %r0,c2 | ||
1467 | |||
1468 | MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 | ||
1469 | MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 | ||
1470 | MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 | ||
1471 | MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 | ||
1472 | STD c3,88(r_ptr) ; r[11] = c3; | ||
1473 | COPY %r0,c3 | ||
1474 | |||
1475 | MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 | ||
1476 | MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 | ||
1477 | MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 | ||
1478 | STD c1,96(r_ptr) ; r[12] = c1; | ||
1479 | COPY %r0,c1 | ||
1480 | |||
1481 | MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 | ||
1482 | MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 | ||
1483 | STD c2,104(r_ptr) ; r[13] = c2; | ||
1484 | COPY %r0,c2 | ||
1485 | |||
1486 | MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 | ||
1487 | STD c3,112(r_ptr) ; r[14] = c3; | ||
1488 | STD c1,120(r_ptr) ; r[15] = c1; | ||
1489 | |||
1490 | .EXIT | ||
1491 | FLDD -88(%sp),%fr13 ; restore fr13 | ||
1492 | FLDD -96(%sp),%fr12 ; restore fr12 | ||
1493 | LDD -104(%sp),%r6 ; restore r6 | ||
1494 | LDD -112(%sp),%r5 ; restore r5 | ||
1495 | LDD -120(%sp),%r4 ; restore r4 | ||
1496 | BVE (%rp) | ||
1497 | LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame | ||
1498 | |||
1499 | .PROCEND | ||
1500 | |||
1501 | ;----------------------------------------------------------------------------- | ||
1502 | ; | ||
1503 | ;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
1504 | ; arg0 = r_ptr | ||
1505 | ; arg1 = a_ptr | ||
1506 | ; arg2 = b_ptr | ||
1507 | ; | ||
1508 | |||
1509 | bn_mul_comba4 | ||
1510 | .proc | ||
1511 | .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE | ||
1512 | .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN | ||
1513 | .entry | ||
1514 | .align 64 | ||
1515 | |||
1516 | STD %r3,0(%sp) ; save r3 | ||
1517 | STD %r4,8(%sp) ; save r4 | ||
1518 | STD %r5,16(%sp) ; save r5 | ||
1519 | STD %r6,24(%sp) ; save r6 | ||
1520 | FSTD %fr12,32(%sp) ; save fr12 | ||
1521 | FSTD %fr13,40(%sp) ; save fr13 | ||
1522 | |||
1523 | ; | ||
1524 | ; Zero out carries | ||
1525 | ; | ||
1526 | COPY %r0,c1 | ||
1527 | COPY %r0,c2 | ||
1528 | COPY %r0,c3 | ||
1529 | |||
1530 | LDO 128(%sp),%sp ; bump stack | ||
1531 | DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 | ||
1532 | |||
1533 | ; | ||
1534 | ; Load up all of the values we are going to use | ||
1535 | ; | ||
1536 | FLDD 0(a_ptr),a0 | ||
1537 | FLDD 8(a_ptr),a1 | ||
1538 | FLDD 16(a_ptr),a2 | ||
1539 | FLDD 24(a_ptr),a3 | ||
1540 | |||
1541 | FLDD 0(b_ptr),b0 | ||
1542 | FLDD 8(b_ptr),b1 | ||
1543 | FLDD 16(b_ptr),b2 | ||
1544 | FLDD 24(b_ptr),b3 | ||
1545 | |||
1546 | MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 | ||
1547 | STD c1,0(r_ptr) ; r[0] = c1; | ||
1548 | COPY %r0,c1 | ||
1549 | |||
1550 | MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 | ||
1551 | MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 | ||
1552 | STD c2,8(r_ptr) ; r[1] = c2; | ||
1553 | COPY %r0,c2 | ||
1554 | |||
1555 | MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 | ||
1556 | MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 | ||
1557 | MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 | ||
1558 | STD c3,16(r_ptr) ; r[2] = c3; | ||
1559 | COPY %r0,c3 | ||
1560 | |||
1561 | MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 | ||
1562 | MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 | ||
1563 | MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 | ||
1564 | MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 | ||
1565 | STD c1,24(r_ptr) ; r[3] = c1; | ||
1566 | COPY %r0,c1 | ||
1567 | |||
1568 | MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 | ||
1569 | MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 | ||
1570 | MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 | ||
1571 | STD c2,32(r_ptr) ; r[4] = c2; | ||
1572 | COPY %r0,c2 | ||
1573 | |||
1574 | MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 | ||
1575 | MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 | ||
1576 | STD c3,40(r_ptr) ; r[5] = c3; | ||
1577 | COPY %r0,c3 | ||
1578 | |||
1579 | MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 | ||
1580 | STD c1,48(r_ptr) ; r[6] = c1; | ||
1581 | STD c2,56(r_ptr) ; r[7] = c2; | ||
1582 | |||
1583 | .EXIT | ||
1584 | FLDD -88(%sp),%fr13 ; restore fr13 | ||
1585 | FLDD -96(%sp),%fr12 ; restore fr12 | ||
1586 | LDD -104(%sp),%r6 ; restore r6 | ||
1587 | LDD -112(%sp),%r5 ; restore r5 | ||
1588 | LDD -120(%sp),%r4 ; restore r4 | ||
1589 | BVE (%rp) | ||
1590 | LDD,MB -128(%sp),%r3 ; restore r3 and pop the 128-byte frame | ||
1591 | |||
1592 | .PROCEND | ||
1593 | |||
1594 | |||
1595 | .SPACE $TEXT$ | ||
1596 | .SUBSPA $CODE$ | ||
1597 | .SPACE $PRIVATE$,SORT=16 | ||
1598 | .IMPORT $global$,DATA | ||
1599 | .SPACE $TEXT$ | ||
1600 | .SUBSPA $CODE$ | ||
1601 | .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=0x2c,SORT=16 | ||
1602 | C$4 | ||
1603 | .ALIGN 8 | ||
1604 | .STRINGZ "Division would overflow (%d)\n" ; NUL-terminated message under label C$4; NOTE(review): presumably the division-overflow diagnostic - confirm where it is referenced | ||
1605 | .END | ||
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h index 009b0eb685..1eb8395b25 100644 --- a/src/lib/libcrypto/bn/bn.h +++ b/src/lib/libcrypto/bn/bn.h | |||
@@ -59,7 +59,7 @@ | |||
59 | #ifndef HEADER_BN_H | 59 | #ifndef HEADER_BN_H |
60 | #define HEADER_BN_H | 60 | #define HEADER_BN_H |
61 | 61 | ||
62 | #ifndef WIN16 | 62 | #ifndef NO_FP_API |
63 | #include <stdio.h> /* FILE */ | 63 | #include <stdio.h> /* FILE */ |
64 | #endif | 64 | #endif |
65 | #include <openssl/opensslconf.h> | 65 | #include <openssl/opensslconf.h> |
@@ -233,7 +233,7 @@ typedef struct bignum_st | |||
233 | BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */ | 233 | BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */ |
234 | int top; /* Index of last used d +1. */ | 234 | int top; /* Index of last used d +1. */ |
235 | /* The next are internal book keeping for bn_expand. */ | 235 | /* The next are internal book keeping for bn_expand. */ |
236 | int max; /* Size of the d array. */ | 236 | int dmax; /* Size of the d array. */ |
237 | int neg; /* one if the number is negative */ | 237 | int neg; /* one if the number is negative */ |
238 | int flags; | 238 | int flags; |
239 | } BIGNUM; | 239 | } BIGNUM; |
@@ -364,6 +364,8 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, | |||
364 | const BIGNUM *m,BN_CTX *ctx); | 364 | const BIGNUM *m,BN_CTX *ctx); |
365 | int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, const BIGNUM *p, | 365 | int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, const BIGNUM *p, |
366 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); | 366 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); |
367 | int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p, | ||
368 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); | ||
367 | int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2, | 369 | int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2, |
368 | BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx); | 370 | BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx); |
369 | int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, | 371 | int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, |
@@ -433,9 +435,9 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, | |||
433 | 435 | ||
434 | /* library internal functions */ | 436 | /* library internal functions */ |
435 | 437 | ||
436 | #define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->max)?\ | 438 | #define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\ |
437 | (a):bn_expand2((a),(bits)/BN_BITS2+1)) | 439 | (a):bn_expand2((a),(bits)/BN_BITS2+1)) |
438 | #define bn_wexpand(a,words) (((words) <= (a)->max)?(a):bn_expand2((a),(words))) | 440 | #define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words))) |
439 | BIGNUM *bn_expand2(BIGNUM *a, int words); | 441 | BIGNUM *bn_expand2(BIGNUM *a, int words); |
440 | 442 | ||
441 | #define bn_fix_top(a) \ | 443 | #define bn_fix_top(a) \ |
@@ -483,7 +485,9 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num); | |||
483 | #define BN_F_BN_CTX_NEW 106 | 485 | #define BN_F_BN_CTX_NEW 106 |
484 | #define BN_F_BN_DIV 107 | 486 | #define BN_F_BN_DIV 107 |
485 | #define BN_F_BN_EXPAND2 108 | 487 | #define BN_F_BN_EXPAND2 108 |
488 | #define BN_F_BN_MOD_EXP2_MONT 118 | ||
486 | #define BN_F_BN_MOD_EXP_MONT 109 | 489 | #define BN_F_BN_MOD_EXP_MONT 109 |
490 | #define BN_F_BN_MOD_EXP_MONT_WORD 117 | ||
487 | #define BN_F_BN_MOD_INVERSE 110 | 491 | #define BN_F_BN_MOD_INVERSE 110 |
488 | #define BN_F_BN_MOD_MUL_RECIPROCAL 111 | 492 | #define BN_F_BN_MOD_MUL_RECIPROCAL 111 |
489 | #define BN_F_BN_MPI2BN 112 | 493 | #define BN_F_BN_MPI2BN 112 |
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 3329cc18e6..44e52a40db 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
@@ -227,7 +227,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
227 | 227 | ||
228 | #else | 228 | #else |
229 | 229 | ||
230 | /* Divide h-l by d and return the result. */ | 230 | /* Divide h,l by d and return the result. */ |
231 | /* I need to test this some more :-( */ | 231 | /* I need to test this some more :-( */ |
232 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | 232 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) |
233 | { | 233 | { |
@@ -237,13 +237,8 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
237 | if (d == 0) return(BN_MASK2); | 237 | if (d == 0) return(BN_MASK2); |
238 | 238 | ||
239 | i=BN_num_bits_word(d); | 239 | i=BN_num_bits_word(d); |
240 | if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i)) | 240 | assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i)); |
241 | { | 241 | |
242 | #if !defined(NO_STDIO) && !defined(WIN16) | ||
243 | fprintf(stderr,"Division would overflow (%d)\n",i); | ||
244 | #endif | ||
245 | abort(); | ||
246 | } | ||
247 | i=BN_BITS2-i; | 242 | i=BN_BITS2-i; |
248 | if (h >= d) h-=d; | 243 | if (h >= d) h-=d; |
249 | 244 | ||
diff --git a/src/lib/libcrypto/bn/bn_blind.c b/src/lib/libcrypto/bn/bn_blind.c index 1b1bb06046..2d287e6d1b 100644 --- a/src/lib/libcrypto/bn/bn_blind.c +++ b/src/lib/libcrypto/bn/bn_blind.c | |||
@@ -67,7 +67,7 @@ BN_BLINDING *BN_BLINDING_new(BIGNUM *A, BIGNUM *Ai, BIGNUM *mod) | |||
67 | bn_check_top(Ai); | 67 | bn_check_top(Ai); |
68 | bn_check_top(mod); | 68 | bn_check_top(mod); |
69 | 69 | ||
70 | if ((ret=(BN_BLINDING *)Malloc(sizeof(BN_BLINDING))) == NULL) | 70 | if ((ret=(BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL) |
71 | { | 71 | { |
72 | BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE); | 72 | BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE); |
73 | return(NULL); | 73 | return(NULL); |
@@ -91,7 +91,7 @@ void BN_BLINDING_free(BN_BLINDING *r) | |||
91 | 91 | ||
92 | if (r->A != NULL) BN_free(r->A ); | 92 | if (r->A != NULL) BN_free(r->A ); |
93 | if (r->Ai != NULL) BN_free(r->Ai); | 93 | if (r->Ai != NULL) BN_free(r->Ai); |
94 | Free(r); | 94 | OPENSSL_free(r); |
95 | } | 95 | } |
96 | 96 | ||
97 | int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) | 97 | int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) |
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c index 46132fd180..b1a8d7571e 100644 --- a/src/lib/libcrypto/bn/bn_ctx.c +++ b/src/lib/libcrypto/bn/bn_ctx.c | |||
@@ -69,7 +69,7 @@ BN_CTX *BN_CTX_new(void) | |||
69 | { | 69 | { |
70 | BN_CTX *ret; | 70 | BN_CTX *ret; |
71 | 71 | ||
72 | ret=(BN_CTX *)Malloc(sizeof(BN_CTX)); | 72 | ret=(BN_CTX *)OPENSSL_malloc(sizeof(BN_CTX)); |
73 | if (ret == NULL) | 73 | if (ret == NULL) |
74 | { | 74 | { |
75 | BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE); | 75 | BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE); |
@@ -102,7 +102,7 @@ void BN_CTX_free(BN_CTX *ctx) | |||
102 | for (i=0; i < BN_CTX_NUM; i++) | 102 | for (i=0; i < BN_CTX_NUM; i++) |
103 | BN_clear_free(&(ctx->bn[i])); | 103 | BN_clear_free(&(ctx->bn[i])); |
104 | if (ctx->flags & BN_FLG_MALLOCED) | 104 | if (ctx->flags & BN_FLG_MALLOCED) |
105 | Free(ctx); | 105 | OPENSSL_free(ctx); |
106 | } | 106 | } |
107 | 107 | ||
108 | void BN_CTX_start(BN_CTX *ctx) | 108 | void BN_CTX_start(BN_CTX *ctx) |
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c index 07af1d3b44..c3772c243b 100644 --- a/src/lib/libcrypto/bn/bn_div.c +++ b/src/lib/libcrypto/bn/bn_div.c | |||
@@ -205,7 +205,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, | |||
205 | BN_init(&wnum); | 205 | BN_init(&wnum); |
206 | wnum.d= &(snum->d[loop]); | 206 | wnum.d= &(snum->d[loop]); |
207 | wnum.top= div_n; | 207 | wnum.top= div_n; |
208 | wnum.max= snum->max+1; /* a bit of a lie */ | 208 | wnum.dmax= snum->dmax+1; /* a bit of a lie */ |
209 | 209 | ||
210 | /* Get the top 2 words of sdiv */ | 210 | /* Get the top 2 words of sdiv */ |
211 | /* i=sdiv->top; */ | 211 | /* i=sdiv->top; */ |
diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c index 988270bcf4..86550c4c21 100644 --- a/src/lib/libcrypto/bn/bn_err.c +++ b/src/lib/libcrypto/bn/bn_err.c | |||
@@ -76,7 +76,9 @@ static ERR_STRING_DATA BN_str_functs[]= | |||
76 | {ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"}, | 76 | {ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"}, |
77 | {ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"}, | 77 | {ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"}, |
78 | {ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"}, | 78 | {ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"}, |
79 | {ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"}, | ||
79 | {ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"}, | 80 | {ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"}, |
81 | {ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"}, | ||
80 | {ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"}, | 82 | {ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"}, |
81 | {ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"}, | 83 | {ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"}, |
82 | {ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"}, | 84 | {ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"}, |
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c index 0c11601675..d2c91628ac 100644 --- a/src/lib/libcrypto/bn/bn_exp.c +++ b/src/lib/libcrypto/bn/bn_exp.c | |||
@@ -55,18 +55,66 @@ | |||
55 | * copied and put under another distribution licence | 55 | * copied and put under another distribution licence |
56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
57 | */ | 57 | */ |
58 | /* ==================================================================== | ||
59 | * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. | ||
60 | * | ||
61 | * Redistribution and use in source and binary forms, with or without | ||
62 | * modification, are permitted provided that the following conditions | ||
63 | * are met: | ||
64 | * | ||
65 | * 1. Redistributions of source code must retain the above copyright | ||
66 | * notice, this list of conditions and the following disclaimer. | ||
67 | * | ||
68 | * 2. Redistributions in binary form must reproduce the above copyright | ||
69 | * notice, this list of conditions and the following disclaimer in | ||
70 | * the documentation and/or other materials provided with the | ||
71 | * distribution. | ||
72 | * | ||
73 | * 3. All advertising materials mentioning features or use of this | ||
74 | * software must display the following acknowledgment: | ||
75 | * "This product includes software developed by the OpenSSL Project | ||
76 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | ||
77 | * | ||
78 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | ||
79 | * endorse or promote products derived from this software without | ||
80 | * prior written permission. For written permission, please contact | ||
81 | * openssl-core@openssl.org. | ||
82 | * | ||
83 | * 5. Products derived from this software may not be called "OpenSSL" | ||
84 | * nor may "OpenSSL" appear in their names without prior written | ||
85 | * permission of the OpenSSL Project. | ||
86 | * | ||
87 | * 6. Redistributions of any form whatsoever must retain the following | ||
88 | * acknowledgment: | ||
89 | * "This product includes software developed by the OpenSSL Project | ||
90 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | ||
91 | * | ||
92 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | ||
93 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
94 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
95 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | ||
96 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
97 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||
98 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
99 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
100 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
101 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
102 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
103 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
104 | * ==================================================================== | ||
105 | * | ||
106 | * This product includes cryptographic software written by Eric Young | ||
107 | * (eay@cryptsoft.com). This product includes software written by Tim | ||
108 | * Hudson (tjh@cryptsoft.com). | ||
109 | * | ||
110 | */ | ||
111 | |||
58 | 112 | ||
59 | #include <stdio.h> | 113 | #include <stdio.h> |
60 | #include "cryptlib.h" | 114 | #include "cryptlib.h" |
61 | #include "bn_lcl.h" | 115 | #include "bn_lcl.h" |
62 | #ifdef ATALLA | ||
63 | # include <alloca.h> | ||
64 | # include <atasi.h> | ||
65 | # include <assert.h> | ||
66 | # include <dlfcn.h> | ||
67 | #endif | ||
68 | 116 | ||
69 | #define TABLE_SIZE 16 | 117 | #define TABLE_SIZE 32 |
70 | 118 | ||
71 | /* slow but works */ | 119 | /* slow but works */ |
72 | int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) | 120 | int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) |
@@ -91,42 +139,6 @@ err: | |||
91 | return(r); | 139 | return(r); |
92 | } | 140 | } |
93 | 141 | ||
94 | #if 0 | ||
95 | /* this one works - simple but works */ | ||
96 | int BN_mod_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, BN_CTX *ctx) | ||
97 | { | ||
98 | int i,bits,ret=0; | ||
99 | BIGNUM *v,*tmp; | ||
100 | |||
101 | BN_CTX_start(ctx); | ||
102 | v = BN_CTX_get(ctx); | ||
103 | tmp = BN_CTX_get(ctx); | ||
104 | if (v == NULL || tmp == NULL) goto err; | ||
105 | |||
106 | if (BN_copy(v,a) == NULL) goto err; | ||
107 | bits=BN_num_bits(p); | ||
108 | |||
109 | if (BN_is_odd(p)) | ||
110 | { if (BN_copy(r,a) == NULL) goto err; } | ||
111 | else { if (!BN_one(r)) goto err; } | ||
112 | |||
113 | for (i=1; i<bits; i++) | ||
114 | { | ||
115 | if (!BN_sqr(tmp,v,ctx)) goto err; | ||
116 | if (!BN_mod(v,tmp,m,ctx)) goto err; | ||
117 | if (BN_is_bit_set(p,i)) | ||
118 | { | ||
119 | if (!BN_mul(tmp,r,v,ctx)) goto err; | ||
120 | if (!BN_mod(r,tmp,m,ctx)) goto err; | ||
121 | } | ||
122 | } | ||
123 | ret=1; | ||
124 | err: | ||
125 | BN_CTX_end(ctx); | ||
126 | return(ret); | ||
127 | } | ||
128 | |||
129 | #endif | ||
130 | 142 | ||
131 | /* this one works - simple but works */ | 143 | /* this one works - simple but works */ |
132 | int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BN_CTX *ctx) | 144 | int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BN_CTX *ctx) |
@@ -163,172 +175,6 @@ err: | |||
163 | return(ret); | 175 | return(ret); |
164 | } | 176 | } |
165 | 177 | ||
166 | #ifdef ATALLA | ||
167 | |||
168 | /* | ||
169 | * This routine will dynamically check for the existance of an Atalla AXL-200 | ||
170 | * SSL accelerator module. If one is found, the variable | ||
171 | * asi_accelerator_present is set to 1 and the function pointers | ||
172 | * ptr_ASI_xxxxxx above will be initialized to corresponding ASI API calls. | ||
173 | */ | ||
174 | typedef int tfnASI_GetPerformanceStatistics(int reset_flag, | ||
175 | unsigned int *ret_buf); | ||
176 | typedef int tfnASI_GetHardwareConfig(long card_num, unsigned int *ret_buf); | ||
177 | typedef int tfnASI_RSAPrivateKeyOpFn(RSAPrivateKey * rsaKey, | ||
178 | unsigned char *output, | ||
179 | unsigned char *input, | ||
180 | unsigned int modulus_len); | ||
181 | |||
182 | static tfnASI_GetHardwareConfig *ptr_ASI_GetHardwareConfig; | ||
183 | static tfnASI_RSAPrivateKeyOpFn *ptr_ASI_RSAPrivateKeyOpFn; | ||
184 | static tfnASI_GetPerformanceStatistics *ptr_ASI_GetPerformanceStatistics; | ||
185 | static int asi_accelerator_present; | ||
186 | static int tried_atalla; | ||
187 | |||
188 | void atalla_initialize_accelerator_handle(void) | ||
189 | { | ||
190 | void *dl_handle; | ||
191 | int status; | ||
192 | unsigned int config_buf[1024]; | ||
193 | static int tested; | ||
194 | |||
195 | if(tested) | ||
196 | return; | ||
197 | |||
198 | tested=1; | ||
199 | |||
200 | bzero((void *)config_buf, 1024); | ||
201 | |||
202 | /* | ||
203 | * Check to see if the library is present on the system | ||
204 | */ | ||
205 | dl_handle = dlopen("atasi.so", RTLD_NOW); | ||
206 | if (dl_handle == (void *) NULL) | ||
207 | { | ||
208 | /* printf("atasi.so library is not present on the system\n"); | ||
209 | printf("No HW acceleration available\n");*/ | ||
210 | return; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * The library is present. Now we'll check to insure that the | ||
215 | * LDM is up and running. First we'll get the address of the | ||
216 | * function in the atasi library that we need to see if the | ||
217 | * LDM is operating. | ||
218 | */ | ||
219 | |||
220 | ptr_ASI_GetHardwareConfig = | ||
221 | (tfnASI_GetHardwareConfig *)dlsym(dl_handle,"ASI_GetHardwareConfig"); | ||
222 | |||
223 | if (ptr_ASI_GetHardwareConfig) | ||
224 | { | ||
225 | /* | ||
226 | * We found the call, now we'll get our config | ||
227 | * status. If we get a non 0 result, the LDM is not | ||
228 | * running and we cannot use the Atalla ASI * | ||
229 | * library. | ||
230 | */ | ||
231 | status = (*ptr_ASI_GetHardwareConfig)(0L, config_buf); | ||
232 | if (status != 0) | ||
233 | { | ||
234 | printf("atasi.so library is present but not initialized\n"); | ||
235 | printf("No HW acceleration available\n"); | ||
236 | return; | ||
237 | } | ||
238 | } | ||
239 | else | ||
240 | { | ||
241 | /* printf("We found the library, but not the function. Very Strange!\n");*/ | ||
242 | return ; | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * It looks like we have acceleration capabilities. Load up the | ||
247 | * pointers to our ASI API calls. | ||
248 | */ | ||
249 | ptr_ASI_RSAPrivateKeyOpFn= | ||
250 | (tfnASI_RSAPrivateKeyOpFn *)dlsym(dl_handle, "ASI_RSAPrivateKeyOpFn"); | ||
251 | if (ptr_ASI_RSAPrivateKeyOpFn == NULL) | ||
252 | { | ||
253 | /* printf("We found the library, but no RSA function. Very Strange!\n");*/ | ||
254 | return; | ||
255 | } | ||
256 | |||
257 | ptr_ASI_GetPerformanceStatistics = | ||
258 | (tfnASI_GetPerformanceStatistics *)dlsym(dl_handle, "ASI_GetPerformanceStatistics"); | ||
259 | if (ptr_ASI_GetPerformanceStatistics == NULL) | ||
260 | { | ||
261 | /* printf("We found the library, but no stat function. Very Strange!\n");*/ | ||
262 | return; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Indicate that acceleration is available | ||
267 | */ | ||
268 | asi_accelerator_present = 1; | ||
269 | |||
270 | /* printf("This system has acceleration!\n");*/ | ||
271 | |||
272 | return; | ||
273 | } | ||
274 | |||
275 | /* make sure this only gets called once when bn_mod_exp calls bn_mod_exp_mont */ | ||
276 | int BN_mod_exp_atalla(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m) | ||
277 | { | ||
278 | unsigned char *abin; | ||
279 | unsigned char *pbin; | ||
280 | unsigned char *mbin; | ||
281 | unsigned char *rbin; | ||
282 | int an,pn,mn,ret; | ||
283 | RSAPrivateKey keydata; | ||
284 | |||
285 | atalla_initialize_accelerator_handle(); | ||
286 | if(!asi_accelerator_present) | ||
287 | return 0; | ||
288 | |||
289 | |||
290 | /* We should be able to run without size testing */ | ||
291 | # define ASIZE 128 | ||
292 | an=BN_num_bytes(a); | ||
293 | pn=BN_num_bytes(p); | ||
294 | mn=BN_num_bytes(m); | ||
295 | |||
296 | if(an <= ASIZE && pn <= ASIZE && mn <= ASIZE) | ||
297 | { | ||
298 | int size=mn; | ||
299 | |||
300 | assert(an <= mn); | ||
301 | abin=alloca(size); | ||
302 | memset(abin,'\0',mn); | ||
303 | BN_bn2bin(a,abin+size-an); | ||
304 | |||
305 | pbin=alloca(pn); | ||
306 | BN_bn2bin(p,pbin); | ||
307 | |||
308 | mbin=alloca(size); | ||
309 | memset(mbin,'\0',mn); | ||
310 | BN_bn2bin(m,mbin+size-mn); | ||
311 | |||
312 | rbin=alloca(size); | ||
313 | |||
314 | memset(&keydata,'\0',sizeof keydata); | ||
315 | keydata.privateExponent.data=pbin; | ||
316 | keydata.privateExponent.len=pn; | ||
317 | keydata.modulus.data=mbin; | ||
318 | keydata.modulus.len=size; | ||
319 | |||
320 | ret=(*ptr_ASI_RSAPrivateKeyOpFn)(&keydata,rbin,abin,keydata.modulus.len); | ||
321 | /*fprintf(stderr,"!%s\n",BN_bn2hex(a));*/ | ||
322 | if(!ret) | ||
323 | { | ||
324 | BN_bin2bn(rbin,keydata.modulus.len,r); | ||
325 | /*fprintf(stderr,"?%s\n",BN_bn2hex(r));*/ | ||
326 | return 1; | ||
327 | } | ||
328 | } | ||
329 | return 0; | ||
330 | } | ||
331 | #endif /* def ATALLA */ | ||
332 | 178 | ||
333 | int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | 179 | int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
334 | BN_CTX *ctx) | 180 | BN_CTX *ctx) |
@@ -339,13 +185,6 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
339 | bn_check_top(p); | 185 | bn_check_top(p); |
340 | bn_check_top(m); | 186 | bn_check_top(m); |
341 | 187 | ||
342 | #ifdef ATALLA | ||
343 | if(BN_mod_exp_atalla(r,a,p,m)) | ||
344 | return 1; | ||
345 | /* If it fails, try the other methods (but don't try atalla again) */ | ||
346 | tried_atalla=1; | ||
347 | #endif | ||
348 | |||
349 | #ifdef MONT_MUL_MOD | 188 | #ifdef MONT_MUL_MOD |
350 | /* I have finally been able to take out this pre-condition of | 189 | /* I have finally been able to take out this pre-condition of |
351 | * the top bit being set. It was caused by an error in BN_div | 190 | * the top bit being set. It was caused by an error in BN_div |
@@ -354,7 +193,15 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
354 | /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ | 193 | /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ |
355 | 194 | ||
356 | if (BN_is_odd(m)) | 195 | if (BN_is_odd(m)) |
357 | { ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); } | 196 | { |
197 | if (a->top == 1) | ||
198 | { | ||
199 | BN_ULONG A = a->d[0]; | ||
200 | ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL); | ||
201 | } | ||
202 | else | ||
203 | ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); | ||
204 | } | ||
358 | else | 205 | else |
359 | #endif | 206 | #endif |
360 | #ifdef RECP_MUL_MOD | 207 | #ifdef RECP_MUL_MOD |
@@ -363,14 +210,10 @@ int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
363 | { ret=BN_mod_exp_simple(r,a,p,m,ctx); } | 210 | { ret=BN_mod_exp_simple(r,a,p,m,ctx); } |
364 | #endif | 211 | #endif |
365 | 212 | ||
366 | #ifdef ATALLA | ||
367 | tried_atalla=0; | ||
368 | #endif | ||
369 | |||
370 | return(ret); | 213 | return(ret); |
371 | } | 214 | } |
372 | 215 | ||
373 | /* #ifdef RECP_MUL_MOD */ | 216 | |
374 | int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, | 217 | int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, |
375 | const BIGNUM *m, BN_CTX *ctx) | 218 | const BIGNUM *m, BN_CTX *ctx) |
376 | { | 219 | { |
@@ -398,27 +241,22 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, | |||
398 | ts=1; | 241 | ts=1; |
399 | 242 | ||
400 | if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ | 243 | if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ |
401 | if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx)) | ||
402 | goto err; /* 2 */ | ||
403 | |||
404 | if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */ | ||
405 | window=1; | ||
406 | else if (bits >= 256) | ||
407 | window=5; /* max size of window */ | ||
408 | else if (bits >= 128) | ||
409 | window=4; | ||
410 | else | ||
411 | window=3; | ||
412 | 244 | ||
413 | j=1<<(window-1); | 245 | window = BN_window_bits_for_exponent_size(bits); |
414 | for (i=1; i<j; i++) | 246 | if (window > 1) |
415 | { | 247 | { |
416 | BN_init(&val[i]); | 248 | if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx)) |
417 | if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx)) | 249 | goto err; /* 2 */ |
418 | goto err; | 250 | j=1<<(window-1); |
251 | for (i=1; i<j; i++) | ||
252 | { | ||
253 | BN_init(&val[i]); | ||
254 | if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx)) | ||
255 | goto err; | ||
256 | } | ||
257 | ts=i; | ||
419 | } | 258 | } |
420 | ts=i; | 259 | |
421 | |||
422 | start=1; /* This is used to avoid multiplication etc | 260 | start=1; /* This is used to avoid multiplication etc |
423 | * when there is only the value '1' in the | 261 | * when there is only the value '1' in the |
424 | * buffer. */ | 262 | * buffer. */ |
@@ -485,9 +323,8 @@ err: | |||
485 | BN_RECP_CTX_free(&recp); | 323 | BN_RECP_CTX_free(&recp); |
486 | return(ret); | 324 | return(ret); |
487 | } | 325 | } |
488 | /* #endif */ | ||
489 | 326 | ||
490 | /* #ifdef MONT_MUL_MOD */ | 327 | |
491 | int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, | 328 | int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, |
492 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) | 329 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) |
493 | { | 330 | { |
@@ -502,12 +339,6 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, | |||
502 | bn_check_top(p); | 339 | bn_check_top(p); |
503 | bn_check_top(m); | 340 | bn_check_top(m); |
504 | 341 | ||
505 | #ifdef ATALLA | ||
506 | if(!tried_atalla && BN_mod_exp_atalla(rr,a,p,m)) | ||
507 | return 1; | ||
508 | /* If it fails, try the other methods */ | ||
509 | #endif | ||
510 | |||
511 | if (!(m->d[0] & 1)) | 342 | if (!(m->d[0] & 1)) |
512 | { | 343 | { |
513 | BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); | 344 | BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); |
@@ -527,11 +358,9 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, | |||
527 | /* If this is not done, things will break in the montgomery | 358 | /* If this is not done, things will break in the montgomery |
528 | * part */ | 359 | * part */ |
529 | 360 | ||
530 | #if 1 | ||
531 | if (in_mont != NULL) | 361 | if (in_mont != NULL) |
532 | mont=in_mont; | 362 | mont=in_mont; |
533 | else | 363 | else |
534 | #endif | ||
535 | { | 364 | { |
536 | if ((mont=BN_MONT_CTX_new()) == NULL) goto err; | 365 | if ((mont=BN_MONT_CTX_new()) == NULL) goto err; |
537 | if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; | 366 | if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; |
@@ -541,31 +370,27 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, | |||
541 | ts=1; | 370 | ts=1; |
542 | if (BN_ucmp(a,m) >= 0) | 371 | if (BN_ucmp(a,m) >= 0) |
543 | { | 372 | { |
544 | BN_mod(&(val[0]),a,m,ctx); | 373 | if (!BN_mod(&(val[0]),a,m,ctx)) |
374 | goto err; | ||
545 | aa= &(val[0]); | 375 | aa= &(val[0]); |
546 | } | 376 | } |
547 | else | 377 | else |
548 | aa=a; | 378 | aa=a; |
549 | if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */ | 379 | if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */ |
550 | if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */ | ||
551 | |||
552 | if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */ | ||
553 | window=1; | ||
554 | else if (bits >= 256) | ||
555 | window=5; /* max size of window */ | ||
556 | else if (bits >= 128) | ||
557 | window=4; | ||
558 | else | ||
559 | window=3; | ||
560 | 380 | ||
561 | j=1<<(window-1); | 381 | window = BN_window_bits_for_exponent_size(bits); |
562 | for (i=1; i<j; i++) | 382 | if (window > 1) |
563 | { | 383 | { |
564 | BN_init(&(val[i])); | 384 | if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */ |
565 | if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx)) | 385 | j=1<<(window-1); |
566 | goto err; | 386 | for (i=1; i<j; i++) |
387 | { | ||
388 | BN_init(&(val[i])); | ||
389 | if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx)) | ||
390 | goto err; | ||
391 | } | ||
392 | ts=i; | ||
567 | } | 393 | } |
568 | ts=i; | ||
569 | 394 | ||
570 | start=1; /* This is used to avoid multiplication etc | 395 | start=1; /* This is used to avoid multiplication etc |
571 | * when there is only the value '1' in the | 396 | * when there is only the value '1' in the |
@@ -574,7 +399,7 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, | |||
574 | wstart=bits-1; /* The top bit of the window */ | 399 | wstart=bits-1; /* The top bit of the window */ |
575 | wend=0; /* The bottom bit of the window */ | 400 | wend=0; /* The bottom bit of the window */ |
576 | 401 | ||
577 | if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; | 402 | if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; |
578 | for (;;) | 403 | for (;;) |
579 | { | 404 | { |
580 | if (BN_is_bit_set(p,wstart) == 0) | 405 | if (BN_is_bit_set(p,wstart) == 0) |
@@ -626,7 +451,7 @@ int BN_mod_exp_mont(BIGNUM *rr, BIGNUM *a, const BIGNUM *p, | |||
626 | start=0; | 451 | start=0; |
627 | if (wstart < 0) break; | 452 | if (wstart < 0) break; |
628 | } | 453 | } |
629 | BN_from_montgomery(rr,r,mont,ctx); | 454 | if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; |
630 | ret=1; | 455 | ret=1; |
631 | err: | 456 | err: |
632 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); | 457 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); |
@@ -635,7 +460,134 @@ err: | |||
635 | BN_clear_free(&(val[i])); | 460 | BN_clear_free(&(val[i])); |
636 | return(ret); | 461 | return(ret); |
637 | } | 462 | } |
638 | /* #endif */ | 463 | |
464 | int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, | ||
465 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
466 | { | ||
467 | BN_MONT_CTX *mont = NULL; | ||
468 | int b, bits, ret=0; | ||
469 | int r_is_one; | ||
470 | BN_ULONG w, next_w; | ||
471 | BIGNUM *d, *r, *t; | ||
472 | BIGNUM *swap_tmp; | ||
473 | #define BN_MOD_MUL_WORD(r, w, m) \ | ||
474 | (BN_mul_word(r, (w)) && \ | ||
475 | (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ | ||
476 | (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) | ||
477 | /* BN_MOD_MUL_WORD is only used with 'w' large, | ||
478 | * so the BN_ucmp test is probably more overhead | ||
479 | * than always using BN_mod (which uses BN_copy if | ||
480 | * a similar test returns true). */ | ||
481 | #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ | ||
482 | (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) | ||
483 | |||
484 | bn_check_top(p); | ||
485 | bn_check_top(m); | ||
486 | |||
487 | if (!(m->d[0] & 1)) | ||
488 | { | ||
489 | BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS); | ||
490 | return(0); | ||
491 | } | ||
492 | bits = BN_num_bits(p); | ||
493 | if (bits == 0) | ||
494 | { | ||
495 | BN_one(rr); | ||
496 | return(1); | ||
497 | } | ||
498 | BN_CTX_start(ctx); | ||
499 | d = BN_CTX_get(ctx); | ||
500 | r = BN_CTX_get(ctx); | ||
501 | t = BN_CTX_get(ctx); | ||
502 | if (d == NULL || r == NULL || t == NULL) goto err; | ||
503 | |||
504 | if (in_mont != NULL) | ||
505 | mont=in_mont; | ||
506 | else | ||
507 | { | ||
508 | if ((mont = BN_MONT_CTX_new()) == NULL) goto err; | ||
509 | if (!BN_MONT_CTX_set(mont, m, ctx)) goto err; | ||
510 | } | ||
511 | |||
512 | r_is_one = 1; /* except for Montgomery factor */ | ||
513 | |||
514 | /* bits-1 >= 0 */ | ||
515 | |||
516 | /* The result is accumulated in the product r*w. */ | ||
517 | w = a; /* bit 'bits-1' of 'p' is always set */ | ||
518 | for (b = bits-2; b >= 0; b--) | ||
519 | { | ||
520 | /* First, square r*w. */ | ||
521 | next_w = w*w; | ||
522 | if ((next_w/w) != w) /* overflow */ | ||
523 | { | ||
524 | if (r_is_one) | ||
525 | { | ||
526 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; | ||
527 | r_is_one = 0; | ||
528 | } | ||
529 | else | ||
530 | { | ||
531 | if (!BN_MOD_MUL_WORD(r, w, m)) goto err; | ||
532 | } | ||
533 | next_w = 1; | ||
534 | } | ||
535 | w = next_w; | ||
536 | if (!r_is_one) | ||
537 | { | ||
538 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err; | ||
539 | } | ||
540 | |||
541 | /* Second, multiply r*w by 'a' if exponent bit is set. */ | ||
542 | if (BN_is_bit_set(p, b)) | ||
543 | { | ||
544 | next_w = w*a; | ||
545 | if ((next_w/a) != w) /* overflow */ | ||
546 | { | ||
547 | if (r_is_one) | ||
548 | { | ||
549 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; | ||
550 | r_is_one = 0; | ||
551 | } | ||
552 | else | ||
553 | { | ||
554 | if (!BN_MOD_MUL_WORD(r, w, m)) goto err; | ||
555 | } | ||
556 | next_w = a; | ||
557 | } | ||
558 | w = next_w; | ||
559 | } | ||
560 | } | ||
561 | |||
562 | /* Finally, set r:=r*w. */ | ||
563 | if (w != 1) | ||
564 | { | ||
565 | if (r_is_one) | ||
566 | { | ||
567 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; | ||
568 | r_is_one = 0; | ||
569 | } | ||
570 | else | ||
571 | { | ||
572 | if (!BN_MOD_MUL_WORD(r, w, m)) goto err; | ||
573 | } | ||
574 | } | ||
575 | |||
576 | if (r_is_one) /* can happen only if a == 1*/ | ||
577 | { | ||
578 | if (!BN_one(rr)) goto err; | ||
579 | } | ||
580 | else | ||
581 | { | ||
582 | if (!BN_from_montgomery(rr, r, mont, ctx)) goto err; | ||
583 | } | ||
584 | ret = 1; | ||
585 | err: | ||
586 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); | ||
587 | BN_CTX_end(ctx); | ||
588 | return(ret); | ||
589 | } | ||
590 | |||
639 | 591 | ||
640 | /* The old fallback, simple version :-) */ | 592 | /* The old fallback, simple version :-) */ |
641 | int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, | 593 | int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, |
@@ -660,26 +612,21 @@ int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m, | |||
660 | BN_init(&(val[0])); | 612 | BN_init(&(val[0])); |
661 | ts=1; | 613 | ts=1; |
662 | if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ | 614 | if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */ |
663 | if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx)) | ||
664 | goto err; /* 2 */ | ||
665 | |||
666 | if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */ | ||
667 | window=1; | ||
668 | else if (bits >= 256) | ||
669 | window=5; /* max size of window */ | ||
670 | else if (bits >= 128) | ||
671 | window=4; | ||
672 | else | ||
673 | window=3; | ||
674 | 615 | ||
675 | j=1<<(window-1); | 616 | window = BN_window_bits_for_exponent_size(bits); |
676 | for (i=1; i<j; i++) | 617 | if (window > 1) |
677 | { | 618 | { |
678 | BN_init(&(val[i])); | 619 | if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx)) |
679 | if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx)) | 620 | goto err; /* 2 */ |
680 | goto err; | 621 | j=1<<(window-1); |
622 | for (i=1; i<j; i++) | ||
623 | { | ||
624 | BN_init(&(val[i])); | ||
625 | if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx)) | ||
626 | goto err; | ||
627 | } | ||
628 | ts=i; | ||
681 | } | 629 | } |
682 | ts=i; | ||
683 | 630 | ||
684 | start=1; /* This is used to avoid multiplication etc | 631 | start=1; /* This is used to avoid multiplication etc |
685 | * when there is only the value '1' in the | 632 | * when there is only the value '1' in the |
diff --git a/src/lib/libcrypto/bn/bn_exp2.c b/src/lib/libcrypto/bn/bn_exp2.c index 4f4e9e3299..29029f4c72 100644 --- a/src/lib/libcrypto/bn/bn_exp2.c +++ b/src/lib/libcrypto/bn/bn_exp2.c | |||
@@ -1,27 +1,128 @@ | |||
1 | /* crypto/bn/bn_exp2.c */ | ||
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This package is an SSL implementation written | ||
6 | * by Eric Young (eay@cryptsoft.com). | ||
7 | * The implementation was written so as to conform with Netscapes SSL. | ||
8 | * | ||
9 | * This library is free for commercial and non-commercial use as long as | ||
10 | * the following conditions are aheared to. The following conditions | ||
11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
13 | * included with this distribution is covered by the same copyright terms | ||
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
15 | * | ||
16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
17 | * the code are not to be removed. | ||
18 | * If this package is used in a product, Eric Young should be given attribution | ||
19 | * as the author of the parts of the library used. | ||
20 | * This can be in the form of a textual message at program startup or | ||
21 | * in documentation (online or textual) provided with the package. | ||
22 | * | ||
23 | * Redistribution and use in source and binary forms, with or without | ||
24 | * modification, are permitted provided that the following conditions | ||
25 | * are met: | ||
26 | * 1. Redistributions of source code must retain the copyright | ||
27 | * notice, this list of conditions and the following disclaimer. | ||
28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
29 | * notice, this list of conditions and the following disclaimer in the | ||
30 | * documentation and/or other materials provided with the distribution. | ||
31 | * 3. All advertising materials mentioning features or use of this software | ||
32 | * must display the following acknowledgement: | ||
33 | * "This product includes cryptographic software written by | ||
34 | * Eric Young (eay@cryptsoft.com)" | ||
35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
36 | * being used are not cryptographic related :-). | ||
37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
38 | * the apps directory (application code) you must include an acknowledgement: | ||
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
51 | * SUCH DAMAGE. | ||
52 | * | ||
53 | * The licence and distribution terms for any publically available version or | ||
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
55 | * copied and put under another distribution licence | ||
56 | * [including the GNU Public Licence.] | ||
57 | */ | ||
58 | /* ==================================================================== | ||
59 | * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. | ||
60 | * | ||
61 | * Redistribution and use in source and binary forms, with or without | ||
62 | * modification, are permitted provided that the following conditions | ||
63 | * are met: | ||
64 | * | ||
65 | * 1. Redistributions of source code must retain the above copyright | ||
66 | * notice, this list of conditions and the following disclaimer. | ||
67 | * | ||
68 | * 2. Redistributions in binary form must reproduce the above copyright | ||
69 | * notice, this list of conditions and the following disclaimer in | ||
70 | * the documentation and/or other materials provided with the | ||
71 | * distribution. | ||
72 | * | ||
73 | * 3. All advertising materials mentioning features or use of this | ||
74 | * software must display the following acknowledgment: | ||
75 | * "This product includes software developed by the OpenSSL Project | ||
76 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | ||
77 | * | ||
78 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | ||
79 | * endorse or promote products derived from this software without | ||
80 | * prior written permission. For written permission, please contact | ||
81 | * openssl-core@openssl.org. | ||
82 | * | ||
83 | * 5. Products derived from this software may not be called "OpenSSL" | ||
84 | * nor may "OpenSSL" appear in their names without prior written | ||
85 | * permission of the OpenSSL Project. | ||
86 | * | ||
87 | * 6. Redistributions of any form whatsoever must retain the following | ||
88 | * acknowledgment: | ||
89 | * "This product includes software developed by the OpenSSL Project | ||
90 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | ||
91 | * | ||
92 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | ||
93 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
94 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
95 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | ||
96 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
97 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||
98 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
99 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
100 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
101 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
102 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
103 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
104 | * ==================================================================== | ||
105 | * | ||
106 | * This product includes cryptographic software written by Eric Young | ||
107 | * (eay@cryptsoft.com). This product includes software written by Tim | ||
108 | * Hudson (tjh@cryptsoft.com). | ||
109 | * | ||
110 | */ | ||
111 | |||
1 | #include <stdio.h> | 112 | #include <stdio.h> |
2 | #include "cryptlib.h" | 113 | #include "cryptlib.h" |
3 | #include "bn_lcl.h" | 114 | #include "bn_lcl.h" |
4 | 115 | ||
5 | /* I've done some timing with different table sizes. | 116 | #define TABLE_SIZE 32 |
6 | * The main hassle is that even with bits set at 3, this requires | ||
7 | * 63 BIGNUMs to store the pre-calculated values. | ||
8 | * 512 1024 | ||
9 | * bits=1 75.4% 79.4% | ||
10 | * bits=2 61.2% 62.4% | ||
11 | * bits=3 61.3% 59.3% | ||
12 | * The lack of speed improvement is also a function of the pre-calculation | ||
13 | * which could be removed. | ||
14 | */ | ||
15 | #define EXP2_TABLE_BITS 2 /* 1 2 3 4 5 */ | ||
16 | #define EXP2_TABLE_SIZE 4 /* 2 4 8 16 32 */ | ||
17 | 117 | ||
18 | int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, | 118 | int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, |
19 | BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) | 119 | BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) |
20 | { | 120 | { |
21 | int i,j,k,bits,bits1,bits2,ret=0,wstart,wend,window,xvalue,yvalue; | 121 | int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2; |
22 | int start=1,ts=0,x,y; | 122 | int r_is_one=1,ts1=0,ts2=0; |
23 | BIGNUM *d,*aa1,*aa2,*r; | 123 | BIGNUM *d,*r; |
24 | BIGNUM val[EXP2_TABLE_SIZE][EXP2_TABLE_SIZE]; | 124 | BIGNUM *a_mod_m; |
125 | BIGNUM val1[TABLE_SIZE], val2[TABLE_SIZE]; | ||
25 | BN_MONT_CTX *mont=NULL; | 126 | BN_MONT_CTX *mont=NULL; |
26 | 127 | ||
27 | bn_check_top(a1); | 128 | bn_check_top(a1); |
@@ -32,7 +133,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, | |||
32 | 133 | ||
33 | if (!(m->d[0] & 1)) | 134 | if (!(m->d[0] & 1)) |
34 | { | 135 | { |
35 | BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); | 136 | BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); |
36 | return(0); | 137 | return(0); |
37 | } | 138 | } |
38 | bits1=BN_num_bits(p1); | 139 | bits1=BN_num_bits(p1); |
@@ -42,17 +143,13 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, | |||
42 | BN_one(rr); | 143 | BN_one(rr); |
43 | return(1); | 144 | return(1); |
44 | } | 145 | } |
146 | bits=(bits1 > bits2)?bits1:bits2; | ||
45 | 147 | ||
46 | BN_CTX_start(ctx); | 148 | BN_CTX_start(ctx); |
47 | d = BN_CTX_get(ctx); | 149 | d = BN_CTX_get(ctx); |
48 | r = BN_CTX_get(ctx); | 150 | r = BN_CTX_get(ctx); |
49 | if (d == NULL || r == NULL) goto err; | 151 | if (d == NULL || r == NULL) goto err; |
50 | 152 | ||
51 | bits=(bits1 > bits2)?bits1:bits2; | ||
52 | |||
53 | /* If this is not done, things will break in the montgomery | ||
54 | * part */ | ||
55 | |||
56 | if (in_mont != NULL) | 153 | if (in_mont != NULL) |
57 | mont=in_mont; | 154 | mont=in_mont; |
58 | else | 155 | else |
@@ -61,139 +158,143 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, | |||
61 | if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; | 158 | if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; |
62 | } | 159 | } |
63 | 160 | ||
64 | BN_init(&(val[0][0])); | 161 | window1 = BN_window_bits_for_exponent_size(bits1); |
65 | BN_init(&(val[1][1])); | 162 | window2 = BN_window_bits_for_exponent_size(bits2); |
66 | BN_init(&(val[0][1])); | 163 | |
67 | BN_init(&(val[1][0])); | 164 | /* |
68 | ts=1; | 165 | * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)-1 |
166 | */ | ||
167 | BN_init(&val1[0]); | ||
168 | ts1=1; | ||
69 | if (BN_ucmp(a1,m) >= 0) | 169 | if (BN_ucmp(a1,m) >= 0) |
70 | { | 170 | { |
71 | BN_mod(&(val[1][0]),a1,m,ctx); | 171 | if (!BN_mod(&(val1[0]),a1,m,ctx)) |
72 | aa1= &(val[1][0]); | 172 | goto err; |
173 | a_mod_m = &(val1[0]); | ||
73 | } | 174 | } |
74 | else | 175 | else |
75 | aa1=a1; | 176 | a_mod_m = a1; |
177 | if (!BN_to_montgomery(&(val1[0]),a_mod_m,mont,ctx)) goto err; | ||
178 | if (window1 > 1) | ||
179 | { | ||
180 | if (!BN_mod_mul_montgomery(d,&(val1[0]),&(val1[0]),mont,ctx)) goto err; | ||
181 | |||
182 | j=1<<(window1-1); | ||
183 | for (i=1; i<j; i++) | ||
184 | { | ||
185 | BN_init(&(val1[i])); | ||
186 | if (!BN_mod_mul_montgomery(&(val1[i]),&(val1[i-1]),d,mont,ctx)) | ||
187 | goto err; | ||
188 | } | ||
189 | ts1=i; | ||
190 | } | ||
191 | |||
192 | |||
193 | /* | ||
194 | * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)-1 | ||
195 | */ | ||
196 | BN_init(&val2[0]); | ||
197 | ts2=1; | ||
76 | if (BN_ucmp(a2,m) >= 0) | 198 | if (BN_ucmp(a2,m) >= 0) |
77 | { | 199 | { |
78 | BN_mod(&(val[0][1]),a2,m,ctx); | 200 | if (!BN_mod(&(val2[0]),a2,m,ctx)) |
79 | aa2= &(val[0][1]); | 201 | goto err; |
202 | a_mod_m = &(val2[0]); | ||
80 | } | 203 | } |
81 | else | 204 | else |
82 | aa2=a2; | 205 | a_mod_m = a2; |
83 | if (!BN_to_montgomery(&(val[1][0]),aa1,mont,ctx)) goto err; | 206 | if (!BN_to_montgomery(&(val2[0]),a_mod_m,mont,ctx)) goto err; |
84 | if (!BN_to_montgomery(&(val[0][1]),aa2,mont,ctx)) goto err; | 207 | if (window2 > 1) |
85 | if (!BN_mod_mul_montgomery(&(val[1][1]), | ||
86 | &(val[1][0]),&(val[0][1]),mont,ctx)) | ||
87 | goto err; | ||
88 | |||
89 | #if 0 | ||
90 | if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */ | ||
91 | window=1; | ||
92 | else if (bits > 250) | ||
93 | window=5; /* max size of window */ | ||
94 | else if (bits >= 120) | ||
95 | window=4; | ||
96 | else | ||
97 | window=3; | ||
98 | #else | ||
99 | window=EXP2_TABLE_BITS; | ||
100 | #endif | ||
101 | |||
102 | k=1<<window; | ||
103 | for (x=0; x<k; x++) | ||
104 | { | 208 | { |
105 | if (x >= 2) | 209 | if (!BN_mod_mul_montgomery(d,&(val2[0]),&(val2[0]),mont,ctx)) goto err; |
106 | { | 210 | |
107 | BN_init(&(val[x][0])); | 211 | j=1<<(window2-1); |
108 | BN_init(&(val[x][1])); | 212 | for (i=1; i<j; i++) |
109 | if (!BN_mod_mul_montgomery(&(val[x][0]), | ||
110 | &(val[1][0]),&(val[x-1][0]),mont,ctx)) goto err; | ||
111 | if (!BN_mod_mul_montgomery(&(val[x][1]), | ||
112 | &(val[1][0]),&(val[x-1][1]),mont,ctx)) goto err; | ||
113 | } | ||
114 | for (y=2; y<k; y++) | ||
115 | { | 213 | { |
116 | BN_init(&(val[x][y])); | 214 | BN_init(&(val2[i])); |
117 | if (!BN_mod_mul_montgomery(&(val[x][y]), | 215 | if (!BN_mod_mul_montgomery(&(val2[i]),&(val2[i-1]),d,mont,ctx)) |
118 | &(val[x][y-1]),&(val[0][1]),mont,ctx)) | ||
119 | goto err; | 216 | goto err; |
120 | } | 217 | } |
218 | ts2=i; | ||
121 | } | 219 | } |
122 | ts=k; | 220 | |
123 | 221 | ||
124 | start=1; /* This is used to avoid multiplication etc | 222 | /* Now compute the power product, using independent windows. */ |
125 | * when there is only the value '1' in the | 223 | r_is_one=1; |
126 | * buffer. */ | 224 | wvalue1=0; /* The 'value' of the first window */ |
127 | xvalue=0; /* The 'x value' of the window */ | 225 | wvalue2=0; /* The 'value' of the second window */ |
128 | yvalue=0; /* The 'y value' of the window */ | 226 | wpos1=0; /* If wvalue1 > 0, the bottom bit of the first window */ |
129 | wstart=bits-1; /* The top bit of the window */ | 227 | wpos2=0; /* If wvalue2 > 0, the bottom bit of the second window */ |
130 | wend=0; /* The bottom bit of the window */ | 228 | |
131 | 229 | if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; | |
132 | if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; | 230 | for (b=bits-1; b>=0; b--) |
133 | for (;;) | ||
134 | { | 231 | { |
135 | xvalue=BN_is_bit_set(p1,wstart); | 232 | if (!r_is_one) |
136 | yvalue=BN_is_bit_set(p2,wstart); | ||
137 | if (!(xvalue || yvalue)) | ||
138 | { | 233 | { |
139 | if (!start) | 234 | if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) |
235 | goto err; | ||
236 | } | ||
237 | |||
238 | if (!wvalue1) | ||
239 | if (BN_is_bit_set(p1, b)) | ||
140 | { | 240 | { |
141 | if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) | 241 | /* consider bits b-window1+1 .. b for this window */ |
142 | goto err; | 242 | i = b-window1+1; |
243 | while (!BN_is_bit_set(p1, i)) /* works for i<0 */ | ||
244 | i++; | ||
245 | wpos1 = i; | ||
246 | wvalue1 = 1; | ||
247 | for (i = b-1; i >= wpos1; i--) | ||
248 | { | ||
249 | wvalue1 <<= 1; | ||
250 | if (BN_is_bit_set(p1, i)) | ||
251 | wvalue1++; | ||
252 | } | ||
143 | } | 253 | } |
144 | wstart--; | 254 | |
145 | if (wstart < 0) break; | 255 | if (!wvalue2) |
146 | continue; | 256 | if (BN_is_bit_set(p2, b)) |
147 | } | ||
148 | /* We now have wstart on a 'set' bit, we now need to work out | ||
149 | * how bit a window to do. To do this we need to scan | ||
150 | * forward until the last set bit before the end of the | ||
151 | * window */ | ||
152 | j=wstart; | ||
153 | /* xvalue=BN_is_bit_set(p1,wstart); already set */ | ||
154 | /* yvalue=BN_is_bit_set(p1,wstart); already set */ | ||
155 | wend=0; | ||
156 | for (i=1; i<window; i++) | ||
157 | { | ||
158 | if (wstart-i < 0) break; | ||
159 | xvalue+=xvalue; | ||
160 | xvalue|=BN_is_bit_set(p1,wstart-i); | ||
161 | yvalue+=yvalue; | ||
162 | yvalue|=BN_is_bit_set(p2,wstart-i); | ||
163 | } | ||
164 | |||
165 | /* i is the size of the current window */ | ||
166 | /* add the 'bytes above' */ | ||
167 | if (!start) | ||
168 | for (j=0; j<i; j++) | ||
169 | { | 257 | { |
170 | if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) | 258 | /* consider bits b-window2+1 .. b for this window */ |
171 | goto err; | 259 | i = b-window2+1; |
260 | while (!BN_is_bit_set(p2, i)) | ||
261 | i++; | ||
262 | wpos2 = i; | ||
263 | wvalue2 = 1; | ||
264 | for (i = b-1; i >= wpos2; i--) | ||
265 | { | ||
266 | wvalue2 <<= 1; | ||
267 | if (BN_is_bit_set(p2, i)) | ||
268 | wvalue2++; | ||
269 | } | ||
172 | } | 270 | } |
271 | |||
272 | if (wvalue1 && b == wpos1) | ||
273 | { | ||
274 | /* wvalue1 is odd and < 2^window1 */ | ||
275 | if (!BN_mod_mul_montgomery(r,r,&(val1[wvalue1>>1]),mont,ctx)) | ||
276 | goto err; | ||
277 | wvalue1 = 0; | ||
278 | r_is_one = 0; | ||
279 | } | ||
173 | 280 | ||
174 | /* wvalue will be an odd number < 2^window */ | 281 | if (wvalue2 && b == wpos2) |
175 | if (xvalue || yvalue) | ||
176 | { | 282 | { |
177 | if (!BN_mod_mul_montgomery(r,r,&(val[xvalue][yvalue]), | 283 | /* wvalue2 is odd and < 2^window2 */ |
178 | mont,ctx)) goto err; | 284 | if (!BN_mod_mul_montgomery(r,r,&(val2[wvalue2>>1]),mont,ctx)) |
285 | goto err; | ||
286 | wvalue2 = 0; | ||
287 | r_is_one = 0; | ||
179 | } | 288 | } |
180 | |||
181 | /* move the 'window' down further */ | ||
182 | wstart-=i; | ||
183 | start=0; | ||
184 | if (wstart < 0) break; | ||
185 | } | 289 | } |
186 | BN_from_montgomery(rr,r,mont,ctx); | 290 | BN_from_montgomery(rr,r,mont,ctx); |
187 | ret=1; | 291 | ret=1; |
188 | err: | 292 | err: |
189 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); | 293 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); |
190 | BN_CTX_end(ctx); | 294 | BN_CTX_end(ctx); |
191 | for (i=0; i<ts; i++) | 295 | for (i=0; i<ts1; i++) |
192 | { | 296 | BN_clear_free(&(val1[i])); |
193 | for (j=0; j<ts; j++) | 297 | for (i=0; i<ts2; i++) |
194 | { | 298 | BN_clear_free(&(val2[i])); |
195 | BN_clear_free(&(val[i][j])); | ||
196 | } | ||
197 | } | ||
198 | return(ret); | 299 | return(ret); |
199 | } | 300 | } |
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h index e36ccbc4c2..9c959921b4 100644 --- a/src/lib/libcrypto/bn/bn_lcl.h +++ b/src/lib/libcrypto/bn/bn_lcl.h | |||
@@ -55,6 +55,59 @@ | |||
55 | * copied and put under another distribution licence | 55 | * copied and put under another distribution licence |
56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
57 | */ | 57 | */ |
58 | /* ==================================================================== | ||
59 | * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. | ||
60 | * | ||
61 | * Redistribution and use in source and binary forms, with or without | ||
62 | * modification, are permitted provided that the following conditions | ||
63 | * are met: | ||
64 | * | ||
65 | * 1. Redistributions of source code must retain the above copyright | ||
66 | * notice, this list of conditions and the following disclaimer. | ||
67 | * | ||
68 | * 2. Redistributions in binary form must reproduce the above copyright | ||
69 | * notice, this list of conditions and the following disclaimer in | ||
70 | * the documentation and/or other materials provided with the | ||
71 | * distribution. | ||
72 | * | ||
73 | * 3. All advertising materials mentioning features or use of this | ||
74 | * software must display the following acknowledgment: | ||
75 | * "This product includes software developed by the OpenSSL Project | ||
76 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | ||
77 | * | ||
78 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | ||
79 | * endorse or promote products derived from this software without | ||
80 | * prior written permission. For written permission, please contact | ||
81 | * openssl-core@openssl.org. | ||
82 | * | ||
83 | * 5. Products derived from this software may not be called "OpenSSL" | ||
84 | * nor may "OpenSSL" appear in their names without prior written | ||
85 | * permission of the OpenSSL Project. | ||
86 | * | ||
87 | * 6. Redistributions of any form whatsoever must retain the following | ||
88 | * acknowledgment: | ||
89 | * "This product includes software developed by the OpenSSL Project | ||
90 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | ||
91 | * | ||
92 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | ||
93 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
94 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
95 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | ||
96 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
97 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||
98 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
99 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
100 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
101 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
102 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
103 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
104 | * ==================================================================== | ||
105 | * | ||
106 | * This product includes cryptographic software written by Eric Young | ||
107 | * (eay@cryptsoft.com). This product includes software written by Tim | ||
108 | * Hudson (tjh@cryptsoft.com). | ||
109 | * | ||
110 | */ | ||
58 | 111 | ||
59 | #ifndef HEADER_BN_LCL_H | 112 | #ifndef HEADER_BN_LCL_H |
60 | #define HEADER_BN_LCL_H | 113 | #define HEADER_BN_LCL_H |
@@ -65,6 +118,51 @@ | |||
65 | extern "C" { | 118 | extern "C" { |
66 | #endif | 119 | #endif |
67 | 120 | ||
121 | |||
122 | /* | ||
123 | * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions | ||
124 | * | ||
125 | * | ||
126 | * For window size 'w' (w >= 2) and a random 'b' bits exponent, | ||
127 | * the number of multiplications is a constant plus on average | ||
128 | * | ||
129 | * 2^(w-1) + (b-w)/(w+1); | ||
130 | * | ||
131 | * here 2^(w-1) is for precomputing the table (we actually need | ||
132 | * entries only for windows that have the lowest bit set), and | ||
133 | * (b-w)/(w+1) is an approximation for the expected number of | ||
134 | * w-bit windows, not counting the first one. | ||
135 | * | ||
136 | * Thus we should use | ||
137 | * | ||
138 | * w >= 6 if b > 671 | ||
139 | * w = 5 if 671 > b > 239 | ||
140 | * w = 4 if 239 > b > 79 | ||
141 | * w = 3 if 79 > b > 23 | ||
142 | * w <= 2 if 23 > b | ||
143 | * | ||
144 | * (with draws in between). Very small exponents are often selected | ||
145 | * with low Hamming weight, so we use w = 1 for b <= 23. | ||
146 | */ | ||
147 | #if 1 | ||
148 | #define BN_window_bits_for_exponent_size(b) \ | ||
149 | ((b) > 671 ? 6 : \ | ||
150 | (b) > 239 ? 5 : \ | ||
151 | (b) > 79 ? 4 : \ | ||
152 | (b) > 23 ? 3 : 1) | ||
153 | #else | ||
154 | /* Old SSLeay/OpenSSL table. | ||
155 | * Maximum window size was 5, so this table differs for b==1024; | ||
156 | * but it coincides for other interesting values (b==160, b==512). | ||
157 | */ | ||
158 | #define BN_window_bits_for_exponent_size(b) \ | ||
159 | ((b) > 255 ? 5 : \ | ||
160 | (b) > 127 ? 4 : \ | ||
161 | (b) > 17 ? 3 : 1) | ||
162 | #endif | ||
163 | |||
164 | |||
165 | |||
68 | /* Pentium pro 16,16,16,32,64 */ | 166 | /* Pentium pro 16,16,16,32,64 */ |
69 | /* Alpha 16,16,16,16,64 */ | 167 | /* Alpha 16,16,16,16,64 */ |
70 | #define BN_MULL_SIZE_NORMAL (16) /* 32 */ | 168 | #define BN_MULL_SIZE_NORMAL (16) /* 32 */ |
@@ -130,7 +228,7 @@ extern "C" { | |||
130 | /* This is used for internal error checking and is not normally used */ | 228 | /* This is used for internal error checking and is not normally used */ |
131 | #ifdef BN_DEBUG | 229 | #ifdef BN_DEBUG |
132 | # include <assert.h> | 230 | # include <assert.h> |
133 | # define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->max); | 231 | # define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->dmax); |
134 | #else | 232 | #else |
135 | # define bn_check_top(a) | 233 | # define bn_check_top(a) |
136 | #endif | 234 | #endif |
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c index 0e6b12d9c3..b6b0ce4b3c 100644 --- a/src/lib/libcrypto/bn/bn_lib.c +++ b/src/lib/libcrypto/bn/bn_lib.c | |||
@@ -56,6 +56,12 @@ | |||
56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
57 | */ | 57 | */ |
58 | 58 | ||
59 | #ifndef BN_DEBUG | ||
60 | # undef NDEBUG /* avoid conflicting definitions */ | ||
61 | # define NDEBUG | ||
62 | #endif | ||
63 | |||
64 | #include <assert.h> | ||
59 | #include <stdio.h> | 65 | #include <stdio.h> |
60 | #include "cryptlib.h" | 66 | #include "cryptlib.h" |
61 | #include "bn_lcl.h" | 67 | #include "bn_lcl.h" |
@@ -244,14 +250,8 @@ int BN_num_bits(const BIGNUM *a) | |||
244 | 250 | ||
245 | if (a->top == 0) return(0); | 251 | if (a->top == 0) return(0); |
246 | l=a->d[a->top-1]; | 252 | l=a->d[a->top-1]; |
253 | assert(l != 0); | ||
247 | i=(a->top-1)*BN_BITS2; | 254 | i=(a->top-1)*BN_BITS2; |
248 | if (l == 0) | ||
249 | { | ||
250 | #if !defined(NO_STDIO) && !defined(WIN16) | ||
251 | fprintf(stderr,"BAD TOP VALUE\n"); | ||
252 | #endif | ||
253 | abort(); | ||
254 | } | ||
255 | return(i+BN_num_bits_word(l)); | 255 | return(i+BN_num_bits_word(l)); |
256 | } | 256 | } |
257 | 257 | ||
@@ -262,24 +262,24 @@ void BN_clear_free(BIGNUM *a) | |||
262 | if (a == NULL) return; | 262 | if (a == NULL) return; |
263 | if (a->d != NULL) | 263 | if (a->d != NULL) |
264 | { | 264 | { |
265 | memset(a->d,0,a->max*sizeof(a->d[0])); | 265 | memset(a->d,0,a->dmax*sizeof(a->d[0])); |
266 | if (!(BN_get_flags(a,BN_FLG_STATIC_DATA))) | 266 | if (!(BN_get_flags(a,BN_FLG_STATIC_DATA))) |
267 | Free(a->d); | 267 | OPENSSL_free(a->d); |
268 | } | 268 | } |
269 | i=BN_get_flags(a,BN_FLG_MALLOCED); | 269 | i=BN_get_flags(a,BN_FLG_MALLOCED); |
270 | memset(a,0,sizeof(BIGNUM)); | 270 | memset(a,0,sizeof(BIGNUM)); |
271 | if (i) | 271 | if (i) |
272 | Free(a); | 272 | OPENSSL_free(a); |
273 | } | 273 | } |
274 | 274 | ||
275 | void BN_free(BIGNUM *a) | 275 | void BN_free(BIGNUM *a) |
276 | { | 276 | { |
277 | if (a == NULL) return; | 277 | if (a == NULL) return; |
278 | if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA))) | 278 | if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA))) |
279 | Free(a->d); | 279 | OPENSSL_free(a->d); |
280 | a->flags|=BN_FLG_FREE; /* REMOVE? */ | 280 | a->flags|=BN_FLG_FREE; /* REMOVE? */ |
281 | if (a->flags & BN_FLG_MALLOCED) | 281 | if (a->flags & BN_FLG_MALLOCED) |
282 | Free(a); | 282 | OPENSSL_free(a); |
283 | } | 283 | } |
284 | 284 | ||
285 | void BN_init(BIGNUM *a) | 285 | void BN_init(BIGNUM *a) |
@@ -291,7 +291,7 @@ BIGNUM *BN_new(void) | |||
291 | { | 291 | { |
292 | BIGNUM *ret; | 292 | BIGNUM *ret; |
293 | 293 | ||
294 | if ((ret=(BIGNUM *)Malloc(sizeof(BIGNUM))) == NULL) | 294 | if ((ret=(BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL) |
295 | { | 295 | { |
296 | BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE); | 296 | BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE); |
297 | return(NULL); | 297 | return(NULL); |
@@ -299,7 +299,7 @@ BIGNUM *BN_new(void) | |||
299 | ret->flags=BN_FLG_MALLOCED; | 299 | ret->flags=BN_FLG_MALLOCED; |
300 | ret->top=0; | 300 | ret->top=0; |
301 | ret->neg=0; | 301 | ret->neg=0; |
302 | ret->max=0; | 302 | ret->dmax=0; |
303 | ret->d=NULL; | 303 | ret->d=NULL; |
304 | return(ret); | 304 | return(ret); |
305 | } | 305 | } |
@@ -317,7 +317,7 @@ BIGNUM *bn_expand2(BIGNUM *b, int words) | |||
317 | 317 | ||
318 | bn_check_top(b); | 318 | bn_check_top(b); |
319 | 319 | ||
320 | if (words > b->max) | 320 | if (words > b->dmax) |
321 | { | 321 | { |
322 | bn_check_top(b); | 322 | bn_check_top(b); |
323 | if (BN_get_flags(b,BN_FLG_STATIC_DATA)) | 323 | if (BN_get_flags(b,BN_FLG_STATIC_DATA)) |
@@ -325,7 +325,7 @@ BIGNUM *bn_expand2(BIGNUM *b, int words) | |||
325 | BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); | 325 | BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); |
326 | return(NULL); | 326 | return(NULL); |
327 | } | 327 | } |
328 | a=A=(BN_ULONG *)Malloc(sizeof(BN_ULONG)*(words+1)); | 328 | a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1)); |
329 | if (A == NULL) | 329 | if (A == NULL) |
330 | { | 330 | { |
331 | BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE); | 331 | BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE); |
@@ -423,21 +423,21 @@ BIGNUM *bn_expand2(BIGNUM *b, int words) | |||
423 | case 0: ; /* ultrix cc workaround, see above */ | 423 | case 0: ; /* ultrix cc workaround, see above */ |
424 | } | 424 | } |
425 | #endif | 425 | #endif |
426 | Free(b->d); | 426 | OPENSSL_free(b->d); |
427 | } | 427 | } |
428 | 428 | ||
429 | b->d=a; | 429 | b->d=a; |
430 | b->max=words; | 430 | b->dmax=words; |
431 | 431 | ||
432 | /* Now need to zero any data between b->top and b->max */ | 432 | /* Now need to zero any data between b->top and b->dmax */ |
433 | 433 | ||
434 | A= &(b->d[b->top]); | 434 | A= &(b->d[b->top]); |
435 | for (i=(b->max - b->top)>>3; i>0; i--,A+=8) | 435 | for (i=(b->dmax - b->top)>>3; i>0; i--,A+=8) |
436 | { | 436 | { |
437 | A[0]=0; A[1]=0; A[2]=0; A[3]=0; | 437 | A[0]=0; A[1]=0; A[2]=0; A[3]=0; |
438 | A[4]=0; A[5]=0; A[6]=0; A[7]=0; | 438 | A[4]=0; A[5]=0; A[6]=0; A[7]=0; |
439 | } | 439 | } |
440 | for (i=(b->max - b->top)&7; i>0; i--,A++) | 440 | for (i=(b->dmax - b->top)&7; i>0; i--,A++) |
441 | A[0]=0; | 441 | A[0]=0; |
442 | #else | 442 | #else |
443 | memset(A,0,sizeof(BN_ULONG)*(words+1)); | 443 | memset(A,0,sizeof(BN_ULONG)*(words+1)); |
@@ -508,7 +508,7 @@ BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) | |||
508 | void BN_clear(BIGNUM *a) | 508 | void BN_clear(BIGNUM *a) |
509 | { | 509 | { |
510 | if (a->d != NULL) | 510 | if (a->d != NULL) |
511 | memset(a->d,0,a->max*sizeof(a->d[0])); | 511 | memset(a->d,0,a->dmax*sizeof(a->d[0])); |
512 | a->top=0; | 512 | a->top=0; |
513 | a->neg=0; | 513 | a->neg=0; |
514 | } | 514 | } |
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index 598fecbf0c..8cf1febacc 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c | |||
@@ -85,16 +85,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, BIGNUM *a, BIGNUM *b, | |||
85 | 85 | ||
86 | if (a == b) | 86 | if (a == b) |
87 | { | 87 | { |
88 | #if 0 | ||
89 | bn_wexpand(tmp,a->top*2); | ||
90 | bn_wexpand(tmp2,a->top*4); | ||
91 | bn_sqr_recursive(tmp->d,a->d,a->top,tmp2->d); | ||
92 | tmp->top=a->top*2; | ||
93 | if (tmp->d[tmp->top-1] == 0) | ||
94 | tmp->top--; | ||
95 | #else | ||
96 | if (!BN_sqr(tmp,a,ctx)) goto err; | 88 | if (!BN_sqr(tmp,a,ctx)) goto err; |
97 | #endif | ||
98 | } | 89 | } |
99 | else | 90 | else |
100 | { | 91 | { |
@@ -157,7 +148,22 @@ int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont, | |||
157 | #endif | 148 | #endif |
158 | for (i=0; i<nl; i++) | 149 | for (i=0; i<nl; i++) |
159 | { | 150 | { |
151 | #ifdef __TANDEM | ||
152 | { | ||
153 | long long t1; | ||
154 | long long t2; | ||
155 | long long t3; | ||
156 | t1 = rp[0] * (n0 & 0177777); | ||
157 | t2 = 037777600000l; | ||
158 | t2 = n0 & t2; | ||
159 | t3 = rp[0] & 0177777; | ||
160 | t2 = (t3 * t2) & BN_MASK2; | ||
161 | t1 = t1 + t2; | ||
162 | v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1); | ||
163 | } | ||
164 | #else | ||
160 | v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2); | 165 | v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2); |
166 | #endif | ||
161 | nrp++; | 167 | nrp++; |
162 | rp++; | 168 | rp++; |
163 | if (((nrp[-1]+=v)&BN_MASK2) >= v) | 169 | if (((nrp[-1]+=v)&BN_MASK2) >= v) |
@@ -175,6 +181,7 @@ int BN_from_montgomery(BIGNUM *ret, BIGNUM *a, BN_MONT_CTX *mont, | |||
175 | #if 0 | 181 | #if 0 |
176 | BN_rshift(ret,r,mont->ri); | 182 | BN_rshift(ret,r,mont->ri); |
177 | #else | 183 | #else |
184 | ret->neg = r->neg; | ||
178 | x=ri; | 185 | x=ri; |
179 | rp=ret->d; | 186 | rp=ret->d; |
180 | ap= &(r->d[x]); | 187 | ap= &(r->d[x]); |
@@ -234,7 +241,7 @@ BN_MONT_CTX *BN_MONT_CTX_new(void) | |||
234 | { | 241 | { |
235 | BN_MONT_CTX *ret; | 242 | BN_MONT_CTX *ret; |
236 | 243 | ||
237 | if ((ret=(BN_MONT_CTX *)Malloc(sizeof(BN_MONT_CTX))) == NULL) | 244 | if ((ret=(BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL) |
238 | return(NULL); | 245 | return(NULL); |
239 | 246 | ||
240 | BN_MONT_CTX_init(ret); | 247 | BN_MONT_CTX_init(ret); |
@@ -260,7 +267,7 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont) | |||
260 | BN_free(&(mont->N)); | 267 | BN_free(&(mont->N)); |
261 | BN_free(&(mont->Ni)); | 268 | BN_free(&(mont->Ni)); |
262 | if (mont->flags & BN_FLG_MALLOCED) | 269 | if (mont->flags & BN_FLG_MALLOCED) |
263 | Free(mont); | 270 | OPENSSL_free(mont); |
264 | } | 271 | } |
265 | 272 | ||
266 | int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) | 273 | int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) |
@@ -284,7 +291,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) | |||
284 | buf[1]=0; | 291 | buf[1]=0; |
285 | tmod.d=buf; | 292 | tmod.d=buf; |
286 | tmod.top=1; | 293 | tmod.top=1; |
287 | tmod.max=2; | 294 | tmod.dmax=2; |
288 | tmod.neg=mod->neg; | 295 | tmod.neg=mod->neg; |
289 | /* Ri = R^-1 mod N*/ | 296 | /* Ri = R^-1 mod N*/ |
290 | if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL) | 297 | if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL) |
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index 3e8baaad9a..3e8d8b9567 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
@@ -631,7 +631,6 @@ int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx) | |||
631 | 631 | ||
632 | al=a->top; | 632 | al=a->top; |
633 | bl=b->top; | 633 | bl=b->top; |
634 | r->neg=a->neg^b->neg; | ||
635 | 634 | ||
636 | if ((al == 0) || (bl == 0)) | 635 | if ((al == 0) || (bl == 0)) |
637 | { | 636 | { |
@@ -647,6 +646,7 @@ int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx) | |||
647 | } | 646 | } |
648 | else | 647 | else |
649 | rr = r; | 648 | rr = r; |
649 | rr->neg=a->neg^b->neg; | ||
650 | 650 | ||
651 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) | 651 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) |
652 | i = al-bl; | 652 | i = al-bl; |
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c index 782a96e7e0..532e66bcc3 100644 --- a/src/lib/libcrypto/bn/bn_print.c +++ b/src/lib/libcrypto/bn/bn_print.c | |||
@@ -64,14 +64,14 @@ | |||
64 | 64 | ||
65 | static const char *Hex="0123456789ABCDEF"; | 65 | static const char *Hex="0123456789ABCDEF"; |
66 | 66 | ||
67 | /* Must 'Free' the returned data */ | 67 | /* Must 'OPENSSL_free' the returned data */ |
68 | char *BN_bn2hex(const BIGNUM *a) | 68 | char *BN_bn2hex(const BIGNUM *a) |
69 | { | 69 | { |
70 | int i,j,v,z=0; | 70 | int i,j,v,z=0; |
71 | char *buf; | 71 | char *buf; |
72 | char *p; | 72 | char *p; |
73 | 73 | ||
74 | buf=(char *)Malloc(a->top*BN_BYTES*2+2); | 74 | buf=(char *)OPENSSL_malloc(a->top*BN_BYTES*2+2); |
75 | if (buf == NULL) | 75 | if (buf == NULL) |
76 | { | 76 | { |
77 | BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE); | 77 | BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE); |
@@ -99,7 +99,7 @@ err: | |||
99 | return(buf); | 99 | return(buf); |
100 | } | 100 | } |
101 | 101 | ||
102 | /* Must 'Free' the returned data */ | 102 | /* Must 'OPENSSL_free' the returned data */ |
103 | char *BN_bn2dec(const BIGNUM *a) | 103 | char *BN_bn2dec(const BIGNUM *a) |
104 | { | 104 | { |
105 | int i=0,num; | 105 | int i=0,num; |
@@ -110,8 +110,8 @@ char *BN_bn2dec(const BIGNUM *a) | |||
110 | 110 | ||
111 | i=BN_num_bits(a)*3; | 111 | i=BN_num_bits(a)*3; |
112 | num=(i/10+i/1000+3)+1; | 112 | num=(i/10+i/1000+3)+1; |
113 | bn_data=(BN_ULONG *)Malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG)); | 113 | bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG)); |
114 | buf=(char *)Malloc(num+3); | 114 | buf=(char *)OPENSSL_malloc(num+3); |
115 | if ((buf == NULL) || (bn_data == NULL)) | 115 | if ((buf == NULL) || (bn_data == NULL)) |
116 | { | 116 | { |
117 | BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE); | 117 | BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE); |
@@ -149,7 +149,7 @@ char *BN_bn2dec(const BIGNUM *a) | |||
149 | } | 149 | } |
150 | } | 150 | } |
151 | err: | 151 | err: |
152 | if (bn_data != NULL) Free(bn_data); | 152 | if (bn_data != NULL) OPENSSL_free(bn_data); |
153 | if (t != NULL) BN_free(t); | 153 | if (t != NULL) BN_free(t); |
154 | return(buf); | 154 | return(buf); |
155 | } | 155 | } |
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c index 943712c15b..21ecbc04ed 100644 --- a/src/lib/libcrypto/bn/bn_rand.c +++ b/src/lib/libcrypto/bn/bn_rand.c | |||
@@ -68,11 +68,17 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) | |||
68 | int ret=0,bit,bytes,mask; | 68 | int ret=0,bit,bytes,mask; |
69 | time_t tim; | 69 | time_t tim; |
70 | 70 | ||
71 | if (bits == 0) | ||
72 | { | ||
73 | BN_zero(rnd); | ||
74 | return 1; | ||
75 | } | ||
76 | |||
71 | bytes=(bits+7)/8; | 77 | bytes=(bits+7)/8; |
72 | bit=(bits-1)%8; | 78 | bit=(bits-1)%8; |
73 | mask=0xff<<bit; | 79 | mask=0xff<<bit; |
74 | 80 | ||
75 | buf=(unsigned char *)Malloc(bytes); | 81 | buf=(unsigned char *)OPENSSL_malloc(bytes); |
76 | if (buf == NULL) | 82 | if (buf == NULL) |
77 | { | 83 | { |
78 | BNerr(BN_F_BN_RAND,ERR_R_MALLOC_FAILURE); | 84 | BNerr(BN_F_BN_RAND,ERR_R_MALLOC_FAILURE); |
@@ -120,7 +126,7 @@ err: | |||
120 | if (buf != NULL) | 126 | if (buf != NULL) |
121 | { | 127 | { |
122 | memset(buf,0,bytes); | 128 | memset(buf,0,bytes); |
123 | Free(buf); | 129 | OPENSSL_free(buf); |
124 | } | 130 | } |
125 | return(ret); | 131 | return(ret); |
126 | } | 132 | } |
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c index a8796bd0aa..d019941d6b 100644 --- a/src/lib/libcrypto/bn/bn_recp.c +++ b/src/lib/libcrypto/bn/bn_recp.c | |||
@@ -72,7 +72,7 @@ BN_RECP_CTX *BN_RECP_CTX_new(void) | |||
72 | { | 72 | { |
73 | BN_RECP_CTX *ret; | 73 | BN_RECP_CTX *ret; |
74 | 74 | ||
75 | if ((ret=(BN_RECP_CTX *)Malloc(sizeof(BN_RECP_CTX))) == NULL) | 75 | if ((ret=(BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL) |
76 | return(NULL); | 76 | return(NULL); |
77 | 77 | ||
78 | BN_RECP_CTX_init(ret); | 78 | BN_RECP_CTX_init(ret); |
@@ -88,7 +88,7 @@ void BN_RECP_CTX_free(BN_RECP_CTX *recp) | |||
88 | BN_free(&(recp->N)); | 88 | BN_free(&(recp->N)); |
89 | BN_free(&(recp->Nr)); | 89 | BN_free(&(recp->Nr)); |
90 | if (recp->flags & BN_FLG_MALLOCED) | 90 | if (recp->flags & BN_FLG_MALLOCED) |
91 | Free(recp); | 91 | OPENSSL_free(recp); |
92 | } | 92 | } |
93 | 93 | ||
94 | int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) | 94 | int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) |
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c index 61aae65a6b..0883247384 100644 --- a/src/lib/libcrypto/bn/bn_shift.c +++ b/src/lib/libcrypto/bn/bn_shift.c | |||
@@ -162,7 +162,7 @@ int BN_rshift(BIGNUM *r, BIGNUM *a, int n) | |||
162 | nw=n/BN_BITS2; | 162 | nw=n/BN_BITS2; |
163 | rb=n%BN_BITS2; | 163 | rb=n%BN_BITS2; |
164 | lb=BN_BITS2-rb; | 164 | lb=BN_BITS2-rb; |
165 | if (nw > a->top) | 165 | if (nw > a->top || a->top == 0) |
166 | { | 166 | { |
167 | BN_zero(r); | 167 | BN_zero(r); |
168 | return(1); | 168 | return(1); |
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c index fe00c5f69a..75f4f38392 100644 --- a/src/lib/libcrypto/bn/bn_sqr.c +++ b/src/lib/libcrypto/bn/bn_sqr.c | |||
@@ -188,7 +188,7 @@ void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp) | |||
188 | 188 | ||
189 | #ifdef BN_RECURSION | 189 | #ifdef BN_RECURSION |
190 | /* r is 2*n words in size, | 190 | /* r is 2*n words in size, |
191 | * a and b are both n words in size. | 191 | * a and b are both n words in size. (There's not actually a 'b' here ...) |
192 | * n must be a power of 2. | 192 | * n must be a power of 2. |
193 | * We multiply and return the result. | 193 | * We multiply and return the result. |
194 | * t must be 2*n words in size | 194 | * t must be 2*n words in size |
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c index 73157a7d43..cd59baa2c4 100644 --- a/src/lib/libcrypto/bn/bn_word.c +++ b/src/lib/libcrypto/bn/bn_word.c | |||
@@ -115,7 +115,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) | |||
115 | a->neg=0; | 115 | a->neg=0; |
116 | i=BN_sub_word(a,w); | 116 | i=BN_sub_word(a,w); |
117 | if (!BN_is_zero(a)) | 117 | if (!BN_is_zero(a)) |
118 | a->neg=1; | 118 | a->neg=!(a->neg); |
119 | return(i); | 119 | return(i); |
120 | } | 120 | } |
121 | w&=BN_MASK2; | 121 | w&=BN_MASK2; |
@@ -140,7 +140,7 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w) | |||
140 | { | 140 | { |
141 | int i; | 141 | int i; |
142 | 142 | ||
143 | if (a->neg) | 143 | if (BN_is_zero(a) || a->neg) |
144 | { | 144 | { |
145 | a->neg=0; | 145 | a->neg=0; |
146 | i=BN_add_word(a,w); | 146 | i=BN_add_word(a,w); |
@@ -182,11 +182,16 @@ int BN_mul_word(BIGNUM *a, BN_ULONG w) | |||
182 | w&=BN_MASK2; | 182 | w&=BN_MASK2; |
183 | if (a->top) | 183 | if (a->top) |
184 | { | 184 | { |
185 | ll=bn_mul_words(a->d,a->d,a->top,w); | 185 | if (w == 0) |
186 | if (ll) | 186 | BN_zero(a); |
187 | else | ||
187 | { | 188 | { |
188 | if (bn_wexpand(a,a->top+1) == NULL) return(0); | 189 | ll=bn_mul_words(a->d,a->d,a->top,w); |
189 | a->d[a->top++]=ll; | 190 | if (ll) |
191 | { | ||
192 | if (bn_wexpand(a,a->top+1) == NULL) return(0); | ||
193 | a->d[a->top++]=ll; | ||
194 | } | ||
190 | } | 195 | } |
191 | } | 196 | } |
192 | return(1); | 197 | return(1); |
diff --git a/src/lib/libcrypto/bn/vms-helper.c b/src/lib/libcrypto/bn/vms-helper.c index 73af337069..0fa79c4edb 100644 --- a/src/lib/libcrypto/bn/vms-helper.c +++ b/src/lib/libcrypto/bn/vms-helper.c | |||
@@ -59,8 +59,10 @@ | |||
59 | 59 | ||
60 | bn_div_words_abort(int i) | 60 | bn_div_words_abort(int i) |
61 | { | 61 | { |
62 | #ifdef BN_DEBUG | ||
62 | #if !defined(NO_STDIO) && !defined(WIN16) | 63 | #if !defined(NO_STDIO) && !defined(WIN16) |
63 | fprintf(stderr,"Division would overflow (%d)\n",i); | 64 | fprintf(stderr,"Division would overflow (%d)\n",i); |
64 | #endif | 65 | #endif |
65 | abort(); | 66 | abort(); |
67 | #endif | ||
66 | } | 68 | } |