diff options
Diffstat (limited to 'src/lib/libcrypto/bn/asm/mips3.s')
-rw-r--r-- | src/lib/libcrypto/bn/asm/mips3.s | 2138 |
1 files changed, 2138 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips3.s b/src/lib/libcrypto/bn/asm/mips3.s new file mode 100644 index 0000000000..191345d920 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/mips3.s | |||
@@ -0,0 +1,2138 @@ | |||
1 | .rdata | ||
2 | .asciiz "mips3.s, Version 1.0" | ||
3 | .asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | ||
4 | |||
5 | /* | ||
6 | * ==================================================================== | ||
7 | * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
8 | * project. | ||
9 | * | ||
10 | * Rights for redistribution and usage in source and binary forms are | ||
11 | * granted according to the OpenSSL license. Warranty of any kind is | ||
12 | * disclaimed. | ||
13 | * ==================================================================== | ||
14 | */ | ||
15 | |||
16 | /* | ||
17 | * This is my modest contribution to the OpenSSL project (see | ||
18 | * http://www.openssl.org/ for more information about it) and is | ||
19 | * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c | ||
20 | * module. For updates see http://fy.chalmers.se/~appro/hpe/. | ||
21 | * | ||
22 | * The module is designed to work with either of the "new" MIPS ABI(5), | ||
23 | * namely N32 or N64, offered by IRIX 6.x. It's not meant to work under | ||
24 | * IRIX 5.x not only because it doesn't support new ABIs but also | ||
25 | * because 5.x kernels put R4x00 CPU into 32-bit mode and all those | ||
26 | * 64-bit instructions (daddu, dmultu, etc.) found below gonna only | ||
27 | * cause illegal instruction exception:-( | ||
28 | * | ||
29 | * In addition the code depends on preprocessor flags set up by MIPSpro | ||
30 | * compiler driver (either as or cc) and therefore (probably?) can't be | ||
31 | * compiled by the GNU assembler. GNU C driver manages fine though... | ||
32 | * I mean as long as -mmips-as is specified or is the default option, | ||
33 | * because then it simply invokes /usr/bin/as which in turn takes | ||
34 | * perfect care of the preprocessor definitions. Another neat feature | ||
35 | * offered by the MIPSpro assembler is an optimization pass. This gave | ||
36 | * me the opportunity to have the code looking more regular as all those | ||
37 | * architecture dependent instruction rescheduling details were left to | ||
38 | * the assembler. Cool, huh? | ||
39 | * | ||
40 | * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' | ||
41 | * goes way over 3 times faster! | ||
42 | * | ||
43 | * <appro@fy.chalmers.se> | ||
44 | */ | ||
45 | #include <asm.h> | ||
46 | #include <regdef.h> | ||
47 | |||
/*
 * MOVNZ(cond,dst,src): copy src into dst when cond is non-zero, otherwise
 * leave dst untouched.  MIPS IV and later provide movn for exactly this.
 * Older ISAs get a branch-likely that targets the instruction right after
 * its own delay slot, so the move sitting in the delay slot takes effect
 * only when the branch is taken, i.e. only when cond is non-zero.
 */
48 | #if _MIPS_ISA>=4 | ||
49 | #define MOVNZ(cond,dst,src) \ | ||
50 | movn dst,src,cond | ||
51 | #else | ||
52 | #define MOVNZ(cond,dst,src) \ | ||
53 | .set noreorder; \ | ||
54 | bnezl cond,.+8; \ | ||
55 | move dst,src; \ | ||
56 | .set reorder | ||
57 | #endif | ||
58 | |||
59 | .text | ||
60 | |||
61 | .set noat | ||
62 | .set reorder | ||
63 | |||
64 | #define MINUS4 v1 | ||
65 | |||
/*
 * bn_mul_add_words
 *   a0 = pointer to the 64-bit word array that is read, updated and stored
 *   a1 = pointer to the source word array (only read)
 *   a2 = number of words to process
 *   a3 = 64-bit multiplier
 *   v0 = returned carry word (zero when a2 is not positive)
 * For each word the low 64 bits of (a1)[i]*a3 + (a0)[i] + carry are stored
 * back to (a0)[i] and everything above bit 63 becomes the next carry.
 * Modifies a0, a1, a2, v1 (MINUS4), t0-t3, ta0-ta3 and AT.
 */
66 | .align 5 | ||
67 | LEAF(bn_mul_add_words) | ||
68 | .set noreorder | ||
/* Branch-likely: the preload of (a1)[0] takes effect only when a2 > 0. */
69 | bgtzl a2,.L_bn_mul_add_words_proceed | ||
70 | ld t0,0(a1) | ||
/* Nothing to do: return 0 (the move sits in the jr delay slot). */
71 | jr ra | ||
72 | move v0,zero | ||
73 | .set reorder | ||
74 | |||
75 | .L_bn_mul_add_words_proceed: | ||
/* ta0 = a2 with its two low bits cleared.  Zero means fewer than four
 * words, so only the tail code runs.  v0 is the running carry. */
76 | li MINUS4,-4 | ||
77 | and ta0,a2,MINUS4 | ||
78 | move v0,zero | ||
79 | beqz ta0,.L_bn_mul_add_words_tail | ||
80 | |||
/* Main loop, four words per pass.  On entry t0 already holds (a1)[0]. */
81 | .L_bn_mul_add_words_loop: | ||
82 | dmultu t0,a3 | ||
83 | ld t1,0(a0) | ||
84 | ld t2,8(a1) | ||
85 | ld t3,8(a0) | ||
86 | ld ta0,16(a1) | ||
87 | ld ta1,16(a0) | ||
88 | daddu t1,v0 | ||
89 | sltu v0,t1,v0 /* All manuals say it "compares 32-bit | ||
90 | * values", but it seems to work fine | ||
91 | * even on 64-bit registers. */ | ||
/* AT = low half of the product, t0 = high half.  The new carry is the high
 * half plus the carry-outs of the two 64-bit additions into t1. */
92 | mflo AT | ||
93 | mfhi t0 | ||
94 | daddu t1,AT | ||
95 | daddu v0,t0 | ||
96 | sltu AT,t1,AT | ||
97 | sd t1,0(a0) | ||
98 | daddu v0,AT | ||
99 | |||
/* Second word of the group, same pattern with t2 and t3. */
100 | dmultu t2,a3 | ||
101 | ld ta2,24(a1) | ||
102 | ld ta3,24(a0) | ||
103 | daddu t3,v0 | ||
104 | sltu v0,t3,v0 | ||
105 | mflo AT | ||
106 | mfhi t2 | ||
107 | daddu t3,AT | ||
108 | daddu v0,t2 | ||
109 | sltu AT,t3,AT | ||
110 | sd t3,8(a0) | ||
111 | daddu v0,AT | ||
112 | |||
/* Third word.  The count and both pointers are advanced here, which is why
 * the remaining stores of this pass use negative offsets. */
113 | dmultu ta0,a3 | ||
114 | subu a2,4 | ||
115 | PTR_ADD a0,32 | ||
116 | PTR_ADD a1,32 | ||
117 | daddu ta1,v0 | ||
118 | sltu v0,ta1,v0 | ||
119 | mflo AT | ||
120 | mfhi ta0 | ||
121 | daddu ta1,AT | ||
122 | daddu v0,ta0 | ||
123 | sltu AT,ta1,AT | ||
124 | sd ta1,-16(a0) | ||
125 | daddu v0,AT | ||
126 | |||
127 | |||
/* Fourth word.  ta0 is recomputed as the number of words left rounded down
 * to a multiple of four and drives the loop branch below. */
128 | dmultu ta2,a3 | ||
129 | and ta0,a2,MINUS4 | ||
130 | daddu ta3,v0 | ||
131 | sltu v0,ta3,v0 | ||
132 | mflo AT | ||
133 | mfhi ta2 | ||
134 | daddu ta3,AT | ||
135 | daddu v0,ta2 | ||
136 | sltu AT,ta3,AT | ||
137 | sd ta3,-8(a0) | ||
138 | daddu v0,AT | ||
139 | .set noreorder | ||
/* At least four words left: loop again.  Otherwise, if any words are left,
 * go to the tail.  Either branch-likely preloads t0 only when it is taken. */
140 | bgtzl ta0,.L_bn_mul_add_words_loop | ||
141 | ld t0,0(a1) | ||
142 | |||
143 | bnezl a2,.L_bn_mul_add_words_tail | ||
144 | ld t0,0(a1) | ||
145 | .set reorder | ||
146 | |||
147 | .L_bn_mul_add_words_return: | ||
148 | jr ra | ||
149 | |||
/* Tail: one to three leftover words, t0 already holds the first of them.
 * a2 is counted down and the routine returns as soon as it reaches zero. */
150 | .L_bn_mul_add_words_tail: | ||
151 | dmultu t0,a3 | ||
152 | ld t1,0(a0) | ||
153 | subu a2,1 | ||
154 | daddu t1,v0 | ||
155 | sltu v0,t1,v0 | ||
156 | mflo AT | ||
157 | mfhi t0 | ||
158 | daddu t1,AT | ||
159 | daddu v0,t0 | ||
160 | sltu AT,t1,AT | ||
161 | sd t1,0(a0) | ||
162 | daddu v0,AT | ||
163 | beqz a2,.L_bn_mul_add_words_return | ||
164 | |||
165 | ld t0,8(a1) | ||
166 | dmultu t0,a3 | ||
167 | ld t1,8(a0) | ||
168 | subu a2,1 | ||
169 | daddu t1,v0 | ||
170 | sltu v0,t1,v0 | ||
171 | mflo AT | ||
172 | mfhi t0 | ||
173 | daddu t1,AT | ||
174 | daddu v0,t0 | ||
175 | sltu AT,t1,AT | ||
176 | sd t1,8(a0) | ||
177 | daddu v0,AT | ||
178 | beqz a2,.L_bn_mul_add_words_return | ||
179 | |||
/* Third and last possible tail word, no further count check is needed. */
180 | ld t0,16(a1) | ||
181 | dmultu t0,a3 | ||
182 | ld t1,16(a0) | ||
183 | daddu t1,v0 | ||
184 | sltu v0,t1,v0 | ||
185 | mflo AT | ||
186 | mfhi t0 | ||
187 | daddu t1,AT | ||
188 | daddu v0,t0 | ||
189 | sltu AT,t1,AT | ||
190 | sd t1,16(a0) | ||
191 | daddu v0,AT | ||
192 | jr ra | ||
193 | END(bn_mul_add_words) | ||
194 | |||
/*
 * bn_mul_words
 *   a0 = pointer to the destination word array (only written)
 *   a1 = pointer to the source word array (only read)
 *   a2 = number of 64-bit words to process
 *   a3 = 64-bit multiplier
 *   v0 = returned carry word (zero when a2 is not positive)
 * For each word the low 64 bits of (a1)[i]*a3 + carry are stored to (a0)[i]
 * and the high part becomes the next carry.
 * Modifies a0, a1, a2, v1 (MINUS4), t0-t3, ta0-ta3 and AT.
 */
195 | .align 5 | ||
196 | LEAF(bn_mul_words) | ||
197 | .set noreorder | ||
/* Branch-likely: the preload of (a1)[0] takes effect only when a2 > 0. */
198 | bgtzl a2,.L_bn_mul_words_proceed | ||
199 | ld t0,0(a1) | ||
200 | jr ra | ||
201 | move v0,zero | ||
202 | .set reorder | ||
203 | |||
204 | .L_bn_mul_words_proceed: | ||
/* ta0 = a2 with its two low bits cleared.  Zero means tail only. */
205 | li MINUS4,-4 | ||
206 | and ta0,a2,MINUS4 | ||
207 | move v0,zero | ||
208 | beqz ta0,.L_bn_mul_words_tail | ||
209 | |||
/* Main loop, four words per pass.  On entry t0 already holds (a1)[0]. */
210 | .L_bn_mul_words_loop: | ||
211 | dmultu t0,a3 | ||
212 | ld t2,8(a1) | ||
213 | ld ta0,16(a1) | ||
214 | ld ta2,24(a1) | ||
/* v0 = carry + low half, t1 = carry-out of that addition.  After the store
 * the next carry is the high half plus that carry-out. */
215 | mflo AT | ||
216 | mfhi t0 | ||
217 | daddu v0,AT | ||
218 | sltu t1,v0,AT | ||
219 | sd v0,0(a0) | ||
220 | daddu v0,t1,t0 | ||
221 | |||
/* Count and pointers are advanced here, later stores of this pass use
 * negative offsets from the already advanced a0. */
222 | dmultu t2,a3 | ||
223 | subu a2,4 | ||
224 | PTR_ADD a0,32 | ||
225 | PTR_ADD a1,32 | ||
226 | mflo AT | ||
227 | mfhi t2 | ||
228 | daddu v0,AT | ||
229 | sltu t3,v0,AT | ||
230 | sd v0,-24(a0) | ||
231 | daddu v0,t3,t2 | ||
232 | |||
233 | dmultu ta0,a3 | ||
234 | mflo AT | ||
235 | mfhi ta0 | ||
236 | daddu v0,AT | ||
237 | sltu ta1,v0,AT | ||
238 | sd v0,-16(a0) | ||
239 | daddu v0,ta1,ta0 | ||
240 | |||
241 | |||
/* Fourth word.  ta0 becomes the remaining count rounded down to a multiple
 * of four and decides whether another full pass follows. */
242 | dmultu ta2,a3 | ||
243 | and ta0,a2,MINUS4 | ||
244 | mflo AT | ||
245 | mfhi ta2 | ||
246 | daddu v0,AT | ||
247 | sltu ta3,v0,AT | ||
248 | sd v0,-8(a0) | ||
249 | daddu v0,ta3,ta2 | ||
250 | .set noreorder | ||
/* Loop while four or more words remain, else fall into the tail if any are
 * left.  Each branch-likely preloads t0 only when it is taken. */
251 | bgtzl ta0,.L_bn_mul_words_loop | ||
252 | ld t0,0(a1) | ||
253 | |||
254 | bnezl a2,.L_bn_mul_words_tail | ||
255 | ld t0,0(a1) | ||
256 | .set reorder | ||
257 | |||
258 | .L_bn_mul_words_return: | ||
259 | jr ra | ||
260 | |||
/* Tail: one to three leftover words, t0 already holds the first of them. */
261 | .L_bn_mul_words_tail: | ||
262 | dmultu t0,a3 | ||
263 | subu a2,1 | ||
264 | mflo AT | ||
265 | mfhi t0 | ||
266 | daddu v0,AT | ||
267 | sltu t1,v0,AT | ||
268 | sd v0,0(a0) | ||
269 | daddu v0,t1,t0 | ||
270 | beqz a2,.L_bn_mul_words_return | ||
271 | |||
272 | ld t0,8(a1) | ||
273 | dmultu t0,a3 | ||
274 | subu a2,1 | ||
275 | mflo AT | ||
276 | mfhi t0 | ||
277 | daddu v0,AT | ||
278 | sltu t1,v0,AT | ||
279 | sd v0,8(a0) | ||
280 | daddu v0,t1,t0 | ||
281 | beqz a2,.L_bn_mul_words_return | ||
282 | |||
/* Third and last possible tail word. */
283 | ld t0,16(a1) | ||
284 | dmultu t0,a3 | ||
285 | mflo AT | ||
286 | mfhi t0 | ||
287 | daddu v0,AT | ||
288 | sltu t1,v0,AT | ||
289 | sd v0,16(a0) | ||
290 | daddu v0,t1,t0 | ||
291 | jr ra | ||
292 | END(bn_mul_words) | ||
293 | |||
/*
 * bn_sqr_words
 *   a0 = pointer to the destination array, two 64-bit words per input word
 *   a1 = pointer to the source word array (only read)
 *   a2 = number of source words to square
 * For each source word the 128-bit square is stored as low half at
 * (a0)[2*i] and high half at (a0)[2*i+1].  There is no carry between words.
 * v0 is zero on every return path.
 * Modifies a0, a1, a2, v0, v1 (MINUS4), t0-t3 and ta0-ta3.
 */
294 | .align 5 | ||
295 | LEAF(bn_sqr_words) | ||
296 | .set noreorder | ||
/* Branch-likely: the preload of (a1)[0] takes effect only when a2 > 0. */
297 | bgtzl a2,.L_bn_sqr_words_proceed | ||
298 | ld t0,0(a1) | ||
299 | jr ra | ||
300 | move v0,zero | ||
301 | .set reorder | ||
302 | |||
303 | .L_bn_sqr_words_proceed: | ||
/* ta0 = a2 with its two low bits cleared.  Zero means tail only. */
304 | li MINUS4,-4 | ||
305 | and ta0,a2,MINUS4 | ||
306 | move v0,zero | ||
307 | beqz ta0,.L_bn_sqr_words_tail | ||
308 | |||
/* Main loop: four source words in, eight result words out per pass. */
309 | .L_bn_sqr_words_loop: | ||
310 | dmultu t0,t0 | ||
311 | ld t2,8(a1) | ||
312 | ld ta0,16(a1) | ||
313 | ld ta2,24(a1) | ||
314 | mflo t1 | ||
315 | mfhi t0 | ||
316 | sd t1,0(a0) | ||
317 | sd t0,8(a0) | ||
318 | |||
/* The destination advances by 64 bytes and the source by 32, the remaining
 * stores of this pass use negative offsets from the advanced a0. */
319 | dmultu t2,t2 | ||
320 | subu a2,4 | ||
321 | PTR_ADD a0,64 | ||
322 | PTR_ADD a1,32 | ||
323 | mflo t3 | ||
324 | mfhi t2 | ||
325 | sd t3,-48(a0) | ||
326 | sd t2,-40(a0) | ||
327 | |||
328 | dmultu ta0,ta0 | ||
329 | mflo ta1 | ||
330 | mfhi ta0 | ||
331 | sd ta1,-32(a0) | ||
332 | sd ta0,-24(a0) | ||
333 | |||
334 | |||
/* ta0 is reused as the remaining count rounded down to a multiple of four. */
335 | dmultu ta2,ta2 | ||
336 | and ta0,a2,MINUS4 | ||
337 | mflo ta3 | ||
338 | mfhi ta2 | ||
339 | sd ta3,-16(a0) | ||
340 | sd ta2,-8(a0) | ||
341 | |||
342 | .set noreorder | ||
/* Loop while four or more words remain, else fall into the tail if any are
 * left.  Each branch-likely preloads t0 only when it is taken. */
343 | bgtzl ta0,.L_bn_sqr_words_loop | ||
344 | ld t0,0(a1) | ||
345 | |||
346 | bnezl a2,.L_bn_sqr_words_tail | ||
347 | ld t0,0(a1) | ||
348 | .set reorder | ||
349 | |||
350 | .L_bn_sqr_words_return: | ||
351 | move v0,zero | ||
352 | jr ra | ||
353 | |||
/* Tail: one to three leftover words, t0 already holds the first of them. */
354 | .L_bn_sqr_words_tail: | ||
355 | dmultu t0,t0 | ||
356 | subu a2,1 | ||
357 | mflo t1 | ||
358 | mfhi t0 | ||
359 | sd t1,0(a0) | ||
360 | sd t0,8(a0) | ||
361 | beqz a2,.L_bn_sqr_words_return | ||
362 | |||
363 | ld t0,8(a1) | ||
364 | dmultu t0,t0 | ||
365 | subu a2,1 | ||
366 | mflo t1 | ||
367 | mfhi t0 | ||
368 | sd t1,16(a0) | ||
369 | sd t0,24(a0) | ||
370 | beqz a2,.L_bn_sqr_words_return | ||
371 | |||
/* Third and last possible tail word.  v0 still holds the zero written
 * right after .L_bn_sqr_words_proceed. */
372 | ld t0,16(a1) | ||
373 | dmultu t0,t0 | ||
374 | mflo t1 | ||
375 | mfhi t0 | ||
376 | sd t1,32(a0) | ||
377 | sd t0,40(a0) | ||
378 | jr ra | ||
379 | END(bn_sqr_words) | ||
380 | |||
/*
 * bn_add_words
 *   a0 = pointer to the destination word array
 *   a1 = pointer to the first source word array
 *   a2 = pointer to the second source word array
 *   a3 = number of 64-bit words to add
 *   v0 = returned carry out of the top word (zero when a3 is not positive)
 * Each step stores (a1)[i] + (a2)[i] + carry to (a0)[i].  The new carry is
 * the sum of the carry-outs of the two 64-bit additions involved.
 * Modifies a0-a3, v1 (MINUS4), t0-t3, ta0-ta3, t8, t9 and AT.
 */
381 | .align 5 | ||
382 | LEAF(bn_add_words) | ||
383 | .set noreorder | ||
/* Branch-likely: the preload of (a1)[0] takes effect only when a3 > 0. */
384 | bgtzl a3,.L_bn_add_words_proceed | ||
385 | ld t0,0(a1) | ||
386 | jr ra | ||
387 | move v0,zero | ||
388 | .set reorder | ||
389 | |||
390 | .L_bn_add_words_proceed: | ||
/* AT = a3 with its two low bits cleared.  Zero means tail only. */
391 | li MINUS4,-4 | ||
392 | and AT,a3,MINUS4 | ||
393 | move v0,zero | ||
394 | beqz AT,.L_bn_add_words_tail | ||
395 | |||
/* Main loop, four words per pass.  On entry t0 already holds (a1)[0]. */
396 | .L_bn_add_words_loop: | ||
397 | ld ta0,0(a2) | ||
398 | ld t1,8(a1) | ||
399 | ld ta1,8(a2) | ||
400 | ld t2,16(a1) | ||
401 | ld ta2,16(a2) | ||
402 | ld t3,24(a1) | ||
403 | ld ta3,24(a2) | ||
/* ta0 = sum of the two source words, t8 = its carry-out.  t0 = that sum
 * plus the incoming carry, v0 = carry-out of the second addition plus t8. */
404 | daddu ta0,t0 | ||
405 | subu a3,4 | ||
406 | sltu t8,ta0,t0 | ||
407 | daddu t0,ta0,v0 | ||
408 | PTR_ADD a0,32 | ||
409 | sltu v0,t0,ta0 | ||
410 | sd t0,-32(a0) | ||
411 | daddu v0,t8 | ||
412 | |||
413 | daddu ta1,t1 | ||
414 | PTR_ADD a1,32 | ||
415 | sltu t9,ta1,t1 | ||
416 | daddu t1,ta1,v0 | ||
417 | PTR_ADD a2,32 | ||
418 | sltu v0,t1,ta1 | ||
419 | sd t1,-24(a0) | ||
420 | daddu v0,t9 | ||
421 | |||
/* AT becomes the remaining count rounded down to a multiple of four. */
422 | daddu ta2,t2 | ||
423 | and AT,a3,MINUS4 | ||
424 | sltu t8,ta2,t2 | ||
425 | daddu t2,ta2,v0 | ||
426 | sltu v0,t2,ta2 | ||
427 | sd t2,-16(a0) | ||
428 | daddu v0,t8 | ||
429 | |||
430 | daddu ta3,t3 | ||
431 | sltu t9,ta3,t3 | ||
432 | daddu t3,ta3,v0 | ||
433 | sltu v0,t3,ta3 | ||
434 | sd t3,-8(a0) | ||
435 | daddu v0,t9 | ||
436 | |||
437 | .set noreorder | ||
/* Loop while four or more words remain, else fall into the tail if any are
 * left.  Each branch-likely preloads t0 only when it is taken. */
438 | bgtzl AT,.L_bn_add_words_loop | ||
439 | ld t0,0(a1) | ||
440 | |||
441 | bnezl a3,.L_bn_add_words_tail | ||
442 | ld t0,0(a1) | ||
443 | .set reorder | ||
444 | |||
445 | .L_bn_add_words_return: | ||
446 | jr ra | ||
447 | |||
/* Tail: one to three leftover words, t0 already holds the first of them. */
448 | .L_bn_add_words_tail: | ||
449 | ld ta0,0(a2) | ||
450 | daddu ta0,t0 | ||
451 | subu a3,1 | ||
452 | sltu t8,ta0,t0 | ||
453 | daddu t0,ta0,v0 | ||
454 | sltu v0,t0,ta0 | ||
455 | sd t0,0(a0) | ||
456 | daddu v0,t8 | ||
457 | beqz a3,.L_bn_add_words_return | ||
458 | |||
459 | ld t1,8(a1) | ||
460 | ld ta1,8(a2) | ||
461 | daddu ta1,t1 | ||
462 | subu a3,1 | ||
463 | sltu t9,ta1,t1 | ||
464 | daddu t1,ta1,v0 | ||
465 | sltu v0,t1,ta1 | ||
466 | sd t1,8(a0) | ||
467 | daddu v0,t9 | ||
468 | beqz a3,.L_bn_add_words_return | ||
469 | |||
/* Third and last possible tail word. */
470 | ld t2,16(a1) | ||
471 | ld ta2,16(a2) | ||
472 | daddu ta2,t2 | ||
473 | sltu t8,ta2,t2 | ||
474 | daddu t2,ta2,v0 | ||
475 | sltu v0,t2,ta2 | ||
476 | sd t2,16(a0) | ||
477 | daddu v0,t8 | ||
478 | jr ra | ||
479 | END(bn_add_words) | ||
480 | |||
/*
 * bn_sub_words
 *   a0 = pointer to the destination word array
 *   a1 = pointer to the minuend word array
 *   a2 = pointer to the subtrahend word array
 *   a3 = number of 64-bit words to subtract
 *   v0 = returned borrow out of the top word (zero when a3 is not positive)
 * Each step stores (a1)[i] - (a2)[i] - borrow to (a0)[i].  The borrow is
 * updated with MOVNZ: when the raw difference is non-zero the new borrow is
 * the result of the unsigned compare (a1)[i] < (a2)[i], when the difference
 * is zero the incoming borrow is passed on unchanged.
 * Modifies a0-a3, v1 (MINUS4), t0-t3, ta0-ta3, t8, t9 and AT.
 */
481 | .align 5 | ||
482 | LEAF(bn_sub_words) | ||
483 | .set noreorder | ||
/* Branch-likely: the preload of (a1)[0] takes effect only when a3 > 0. */
484 | bgtzl a3,.L_bn_sub_words_proceed | ||
485 | ld t0,0(a1) | ||
486 | jr ra | ||
487 | move v0,zero | ||
488 | .set reorder | ||
489 | |||
490 | .L_bn_sub_words_proceed: | ||
/* AT = a3 with its two low bits cleared.  Zero means tail only. */
491 | li MINUS4,-4 | ||
492 | and AT,a3,MINUS4 | ||
493 | move v0,zero | ||
494 | beqz AT,.L_bn_sub_words_tail | ||
495 | |||
/* Main loop, four words per pass.  On entry t0 already holds (a1)[0]. */
496 | .L_bn_sub_words_loop: | ||
497 | ld ta0,0(a2) | ||
498 | ld t1,8(a1) | ||
499 | ld ta1,8(a2) | ||
500 | ld t2,16(a1) | ||
501 | ld ta2,16(a2) | ||
502 | ld t3,24(a1) | ||
503 | ld ta3,24(a2) | ||
/* t8 = borrow of the raw subtraction, t0 = raw difference, ta0 = difference
 * minus incoming borrow.  AT becomes the remaining count rounded down to a
 * multiple of four for the loop branch at the bottom. */
504 | sltu t8,t0,ta0 | ||
505 | dsubu t0,ta0 | ||
506 | subu a3,4 | ||
507 | dsubu ta0,t0,v0 | ||
508 | and AT,a3,MINUS4 | ||
509 | sd ta0,0(a0) | ||
510 | MOVNZ (t0,v0,t8) | ||
511 | |||
512 | sltu t9,t1,ta1 | ||
513 | dsubu t1,ta1 | ||
514 | PTR_ADD a0,32 | ||
515 | dsubu ta1,t1,v0 | ||
516 | PTR_ADD a1,32 | ||
517 | sd ta1,-24(a0) | ||
518 | MOVNZ (t1,v0,t9) | ||
519 | |||
520 | |||
521 | sltu t8,t2,ta2 | ||
522 | dsubu t2,ta2 | ||
523 | dsubu ta2,t2,v0 | ||
524 | PTR_ADD a2,32 | ||
525 | sd ta2,-16(a0) | ||
526 | MOVNZ (t2,v0,t8) | ||
527 | |||
528 | sltu t9,t3,ta3 | ||
529 | dsubu t3,ta3 | ||
530 | dsubu ta3,t3,v0 | ||
531 | sd ta3,-8(a0) | ||
532 | MOVNZ (t3,v0,t9) | ||
533 | |||
534 | .set noreorder | ||
/* Loop while four or more words remain, else fall into the tail if any are
 * left.  Each branch-likely preloads t0 only when it is taken. */
535 | bgtzl AT,.L_bn_sub_words_loop | ||
536 | ld t0,0(a1) | ||
537 | |||
538 | bnezl a3,.L_bn_sub_words_tail | ||
539 | ld t0,0(a1) | ||
540 | .set reorder | ||
541 | |||
542 | .L_bn_sub_words_return: | ||
543 | jr ra | ||
544 | |||
/* Tail: one to three leftover words, t0 already holds the first of them. */
545 | .L_bn_sub_words_tail: | ||
546 | ld ta0,0(a2) | ||
547 | subu a3,1 | ||
548 | sltu t8,t0,ta0 | ||
549 | dsubu t0,ta0 | ||
550 | dsubu ta0,t0,v0 | ||
551 | MOVNZ (t0,v0,t8) | ||
552 | sd ta0,0(a0) | ||
553 | beqz a3,.L_bn_sub_words_return | ||
554 | |||
555 | ld t1,8(a1) | ||
556 | subu a3,1 | ||
557 | ld ta1,8(a2) | ||
558 | sltu t9,t1,ta1 | ||
559 | dsubu t1,ta1 | ||
560 | dsubu ta1,t1,v0 | ||
561 | MOVNZ (t1,v0,t9) | ||
562 | sd ta1,8(a0) | ||
563 | beqz a3,.L_bn_sub_words_return | ||
564 | |||
/* Third and last possible tail word. */
565 | ld t2,16(a1) | ||
566 | ld ta2,16(a2) | ||
567 | sltu t8,t2,ta2 | ||
568 | dsubu t2,ta2 | ||
569 | dsubu ta2,t2,v0 | ||
570 | MOVNZ (t2,v0,t8) | ||
571 | sd ta2,16(a0) | ||
572 | jr ra | ||
573 | END(bn_sub_words) | ||
574 | |||
575 | #undef MINUS4 | ||
576 | |||
/*
 * bn_div_words
 *   a0 = high 64 bits of the dividend
 *   a1 = low 64 bits of the dividend
 *   a2 = 64-bit divisor
 *   v0 = returned quotient, or -1 when a2 is zero
 *   v1 = remainder on the normal return path
 * The divisor is shifted left until its top bit is set (t9 = shift count)
 * and the dividend is shifted by the same amount.  The quotient is then
 * built in two rounds of 32 bits each: estimate from the top halves, then
 * correct the estimate downwards in an inner loop.  a2 is shifted back
 * before returning.
 * Modifies a0, a1, v1, t0-t3, t8, t9, ta0 (QT), ta1 (HH) and AT.
 * bn_div_3_words relies on a3, ta2 and ta3 not being touched here.
 */
577 | .align 5 | ||
578 | LEAF(bn_div_words) | ||
579 | .set noreorder | ||
/* Non-zero divisor: continue, clearing v1 in the delay slot.  Zero divisor:
 * the branch-likely cancels the delay slot and -1 is returned. */
580 | bnezl a2,.L_bn_div_words_proceed | ||
581 | move v1,zero | ||
582 | jr ra | ||
583 | li v0,-1 /* I'd rather signal div-by-zero | ||
584 | * which can be done with 'break 7' */ | ||
585 | |||
586 | .L_bn_div_words_proceed: | ||
/* Top bit already set: no normalisation, t9 = 0 from the delay slot.
 * Otherwise shift a2 left one bit at a time, counting the shifts in t9.
 * The bgtz targets the dsll just before it and its delay slot does the
 * counting. */
587 | bltz a2,.L_bn_div_words_body | ||
588 | move t9,v1 | ||
589 | dsll a2,1 | ||
590 | bgtz a2,.-4 | ||
591 | addu t9,1 | ||
592 | |||
593 | .set reorder | ||
/* t1 = -t9.  Variable shifts use the amount modulo 64, i.e. 64 - t9 here.
 * t2 = the top t9 bits of a0, which the shift below would lose.
 * AT = the top t9 bits of a1, which move into the bottom of a0. */
594 | negu t1,t9 | ||
595 | li t2,-1 | ||
596 | dsll t2,t1 | ||
597 | and t2,a0 | ||
598 | dsrl AT,a1,t1 | ||
599 | .set noreorder | ||
/* The break in the delay slot runs only when the branch-likely is taken,
 * that is only when t2 is non-zero. */
600 | bnezl t2,.+8 | ||
601 | break 6 /* signal overflow */ | ||
602 | .set reorder | ||
603 | dsll a0,t9 | ||
604 | dsll a1,t9 | ||
605 | or a0,AT | ||
606 | |||
607 | #define QT ta0 | ||
608 | #define HH ta1 | ||
609 | #define DH v1 | ||
610 | .L_bn_div_words_body: | ||
/* DH = top 32 bits of the divisor.  If a0 >= a2, a2 is subtracted from a0
 * once, the dsubu in the delay slot runs only when the branch is taken. */
611 | dsrl DH,a2,32 | ||
612 | sgeu AT,a0,a2 | ||
613 | .set noreorder | ||
614 | bnezl AT,.+8 | ||
615 | dsubu a0,a2 | ||
616 | .set reorder | ||
617 | |||
/* Round 1.  QT starts as 0xffffffff and is replaced by a0 / DH unless the
 * top 32 bits of a0 (HH) equal DH. */
618 | li QT,-1 | ||
619 | dsrl HH,a0,32 | ||
620 | dsrl QT,32 /* q=0xffffffff */ | ||
621 | beq DH,HH,.L_bn_div_words_skip_div1 | ||
622 | ddivu zero,a0,DH | ||
623 | mflo QT | ||
624 | .L_bn_div_words_skip_div1: | ||
/* t1:t0 = a2 * QT.  t3 = low 32 bits of a0 followed by the top 32 bits of
 * a1, so HH:t3 is the part of the dividend the product is compared with. */
625 | dmultu a2,QT | ||
626 | dsll t3,a0,32 | ||
627 | dsrl AT,a1,32 | ||
628 | or t3,AT | ||
629 | mflo t0 | ||
630 | mfhi t1 | ||
/* While t1:t0 is greater than HH:t3, decrement QT and subtract a2 from
 * t1:t0.  The sltu in the branch delay slot precomputes the borrow of the
 * low-word subtraction. */
631 | .L_bn_div_words_inner_loop1: | ||
632 | sltu t2,t3,t0 | ||
633 | seq t8,HH,t1 | ||
634 | sltu AT,HH,t1 | ||
635 | and t2,t8 | ||
636 | or AT,t2 | ||
637 | .set noreorder | ||
638 | beqz AT,.L_bn_div_words_inner_loop1_done | ||
639 | sltu t2,t0,a2 | ||
640 | .set reorder | ||
641 | dsubu QT,1 | ||
642 | dsubu t0,a2 | ||
643 | dsubu t1,t2 | ||
644 | b .L_bn_div_words_inner_loop1 | ||
645 | .L_bn_div_words_inner_loop1_done: | ||
646 | |||
/* a0 = partial remainder, v0 = first quotient half moved to the top 32
 * bits, a1 = its low 32 bits moved up for round 2. */
647 | dsll a1,32 | ||
648 | dsubu a0,t3,t0 | ||
649 | dsll v0,QT,32 | ||
650 | |||
/* Round 2: same estimate and correction sequence on the new a0 and a1. */
651 | li QT,-1 | ||
652 | dsrl HH,a0,32 | ||
653 | dsrl QT,32 /* q=0xffffffff */ | ||
654 | beq DH,HH,.L_bn_div_words_skip_div2 | ||
655 | ddivu zero,a0,DH | ||
656 | mflo QT | ||
657 | .L_bn_div_words_skip_div2: | ||
658 | dmultu a2,QT | ||
659 | dsll t3,a0,32 | ||
660 | dsrl AT,a1,32 | ||
661 | or t3,AT | ||
662 | mflo t0 | ||
663 | mfhi t1 | ||
664 | .L_bn_div_words_inner_loop2: | ||
665 | sltu t2,t3,t0 | ||
666 | seq t8,HH,t1 | ||
667 | sltu AT,HH,t1 | ||
668 | and t2,t8 | ||
669 | or AT,t2 | ||
670 | .set noreorder | ||
671 | beqz AT,.L_bn_div_words_inner_loop2_done | ||
672 | sltu t2,t0,a2 | ||
673 | .set reorder | ||
674 | dsubu QT,1 | ||
675 | dsubu t0,a2 | ||
676 | dsubu t1,t2 | ||
677 | b .L_bn_div_words_inner_loop2 | ||
678 | .L_bn_div_words_inner_loop2_done: | ||
679 | |||
/* Merge the second quotient half into v0 and undo the normalisation shift
 * on both the remainder and a2. */
680 | dsubu a0,t3,t0 | ||
681 | or v0,QT | ||
682 | dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ | ||
683 | dsrl a2,t9 /* restore a2 */ | ||
684 | jr ra | ||
685 | #undef HH | ||
686 | #undef DH | ||
687 | #undef QT | ||
688 | END(bn_div_words) | ||
689 | |||
/*
 * bn_div_3_words
 *   a0 = pointer to the top dividend word, the routine reads (a0), -8(a0)
 *        and -16(a0)
 *   a1 = divisor word handed to bn_div_words
 *   a2 = second divisor word, used only by the correction loop
 *   v0 = returned quotient word
 * Unless (a0)[0] equals a1, bn_div_words divides (a0)[0]:(a0)[-1] by a1 and
 * leaves the quotient in v0 and the remainder in v1.  In the equal case v0
 * stays all ones and v1 stays zero.  The correction loop then compares the
 * 128-bit product of the saved a2 and v0 against v1:(a0)[-2] and, while the
 * product is larger, decrements v0, adds the divisor to v1 and subtracts
 * the saved a2 from the product.  It stops early once v1 wraps around.
 * Modifies a0-a3, v1, t0-t3, t8, t9, ta0-ta3 and AT.
 */
690 | .align 5 | ||
691 | LEAF(bn_div_3_words) | ||
692 | .set reorder | ||
693 | move a3,a0 /* we know that bn_div_words doesn't | ||
694 | * touch a3, ta2, ta3 and preserves a2 | ||
695 | * so that we can save two arguments | ||
696 | * and return address in registers | ||
697 | * instead of stack:-) | ||
698 | */ | ||
699 | ld a0,(a3) | ||
700 | move ta2,a2 | ||
701 | move a2,a1 | ||
702 | ld a1,-8(a3) | ||
703 | move ta3,ra | ||
704 | move v1,zero | ||
705 | li v0,-1 | ||
706 | beq a0,a2,.L_bn_div_3_words_skip_div | ||
/* PC-relative branch-and-link instead of the jal macro: the register
 * contract described above holds only for the bn_div_words in this file,
 * and bal reaches it without any gp or GOT based call sequence, which this
 * leaf routine never sets up. */
707 | bal bn_div_words | ||
708 | move ra,ta3 | ||
709 | .L_bn_div_3_words_skip_div: | ||
/* t1:t0 = saved a2 times the quotient estimate, t2 = (a0)[-2]. */
710 | dmultu ta2,v0 | ||
711 | ld t2,-16(a3) | ||
712 | mflo t0 | ||
713 | mfhi t1 | ||
/* Done when t1 < v1, or when t1 equals v1 and t2 >= t0. */
714 | .L_bn_div_3_words_inner_loop: | ||
715 | sgeu AT,t2,t0 | ||
716 | seq t9,t1,v1 | ||
717 | sltu t8,t1,v1 | ||
718 | and AT,t9 | ||
719 | or AT,t8 | ||
720 | bnez AT,.L_bn_div_3_words_inner_loop_done | ||
/* Estimate too large: v1 += divisor, v0 -= 1, t1:t0 -= saved a2.  AT flags
 * a wrap-around of v1, in which case the loop is left. */
721 | daddu v1,a2 | ||
722 | sltu t3,t0,ta2 | ||
723 | sltu AT,v1,a2 | ||
724 | dsubu v0,1 | ||
725 | dsubu t0,ta2 | ||
726 | dsubu t1,t3 | ||
727 | beqz AT,.L_bn_div_3_words_inner_loop | ||
728 | .L_bn_div_3_words_inner_loop_done: | ||
729 | jr ra | ||
730 | END(bn_div_3_words) | ||
731 | |||
/*
 * Register aliases used by bn_mul_comba8, which follows these definitions.
 *   a_0..a_7, b_0..b_7  words of the two multiplicands
 *   t_1, t_2            low and high half of the current 128-bit product
 *   c_1, c_2, c_3       the three rotating accumulator words named in the
 *                       mul_add_c comments
 * a_4..a_6 and b_4..b_6 live in s0-s5, which bn_mul_comba8 stores to its
 * stack frame of FRAME_SIZE bytes before using them.
 * NOTE(review): bn_mul_comba8 lowers sp by FRAME_SIZE (48) but declares
 * .frame sp,64,ra.  Confirm which value is intended.
 */
732 | #define a_0 t0 | ||
733 | #define a_1 t1 | ||
734 | #define a_2 t2 | ||
735 | #define a_3 t3 | ||
736 | #define b_0 ta0 | ||
737 | #define b_1 ta1 | ||
738 | #define b_2 ta2 | ||
739 | #define b_3 ta3 | ||
740 | |||
741 | #define a_4 s0 | ||
742 | #define a_5 s2 | ||
743 | #define a_6 s4 | ||
744 | #define a_7 a1 /* once we load a[7] we don't need a anymore */ | ||
745 | #define b_4 s1 | ||
746 | #define b_5 s3 | ||
747 | #define b_6 s5 | ||
748 | #define b_7 a2 /* once we load b[7] we don't need b anymore */ | ||
749 | |||
750 | #define t_1 t8 | ||
751 | #define t_2 t9 | ||
752 | |||
753 | #define c_1 v0 | ||
754 | #define c_2 v1 | ||
755 | #define c_3 a3 | ||
756 | |||
757 | #define FRAME_SIZE 48 | ||
758 | |||
759 | .align 5 | ||
760 | LEAF(bn_mul_comba8) | ||
761 | .set noreorder | ||
762 | PTR_SUB sp,FRAME_SIZE | ||
763 | .frame sp,64,ra | ||
764 | .set reorder | ||
765 | ld a_0,0(a1) /* If compiled with -mips3 option on | ||
766 | * R5000 box assembler barks on this | ||
767 | * line with "shouldn't have mult/div | ||
768 | * as last instruction in bb (R10K | ||
769 | * bug)" warning. If anybody out there | ||
770 | * has a clue about how to circumvent | ||
771 | * this do send me a note. | ||
772 | * <appro@fy.chalmers.se> | ||
773 | */ | ||
774 | ld b_0,0(a2) | ||
775 | ld a_1,8(a1) | ||
776 | ld a_2,16(a1) | ||
777 | ld a_3,24(a1) | ||
778 | ld b_1,8(a2) | ||
779 | ld b_2,16(a2) | ||
780 | ld b_3,24(a2) | ||
781 | dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
782 | sd s0,0(sp) | ||
783 | sd s1,8(sp) | ||
784 | sd s2,16(sp) | ||
785 | sd s3,24(sp) | ||
786 | sd s4,32(sp) | ||
787 | sd s5,40(sp) | ||
788 | mflo c_1 | ||
789 | mfhi c_2 | ||
790 | |||
791 | dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ | ||
792 | ld a_4,32(a1) | ||
793 | ld a_5,40(a1) | ||
794 | ld a_6,48(a1) | ||
795 | ld a_7,56(a1) | ||
796 | ld b_4,32(a2) | ||
797 | ld b_5,40(a2) | ||
798 | mflo t_1 | ||
799 | mfhi t_2 | ||
800 | daddu c_2,t_1 | ||
801 | sltu AT,c_2,t_1 | ||
802 | daddu c_3,t_2,AT | ||
803 | dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ | ||
804 | ld b_6,48(a2) | ||
805 | ld b_7,56(a2) | ||
806 | sd c_1,0(a0) /* r[0]=c1; */ | ||
807 | mflo t_1 | ||
808 | mfhi t_2 | ||
809 | daddu c_2,t_1 | ||
810 | sltu AT,c_2,t_1 | ||
811 | daddu t_2,AT | ||
812 | daddu c_3,t_2 | ||
813 | sltu c_1,c_3,t_2 | ||
814 | sd c_2,8(a0) /* r[1]=c2; */ | ||
815 | |||
816 | dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ | ||
817 | mflo t_1 | ||
818 | mfhi t_2 | ||
819 | daddu c_3,t_1 | ||
820 | sltu AT,c_3,t_1 | ||
821 | daddu t_2,AT | ||
822 | daddu c_1,t_2 | ||
823 | dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
824 | mflo t_1 | ||
825 | mfhi t_2 | ||
826 | daddu c_3,t_1 | ||
827 | sltu AT,c_3,t_1 | ||
828 | daddu t_2,AT | ||
829 | daddu c_1,t_2 | ||
830 | sltu c_2,c_1,t_2 | ||
831 | dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ | ||
832 | mflo t_1 | ||
833 | mfhi t_2 | ||
834 | daddu c_3,t_1 | ||
835 | sltu AT,c_3,t_1 | ||
836 | daddu t_2,AT | ||
837 | daddu c_1,t_2 | ||
838 | sltu AT,c_1,t_2 | ||
839 | daddu c_2,AT | ||
840 | sd c_3,16(a0) /* r[2]=c3; */ | ||
841 | |||
842 | dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ | ||
843 | mflo t_1 | ||
844 | mfhi t_2 | ||
845 | daddu c_1,t_1 | ||
846 | sltu AT,c_1,t_1 | ||
847 | daddu t_2,AT | ||
848 | daddu c_2,t_2 | ||
849 | dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ | ||
850 | mflo t_1 | ||
851 | mfhi t_2 | ||
852 | daddu c_1,t_1 | ||
853 | sltu AT,c_1,t_1 | ||
854 | daddu t_2,AT | ||
855 | daddu c_2,t_2 | ||
856 | sltu c_3,c_2,t_2 | ||
857 | dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ | ||
858 | mflo t_1 | ||
859 | mfhi t_2 | ||
860 | daddu c_1,t_1 | ||
861 | sltu AT,c_1,t_1 | ||
862 | daddu t_2,AT | ||
863 | daddu c_2,t_2 | ||
864 | sltu AT,c_2,t_2 | ||
865 | daddu c_3,AT | ||
866 | dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ | ||
867 | mflo t_1 | ||
868 | mfhi t_2 | ||
869 | daddu c_1,t_1 | ||
870 | sltu AT,c_1,t_1 | ||
871 | daddu t_2,AT | ||
872 | daddu c_2,t_2 | ||
873 | sltu AT,c_2,t_2 | ||
874 | daddu c_3,AT | ||
875 | sd c_1,24(a0) /* r[3]=c1; */ | ||
876 | |||
877 | dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ | ||
878 | mflo t_1 | ||
879 | mfhi t_2 | ||
880 | daddu c_2,t_1 | ||
881 | sltu AT,c_2,t_1 | ||
882 | daddu t_2,AT | ||
883 | daddu c_3,t_2 | ||
884 | dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ | ||
885 | mflo t_1 | ||
886 | mfhi t_2 | ||
887 | daddu c_2,t_1 | ||
888 | sltu AT,c_2,t_1 | ||
889 | daddu t_2,AT | ||
890 | daddu c_3,t_2 | ||
891 | sltu c_1,c_3,t_2 | ||
892 | dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
893 | mflo t_1 | ||
894 | mfhi t_2 | ||
895 | daddu c_2,t_1 | ||
896 | sltu AT,c_2,t_1 | ||
897 | daddu t_2,AT | ||
898 | daddu c_3,t_2 | ||
899 | sltu AT,c_3,t_2 | ||
900 | daddu c_1,AT | ||
901 | dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ | ||
902 | mflo t_1 | ||
903 | mfhi t_2 | ||
904 | daddu c_2,t_1 | ||
905 | sltu AT,c_2,t_1 | ||
906 | daddu t_2,AT | ||
907 | daddu c_3,t_2 | ||
908 | sltu AT,c_3,t_2 | ||
909 | daddu c_1,AT | ||
910 | dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ | ||
911 | mflo t_1 | ||
912 | mfhi t_2 | ||
913 | daddu c_2,t_1 | ||
914 | sltu AT,c_2,t_1 | ||
915 | daddu t_2,AT | ||
916 | daddu c_3,t_2 | ||
917 | sltu AT,c_3,t_2 | ||
918 | daddu c_1,AT | ||
919 | sd c_2,32(a0) /* r[4]=c2; */ | ||
920 | |||
921 | dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ | ||
922 | mflo t_1 | ||
923 | mfhi t_2 | ||
924 | daddu c_3,t_1 | ||
925 | sltu AT,c_3,t_1 | ||
926 | daddu t_2,AT | ||
927 | daddu c_1,t_2 | ||
928 | dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ | ||
929 | mflo t_1 | ||
930 | mfhi t_2 | ||
931 | daddu c_3,t_1 | ||
932 | sltu AT,c_3,t_1 | ||
933 | daddu t_2,AT | ||
934 | daddu c_1,t_2 | ||
935 | sltu c_2,c_1,t_2 | ||
936 | dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ | ||
937 | mflo t_1 | ||
938 | mfhi t_2 | ||
939 | daddu c_3,t_1 | ||
940 | sltu AT,c_3,t_1 | ||
941 | daddu t_2,AT | ||
942 | daddu c_1,t_2 | ||
943 | sltu AT,c_1,t_2 | ||
944 | daddu c_2,AT | ||
945 | dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ | ||
946 | mflo t_1 | ||
947 | mfhi t_2 | ||
948 | daddu c_3,t_1 | ||
949 | sltu AT,c_3,t_1 | ||
950 | daddu t_2,AT | ||
951 | daddu c_1,t_2 | ||
952 | sltu AT,c_1,t_2 | ||
953 | daddu c_2,AT | ||
954 | dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ | ||
955 | mflo t_1 | ||
956 | mfhi t_2 | ||
957 | daddu c_3,t_1 | ||
958 | sltu AT,c_3,t_1 | ||
959 | daddu t_2,AT | ||
960 | daddu c_1,t_2 | ||
961 | sltu AT,c_1,t_2 | ||
962 | daddu c_2,AT | ||
963 | dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ | ||
964 | mflo t_1 | ||
965 | mfhi t_2 | ||
966 | daddu c_3,t_1 | ||
967 | sltu AT,c_3,t_1 | ||
968 | daddu t_2,AT | ||
969 | daddu c_1,t_2 | ||
970 | sltu AT,c_1,t_2 | ||
971 | daddu c_2,AT | ||
972 | sd c_3,40(a0) /* r[5]=c3; */ | ||
973 | |||
974 | dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ | ||
975 | mflo t_1 | ||
976 | mfhi t_2 | ||
977 | daddu c_1,t_1 | ||
978 | sltu AT,c_1,t_1 | ||
979 | daddu t_2,AT | ||
980 | daddu c_2,t_2 | ||
981 | dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ | ||
982 | mflo t_1 | ||
983 | mfhi t_2 | ||
984 | daddu c_1,t_1 | ||
985 | sltu AT,c_1,t_1 | ||
986 | daddu t_2,AT | ||
987 | daddu c_2,t_2 | ||
988 | sltu c_3,c_2,t_2 | ||
989 | dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ | ||
990 | mflo t_1 | ||
991 | mfhi t_2 | ||
992 | daddu c_1,t_1 | ||
993 | sltu AT,c_1,t_1 | ||
994 | daddu t_2,AT | ||
995 | daddu c_2,t_2 | ||
996 | sltu AT,c_2,t_2 | ||
997 | daddu c_3,AT | ||
998 | dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
999 | mflo t_1 | ||
1000 | mfhi t_2 | ||
1001 | daddu c_1,t_1 | ||
1002 | sltu AT,c_1,t_1 | ||
1003 | daddu t_2,AT | ||
1004 | daddu c_2,t_2 | ||
1005 | sltu AT,c_2,t_2 | ||
1006 | daddu c_3,AT | ||
1007 | dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ | ||
1008 | mflo t_1 | ||
1009 | mfhi t_2 | ||
1010 | daddu c_1,t_1 | ||
1011 | sltu AT,c_1,t_1 | ||
1012 | daddu t_2,AT | ||
1013 | daddu c_2,t_2 | ||
1014 | sltu AT,c_2,t_2 | ||
1015 | daddu c_3,AT | ||
1016 | dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ | ||
1017 | mflo t_1 | ||
1018 | mfhi t_2 | ||
1019 | daddu c_1,t_1 | ||
1020 | sltu AT,c_1,t_1 | ||
1021 | daddu t_2,AT | ||
1022 | daddu c_2,t_2 | ||
1023 | sltu AT,c_2,t_2 | ||
1024 | daddu c_3,AT | ||
1025 | dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ | ||
1026 | mflo t_1 | ||
1027 | mfhi t_2 | ||
1028 | daddu c_1,t_1 | ||
1029 | sltu AT,c_1,t_1 | ||
1030 | daddu t_2,AT | ||
1031 | daddu c_2,t_2 | ||
1032 | sltu AT,c_2,t_2 | ||
1033 | daddu c_3,AT | ||
1034 | sd c_1,48(a0) /* r[6]=c1; */ | ||
1035 | |||
1036 | dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ | ||
1037 | mflo t_1 | ||
1038 | mfhi t_2 | ||
1039 | daddu c_2,t_1 | ||
1040 | sltu AT,c_2,t_1 | ||
1041 | daddu t_2,AT | ||
1042 | daddu c_3,t_2 | ||
1043 | dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ | ||
1044 | mflo t_1 | ||
1045 | mfhi t_2 | ||
1046 | daddu c_2,t_1 | ||
1047 | sltu AT,c_2,t_1 | ||
1048 | daddu t_2,AT | ||
1049 | daddu c_3,t_2 | ||
1050 | sltu c_1,c_3,t_2 | ||
1051 | dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ | ||
1052 | mflo t_1 | ||
1053 | mfhi t_2 | ||
1054 | daddu c_2,t_1 | ||
1055 | sltu AT,c_2,t_1 | ||
1056 | daddu t_2,AT | ||
1057 | daddu c_3,t_2 | ||
1058 | sltu AT,c_3,t_2 | ||
1059 | daddu c_1,AT | ||
1060 | dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ | ||
1061 | mflo t_1 | ||
1062 | mfhi t_2 | ||
1063 | daddu c_2,t_1 | ||
1064 | sltu AT,c_2,t_1 | ||
1065 | daddu t_2,AT | ||
1066 | daddu c_3,t_2 | ||
1067 | sltu AT,c_3,t_2 | ||
1068 | daddu c_1,AT | ||
1069 | dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ | ||
1070 | mflo t_1 | ||
1071 | mfhi t_2 | ||
1072 | daddu c_2,t_1 | ||
1073 | sltu AT,c_2,t_1 | ||
1074 | daddu t_2,AT | ||
1075 | daddu c_3,t_2 | ||
1076 | sltu AT,c_3,t_2 | ||
1077 | daddu c_1,AT | ||
1078 | dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ | ||
1079 | mflo t_1 | ||
1080 | mfhi t_2 | ||
1081 | daddu c_2,t_1 | ||
1082 | sltu AT,c_2,t_1 | ||
1083 | daddu t_2,AT | ||
1084 | daddu c_3,t_2 | ||
1085 | sltu AT,c_3,t_2 | ||
1086 | daddu c_1,AT | ||
1087 | dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ | ||
1088 | mflo t_1 | ||
1089 | mfhi t_2 | ||
1090 | daddu c_2,t_1 | ||
1091 | sltu AT,c_2,t_1 | ||
1092 | daddu t_2,AT | ||
1093 | daddu c_3,t_2 | ||
1094 | sltu AT,c_3,t_2 | ||
1095 | daddu c_1,AT | ||
1096 | dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ | ||
1097 | mflo t_1 | ||
1098 | mfhi t_2 | ||
1099 | daddu c_2,t_1 | ||
1100 | sltu AT,c_2,t_1 | ||
1101 | daddu t_2,AT | ||
1102 | daddu c_3,t_2 | ||
1103 | sltu AT,c_3,t_2 | ||
1104 | daddu c_1,AT | ||
1105 | sd c_2,56(a0) /* r[7]=c2; */ | ||
1106 | |||
1107 | dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ | ||
1108 | mflo t_1 | ||
1109 | mfhi t_2 | ||
1110 | daddu c_3,t_1 | ||
1111 | sltu AT,c_3,t_1 | ||
1112 | daddu t_2,AT | ||
1113 | daddu c_1,t_2 | ||
1114 | dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ | ||
1115 | mflo t_1 | ||
1116 | mfhi t_2 | ||
1117 | daddu c_3,t_1 | ||
1118 | sltu AT,c_3,t_1 | ||
1119 | daddu t_2,AT | ||
1120 | daddu c_1,t_2 | ||
1121 | sltu c_2,c_1,t_2 | ||
1122 | dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ | ||
1123 | mflo t_1 | ||
1124 | mfhi t_2 | ||
1125 | daddu c_3,t_1 | ||
1126 | sltu AT,c_3,t_1 | ||
1127 | daddu t_2,AT | ||
1128 | daddu c_1,t_2 | ||
1129 | sltu AT,c_1,t_2 | ||
1130 | daddu c_2,AT | ||
1131 | dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ | ||
1132 | mflo t_1 | ||
1133 | mfhi t_2 | ||
1134 | daddu c_3,t_1 | ||
1135 | sltu AT,c_3,t_1 | ||
1136 | daddu t_2,AT | ||
1137 | daddu c_1,t_2 | ||
1138 | sltu AT,c_1,t_2 | ||
1139 | daddu c_2,AT | ||
1140 | dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ | ||
1141 | mflo t_1 | ||
1142 | mfhi t_2 | ||
1143 | daddu c_3,t_1 | ||
1144 | sltu AT,c_3,t_1 | ||
1145 | daddu t_2,AT | ||
1146 | daddu c_1,t_2 | ||
1147 | sltu AT,c_1,t_2 | ||
1148 | daddu c_2,AT | ||
1149 | dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ | ||
1150 | mflo t_1 | ||
1151 | mfhi t_2 | ||
1152 | daddu c_3,t_1 | ||
1153 | sltu AT,c_3,t_1 | ||
1154 | daddu t_2,AT | ||
1155 | daddu c_1,t_2 | ||
1156 | sltu AT,c_1,t_2 | ||
1157 | daddu c_2,AT | ||
1158 | dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ | ||
1159 | mflo t_1 | ||
1160 | mfhi t_2 | ||
1161 | daddu c_3,t_1 | ||
1162 | sltu AT,c_3,t_1 | ||
1163 | daddu t_2,AT | ||
1164 | daddu c_1,t_2 | ||
1165 | sltu AT,c_1,t_2 | ||
1166 | daddu c_2,AT | ||
1167 | sd c_3,64(a0) /* r[8]=c3; */ | ||
1168 | |||
1169 | dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ | ||
1170 | mflo t_1 | ||
1171 | mfhi t_2 | ||
1172 | daddu c_1,t_1 | ||
1173 | sltu AT,c_1,t_1 | ||
1174 | daddu t_2,AT | ||
1175 | daddu c_2,t_2 | ||
1176 | dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ | ||
1177 | mflo t_1 | ||
1178 | mfhi t_2 | ||
1179 | daddu c_1,t_1 | ||
1180 | sltu AT,c_1,t_1 | ||
1181 | daddu t_2,AT | ||
1182 | daddu c_2,t_2 | ||
1183 | sltu c_3,c_2,t_2 | ||
1184 | dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ | ||
1185 | mflo t_1 | ||
1186 | mfhi t_2 | ||
1187 | daddu c_1,t_1 | ||
1188 | sltu AT,c_1,t_1 | ||
1189 | daddu t_2,AT | ||
1190 | daddu c_2,t_2 | ||
1191 | sltu AT,c_2,t_2 | ||
1192 | daddu c_3,AT | ||
1193 | dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ | ||
1194 | mflo t_1 | ||
1195 | mfhi t_2 | ||
1196 | daddu c_1,t_1 | ||
1197 | sltu AT,c_1,t_1 | ||
1198 | daddu t_2,AT | ||
1199 | daddu c_2,t_2 | ||
1200 | sltu AT,c_2,t_2 | ||
1201 | daddu c_3,AT | ||
1202 | dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ | ||
1203 | mflo t_1 | ||
1204 | mfhi t_2 | ||
1205 | daddu c_1,t_1 | ||
1206 | sltu AT,c_1,t_1 | ||
1207 | daddu t_2,AT | ||
1208 | daddu c_2,t_2 | ||
1209 | sltu AT,c_2,t_2 | ||
1210 | daddu c_3,AT | ||
1211 | dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ | ||
1212 | mflo t_1 | ||
1213 | mfhi t_2 | ||
1214 | daddu c_1,t_1 | ||
1215 | sltu AT,c_1,t_1 | ||
1216 | daddu t_2,AT | ||
1217 | daddu c_2,t_2 | ||
1218 | sltu AT,c_2,t_2 | ||
1219 | daddu c_3,AT | ||
1220 | sd c_1,72(a0) /* r[9]=c1; */ | ||
1221 | |||
1222 | dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ | ||
1223 | mflo t_1 | ||
1224 | mfhi t_2 | ||
1225 | daddu c_2,t_1 | ||
1226 | sltu AT,c_2,t_1 | ||
1227 | daddu t_2,AT | ||
1228 | daddu c_3,t_2 | ||
1229 | dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ | ||
1230 | mflo t_1 | ||
1231 | mfhi t_2 | ||
1232 | daddu c_2,t_1 | ||
1233 | sltu AT,c_2,t_1 | ||
1234 | daddu t_2,AT | ||
1235 | daddu c_3,t_2 | ||
1236 | sltu c_1,c_3,t_2 | ||
1237 | dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ | ||
1238 | mflo t_1 | ||
1239 | mfhi t_2 | ||
1240 | daddu c_2,t_1 | ||
1241 | sltu AT,c_2,t_1 | ||
1242 | daddu t_2,AT | ||
1243 | daddu c_3,t_2 | ||
1244 | sltu AT,c_3,t_2 | ||
1245 | daddu c_1,AT | ||
1246 | dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ | ||
1247 | mflo t_1 | ||
1248 | mfhi t_2 | ||
1249 | daddu c_2,t_1 | ||
1250 | sltu AT,c_2,t_1 | ||
1251 | daddu t_2,AT | ||
1252 | daddu c_3,t_2 | ||
1253 | sltu AT,c_3,t_2 | ||
1254 | daddu c_1,AT | ||
1255 | dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ | ||
1256 | mflo t_1 | ||
1257 | mfhi t_2 | ||
1258 | daddu c_2,t_1 | ||
1259 | sltu AT,c_2,t_1 | ||
1260 | daddu t_2,AT | ||
1261 | daddu c_3,t_2 | ||
1262 | sltu AT,c_3,t_2 | ||
1263 | daddu c_1,AT | ||
1264 | sd c_2,80(a0) /* r[10]=c2; */ | ||
1265 | |||
1266 | dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ | ||
1267 | mflo t_1 | ||
1268 | mfhi t_2 | ||
1269 | daddu c_3,t_1 | ||
1270 | sltu AT,c_3,t_1 | ||
1271 | daddu t_2,AT | ||
1272 | daddu c_1,t_2 | ||
1273 | dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ | ||
1274 | mflo t_1 | ||
1275 | mfhi t_2 | ||
1276 | daddu c_3,t_1 | ||
1277 | sltu AT,c_3,t_1 | ||
1278 | daddu t_2,AT | ||
1279 | daddu c_1,t_2 | ||
1280 | sltu c_2,c_1,t_2 | ||
1281 | dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ | ||
1282 | mflo t_1 | ||
1283 | mfhi t_2 | ||
1284 | daddu c_3,t_1 | ||
1285 | sltu AT,c_3,t_1 | ||
1286 | daddu t_2,AT | ||
1287 | daddu c_1,t_2 | ||
1288 | sltu AT,c_1,t_2 | ||
1289 | daddu c_2,AT | ||
1290 | dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ | ||
1291 | mflo t_1 | ||
1292 | mfhi t_2 | ||
1293 | daddu c_3,t_1 | ||
1294 | sltu AT,c_3,t_1 | ||
1295 | daddu t_2,AT | ||
1296 | daddu c_1,t_2 | ||
1297 | sltu AT,c_1,t_2 | ||
1298 | daddu c_2,AT | ||
1299 | sd c_3,88(a0) /* r[11]=c3; */ | ||
1300 | |||
1301 | dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ | ||
1302 | mflo t_1 | ||
1303 | mfhi t_2 | ||
1304 | daddu c_1,t_1 | ||
1305 | sltu AT,c_1,t_1 | ||
1306 | daddu t_2,AT | ||
1307 | daddu c_2,t_2 | ||
1308 | dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ | ||
1309 | mflo t_1 | ||
1310 | mfhi t_2 | ||
1311 | daddu c_1,t_1 | ||
1312 | sltu AT,c_1,t_1 | ||
1313 | daddu t_2,AT | ||
1314 | daddu c_2,t_2 | ||
1315 | sltu c_3,c_2,t_2 | ||
1316 | dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ | ||
1317 | mflo t_1 | ||
1318 | mfhi t_2 | ||
1319 | daddu c_1,t_1 | ||
1320 | sltu AT,c_1,t_1 | ||
1321 | daddu t_2,AT | ||
1322 | daddu c_2,t_2 | ||
1323 | sltu AT,c_2,t_2 | ||
1324 | daddu c_3,AT | ||
1325 | sd c_1,96(a0) /* r[12]=c1; */ | ||
1326 | |||
1327 | dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ | ||
1328 | mflo t_1 | ||
1329 | mfhi t_2 | ||
1330 | daddu c_2,t_1 | ||
1331 | sltu AT,c_2,t_1 | ||
1332 | daddu t_2,AT | ||
1333 | daddu c_3,t_2 | ||
1334 | dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ | ||
1335 | mflo t_1 | ||
1336 | mfhi t_2 | ||
1337 | daddu c_2,t_1 | ||
1338 | sltu AT,c_2,t_1 | ||
1339 | daddu t_2,AT | ||
1340 | daddu c_3,t_2 | ||
1341 | sltu c_1,c_3,t_2 | ||
1342 | sd c_2,104(a0) /* r[13]=c2; */ | ||
1343 | |||
1344 | dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ | ||
1345 | ld s0,0(sp) | ||
1346 | ld s1,8(sp) | ||
1347 | ld s2,16(sp) | ||
1348 | ld s3,24(sp) | ||
1349 | ld s4,32(sp) | ||
1350 | ld s5,40(sp) | ||
1351 | mflo t_1 | ||
1352 | mfhi t_2 | ||
1353 | daddu c_3,t_1 | ||
1354 | sltu AT,c_3,t_1 | ||
1355 | daddu t_2,AT | ||
1356 | daddu c_1,t_2 | ||
1357 | sd c_3,112(a0) /* r[14]=c3; */ | ||
1358 | sd c_1,120(a0) /* r[15]=c1; */ | ||
1359 | |||
1360 | PTR_ADD sp,FRAME_SIZE | ||
1361 | |||
1362 | jr ra | ||
1363 | END(bn_mul_comba8) | ||
1364 | |||
/*
 * bn_mul_comba4 -- column-wise ("comba") 4x4 word multiplication.
 *
 *   a0 = r : eight 64-bit words are stored at r[0..7]  (offsets 0..56)
 *   a1 = a : four 64-bit words are loaded from a[0..3]
 *   a2 = b : four 64-bit words are loaded from b[0..3]
 *
 * c_1/c_2/c_3 form a rotating three-word accumulator (low, middle, top)
 * for the current result column; t_1/t_2 receive LO/HI of each dmultu and
 * AT holds the carry produced by sltu.  The a_N/b_N/c_N/t_N names are
 * preprocessor aliases defined earlier in this file.
 *
 * Carry rule used in every column that has a top word: the FIRST product
 * initialises the top word with "sltu top,mid,t_2" right after the
 * middle-word add, and every later product accumulates its carry through
 * AT.  Initialising the top word only at the second product (as this
 * routine used to do) silently drops the carry of the first add, which
 * can happen from r[3] onwards, e.g. when the accumulator holds
 * {low=2^64-1, mid=1} and the first product is (2^64-1)*(2^64-1).
 *
 * Leaf routine: no stack frame, nothing saved or restored.
 */
.align	5
LEAF(bn_mul_comba4)
	.set	reorder
	ld	a_0,0(a1)
	ld	b_0,0(a2)
	ld	a_1,8(a1)
	ld	a_2,16(a1)
	dmultu	a_0,b_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */
	ld	a_3,24(a1)	/* remaining loads sit between dmultu and mflo */
	ld	b_1,8(a2)
	ld	b_2,16(a2)
	ld	b_3,24(a2)
	mflo	c_1
	mfhi	c_2
	sd	c_1,0(a0)	/* r[0]=c1; */

	/* r[1]: c_3 is created here, so hi+carry is written with the 3-operand add */
	dmultu	a_0,b_1		/* mul_add_c(a[0],b[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	c_3,t_2,AT
	dmultu	a_1,b_0		/* mul_add_c(a[1],b[0],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2	/* c_1 becomes the next column's middle word */
	sd	c_2,8(a0)	/* r[1]=c2; */

	/* r[2]: low=c_3, middle=c_1, top=c_2 */
	dmultu	a_2,b_0		/* mul_add_c(a[2],b[0],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	c_2,c_1,t_2	/* top word starts as the carry of the first add */
	dmultu	a_1,b_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_0,b_2		/* mul_add_c(a[0],b[2],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,16(a0)	/* r[2]=c3; */

	/* r[3]: low=c_1, middle=c_2, top=c_3 */
	dmultu	a_0,b_3		/* mul_add_c(a[0],b[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	c_3,c_2,t_2	/* top word starts as the carry of the first add */
	dmultu	a_1,b_2		/* mul_add_c(a[1],b[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_2,b_1		/* mul_add_c(a[2],b[1],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_3,b_0		/* mul_add_c(a[3],b[0],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,24(a0)	/* r[3]=c1; */

	/* r[4]: low=c_2, middle=c_3, top=c_1 */
	dmultu	a_3,b_1		/* mul_add_c(a[3],b[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2	/* top word starts as the carry of the first add */
	dmultu	a_2,b_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_1,b_3		/* mul_add_c(a[1],b[3],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,32(a0)	/* r[4]=c2; */

	/* r[5]: low=c_3, middle=c_1, top=c_2 */
	dmultu	a_2,b_3		/* mul_add_c(a[2],b[3],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	c_2,c_1,t_2	/* top word starts as the carry of the first add */
	dmultu	a_3,b_2		/* mul_add_c(a[3],b[2],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,40(a0)	/* r[5]=c3; */

	/* r[6], r[7]: last column, no top word is kept */
	dmultu	a_3,b_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sd	c_1,48(a0)	/* r[6]=c1; */
	sd	c_2,56(a0)	/* r[7]=c2; */

	jr	ra
END(bn_mul_comba4)
1513 | |||
/*
 * Re-point a_4..a_7 at the b_0..b_3 registers for the squaring routines
 * that follow: they load only from a1, so the b_N registers are otherwise
 * unused there.  (The previous a_4..a_7 mapping is defined earlier in the
 * file -- presumably registers that bn_mul_comba8 has to save; confirm.)
 */
#undef	a_4
#define	a_4	b_0
#undef	a_5
#define	a_5	b_1
#undef	a_6
#define	a_6	b_2
#undef	a_7
#define	a_7	b_3
1522 | |||
/*
 * bn_sqr_comba8 -- column-wise ("comba") squaring of an 8-word number.
 *
 *   a0 = r : sixteen 64-bit words are stored at r[0..15] (offsets 0..120)
 *   a1 = a : eight 64-bit words are loaded from a[0..7]
 *
 * c_1/c_2/c_3 form a rotating three-word accumulator (low, middle, top)
 * for the current result column; t_1/t_2 receive LO/HI of each dmultu and
 * AT holds the carry produced by sltu.  Register a2 carries no argument
 * here (only a0 and a1 are read) and is used as scratch.
 *
 * mul_add_c2(x,y,...) adds the product x*y to the accumulator twice:
 * first as {t_1, a2 = t_2 + carry}, then as {t_1, t_2 + carry}.
 * mul_add_c(x,x,...) adds a diagonal term once.
 *
 * Carry rule used in every column that has a top word: the first add
 * into the middle word initialises the top word with "sltu top,mid,a2",
 * and every later add accumulates its carry through AT.  Initialising
 * the top word only after the second add of the first product (as this
 * routine used to do) silently drops the carry of the first add, which
 * can happen from r[3] onwards, e.g. when the accumulator holds
 * {low=2^64-1, mid=1} and the product is (2^64-1)*(2^64-1).
 *
 * Leaf routine: no stack frame, nothing saved or restored.
 */
.align	5
LEAF(bn_sqr_comba8)
	.set	reorder
	ld	a_0,0(a1)
	ld	a_1,8(a1)
	ld	a_2,16(a1)
	ld	a_3,24(a1)

	dmultu	a_0,a_0		/* mul_add_c(a[0],a[0],c1,c2,c3); */
	ld	a_4,32(a1)	/* remaining loads sit between dmultu and mflo */
	ld	a_5,40(a1)
	ld	a_6,48(a1)
	ld	a_7,56(a1)
	mflo	c_1
	mfhi	c_2
	sd	c_1,0(a0)	/* r[0]=c1; */

	/* r[1]: c_3 is created here, so hi+carry is written with the 3-operand add */
	dmultu	a_0,a_1		/* mul_add_c2(a[0],a[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	c_3,t_2,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2	/* c_1 becomes the next column's middle word */
	sd	c_2,8(a0)	/* r[1]=c2; */

	/* r[2]: low=c_3, middle=c_1, top=c_2 */
	dmultu	a_2,a_0		/* mul_add_c2(a[2],a[0],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* top word starts as the carry of the first add */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_1,a_1		/* mul_add_c(a[1],a[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,16(a0)	/* r[2]=c3; */

	/* r[3]: low=c_1, middle=c_2, top=c_3 */
	dmultu	a_0,a_3		/* mul_add_c2(a[0],a[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* top word starts as the carry of the first add */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_1,a_2		/* mul_add_c2(a[1],a[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,24(a0)	/* r[3]=c1; */

	/* r[4]: low=c_2, middle=c_3, top=c_1 */
	dmultu	a_4,a_0		/* mul_add_c2(a[4],a[0],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* top word starts as the carry of the first add */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_3,a_1		/* mul_add_c2(a[3],a[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_2,a_2		/* mul_add_c(a[2],a[2],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,32(a0)	/* r[4]=c2; */

	/* r[5]: low=c_3, middle=c_1, top=c_2 */
	dmultu	a_0,a_5		/* mul_add_c2(a[0],a[5],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* top word starts as the carry of the first add */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_1,a_4		/* mul_add_c2(a[1],a[4],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_2,a_3		/* mul_add_c2(a[2],a[3],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,40(a0)	/* r[5]=c3; */

	/* r[6]: low=c_1, middle=c_2, top=c_3 */
	dmultu	a_6,a_0		/* mul_add_c2(a[6],a[0],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* top word starts as the carry of the first add */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_5,a_1		/* mul_add_c2(a[5],a[1],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_4,a_2		/* mul_add_c2(a[4],a[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_3,a_3		/* mul_add_c(a[3],a[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,48(a0)	/* r[6]=c1; */

	/* r[7]: low=c_2, middle=c_3, top=c_1 */
	dmultu	a_0,a_7		/* mul_add_c2(a[0],a[7],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* top word starts as the carry of the first add */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_1,a_6		/* mul_add_c2(a[1],a[6],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_2,a_5		/* mul_add_c2(a[2],a[5],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_3,a_4		/* mul_add_c2(a[3],a[4],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,56(a0)	/* r[7]=c2; */

	/* r[8]: low=c_3, middle=c_1, top=c_2 */
	dmultu	a_7,a_1		/* mul_add_c2(a[7],a[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* top word starts as the carry of the first add */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_6,a_2		/* mul_add_c2(a[6],a[2],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_5,a_3		/* mul_add_c2(a[5],a[3],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_4,a_4		/* mul_add_c(a[4],a[4],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,64(a0)	/* r[8]=c3; */

	/* r[9]: low=c_1, middle=c_2, top=c_3 */
	dmultu	a_2,a_7		/* mul_add_c2(a[2],a[7],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* top word starts as the carry of the first add */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_3,a_6		/* mul_add_c2(a[3],a[6],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_4,a_5		/* mul_add_c2(a[4],a[5],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,72(a0)	/* r[9]=c1; */

	/* r[10]: low=c_2, middle=c_3, top=c_1 */
	dmultu	a_7,a_3		/* mul_add_c2(a[7],a[3],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* top word starts as the carry of the first add */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_6,a_4		/* mul_add_c2(a[6],a[4],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_5,a_5		/* mul_add_c(a[5],a[5],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,80(a0)	/* r[10]=c2; */

	/* r[11]: low=c_3, middle=c_1, top=c_2 */
	dmultu	a_4,a_7		/* mul_add_c2(a[4],a[7],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* top word starts as the carry of the first add */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_5,a_6		/* mul_add_c2(a[5],a[6],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,88(a0)	/* r[11]=c3; */

	/* r[12]: low=c_1, middle=c_2, top=c_3 */
	dmultu	a_7,a_5		/* mul_add_c2(a[7],a[5],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* top word starts as the carry of the first add */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_6,a_6		/* mul_add_c(a[6],a[6],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,96(a0)	/* r[12]=c1; */

	/* r[13]: low=c_2, middle=c_3, top=c_1 */
	dmultu	a_6,a_7		/* mul_add_c2(a[6],a[7],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* top word starts as the carry of the first add */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,104(a0)	/* r[13]=c2; */

	/* r[14], r[15]: last column, no top word is kept */
	dmultu	a_7,a_7		/* mul_add_c(a[7],a[7],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sd	c_3,112(a0)	/* r[14]=c3; */
	sd	c_1,120(a0)	/* r[15]=c1; */

	jr	ra
END(bn_sqr_comba8)
2012 | |||
/*
 * bn_sqr_comba4 -- column-wise ("comba") squaring of a 4-word number.
 *
 *   a0 = r : eight 64-bit words are stored at r[0..7] (offsets 0..56)
 *   a1 = a : four 64-bit words are loaded from a[0..3]
 *
 * c_1/c_2/c_3 form a rotating three-word accumulator (low, middle, top)
 * for the current result column; t_1/t_2 receive LO/HI of each dmultu and
 * AT holds the carry produced by sltu.  Register a2 carries no argument
 * here (only a0 and a1 are read) and is used as scratch.
 *
 * mul_add_c2(x,y,...) adds the product x*y to the accumulator twice:
 * first as {t_1, a2 = t_2 + carry}, then as {t_1, t_2 + carry}.
 * mul_add_c(x,x,...) adds a diagonal term once.
 *
 * Carry rule used in every column that has a top word: the first add
 * into the middle word initialises the top word with "sltu top,mid,a2",
 * and every later add accumulates its carry through AT.  Initialising
 * the top word only after the second add of the first product (as this
 * routine used to do) silently drops the carry of the first add, which
 * can happen from r[3] onwards, e.g. when the accumulator holds
 * {low=2^64-1, mid=1} and the product is (2^64-1)*(2^64-1).
 *
 * Leaf routine: no stack frame, nothing saved or restored.
 */
.align	5
LEAF(bn_sqr_comba4)
	.set	reorder
	ld	a_0,0(a1)
	ld	a_1,8(a1)
	ld	a_2,16(a1)
	ld	a_3,24(a1)
	dmultu	a_0,a_0		/* mul_add_c(a[0],a[0],c1,c2,c3); */
	mflo	c_1
	mfhi	c_2
	sd	c_1,0(a0)	/* r[0]=c1; */

	/* r[1]: c_3 is created here, so hi+carry is written with the 3-operand add */
	dmultu	a_0,a_1		/* mul_add_c2(a[0],a[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	c_3,t_2,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2	/* c_1 becomes the next column's middle word */
	sd	c_2,8(a0)	/* r[1]=c2; */

	/* r[2]: low=c_3, middle=c_1, top=c_2 */
	dmultu	a_2,a_0		/* mul_add_c2(a[2],a[0],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* top word starts as the carry of the first add */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_1,a_1		/* mul_add_c(a[1],a[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,16(a0)	/* r[2]=c3; */

	/* r[3]: low=c_1, middle=c_2, top=c_3 */
	dmultu	a_0,a_3		/* mul_add_c2(a[0],a[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* top word starts as the carry of the first add */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_1,a_2		/* mul_add_c2(a[1],a[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,24(a0)	/* r[3]=c1; */

	/* r[4]: low=c_2, middle=c_3, top=c_1 */
	dmultu	a_3,a_1		/* mul_add_c2(a[3],a[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* top word starts as the carry of the first add */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_2,a_2		/* mul_add_c(a[2],a[2],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,32(a0)	/* r[4]=c2; */

	/* r[5]: low=c_3, middle=c_1, top=c_2 */
	dmultu	a_2,a_3		/* mul_add_c2(a[2],a[3],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* top word starts as the carry of the first add */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,40(a0)	/* r[5]=c3; */

	/* r[6], r[7]: last column, no top word is kept */
	dmultu	a_3,a_3		/* mul_add_c(a[3],a[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sd	c_1,48(a0)	/* r[6]=c1; */
	sd	c_2,56(a0)	/* r[7]=c2; */

	jr	ra
END(bn_sqr_comba4)