summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/asm/mips3.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/asm/mips3.s')
-rw-r--r--src/lib/libcrypto/bn/asm/mips3.s2138
1 files changed, 2138 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips3.s b/src/lib/libcrypto/bn/asm/mips3.s
new file mode 100644
index 0000000000..191345d920
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/mips3.s
@@ -0,0 +1,2138 @@
1.rdata
2.asciiz "mips3.s, Version 1.0"
3.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
4
5/*
6 * ====================================================================
7 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
8 * project.
9 *
10 * Rights for redistribution and usage in source and binary forms are
11 * granted according to the OpenSSL license. Warranty of any kind is
12 * disclaimed.
13 * ====================================================================
14 */
15
16/*
17 * This is my modest contribution to the OpenSSL project (see
18 * http://www.openssl.org/ for more information about it) and is
19 * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
20 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
21 *
22 * The module is designed to work with either of the "new" MIPS ABI(5),
23 * namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
24 * IRIX 5.x not only because it doesn't support new ABIs but also
25 * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
26 * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
27 * cause illegal instruction exception:-(
28 *
29 * In addition the code depends on preprocessor flags set up by MIPSpro
30 * compiler driver (either as or cc) and therefore (probably?) can't be
31 * compiled by the GNU assembler. GNU C driver manages fine though...
32 * I mean as long as -mmips-as is specified or is the default option,
33 * because then it simply invokes /usr/bin/as which in turn takes
34 * perfect care of the preprocessor definitions. Another neat feature
35 * offered by the MIPSpro assembler is an optimization pass. This gave
36 * me the opportunity to have the code looking more regular as all those
37 * architecture dependent instruction rescheduling details were left to
38 * the assembler. Cool, huh?
39 *
40 * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
41 * goes way over 3 times faster!
42 *
43 * <appro@fy.chalmers.se>
44 */
45#include <asm.h>
46#include <regdef.h>
47/* MOVNZ(cond,dst,src): dst = src when cond != 0, otherwise dst is left untouched. */
48#if _MIPS_ISA>=4
49#define MOVNZ(cond,dst,src) \
50 movn dst,src,cond
51#else
52#define MOVNZ(cond,dst,src) \
53 .set noreorder; \
54 bnezl cond,.+8; \
55 move dst,src; \
56 .set reorder
57#endif /* _MIPS_ISA>=4; the fallback relies on branch-likely annulling the delay-slot move when cond == 0 */
58
59.text
60
61.set noat
62.set reorder
63
64#define MINUS4 v1
65/* bn_mul_add_words: a0 = array updated in place, a1 = source array, a2 = word count, a3 = multiplier; adds a1[i]*a3 into a0[i] and returns the final carry word in v0. */
66.align 5
67LEAF(bn_mul_add_words)
68 .set noreorder
69 bgtzl a2,.L_bn_mul_add_words_proceed /* count > 0: proceed; branch-likely, so the delay slot runs only if taken */
70 ld t0,0(a1) /* delay slot: preload the first source word */
71 jr ra /* count <= 0: nothing to do */
72 move v0,zero /* delay slot: return 0 */
73 .set reorder
74
75.L_bn_mul_add_words_proceed:
76 li MINUS4,-4
77 and ta0,a2,MINUS4 /* ta0 = count with its low two bits cleared */
78 move v0,zero /* v0 = running carry */
79 beqz ta0,.L_bn_mul_add_words_tail /* fewer than four words: tail only */
80/* Unrolled loop: four words per pass; t0 already holds the word at 0(a1). */
81.L_bn_mul_add_words_loop:
82 dmultu t0,a3
83 ld t1,0(a0)
84 ld t2,8(a1)
85 ld t3,8(a0)
86 ld ta0,16(a1)
87 ld ta1,16(a0)
88 daddu t1,v0 /* destination word + carry-in */
89 sltu v0,t1,v0 /* All manuals say it "compares 32-bit
90 * values", but it seems to work fine
91 * even on 64-bit registers. */
92 mflo AT
93 mfhi t0
94 daddu t1,AT /* + low half of the product */
95 daddu v0,t0 /* carry-out starts from the high half of the product */
96 sltu AT,t1,AT
97 sd t1,0(a0)
98 daddu v0,AT /* plus the carry of the low-half addition */
99/* second word (offset 8) */
100 dmultu t2,a3
101 ld ta2,24(a1)
102 ld ta3,24(a0)
103 daddu t3,v0
104 sltu v0,t3,v0
105 mflo AT
106 mfhi t2
107 daddu t3,AT
108 daddu v0,t2
109 sltu AT,t3,AT
110 sd t3,8(a0)
111 daddu v0,AT
112/* third word; count and both pointers advance here, so the remaining stores use negative offsets */
113 dmultu ta0,a3
114 subu a2,4
115 PTR_ADD a0,32
116 PTR_ADD a1,32
117 daddu ta1,v0
118 sltu v0,ta1,v0
119 mflo AT
120 mfhi ta0
121 daddu ta1,AT
122 daddu v0,ta0
123 sltu AT,ta1,AT
124 sd ta1,-16(a0)
125 daddu v0,AT
126
127/* fourth word; ta0 is recycled as the "four or more words left" test */
128 dmultu ta2,a3
129 and ta0,a2,MINUS4
130 daddu ta3,v0
131 sltu v0,ta3,v0
132 mflo AT
133 mfhi ta2
134 daddu ta3,AT
135 daddu v0,ta2
136 sltu AT,ta3,AT
137 sd ta3,-8(a0)
138 daddu v0,AT
139 .set noreorder
140 bgtzl ta0,.L_bn_mul_add_words_loop /* another full group of four remains */
141 ld t0,0(a1) /* delay slot (taken only): preload next source word */
142/* loop done: fall into the tail only if words remain */
143 bnezl a2,.L_bn_mul_add_words_tail
144 ld t0,0(a1) /* delay slot (taken only): preload next source word */
145 .set reorder
146
147.L_bn_mul_add_words_return:
148 jr ra /* v0 = carry */
149/* Tail: up to three remaining words handled one at a time; t0 holds the word at 0(a1) on entry. */
150.L_bn_mul_add_words_tail:
151 dmultu t0,a3
152 ld t1,0(a0)
153 subu a2,1
154 daddu t1,v0
155 sltu v0,t1,v0
156 mflo AT
157 mfhi t0
158 daddu t1,AT
159 daddu v0,t0
160 sltu AT,t1,AT
161 sd t1,0(a0)
162 daddu v0,AT
163 beqz a2,.L_bn_mul_add_words_return
164/* second tail word */
165 ld t0,8(a1)
166 dmultu t0,a3
167 ld t1,8(a0)
168 subu a2,1
169 daddu t1,v0
170 sltu v0,t1,v0
171 mflo AT
172 mfhi t0
173 daddu t1,AT
174 daddu v0,t0
175 sltu AT,t1,AT
176 sd t1,8(a0)
177 daddu v0,AT
178 beqz a2,.L_bn_mul_add_words_return
179/* third and last tail word: no count check needed */
180 ld t0,16(a1)
181 dmultu t0,a3
182 ld t1,16(a0)
183 daddu t1,v0
184 sltu v0,t1,v0
185 mflo AT
186 mfhi t0
187 daddu t1,AT
188 daddu v0,t0
189 sltu AT,t1,AT
190 sd t1,16(a0)
191 daddu v0,AT
192 jr ra
193END(bn_mul_add_words)
194/* bn_mul_words: a0 = destination array, a1 = source array, a2 = word count, a3 = multiplier; stores the low words of a1[i]*a3 plus carry into a0[i] and returns the final carry word in v0. */
195.align 5
196LEAF(bn_mul_words)
197 .set noreorder
198 bgtzl a2,.L_bn_mul_words_proceed /* count > 0: proceed; delay slot runs only if taken */
199 ld t0,0(a1) /* delay slot: preload the first source word */
200 jr ra /* count <= 0: nothing to do */
201 move v0,zero /* delay slot: return 0 */
202 .set reorder
203
204.L_bn_mul_words_proceed:
205 li MINUS4,-4
206 and ta0,a2,MINUS4 /* ta0 = count with its low two bits cleared */
207 move v0,zero /* v0 = running carry */
208 beqz ta0,.L_bn_mul_words_tail /* fewer than four words: tail only */
209/* Unrolled loop: four words per pass; t0 already holds the word at 0(a1). */
210.L_bn_mul_words_loop:
211 dmultu t0,a3
212 ld t2,8(a1)
213 ld ta0,16(a1)
214 ld ta2,24(a1)
215 mflo AT
216 mfhi t0
217 daddu v0,AT /* low half of the product + carry-in */
218 sltu t1,v0,AT /* t1 = carry of that addition */
219 sd v0,0(a0)
220 daddu v0,t1,t0 /* new carry = high half + addition carry */
221/* second word; count and both pointers advance here, so the remaining stores use negative offsets */
222 dmultu t2,a3
223 subu a2,4
224 PTR_ADD a0,32
225 PTR_ADD a1,32
226 mflo AT
227 mfhi t2
228 daddu v0,AT
229 sltu t3,v0,AT
230 sd v0,-24(a0)
231 daddu v0,t3,t2
232/* third word */
233 dmultu ta0,a3
234 mflo AT
235 mfhi ta0
236 daddu v0,AT
237 sltu ta1,v0,AT
238 sd v0,-16(a0)
239 daddu v0,ta1,ta0
240
241/* fourth word; ta0 is recycled as the "four or more words left" test */
242 dmultu ta2,a3
243 and ta0,a2,MINUS4
244 mflo AT
245 mfhi ta2
246 daddu v0,AT
247 sltu ta3,v0,AT
248 sd v0,-8(a0)
249 daddu v0,ta3,ta2
250 .set noreorder
251 bgtzl ta0,.L_bn_mul_words_loop /* another full group of four remains */
252 ld t0,0(a1) /* delay slot (taken only): preload next source word */
253/* loop done: fall into the tail only if words remain */
254 bnezl a2,.L_bn_mul_words_tail
255 ld t0,0(a1) /* delay slot (taken only): preload next source word */
256 .set reorder
257
258.L_bn_mul_words_return:
259 jr ra /* v0 = carry */
260/* Tail: up to three remaining words handled one at a time; t0 holds the word at 0(a1) on entry. */
261.L_bn_mul_words_tail:
262 dmultu t0,a3
263 subu a2,1
264 mflo AT
265 mfhi t0
266 daddu v0,AT
267 sltu t1,v0,AT
268 sd v0,0(a0)
269 daddu v0,t1,t0
270 beqz a2,.L_bn_mul_words_return
271/* second tail word */
272 ld t0,8(a1)
273 dmultu t0,a3
274 subu a2,1
275 mflo AT
276 mfhi t0
277 daddu v0,AT
278 sltu t1,v0,AT
279 sd v0,8(a0)
280 daddu v0,t1,t0
281 beqz a2,.L_bn_mul_words_return
282/* third and last tail word: no count check needed */
283 ld t0,16(a1)
284 dmultu t0,a3
285 mflo AT
286 mfhi t0
287 daddu v0,AT
288 sltu t1,v0,AT
289 sd v0,16(a0)
290 daddu v0,t1,t0
291 jr ra
292END(bn_mul_words)
293/* bn_sqr_words: a0 = destination array (two words written per input word), a1 = source array, a2 = word count; stores the 128-bit square of each a1[i] as low word then high word. v0 is zero on return. */
294.align 5
295LEAF(bn_sqr_words)
296 .set noreorder
297 bgtzl a2,.L_bn_sqr_words_proceed /* count > 0: proceed; delay slot runs only if taken */
298 ld t0,0(a1) /* delay slot: preload the first source word */
299 jr ra /* count <= 0: nothing to do */
300 move v0,zero /* delay slot: return 0 */
301 .set reorder
302
303.L_bn_sqr_words_proceed:
304 li MINUS4,-4
305 and ta0,a2,MINUS4 /* ta0 = count with its low two bits cleared */
306 move v0,zero /* v0 stays zero for every exit path below */
307 beqz ta0,.L_bn_sqr_words_tail /* fewer than four words: tail only */
308/* Unrolled loop: four input words (eight output words) per pass. */
309.L_bn_sqr_words_loop:
310 dmultu t0,t0
311 ld t2,8(a1)
312 ld ta0,16(a1)
313 ld ta2,24(a1)
314 mflo t1
315 mfhi t0
316 sd t1,0(a0) /* low word of the square */
317 sd t0,8(a0) /* high word of the square */
318/* second word; the destination advances by 64 bytes and the source by 32, so later stores use negative offsets */
319 dmultu t2,t2
320 subu a2,4
321 PTR_ADD a0,64
322 PTR_ADD a1,32
323 mflo t3
324 mfhi t2
325 sd t3,-48(a0)
326 sd t2,-40(a0)
327/* third word */
328 dmultu ta0,ta0
329 mflo ta1
330 mfhi ta0
331 sd ta1,-32(a0)
332 sd ta0,-24(a0)
333
334/* fourth word; ta0 is recycled as the "four or more words left" test */
335 dmultu ta2,ta2
336 and ta0,a2,MINUS4
337 mflo ta3
338 mfhi ta2
339 sd ta3,-16(a0)
340 sd ta2,-8(a0)
341
342 .set noreorder
343 bgtzl ta0,.L_bn_sqr_words_loop /* another full group of four remains */
344 ld t0,0(a1) /* delay slot (taken only): preload next source word */
345/* loop done: fall into the tail only if words remain */
346 bnezl a2,.L_bn_sqr_words_tail
347 ld t0,0(a1) /* delay slot (taken only): preload next source word */
348 .set reorder
349
350.L_bn_sqr_words_return:
351 move v0,zero
352 jr ra
353/* Tail: up to three remaining words handled one at a time; t0 holds the word at 0(a1) on entry. */
354.L_bn_sqr_words_tail:
355 dmultu t0,t0
356 subu a2,1
357 mflo t1
358 mfhi t0
359 sd t1,0(a0)
360 sd t0,8(a0)
361 beqz a2,.L_bn_sqr_words_return
362/* second tail word */
363 ld t0,8(a1)
364 dmultu t0,t0
365 subu a2,1
366 mflo t1
367 mfhi t0
368 sd t1,16(a0)
369 sd t0,24(a0)
370 beqz a2,.L_bn_sqr_words_return
371/* third and last tail word: no count check needed */
372 ld t0,16(a1)
373 dmultu t0,t0
374 mflo t1
375 mfhi t0
376 sd t1,32(a0)
377 sd t0,40(a0)
378 jr ra
379END(bn_sqr_words)
380/* bn_add_words: a0 = result array, a1 and a2 = source arrays, a3 = word count; stores a1[i]+a2[i]+carry into a0[i] and returns the final carry in v0. */
381.align 5
382LEAF(bn_add_words)
383 .set noreorder
384 bgtzl a3,.L_bn_add_words_proceed /* count > 0: proceed; delay slot runs only if taken */
385 ld t0,0(a1) /* delay slot: preload the first word of a1 */
386 jr ra /* count <= 0: nothing to do */
387 move v0,zero /* delay slot: return 0 */
388 .set reorder
389
390.L_bn_add_words_proceed:
391 li MINUS4,-4
392 and AT,a3,MINUS4 /* AT = count with its low two bits cleared */
393 move v0,zero /* v0 = running carry */
394 beqz AT,.L_bn_add_words_tail /* fewer than four words: tail only */
395/* Unrolled loop: four words per pass; t0 already holds the word at 0(a1). */
396.L_bn_add_words_loop:
397 ld ta0,0(a2)
398 ld t1,8(a1)
399 ld ta1,8(a2)
400 ld t2,16(a1)
401 ld ta2,16(a2)
402 ld t3,24(a1)
403 ld ta3,24(a2)
404 daddu ta0,t0 /* ta0 = sum of the two source words */
405 subu a3,4
406 sltu t8,ta0,t0 /* t8 = carry out of that sum */
407 daddu t0,ta0,v0 /* add the carry-in */
408 PTR_ADD a0,32
409 sltu v0,t0,ta0 /* carry out of the carry-in addition */
410 sd t0,-32(a0)
411 daddu v0,t8 /* combined carry-out */
412/* second word */
413 daddu ta1,t1
414 PTR_ADD a1,32
415 sltu t9,ta1,t1
416 daddu t1,ta1,v0
417 PTR_ADD a2,32
418 sltu v0,t1,ta1
419 sd t1,-24(a0)
420 daddu v0,t9
421/* third word; AT is refreshed as the "four or more words left" test */
422 daddu ta2,t2
423 and AT,a3,MINUS4
424 sltu t8,ta2,t2
425 daddu t2,ta2,v0
426 sltu v0,t2,ta2
427 sd t2,-16(a0)
428 daddu v0,t8
429/* fourth word */
430 daddu ta3,t3
431 sltu t9,ta3,t3
432 daddu t3,ta3,v0
433 sltu v0,t3,ta3
434 sd t3,-8(a0)
435 daddu v0,t9
436
437 .set noreorder
438 bgtzl AT,.L_bn_add_words_loop /* another full group of four remains */
439 ld t0,0(a1) /* delay slot (taken only): preload next word of a1 */
440/* loop done: fall into the tail only if words remain */
441 bnezl a3,.L_bn_add_words_tail
442 ld t0,0(a1) /* delay slot (taken only): preload next word of a1 */
443 .set reorder
444
445.L_bn_add_words_return:
446 jr ra /* v0 = carry */
447/* Tail: up to three remaining words handled one at a time; t0 holds the word at 0(a1) on entry. */
448.L_bn_add_words_tail:
449 ld ta0,0(a2)
450 daddu ta0,t0
451 subu a3,1
452 sltu t8,ta0,t0
453 daddu t0,ta0,v0
454 sltu v0,t0,ta0
455 sd t0,0(a0)
456 daddu v0,t8
457 beqz a3,.L_bn_add_words_return
458/* second tail word */
459 ld t1,8(a1)
460 ld ta1,8(a2)
461 daddu ta1,t1
462 subu a3,1
463 sltu t9,ta1,t1
464 daddu t1,ta1,v0
465 sltu v0,t1,ta1
466 sd t1,8(a0)
467 daddu v0,t9
468 beqz a3,.L_bn_add_words_return
469/* third and last tail word: no count check needed */
470 ld t2,16(a1)
471 ld ta2,16(a2)
472 daddu ta2,t2
473 sltu t8,ta2,t2
474 daddu t2,ta2,v0
475 sltu v0,t2,ta2
476 sd t2,16(a0)
477 daddu v0,t8
478 jr ra
479END(bn_add_words)
480/* bn_sub_words: a0 = result array, a1 = minuend array, a2 = subtrahend array, a3 = word count; stores a1[i]-a2[i]-borrow into a0[i] and returns the final borrow in v0. */
481.align 5
482LEAF(bn_sub_words)
483 .set noreorder
484 bgtzl a3,.L_bn_sub_words_proceed /* count > 0: proceed; delay slot runs only if taken */
485 ld t0,0(a1) /* delay slot: preload the first word of a1 */
486 jr ra /* count <= 0: nothing to do */
487 move v0,zero /* delay slot: return 0 */
488 .set reorder
489
490.L_bn_sub_words_proceed:
491 li MINUS4,-4
492 and AT,a3,MINUS4 /* AT = count with its low two bits cleared */
493 move v0,zero /* v0 = running borrow */
494 beqz AT,.L_bn_sub_words_tail /* fewer than four words: tail only */
495/* Unrolled loop: four words per pass; t0 already holds the word at 0(a1). */
496.L_bn_sub_words_loop:
497 ld ta0,0(a2)
498 ld t1,8(a1)
499 ld ta1,8(a2)
500 ld t2,16(a1)
501 ld ta2,16(a2)
502 ld t3,24(a1)
503 ld ta3,24(a2)
504 sltu t8,t0,ta0 /* t8 = 1 when the minuend word is smaller than the subtrahend word */
505 dsubu t0,ta0 /* t0 = difference without borrow-in */
506 subu a3,4
507 dsubu ta0,t0,v0 /* result word = difference - borrow-in */
508 and AT,a3,MINUS4 /* AT = "four or more words left" test for the loop branch */
509 sd ta0,0(a0)
510 MOVNZ (t0,v0,t8) /* difference != 0: borrow-out = t8; difference == 0: incoming borrow passes through */
511
512 sltu t9,t1,ta1
513 dsubu t1,ta1
514 PTR_ADD a0,32
515 dsubu ta1,t1,v0
516 PTR_ADD a1,32
517 sd ta1,-24(a0)
518 MOVNZ (t1,v0,t9)
519
520
521 sltu t8,t2,ta2
522 dsubu t2,ta2
523 dsubu ta2,t2,v0
524 PTR_ADD a2,32
525 sd ta2,-16(a0)
526 MOVNZ (t2,v0,t8)
527
528 sltu t9,t3,ta3
529 dsubu t3,ta3
530 dsubu ta3,t3,v0
531 sd ta3,-8(a0)
532 MOVNZ (t3,v0,t9)
533
534 .set noreorder
535 bgtzl AT,.L_bn_sub_words_loop /* another full group of four remains */
536 ld t0,0(a1) /* delay slot (taken only): preload next word of a1 */
537/* loop done: fall into the tail only if words remain */
538 bnezl a3,.L_bn_sub_words_tail
539 ld t0,0(a1) /* delay slot (taken only): preload next word of a1 */
540 .set reorder
541
542.L_bn_sub_words_return:
543 jr ra /* v0 = borrow */
544/* Tail: up to three remaining words handled one at a time; t0 holds the word at 0(a1) on entry. */
545.L_bn_sub_words_tail:
546 ld ta0,0(a2)
547 subu a3,1
548 sltu t8,t0,ta0
549 dsubu t0,ta0
550 dsubu ta0,t0,v0
551 MOVNZ (t0,v0,t8)
552 sd ta0,0(a0)
553 beqz a3,.L_bn_sub_words_return
554/* second tail word */
555 ld t1,8(a1)
556 subu a3,1
557 ld ta1,8(a2)
558 sltu t9,t1,ta1
559 dsubu t1,ta1
560 dsubu ta1,t1,v0
561 MOVNZ (t1,v0,t9)
562 sd ta1,8(a0)
563 beqz a3,.L_bn_sub_words_return
564/* third and last tail word: no count check needed */
565 ld t2,16(a1)
566 ld ta2,16(a2)
567 sltu t8,t2,ta2
568 dsubu t2,ta2
569 dsubu ta2,t2,v0
570 MOVNZ (t2,v0,t8)
571 sd ta2,16(a0)
572 jr ra
573END(bn_sub_words)
574
575#undef MINUS4
576/* bn_div_words: a0 = high dividend word, a1 = low dividend word, a2 = divisor; returns the quotient in v0 and leaves the remainder in v1. A zero divisor returns -1. a2 is shifted for normalization and shifted back before return. */
577.align 5
578LEAF(bn_div_words)
579 .set noreorder
580 bnezl a2,.L_bn_div_words_proceed /* divisor != 0: proceed; delay slot runs only if taken */
581 move v1,zero /* delay slot: v1 = 0, copied into the shift count t9 below */
582 jr ra
583 li v0,-1 /* I'd rather signal div-by-zero
584 * which can be done with 'break 7' */
585
586.L_bn_div_words_proceed:
587 bltz a2,.L_bn_div_words_body /* top bit already set: no normalization needed */
588 move t9,v1 /* delay slot: t9 = shift count = 0 */
589 dsll a2,1 /* shift the divisor left ... */
590 bgtz a2,.-4 /* ... until its top bit is set (branches back to the dsll) */
591 addu t9,1 /* delay slot: count each shift */
592
593 .set reorder
594 negu t1,t9 /* as a variable shift amount this acts as 64 - t9 */
595 li t2,-1
596 dsll t2,t1 /* mask of the top t9 bits */
597 and t2,a0 /* nonzero if shifting a0 would lose bits */
598 dsrl AT,a1,t1 /* bits of a1 that move up into a0 */
599 .set noreorder
600 bnezl t2,.+8 /* lost bits: the break in the delay slot runs only when taken */
601 break 6 /* signal overflow */
602 .set reorder
603 dsll a0,t9
604 dsll a1,t9
605 or a0,AT /* a0:a1 is now the dividend shifted left by t9 */
606
607#define QT ta0
608#define HH ta1
609#define DH v1
610.L_bn_div_words_body:
611 dsrl DH,a2,32 /* DH = upper 32 bits of the divisor */
612 sgeu AT,a0,a2
613 .set noreorder
614 bnezl AT,.+8 /* a0 >= a2: subtract once (delay slot runs only when taken) */
615 dsubu a0,a2
616 .set reorder
617/* First quotient digit (upper 32 bits of the result). */
618 li QT,-1
619 dsrl HH,a0,32
620 dsrl QT,32 /* q=0xffffffff */
621 beq DH,HH,.L_bn_div_words_skip_div1 /* equal high halves: keep the maximum estimate */
622 ddivu zero,a0,DH
623 mflo QT /* otherwise estimate q = a0 / DH */
624.L_bn_div_words_skip_div1:
625 dmultu a2,QT
626 dsll t3,a0,32
627 dsrl AT,a1,32
628 or t3,AT /* t3 = low half of a0 joined with high half of a1 */
629 mflo t0
630 mfhi t1 /* t1:t0 = a2 * QT */
631.L_bn_div_words_inner_loop1:
632 sltu t2,t3,t0
633 seq t8,HH,t1
634 sltu AT,HH,t1
635 and t2,t8
636 or AT,t2 /* AT = (t1:t0 > HH:t3), i.e. the estimate is still too large */
637 .set noreorder
638 beqz AT,.L_bn_div_words_inner_loop1_done
639 sltu t2,t0,a2 /* delay slot (always runs): borrow for the t0 -= a2 below */
640 .set reorder
641 dsubu QT,1 /* lower the estimate and remove one multiple of a2 from the product */
642 dsubu t0,a2
643 dsubu t1,t2
644 b .L_bn_div_words_inner_loop1
645.L_bn_div_words_inner_loop1_done:
646
647 dsll a1,32
648 dsubu a0,t3,t0 /* partial remainder for the next digit */
649 dsll v0,QT,32 /* place the first digit in the upper half of the quotient */
650/* Second quotient digit (lower 32 bits of the result); same procedure as above. */
651 li QT,-1
652 dsrl HH,a0,32
653 dsrl QT,32 /* q=0xffffffff */
654 beq DH,HH,.L_bn_div_words_skip_div2
655 ddivu zero,a0,DH
656 mflo QT
657.L_bn_div_words_skip_div2:
658 dmultu a2,QT
659 dsll t3,a0,32
660 dsrl AT,a1,32
661 or t3,AT
662 mflo t0
663 mfhi t1
664.L_bn_div_words_inner_loop2:
665 sltu t2,t3,t0
666 seq t8,HH,t1
667 sltu AT,HH,t1
668 and t2,t8
669 or AT,t2
670 .set noreorder
671 beqz AT,.L_bn_div_words_inner_loop2_done
672 sltu t2,t0,a2 /* delay slot (always runs): borrow for the t0 -= a2 below */
673 .set reorder
674 dsubu QT,1
675 dsubu t0,a2
676 dsubu t1,t2
677 b .L_bn_div_words_inner_loop2
678.L_bn_div_words_inner_loop2_done:
679
680 dsubu a0,t3,t0
681 or v0,QT /* merge the second digit into the quotient */
682 dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
683 dsrl a2,t9 /* restore a2 */
684 jr ra
685#undef HH
686#undef DH
687#undef QT
688END(bn_div_words)
689/* bn_div_3_words: a0 = pointer whose words at offsets 0, -8 and -16 are read, a1 = divisor word passed on to bn_div_words, a2 = second word used to refine the estimate; returns the quotient estimate in v0. */
690.align 5
691LEAF(bn_div_3_words)
692 .set reorder
693 move a3,a0 /* we know that bn_div_words doesn't
694 * touch a3, ta2, ta3 and preserves a2
695 * so that we can save two arguments
696 * and return address in registers
697 * instead of stack:-)
698 */
699 ld a0,(a3)
700 move ta2,a2
701 move a2,a1
702 ld a1,-8(a3)
703 move ta3,ra /* keep the return address across the jal below */
704 move v1,zero
705 li v0,-1 /* default estimate, kept when the division is skipped */
706 beq a0,a2,.L_bn_div_3_words_skip_div
707 jal bn_div_words /* v0 = quotient, v1 = remainder of a0:a1 / a2 */
708 move ra,ta3
709.L_bn_div_3_words_skip_div:
710 dmultu ta2,v0
711 ld t2,-16(a3)
712 mflo t0
713 mfhi t1 /* t1:t0 = ta2 * estimate */
714.L_bn_div_3_words_inner_loop:
715 sgeu AT,t2,t0
716 seq t9,t1,v1
717 sltu t8,t1,v1
718 and AT,t9
719 or AT,t8 /* AT = (t1:t0 <= v1:t2): estimate is small enough */
720 bnez AT,.L_bn_div_3_words_inner_loop_done
721 daddu v1,a2 /* otherwise add the divisor back to the remainder ... */
722 sltu t3,t0,ta2
723 sltu AT,v1,a2 /* AT = that addition wrapped around */
724 dsubu v0,1 /* ... lower the estimate ... */
725 dsubu t0,ta2 /* ... and remove one multiple of ta2 from the product */
726 dsubu t1,t3
727 beqz AT,.L_bn_div_3_words_inner_loop /* repeat unless the remainder wrapped */
728.L_bn_div_3_words_inner_loop_done:
729 jr ra
730END(bn_div_3_words)
731/* Register aliases shared by the comba routines: a_N / b_N hold input words a[N] / b[N]. */
732#define a_0 t0
733#define a_1 t1
734#define a_2 t2
735#define a_3 t3
736#define b_0 ta0
737#define b_1 ta1
738#define b_2 ta2
739#define b_3 ta3
740/* Words 4..6 live in s0-s5, which bn_mul_comba8 saves and restores; words 7 reuse the pointer registers. */
741#define a_4 s0
742#define a_5 s2
743#define a_6 s4
744#define a_7 a1 /* once we load a[7] we don't need a anymore */
745#define b_4 s1
746#define b_5 s3
747#define b_6 s5
748#define b_7 a2 /* once we load b[7] we don't need b anymore */
749/* t_1 / t_2 receive the low / high half of each product. */
750#define t_1 t8
751#define t_2 t9
752/* c_1..c_3 form the three-word column accumulator; their roles rotate from column to column. */
753#define c_1 v0
754#define c_2 v1
755#define c_3 a3
756/* Stack bytes bn_mul_comba8 reserves for s0-s5 (six 8-byte slots). */
757#define FRAME_SIZE 48
758/* bn_mul_comba8: a0 = 16-word result, a1 and a2 = 8-word inputs; builds the product one result word (column) at a time, with c_1/c_2/c_3 rotating as a three-word accumulator. s0-s5 are saved in a FRAME_SIZE-byte frame. */
759.align 5
760LEAF(bn_mul_comba8)
761 .set noreorder
762 PTR_SUB sp,FRAME_SIZE
763 .frame sp,FRAME_SIZE,ra /* frame size must match the sp adjustment above (was a stale literal 64) */
764 .set reorder
765 ld a_0,0(a1) /* If compiled with -mips3 option on
766 * R5000 box assembler barks on this
767 * line with "shouldn't have mult/div
768 * as last instruction in bb (R10K
769 * bug)" warning. If anybody out there
770 * has a clue about how to circumvent
771 * this do send me a note.
772 * <appro@fy.chalmers.se>
773 */
774 ld b_0,0(a2)
775 ld a_1,8(a1)
776 ld a_2,16(a1)
777 ld a_3,24(a1)
778 ld b_1,8(a2)
779 ld b_2,16(a2)
780 ld b_3,24(a2)
781 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
782 sd s0,0(sp)
783 sd s1,8(sp)
784 sd s2,16(sp)
785 sd s3,24(sp)
786 sd s4,32(sp)
787 sd s5,40(sp)
788 mflo c_1
789 mfhi c_2
790/* r[1]: products a[i]*b[j] with i+j == 1; the remaining input words are loaded along the way */
791 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
792 ld a_4,32(a1)
793 ld a_5,40(a1)
794 ld a_6,48(a1)
795 ld a_7,56(a1)
796 ld b_4,32(a2)
797 ld b_5,40(a2)
798 mflo t_1
799 mfhi t_2
800 daddu c_2,t_1
801 sltu AT,c_2,t_1
802 daddu c_3,t_2,AT
803 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
804 ld b_6,48(a2)
805 ld b_7,56(a2)
806 sd c_1,0(a0) /* r[0]=c1; */
807 mflo t_1
808 mfhi t_2
809 daddu c_2,t_1
810 sltu AT,c_2,t_1
811 daddu t_2,AT
812 daddu c_3,t_2
813 sltu c_1,c_3,t_2
814 sd c_2,8(a0) /* r[1]=c2; */
815/* r[2]: products with i+j == 2 */
816 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
817 mflo t_1
818 mfhi t_2
819 daddu c_3,t_1
820 sltu AT,c_3,t_1
821 daddu t_2,AT
822 daddu c_1,t_2
823 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
824 mflo t_1
825 mfhi t_2
826 daddu c_3,t_1
827 sltu AT,c_3,t_1
828 daddu t_2,AT
829 daddu c_1,t_2
830 sltu c_2,c_1,t_2
831 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
832 mflo t_1
833 mfhi t_2
834 daddu c_3,t_1
835 sltu AT,c_3,t_1
836 daddu t_2,AT
837 daddu c_1,t_2
838 sltu AT,c_1,t_2
839 daddu c_2,AT
840 sd c_3,16(a0) /* r[2]=c3; */
841/* r[3]: products with i+j == 3 */
842 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
843 mflo t_1
844 mfhi t_2
845 daddu c_1,t_1
846 sltu AT,c_1,t_1
847 daddu t_2,AT
848 daddu c_2,t_2
849 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
850 mflo t_1
851 mfhi t_2
852 daddu c_1,t_1
853 sltu AT,c_1,t_1
854 daddu t_2,AT
855 daddu c_2,t_2
856 sltu c_3,c_2,t_2
857 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
858 mflo t_1
859 mfhi t_2
860 daddu c_1,t_1
861 sltu AT,c_1,t_1
862 daddu t_2,AT
863 daddu c_2,t_2
864 sltu AT,c_2,t_2
865 daddu c_3,AT
866 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
867 mflo t_1
868 mfhi t_2
869 daddu c_1,t_1
870 sltu AT,c_1,t_1
871 daddu t_2,AT
872 daddu c_2,t_2
873 sltu AT,c_2,t_2
874 daddu c_3,AT
875 sd c_1,24(a0) /* r[3]=c1; */
876/* r[4]: products with i+j == 4 */
877 dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */
878 mflo t_1
879 mfhi t_2
880 daddu c_2,t_1
881 sltu AT,c_2,t_1
882 daddu t_2,AT
883 daddu c_3,t_2
884 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
885 mflo t_1
886 mfhi t_2
887 daddu c_2,t_1
888 sltu AT,c_2,t_1
889 daddu t_2,AT
890 daddu c_3,t_2
891 sltu c_1,c_3,t_2
892 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
893 mflo t_1
894 mfhi t_2
895 daddu c_2,t_1
896 sltu AT,c_2,t_1
897 daddu t_2,AT
898 daddu c_3,t_2
899 sltu AT,c_3,t_2
900 daddu c_1,AT
901 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
902 mflo t_1
903 mfhi t_2
904 daddu c_2,t_1
905 sltu AT,c_2,t_1
906 daddu t_2,AT
907 daddu c_3,t_2
908 sltu AT,c_3,t_2
909 daddu c_1,AT
910 dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */
911 mflo t_1
912 mfhi t_2
913 daddu c_2,t_1
914 sltu AT,c_2,t_1
915 daddu t_2,AT
916 daddu c_3,t_2
917 sltu AT,c_3,t_2
918 daddu c_1,AT
919 sd c_2,32(a0) /* r[4]=c2; */
920/* r[5]: products with i+j == 5 */
921 dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */
922 mflo t_1
923 mfhi t_2
924 daddu c_3,t_1
925 sltu AT,c_3,t_1
926 daddu t_2,AT
927 daddu c_1,t_2
928 dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */
929 mflo t_1
930 mfhi t_2
931 daddu c_3,t_1
932 sltu AT,c_3,t_1
933 daddu t_2,AT
934 daddu c_1,t_2
935 sltu c_2,c_1,t_2
936 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
937 mflo t_1
938 mfhi t_2
939 daddu c_3,t_1
940 sltu AT,c_3,t_1
941 daddu t_2,AT
942 daddu c_1,t_2
943 sltu AT,c_1,t_2
944 daddu c_2,AT
945 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
946 mflo t_1
947 mfhi t_2
948 daddu c_3,t_1
949 sltu AT,c_3,t_1
950 daddu t_2,AT
951 daddu c_1,t_2
952 sltu AT,c_1,t_2
953 daddu c_2,AT
954 dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */
955 mflo t_1
956 mfhi t_2
957 daddu c_3,t_1
958 sltu AT,c_3,t_1
959 daddu t_2,AT
960 daddu c_1,t_2
961 sltu AT,c_1,t_2
962 daddu c_2,AT
963 dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */
964 mflo t_1
965 mfhi t_2
966 daddu c_3,t_1
967 sltu AT,c_3,t_1
968 daddu t_2,AT
969 daddu c_1,t_2
970 sltu AT,c_1,t_2
971 daddu c_2,AT
972 sd c_3,40(a0) /* r[5]=c3; */
973/* r[6]: products with i+j == 6 */
974 dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */
975 mflo t_1
976 mfhi t_2
977 daddu c_1,t_1
978 sltu AT,c_1,t_1
979 daddu t_2,AT
980 daddu c_2,t_2
981 dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */
982 mflo t_1
983 mfhi t_2
984 daddu c_1,t_1
985 sltu AT,c_1,t_1
986 daddu t_2,AT
987 daddu c_2,t_2
988 sltu c_3,c_2,t_2
989 dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */
990 mflo t_1
991 mfhi t_2
992 daddu c_1,t_1
993 sltu AT,c_1,t_1
994 daddu t_2,AT
995 daddu c_2,t_2
996 sltu AT,c_2,t_2
997 daddu c_3,AT
998 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
999 mflo t_1
1000 mfhi t_2
1001 daddu c_1,t_1
1002 sltu AT,c_1,t_1
1003 daddu t_2,AT
1004 daddu c_2,t_2
1005 sltu AT,c_2,t_2
1006 daddu c_3,AT
1007 dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */
1008 mflo t_1
1009 mfhi t_2
1010 daddu c_1,t_1
1011 sltu AT,c_1,t_1
1012 daddu t_2,AT
1013 daddu c_2,t_2
1014 sltu AT,c_2,t_2
1015 daddu c_3,AT
1016 dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */
1017 mflo t_1
1018 mfhi t_2
1019 daddu c_1,t_1
1020 sltu AT,c_1,t_1
1021 daddu t_2,AT
1022 daddu c_2,t_2
1023 sltu AT,c_2,t_2
1024 daddu c_3,AT
1025 dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */
1026 mflo t_1
1027 mfhi t_2
1028 daddu c_1,t_1
1029 sltu AT,c_1,t_1
1030 daddu t_2,AT
1031 daddu c_2,t_2
1032 sltu AT,c_2,t_2
1033 daddu c_3,AT
1034 sd c_1,48(a0) /* r[6]=c1; */
1035/* r[7]: products with i+j == 7 */
1036 dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */
1037 mflo t_1
1038 mfhi t_2
1039 daddu c_2,t_1
1040 sltu AT,c_2,t_1
1041 daddu t_2,AT
1042 daddu c_3,t_2
1043 dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */
1044 mflo t_1
1045 mfhi t_2
1046 daddu c_2,t_1
1047 sltu AT,c_2,t_1
1048 daddu t_2,AT
1049 daddu c_3,t_2
1050 sltu c_1,c_3,t_2
1051 dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */
1052 mflo t_1
1053 mfhi t_2
1054 daddu c_2,t_1
1055 sltu AT,c_2,t_1
1056 daddu t_2,AT
1057 daddu c_3,t_2
1058 sltu AT,c_3,t_2
1059 daddu c_1,AT
1060 dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */
1061 mflo t_1
1062 mfhi t_2
1063 daddu c_2,t_1
1064 sltu AT,c_2,t_1
1065 daddu t_2,AT
1066 daddu c_3,t_2
1067 sltu AT,c_3,t_2
1068 daddu c_1,AT
1069 dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */
1070 mflo t_1
1071 mfhi t_2
1072 daddu c_2,t_1
1073 sltu AT,c_2,t_1
1074 daddu t_2,AT
1075 daddu c_3,t_2
1076 sltu AT,c_3,t_2
1077 daddu c_1,AT
1078 dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */
1079 mflo t_1
1080 mfhi t_2
1081 daddu c_2,t_1
1082 sltu AT,c_2,t_1
1083 daddu t_2,AT
1084 daddu c_3,t_2
1085 sltu AT,c_3,t_2
1086 daddu c_1,AT
1087 dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */
1088 mflo t_1
1089 mfhi t_2
1090 daddu c_2,t_1
1091 sltu AT,c_2,t_1
1092 daddu t_2,AT
1093 daddu c_3,t_2
1094 sltu AT,c_3,t_2
1095 daddu c_1,AT
1096 dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */
1097 mflo t_1
1098 mfhi t_2
1099 daddu c_2,t_1
1100 sltu AT,c_2,t_1
1101 daddu t_2,AT
1102 daddu c_3,t_2
1103 sltu AT,c_3,t_2
1104 daddu c_1,AT
1105 sd c_2,56(a0) /* r[7]=c2; */
1106/* r[8]: products with i+j == 8 */
1107 dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */
1108 mflo t_1
1109 mfhi t_2
1110 daddu c_3,t_1
1111 sltu AT,c_3,t_1
1112 daddu t_2,AT
1113 daddu c_1,t_2
1114 dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */
1115 mflo t_1
1116 mfhi t_2
1117 daddu c_3,t_1
1118 sltu AT,c_3,t_1
1119 daddu t_2,AT
1120 daddu c_1,t_2
1121 sltu c_2,c_1,t_2
1122 dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */
1123 mflo t_1
1124 mfhi t_2
1125 daddu c_3,t_1
1126 sltu AT,c_3,t_1
1127 daddu t_2,AT
1128 daddu c_1,t_2
1129 sltu AT,c_1,t_2
1130 daddu c_2,AT
1131 dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
1132 mflo t_1
1133 mfhi t_2
1134 daddu c_3,t_1
1135 sltu AT,c_3,t_1
1136 daddu t_2,AT
1137 daddu c_1,t_2
1138 sltu AT,c_1,t_2
1139 daddu c_2,AT
1140 dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */
1141 mflo t_1
1142 mfhi t_2
1143 daddu c_3,t_1
1144 sltu AT,c_3,t_1
1145 daddu t_2,AT
1146 daddu c_1,t_2
1147 sltu AT,c_1,t_2
1148 daddu c_2,AT
1149 dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */
1150 mflo t_1
1151 mfhi t_2
1152 daddu c_3,t_1
1153 sltu AT,c_3,t_1
1154 daddu t_2,AT
1155 daddu c_1,t_2
1156 sltu AT,c_1,t_2
1157 daddu c_2,AT
1158 dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */
1159 mflo t_1
1160 mfhi t_2
1161 daddu c_3,t_1
1162 sltu AT,c_3,t_1
1163 daddu t_2,AT
1164 daddu c_1,t_2
1165 sltu AT,c_1,t_2
1166 daddu c_2,AT
1167 sd c_3,64(a0) /* r[8]=c3; */
1168/* r[9]: products with i+j == 9 */
1169 dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */
1170 mflo t_1
1171 mfhi t_2
1172 daddu c_1,t_1
1173 sltu AT,c_1,t_1
1174 daddu t_2,AT
1175 daddu c_2,t_2
1176 dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */
1177 mflo t_1
1178 mfhi t_2
1179 daddu c_1,t_1
1180 sltu AT,c_1,t_1
1181 daddu t_2,AT
1182 daddu c_2,t_2
1183 sltu c_3,c_2,t_2
1184 dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */
1185 mflo t_1
1186 mfhi t_2
1187 daddu c_1,t_1
1188 sltu AT,c_1,t_1
1189 daddu t_2,AT
1190 daddu c_2,t_2
1191 sltu AT,c_2,t_2
1192 daddu c_3,AT
1193 dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */
1194 mflo t_1
1195 mfhi t_2
1196 daddu c_1,t_1
1197 sltu AT,c_1,t_1
1198 daddu t_2,AT
1199 daddu c_2,t_2
1200 sltu AT,c_2,t_2
1201 daddu c_3,AT
1202 dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */
1203 mflo t_1
1204 mfhi t_2
1205 daddu c_1,t_1
1206 sltu AT,c_1,t_1
1207 daddu t_2,AT
1208 daddu c_2,t_2
1209 sltu AT,c_2,t_2
1210 daddu c_3,AT
1211 dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */
1212 mflo t_1
1213 mfhi t_2
1214 daddu c_1,t_1
1215 sltu AT,c_1,t_1
1216 daddu t_2,AT
1217 daddu c_2,t_2
1218 sltu AT,c_2,t_2
1219 daddu c_3,AT
1220 sd c_1,72(a0) /* r[9]=c1; */
1221/* r[10]: products with i+j == 10 */
1222 dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */
1223 mflo t_1
1224 mfhi t_2
1225 daddu c_2,t_1
1226 sltu AT,c_2,t_1
1227 daddu t_2,AT
1228 daddu c_3,t_2
1229 dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */
1230 mflo t_1
1231 mfhi t_2
1232 daddu c_2,t_1
1233 sltu AT,c_2,t_1
1234 daddu t_2,AT
1235 daddu c_3,t_2
1236 sltu c_1,c_3,t_2
1237 dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1238 mflo t_1
1239 mfhi t_2
1240 daddu c_2,t_1
1241 sltu AT,c_2,t_1
1242 daddu t_2,AT
1243 daddu c_3,t_2
1244 sltu AT,c_3,t_2
1245 daddu c_1,AT
1246 dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */
1247 mflo t_1
1248 mfhi t_2
1249 daddu c_2,t_1
1250 sltu AT,c_2,t_1
1251 daddu t_2,AT
1252 daddu c_3,t_2
1253 sltu AT,c_3,t_2
1254 daddu c_1,AT
1255 dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */
1256 mflo t_1
1257 mfhi t_2
1258 daddu c_2,t_1
1259 sltu AT,c_2,t_1
1260 daddu t_2,AT
1261 daddu c_3,t_2
1262 sltu AT,c_3,t_2
1263 daddu c_1,AT
1264 sd c_2,80(a0) /* r[10]=c2; */
1265/* r[11]: products with i+j == 11 */
1266 dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */
1267 mflo t_1
1268 mfhi t_2
1269 daddu c_3,t_1
1270 sltu AT,c_3,t_1
1271 daddu t_2,AT
1272 daddu c_1,t_2
1273 dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */
1274 mflo t_1
1275 mfhi t_2
1276 daddu c_3,t_1
1277 sltu AT,c_3,t_1
1278 daddu t_2,AT
1279 daddu c_1,t_2
1280 sltu c_2,c_1,t_2
1281 dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */
1282 mflo t_1
1283 mfhi t_2
1284 daddu c_3,t_1
1285 sltu AT,c_3,t_1
1286 daddu t_2,AT
1287 daddu c_1,t_2
1288 sltu AT,c_1,t_2
1289 daddu c_2,AT
1290 dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */
1291 mflo t_1
1292 mfhi t_2
1293 daddu c_3,t_1
1294 sltu AT,c_3,t_1
1295 daddu t_2,AT
1296 daddu c_1,t_2
1297 sltu AT,c_1,t_2
1298 daddu c_2,AT
1299 sd c_3,88(a0) /* r[11]=c3; */
1300/* r[12]: products with i+j == 12 */
1301 dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */
1302 mflo t_1
1303 mfhi t_2
1304 daddu c_1,t_1
1305 sltu AT,c_1,t_1
1306 daddu t_2,AT
1307 daddu c_2,t_2
1308 dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
1309 mflo t_1
1310 mfhi t_2
1311 daddu c_1,t_1
1312 sltu AT,c_1,t_1
1313 daddu t_2,AT
1314 daddu c_2,t_2
1315 sltu c_3,c_2,t_2
1316 dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */
1317 mflo t_1
1318 mfhi t_2
1319 daddu c_1,t_1
1320 sltu AT,c_1,t_1
1321 daddu t_2,AT
1322 daddu c_2,t_2
1323 sltu AT,c_2,t_2
1324 daddu c_3,AT
1325 sd c_1,96(a0) /* r[12]=c1; */
1326/* r[13]: products with i+j == 13 */
1327 dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */
1328 mflo t_1
1329 mfhi t_2
1330 daddu c_2,t_1
1331 sltu AT,c_2,t_1
1332 daddu t_2,AT
1333 daddu c_3,t_2
1334 dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */
1335 mflo t_1
1336 mfhi t_2
1337 daddu c_2,t_1
1338 sltu AT,c_2,t_1
1339 daddu t_2,AT
1340 daddu c_3,t_2
1341 sltu c_1,c_3,t_2
1342 sd c_2,104(a0) /* r[13]=c2; */
1343/* r[14] and r[15]: the last product; s0-s5 are reloaded from the frame once the multiply has been issued */
1344 dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
1345 ld s0,0(sp)
1346 ld s1,8(sp)
1347 ld s2,16(sp)
1348 ld s3,24(sp)
1349 ld s4,32(sp)
1350 ld s5,40(sp)
1351 mflo t_1
1352 mfhi t_2
1353 daddu c_3,t_1
1354 sltu AT,c_3,t_1
1355 daddu t_2,AT
1356 daddu c_1,t_2
1357 sd c_3,112(a0) /* r[14]=c3; */
1358 sd c_1,120(a0) /* r[15]=c1; */
1359/* release the register save area */
1360 PTR_ADD sp,FRAME_SIZE
1361
1362 jr ra
1363END(bn_mul_comba8)
1364/* bn_mul_comba4: a0 = 8-word result, a1 and a2 = 4-word inputs; builds the product one result word (column) at a time, with c_1/c_2/c_3 rotating as a three-word accumulator. No stack frame is used. */
1365.align 5
1366LEAF(bn_mul_comba4)
1367 .set reorder
1368 ld a_0,0(a1)
1369 ld b_0,0(a2)
1370 ld a_1,8(a1)
1371 ld a_2,16(a1)
1372 dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
1373 ld a_3,24(a1)
1374 ld b_1,8(a2)
1375 ld b_2,16(a2)
1376 ld b_3,24(a2)
1377 mflo c_1
1378 mfhi c_2
1379 sd c_1,0(a0) /* r[0]=c1; */
1380/* r[1]: products a[i]*b[j] with i+j == 1 */
1381 dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
1382 mflo t_1
1383 mfhi t_2
1384 daddu c_2,t_1
1385 sltu AT,c_2,t_1
1386 daddu c_3,t_2,AT
1387 dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
1388 mflo t_1
1389 mfhi t_2
1390 daddu c_2,t_1
1391 sltu AT,c_2,t_1
1392 daddu t_2,AT
1393 daddu c_3,t_2
1394 sltu c_1,c_3,t_2
1395 sd c_2,8(a0) /* r[1]=c2; */
1396/* r[2]: products with i+j == 2 */
1397 dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
1398 mflo t_1
1399 mfhi t_2
1400 daddu c_3,t_1
1401 sltu AT,c_3,t_1
1402 daddu t_2,AT
1403 daddu c_1,t_2
1404 dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1405 mflo t_1
1406 mfhi t_2
1407 daddu c_3,t_1
1408 sltu AT,c_3,t_1
1409 daddu t_2,AT
1410 daddu c_1,t_2
1411 sltu c_2,c_1,t_2
1412 dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
1413 mflo t_1
1414 mfhi t_2
1415 daddu c_3,t_1
1416 sltu AT,c_3,t_1
1417 daddu t_2,AT
1418 daddu c_1,t_2
1419 sltu AT,c_1,t_2
1420 daddu c_2,AT
1421 sd c_3,16(a0) /* r[2]=c3; */
1422/* r[3]: products with i+j == 3 */
1423 dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
1424 mflo t_1
1425 mfhi t_2
1426 daddu c_1,t_1
1427 sltu AT,c_1,t_1
1428 daddu t_2,AT
1429 daddu c_2,t_2
1430 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
1431 mflo t_1
1432 mfhi t_2
1433 daddu c_1,t_1
1434 sltu AT,c_1,t_1
1435 daddu t_2,AT
1436 daddu c_2,t_2
1437 sltu c_3,c_2,t_2
1438 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
1439 mflo t_1
1440 mfhi t_2
1441 daddu c_1,t_1
1442 sltu AT,c_1,t_1
1443 daddu t_2,AT
1444 daddu c_2,t_2
1445 sltu AT,c_2,t_2
1446 daddu c_3,AT
1447 dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
1448 mflo t_1
1449 mfhi t_2
1450 daddu c_1,t_1
1451 sltu AT,c_1,t_1
1452 daddu t_2,AT
1453 daddu c_2,t_2
1454 sltu AT,c_2,t_2
1455 daddu c_3,AT
1456 sd c_1,24(a0) /* r[3]=c1; */
1457/* r[4]: products with i+j == 4 */
1458 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
1459 mflo t_1
1460 mfhi t_2
1461 daddu c_2,t_1
1462 sltu AT,c_2,t_1
1463 daddu t_2,AT
1464 daddu c_3,t_2
1465 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1466 mflo t_1
1467 mfhi t_2
1468 daddu c_2,t_1
1469 sltu AT,c_2,t_1
1470 daddu t_2,AT
1471 daddu c_3,t_2
1472 sltu c_1,c_3,t_2
1473 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
1474 mflo t_1
1475 mfhi t_2
1476 daddu c_2,t_1
1477 sltu AT,c_2,t_1
1478 daddu t_2,AT
1479 daddu c_3,t_2
1480 sltu AT,c_3,t_2
1481 daddu c_1,AT
1482 sd c_2,32(a0) /* r[4]=c2; */
1483/* r[5]: products with i+j == 5 */
1484 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
1485 mflo t_1
1486 mfhi t_2
1487 daddu c_3,t_1
1488 sltu AT,c_3,t_1
1489 daddu t_2,AT
1490 daddu c_1,t_2
1491 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
1492 mflo t_1
1493 mfhi t_2
1494 daddu c_3,t_1
1495 sltu AT,c_3,t_1
1496 daddu t_2,AT
1497 daddu c_1,t_2
1498 sltu c_2,c_1,t_2
1499 sd c_3,40(a0) /* r[5]=c3; */
1500/* r[6] and r[7]: the last product */
1501 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
1502 mflo t_1
1503 mfhi t_2
1504 daddu c_1,t_1
1505 sltu AT,c_1,t_1
1506 daddu t_2,AT
1507 daddu c_2,t_2
1508 sd c_1,48(a0) /* r[6]=c1; */
1509 sd c_2,56(a0) /* r[7]=c2; */
1510
1511 jr ra
1512END(bn_mul_comba4)
1513/* For the squaring code below, a_4..a_7 are remapped onto the registers behind b_0..b_3 (ta0..ta3); bn_sqr_comba8 loads all eight words from a1 only. */
1514#undef a_4
1515#undef a_5
1516#undef a_6
1517#undef a_7
1518#define a_4 b_0
1519#define a_5 b_1
1520#define a_6 b_2
1521#define a_7 b_3
1522
/*
 * bn_sqr_comba8: r[0..15] = a[0..7]^2, column by column (comba).
 *
 * In:   a0 = r, base of the 16-word result  (stored at 0..120(a0))
 *       a1 = a, base of the 8-word operand  (loaded from 0..56(a1))
 * Uses: a_0..a_7  operand words
 *       c_1..c_3  rotating three-word column accumulator
 *       t_1,t_2   low/high half of the current product
 *       AT, a2    scratch (a2 is written before it is ever read)
 *
 * Notation in the per-product comments (lo,mid,hi name the accumulator
 * words for the current column):
 *   mul_add_c (x,y,lo,mid,hi):  hi:mid:lo +=     x*y
 *   mul_add_c2(x,y,lo,mid,hi):  hi:mid:lo += 2 * x*y
 * mul_add_c2 adds the product twice.  Every addition into `mid' can
 * carry, and both carries have to reach `hi'.  For the first product of
 * a column `hi' is the word just stored for the previous column, so it
 * is (re)initialised from the first carry and the second one is added.
 * Earlier revisions dropped that first carry, which produced a wrong
 * square e.g. when every input word is 0xffffffffffffffff.
 */
.align	5
LEAF(bn_sqr_comba8)
	.set	reorder
	ld	a_0,0(a1)
	ld	a_1,8(a1)
	ld	a_2,16(a1)
	ld	a_3,24(a1)

	/* column 0 */
	dmultu	a_0,a_0		/* mul_add_c(a[0],a[0],c1,c2,c3); */
	ld	a_4,32(a1)	/* remaining loads overlap the multiply */
	ld	a_5,40(a1)
	ld	a_6,48(a1)
	ld	a_7,56(a1)
	mflo	c_1
	mfhi	c_2
	sd	c_1,0(a0)

	/* column 1 */
	dmultu	a_0,a_1		/* mul_add_c2(a[0],a[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	c_3,t_2,AT	/* c3 starts from scratch, cannot wrap */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2
	sd	c_2,8(a0)

	/* column 2 */
	dmultu	a_2,a_0		/* mul_add_c2(a[2],a[0],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* c2 = carry of the first addition */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT		/* ... plus carry of the second one */
	dmultu	a_1,a_1		/* mul_add_c(a[1],a[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,16(a0)

	/* column 3 */
	dmultu	a_0,a_3		/* mul_add_c2(a[0],a[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* c3 = carry of the first addition */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_1,a_2		/* mul_add_c2(a[1],a[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,24(a0)

	/* column 4 */
	dmultu	a_4,a_0		/* mul_add_c2(a[4],a[0],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* c1 = carry of the first addition */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_3,a_1		/* mul_add_c2(a[3],a[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_2,a_2		/* mul_add_c(a[2],a[2],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,32(a0)

	/* column 5 */
	dmultu	a_0,a_5		/* mul_add_c2(a[0],a[5],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* c2 = carry of the first addition */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_1,a_4		/* mul_add_c2(a[1],a[4],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_2,a_3		/* mul_add_c2(a[2],a[3],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,40(a0)

	/* column 6 */
	dmultu	a_6,a_0		/* mul_add_c2(a[6],a[0],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* c3 = carry of the first addition */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_5,a_1		/* mul_add_c2(a[5],a[1],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_4,a_2		/* mul_add_c2(a[4],a[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_3,a_3		/* mul_add_c(a[3],a[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,48(a0)

	/* column 7 */
	dmultu	a_0,a_7		/* mul_add_c2(a[0],a[7],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* c1 = carry of the first addition */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_1,a_6		/* mul_add_c2(a[1],a[6],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_2,a_5		/* mul_add_c2(a[2],a[5],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_3,a_4		/* mul_add_c2(a[3],a[4],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,56(a0)

	/* column 8 */
	dmultu	a_7,a_1		/* mul_add_c2(a[7],a[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* c2 = carry of the first addition */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_6,a_2		/* mul_add_c2(a[6],a[2],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_5,a_3		/* mul_add_c2(a[5],a[3],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_4,a_4		/* mul_add_c(a[4],a[4],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,64(a0)

	/* column 9 */
	dmultu	a_2,a_7		/* mul_add_c2(a[2],a[7],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* c3 = carry of the first addition */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_3,a_6		/* mul_add_c2(a[3],a[6],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_4,a_5		/* mul_add_c2(a[4],a[5],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,72(a0)

	/* column 10 */
	dmultu	a_7,a_3		/* mul_add_c2(a[7],a[3],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* c1 = carry of the first addition */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_6,a_4		/* mul_add_c2(a[6],a[4],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	AT,c_3,a2
	daddu	c_1,AT
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_5,a_5		/* mul_add_c(a[5],a[5],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,80(a0)

	/* column 11 */
	dmultu	a_4,a_7		/* mul_add_c2(a[4],a[7],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* c2 = carry of the first addition */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_5,a_6		/* mul_add_c2(a[5],a[6],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	AT,c_1,a2
	daddu	c_2,AT
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,88(a0)

	/* column 12 */
	dmultu	a_7,a_5		/* mul_add_c2(a[7],a[5],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* c3 = carry of the first addition */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_6,a_6		/* mul_add_c(a[6],a[6],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,96(a0)

	/* column 13 */
	dmultu	a_6,a_7		/* mul_add_c2(a[6],a[7],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* c1 = carry of the first addition */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,104(a0)

	/* column 14, plus the final top word */
	dmultu	a_7,a_7		/* mul_add_c(a[7],a[7],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2		/* no carry-out is tracked past r[15] */
	sd	c_3,112(a0)
	sd	c_1,120(a0)

	jr	ra
END(bn_sqr_comba8)
2012
/*
 * bn_sqr_comba4: r[0..7] = a[0..3]^2, column by column (comba).
 *
 * In:   a0 = r, base of the 8-word result   (stored at 0..56(a0))
 *       a1 = a, base of the 4-word operand  (loaded from 0..24(a1))
 * Uses: a_0..a_3  operand words
 *       c_1..c_3  rotating three-word column accumulator
 *       t_1,t_2   low/high half of the current product
 *       AT, a2    scratch (a2 is written before it is ever read)
 *
 * Notation in the per-product comments (lo,mid,hi name the accumulator
 * words for the current column):
 *   mul_add_c (x,y,lo,mid,hi):  hi:mid:lo +=     x*y
 *   mul_add_c2(x,y,lo,mid,hi):  hi:mid:lo += 2 * x*y
 * mul_add_c2 adds the product twice.  Every addition into `mid' can
 * carry, and both carries have to reach `hi'.  For the first product of
 * a column `hi' is the word just stored for the previous column, so it
 * is (re)initialised from the first carry and the second one is added.
 * Earlier revisions dropped that first carry, which produced a wrong
 * square e.g. when every input word is 0xffffffffffffffff.
 */
.align	5
LEAF(bn_sqr_comba4)
	.set	reorder
	ld	a_0,0(a1)
	ld	a_1,8(a1)
	ld	a_2,16(a1)
	ld	a_3,24(a1)

	/* column 0 */
	dmultu	a_0,a_0		/* mul_add_c(a[0],a[0],c1,c2,c3); */
	mflo	c_1
	mfhi	c_2
	sd	c_1,0(a0)

	/* column 1 */
	dmultu	a_0,a_1		/* mul_add_c2(a[0],a[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	c_3,t_2,AT	/* c3 starts from scratch, cannot wrap */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2
	sd	c_2,8(a0)

	/* column 2 */
	dmultu	a_2,a_0		/* mul_add_c2(a[2],a[0],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* c2 = carry of the first addition */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT		/* ... plus carry of the second one */
	dmultu	a_1,a_1		/* mul_add_c(a[1],a[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,16(a0)

	/* column 3 */
	dmultu	a_0,a_3		/* mul_add_c2(a[0],a[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	c_3,c_2,a2	/* c3 = carry of the first addition */
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_1,a_2		/* mul_add_c2(a[1],a[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	a2,t_2,AT
	daddu	c_2,a2
	sltu	AT,c_2,a2
	daddu	c_3,AT
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,24(a0)

	/* column 4 */
	dmultu	a_3,a_1		/* mul_add_c2(a[3],a[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	a2,t_2,AT
	daddu	c_3,a2
	sltu	c_1,c_3,a2	/* c1 = carry of the first addition */
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_2,a_2		/* mul_add_c(a[2],a[2],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,32(a0)

	/* column 5 */
	dmultu	a_2,a_3		/* mul_add_c2(a[2],a[3],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	a2,t_2,AT
	daddu	c_1,a2
	sltu	c_2,c_1,a2	/* c2 = carry of the first addition */
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,40(a0)

	/* column 6, plus the final top word */
	dmultu	a_3,a_3		/* mul_add_c(a[3],a[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2		/* no carry-out is tracked past r[7] */
	sd	c_1,48(a0)
	sd	c_2,56(a0)

	jr	ra
END(bn_sqr_comba4)