summary refs log tree commit diff
path: root/src/lib/libcrypto/bn/asm/mips3.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/asm/mips3.s')
-rw-r--r-- src/lib/libcrypto/bn/asm/mips3.s 573
1 files changed, 318 insertions, 255 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips3.s b/src/lib/libcrypto/bn/asm/mips3.s
index 191345d920..dca4105c7d 100644
--- a/src/lib/libcrypto/bn/asm/mips3.s
+++ b/src/lib/libcrypto/bn/asm/mips3.s
@@ -1,5 +1,5 @@
1.rdata 1.rdata
2.asciiz "mips3.s, Version 1.0" 2.asciiz "mips3.s, Version 1.1"
3.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" 3.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
4 4
5/* 5/*
@@ -395,32 +395,32 @@ LEAF(bn_add_words)
395 395
396.L_bn_add_words_loop: 396.L_bn_add_words_loop:
397 ld ta0,0(a2) 397 ld ta0,0(a2)
398 subu a3,4
398 ld t1,8(a1) 399 ld t1,8(a1)
399 ld ta1,8(a2) 400 and AT,a3,MINUS4
400 ld t2,16(a1) 401 ld t2,16(a1)
401 ld ta2,16(a2) 402 PTR_ADD a2,32
402 ld t3,24(a1) 403 ld t3,24(a1)
403 ld ta3,24(a2) 404 PTR_ADD a0,32
405 ld ta1,-24(a2)
406 PTR_ADD a1,32
407 ld ta2,-16(a2)
408 ld ta3,-8(a2)
404 daddu ta0,t0 409 daddu ta0,t0
405 subu a3,4
406 sltu t8,ta0,t0 410 sltu t8,ta0,t0
407 daddu t0,ta0,v0 411 daddu t0,ta0,v0
408 PTR_ADD a0,32
409 sltu v0,t0,ta0 412 sltu v0,t0,ta0
410 sd t0,-32(a0) 413 sd t0,-32(a0)
411 daddu v0,t8 414 daddu v0,t8
412 415
413 daddu ta1,t1 416 daddu ta1,t1
414 PTR_ADD a1,32
415 sltu t9,ta1,t1 417 sltu t9,ta1,t1
416 daddu t1,ta1,v0 418 daddu t1,ta1,v0
417 PTR_ADD a2,32
418 sltu v0,t1,ta1 419 sltu v0,t1,ta1
419 sd t1,-24(a0) 420 sd t1,-24(a0)
420 daddu v0,t9 421 daddu v0,t9
421 422
422 daddu ta2,t2 423 daddu ta2,t2
423 and AT,a3,MINUS4
424 sltu t8,ta2,t2 424 sltu t8,ta2,t2
425 daddu t2,ta2,v0 425 daddu t2,ta2,v0
426 sltu v0,t2,ta2 426 sltu v0,t2,ta2
@@ -495,25 +495,26 @@ LEAF(bn_sub_words)
495 495
496.L_bn_sub_words_loop: 496.L_bn_sub_words_loop:
497 ld ta0,0(a2) 497 ld ta0,0(a2)
498 subu a3,4
498 ld t1,8(a1) 499 ld t1,8(a1)
499 ld ta1,8(a2) 500 and AT,a3,MINUS4
500 ld t2,16(a1) 501 ld t2,16(a1)
501 ld ta2,16(a2) 502 PTR_ADD a2,32
502 ld t3,24(a1) 503 ld t3,24(a1)
503 ld ta3,24(a2) 504 PTR_ADD a0,32
505 ld ta1,-24(a2)
506 PTR_ADD a1,32
507 ld ta2,-16(a2)
508 ld ta3,-8(a2)
504 sltu t8,t0,ta0 509 sltu t8,t0,ta0
505 dsubu t0,ta0 510 dsubu t0,ta0
506 subu a3,4
507 dsubu ta0,t0,v0 511 dsubu ta0,t0,v0
508 and AT,a3,MINUS4 512 sd ta0,-32(a0)
509 sd ta0,0(a0)
510 MOVNZ (t0,v0,t8) 513 MOVNZ (t0,v0,t8)
511 514
512 sltu t9,t1,ta1 515 sltu t9,t1,ta1
513 dsubu t1,ta1 516 dsubu t1,ta1
514 PTR_ADD a0,32
515 dsubu ta1,t1,v0 517 dsubu ta1,t1,v0
516 PTR_ADD a1,32
517 sd ta1,-24(a0) 518 sd ta1,-24(a0)
518 MOVNZ (t1,v0,t9) 519 MOVNZ (t1,v0,t9)
519 520
@@ -521,7 +522,6 @@ LEAF(bn_sub_words)
521 sltu t8,t2,ta2 522 sltu t8,t2,ta2
522 dsubu t2,ta2 523 dsubu t2,ta2
523 dsubu ta2,t2,v0 524 dsubu ta2,t2,v0
524 PTR_ADD a2,32
525 sd ta2,-16(a0) 525 sd ta2,-16(a0)
526 MOVNZ (t2,v0,t8) 526 MOVNZ (t2,v0,t8)
527 527
@@ -574,6 +574,51 @@ END(bn_sub_words)
574 574
575#undef MINUS4 575#undef MINUS4
576 576
/*
 * bn_div_3_words -- copy that sits ahead of bn_div_words in the new column.
 *
 * In:   a0 = pointer; the doublewords at 0(a0), -8(a0) and -16(a0) are read
 *       a1 = multiplier word (parked in ta2 across the call)
 *       a2 = word left in place as the third operand of bn_div_words
 * Out:  v0 = -1 when the word at 0(a0) equals a2; otherwise the quotient
 *       returned by bn_div_words, lowered by one per correction pass below
 * Uses: a3, ta0, ta2, ta3, t0-t3, t8, t9, AT, v1, HI/LO, plus whatever
 *       bn_div_words itself touches.  This routine does not use the stack.
 * NOTE(review): looks like the usual quotient-word estimate for multi-word
 * division -- confirm the operand meaning against the C caller.
 */
577.align 5
578LEAF(bn_div_3_words)
579 .set reorder /* assembler manages branch delay slots from here on */
580 move a3,a0 /* we know that bn_div_words doesn't
581 * touch a3, ta2, ta3 and preserves a2
582 * so that we can save two arguments
583 * and return address in registers
584 * instead of stack:-)
585 */
586 ld a0,(a3) /* a0 = word at 0(a3): first operand for bn_div_words */
587 move ta2,a1 /* park incoming a1 in a register the callee leaves alone */
588 ld a1,-8(a3) /* a1 = word at -8(a3): second operand */
589 bne a0,a2,.L_bn_div_3_words_proceed /* divide only when a0 != a2 */
590 li v0,-1 /* a0 == a2: return all ones without dividing */
591 jr ra
592.L_bn_div_3_words_proceed:
593 move ta3,ra /* bal overwrites ra, so keep the caller's return address */
594 bal bn_div_words /* v0 = quotient, v1 = remainder (per bn_div_words) */
595 move ra,ta3
596 dmultu ta2,v0 /* HI:LO = ta2 * quotient */
597 ld t2,-16(a3) /* t2 = word at -16(a3) */
598 move ta0,zero
599 mfhi t1 /* t1:t0 = the 128-bit product */
600 mflo t0
601 sltu t8,t1,v1 /* t8 = (t1 < v1): first exit test */
602.L_bn_div_3_words_inner_loop:
603 bnez t8,.L_bn_div_3_words_inner_loop_done /* done: t1 < v1, or v1 wrapped */
604 sgeu AT,t2,t0 /* AT = (t2 >= t0) */
605 seq t9,t1,v1
606 and AT,t9 /* AT = (t1 == v1) && (t2 >= t0) */
607 sltu t3,t0,ta2 /* borrow for the low-word subtraction, taken before it */
608 daddu v1,a2 /* v1 += a2 */
609 dsubu t1,t3
610 dsubu t0,ta2 /* t1:t0 -= ta2 */
611 sltu t8,t1,v1 /* exit test for the next pass, on the updated values */
612 sltu ta0,v1,a2 /* ta0 = carry out of v1 += a2 */
613 or t8,ta0
614 .set noreorder /* the delay slot below is placed by hand */
615 beqzl AT,.L_bn_div_3_words_inner_loop /* AT == 0 means t1:t0 > v1:t2 -- retry */
616 dsubu v0,1 /* branch-likely delay slot: runs only when the branch is taken */
617 .set reorder
618.L_bn_div_3_words_inner_loop_done:
619 jr ra
620END(bn_div_3_words)
621
577.align 5 622.align 5
578LEAF(bn_div_words) 623LEAF(bn_div_words)
579 .set noreorder 624 .set noreorder
@@ -633,16 +678,16 @@ LEAF(bn_div_words)
633 seq t8,HH,t1 678 seq t8,HH,t1
634 sltu AT,HH,t1 679 sltu AT,HH,t1
635 and t2,t8 680 and t2,t8
681 sltu v0,t0,a2
636 or AT,t2 682 or AT,t2
637 .set noreorder 683 .set noreorder
638 beqz AT,.L_bn_div_words_inner_loop1_done 684 beqz AT,.L_bn_div_words_inner_loop1_done
639 sltu t2,t0,a2 685 dsubu t1,v0
640 .set reorder
641 dsubu QT,1
642 dsubu t0,a2 686 dsubu t0,a2
643 dsubu t1,t2
644 b .L_bn_div_words_inner_loop1 687 b .L_bn_div_words_inner_loop1
645.L_bn_div_words_inner_loop1_done: 688 dsubu QT,1
689 .set reorder
690.L_bn_div_words_inner_loop1_done:
646 691
647 dsll a1,32 692 dsll a1,32
648 dsubu a0,t3,t0 693 dsubu a0,t3,t0
@@ -655,6 +700,7 @@ LEAF(bn_div_words)
655 ddivu zero,a0,DH 700 ddivu zero,a0,DH
656 mflo QT 701 mflo QT
657.L_bn_div_words_skip_div2: 702.L_bn_div_words_skip_div2:
703#undef DH
658 dmultu a2,QT 704 dmultu a2,QT
659 dsll t3,a0,32 705 dsll t3,a0,32
660 dsrl AT,a1,32 706 dsrl AT,a1,32
@@ -666,69 +712,26 @@ LEAF(bn_div_words)
666 seq t8,HH,t1 712 seq t8,HH,t1
667 sltu AT,HH,t1 713 sltu AT,HH,t1
668 and t2,t8 714 and t2,t8
715 sltu v1,t0,a2
669 or AT,t2 716 or AT,t2
670 .set noreorder 717 .set noreorder
671 beqz AT,.L_bn_div_words_inner_loop2_done 718 beqz AT,.L_bn_div_words_inner_loop2_done
672 sltu t2,t0,a2 719 dsubu t1,v1
673 .set reorder
674 dsubu QT,1
675 dsubu t0,a2 720 dsubu t0,a2
676 dsubu t1,t2
677 b .L_bn_div_words_inner_loop2 721 b .L_bn_div_words_inner_loop2
722 dsubu QT,1
723 .set reorder
678.L_bn_div_words_inner_loop2_done: 724.L_bn_div_words_inner_loop2_done:
725#undef HH
679 726
680 dsubu a0,t3,t0 727 dsubu a0,t3,t0
681 or v0,QT 728 or v0,QT
682 dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ 729 dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
683 dsrl a2,t9 /* restore a2 */ 730 dsrl a2,t9 /* restore a2 */
684 jr ra 731 jr ra
685#undef HH
686#undef DH
687#undef QT 732#undef QT
688END(bn_div_words) 733END(bn_div_words)
689 734
/*
 * bn_div_3_words -- old-column copy, located after bn_div_words.
 *
 * In:   a0 = pointer; the doublewords at 0(a0), -8(a0) and -16(a0) are read
 *       a1 = word moved into a2 to become the third operand of bn_div_words
 *       a2 = multiplier word (parked in ta2)
 * Out:  v0 = quotient estimate: starts as -1 (division skipped) or as the
 *       bn_div_words result, then drops by one per correction pass
 * Uses: a2, a3, ta2, ta3, t0-t3, t8, t9, AT, v1, HI/LO, plus whatever
 *       bn_div_words itself touches.  This routine does not use the stack.
 * NOTE(review): operand meaning is inferred from register use only --
 * confirm against the C caller.
 */
690.align 5
691LEAF(bn_div_3_words)
692 .set reorder /* assembler manages branch delay slots for the whole routine */
693 move a3,a0 /* we know that bn_div_words doesn't
694 * touch a3, ta2, ta3 and preserves a2
695 * so that we can save two arguments
696 * and return address in registers
697 * instead of stack:-)
698 */
699 ld a0,(a3) /* a0 = word at 0(a3): first operand for bn_div_words */
700 move ta2,a2 /* park incoming a2 in a register the callee leaves alone */
701 move a2,a1 /* incoming a1 becomes the third operand */
702 ld a1,-8(a3) /* a1 = word at -8(a3): second operand */
703 move ta3,ra /* jal overwrites ra, so keep the caller's return address */
704 move v1,zero /* defaults used when the division is skipped: v1 = 0 ... */
705 li v0,-1 /* ... and v0 = all ones */
706 beq a0,a2,.L_bn_div_3_words_skip_div /* a0 == a2: keep the defaults */
707 jal bn_div_words /* v0 = quotient, v1 = remainder (per bn_div_words) */
708 move ra,ta3
709.L_bn_div_3_words_skip_div:
710 dmultu ta2,v0 /* HI:LO = ta2 * v0 */
711 ld t2,-16(a3) /* t2 = word at -16(a3) */
712 mflo t0 /* t1:t0 = the 128-bit product */
713 mfhi t1
714.L_bn_div_3_words_inner_loop:
715 sgeu AT,t2,t0 /* AT = (t2 >= t0) */
716 seq t9,t1,v1
717 sltu t8,t1,v1
718 and AT,t9
719 or AT,t8 /* AT = (t1 < v1) || (t1 == v1 && t2 >= t0) */
720 bnez AT,.L_bn_div_3_words_inner_loop_done /* t1:t0 <= v1:t2: v0 is final */
721 daddu v1,a2 /* v1 += a2 */
722 sltu t3,t0,ta2 /* borrow for the low-word subtraction, taken before it */
723 sltu AT,v1,a2 /* AT = carry out of v1 += a2 */
724 dsubu v0,1 /* take one off the estimate */
725 dsubu t0,ta2 /* t1:t0 -= ta2 */
726 dsubu t1,t3
727 beqz AT,.L_bn_div_3_words_inner_loop /* go again unless v1 wrapped */
728.L_bn_div_3_words_inner_loop_done:
729 jr ra
730END(bn_div_3_words)
731
732#define a_0 t0 735#define a_0 t0
733#define a_1 t1 736#define a_1 t1
734#define a_2 t2 737#define a_2 t2
@@ -846,6 +849,7 @@ LEAF(bn_mul_comba8)
846 sltu AT,c_1,t_1 849 sltu AT,c_1,t_1
847 daddu t_2,AT 850 daddu t_2,AT
848 daddu c_2,t_2 851 daddu c_2,t_2
852 sltu c_3,c_2,t_2
849 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ 853 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
850 mflo t_1 854 mflo t_1
851 mfhi t_2 855 mfhi t_2
@@ -853,7 +857,8 @@ LEAF(bn_mul_comba8)
853 sltu AT,c_1,t_1 857 sltu AT,c_1,t_1
854 daddu t_2,AT 858 daddu t_2,AT
855 daddu c_2,t_2 859 daddu c_2,t_2
856 sltu c_3,c_2,t_2 860 sltu AT,c_2,t_2
861 daddu c_3,AT
857 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ 862 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
858 mflo t_1 863 mflo t_1
859 mfhi t_2 864 mfhi t_2
@@ -881,6 +886,7 @@ LEAF(bn_mul_comba8)
881 sltu AT,c_2,t_1 886 sltu AT,c_2,t_1
882 daddu t_2,AT 887 daddu t_2,AT
883 daddu c_3,t_2 888 daddu c_3,t_2
889 sltu c_1,c_3,t_2
884 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ 890 dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
885 mflo t_1 891 mflo t_1
886 mfhi t_2 892 mfhi t_2
@@ -888,7 +894,8 @@ LEAF(bn_mul_comba8)
888 sltu AT,c_2,t_1 894 sltu AT,c_2,t_1
889 daddu t_2,AT 895 daddu t_2,AT
890 daddu c_3,t_2 896 daddu c_3,t_2
891 sltu c_1,c_3,t_2 897 sltu AT,c_3,t_2
898 daddu c_1,AT
892 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ 899 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
893 mflo t_1 900 mflo t_1
894 mfhi t_2 901 mfhi t_2
@@ -925,6 +932,7 @@ LEAF(bn_mul_comba8)
925 sltu AT,c_3,t_1 932 sltu AT,c_3,t_1
926 daddu t_2,AT 933 daddu t_2,AT
927 daddu c_1,t_2 934 daddu c_1,t_2
935 sltu c_2,c_1,t_2
928 dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ 936 dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */
929 mflo t_1 937 mflo t_1
930 mfhi t_2 938 mfhi t_2
@@ -932,7 +940,8 @@ LEAF(bn_mul_comba8)
932 sltu AT,c_3,t_1 940 sltu AT,c_3,t_1
933 daddu t_2,AT 941 daddu t_2,AT
934 daddu c_1,t_2 942 daddu c_1,t_2
935 sltu c_2,c_1,t_2 943 sltu AT,c_1,t_2
944 daddu c_2,AT
936 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ 945 dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
937 mflo t_1 946 mflo t_1
938 mfhi t_2 947 mfhi t_2
@@ -978,6 +987,7 @@ LEAF(bn_mul_comba8)
978 sltu AT,c_1,t_1 987 sltu AT,c_1,t_1
979 daddu t_2,AT 988 daddu t_2,AT
980 daddu c_2,t_2 989 daddu c_2,t_2
990 sltu c_3,c_2,t_2
981 dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ 991 dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */
982 mflo t_1 992 mflo t_1
983 mfhi t_2 993 mfhi t_2
@@ -985,7 +995,8 @@ LEAF(bn_mul_comba8)
985 sltu AT,c_1,t_1 995 sltu AT,c_1,t_1
986 daddu t_2,AT 996 daddu t_2,AT
987 daddu c_2,t_2 997 daddu c_2,t_2
988 sltu c_3,c_2,t_2 998 sltu AT,c_2,t_2
999 daddu c_3,AT
989 dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ 1000 dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */
990 mflo t_1 1001 mflo t_1
991 mfhi t_2 1002 mfhi t_2
@@ -1040,6 +1051,7 @@ LEAF(bn_mul_comba8)
1040 sltu AT,c_2,t_1 1051 sltu AT,c_2,t_1
1041 daddu t_2,AT 1052 daddu t_2,AT
1042 daddu c_3,t_2 1053 daddu c_3,t_2
1054 sltu c_1,c_3,t_2
1043 dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ 1055 dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */
1044 mflo t_1 1056 mflo t_1
1045 mfhi t_2 1057 mfhi t_2
@@ -1047,7 +1059,8 @@ LEAF(bn_mul_comba8)
1047 sltu AT,c_2,t_1 1059 sltu AT,c_2,t_1
1048 daddu t_2,AT 1060 daddu t_2,AT
1049 daddu c_3,t_2 1061 daddu c_3,t_2
1050 sltu c_1,c_3,t_2 1062 sltu AT,c_3,t_2
1063 daddu c_1,AT
1051 dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ 1064 dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */
1052 mflo t_1 1065 mflo t_1
1053 mfhi t_2 1066 mfhi t_2
@@ -1111,6 +1124,7 @@ LEAF(bn_mul_comba8)
1111 sltu AT,c_3,t_1 1124 sltu AT,c_3,t_1
1112 daddu t_2,AT 1125 daddu t_2,AT
1113 daddu c_1,t_2 1126 daddu c_1,t_2
1127 sltu c_2,c_1,t_2
1114 dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ 1128 dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */
1115 mflo t_1 1129 mflo t_1
1116 mfhi t_2 1130 mfhi t_2
@@ -1118,7 +1132,8 @@ LEAF(bn_mul_comba8)
1118 sltu AT,c_3,t_1 1132 sltu AT,c_3,t_1
1119 daddu t_2,AT 1133 daddu t_2,AT
1120 daddu c_1,t_2 1134 daddu c_1,t_2
1121 sltu c_2,c_1,t_2 1135 sltu AT,c_1,t_2
1136 daddu c_2,AT
1122 dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ 1137 dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */
1123 mflo t_1 1138 mflo t_1
1124 mfhi t_2 1139 mfhi t_2
@@ -1173,6 +1188,7 @@ LEAF(bn_mul_comba8)
1173 sltu AT,c_1,t_1 1188 sltu AT,c_1,t_1
1174 daddu t_2,AT 1189 daddu t_2,AT
1175 daddu c_2,t_2 1190 daddu c_2,t_2
1191 sltu c_3,c_2,t_2
1176 dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ 1192 dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */
1177 mflo t_1 1193 mflo t_1
1178 mfhi t_2 1194 mfhi t_2
@@ -1180,7 +1196,8 @@ LEAF(bn_mul_comba8)
1180 sltu AT,c_1,t_1 1196 sltu AT,c_1,t_1
1181 daddu t_2,AT 1197 daddu t_2,AT
1182 daddu c_2,t_2 1198 daddu c_2,t_2
1183 sltu c_3,c_2,t_2 1199 sltu AT,c_2,t_2
1200 daddu c_3,AT
1184 dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ 1201 dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */
1185 mflo t_1 1202 mflo t_1
1186 mfhi t_2 1203 mfhi t_2
@@ -1226,6 +1243,7 @@ LEAF(bn_mul_comba8)
1226 sltu AT,c_2,t_1 1243 sltu AT,c_2,t_1
1227 daddu t_2,AT 1244 daddu t_2,AT
1228 daddu c_3,t_2 1245 daddu c_3,t_2
1246 sltu c_1,c_3,t_2
1229 dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ 1247 dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */
1230 mflo t_1 1248 mflo t_1
1231 mfhi t_2 1249 mfhi t_2
@@ -1233,7 +1251,8 @@ LEAF(bn_mul_comba8)
1233 sltu AT,c_2,t_1 1251 sltu AT,c_2,t_1
1234 daddu t_2,AT 1252 daddu t_2,AT
1235 daddu c_3,t_2 1253 daddu c_3,t_2
1236 sltu c_1,c_3,t_2 1254 sltu AT,c_3,t_2
1255 daddu c_1,AT
1237 dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ 1256 dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
1238 mflo t_1 1257 mflo t_1
1239 mfhi t_2 1258 mfhi t_2
@@ -1270,6 +1289,7 @@ LEAF(bn_mul_comba8)
1270 sltu AT,c_3,t_1 1289 sltu AT,c_3,t_1
1271 daddu t_2,AT 1290 daddu t_2,AT
1272 daddu c_1,t_2 1291 daddu c_1,t_2
1292 sltu c_2,c_1,t_2
1273 dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ 1293 dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */
1274 mflo t_1 1294 mflo t_1
1275 mfhi t_2 1295 mfhi t_2
@@ -1277,7 +1297,8 @@ LEAF(bn_mul_comba8)
1277 sltu AT,c_3,t_1 1297 sltu AT,c_3,t_1
1278 daddu t_2,AT 1298 daddu t_2,AT
1279 daddu c_1,t_2 1299 daddu c_1,t_2
1280 sltu c_2,c_1,t_2 1300 sltu AT,c_1,t_2
1301 daddu c_2,AT
1281 dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ 1302 dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */
1282 mflo t_1 1303 mflo t_1
1283 mfhi t_2 1304 mfhi t_2
@@ -1305,6 +1326,7 @@ LEAF(bn_mul_comba8)
1305 sltu AT,c_1,t_1 1326 sltu AT,c_1,t_1
1306 daddu t_2,AT 1327 daddu t_2,AT
1307 daddu c_2,t_2 1328 daddu c_2,t_2
1329 sltu c_3,c_2,t_2
1308 dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ 1330 dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
1309 mflo t_1 1331 mflo t_1
1310 mfhi t_2 1332 mfhi t_2
@@ -1312,7 +1334,8 @@ LEAF(bn_mul_comba8)
1312 sltu AT,c_1,t_1 1334 sltu AT,c_1,t_1
1313 daddu t_2,AT 1335 daddu t_2,AT
1314 daddu c_2,t_2 1336 daddu c_2,t_2
1315 sltu c_3,c_2,t_2 1337 sltu AT,c_2,t_2
1338 daddu c_3,AT
1316 dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ 1339 dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */
1317 mflo t_1 1340 mflo t_1
1318 mfhi t_2 1341 mfhi t_2
@@ -1331,6 +1354,7 @@ LEAF(bn_mul_comba8)
1331 sltu AT,c_2,t_1 1354 sltu AT,c_2,t_1
1332 daddu t_2,AT 1355 daddu t_2,AT
1333 daddu c_3,t_2 1356 daddu c_3,t_2
1357 sltu c_1,c_3,t_2
1334 dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ 1358 dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */
1335 mflo t_1 1359 mflo t_1
1336 mfhi t_2 1360 mfhi t_2
@@ -1338,7 +1362,8 @@ LEAF(bn_mul_comba8)
1338 sltu AT,c_2,t_1 1362 sltu AT,c_2,t_1
1339 daddu t_2,AT 1363 daddu t_2,AT
1340 daddu c_3,t_2 1364 daddu c_3,t_2
1341 sltu c_1,c_3,t_2 1365 sltu AT,c_3,t_2
1366 daddu c_1,AT
1342 sd c_2,104(a0) /* r[13]=c2; */ 1367 sd c_2,104(a0) /* r[13]=c2; */
1343 1368
1344 dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ 1369 dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
@@ -1427,6 +1452,7 @@ LEAF(bn_mul_comba4)
1427 sltu AT,c_1,t_1 1452 sltu AT,c_1,t_1
1428 daddu t_2,AT 1453 daddu t_2,AT
1429 daddu c_2,t_2 1454 daddu c_2,t_2
1455 sltu c_3,c_2,t_2
1430 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ 1456 dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
1431 mflo t_1 1457 mflo t_1
1432 mfhi t_2 1458 mfhi t_2
@@ -1434,7 +1460,8 @@ LEAF(bn_mul_comba4)
1434 sltu AT,c_1,t_1 1460 sltu AT,c_1,t_1
1435 daddu t_2,AT 1461 daddu t_2,AT
1436 daddu c_2,t_2 1462 daddu c_2,t_2
1437 sltu c_3,c_2,t_2 1463 sltu AT,c_2,t_2
1464 daddu c_3,AT
1438 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ 1465 dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
1439 mflo t_1 1466 mflo t_1
1440 mfhi t_2 1467 mfhi t_2
@@ -1462,6 +1489,7 @@ LEAF(bn_mul_comba4)
1462 sltu AT,c_2,t_1 1489 sltu AT,c_2,t_1
1463 daddu t_2,AT 1490 daddu t_2,AT
1464 daddu c_3,t_2 1491 daddu c_3,t_2
1492 sltu c_1,c_3,t_2
1465 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ 1493 dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
1466 mflo t_1 1494 mflo t_1
1467 mfhi t_2 1495 mfhi t_2
@@ -1469,7 +1497,8 @@ LEAF(bn_mul_comba4)
1469 sltu AT,c_2,t_1 1497 sltu AT,c_2,t_1
1470 daddu t_2,AT 1498 daddu t_2,AT
1471 daddu c_3,t_2 1499 daddu c_3,t_2
1472 sltu c_1,c_3,t_2 1500 sltu AT,c_3,t_2
1501 daddu c_1,AT
1473 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ 1502 dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
1474 mflo t_1 1503 mflo t_1
1475 mfhi t_2 1504 mfhi t_2
@@ -1488,6 +1517,7 @@ LEAF(bn_mul_comba4)
1488 sltu AT,c_3,t_1 1517 sltu AT,c_3,t_1
1489 daddu t_2,AT 1518 daddu t_2,AT
1490 daddu c_1,t_2 1519 daddu c_1,t_2
1520 sltu c_2,c_1,t_2
1491 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ 1521 dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
1492 mflo t_1 1522 mflo t_1
1493 mfhi t_2 1523 mfhi t_2
@@ -1495,7 +1525,8 @@ LEAF(bn_mul_comba4)
1495 sltu AT,c_3,t_1 1525 sltu AT,c_3,t_1
1496 daddu t_2,AT 1526 daddu t_2,AT
1497 daddu c_1,t_2 1527 daddu c_1,t_2
1498 sltu c_2,c_1,t_2 1528 sltu AT,c_1,t_2
1529 daddu c_2,AT
1499 sd c_3,40(a0) 1530 sd c_3,40(a0)
1500 1531
1501 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ 1532 dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
@@ -1540,28 +1571,30 @@ LEAF(bn_sqr_comba8)
1540 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ 1571 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
1541 mflo t_1 1572 mflo t_1
1542 mfhi t_2 1573 mfhi t_2
1574 slt c_1,t_2,zero
1575 dsll t_2,1
1576 slt a2,t_1,zero
1577 daddu t_2,a2
1578 dsll t_1,1
1543 daddu c_2,t_1 1579 daddu c_2,t_1
1544 sltu AT,c_2,t_1 1580 sltu AT,c_2,t_1
1545 daddu c_3,t_2,AT 1581 daddu c_3,t_2,AT
1546 daddu c_2,t_1
1547 sltu AT,c_2,t_1
1548 daddu t_2,AT
1549 daddu c_3,t_2
1550 sltu c_1,c_3,t_2
1551 sd c_2,8(a0) 1582 sd c_2,8(a0)
1552 1583
1553 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ 1584 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
1554 mflo t_1 1585 mflo t_1
1555 mfhi t_2 1586 mfhi t_2
1556 daddu c_3,t_1 1587 slt c_2,t_2,zero
1557 sltu AT,c_3,t_1 1588 dsll t_2,1
1558 daddu a2,t_2,AT 1589 slt a2,t_1,zero
1559 daddu c_1,a2 1590 daddu t_2,a2
1591 dsll t_1,1
1560 daddu c_3,t_1 1592 daddu c_3,t_1
1561 sltu AT,c_3,t_1 1593 sltu AT,c_3,t_1
1562 daddu t_2,AT 1594 daddu t_2,AT
1563 daddu c_1,t_2 1595 daddu c_1,t_2
1564 sltu c_2,c_1,t_2 1596 sltu AT,c_1,t_2
1597 daddu c_2,AT
1565 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ 1598 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
1566 mflo t_1 1599 mflo t_1
1567 mfhi t_2 1600 mfhi t_2
@@ -1576,24 +1609,26 @@ LEAF(bn_sqr_comba8)
1576 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ 1609 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
1577 mflo t_1 1610 mflo t_1
1578 mfhi t_2 1611 mfhi t_2
1579 daddu c_1,t_1 1612 slt c_3,t_2,zero
1580 sltu AT,c_1,t_1 1613 dsll t_2,1
1581 daddu a2,t_2,AT 1614 slt a2,t_1,zero
1582 daddu c_2,a2 1615 daddu t_2,a2
1616 dsll t_1,1
1583 daddu c_1,t_1 1617 daddu c_1,t_1
1584 sltu AT,c_1,t_1 1618 sltu AT,c_1,t_1
1585 daddu t_2,AT 1619 daddu t_2,AT
1586 daddu c_2,t_2 1620 daddu c_2,t_2
1587 sltu c_3,c_2,t_2 1621 sltu AT,c_2,t_2
1622 daddu c_3,AT
1588 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ 1623 dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
1589 mflo t_1 1624 mflo t_1
1590 mfhi t_2 1625 mfhi t_2
1591 daddu c_1,t_1 1626 slt AT,t_2,zero
1592 sltu AT,c_1,t_1
1593 daddu a2,t_2,AT
1594 daddu c_2,a2
1595 sltu AT,c_2,a2
1596 daddu c_3,AT 1627 daddu c_3,AT
1628 dsll t_2,1
1629 slt a2,t_1,zero
1630 daddu t_2,a2
1631 dsll t_1,1
1597 daddu c_1,t_1 1632 daddu c_1,t_1
1598 sltu AT,c_1,t_1 1633 sltu AT,c_1,t_1
1599 daddu t_2,AT 1634 daddu t_2,AT
@@ -1605,24 +1640,26 @@ LEAF(bn_sqr_comba8)
1605 dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ 1640 dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */
1606 mflo t_1 1641 mflo t_1
1607 mfhi t_2 1642 mfhi t_2
1608 daddu c_2,t_1 1643 slt c_1,t_2,zero
1609 sltu AT,c_2,t_1 1644 dsll t_2,1
1610 daddu a2,t_2,AT 1645 slt a2,t_1,zero
1611 daddu c_3,a2 1646 daddu t_2,a2
1647 dsll t_1,1
1612 daddu c_2,t_1 1648 daddu c_2,t_1
1613 sltu AT,c_2,t_1 1649 sltu AT,c_2,t_1
1614 daddu t_2,AT 1650 daddu t_2,AT
1615 daddu c_3,t_2 1651 daddu c_3,t_2
1616 sltu c_1,c_3,t_2 1652 sltu AT,c_3,t_2
1653 daddu c_1,AT
1617 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ 1654 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
1618 mflo t_1 1655 mflo t_1
1619 mfhi t_2 1656 mfhi t_2
1620 daddu c_2,t_1 1657 slt AT,t_2,zero
1621 sltu AT,c_2,t_1
1622 daddu a2,t_2,AT
1623 daddu c_3,a2
1624 sltu AT,c_3,a2
1625 daddu c_1,AT 1658 daddu c_1,AT
1659 dsll t_2,1
1660 slt a2,t_1,zero
1661 daddu t_2,a2
1662 dsll t_1,1
1626 daddu c_2,t_1 1663 daddu c_2,t_1
1627 sltu AT,c_2,t_1 1664 sltu AT,c_2,t_1
1628 daddu t_2,AT 1665 daddu t_2,AT
@@ -1643,24 +1680,26 @@ LEAF(bn_sqr_comba8)
1643 dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ 1680 dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */
1644 mflo t_1 1681 mflo t_1
1645 mfhi t_2 1682 mfhi t_2
1646 daddu c_3,t_1 1683 slt c_2,t_2,zero
1647 sltu AT,c_3,t_1 1684 dsll t_2,1
1648 daddu a2,t_2,AT 1685 slt a2,t_1,zero
1649 daddu c_1,a2 1686 daddu t_2,a2
1687 dsll t_1,1
1650 daddu c_3,t_1 1688 daddu c_3,t_1
1651 sltu AT,c_3,t_1 1689 sltu AT,c_3,t_1
1652 daddu t_2,AT 1690 daddu t_2,AT
1653 daddu c_1,t_2 1691 daddu c_1,t_2
1654 sltu c_2,c_1,t_2 1692 sltu AT,c_1,t_2
1693 daddu c_2,AT
1655 dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ 1694 dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */
1656 mflo t_1 1695 mflo t_1
1657 mfhi t_2 1696 mfhi t_2
1658 daddu c_3,t_1 1697 slt AT,t_2,zero
1659 sltu AT,c_3,t_1
1660 daddu a2,t_2,AT
1661 daddu c_1,a2
1662 sltu AT,c_1,a2
1663 daddu c_2,AT 1698 daddu c_2,AT
1699 dsll t_2,1
1700 slt a2,t_1,zero
1701 daddu t_2,a2
1702 dsll t_1,1
1664 daddu c_3,t_1 1703 daddu c_3,t_1
1665 sltu AT,c_3,t_1 1704 sltu AT,c_3,t_1
1666 daddu t_2,AT 1705 daddu t_2,AT
@@ -1670,12 +1709,12 @@ LEAF(bn_sqr_comba8)
1670 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ 1709 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
1671 mflo t_1 1710 mflo t_1
1672 mfhi t_2 1711 mfhi t_2
1673 daddu c_3,t_1 1712 slt AT,t_2,zero
1674 sltu AT,c_3,t_1
1675 daddu a2,t_2,AT
1676 daddu c_1,a2
1677 sltu AT,c_1,a2
1678 daddu c_2,AT 1713 daddu c_2,AT
1714 dsll t_2,1
1715 slt a2,t_1,zero
1716 daddu t_2,a2
1717 dsll t_1,1
1679 daddu c_3,t_1 1718 daddu c_3,t_1
1680 sltu AT,c_3,t_1 1719 sltu AT,c_3,t_1
1681 daddu t_2,AT 1720 daddu t_2,AT
@@ -1687,24 +1726,26 @@ LEAF(bn_sqr_comba8)
1687 dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ 1726 dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */
1688 mflo t_1 1727 mflo t_1
1689 mfhi t_2 1728 mfhi t_2
1690 daddu c_1,t_1 1729 slt c_3,t_2,zero
1691 sltu AT,c_1,t_1 1730 dsll t_2,1
1692 daddu a2,t_2,AT 1731 slt a2,t_1,zero
1693 daddu c_2,a2 1732 daddu t_2,a2
1733 dsll t_1,1
1694 daddu c_1,t_1 1734 daddu c_1,t_1
1695 sltu AT,c_1,t_1 1735 sltu AT,c_1,t_1
1696 daddu t_2,AT 1736 daddu t_2,AT
1697 daddu c_2,t_2 1737 daddu c_2,t_2
1698 sltu c_3,c_2,t_2 1738 sltu AT,c_2,t_2
1739 daddu c_3,AT
1699 dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ 1740 dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */
1700 mflo t_1 1741 mflo t_1
1701 mfhi t_2 1742 mfhi t_2
1702 daddu c_1,t_1 1743 slt AT,t_2,zero
1703 sltu AT,c_1,t_1
1704 daddu a2,t_2,AT
1705 daddu c_2,a2
1706 sltu AT,c_2,a2
1707 daddu c_3,AT 1744 daddu c_3,AT
1745 dsll t_2,1
1746 slt a2,t_1,zero
1747 daddu t_2,a2
1748 dsll t_1,1
1708 daddu c_1,t_1 1749 daddu c_1,t_1
1709 sltu AT,c_1,t_1 1750 sltu AT,c_1,t_1
1710 daddu t_2,AT 1751 daddu t_2,AT
@@ -1714,12 +1755,12 @@ LEAF(bn_sqr_comba8)
1714 dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ 1755 dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */
1715 mflo t_1 1756 mflo t_1
1716 mfhi t_2 1757 mfhi t_2
1717 daddu c_1,t_1 1758 slt AT,t_2,zero
1718 sltu AT,c_1,t_1
1719 daddu a2,t_2,AT
1720 daddu c_2,a2
1721 sltu AT,c_2,a2
1722 daddu c_3,AT 1759 daddu c_3,AT
1760 dsll t_2,1
1761 slt a2,t_1,zero
1762 daddu t_2,a2
1763 dsll t_1,1
1723 daddu c_1,t_1 1764 daddu c_1,t_1
1724 sltu AT,c_1,t_1 1765 sltu AT,c_1,t_1
1725 daddu t_2,AT 1766 daddu t_2,AT
@@ -1740,24 +1781,26 @@ LEAF(bn_sqr_comba8)
1740 dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ 1781 dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */
1741 mflo t_1 1782 mflo t_1
1742 mfhi t_2 1783 mfhi t_2
1743 daddu c_2,t_1 1784 slt c_1,t_2,zero
1744 sltu AT,c_2,t_1 1785 dsll t_2,1
1745 daddu a2,t_2,AT 1786 slt a2,t_1,zero
1746 daddu c_3,a2 1787 daddu t_2,a2
1788 dsll t_1,1
1747 daddu c_2,t_1 1789 daddu c_2,t_1
1748 sltu AT,c_2,t_1 1790 sltu AT,c_2,t_1
1749 daddu t_2,AT 1791 daddu t_2,AT
1750 daddu c_3,t_2 1792 daddu c_3,t_2
1751 sltu c_1,c_3,t_2 1793 sltu AT,c_3,t_2
1794 daddu c_1,AT
1752 dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ 1795 dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */
1753 mflo t_1 1796 mflo t_1
1754 mfhi t_2 1797 mfhi t_2
1755 daddu c_2,t_1 1798 slt AT,t_2,zero
1756 sltu AT,c_2,t_1
1757 daddu a2,t_2,AT
1758 daddu c_3,a2
1759 sltu AT,c_3,a2
1760 daddu c_1,AT 1799 daddu c_1,AT
1800 dsll t_2,1
1801 slt a2,t_1,zero
1802 daddu t_2,a2
1803 dsll t_1,1
1761 daddu c_2,t_1 1804 daddu c_2,t_1
1762 sltu AT,c_2,t_1 1805 sltu AT,c_2,t_1
1763 daddu t_2,AT 1806 daddu t_2,AT
@@ -1767,12 +1810,12 @@ LEAF(bn_sqr_comba8)
1767 dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ 1810 dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */
1768 mflo t_1 1811 mflo t_1
1769 mfhi t_2 1812 mfhi t_2
1770 daddu c_2,t_1 1813 slt AT,t_2,zero
1771 sltu AT,c_2,t_1
1772 daddu a2,t_2,AT
1773 daddu c_3,a2
1774 sltu AT,c_3,a2
1775 daddu c_1,AT 1814 daddu c_1,AT
1815 dsll t_2,1
1816 slt a2,t_1,zero
1817 daddu t_2,a2
1818 dsll t_1,1
1776 daddu c_2,t_1 1819 daddu c_2,t_1
1777 sltu AT,c_2,t_1 1820 sltu AT,c_2,t_1
1778 daddu t_2,AT 1821 daddu t_2,AT
@@ -1782,12 +1825,12 @@ LEAF(bn_sqr_comba8)
1782 dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ 1825 dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */
1783 mflo t_1 1826 mflo t_1
1784 mfhi t_2 1827 mfhi t_2
1785 daddu c_2,t_1 1828 slt AT,t_2,zero
1786 sltu AT,c_2,t_1
1787 daddu a2,t_2,AT
1788 daddu c_3,a2
1789 sltu AT,c_3,a2
1790 daddu c_1,AT 1829 daddu c_1,AT
1830 dsll t_2,1
1831 slt a2,t_1,zero
1832 daddu t_2,a2
1833 dsll t_1,1
1791 daddu c_2,t_1 1834 daddu c_2,t_1
1792 sltu AT,c_2,t_1 1835 sltu AT,c_2,t_1
1793 daddu t_2,AT 1836 daddu t_2,AT
@@ -1799,24 +1842,26 @@ LEAF(bn_sqr_comba8)
1799 dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ 1842 dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */
1800 mflo t_1 1843 mflo t_1
1801 mfhi t_2 1844 mfhi t_2
1802 daddu c_3,t_1 1845 slt c_2,t_2,zero
1803 sltu AT,c_3,t_1 1846 dsll t_2,1
1804 daddu a2,t_2,AT 1847 slt a2,t_1,zero
1805 daddu c_1,a2 1848 daddu t_2,a2
1849 dsll t_1,1
1806 daddu c_3,t_1 1850 daddu c_3,t_1
1807 sltu AT,c_3,t_1 1851 sltu AT,c_3,t_1
1808 daddu t_2,AT 1852 daddu t_2,AT
1809 daddu c_1,t_2 1853 daddu c_1,t_2
1810 sltu c_2,c_1,t_2 1854 sltu AT,c_1,t_2
1855 daddu c_2,AT
1811 dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ 1856 dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */
1812 mflo t_1 1857 mflo t_1
1813 mfhi t_2 1858 mfhi t_2
1814 daddu c_3,t_1 1859 slt AT,t_2,zero
1815 sltu AT,c_3,t_1
1816 daddu a2,t_2,AT
1817 daddu c_1,a2
1818 sltu AT,c_1,a2
1819 daddu c_2,AT 1860 daddu c_2,AT
1861 dsll t_2,1
1862 slt a2,t_1,zero
1863 daddu t_2,a2
1864 dsll t_1,1
1820 daddu c_3,t_1 1865 daddu c_3,t_1
1821 sltu AT,c_3,t_1 1866 sltu AT,c_3,t_1
1822 daddu t_2,AT 1867 daddu t_2,AT
@@ -1826,12 +1871,12 @@ LEAF(bn_sqr_comba8)
1826 dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ 1871 dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */
1827 mflo t_1 1872 mflo t_1
1828 mfhi t_2 1873 mfhi t_2
1829 daddu c_3,t_1 1874 slt AT,t_2,zero
1830 sltu AT,c_3,t_1
1831 daddu a2,t_2,AT
1832 daddu c_1,a2
1833 sltu AT,c_1,a2
1834 daddu c_2,AT 1875 daddu c_2,AT
1876 dsll t_2,1
1877 slt a2,t_1,zero
1878 daddu t_2,a2
1879 dsll t_1,1
1835 daddu c_3,t_1 1880 daddu c_3,t_1
1836 sltu AT,c_3,t_1 1881 sltu AT,c_3,t_1
1837 daddu t_2,AT 1882 daddu t_2,AT
@@ -1852,24 +1897,26 @@ LEAF(bn_sqr_comba8)
1852 dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ 1897 dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */
1853 mflo t_1 1898 mflo t_1
1854 mfhi t_2 1899 mfhi t_2
1855 daddu c_1,t_1 1900 slt c_3,t_2,zero
1856 sltu AT,c_1,t_1 1901 dsll t_2,1
1857 daddu a2,t_2,AT 1902 slt a2,t_1,zero
1858 daddu c_2,a2 1903 daddu t_2,a2
1904 dsll t_1,1
1859 daddu c_1,t_1 1905 daddu c_1,t_1
1860 sltu AT,c_1,t_1 1906 sltu AT,c_1,t_1
1861 daddu t_2,AT 1907 daddu t_2,AT
1862 daddu c_2,t_2 1908 daddu c_2,t_2
1863 sltu c_3,c_2,t_2 1909 sltu AT,c_2,t_2
1910 daddu c_3,AT
1864 dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ 1911 dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */
1865 mflo t_1 1912 mflo t_1
1866 mfhi t_2 1913 mfhi t_2
1867 daddu c_1,t_1 1914 slt AT,t_2,zero
1868 sltu AT,c_1,t_1
1869 daddu a2,t_2,AT
1870 daddu c_2,a2
1871 sltu AT,c_2,a2
1872 daddu c_3,AT 1915 daddu c_3,AT
1916 dsll t_2,1
1917 slt a2,t_1,zero
1918 daddu t_2,a2
1919 dsll t_1,1
1873 daddu c_1,t_1 1920 daddu c_1,t_1
1874 sltu AT,c_1,t_1 1921 sltu AT,c_1,t_1
1875 daddu t_2,AT 1922 daddu t_2,AT
@@ -1879,12 +1926,12 @@ LEAF(bn_sqr_comba8)
1879 dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ 1926 dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */
1880 mflo t_1 1927 mflo t_1
1881 mfhi t_2 1928 mfhi t_2
1882 daddu c_1,t_1 1929 slt AT,t_2,zero
1883 sltu AT,c_1,t_1
1884 daddu a2,t_2,AT
1885 daddu c_2,a2
1886 sltu AT,c_2,a2
1887 daddu c_3,AT 1930 daddu c_3,AT
1931 dsll t_2,1
1932 slt a2,t_1,zero
1933 daddu t_2,a2
1934 dsll t_1,1
1888 daddu c_1,t_1 1935 daddu c_1,t_1
1889 sltu AT,c_1,t_1 1936 sltu AT,c_1,t_1
1890 daddu t_2,AT 1937 daddu t_2,AT
@@ -1896,24 +1943,26 @@ LEAF(bn_sqr_comba8)
1896 dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ 1943 dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */
1897 mflo t_1 1944 mflo t_1
1898 mfhi t_2 1945 mfhi t_2
1899 daddu c_2,t_1 1946 slt c_1,t_2,zero
1900 sltu AT,c_2,t_1 1947 dsll t_2,1
1901 daddu a2,t_2,AT 1948 slt a2,t_1,zero
1902 daddu c_3,a2 1949 daddu t_2,a2
1950 dsll t_1,1
1903 daddu c_2,t_1 1951 daddu c_2,t_1
1904 sltu AT,c_2,t_1 1952 sltu AT,c_2,t_1
1905 daddu t_2,AT 1953 daddu t_2,AT
1906 daddu c_3,t_2 1954 daddu c_3,t_2
1907 sltu c_1,c_3,t_2 1955 sltu AT,c_3,t_2
1956 daddu c_1,AT
1908 dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ 1957 dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */
1909 mflo t_1 1958 mflo t_1
1910 mfhi t_2 1959 mfhi t_2
1911 daddu c_2,t_1 1960 slt AT,t_2,zero
1912 sltu AT,c_2,t_1
1913 daddu a2,t_2,AT
1914 daddu c_3,a2
1915 sltu AT,c_3,a2
1916 daddu c_1,AT 1961 daddu c_1,AT
1962 dsll t_2,1
1963 slt a2,t_1,zero
1964 daddu t_2,a2
1965 dsll t_1,1
1917 daddu c_2,t_1 1966 daddu c_2,t_1
1918 sltu AT,c_2,t_1 1967 sltu AT,c_2,t_1
1919 daddu t_2,AT 1968 daddu t_2,AT
@@ -1934,24 +1983,26 @@ LEAF(bn_sqr_comba8)
1934 dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ 1983 dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */
1935 mflo t_1 1984 mflo t_1
1936 mfhi t_2 1985 mfhi t_2
1937 daddu c_3,t_1 1986 slt c_2,t_2,zero
1938 sltu AT,c_3,t_1 1987 dsll t_2,1
1939 daddu a2,t_2,AT 1988 slt a2,t_1,zero
1940 daddu c_1,a2 1989 daddu t_2,a2
1990 dsll t_1,1
1941 daddu c_3,t_1 1991 daddu c_3,t_1
1942 sltu AT,c_3,t_1 1992 sltu AT,c_3,t_1
1943 daddu t_2,AT 1993 daddu t_2,AT
1944 daddu c_1,t_2 1994 daddu c_1,t_2
1945 sltu c_2,c_1,t_2 1995 sltu AT,c_1,t_2
1996 daddu c_2,AT
1946 dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ 1997 dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */
1947 mflo t_1 1998 mflo t_1
1948 mfhi t_2 1999 mfhi t_2
1949 daddu c_3,t_1 2000 slt AT,t_2,zero
1950 sltu AT,c_3,t_1
1951 daddu a2,t_2,AT
1952 daddu c_1,a2
1953 sltu AT,c_1,a2
1954 daddu c_2,AT 2001 daddu c_2,AT
2002 dsll t_2,1
2003 slt a2,t_1,zero
2004 daddu t_2,a2
2005 dsll t_1,1
1955 daddu c_3,t_1 2006 daddu c_3,t_1
1956 sltu AT,c_3,t_1 2007 sltu AT,c_3,t_1
1957 daddu t_2,AT 2008 daddu t_2,AT
@@ -1963,15 +2014,17 @@ LEAF(bn_sqr_comba8)
1963 dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ 2014 dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */
1964 mflo t_1 2015 mflo t_1
1965 mfhi t_2 2016 mfhi t_2
1966 daddu c_1,t_1 2017 slt c_3,t_2,zero
1967 sltu AT,c_1,t_1 2018 dsll t_2,1
1968 daddu a2,t_2,AT 2019 slt a2,t_1,zero
1969 daddu c_2,a2 2020 daddu t_2,a2
2021 dsll t_1,1
1970 daddu c_1,t_1 2022 daddu c_1,t_1
1971 sltu AT,c_1,t_1 2023 sltu AT,c_1,t_1
1972 daddu t_2,AT 2024 daddu t_2,AT
1973 daddu c_2,t_2 2025 daddu c_2,t_2
1974 sltu c_3,c_2,t_2 2026 sltu AT,c_2,t_2
2027 daddu c_3,AT
1975 dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ 2028 dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
1976 mflo t_1 2029 mflo t_1
1977 mfhi t_2 2030 mfhi t_2
@@ -1986,15 +2039,17 @@ LEAF(bn_sqr_comba8)
1986 dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ 2039 dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */
1987 mflo t_1 2040 mflo t_1
1988 mfhi t_2 2041 mfhi t_2
1989 daddu c_2,t_1 2042 slt c_1,t_2,zero
1990 sltu AT,c_2,t_1 2043 dsll t_2,1
1991 daddu a2,t_2,AT 2044 slt a2,t_1,zero
1992 daddu c_3,a2 2045 daddu t_2,a2
2046 dsll t_1,1
1993 daddu c_2,t_1 2047 daddu c_2,t_1
1994 sltu AT,c_2,t_1 2048 sltu AT,c_2,t_1
1995 daddu t_2,AT 2049 daddu t_2,AT
1996 daddu c_3,t_2 2050 daddu c_3,t_2
1997 sltu c_1,c_3,t_2 2051 sltu AT,c_3,t_2
2052 daddu c_1,AT
1998 sd c_2,104(a0) 2053 sd c_2,104(a0)
1999 2054
2000 dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ 2055 dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
@@ -2025,28 +2080,30 @@ LEAF(bn_sqr_comba4)
2025 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ 2080 dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
2026 mflo t_1 2081 mflo t_1
2027 mfhi t_2 2082 mfhi t_2
2083 slt c_1,t_2,zero
2084 dsll t_2,1
2085 slt a2,t_1,zero
2086 daddu t_2,a2
2087 dsll t_1,1
2028 daddu c_2,t_1 2088 daddu c_2,t_1
2029 sltu AT,c_2,t_1 2089 sltu AT,c_2,t_1
2030 daddu c_3,t_2,AT 2090 daddu c_3,t_2,AT
2031 daddu c_2,t_1
2032 sltu AT,c_2,t_1
2033 daddu t_2,AT
2034 daddu c_3,t_2
2035 sltu c_1,c_3,t_2
2036 sd c_2,8(a0) 2091 sd c_2,8(a0)
2037 2092
2038 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ 2093 dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
2039 mflo t_1 2094 mflo t_1
2040 mfhi t_2 2095 mfhi t_2
2041 daddu c_3,t_1 2096 slt c_2,t_2,zero
2042 sltu AT,c_3,t_1 2097 dsll t_2,1
2043 daddu a2,t_2,AT 2098 slt a2,t_1,zero
2044 daddu c_1,a2 2099 daddu t_2,a2
2100 dsll t_1,1
2045 daddu c_3,t_1 2101 daddu c_3,t_1
2046 sltu AT,c_3,t_1 2102 sltu AT,c_3,t_1
2047 daddu t_2,AT 2103 daddu t_2,AT
2048 daddu c_1,t_2 2104 daddu c_1,t_2
2049 sltu c_2,c_1,t_2 2105 sltu AT,c_1,t_2
2106 daddu c_2,AT
2050 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ 2107 dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
2051 mflo t_1 2108 mflo t_1
2052 mfhi t_2 2109 mfhi t_2
@@ -2061,24 +2118,26 @@ LEAF(bn_sqr_comba4)
2061 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ 2118 dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
2062 mflo t_1 2119 mflo t_1
2063 mfhi t_2 2120 mfhi t_2
2064 daddu c_1,t_1 2121 slt c_3,t_2,zero
2065 sltu AT,c_1,t_1 2122 dsll t_2,1
2066 daddu a2,t_2,AT 2123 slt a2,t_1,zero
2067 daddu c_2,a2 2124 daddu t_2,a2
2125 dsll t_1,1
2068 daddu c_1,t_1 2126 daddu c_1,t_1
2069 sltu AT,c_1,t_1 2127 sltu AT,c_1,t_1
2070 daddu t_2,AT 2128 daddu t_2,AT
2071 daddu c_2,t_2 2129 daddu c_2,t_2
2072 sltu c_3,c_2,t_2 2130 sltu AT,c_2,t_2
2131 daddu c_3,AT
2073 dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ 2132 dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */
2074 mflo t_1 2133 mflo t_1
2075 mfhi t_2 2134 mfhi t_2
2076 daddu c_1,t_1 2135 slt AT,t_2,zero
2077 sltu AT,c_1,t_1
2078 daddu a2,t_2,AT
2079 daddu c_2,a2
2080 sltu AT,c_2,a2
2081 daddu c_3,AT 2136 daddu c_3,AT
2137 dsll t_2,1
2138 slt a2,t_1,zero
2139 daddu t_2,a2
2140 dsll t_1,1
2082 daddu c_1,t_1 2141 daddu c_1,t_1
2083 sltu AT,c_1,t_1 2142 sltu AT,c_1,t_1
2084 daddu t_2,AT 2143 daddu t_2,AT
@@ -2090,15 +2149,17 @@ LEAF(bn_sqr_comba4)
2090 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ 2149 dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
2091 mflo t_1 2150 mflo t_1
2092 mfhi t_2 2151 mfhi t_2
2093 daddu c_2,t_1 2152 slt c_1,t_2,zero
2094 sltu AT,c_2,t_1 2153 dsll t_2,1
2095 daddu a2,t_2,AT 2154 slt a2,t_1,zero
2096 daddu c_3,a2 2155 daddu t_2,a2
2156 dsll t_1,1
2097 daddu c_2,t_1 2157 daddu c_2,t_1
2098 sltu AT,c_2,t_1 2158 sltu AT,c_2,t_1
2099 daddu t_2,AT 2159 daddu t_2,AT
2100 daddu c_3,t_2 2160 daddu c_3,t_2
2101 sltu c_1,c_3,t_2 2161 sltu AT,c_3,t_2
2162 daddu c_1,AT
2102 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ 2163 dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
2103 mflo t_1 2164 mflo t_1
2104 mfhi t_2 2165 mfhi t_2
@@ -2113,15 +2174,17 @@ LEAF(bn_sqr_comba4)
2113 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ 2174 dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
2114 mflo t_1 2175 mflo t_1
2115 mfhi t_2 2176 mfhi t_2
2116 daddu c_3,t_1 2177 slt c_2,t_2,zero
2117 sltu AT,c_3,t_1 2178 dsll t_2,1
2118 daddu a2,t_2,AT 2179 slt a2,t_1,zero
2119 daddu c_1,a2 2180 daddu t_2,a2
2181 dsll t_1,1
2120 daddu c_3,t_1 2182 daddu c_3,t_1
2121 sltu AT,c_3,t_1 2183 sltu AT,c_3,t_1
2122 daddu t_2,AT 2184 daddu t_2,AT
2123 daddu c_1,t_2 2185 daddu c_1,t_2
2124 sltu c_2,c_1,t_2 2186 sltu AT,c_1,t_2
2187 daddu c_2,AT
2125 sd c_3,40(a0) 2188 sd c_3,40(a0)
2126 2189
2127 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ 2190 dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */