about summary refs log tree commit diff
path: root/src/buildvm_ppc.dasc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/buildvm_ppc.dasc 1266
1 files changed, 1006 insertions, 260 deletions
diff --git a/src/buildvm_ppc.dasc b/src/buildvm_ppc.dasc
index de3346cb..9130ee41 100644
--- a/src/buildvm_ppc.dasc
+++ b/src/buildvm_ppc.dasc
@@ -30,8 +30,8 @@
30|.define TISNUM, r22 30|.define TISNUM, r22
31|.define TISNIL, r23 31|.define TISNIL, r23
32|.define ZERO, r24 32|.define ZERO, r24
33|.define TOBIT, f30 33|.define TOBIT, f30 // 2^52 + 2^51.
34|.define TONUM, f31 34|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
35| 35|
36|// The following temporaries are not saved across C calls, except for RA. 36|// The following temporaries are not saved across C calls, except for RA.
37|.define RA, r20 // Callee-save. 37|.define RA, r20 // Callee-save.
@@ -652,11 +652,18 @@ static void build_subroutines(BuildCtx *ctx)
652 | b >1 652 | b >1
653 | 653 |
654 |->vmeta_tgetb: // TMP0 = index 654 |->vmeta_tgetb: // TMP0 = index
655 | tonum_u f0, TMP0 655 if (!LJ_DUALNUM) {
656 | tonum_u f0, TMP0
657 }
656 | decode_RB8 RB, INS 658 | decode_RB8 RB, INS
657 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 659 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
658 | add CARG2, BASE, RB 660 | add CARG2, BASE, RB
659 | stfd f0, 0(CARG3) 661 if (LJ_DUALNUM) {
662 | stw TISNUM, 0(CARG3)
663 | stw TMP0, 4(CARG3)
664 } else {
665 | stfd f0, 0(CARG3)
666 }
660 | b >1 667 | b >1
661 | 668 |
662 |->vmeta_tgetv: 669 |->vmeta_tgetv:
@@ -710,11 +717,18 @@ static void build_subroutines(BuildCtx *ctx)
710 | b >1 717 | b >1
711 | 718 |
712 |->vmeta_tsetb: // TMP0 = index 719 |->vmeta_tsetb: // TMP0 = index
713 | tonum_u f0, TMP0 720 if (!LJ_DUALNUM) {
721 | tonum_u f0, TMP0
722 }
714 | decode_RB8 RB, INS 723 | decode_RB8 RB, INS
715 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 724 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
716 | add CARG2, BASE, RB 725 | add CARG2, BASE, RB
717 | stfd f0, 0(CARG3) 726 if (LJ_DUALNUM) {
727 | stw TISNUM, 0(CARG3)
728 | stw TMP0, 4(CARG3)
729 } else {
730 | stfd f0, 0(CARG3)
731 }
718 | b >1 732 | b >1
719 | 733 |
720 |->vmeta_tsetv: 734 |->vmeta_tsetv:
@@ -752,9 +766,17 @@ static void build_subroutines(BuildCtx *ctx)
752 |->vmeta_comp: 766 |->vmeta_comp:
753 | mr CARG1, L 767 | mr CARG1, L
754 | subi PC, PC, 4 768 | subi PC, PC, 4
755 | add CARG2, BASE, RA 769 if (LJ_DUALNUM) {
770 | mr CARG2, RA
771 } else {
772 | add CARG2, BASE, RA
773 }
756 | stw PC, SAVE_PC 774 | stw PC, SAVE_PC
757 | add CARG3, BASE, RD 775 if (LJ_DUALNUM) {
776 | mr CARG3, RD
777 } else {
778 | add CARG3, BASE, RD
779 }
758 | stw BASE, L->base 780 | stw BASE, L->base
759 | decode_OP1 CARG4, INS 781 | decode_OP1 CARG4, INS
760 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) 782 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
@@ -803,26 +825,53 @@ static void build_subroutines(BuildCtx *ctx)
803 | // Returns 0/1 or TValue * (metamethod). 825 | // Returns 0/1 or TValue * (metamethod).
804 | b <3 826 | b <3
805 | 827 |
806 |//-- Arithmetic metamethods --------------------------------------------- 828 |->vmeta_equal_cd: // Equality fallback when one operand is cdata; label exists in all builds, body only with LJ_HASFFI.
829#if LJ_HASFFI
830 | mr CARG2, INS // 2nd C argument: the instruction word (BCIns op).
831 | subi PC, PC, 4 // Step PC back by one instruction; BC_ISEQN/BC_ISNEN advance it by 4 before branching here.
832 | stw BASE, L->base
833 | mr CARG1, L // 1st C argument: lua_State *L.
834 | stw PC, SAVE_PC
835 | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
836 | // Returns 0/1 or TValue * (metamethod).
837 | b <3 // Result handling is at local label 3 earlier in the file (not shown in this excerpt).
838#endif
807 | 839 |
808 |->vmeta_arith_vn: 840 |//-- Arithmetic metamethods ---------------------------------------------
809 | add CARG3, BASE, RB
810 | add CARG4, KBASE, RC
811 | b >1
812 | 841 |
813 |->vmeta_arith_nv: 842 |->vmeta_arith_nv:
814 | add CARG3, KBASE, RC 843 | add CARG3, KBASE, RC
815 | add CARG4, BASE, RB 844 | add CARG4, BASE, RB
816 | b >1 845 | b >1
846 |->vmeta_arith_nv2:
847 if (LJ_DUALNUM) {
848 | mr CARG3, RC
849 | mr CARG4, RB
850 | b >1
851 }
817 | 852 |
818 |->vmeta_unm: 853 |->vmeta_unm:
819 | mr CARG3, RD 854 | mr CARG3, RD
820 | mr CARG4, RD 855 | mr CARG4, RD
821 | b >1 856 | b >1
822 | 857 |
858 |->vmeta_arith_vn:
859 | add CARG3, BASE, RB
860 | add CARG4, KBASE, RC
861 | b >1
862 |
823 |->vmeta_arith_vv: 863 |->vmeta_arith_vv:
824 | add CARG3, BASE, RB 864 | add CARG3, BASE, RB
825 | add CARG4, BASE, RC 865 | add CARG4, BASE, RC
866 if (LJ_DUALNUM) {
867 | b >1
868 }
869 |->vmeta_arith_vn2:
870 |->vmeta_arith_vv2:
871 if (LJ_DUALNUM) {
872 | mr CARG3, RB
873 | mr CARG4, RC
874 }
826 |1: 875 |1:
827 | add CARG2, BASE, RA 876 | add CARG2, BASE, RA
828 | stw BASE, L->base 877 | stw BASE, L->base
@@ -1109,7 +1158,11 @@ static void build_subroutines(BuildCtx *ctx)
1109 | ffgccheck 1158 | ffgccheck
1110 | mr CARG1, L 1159 | mr CARG1, L
1111 | mr CARG2, BASE 1160 | mr CARG2, BASE
1112 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1161 if (LJ_DUALNUM) {
1162 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o)
1163 } else {
1164 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np)
1165 }
1113 | // Returns GCstr *. 1166 | // Returns GCstr *.
1114 | li CARG3, LJ_TSTR 1167 | li CARG3, LJ_TSTR
1115 | b ->fff_restv 1168 | b ->fff_restv
@@ -1167,26 +1220,43 @@ static void build_subroutines(BuildCtx *ctx)
1167 | lwz CARG3, 0(BASE) 1220 | lwz CARG3, 0(BASE)
1168 | lwz TAB:CARG1, 4(BASE) 1221 | lwz TAB:CARG1, 4(BASE)
1169 | lwz CARG4, 8(BASE) 1222 | lwz CARG4, 8(BASE)
1170 | lfd FARG2, 8(BASE) 1223 if (LJ_DUALNUM) {
1224 | lwz TMP2, 12(BASE)
1225 } else {
1226 | lfd FARG2, 8(BASE)
1227 }
1171 | blt ->fff_fallback 1228 | blt ->fff_fallback
1172 | checktab CARG3 1229 | checktab CARG3
1173 | checknum cr1, CARG4 1230 | checknum cr1, CARG4
1174 | lwz PC, FRAME_PC(BASE) 1231 | lwz PC, FRAME_PC(BASE)
1175 | lus TMP0, 0x3ff0 1232 if (LJ_DUALNUM) {
1176 | stw ZERO, TMPD_LO 1233 | bne ->fff_fallback
1177 | bne ->fff_fallback 1234 | bne cr1, ->fff_fallback
1178 | stw TMP0, TMPD_HI 1235 } else {
1179 | bge cr1, ->fff_fallback 1236 | lus TMP0, 0x3ff0
1180 | lfd FARG1, TMPD 1237 | stw ZERO, TMPD_LO
1181 | toint TMP2, FARG2, f0 1238 | bne ->fff_fallback
1239 | stw TMP0, TMPD_HI
1240 | bge cr1, ->fff_fallback
1241 | lfd FARG1, TMPD
1242 | toint TMP2, FARG2, f0
1243 }
1182 | lwz TMP0, TAB:CARG1->asize 1244 | lwz TMP0, TAB:CARG1->asize
1183 | lwz TMP1, TAB:CARG1->array 1245 | lwz TMP1, TAB:CARG1->array
1184 | fadd FARG2, FARG2, FARG1 1246 if (!LJ_DUALNUM) {
1247 | fadd FARG2, FARG2, FARG1
1248 }
1185 | addi TMP2, TMP2, 1 1249 | addi TMP2, TMP2, 1
1186 | la RA, -8(BASE) 1250 | la RA, -8(BASE)
1187 | cmplw TMP0, TMP2 1251 | cmplw TMP0, TMP2
1188 | slwi TMP3, TMP2, 3 1252 if (LJ_DUALNUM) {
1189 | stfd FARG2, 0(RA) 1253 | stw TISNUM, 0(RA)
1254 | slwi TMP3, TMP2, 3
1255 | stw TMP2, 4(RA)
1256 } else {
1257 | slwi TMP3, TMP2, 3
1258 | stfd FARG2, 0(RA)
1259 }
1190 | ble >2 // Not in array part? 1260 | ble >2 // Not in array part?
1191 | lwzx TMP2, TMP1, TMP3 1261 | lwzx TMP2, TMP1, TMP3
1192 | lfdx f0, TMP1, TMP3 1262 | lfdx f0, TMP1, TMP3
@@ -1226,7 +1296,11 @@ static void build_subroutines(BuildCtx *ctx)
1226 | lfd f0, CFUNC:RB->upvalue[0] 1296 | lfd f0, CFUNC:RB->upvalue[0]
1227 | la RA, -8(BASE) 1297 | la RA, -8(BASE)
1228#endif 1298#endif
1229 | stw ZERO, 8(BASE) 1299 if (LJ_DUALNUM) {
1300 | stw TISNUM, 8(BASE)
1301 } else {
1302 | stw ZERO, 8(BASE)
1303 }
1230 | stw ZERO, 12(BASE) 1304 | stw ZERO, 12(BASE)
1231 | li RD, (3+1)*8 1305 | li RD, (3+1)*8
1232 | stfd f0, 0(RA) 1306 | stfd f0, 0(RA)
@@ -1401,7 +1475,26 @@ static void build_subroutines(BuildCtx *ctx)
1401 |//-- Math library ------------------------------------------------------- 1475 |//-- Math library -------------------------------------------------------
1402 | 1476 |
1403 |.ffunc_1 math_abs 1477 |.ffunc_1 math_abs
1404 | checknum CARG3; bge ->fff_fallback 1478 | checknum CARG3
1479 if (LJ_DUALNUM) {
1480 | bne >2
1481 | srawi TMP1, CARG1, 31
1482 | xor TMP2, TMP1, CARG1
1483 | sub. CARG1, TMP2, TMP1
1484 | blt >1
1485 |->fff_resi:
1486 | lwz PC, FRAME_PC(BASE)
1487 | la RA, -8(BASE)
1488 | stw TISNUM, -8(BASE)
1489 | stw CRET1, -4(BASE)
1490 | b ->fff_res1
1491 |1:
1492 | lus CARG3, 0x41e0 // 2^31.
1493 | li CARG1, 0
1494 | b ->fff_restv
1495 |2:
1496 }
1497 | bge ->fff_fallback
1405 | rlwinm CARG3, CARG3, 0, 1, 31 1498 | rlwinm CARG3, CARG3, 0, 1, 31
1406 | // Fallthrough. 1499 | // Fallthrough.
1407 | 1500 |
@@ -1448,9 +1541,85 @@ static void build_subroutines(BuildCtx *ctx)
1448 | b ->fff_resn 1541 | b ->fff_resn
1449 |.endmacro 1542 |.endmacro
1450 | 1543 |
1451 | // NYI: Use internal implementation. 1544 |.macro math_round, func
1452 | math_extern floor 1545 | .ffunc_1 math_ .. func
1453 | math_extern ceil 1546 | checknum CARG3; beq ->fff_restv
1547 | rlwinm TMP2, CARG3, 12, 21, 31
1548 | bge ->fff_fallback
1549 | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023
1550 | cmplwi cr1, TMP2, 31 // 0 <= exp < 31?
1551 | subfic TMP0, TMP2, 31
1552 | blt >3
1553 | slwi TMP1, CARG3, 11
1554 | srwi TMP3, CARG1, 21
1555 | oris TMP1, TMP1, 0x8000
1556 | addi TMP2, TMP2, 1
1557 | or TMP1, TMP1, TMP3
1558 | slwi CARG2, CARG1, 11
1559 | bge cr1, >4
1560 | slw TMP3, TMP1, TMP2
1561 | srw CARG1, TMP1, TMP0
1562 | or TMP3, TMP3, CARG2
1563 | srawi TMP2, CARG3, 31
1564 |.if "func" == "floor"
1565 | and TMP1, TMP3, TMP2
1566 | addic TMP0, TMP1, -1
1567 | subfe TMP1, TMP0, TMP1
1568 | add CARG1, CARG1, TMP1
1569 | xor CARG1, CARG1, TMP2
1570 | sub CARG1, CARG1, TMP2
1571 | b ->fff_resi
1572 |.else
1573 | andc TMP1, TMP3, TMP2
1574 | addic TMP0, TMP1, -1
1575 | subfe TMP1, TMP0, TMP1
1576 | addo. CARG1, CARG1, TMP1
1577 | xor CARG1, CARG1, TMP2
1578 | sub CARG1, CARG1, TMP2
1579 | bns ->fff_resi
1580 | // Potential overflow.
1581 | mcrxr cr0; ble ->fff_resi // Ignore unrelated overflow.
1582 | lus CARG3, 0x41e0 // 2^31.
1583 | li CARG1, 0
1584 | b ->fff_restv
1585 |.endif
1586 |3: // |x| < 1
1587 | add TMP2, CARG3, CARG3
1588 | srawi TMP1, CARG3, 31
1589 | or TMP2, CARG1, TMP2 // ztest = (hi+hi) | lo
1590 |.if "func" == "floor"
1591 | and TMP1, TMP2, TMP1 // (ztest & sign) == 0 ? 0 : -1
1592 | subfic TMP2, TMP1, 0
1593 | subfe CARG1, CARG1, CARG1
1594 |.else
1595 | andc TMP1, TMP2, TMP1 // (ztest & ~sign) == 0 ? 0 : 1
1596 | addic TMP2, TMP1, -1
1597 | subfe CARG1, TMP2, TMP1
1598 |.endif
1599 | b ->fff_resi
1600 |4: // exp >= 31. Check for -(2^31).
1601 | xoris TMP1, TMP1, 0x8000
1602 | srawi TMP2, CARG3, 31
1603 |.if "func" == "floor"
1604 | or TMP1, TMP1, CARG2
1605 |.endif
1606 | orc. TMP1, TMP1, TMP2
1607 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
1608 | lus CARG1, 0x8000 // -(2^31).
1609 | beq ->fff_resi
1610 |5:
1611 | bl extern func
1612 | b ->fff_resn
1613 |.endmacro
1614 |
1615 if (LJ_DUALNUM) {
1616 | math_round floor
1617 | math_round ceil
1618 } else {
1619 | // NYI: use internal implementation.
1620 | math_extern floor
1621 | math_extern ceil
1622 }
1454 | 1623 |
1455 | math_extern sqrt 1624 | math_extern sqrt
1456 | math_extern log 1625 | math_extern log
@@ -1475,8 +1644,20 @@ static void build_subroutines(BuildCtx *ctx)
1475 | fmul FARG1, FARG1, FARG2 1644 | fmul FARG1, FARG1, FARG2
1476 | b ->fff_resn 1645 | b ->fff_resn
1477 | 1646 |
1478 |.ffunc_nn math_ldexp 1647 if (LJ_DUALNUM) {
1479 | toint CARG1, FARG2 1648 |.ffunc math_ldexp
1649 | cmplwi NARGS8:RC, 16
1650 | lwz CARG3, 0(BASE)
1651 | lfd FARG1, 0(BASE)
1652 | lwz CARG4, 8(BASE)
1653 | lwz CARG1, 12(BASE)
1654 | blt ->fff_fallback
1655 | checknum CARG3; bge ->fff_fallback
1656 | checknum CARG4; bne ->fff_fallback
1657 } else {
1658 |.ffunc_nn math_ldexp
1659 | toint CARG1, FARG2
1660 }
1480 | bl extern ldexp 1661 | bl extern ldexp
1481 | b ->fff_resn 1662 | b ->fff_resn
1482 | 1663 |
@@ -1486,10 +1667,17 @@ static void build_subroutines(BuildCtx *ctx)
1486 | bl extern frexp 1667 | bl extern frexp
1487 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1668 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1488 | la RA, -8(BASE) 1669 | la RA, -8(BASE)
1489 | tonum_i FARG2, TMP1 1670 if (!LJ_DUALNUM) {
1671 | tonum_i FARG2, TMP1
1672 }
1490 | stfd FARG1, 0(RA) 1673 | stfd FARG1, 0(RA)
1491 | li RD, (2+1)*8 1674 | li RD, (2+1)*8
1492 | stfd FARG2, 8(RA) 1675 if (LJ_DUALNUM) {
1676 | stw TISNUM, 8(RA)
1677 | stw TMP1, 12(RA)
1678 } else {
1679 | stfd FARG2, 8(RA)
1680 }
1493 | b ->fff_res 1681 | b ->fff_res
1494 | 1682 |
1495 |.ffunc_n math_modf 1683 |.ffunc_n math_modf
@@ -1502,6 +1690,61 @@ static void build_subroutines(BuildCtx *ctx)
1502 | b ->fff_res 1690 | b ->fff_res
1503 | 1691 |
1504 |.macro math_minmax, name, ismax 1692 |.macro math_minmax, name, ismax
1693 ||if (LJ_DUALNUM) {
1694 | .ffunc_1 name
1695 | checknum CARG3
1696 | addi TMP1, BASE, 8
1697 | add TMP2, BASE, NARGS8:RC
1698 | bne >4
1699 |1: // Handle integers.
1700 | lwz CARG4, 0(TMP1)
1701 | cmplw cr1, TMP1, TMP2
1702 | lwz CARG2, 4(TMP1)
1703 | bge cr1, ->fff_resi
1704 | checknum CARG4
1705 | xoris TMP0, CARG1, 0x8000
1706 | xoris TMP3, CARG2, 0x8000
1707 | bne >3
1708 | subfc TMP3, TMP3, TMP0
1709 | subfe TMP0, TMP0, TMP0
1710 |.if ismax
1711 | andc TMP3, TMP3, TMP0
1712 |.else
1713 | and TMP3, TMP3, TMP0
1714 |.endif
1715 | add CARG1, TMP3, CARG2
1716 | addi TMP1, TMP1, 8
1717 | b <1
1718 |3:
1719 | bge ->fff_fallback
1720 | // Convert intermediate result to number and continue below.
1721 | tonum_i FARG1, CARG1
1722 | lfd FARG2, 0(TMP1)
1723 | b >6
1724 |4:
1725 | lfd FARG1, 0(BASE)
1726 | bge ->fff_fallback
1727 |5: // Handle numbers.
1728 | lwz CARG4, 0(TMP1)
1729 | cmplw cr1, TMP1, TMP2
1730 | lfd FARG2, 0(TMP1)
1731 | bge cr1, ->fff_resn
1732 | checknum CARG4; bge >7
1733 |6:
1734 | fsub f0, FARG1, FARG2
1735 | addi TMP1, TMP1, 8
1736 |.if ismax
1737 | fsel FARG1, f0, FARG1, FARG2
1738 |.else
1739 | fsel FARG1, f0, FARG2, FARG1
1740 |.endif
1741 | b <5
1742 |7: // Convert integer to number and continue above.
1743 | lwz CARG2, 4(TMP1)
1744 | bne ->fff_fallback
1745 | tonum_i FARG2, CARG2
1746 | b <6
1747 ||} else {
1505 | .ffunc_n name 1748 | .ffunc_n name
1506 | li TMP1, 8 1749 | li TMP1, 8
1507 |1: 1750 |1:
@@ -1519,6 +1762,7 @@ static void build_subroutines(BuildCtx *ctx)
1519 | fsel FARG1, f0, FARG2, FARG1 1762 | fsel FARG1, f0, FARG2, FARG1
1520 |.endif 1763 |.endif
1521 | b <1 1764 | b <1
1765 ||}
1522 |.endmacro 1766 |.endmacro
1523 | 1767 |
1524 | math_minmax math_min, 0 1768 | math_minmax math_min, 0
@@ -1539,28 +1783,45 @@ static void build_subroutines(BuildCtx *ctx)
1539 | checkstr CARG3 1783 | checkstr CARG3
1540 | bne ->fff_fallback 1784 | bne ->fff_fallback
1541 | lwz TMP0, STR:CARG1->len 1785 | lwz TMP0, STR:CARG1->len
1542 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). 1786 if (LJ_DUALNUM) {
1543 | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8 1787 | lbz CARG1, STR:CARG1[1] // Access is always ok (NUL at end).
1544 | subfe RD, TMP3, TMP0 1788 | li RD, (0+1)*8
1545 | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1. 1789 | lwz PC, FRAME_PC(BASE)
1546 | addi RD, RD, 1 1790 | cmplwi TMP0, 0
1547 | lfd f0, TONUM_D 1791 | la RA, -8(BASE)
1548 | la RA, -8(BASE) 1792 | beq ->fff_res
1549 | lwz PC, FRAME_PC(BASE) 1793 | b ->fff_resi
1550 | fsub f0, f0, TOBIT 1794 } else {
1551 | slwi RD, RD, 3 1795 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1552 | stfd f0, 0(RA) 1796 | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8
1553 | b ->fff_res 1797 | subfe RD, TMP3, TMP0
1798 | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1.
1799 | addi RD, RD, 1
1800 | lfd f0, TONUM_D
1801 | la RA, -8(BASE)
1802 | lwz PC, FRAME_PC(BASE)
1803 | fsub f0, f0, TOBIT
1804 | slwi RD, RD, 3
1805 | stfd f0, 0(RA)
1806 | b ->fff_res
1807 }
1554 | 1808 |
1555 |.ffunc string_char // Only handle the 1-arg case here. 1809 |.ffunc string_char // Only handle the 1-arg case here.
1556 | ffgccheck 1810 | ffgccheck
1557 | cmplwi NARGS8:RC, 8 1811 | cmplwi NARGS8:RC, 8
1558 | lwz CARG3, 0(BASE) 1812 | lwz CARG3, 0(BASE)
1559 | lfd FARG1, 0(BASE) 1813 if (LJ_DUALNUM) {
1560 | bne ->fff_fallback // Exactly 1 argument. 1814 | lwz TMP0, 4(BASE)
1561 | checknum CARG3; bge ->fff_fallback 1815 | bne ->fff_fallback // Exactly 1 argument.
1562 | toint TMP0, FARG1 1816 | checknum CARG3; bne ->fff_fallback
1563 | la CARG2, TMPD_BLO 1817 | la CARG2, 7(BASE)
1818 } else {
1819 | lfd FARG1, 0(BASE)
1820 | bne ->fff_fallback // Exactly 1 argument.
1821 | checknum CARG3; bge ->fff_fallback
1822 | toint TMP0, FARG1
1823 | la CARG2, TMPD_BLO
1824 }
1564 | li CARG3, 1 1825 | li CARG3, 1
1565 | cmplwi TMP0, 255; bgt ->fff_fallback 1826 | cmplwi TMP0, 255; bgt ->fff_fallback
1566 |->fff_newstr: 1827 |->fff_newstr:
@@ -1577,20 +1838,36 @@ static void build_subroutines(BuildCtx *ctx)
1577 | ffgccheck 1838 | ffgccheck
1578 | cmplwi NARGS8:RC, 16 1839 | cmplwi NARGS8:RC, 16
1579 | lwz CARG3, 16(BASE) 1840 | lwz CARG3, 16(BASE)
1580 | lfd f0, 16(BASE) 1841 if (!LJ_DUALNUM) {
1842 | lfd f0, 16(BASE)
1843 }
1581 | lwz TMP0, 0(BASE) 1844 | lwz TMP0, 0(BASE)
1582 | lwz STR:CARG1, 4(BASE) 1845 | lwz STR:CARG1, 4(BASE)
1583 | blt ->fff_fallback 1846 | blt ->fff_fallback
1584 | lwz CARG2, 8(BASE) 1847 | lwz CARG2, 8(BASE)
1585 | lfd f1, 8(BASE) 1848 if (LJ_DUALNUM) {
1849 | lwz TMP1, 12(BASE)
1850 } else {
1851 | lfd f1, 8(BASE)
1852 }
1586 | li TMP2, -1 1853 | li TMP2, -1
1587 | beq >1 1854 | beq >1
1588 | checknum CARG3; bge ->fff_fallback 1855 if (LJ_DUALNUM) {
1589 | toint TMP2, f0 1856 | checknum CARG3
1590 |1: 1857 | lwz TMP2, 20(BASE)
1591 | checknum CARG2; bge ->fff_fallback 1858 | bne ->fff_fallback
1859 |1:
1860 | checknum CARG2; bne ->fff_fallback
1861 } else {
1862 | checknum CARG3; bge ->fff_fallback
1863 | toint TMP2, f0
1864 |1:
1865 | checknum CARG2; bge ->fff_fallback
1866 }
1592 | checkstr TMP0; bne ->fff_fallback 1867 | checkstr TMP0; bne ->fff_fallback
1593 | toint TMP1, f1 1868 if (!LJ_DUALNUM) {
1869 | toint TMP1, f1
1870 }
1594 | lwz TMP0, STR:CARG1->len 1871 | lwz TMP0, STR:CARG1->len
1595 | cmplw TMP0, TMP2 // len < end? (unsigned compare) 1872 | cmplw TMP0, TMP2 // len < end? (unsigned compare)
1596 | addi TMP3, TMP2, 1 1873 | addi TMP3, TMP2, 1
@@ -1627,14 +1904,22 @@ static void build_subroutines(BuildCtx *ctx)
1627 |.ffunc string_rep // Only handle the 1-char case inline. 1904 |.ffunc string_rep // Only handle the 1-char case inline.
1628 | ffgccheck 1905 | ffgccheck
1629 | cmplwi NARGS8:RC, 16 1906 | cmplwi NARGS8:RC, 16
1630 | lwz CARG3, 0(BASE) 1907 | lwz TMP0, 0(BASE)
1631 | lwz STR:CARG1, 4(BASE) 1908 | lwz STR:CARG1, 4(BASE)
1632 | lwz CARG4, 8(BASE) 1909 | lwz CARG4, 8(BASE)
1633 | lfd FARG2, 8(BASE) 1910 if (LJ_DUALNUM) {
1911 | lwz CARG3, 12(BASE)
1912 } else {
1913 | lfd FARG2, 8(BASE)
1914 }
1634 | blt ->fff_fallback 1915 | blt ->fff_fallback
1635 | checkstr CARG3; bne ->fff_fallback 1916 | checkstr TMP0; bne ->fff_fallback
1636 | checknum CARG4; bge ->fff_fallback 1917 if (LJ_DUALNUM) {
1637 | toint CARG3, FARG2 1918 | checknum CARG4; bne ->fff_fallback
1919 } else {
1920 | checknum CARG4; bge ->fff_fallback
1921 | toint CARG3, FARG2
1922 }
1638 | lwz TMP0, STR:CARG1->len 1923 | lwz TMP0, STR:CARG1->len
1639 | cmpwi CARG3, 0 1924 | cmpwi CARG3, 0
1640 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1925 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
@@ -1728,34 +2013,39 @@ static void build_subroutines(BuildCtx *ctx)
1728 |//-- Bit library -------------------------------------------------------- 2013 |//-- Bit library --------------------------------------------------------
1729 | 2014 |
1730 |.macro .ffunc_bit, name 2015 |.macro .ffunc_bit, name
2016 ||if (LJ_DUALNUM) {
2017 | .ffunc_1 bit_..name
2018 | checknum CARG3; bnel ->fff_tobit_fb
2019 ||} else {
1731 | .ffunc_n bit_..name 2020 | .ffunc_n bit_..name
1732 | fadd FARG1, FARG1, TOBIT 2021 | fadd FARG1, FARG1, TOBIT
1733 | stfd FARG1, TMPD 2022 | stfd FARG1, TMPD
1734 | lwz CARG1, TMPD_LO 2023 | lwz CARG1, TMPD_LO
2024 ||}
1735 |.endmacro 2025 |.endmacro
1736 | 2026 |
1737 |.ffunc_bit tobit
1738 |->fff_resi:
1739 | tonum_i FARG1, CRET1
1740 |->fff_resn:
1741 | lwz PC, FRAME_PC(BASE)
1742 | la RA, -8(BASE)
1743 | stfd FARG1, -8(BASE)
1744 | b ->fff_res1
1745 |
1746 |.macro .ffunc_bit_op, name, ins 2027 |.macro .ffunc_bit_op, name, ins
1747 | .ffunc_bit name 2028 | .ffunc_bit name
1748 | li TMP1, 8 2029 | addi TMP1, BASE, 8
2030 | add TMP2, BASE, NARGS8:RC
1749 |1: 2031 |1:
1750 | lwzx CARG4, BASE, TMP1 2032 | lwz CARG4, 0(TMP1)
1751 | cmplw cr1, TMP1, NARGS8:RC 2033 | cmplw cr1, TMP1, TMP2
1752 | lfdx FARG1, BASE, TMP1 2034 ||if (LJ_DUALNUM) {
1753 | checknum CARG4 2035 | lwz CARG2, 4(TMP1)
2036 ||} else {
2037 | lfd FARG1, 0(TMP1)
2038 ||}
1754 | bge cr1, ->fff_resi 2039 | bge cr1, ->fff_resi
2040 | checknum CARG4
2041 ||if (LJ_DUALNUM) {
2042 | bnel ->fff_bitop_fb
2043 ||} else {
1755 | fadd FARG1, FARG1, TOBIT 2044 | fadd FARG1, FARG1, TOBIT
1756 | bge ->fff_fallback 2045 | bge ->fff_fallback
1757 | stfd FARG1, TMPD 2046 | stfd FARG1, TMPD
1758 | lwz CARG2, TMPD_LO 2047 | lwz CARG2, TMPD_LO
2048 ||}
1759 | ins CARG1, CARG1, CARG2 2049 | ins CARG1, CARG1, CARG2
1760 | addi TMP1, TMP1, 8 2050 | addi TMP1, TMP1, 8
1761 | b <1 2051 | b <1
@@ -1777,6 +2067,12 @@ static void build_subroutines(BuildCtx *ctx)
1777 | b ->fff_resi 2067 | b ->fff_resi
1778 | 2068 |
1779 |.macro .ffunc_bit_sh, name, ins, shmod 2069 |.macro .ffunc_bit_sh, name, ins, shmod
2070 ||if (LJ_DUALNUM) {
2071 | .ffunc_2 bit_..name
2072 | checknum CARG3; bnel ->fff_tobit_fb
2073 | // Note: no inline conversion from number for 2nd argument!
2074 | checknum CARG4; bne ->fff_fallback
2075 ||} else {
1780 | .ffunc_nn bit_..name 2076 | .ffunc_nn bit_..name
1781 | fadd FARG1, FARG1, TOBIT 2077 | fadd FARG1, FARG1, TOBIT
1782 | fadd FARG2, FARG2, TOBIT 2078 | fadd FARG2, FARG2, TOBIT
@@ -1784,6 +2080,7 @@ static void build_subroutines(BuildCtx *ctx)
1784 | lwz CARG1, TMPD_LO 2080 | lwz CARG1, TMPD_LO
1785 | stfd FARG2, TMPD 2081 | stfd FARG2, TMPD
1786 | lwz CARG2, TMPD_LO 2082 | lwz CARG2, TMPD_LO
2083 ||}
1787 |.if shmod == 1 2084 |.if shmod == 1
1788 | rlwinm CARG2, CARG2, 0, 27, 31 2085 | rlwinm CARG2, CARG2, 0, 27, 31
1789 |.elif shmod == 2 2086 |.elif shmod == 2
@@ -1799,6 +2096,39 @@ static void build_subroutines(BuildCtx *ctx)
1799 |.ffunc_bit_sh rol, rotlw, 0 2096 |.ffunc_bit_sh rol, rotlw, 0
1800 |.ffunc_bit_sh ror, rotlw, 2 2097 |.ffunc_bit_sh ror, rotlw, 2
1801 | 2098 |
2099 |.ffunc_bit tobit
2100 if (LJ_DUALNUM) {
2101 | b ->fff_resi
2102 } else {
2103 |->fff_resi:
2104 | tonum_i FARG1, CRET1
2105 }
2106 |->fff_resn:
2107 | lwz PC, FRAME_PC(BASE)
2108 | la RA, -8(BASE)
2109 | stfd FARG1, -8(BASE)
2110 | b ->fff_res1
2111 |
2112 |// Fallback FP number to bit conversion.
2113 |->fff_tobit_fb: // Entered via bnel right after checknum on the first argument (.ffunc_bit, .ffunc_bit_sh); label exists in all builds, body only with LJ_DUALNUM.
2114 if (LJ_DUALNUM) {
2115 | lfd FARG1, 0(BASE) // Reload the first argument as a double.
2116 | bgt ->fff_fallback // cr0 still holds the caller's checknum result; gt is handled as "not a number" elsewhere in this file.
2117 | fadd FARG1, FARG1, TOBIT // Add the 2^52 + 2^51 bias so the integer value ends up in the low word.
2118 | stfd FARG1, TMPD
2119 | lwz CARG1, TMPD_LO // CARG1 = low 32 bits of the biased double.
2120 | blr // Return to the instruction after the bnel.
2121 }
2122 |->fff_bitop_fb: // Same conversion for the argument at TMP1 (entered via bnel from .ffunc_bit_op); result goes to CARG2.
2123 if (LJ_DUALNUM) {
2124 | lfd FARG1, 0(TMP1) // Load the current argument as a double.
2125 | bgt ->fff_fallback // Caller's checknum result: gt is handled as "not a number" elsewhere in this file.
2126 | fadd FARG1, FARG1, TOBIT // Same 2^52 + 2^51 bias as above.
2127 | stfd FARG1, TMPD
2128 | lwz CARG2, TMPD_LO // CARG2 = low 32 bits of the biased double.
2129 | blr // Return to the instruction after the bnel.
2130 }
2131 |
1802 |//----------------------------------------------------------------------- 2132 |//-----------------------------------------------------------------------
1803 | 2133 |
1804 |->fff_fallback: // Call fast function fallback handler. 2134 |->fff_fallback: // Call fast function fallback handler.
@@ -1981,6 +2311,17 @@ static void build_subroutines(BuildCtx *ctx)
1981 | b extern trunc 2311 | b extern trunc
1982#endif 2312#endif
1983 | 2313 |
2314 |->vm_modi: // Integer modulo: CARG1 = CARG1 mod CARG2, with the remainder adjusted to the sign of the divisor. Clobbers TMP0 and CARG3.
2315 | divwo. TMP0, CARG1, CARG2 // Truncating signed quotient; the o/. forms record overflow in XER and cr0.
2316 | bsolr // Return early if summary overflow is set; CARG1 is left unmodified. NOTE(review): callers presumably test SO afterwards -- confirm.
2317 | xor. CARG3, CARG1, CARG2 // cr0.lt set iff the operands have different signs.
2318 | mullw TMP0, TMP0, CARG2
2319 | sub CARG1, CARG1, TMP0 // CARG1 = dividend - quotient*divisor (truncated remainder).
2320 | bgelr // Same signs (cr0 from xor.): truncated remainder is already the result.
2321 | cmpwi CARG1, 0; beqlr // Zero remainder needs no adjustment.
2322 | add CARG1, CARG1, CARG2 // Different signs and non-zero remainder: shift it to the divisor's sign.
2323 | blr
2324 |
1984 |->vm_powi: 2325 |->vm_powi:
1985#if LJ_HASJIT 2326#if LJ_HASJIT
1986 | NYI 2327 | NYI
@@ -2060,64 +2401,142 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2060 2401
2061 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2402 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2062 | // RA = src1*8, RD = src2*8, JMP with RD = target 2403 | // RA = src1*8, RD = src2*8, JMP with RD = target
2063 | lwzx TMP0, BASE, RA 2404 if (LJ_DUALNUM) {
2064 | addi PC, PC, 4 2405 | lwzux TMP0, RA, BASE
2065 | lfdx f0, BASE, RA 2406 | addi PC, PC, 4
2066 | lwzx TMP1, BASE, RD 2407 | lwz CARG2, 4(RA)
2067 | checknum cr0, TMP0 2408 | lwzux TMP1, RD, BASE
2068 | lwz TMP2, -4(PC) 2409 | lwz TMP2, -4(PC)
2069 | lfdx f1, BASE, RD 2410 | checknum cr0, TMP0
2070 | checknum cr1, TMP1 2411 | lwz CARG3, 4(RD)
2071 | decode_RD4 TMP2, TMP2 2412 | decode_RD4 TMP2, TMP2
2072 | bge cr0, ->vmeta_comp 2413 | checknum cr1, TMP1
2073 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 2414 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2074 | bge cr1, ->vmeta_comp 2415 | bne cr0, >7
2075 | fcmpu cr0, f0, f1 2416 | bne cr1, >8
2076 if (op == BC_ISLT) { 2417 | cmpw CARG2, CARG3
2077 | bge >1 2418 if (op == BC_ISLT) {
2078 } else if (op == BC_ISGE) { 2419 | bge >2
2079 | blt >1 2420 } else if (op == BC_ISGE) {
2080 } else if (op == BC_ISLE) { 2421 | blt >2
2081 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 2422 } else if (op == BC_ISLE) {
2082 | bge >1 2423 | bgt >2
2424 } else {
2425 | ble >2
2426 }
2427 |1:
2428 | add PC, PC, TMP2
2429 |2:
2430 | ins_next
2431 |
2432 |7: // RA is not an integer.
2433 | bgt cr0, ->vmeta_comp
2434 | // RA is a number.
2435 | lfd f0, 0(RA)
2436 | bgt cr1, ->vmeta_comp
2437 | blt cr1, >4
2438 | // RA is a number, RD is an integer.
2439 | tonum_i f1, CARG3
2440 | b >5
2441 |
2442 |8: // RA is an integer, RD is a number.
2443 | tonum_i f0, CARG2
2444 |4:
2445 | lfd f1, 0(RD)
2446 |5:
2447 | fcmpu cr0, f0, f1
2448 if (op == BC_ISLT) {
2449 | bge <2
2450 } else if (op == BC_ISGE) {
2451 | blt <2
2452 } else if (op == BC_ISLE) {
2453 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
2454 | bge <2
2455 } else {
2456 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
2457 | blt <2
2458 }
2459 | b <1
2083 } else { 2460 } else {
2084 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq 2461 | lwzx TMP0, BASE, RA
2085 | blt >1 2462 | addi PC, PC, 4
2463 | lfdx f0, BASE, RA
2464 | lwzx TMP1, BASE, RD
2465 | checknum cr0, TMP0
2466 | lwz TMP2, -4(PC)
2467 | lfdx f1, BASE, RD
2468 | checknum cr1, TMP1
2469 | decode_RD4 TMP2, TMP2
2470 | bge cr0, ->vmeta_comp
2471 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2472 | bge cr1, ->vmeta_comp
2473 | fcmpu cr0, f0, f1
2474 if (op == BC_ISLT) {
2475 | bge >1
2476 } else if (op == BC_ISGE) {
2477 | blt >1
2478 } else if (op == BC_ISLE) {
2479 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
2480 | bge >1
2481 } else {
2482 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
2483 | blt >1
2484 }
2485 | add PC, PC, TMP2
2486 |1:
2487 | ins_next
2086 } 2488 }
2087 | add PC, PC, TMP2
2088 |1:
2089 | ins_next
2090 break; 2489 break;
2091 2490
2092 case BC_ISEQV: case BC_ISNEV: 2491 case BC_ISEQV: case BC_ISNEV:
2093 vk = op == BC_ISEQV; 2492 vk = op == BC_ISEQV;
2094 | // RA = src1*8, RD = src2*8, JMP with RD = target 2493 | // RA = src1*8, RD = src2*8, JMP with RD = target
2095 | lwzux TMP0, RA, BASE 2494 if (LJ_DUALNUM) {
2096 | lwz TMP2, 0(PC) 2495 | lwzux TMP0, RA, BASE
2097 | lfd f0, 0(RA) 2496 | addi PC, PC, 4
2098 | addi PC, PC, 4 2497 | lwz CARG2, 4(RA)
2099 | lwzux TMP1, RD, BASE 2498 | lwzux TMP1, RD, BASE
2100 | checknum cr0, TMP0 2499 | checknum cr0, TMP0
2101 | decode_RD4 TMP2, TMP2 2500 | lwz INS, -4(PC)
2102 | lfd f1, 0(RD) 2501 | checknum cr1, TMP1
2103 | checknum cr1, TMP1 2502 | decode_RD4 TMP2, INS
2104 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 2503 | lwz CARG3, 4(RD)
2105 | bge cr0, >5 2504 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
2106 | bge cr1, >5 2505 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2107 | fcmpu cr0, f0, f1 2506 if (vk) {
2108 if (vk) { 2507 | ble cr7, ->BC_ISEQN_Z
2109 | bne >1 2508 } else {
2110 | add PC, PC, TMP2 2509 | ble cr7, ->BC_ISNEN_Z
2510 }
2111 } else { 2511 } else {
2112 | beq >1 2512 | lwzux TMP0, RA, BASE
2113 | add PC, PC, TMP2 2513 | lwz TMP2, 0(PC)
2514 | lfd f0, 0(RA)
2515 | addi PC, PC, 4
2516 | lwzux TMP1, RD, BASE
2517 | checknum cr0, TMP0
2518 | decode_RD4 TMP2, TMP2
2519 | lfd f1, 0(RD)
2520 | checknum cr1, TMP1
2521 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2522 | bge cr0, >5
2523 | bge cr1, >5
2524 | fcmpu cr0, f0, f1
2525 if (vk) {
2526 | bne >1
2527 | add PC, PC, TMP2
2528 } else {
2529 | beq >1
2530 | add PC, PC, TMP2
2531 }
2532 |1:
2533 | ins_next
2114 } 2534 }
2115 |1:
2116 | ins_next
2117 |
2118 |5: // Either or both types are not numbers. 2535 |5: // Either or both types are not numbers.
2119 | lwz CARG2, 4(RA) 2536 if (!LJ_DUALNUM) {
2120 | lwz CARG3, 4(RD) 2537 | lwz CARG2, 4(RA)
2538 | lwz CARG3, 4(RD)
2539 }
2121 | not TMP3, TMP0 2540 | not TMP3, TMP0
2122 | cmplw TMP0, TMP1 2541 | cmplw TMP0, TMP1
2123 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 2542 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
@@ -2138,7 +2557,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2138 | add PC, PC, TMP2 2557 | add PC, PC, TMP2
2139 |6: 2558 |6:
2140 } 2559 }
2141 | blt cr0, <1 // Done if 1 or 2. 2560 if (LJ_DUALNUM) {
2561 | bge cr0, >2 // Done if 1 or 2.
2562 |1:
2563 | ins_next
2564 |2:
2565 } else {
2566 | blt cr0, <1 // Done if 1 or 2.
2567 }
2142 | blt cr6, <1 // Done if not tab/ud. 2568 | blt cr6, <1 // Done if not tab/ud.
2143 | 2569 |
2144 | // Different tables or userdatas. Need to check __eq metamethod. 2570 | // Different tables or userdatas. Need to check __eq metamethod.
@@ -2183,32 +2609,84 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2183 case BC_ISEQN: case BC_ISNEN: 2609 case BC_ISEQN: case BC_ISNEN:
2184 vk = op == BC_ISEQN; 2610 vk = op == BC_ISEQN;
2185 | // RA = src*8, RD = num_const*8, JMP with RD = target 2611 | // RA = src*8, RD = num_const*8, JMP with RD = target
2186 | lwzx TMP0, BASE, RA 2612 if (LJ_DUALNUM) {
2187 | lfdx f0, BASE, RA 2613 | lwzux TMP0, RA, BASE
2188 | addi PC, PC, 4 2614 | addi PC, PC, 4
2189 | lfdx f1, KBASE, RD 2615 | lwz CARG2, 4(RA)
2190 | lwz INS, -4(PC) 2616 | lwzux TMP1, RD, KBASE
2191 | checknum TMP0; bge >5 2617 | checknum cr0, TMP0
2192 | fcmpu cr0, f0, f1 2618 | lwz INS, -4(PC)
2193 | decode_RD4 TMP2, INS 2619 | checknum cr1, TMP1
2194 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 2620 | decode_RD4 TMP2, INS
2621 | lwz CARG3, 4(RD)
2622 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2623 if (vk) {
2624 |->BC_ISEQN_Z:
2625 } else {
2626 |->BC_ISNEN_Z:
2627 }
2628 | bne cr0, >7
2629 | bne cr1, >8
2630 | cmpw CARG2, CARG3
2631 |4:
2632 } else {
2633 if (vk) {
2634 |->BC_ISEQN_Z: // Dummy label.
2635 } else {
2636 |->BC_ISNEN_Z: // Dummy label.
2637 }
2638 | lwzx TMP0, BASE, RA
2639 | addi PC, PC, 4
2640 | lfdx f0, BASE, RA
2641 | lwz INS, -4(PC)
2642 | lfdx f1, KBASE, RD
2643 | decode_RD4 TMP2, INS
2644 | checknum TMP0
2645 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
2646 | bge >3
2647 | fcmpu cr0, f0, f1
2648 }
2195 if (vk) { 2649 if (vk) {
2196 | bne >5 2650 | bne >1
2197 | add PC, PC, TMP2 2651 | add PC, PC, TMP2
2198 |5: 2652 |1:
2653 if (!LJ_HASFFI) {
2654 |3:
2655 }
2199 } else { 2656 } else {
2200 | beq >2 2657 | beq >2
2201 |1: 2658 |1:
2659 if (!LJ_HASFFI) {
2660 |3:
2661 }
2202 | add PC, PC, TMP2 2662 | add PC, PC, TMP2
2203 |2: 2663 |2:
2204 } 2664 }
2205 | ins_next 2665 | ins_next
2206 if (!vk) { 2666 if (LJ_HASFFI) {
2207 |5: 2667 |3:
2208 | decode_RD4 TMP2, INS 2668 | cmpwi TMP0, LJ_TCDATA
2209 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 2669 | beq ->vmeta_equal_cd
2210 | b <1 2670 | b <1
2211 } 2671 }
2672 if (LJ_DUALNUM) {
2673 |7: // RA is not an integer.
2674 | bge cr0, <3
2675 | // RA is a number.
2676 | lfd f0, 0(RA)
2677 | blt cr1, >1
2678 | // RA is a number, RD is an integer.
2679 | tonum_i f1, CARG3
2680 | b >2
2681 |
2682 |8: // RA is an integer, RD is a number.
2683 | tonum_i f0, CARG2
2684 |1:
2685 | lfd f1, 0(RD)
2686 |2:
2687 | fcmpu cr0, f0, f1
2688 | b <4
2689 }
2212 break; 2690 break;
2213 2691
2214 case BC_ISEQP: case BC_ISNEP: 2692 case BC_ISEQP: case BC_ISNEP:
@@ -2291,12 +2769,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2291 | // RA = dst*8, RD = src*8 2769 | // RA = dst*8, RD = src*8
2292 | lwzux TMP1, RD, BASE 2770 | lwzux TMP1, RD, BASE
2293 | lwz TMP0, 4(RD) 2771 | lwz TMP0, 4(RD)
2294 | checknum TMP1; bge ->vmeta_unm 2772 | checknum TMP1
2773 if (LJ_DUALNUM) {
2774 | bne >5
2775 | nego. TMP0, TMP0
2776 | bso >4
2777 |1:
2778 | ins_next1
2779 | stwux TISNUM, RA, BASE
2780 | stw TMP0, 4(RA)
2781 |3:
2782 | ins_next2
2783 |4: // Potential overflow.
2784 | mcrxr cr0; ble <1 // Ignore unrelated overflow.
2785 | lus TMP1, 0x41e0 // 2^31.
2786 | li TMP0, 0
2787 | b >7
2788 }
2789 |5:
2790 | bge ->vmeta_unm
2295 | xoris TMP1, TMP1, 0x8000 2791 | xoris TMP1, TMP1, 0x8000
2792 |7:
2296 | ins_next1 2793 | ins_next1
2297 | stwux TMP1, RA, BASE 2794 | stwux TMP1, RA, BASE
2298 | stw TMP0, 4(RA) 2795 | stw TMP0, 4(RA)
2299 | ins_next2 2796 if (LJ_DUALNUM) {
2797 | b <3
2798 } else {
2799 | ins_next2
2800 }
2300 break; 2801 break;
2301 case BC_LEN: 2802 case BC_LEN:
2302 | // RA = dst*8, RD = src*8 2803 | // RA = dst*8, RD = src*8
@@ -2305,9 +2806,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2305 | checkstr TMP0; bne >2 2806 | checkstr TMP0; bne >2
2306 | lwz CRET1, STR:CARG1->len 2807 | lwz CRET1, STR:CARG1->len
2307 |1: 2808 |1:
2308 | tonum_u f0, CRET1 // Result is a non-negative integer. 2809 if (LJ_DUALNUM) {
2309 | ins_next1 2810 | ins_next1
2310 | stfdx f0, BASE, RA 2811 | stwux TISNUM, RA, BASE
2812 | stw CRET1, 4(RA)
2813 } else {
2814 | tonum_u f0, CRET1 // Result is a non-negative integer.
2815 | ins_next1
2816 | stfdx f0, BASE, RA
2817 }
2311 | ins_next2 2818 | ins_next2
2312 |2: 2819 |2:
2313 | checktab TMP0; bne ->vmeta_len 2820 | checktab TMP0; bne ->vmeta_len
@@ -2332,78 +2839,197 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2332 2839
2333 /* -- Binary ops -------------------------------------------------------- */ 2840 /* -- Binary ops -------------------------------------------------------- */
2334 2841
2335 |.macro ins_arithpre, t0, t1 2842 |.macro ins_arithpre
2336 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 2843 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2337 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 2844 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2338 ||switch (vk) { 2845 ||switch (vk) {
2339 ||case 0: 2846 ||case 0:
2340 | lwzx CARG1, BASE, RB 2847 | lwzx TMP1, BASE, RB
2341 | lfdx t0, BASE, RB 2848 ||if (LJ_DUALNUM) {
2342 | lfdx t1, KBASE, RC 2849 | lwzx TMP2, KBASE, RC
2343 | checknum CARG1; bge ->vmeta_arith_vn 2850 ||}
2851 | lfdx f14, BASE, RB
2852 | lfdx f15, KBASE, RC
2853 ||if (LJ_DUALNUM) {
2854 | checknum cr0, TMP1
2855 | checknum cr1, TMP2
2856 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
2857 | bge ->vmeta_arith_vn
2858 ||} else {
2859 | checknum TMP1; bge ->vmeta_arith_vn
2860 ||}
2344 || break; 2861 || break;
2345 ||case 1: 2862 ||case 1:
2346 | lwzx CARG1, BASE, RB 2863 | lwzx TMP1, BASE, RB
2347 | lfdx t1, BASE, RB 2864 ||if (LJ_DUALNUM) {
2348 | lfdx t0, KBASE, RC 2865 | lwzx TMP2, KBASE, RC
2349 | checknum CARG1; bge ->vmeta_arith_nv 2866 ||}
2867 | lfdx f15, BASE, RB
2868 | lfdx f14, KBASE, RC
2869 ||if (LJ_DUALNUM) {
2870 | checknum cr0, TMP1
2871 | checknum cr1, TMP2
2872 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
2873 | bge ->vmeta_arith_nv
2874 ||} else {
2875 | checknum TMP1; bge ->vmeta_arith_nv
2876 ||}
2350 || break; 2877 || break;
2351 ||default: 2878 ||default:
2352 | lwzx CARG1, BASE, RB 2879 | lwzx TMP1, BASE, RB
2353 | lwzx CARG2, BASE, RC 2880 | lwzx TMP2, BASE, RC
2354 | lfdx t0, BASE, RB 2881 | lfdx f14, BASE, RB
2355 | lfdx t1, BASE, RC 2882 | lfdx f15, BASE, RC
2356 | checknum cr0, CARG1 2883 | checknum cr0, TMP1
2357 | checknum cr1, CARG2 2884 | checknum cr1, TMP2
2358 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 2885 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
2359 | bge ->vmeta_arith_vv 2886 | bge ->vmeta_arith_vv
2360 || break; 2887 || break;
2361 ||} 2888 ||}
2362 |.endmacro 2889 |.endmacro
2363 | 2890 |
2364 |.macro ins_arith, ins 2891 |.macro ins_arithfallback, ins
2365 | ins_arithpre f0, f1 2892 ||switch (vk) {
2893 ||case 0:
2894 | ins ->vmeta_arith_vn2
2895 || break;
2896 ||case 1:
2897 | ins ->vmeta_arith_nv2
2898 || break;
2899 ||default:
2900 | ins ->vmeta_arith_vv2
2901 || break;
2902 ||}
2903 |.endmacro
2904 |
2905 |.macro intmod, a, b, c
2906 |->BC_MODVNI_Z:
2907 | bl ->vm_modi
2908 |.endmacro
2909 |
2910 |.macro fpmod, a, b, c
2911 ||if (!LJ_DUALNUM) {
2912 |->BC_MODVNI_Z:
2913 ||}
2914 |->BC_MODVN_Z:
2915 | fdiv FARG1, b, c
2916 | // NYI: Use internal implementation of floor.
2917 | bl extern floor // floor(b/c)
2918 | fmul a, FARG1, c
2919 | fsub a, b, a // b - floor(b/c)*c
2920 |.endmacro
2921 |
2922 |.macro ins_arithfp, fpins
2923 | ins_arithpre
2924 |.if "fpins" == "fpmod_"
2925 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2926 |.else
2927 | fpins f0, f14, f15
2366 | ins_next1 2928 | ins_next1
2367 | ins f0, f0, f1
2368 | stfdx f0, BASE, RA 2929 | stfdx f0, BASE, RA
2369 | ins_next2 2930 | ins_next2
2931 |.endif
2932 |.endmacro
2933 |
2934 |.macro ins_arithdn, intins, fpins
2935 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2936 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2937 ||switch (vk) {
2938 ||case 0:
2939 | lwzux TMP1, RB, BASE
2940 | lwzux TMP2, RC, KBASE
2941 | lwz CARG1, 4(RB)
2942 | checknum cr0, TMP1
2943 | lwz CARG2, 4(RC)
2944 || break;
2945 ||case 1:
2946 | lwzux TMP1, RB, BASE
2947 | lwzux TMP2, RC, KBASE
2948 | lwz CARG2, 4(RB)
2949 | checknum cr0, TMP1
2950 | lwz CARG1, 4(RC)
2951 || break;
2952 ||default:
2953 | lwzux TMP1, RB, BASE
2954 | lwzux TMP2, RC, BASE
2955 | lwz CARG1, 4(RB)
2956 | checknum cr0, TMP1
2957 | lwz CARG2, 4(RC)
2958 || break;
2959 ||}
2960 | checknum cr1, TMP2
2961 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
2962 | bne >5
2963 |.if "intins" == "intmod_"
2964 | b ->BC_MODVNI_Z // Avoid 3 copies. It's slow anyway.
2965 |.else
2966 | intins CARG1, CARG1, CARG2
2967 | bso >4
2968 |1:
2969 | ins_next1
2970 | stwux TISNUM, RA, BASE
2971 | stw CARG1, 4(RA)
2972 |2:
2973 | ins_next2
2974 |4: // Overflow.
2975 | mcrxr cr0; ble <1 // Ignore unrelated overflow.
2976 | ins_arithfallback b
2977 |.endif
2978 |5: // FP variant.
2979 ||if (vk == 1) {
2980 | lfd f15, 0(RB)
2981 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
2982 | lfd f14, 0(RC)
2983 ||} else {
2984 | lfd f14, 0(RB)
2985 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
2986 | lfd f15, 0(RC)
2987 ||}
2988 | ins_arithfallback bge
2989 |.if "fpins" == "fpmod_"
2990 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2991 |.else
2992 | fpins f0, f14, f15
2993 | ins_next1
2994 | stfdx f0, BASE, RA
2995 | b <2
2996 |.endif
2997 |.endmacro
2998 |
2999 |.macro ins_arith, intins, fpins
3000 ||if (LJ_DUALNUM) {
3001 | ins_arithdn intins, fpins
3002 ||} else {
3003 | ins_arithfp fpins
3004 ||}
2370 |.endmacro 3005 |.endmacro
2371 3006
2372 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3007 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2373 | ins_arith fadd 3008 | ins_arith addo., fadd
2374 break; 3009 break;
2375 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3010 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2376 | ins_arith fsub 3011 | ins_arith subo., fsub
2377 break; 3012 break;
2378 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3013 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2379 | ins_arith fmul 3014 | ins_arith mullwo., fmul
2380 break; 3015 break;
2381 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3016 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2382 | ins_arith fdiv 3017 | ins_arithfp fdiv
2383 break; 3018 break;
2384 case BC_MODVN: 3019 case BC_MODVN:
2385 | ins_arithpre f14, f15 3020 | ins_arith intmod, fpmod
2386 |->BC_MODVN_Z:
2387 | fdiv FARG1, f14, f15
2388 | // NYI: Use internal implementation of floor.
2389 | bl extern floor // floor(b/c)
2390 | fmul f0, FARG1, f15
2391 | ins_next1
2392 | fsub f0, f14, f0 // b - floor(b/c)*c
2393 | stfdx f0, BASE, RA
2394 | ins_next2
2395 break; 3021 break;
2396 case BC_MODNV: case BC_MODVV: 3022 case BC_MODNV: case BC_MODVV:
2397 | ins_arithpre f14, f15 3023 | ins_arith intmod_, fpmod_
2398 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2399 break; 3024 break;
2400 case BC_POW: 3025 case BC_POW:
2401 | lwzx CARG1, BASE, RB 3026 | // NYI: (partial) integer arithmetic.
3027 | lwzx TMP1, BASE, RB
2402 | lfdx FARG1, BASE, RB 3028 | lfdx FARG1, BASE, RB
2403 | lwzx CARG2, BASE, RC 3029 | lwzx TMP2, BASE, RC
2404 | lfdx FARG2, BASE, RC 3030 | lfdx FARG2, BASE, RC
2405 | checknum cr0, CARG1 3031 | checknum cr0, TMP1
2406 | checknum cr1, CARG2 3032 | checknum cr1, TMP2
2407 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3033 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
2408 | bge ->vmeta_arith_vv 3034 | bge ->vmeta_arith_vv
2409 | bl extern pow 3035 | bl extern pow
@@ -2459,33 +3085,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2459 break; 3085 break;
2460 case BC_KSHORT: 3086 case BC_KSHORT:
2461 | // RA = dst*8, RD = int16_literal*8 3087 | // RA = dst*8, RD = int16_literal*8
2462 | // NYI: which approach is faster? 3088 if (LJ_DUALNUM) {
2463 |.if 1 3089 | slwi RD, RD, 13
2464 | slwi RD, RD, 13 3090 | srawi RD, RD, 16
2465 | srawi RD, RD, 16 3091 | ins_next1
2466 | tonum_i f0, RD 3092 | stwux TISNUM, RA, BASE
2467 | ins_next1 3093 | stw RD, 4(RA)
2468 | stfdx f0, BASE, RA 3094 | ins_next2
2469 | ins_next2 3095 } else {
2470 |.else 3096 | // NYI: which approach is faster?
2471 | slwi RD, RD, 13 3097 |.if 1
2472 | srawi TMP1, RD, 31 3098 | slwi RD, RD, 13
2473 | xor TMP2, TMP1, RD 3099 | srawi RD, RD, 16
2474 | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) 3100 | tonum_i f0, RD
2475 | cntlzw TMP3, TMP2 3101 | ins_next1
2476 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 3102 | stfdx f0, BASE, RA
2477 | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa 3103 | ins_next2
2478 | subfic TMP3, RD, 0 3104 |.else
2479 | slwi TMP1, TMP1, 20 3105 | slwi RD, RD, 13
2480 | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) 3106 | srawi TMP1, RD, 31
2481 | subfe TMP0, TMP0, TMP0 3107 | xor TMP2, TMP1, RD
2482 | add RD, RD, TMP1 // hi = hi + exponent-1 3108 | sub TMP2, TMP2, TMP1 // TMP2 = abs(x)
2483 | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi 3109 | cntlzw TMP3, TMP2
2484 | ins_next1 3110 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1
2485 | stwux RD, RA, BASE 3111 | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa
2486 | stw ZERO, 4(RA) 3112 | subfic TMP3, RD, 0
2487 | ins_next2 3113 | slwi TMP1, TMP1, 20
2488 |.endif 3114 | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11)
3115 | subfe TMP0, TMP0, TMP0
3116 | add RD, RD, TMP1 // hi = hi + exponent-1
3117 | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi
3118 | ins_next1
3119 | stwux RD, RA, BASE
3120 | stw ZERO, 4(RA)
3121 | ins_next2
3122 |.endif
3123 }
2489 break; 3124 break;
2490 case BC_KNUM: 3125 case BC_KNUM:
2491 | // RA = dst*8, RD = num_const*8 3126 | // RA = dst*8, RD = num_const*8
@@ -2718,23 +3353,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2718 | lwzux CARG1, RB, BASE 3353 | lwzux CARG1, RB, BASE
2719 | lwzux CARG2, RC, BASE 3354 | lwzux CARG2, RC, BASE
2720 | lwz TAB:RB, 4(RB) 3355 | lwz TAB:RB, 4(RB)
2721 | lfd f0, 0(RC) 3356 if (LJ_DUALNUM) {
3357 | lwz RC, 4(RC)
3358 } else {
3359 | lfd f0, 0(RC)
3360 }
2722 | checktab CARG1 3361 | checktab CARG1
2723 | checknum cr1, CARG2 3362 | checknum cr1, CARG2
2724 | bne ->vmeta_tgetv 3363 | bne ->vmeta_tgetv
2725 | bge cr1, >5 3364 if (LJ_DUALNUM) {
2726 | // Convert number key to integer, check for integerness and range. 3365 | lwz TMP0, TAB:RB->asize
2727 | fctiwz f1, f0 3366 | bne cr1, >5
2728 | fadd f2, f0, TOBIT 3367 | lwz TMP1, TAB:RB->array
2729 | stfd f1, TMPD 3368 | cmplw TMP0, RC
2730 | lwz TMP0, TAB:RB->asize 3369 | slwi TMP2, RC, 3
2731 | fsub f2, f2, TOBIT 3370 } else {
2732 | lwz TMP2, TMPD_LO 3371 | bge cr1, >5
2733 | lwz TMP1, TAB:RB->array 3372 | // Convert number key to integer, check for integerness and range.
2734 | fcmpu cr1, f0, f2 3373 | fctiwz f1, f0
2735 | cmplw cr0, TMP0, TMP2 3374 | fadd f2, f0, TOBIT
2736 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq 3375 | stfd f1, TMPD
2737 | slwi TMP2, TMP2, 3 3376 | lwz TMP0, TAB:RB->asize
3377 | fsub f2, f2, TOBIT
3378 | lwz TMP2, TMPD_LO
3379 | lwz TMP1, TAB:RB->array
3380 | fcmpu cr1, f0, f2
3381 | cmplw cr0, TMP0, TMP2
3382 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq
3383 | slwi TMP2, TMP2, 3
3384 }
2738 | ble ->vmeta_tgetv // Integer key and in array part? 3385 | ble ->vmeta_tgetv // Integer key and in array part?
2739 | lwzx TMP0, TMP1, TMP2 3386 | lwzx TMP0, TMP1, TMP2
2740 | lfdx f14, TMP1, TMP2 3387 | lfdx f14, TMP1, TMP2
@@ -2755,7 +3402,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2755 | 3402 |
2756 |5: 3403 |5:
2757 | checkstr CARG2; bne ->vmeta_tgetv 3404 | checkstr CARG2; bne ->vmeta_tgetv
2758 | lwz STR:RC, 4(RC) 3405 if (!LJ_DUALNUM) {
3406 | lwz STR:RC, 4(RC)
3407 }
2759 | b ->BC_TGETS_Z // String key? 3408 | b ->BC_TGETS_Z // String key?
2760 break; 3409 break;
2761 case BC_TGETS: 3410 case BC_TGETS:
@@ -2838,23 +3487,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2838 | lwzux CARG1, RB, BASE 3487 | lwzux CARG1, RB, BASE
2839 | lwzux CARG2, RC, BASE 3488 | lwzux CARG2, RC, BASE
2840 | lwz TAB:RB, 4(RB) 3489 | lwz TAB:RB, 4(RB)
2841 | lfd f0, 0(RC) 3490 if (LJ_DUALNUM) {
3491 | lwz RC, 4(RC)
3492 } else {
3493 | lfd f0, 0(RC)
3494 }
2842 | checktab CARG1 3495 | checktab CARG1
2843 | checknum cr1, CARG2 3496 | checknum cr1, CARG2
2844 | bne ->vmeta_tsetv 3497 | bne ->vmeta_tsetv
2845 | bge cr1, >5 3498 if (LJ_DUALNUM) {
2846 | // Convert number key to integer, check for integerness and range. 3499 | lwz TMP0, TAB:RB->asize
2847 | fctiwz f1, f0 3500 | bne cr1, >5
2848 | fadd f2, f0, TOBIT 3501 | lwz TMP1, TAB:RB->array
2849 | stfd f1, TMPD 3502 | cmplw TMP0, RC
2850 | lwz TMP0, TAB:RB->asize 3503 | slwi TMP0, RC, 3
2851 | fsub f2, f2, TOBIT 3504 } else {
2852 | lwz TMP2, TMPD_LO 3505 | bge cr1, >5
2853 | lwz TMP1, TAB:RB->array 3506 | // Convert number key to integer, check for integerness and range.
2854 | fcmpu cr1, f0, f2 3507 | fctiwz f1, f0
2855 | cmplw cr0, TMP0, TMP2 3508 | fadd f2, f0, TOBIT
2856 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq 3509 | stfd f1, TMPD
2857 | slwi TMP0, TMP2, 3 3510 | lwz TMP0, TAB:RB->asize
3511 | fsub f2, f2, TOBIT
3512 | lwz TMP2, TMPD_LO
3513 | lwz TMP1, TAB:RB->array
3514 | fcmpu cr1, f0, f2
3515 | cmplw cr0, TMP0, TMP2
3516 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq
3517 | slwi TMP0, TMP2, 3
3518 }
2858 | ble ->vmeta_tsetv // Integer key and in array part? 3519 | ble ->vmeta_tsetv // Integer key and in array part?
2859 | lwzx TMP2, TMP1, TMP0 3520 | lwzx TMP2, TMP1, TMP0
2860 | lbz TMP3, TAB:RB->marked 3521 | lbz TMP3, TAB:RB->marked
@@ -2878,7 +3539,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2878 | 3539 |
2879 |5: 3540 |5:
2880 | checkstr CARG2; bne ->vmeta_tsetv 3541 | checkstr CARG2; bne ->vmeta_tsetv
2881 | lwz STR:RC, 4(RC) 3542 if (!LJ_DUALNUM) {
3543 | lwz STR:RC, 4(RC)
3544 }
2882 | b ->BC_TSETS_Z // String key? 3545 | b ->BC_TSETS_Z // String key?
2883 | 3546 |
2884 |7: // Possible table write barrier for the value. Skip valiswhite check. 3547 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -3164,14 +3827,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3164 | checknil TMP2 3827 | checknil TMP2
3165 | lwz INS, -4(PC) 3828 | lwz INS, -4(PC)
3166 | beq >4 3829 | beq >4
3167 | tonum_u f1, RC 3830 if (LJ_DUALNUM) {
3831 | stw RC, 4(RA)
3832 | stw TISNUM, 0(RA)
3833 } else {
3834 | tonum_u f1, RC
3835 }
3168 | addi RC, RC, 1 3836 | addi RC, RC, 1
3169 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 3837 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
3170 | stfd f0, 8(RA) 3838 | stfd f0, 8(RA)
3171 | decode_RD4 TMP1, INS 3839 | decode_RD4 TMP1, INS
3172 | stw RC, -4(RA) // Update control var. 3840 | stw RC, -4(RA) // Update control var.
3173 | add PC, TMP1, TMP3 3841 | add PC, TMP1, TMP3
3174 | stfd f1, 0(RA) 3842 if (!LJ_DUALNUM) {
3843 | stfd f1, 0(RA)
3844 }
3175 |3: 3845 |3:
3176 | ins_next 3846 | ins_next
3177 | 3847 |
@@ -3424,28 +4094,96 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3424 case BC_IFORL: 4094 case BC_IFORL:
3425 | // RA = base*8, RD = target (after end of loop or start of loop) 4095 | // RA = base*8, RD = target (after end of loop or start of loop)
3426 vk = (op == BC_IFORL || op == BC_JFORL); 4096 vk = (op == BC_IFORL || op == BC_JFORL);
4097 if (LJ_DUALNUM) {
4098 | // Integer loop.
4099 | lwzux TMP1, RA, BASE
4100 | lwz CARG1, FORL_IDX*8+4(RA)
4101 | cmplw cr0, TMP1, TISNUM
4102 if (vk) {
4103 | lwz CARG3, FORL_STEP*8+4(RA)
4104 | bne >9
4105 | addo. CARG1, CARG1, CARG3
4106 | cmpwi cr6, CARG3, 0
4107 | lwz CARG2, FORL_STOP*8+4(RA)
4108 | bso >6
4109 |4:
4110 | stw CARG1, FORL_IDX*8+4(RA)
4111 } else {
4112 | lwz TMP3, FORL_STEP*8(RA)
4113 | lwz CARG3, FORL_STEP*8+4(RA)
4114 | lwz TMP2, FORL_STOP*8(RA)
4115 | lwz CARG2, FORL_STOP*8+4(RA)
4116 | cmplw cr7, TMP3, TISNUM
4117 | cmplw cr1, TMP2, TISNUM
4118 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
4119 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
4120 | cmpwi cr6, CARG3, 0
4121 | bne >9
4122 }
4123 | blt cr6, >5
4124 | cmpw CARG1, CARG2
4125 |1:
4126 | stw TISNUM, FORL_EXT*8(RA)
4127 if (op != BC_JFORL) {
4128 | srwi RD, RD, 1
4129 }
4130 | stw CARG1, FORL_EXT*8+4(RA)
4131 if (op != BC_JFORL) {
4132 | add RD, PC, RD
4133 }
4134 if (op == BC_FORI) {
4135 | bgt >3 // See FP loop below.
4136 } else if (op == BC_JFORI) {
4137 | addis PC, RD, -(BCBIAS_J*4 >> 16)
4138 | ble =>BC_JLOOP
4139 } else if (op == BC_IFORL) {
4140 | bgt >2
4141 | addis PC, RD, -(BCBIAS_J*4 >> 16)
4142 } else {
4143 | ble =>BC_JLOOP
4144 }
4145 |2:
4146 | ins_next
4147 |5: // Invert check for negative step.
4148 | cmpw CARG2, CARG1
4149 | b <1
4150 if (vk) {
4151 |6: // Potential overflow.
4152 | mcrxr cr0; ble <4 // Ignore unrelated overflow.
4153 | b <2
4154 }
4155 }
3427 if (vk) { 4156 if (vk) {
3428 | lfdux f1, RA, BASE 4157 if (LJ_DUALNUM) {
4158 |9: // FP loop.
4159 | lfd f1, FORL_IDX*8(RA)
4160 } else {
4161 | lfdux f1, RA, BASE
4162 }
3429 | lfd f3, FORL_STEP*8(RA) 4163 | lfd f3, FORL_STEP*8(RA)
3430 | lfd f2, FORL_STOP*8(RA) 4164 | lfd f2, FORL_STOP*8(RA)
3431 | lwz TMP3, FORL_STEP*8(RA) 4165 | lwz TMP3, FORL_STEP*8(RA)
3432 | fadd f1, f1, f3 4166 | fadd f1, f1, f3
3433 | stfd f1, FORL_IDX*8(RA) 4167 | stfd f1, FORL_IDX*8(RA)
3434 } else { 4168 } else {
3435 | lwzux TMP1, RA, BASE 4169 if (LJ_DUALNUM) {
4170 |9: // FP loop.
4171 } else {
4172 | lwzux TMP1, RA, BASE
4173 | lwz TMP3, FORL_STEP*8(RA)
4174 | lwz TMP2, FORL_STOP*8(RA)
4175 | cmplw cr0, TMP1, TISNUM
4176 | cmplw cr7, TMP3, TISNUM
4177 | cmplw cr1, TMP2, TISNUM
4178 }
3436 | lfd f1, FORL_IDX*8(RA) 4179 | lfd f1, FORL_IDX*8(RA)
3437 | lwz TMP3, FORL_STEP*8(RA)
3438 | lfd f3, FORL_STEP*8(RA)
3439 | lwz TMP2, FORL_STOP*8(RA)
3440 | lfd f2, FORL_STOP*8(RA)
3441 | cmplw cr0, TMP1, TISNUM
3442 | cmplw cr7, TMP3, TISNUM
3443 | cmplw cr1, TMP2, TISNUM
3444 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 4180 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
4181 | lfd f3, FORL_STEP*8(RA)
3445 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4182 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
4183 | lfd f2, FORL_STOP*8(RA)
3446 | bge ->vmeta_for 4184 | bge ->vmeta_for
3447 } 4185 }
3448 | cmpwi cr3, TMP3, 0 4186 | cmpwi cr6, TMP3, 0
3449 if (op != BC_JFORL) { 4187 if (op != BC_JFORL) {
3450 | srwi RD, RD, 1 4188 | srwi RD, RD, 1
3451 } 4189 }
@@ -3457,22 +4195,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3457 if (op == BC_JFORI) { 4195 if (op == BC_JFORI) {
3458 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4196 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3459 } 4197 }
3460 | blt cr3, >5 4198 | blt cr6, >5
3461 if (op == BC_FORI) { 4199 if (op == BC_FORI) {
3462 | bgt >3 4200 | bgt >3
3463 } else if (op == BC_IFORL) { 4201 } else if (op == BC_IFORL) {
3464 | bgt >2 4202 if (LJ_DUALNUM) {
4203 | bgt <2
4204 } else {
4205 | bgt >2
4206 }
3465 |1: 4207 |1:
3466 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4208 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3467 } else { 4209 } else {
3468 | ble =>BC_JLOOP 4210 | ble =>BC_JLOOP
3469 } 4211 }
3470 |2: 4212 if (LJ_DUALNUM) {
3471 | ins_next 4213 | b <2
4214 } else {
4215 |2:
4216 | ins_next
4217 }
3472 |5: // Negative step. 4218 |5: // Negative step.
3473 if (op == BC_FORI) { 4219 if (op == BC_FORI) {
3474 | bge <2 4220 | bge <2
3475 |3: 4221 |3: // Used by integer loop, too.
3476 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4222 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3477 } else if (op == BC_IFORL) { 4223 } else if (op == BC_IFORL) {
3478 | bge <1 4224 | bge <1