diff options
Diffstat (limited to 'src/buildvm_ppc.dasc')
-rw-r--r-- | src/buildvm_ppc.dasc | 1266 |
1 files changed, 1006 insertions, 260 deletions
diff --git a/src/buildvm_ppc.dasc b/src/buildvm_ppc.dasc index de3346cb..9130ee41 100644 --- a/src/buildvm_ppc.dasc +++ b/src/buildvm_ppc.dasc | |||
@@ -30,8 +30,8 @@ | |||
30 | |.define TISNUM, r22 | 30 | |.define TISNUM, r22 |
31 | |.define TISNIL, r23 | 31 | |.define TISNIL, r23 |
32 | |.define ZERO, r24 | 32 | |.define ZERO, r24 |
33 | |.define TOBIT, f30 | 33 | |.define TOBIT, f30 // 2^52 + 2^51. |
34 | |.define TONUM, f31 | 34 | |.define TONUM, f31 // 2^52 + 2^51 + 2^31. |
35 | | | 35 | | |
36 | |// The following temporaries are not saved across C calls, except for RA. | 36 | |// The following temporaries are not saved across C calls, except for RA. |
37 | |.define RA, r20 // Callee-save. | 37 | |.define RA, r20 // Callee-save. |
@@ -652,11 +652,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
652 | | b >1 | 652 | | b >1 |
653 | | | 653 | | |
654 | |->vmeta_tgetb: // TMP0 = index | 654 | |->vmeta_tgetb: // TMP0 = index |
655 | | tonum_u f0, TMP0 | 655 | if (!LJ_DUALNUM) { |
656 | | tonum_u f0, TMP0 | ||
657 | } | ||
656 | | decode_RB8 RB, INS | 658 | | decode_RB8 RB, INS |
657 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | 659 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) |
658 | | add CARG2, BASE, RB | 660 | | add CARG2, BASE, RB |
659 | | stfd f0, 0(CARG3) | 661 | if (LJ_DUALNUM) { |
662 | | stw TISNUM, 0(CARG3) | ||
663 | | stw TMP0, 4(CARG3) | ||
664 | } else { | ||
665 | | stfd f0, 0(CARG3) | ||
666 | } | ||
660 | | b >1 | 667 | | b >1 |
661 | | | 668 | | |
662 | |->vmeta_tgetv: | 669 | |->vmeta_tgetv: |
@@ -710,11 +717,18 @@ static void build_subroutines(BuildCtx *ctx) | |||
710 | | b >1 | 717 | | b >1 |
711 | | | 718 | | |
712 | |->vmeta_tsetb: // TMP0 = index | 719 | |->vmeta_tsetb: // TMP0 = index |
713 | | tonum_u f0, TMP0 | 720 | if (!LJ_DUALNUM) { |
721 | | tonum_u f0, TMP0 | ||
722 | } | ||
714 | | decode_RB8 RB, INS | 723 | | decode_RB8 RB, INS |
715 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) | 724 | | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) |
716 | | add CARG2, BASE, RB | 725 | | add CARG2, BASE, RB |
717 | | stfd f0, 0(CARG3) | 726 | if (LJ_DUALNUM) { |
727 | | stw TISNUM, 0(CARG3) | ||
728 | | stw TMP0, 4(CARG3) | ||
729 | } else { | ||
730 | | stfd f0, 0(CARG3) | ||
731 | } | ||
718 | | b >1 | 732 | | b >1 |
719 | | | 733 | | |
720 | |->vmeta_tsetv: | 734 | |->vmeta_tsetv: |
@@ -752,9 +766,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
752 | |->vmeta_comp: | 766 | |->vmeta_comp: |
753 | | mr CARG1, L | 767 | | mr CARG1, L |
754 | | subi PC, PC, 4 | 768 | | subi PC, PC, 4 |
755 | | add CARG2, BASE, RA | 769 | if (LJ_DUALNUM) { |
770 | | mr CARG2, RA | ||
771 | } else { | ||
772 | | add CARG2, BASE, RA | ||
773 | } | ||
756 | | stw PC, SAVE_PC | 774 | | stw PC, SAVE_PC |
757 | | add CARG3, BASE, RD | 775 | if (LJ_DUALNUM) { |
776 | | mr CARG3, RD | ||
777 | } else { | ||
778 | | add CARG3, BASE, RD | ||
779 | } | ||
758 | | stw BASE, L->base | 780 | | stw BASE, L->base |
759 | | decode_OP1 CARG4, INS | 781 | | decode_OP1 CARG4, INS |
760 | | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) | 782 | | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) |
@@ -803,26 +825,53 @@ static void build_subroutines(BuildCtx *ctx) | |||
803 | | // Returns 0/1 or TValue * (metamethod). | 825 | | // Returns 0/1 or TValue * (metamethod). |
804 | | b <3 | 826 | | b <3 |
805 | | | 827 | | |
806 | |//-- Arithmetic metamethods --------------------------------------------- | 828 | |->vmeta_equal_cd: |
829 | #if LJ_HASFFI | ||
830 | | mr CARG2, INS | ||
831 | | subi PC, PC, 4 | ||
832 | | stw BASE, L->base | ||
833 | | mr CARG1, L | ||
834 | | stw PC, SAVE_PC | ||
835 | | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) | ||
836 | | // Returns 0/1 or TValue * (metamethod). | ||
837 | | b <3 | ||
838 | #endif | ||
807 | | | 839 | | |
808 | |->vmeta_arith_vn: | 840 | |//-- Arithmetic metamethods --------------------------------------------- |
809 | | add CARG3, BASE, RB | ||
810 | | add CARG4, KBASE, RC | ||
811 | | b >1 | ||
812 | | | 841 | | |
813 | |->vmeta_arith_nv: | 842 | |->vmeta_arith_nv: |
814 | | add CARG3, KBASE, RC | 843 | | add CARG3, KBASE, RC |
815 | | add CARG4, BASE, RB | 844 | | add CARG4, BASE, RB |
816 | | b >1 | 845 | | b >1 |
846 | |->vmeta_arith_nv2: | ||
847 | if (LJ_DUALNUM) { | ||
848 | | mr CARG3, RC | ||
849 | | mr CARG4, RB | ||
850 | | b >1 | ||
851 | } | ||
817 | | | 852 | | |
818 | |->vmeta_unm: | 853 | |->vmeta_unm: |
819 | | mr CARG3, RD | 854 | | mr CARG3, RD |
820 | | mr CARG4, RD | 855 | | mr CARG4, RD |
821 | | b >1 | 856 | | b >1 |
822 | | | 857 | | |
858 | |->vmeta_arith_vn: | ||
859 | | add CARG3, BASE, RB | ||
860 | | add CARG4, KBASE, RC | ||
861 | | b >1 | ||
862 | | | ||
823 | |->vmeta_arith_vv: | 863 | |->vmeta_arith_vv: |
824 | | add CARG3, BASE, RB | 864 | | add CARG3, BASE, RB |
825 | | add CARG4, BASE, RC | 865 | | add CARG4, BASE, RC |
866 | if (LJ_DUALNUM) { | ||
867 | | b >1 | ||
868 | } | ||
869 | |->vmeta_arith_vn2: | ||
870 | |->vmeta_arith_vv2: | ||
871 | if (LJ_DUALNUM) { | ||
872 | | mr CARG3, RB | ||
873 | | mr CARG4, RC | ||
874 | } | ||
826 | |1: | 875 | |1: |
827 | | add CARG2, BASE, RA | 876 | | add CARG2, BASE, RA |
828 | | stw BASE, L->base | 877 | | stw BASE, L->base |
@@ -1109,7 +1158,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1109 | | ffgccheck | 1158 | | ffgccheck |
1110 | | mr CARG1, L | 1159 | | mr CARG1, L |
1111 | | mr CARG2, BASE | 1160 | | mr CARG2, BASE |
1112 | | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) | 1161 | if (LJ_DUALNUM) { |
1162 | | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) | ||
1163 | } else { | ||
1164 | | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) | ||
1165 | } | ||
1113 | | // Returns GCstr *. | 1166 | | // Returns GCstr *. |
1114 | | li CARG3, LJ_TSTR | 1167 | | li CARG3, LJ_TSTR |
1115 | | b ->fff_restv | 1168 | | b ->fff_restv |
@@ -1167,26 +1220,43 @@ static void build_subroutines(BuildCtx *ctx) | |||
1167 | | lwz CARG3, 0(BASE) | 1220 | | lwz CARG3, 0(BASE) |
1168 | | lwz TAB:CARG1, 4(BASE) | 1221 | | lwz TAB:CARG1, 4(BASE) |
1169 | | lwz CARG4, 8(BASE) | 1222 | | lwz CARG4, 8(BASE) |
1170 | | lfd FARG2, 8(BASE) | 1223 | if (LJ_DUALNUM) { |
1224 | | lwz TMP2, 12(BASE) | ||
1225 | } else { | ||
1226 | | lfd FARG2, 8(BASE) | ||
1227 | } | ||
1171 | | blt ->fff_fallback | 1228 | | blt ->fff_fallback |
1172 | | checktab CARG3 | 1229 | | checktab CARG3 |
1173 | | checknum cr1, CARG4 | 1230 | | checknum cr1, CARG4 |
1174 | | lwz PC, FRAME_PC(BASE) | 1231 | | lwz PC, FRAME_PC(BASE) |
1175 | | lus TMP0, 0x3ff0 | 1232 | if (LJ_DUALNUM) { |
1176 | | stw ZERO, TMPD_LO | 1233 | | bne ->fff_fallback |
1177 | | bne ->fff_fallback | 1234 | | bne cr1, ->fff_fallback |
1178 | | stw TMP0, TMPD_HI | 1235 | } else { |
1179 | | bge cr1, ->fff_fallback | 1236 | | lus TMP0, 0x3ff0 |
1180 | | lfd FARG1, TMPD | 1237 | | stw ZERO, TMPD_LO |
1181 | | toint TMP2, FARG2, f0 | 1238 | | bne ->fff_fallback |
1239 | | stw TMP0, TMPD_HI | ||
1240 | | bge cr1, ->fff_fallback | ||
1241 | | lfd FARG1, TMPD | ||
1242 | | toint TMP2, FARG2, f0 | ||
1243 | } | ||
1182 | | lwz TMP0, TAB:CARG1->asize | 1244 | | lwz TMP0, TAB:CARG1->asize |
1183 | | lwz TMP1, TAB:CARG1->array | 1245 | | lwz TMP1, TAB:CARG1->array |
1184 | | fadd FARG2, FARG2, FARG1 | 1246 | if (!LJ_DUALNUM) { |
1247 | | fadd FARG2, FARG2, FARG1 | ||
1248 | } | ||
1185 | | addi TMP2, TMP2, 1 | 1249 | | addi TMP2, TMP2, 1 |
1186 | | la RA, -8(BASE) | 1250 | | la RA, -8(BASE) |
1187 | | cmplw TMP0, TMP2 | 1251 | | cmplw TMP0, TMP2 |
1188 | | slwi TMP3, TMP2, 3 | 1252 | if (LJ_DUALNUM) { |
1189 | | stfd FARG2, 0(RA) | 1253 | | stw TISNUM, 0(RA) |
1254 | | slwi TMP3, TMP2, 3 | ||
1255 | | stw TMP2, 4(RA) | ||
1256 | } else { | ||
1257 | | slwi TMP3, TMP2, 3 | ||
1258 | | stfd FARG2, 0(RA) | ||
1259 | } | ||
1190 | | ble >2 // Not in array part? | 1260 | | ble >2 // Not in array part? |
1191 | | lwzx TMP2, TMP1, TMP3 | 1261 | | lwzx TMP2, TMP1, TMP3 |
1192 | | lfdx f0, TMP1, TMP3 | 1262 | | lfdx f0, TMP1, TMP3 |
@@ -1226,7 +1296,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
1226 | | lfd f0, CFUNC:RB->upvalue[0] | 1296 | | lfd f0, CFUNC:RB->upvalue[0] |
1227 | | la RA, -8(BASE) | 1297 | | la RA, -8(BASE) |
1228 | #endif | 1298 | #endif |
1229 | | stw ZERO, 8(BASE) | 1299 | if (LJ_DUALNUM) { |
1300 | | stw TISNUM, 8(BASE) | ||
1301 | } else { | ||
1302 | | stw ZERO, 8(BASE) | ||
1303 | } | ||
1230 | | stw ZERO, 12(BASE) | 1304 | | stw ZERO, 12(BASE) |
1231 | | li RD, (3+1)*8 | 1305 | | li RD, (3+1)*8 |
1232 | | stfd f0, 0(RA) | 1306 | | stfd f0, 0(RA) |
@@ -1401,7 +1475,26 @@ static void build_subroutines(BuildCtx *ctx) | |||
1401 | |//-- Math library ------------------------------------------------------- | 1475 | |//-- Math library ------------------------------------------------------- |
1402 | | | 1476 | | |
1403 | |.ffunc_1 math_abs | 1477 | |.ffunc_1 math_abs |
1404 | | checknum CARG3; bge ->fff_fallback | 1478 | | checknum CARG3 |
1479 | if (LJ_DUALNUM) { | ||
1480 | | bne >2 | ||
1481 | | srawi TMP1, CARG1, 31 | ||
1482 | | xor TMP2, TMP1, CARG1 | ||
1483 | | sub. CARG1, TMP2, TMP1 | ||
1484 | | blt >1 | ||
1485 | |->fff_resi: | ||
1486 | | lwz PC, FRAME_PC(BASE) | ||
1487 | | la RA, -8(BASE) | ||
1488 | | stw TISNUM, -8(BASE) | ||
1489 | | stw CRET1, -4(BASE) | ||
1490 | | b ->fff_res1 | ||
1491 | |1: | ||
1492 | | lus CARG3, 0x41e0 // 2^31. | ||
1493 | | li CARG1, 0 | ||
1494 | | b ->fff_restv | ||
1495 | |2: | ||
1496 | } | ||
1497 | | bge ->fff_fallback | ||
1405 | | rlwinm CARG3, CARG3, 0, 1, 31 | 1498 | | rlwinm CARG3, CARG3, 0, 1, 31 |
1406 | | // Fallthrough. | 1499 | | // Fallthrough. |
1407 | | | 1500 | | |
@@ -1448,9 +1541,85 @@ static void build_subroutines(BuildCtx *ctx) | |||
1448 | | b ->fff_resn | 1541 | | b ->fff_resn |
1449 | |.endmacro | 1542 | |.endmacro |
1450 | | | 1543 | | |
1451 | | // NYI: Use internal implementation. | 1544 | |.macro math_round, func |
1452 | | math_extern floor | 1545 | | .ffunc_1 math_ .. func |
1453 | | math_extern ceil | 1546 | | checknum CARG3; beq ->fff_restv |
1547 | | rlwinm TMP2, CARG3, 12, 21, 31 | ||
1548 | | bge ->fff_fallback | ||
1549 | | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 | ||
1550 | | cmplwi cr1, TMP2, 31 // 0 <= exp < 31? | ||
1551 | | subfic TMP0, TMP2, 31 | ||
1552 | | blt >3 | ||
1553 | | slwi TMP1, CARG3, 11 | ||
1554 | | srwi TMP3, CARG1, 21 | ||
1555 | | oris TMP1, TMP1, 0x8000 | ||
1556 | | addi TMP2, TMP2, 1 | ||
1557 | | or TMP1, TMP1, TMP3 | ||
1558 | | slwi CARG2, CARG1, 11 | ||
1559 | | bge cr1, >4 | ||
1560 | | slw TMP3, TMP1, TMP2 | ||
1561 | | srw CARG1, TMP1, TMP0 | ||
1562 | | or TMP3, TMP3, CARG2 | ||
1563 | | srawi TMP2, CARG3, 31 | ||
1564 | |.if "func" == "floor" | ||
1565 | | and TMP1, TMP3, TMP2 | ||
1566 | | addic TMP0, TMP1, -1 | ||
1567 | | subfe TMP1, TMP0, TMP1 | ||
1568 | | add CARG1, CARG1, TMP1 | ||
1569 | | xor CARG1, CARG1, TMP2 | ||
1570 | | sub CARG1, CARG1, TMP2 | ||
1571 | | b ->fff_resi | ||
1572 | |.else | ||
1573 | | andc TMP1, TMP3, TMP2 | ||
1574 | | addic TMP0, TMP1, -1 | ||
1575 | | subfe TMP1, TMP0, TMP1 | ||
1576 | | addo. CARG1, CARG1, TMP1 | ||
1577 | | xor CARG1, CARG1, TMP2 | ||
1578 | | sub CARG1, CARG1, TMP2 | ||
1579 | | bns ->fff_resi | ||
1580 | | // Potential overflow. | ||
1581 | | mcrxr cr0; ble ->fff_resi // Ignore unrelated overflow. | ||
1582 | | lus CARG3, 0x41e0 // 2^31. | ||
1583 | | li CARG1, 0 | ||
1584 | | b ->fff_restv | ||
1585 | |.endif | ||
1586 | |3: // |x| < 1 | ||
1587 | | add TMP2, CARG3, CARG3 | ||
1588 | | srawi TMP1, CARG3, 31 | ||
1589 | | or TMP2, CARG1, TMP2 // ztest = (hi+hi) | lo | ||
1590 | |.if "func" == "floor" | ||
1591 | | and TMP1, TMP2, TMP1 // (ztest & sign) == 0 ? 0 : -1 | ||
1592 | | subfic TMP2, TMP1, 0 | ||
1593 | | subfe CARG1, CARG1, CARG1 | ||
1594 | |.else | ||
1595 | | andc TMP1, TMP2, TMP1 // (ztest & ~sign) == 0 ? 0 : 1 | ||
1596 | | addic TMP2, TMP1, -1 | ||
1597 | | subfe CARG1, TMP2, TMP1 | ||
1598 | |.endif | ||
1599 | | b ->fff_resi | ||
1600 | |4: // exp >= 31. Check for -(2^31). | ||
1601 | | xoris TMP1, TMP1, 0x8000 | ||
1602 | | srawi TMP2, CARG3, 31 | ||
1603 | |.if "func" == "floor" | ||
1604 | | or TMP1, TMP1, CARG2 | ||
1605 | |.endif | ||
1606 | | orc. TMP1, TMP1, TMP2 | ||
1607 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | ||
1608 | | lus CARG1, 0x8000 // -(2^31). | ||
1609 | | beq ->fff_resi | ||
1610 | |5: | ||
1611 | | bl extern func | ||
1612 | | b ->fff_resn | ||
1613 | |.endmacro | ||
1614 | | | ||
1615 | if (LJ_DUALNUM) { | ||
1616 | | math_round floor | ||
1617 | | math_round ceil | ||
1618 | } else { | ||
1619 | | // NYI: use internal implementation. | ||
1620 | | math_extern floor | ||
1621 | | math_extern ceil | ||
1622 | } | ||
1454 | | | 1623 | | |
1455 | | math_extern sqrt | 1624 | | math_extern sqrt |
1456 | | math_extern log | 1625 | | math_extern log |
@@ -1475,8 +1644,20 @@ static void build_subroutines(BuildCtx *ctx) | |||
1475 | | fmul FARG1, FARG1, FARG2 | 1644 | | fmul FARG1, FARG1, FARG2 |
1476 | | b ->fff_resn | 1645 | | b ->fff_resn |
1477 | | | 1646 | | |
1478 | |.ffunc_nn math_ldexp | 1647 | if (LJ_DUALNUM) { |
1479 | | toint CARG1, FARG2 | 1648 | |.ffunc math_ldexp |
1649 | | cmplwi NARGS8:RC, 16 | ||
1650 | | lwz CARG3, 0(BASE) | ||
1651 | | lfd FARG1, 0(BASE) | ||
1652 | | lwz CARG4, 8(BASE) | ||
1653 | | lwz CARG1, 12(BASE) | ||
1654 | | blt ->fff_fallback | ||
1655 | | checknum CARG3; bge ->fff_fallback | ||
1656 | | checknum CARG4; bne ->fff_fallback | ||
1657 | } else { | ||
1658 | |.ffunc_nn math_ldexp | ||
1659 | | toint CARG1, FARG2 | ||
1660 | } | ||
1480 | | bl extern ldexp | 1661 | | bl extern ldexp |
1481 | | b ->fff_resn | 1662 | | b ->fff_resn |
1482 | | | 1663 | | |
@@ -1486,10 +1667,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
1486 | | bl extern frexp | 1667 | | bl extern frexp |
1487 | | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) | 1668 | | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) |
1488 | | la RA, -8(BASE) | 1669 | | la RA, -8(BASE) |
1489 | | tonum_i FARG2, TMP1 | 1670 | if (!LJ_DUALNUM) { |
1671 | | tonum_i FARG2, TMP1 | ||
1672 | } | ||
1490 | | stfd FARG1, 0(RA) | 1673 | | stfd FARG1, 0(RA) |
1491 | | li RD, (2+1)*8 | 1674 | | li RD, (2+1)*8 |
1492 | | stfd FARG2, 8(RA) | 1675 | if (LJ_DUALNUM) { |
1676 | | stw TISNUM, 8(RA) | ||
1677 | | stw TMP1, 12(RA) | ||
1678 | } else { | ||
1679 | | stfd FARG2, 8(RA) | ||
1680 | } | ||
1493 | | b ->fff_res | 1681 | | b ->fff_res |
1494 | | | 1682 | | |
1495 | |.ffunc_n math_modf | 1683 | |.ffunc_n math_modf |
@@ -1502,6 +1690,61 @@ static void build_subroutines(BuildCtx *ctx) | |||
1502 | | b ->fff_res | 1690 | | b ->fff_res |
1503 | | | 1691 | | |
1504 | |.macro math_minmax, name, ismax | 1692 | |.macro math_minmax, name, ismax |
1693 | ||if (LJ_DUALNUM) { | ||
1694 | | .ffunc_1 name | ||
1695 | | checknum CARG3 | ||
1696 | | addi TMP1, BASE, 8 | ||
1697 | | add TMP2, BASE, NARGS8:RC | ||
1698 | | bne >4 | ||
1699 | |1: // Handle integers. | ||
1700 | | lwz CARG4, 0(TMP1) | ||
1701 | | cmplw cr1, TMP1, TMP2 | ||
1702 | | lwz CARG2, 4(TMP1) | ||
1703 | | bge cr1, ->fff_resi | ||
1704 | | checknum CARG4 | ||
1705 | | xoris TMP0, CARG1, 0x8000 | ||
1706 | | xoris TMP3, CARG2, 0x8000 | ||
1707 | | bne >3 | ||
1708 | | subfc TMP3, TMP3, TMP0 | ||
1709 | | subfe TMP0, TMP0, TMP0 | ||
1710 | |.if ismax | ||
1711 | | andc TMP3, TMP3, TMP0 | ||
1712 | |.else | ||
1713 | | and TMP3, TMP3, TMP0 | ||
1714 | |.endif | ||
1715 | | add CARG1, TMP3, CARG2 | ||
1716 | | addi TMP1, TMP1, 8 | ||
1717 | | b <1 | ||
1718 | |3: | ||
1719 | | bge ->fff_fallback | ||
1720 | | // Convert intermediate result to number and continue below. | ||
1721 | | tonum_i FARG1, CARG1 | ||
1722 | | lfd FARG2, 0(TMP1) | ||
1723 | | b >6 | ||
1724 | |4: | ||
1725 | | lfd FARG1, 0(BASE) | ||
1726 | | bge ->fff_fallback | ||
1727 | |5: // Handle numbers. | ||
1728 | | lwz CARG4, 0(TMP1) | ||
1729 | | cmplw cr1, TMP1, TMP2 | ||
1730 | | lfd FARG2, 0(TMP1) | ||
1731 | | bge cr1, ->fff_resn | ||
1732 | | checknum CARG4; bge >7 | ||
1733 | |6: | ||
1734 | | fsub f0, FARG1, FARG2 | ||
1735 | | addi TMP1, TMP1, 8 | ||
1736 | |.if ismax | ||
1737 | | fsel FARG1, f0, FARG1, FARG2 | ||
1738 | |.else | ||
1739 | | fsel FARG1, f0, FARG2, FARG1 | ||
1740 | |.endif | ||
1741 | | b <5 | ||
1742 | |7: // Convert integer to number and continue above. | ||
1743 | | lwz CARG2, 4(TMP1) | ||
1744 | | bne ->fff_fallback | ||
1745 | | tonum_i FARG2, CARG2 | ||
1746 | | b <6 | ||
1747 | ||} else { | ||
1505 | | .ffunc_n name | 1748 | | .ffunc_n name |
1506 | | li TMP1, 8 | 1749 | | li TMP1, 8 |
1507 | |1: | 1750 | |1: |
@@ -1519,6 +1762,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1519 | | fsel FARG1, f0, FARG2, FARG1 | 1762 | | fsel FARG1, f0, FARG2, FARG1 |
1520 | |.endif | 1763 | |.endif |
1521 | | b <1 | 1764 | | b <1 |
1765 | ||} | ||
1522 | |.endmacro | 1766 | |.endmacro |
1523 | | | 1767 | | |
1524 | | math_minmax math_min, 0 | 1768 | | math_minmax math_min, 0 |
@@ -1539,28 +1783,45 @@ static void build_subroutines(BuildCtx *ctx) | |||
1539 | | checkstr CARG3 | 1783 | | checkstr CARG3 |
1540 | | bne ->fff_fallback | 1784 | | bne ->fff_fallback |
1541 | | lwz TMP0, STR:CARG1->len | 1785 | | lwz TMP0, STR:CARG1->len |
1542 | | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | 1786 | if (LJ_DUALNUM) { |
1543 | | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8 | 1787 | | lbz CARG1, STR:CARG1[1] // Access is always ok (NUL at end). |
1544 | | subfe RD, TMP3, TMP0 | 1788 | | li RD, (0+1)*8 |
1545 | | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1. | 1789 | | lwz PC, FRAME_PC(BASE) |
1546 | | addi RD, RD, 1 | 1790 | | cmplwi TMP0, 0 |
1547 | | lfd f0, TONUM_D | 1791 | | la RA, -8(BASE) |
1548 | | la RA, -8(BASE) | 1792 | | beq ->fff_res |
1549 | | lwz PC, FRAME_PC(BASE) | 1793 | | b ->fff_resi |
1550 | | fsub f0, f0, TOBIT | 1794 | } else { |
1551 | | slwi RD, RD, 3 | 1795 | | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). |
1552 | | stfd f0, 0(RA) | 1796 | | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8 |
1553 | | b ->fff_res | 1797 | | subfe RD, TMP3, TMP0 |
1798 | | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1. | ||
1799 | | addi RD, RD, 1 | ||
1800 | | lfd f0, TONUM_D | ||
1801 | | la RA, -8(BASE) | ||
1802 | | lwz PC, FRAME_PC(BASE) | ||
1803 | | fsub f0, f0, TOBIT | ||
1804 | | slwi RD, RD, 3 | ||
1805 | | stfd f0, 0(RA) | ||
1806 | | b ->fff_res | ||
1807 | } | ||
1554 | | | 1808 | | |
1555 | |.ffunc string_char // Only handle the 1-arg case here. | 1809 | |.ffunc string_char // Only handle the 1-arg case here. |
1556 | | ffgccheck | 1810 | | ffgccheck |
1557 | | cmplwi NARGS8:RC, 8 | 1811 | | cmplwi NARGS8:RC, 8 |
1558 | | lwz CARG3, 0(BASE) | 1812 | | lwz CARG3, 0(BASE) |
1559 | | lfd FARG1, 0(BASE) | 1813 | if (LJ_DUALNUM) { |
1560 | | bne ->fff_fallback // Exactly 1 argument. | 1814 | | lwz TMP0, 4(BASE) |
1561 | | checknum CARG3; bge ->fff_fallback | 1815 | | bne ->fff_fallback // Exactly 1 argument. |
1562 | | toint TMP0, FARG1 | 1816 | | checknum CARG3; bne ->fff_fallback |
1563 | | la CARG2, TMPD_BLO | 1817 | | la CARG2, 7(BASE) |
1818 | } else { | ||
1819 | | lfd FARG1, 0(BASE) | ||
1820 | | bne ->fff_fallback // Exactly 1 argument. | ||
1821 | | checknum CARG3; bge ->fff_fallback | ||
1822 | | toint TMP0, FARG1 | ||
1823 | | la CARG2, TMPD_BLO | ||
1824 | } | ||
1564 | | li CARG3, 1 | 1825 | | li CARG3, 1 |
1565 | | cmplwi TMP0, 255; bgt ->fff_fallback | 1826 | | cmplwi TMP0, 255; bgt ->fff_fallback |
1566 | |->fff_newstr: | 1827 | |->fff_newstr: |
@@ -1577,20 +1838,36 @@ static void build_subroutines(BuildCtx *ctx) | |||
1577 | | ffgccheck | 1838 | | ffgccheck |
1578 | | cmplwi NARGS8:RC, 16 | 1839 | | cmplwi NARGS8:RC, 16 |
1579 | | lwz CARG3, 16(BASE) | 1840 | | lwz CARG3, 16(BASE) |
1580 | | lfd f0, 16(BASE) | 1841 | if (!LJ_DUALNUM) { |
1842 | | lfd f0, 16(BASE) | ||
1843 | } | ||
1581 | | lwz TMP0, 0(BASE) | 1844 | | lwz TMP0, 0(BASE) |
1582 | | lwz STR:CARG1, 4(BASE) | 1845 | | lwz STR:CARG1, 4(BASE) |
1583 | | blt ->fff_fallback | 1846 | | blt ->fff_fallback |
1584 | | lwz CARG2, 8(BASE) | 1847 | | lwz CARG2, 8(BASE) |
1585 | | lfd f1, 8(BASE) | 1848 | if (LJ_DUALNUM) { |
1849 | | lwz TMP1, 12(BASE) | ||
1850 | } else { | ||
1851 | | lfd f1, 8(BASE) | ||
1852 | } | ||
1586 | | li TMP2, -1 | 1853 | | li TMP2, -1 |
1587 | | beq >1 | 1854 | | beq >1 |
1588 | | checknum CARG3; bge ->fff_fallback | 1855 | if (LJ_DUALNUM) { |
1589 | | toint TMP2, f0 | 1856 | | checknum CARG3 |
1590 | |1: | 1857 | | lwz TMP2, 20(BASE) |
1591 | | checknum CARG2; bge ->fff_fallback | 1858 | | bne ->fff_fallback |
1859 | |1: | ||
1860 | | checknum CARG2; bne ->fff_fallback | ||
1861 | } else { | ||
1862 | | checknum CARG3; bge ->fff_fallback | ||
1863 | | toint TMP2, f0 | ||
1864 | |1: | ||
1865 | | checknum CARG2; bge ->fff_fallback | ||
1866 | } | ||
1592 | | checkstr TMP0; bne ->fff_fallback | 1867 | | checkstr TMP0; bne ->fff_fallback |
1593 | | toint TMP1, f1 | 1868 | if (!LJ_DUALNUM) { |
1869 | | toint TMP1, f1 | ||
1870 | } | ||
1594 | | lwz TMP0, STR:CARG1->len | 1871 | | lwz TMP0, STR:CARG1->len |
1595 | | cmplw TMP0, TMP2 // len < end? (unsigned compare) | 1872 | | cmplw TMP0, TMP2 // len < end? (unsigned compare) |
1596 | | addi TMP3, TMP2, 1 | 1873 | | addi TMP3, TMP2, 1 |
@@ -1627,14 +1904,22 @@ static void build_subroutines(BuildCtx *ctx) | |||
1627 | |.ffunc string_rep // Only handle the 1-char case inline. | 1904 | |.ffunc string_rep // Only handle the 1-char case inline. |
1628 | | ffgccheck | 1905 | | ffgccheck |
1629 | | cmplwi NARGS8:RC, 16 | 1906 | | cmplwi NARGS8:RC, 16 |
1630 | | lwz CARG3, 0(BASE) | 1907 | | lwz TMP0, 0(BASE) |
1631 | | lwz STR:CARG1, 4(BASE) | 1908 | | lwz STR:CARG1, 4(BASE) |
1632 | | lwz CARG4, 8(BASE) | 1909 | | lwz CARG4, 8(BASE) |
1633 | | lfd FARG2, 8(BASE) | 1910 | if (LJ_DUALNUM) { |
1911 | | lwz CARG3, 12(BASE) | ||
1912 | } else { | ||
1913 | | lfd FARG2, 8(BASE) | ||
1914 | } | ||
1634 | | blt ->fff_fallback | 1915 | | blt ->fff_fallback |
1635 | | checkstr CARG3; bne ->fff_fallback | 1916 | | checkstr TMP0; bne ->fff_fallback |
1636 | | checknum CARG4; bge ->fff_fallback | 1917 | if (LJ_DUALNUM) { |
1637 | | toint CARG3, FARG2 | 1918 | | checknum CARG4; bne ->fff_fallback |
1919 | } else { | ||
1920 | | checknum CARG4; bge ->fff_fallback | ||
1921 | | toint CARG3, FARG2 | ||
1922 | } | ||
1638 | | lwz TMP0, STR:CARG1->len | 1923 | | lwz TMP0, STR:CARG1->len |
1639 | | cmpwi CARG3, 0 | 1924 | | cmpwi CARG3, 0 |
1640 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) | 1925 | | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) |
@@ -1728,34 +2013,39 @@ static void build_subroutines(BuildCtx *ctx) | |||
1728 | |//-- Bit library -------------------------------------------------------- | 2013 | |//-- Bit library -------------------------------------------------------- |
1729 | | | 2014 | | |
1730 | |.macro .ffunc_bit, name | 2015 | |.macro .ffunc_bit, name |
2016 | ||if (LJ_DUALNUM) { | ||
2017 | | .ffunc_1 bit_..name | ||
2018 | | checknum CARG3; bnel ->fff_tobit_fb | ||
2019 | ||} else { | ||
1731 | | .ffunc_n bit_..name | 2020 | | .ffunc_n bit_..name |
1732 | | fadd FARG1, FARG1, TOBIT | 2021 | | fadd FARG1, FARG1, TOBIT |
1733 | | stfd FARG1, TMPD | 2022 | | stfd FARG1, TMPD |
1734 | | lwz CARG1, TMPD_LO | 2023 | | lwz CARG1, TMPD_LO |
2024 | ||} | ||
1735 | |.endmacro | 2025 | |.endmacro |
1736 | | | 2026 | | |
1737 | |.ffunc_bit tobit | ||
1738 | |->fff_resi: | ||
1739 | | tonum_i FARG1, CRET1 | ||
1740 | |->fff_resn: | ||
1741 | | lwz PC, FRAME_PC(BASE) | ||
1742 | | la RA, -8(BASE) | ||
1743 | | stfd FARG1, -8(BASE) | ||
1744 | | b ->fff_res1 | ||
1745 | | | ||
1746 | |.macro .ffunc_bit_op, name, ins | 2027 | |.macro .ffunc_bit_op, name, ins |
1747 | | .ffunc_bit name | 2028 | | .ffunc_bit name |
1748 | | li TMP1, 8 | 2029 | | addi TMP1, BASE, 8 |
2030 | | add TMP2, BASE, NARGS8:RC | ||
1749 | |1: | 2031 | |1: |
1750 | | lwzx CARG4, BASE, TMP1 | 2032 | | lwz CARG4, 0(TMP1) |
1751 | | cmplw cr1, TMP1, NARGS8:RC | 2033 | | cmplw cr1, TMP1, TMP2 |
1752 | | lfdx FARG1, BASE, TMP1 | 2034 | ||if (LJ_DUALNUM) { |
1753 | | checknum CARG4 | 2035 | | lwz CARG2, 4(TMP1) |
2036 | ||} else { | ||
2037 | | lfd FARG1, 0(TMP1) | ||
2038 | ||} | ||
1754 | | bge cr1, ->fff_resi | 2039 | | bge cr1, ->fff_resi |
2040 | | checknum CARG4 | ||
2041 | ||if (LJ_DUALNUM) { | ||
2042 | | bnel ->fff_bitop_fb | ||
2043 | ||} else { | ||
1755 | | fadd FARG1, FARG1, TOBIT | 2044 | | fadd FARG1, FARG1, TOBIT |
1756 | | bge ->fff_fallback | 2045 | | bge ->fff_fallback |
1757 | | stfd FARG1, TMPD | 2046 | | stfd FARG1, TMPD |
1758 | | lwz CARG2, TMPD_LO | 2047 | | lwz CARG2, TMPD_LO |
2048 | ||} | ||
1759 | | ins CARG1, CARG1, CARG2 | 2049 | | ins CARG1, CARG1, CARG2 |
1760 | | addi TMP1, TMP1, 8 | 2050 | | addi TMP1, TMP1, 8 |
1761 | | b <1 | 2051 | | b <1 |
@@ -1777,6 +2067,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
1777 | | b ->fff_resi | 2067 | | b ->fff_resi |
1778 | | | 2068 | | |
1779 | |.macro .ffunc_bit_sh, name, ins, shmod | 2069 | |.macro .ffunc_bit_sh, name, ins, shmod |
2070 | ||if (LJ_DUALNUM) { | ||
2071 | | .ffunc_2 bit_..name | ||
2072 | | checknum CARG3; bnel ->fff_tobit_fb | ||
2073 | | // Note: no inline conversion from number for 2nd argument! | ||
2074 | | checknum CARG4; bne ->fff_fallback | ||
2075 | ||} else { | ||
1780 | | .ffunc_nn bit_..name | 2076 | | .ffunc_nn bit_..name |
1781 | | fadd FARG1, FARG1, TOBIT | 2077 | | fadd FARG1, FARG1, TOBIT |
1782 | | fadd FARG2, FARG2, TOBIT | 2078 | | fadd FARG2, FARG2, TOBIT |
@@ -1784,6 +2080,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
1784 | | lwz CARG1, TMPD_LO | 2080 | | lwz CARG1, TMPD_LO |
1785 | | stfd FARG2, TMPD | 2081 | | stfd FARG2, TMPD |
1786 | | lwz CARG2, TMPD_LO | 2082 | | lwz CARG2, TMPD_LO |
2083 | ||} | ||
1787 | |.if shmod == 1 | 2084 | |.if shmod == 1 |
1788 | | rlwinm CARG2, CARG2, 0, 27, 31 | 2085 | | rlwinm CARG2, CARG2, 0, 27, 31 |
1789 | |.elif shmod == 2 | 2086 | |.elif shmod == 2 |
@@ -1799,6 +2096,39 @@ static void build_subroutines(BuildCtx *ctx) | |||
1799 | |.ffunc_bit_sh rol, rotlw, 0 | 2096 | |.ffunc_bit_sh rol, rotlw, 0 |
1800 | |.ffunc_bit_sh ror, rotlw, 2 | 2097 | |.ffunc_bit_sh ror, rotlw, 2 |
1801 | | | 2098 | | |
2099 | |.ffunc_bit tobit | ||
2100 | if (LJ_DUALNUM) { | ||
2101 | | b ->fff_resi | ||
2102 | } else { | ||
2103 | |->fff_resi: | ||
2104 | | tonum_i FARG1, CRET1 | ||
2105 | } | ||
2106 | |->fff_resn: | ||
2107 | | lwz PC, FRAME_PC(BASE) | ||
2108 | | la RA, -8(BASE) | ||
2109 | | stfd FARG1, -8(BASE) | ||
2110 | | b ->fff_res1 | ||
2111 | | | ||
2112 | |// Fallback FP number to bit conversion. | ||
2113 | |->fff_tobit_fb: | ||
2114 | if (LJ_DUALNUM) { | ||
2115 | | lfd FARG1, 0(BASE) | ||
2116 | | bgt ->fff_fallback | ||
2117 | | fadd FARG1, FARG1, TOBIT | ||
2118 | | stfd FARG1, TMPD | ||
2119 | | lwz CARG1, TMPD_LO | ||
2120 | | blr | ||
2121 | } | ||
2122 | |->fff_bitop_fb: | ||
2123 | if (LJ_DUALNUM) { | ||
2124 | | lfd FARG1, 0(TMP1) | ||
2125 | | bgt ->fff_fallback | ||
2126 | | fadd FARG1, FARG1, TOBIT | ||
2127 | | stfd FARG1, TMPD | ||
2128 | | lwz CARG2, TMPD_LO | ||
2129 | | blr | ||
2130 | } | ||
2131 | | | ||
1802 | |//----------------------------------------------------------------------- | 2132 | |//----------------------------------------------------------------------- |
1803 | | | 2133 | | |
1804 | |->fff_fallback: // Call fast function fallback handler. | 2134 | |->fff_fallback: // Call fast function fallback handler. |
@@ -1981,6 +2311,17 @@ static void build_subroutines(BuildCtx *ctx) | |||
1981 | | b extern trunc | 2311 | | b extern trunc |
1982 | #endif | 2312 | #endif |
1983 | | | 2313 | | |
2314 | |->vm_modi: | ||
2315 | | divwo. TMP0, CARG1, CARG2 | ||
2316 | | bsolr | ||
2317 | | xor. CARG3, CARG1, CARG2 | ||
2318 | | mullw TMP0, TMP0, CARG2 | ||
2319 | | sub CARG1, CARG1, TMP0 | ||
2320 | | bgelr | ||
2321 | | cmpwi CARG1, 0; beqlr | ||
2322 | | add CARG1, CARG1, CARG2 | ||
2323 | | blr | ||
2324 | | | ||
1984 | |->vm_powi: | 2325 | |->vm_powi: |
1985 | #if LJ_HASJIT | 2326 | #if LJ_HASJIT |
1986 | | NYI | 2327 | | NYI |
@@ -2060,64 +2401,142 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2060 | 2401 | ||
2061 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | 2402 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
2062 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 2403 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
2063 | | lwzx TMP0, BASE, RA | 2404 | if (LJ_DUALNUM) { |
2064 | | addi PC, PC, 4 | 2405 | | lwzux TMP0, RA, BASE |
2065 | | lfdx f0, BASE, RA | 2406 | | addi PC, PC, 4 |
2066 | | lwzx TMP1, BASE, RD | 2407 | | lwz CARG2, 4(RA) |
2067 | | checknum cr0, TMP0 | 2408 | | lwzux TMP1, RD, BASE |
2068 | | lwz TMP2, -4(PC) | 2409 | | lwz TMP2, -4(PC) |
2069 | | lfdx f1, BASE, RD | 2410 | | checknum cr0, TMP0 |
2070 | | checknum cr1, TMP1 | 2411 | | lwz CARG3, 4(RD) |
2071 | | decode_RD4 TMP2, TMP2 | 2412 | | decode_RD4 TMP2, TMP2 |
2072 | | bge cr0, ->vmeta_comp | 2413 | | checknum cr1, TMP1 |
2073 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 2414 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) |
2074 | | bge cr1, ->vmeta_comp | 2415 | | bne cr0, >7 |
2075 | | fcmpu cr0, f0, f1 | 2416 | | bne cr1, >8 |
2076 | if (op == BC_ISLT) { | 2417 | | cmpw CARG2, CARG3 |
2077 | | bge >1 | 2418 | if (op == BC_ISLT) { |
2078 | } else if (op == BC_ISGE) { | 2419 | | bge >2 |
2079 | | blt >1 | 2420 | } else if (op == BC_ISGE) { |
2080 | } else if (op == BC_ISLE) { | 2421 | | blt >2 |
2081 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq | 2422 | } else if (op == BC_ISLE) { |
2082 | | bge >1 | 2423 | | bgt >2 |
2424 | } else { | ||
2425 | | ble >2 | ||
2426 | } | ||
2427 | |1: | ||
2428 | | add PC, PC, TMP2 | ||
2429 | |2: | ||
2430 | | ins_next | ||
2431 | | | ||
2432 | |7: // RA is not an integer. | ||
2433 | | bgt cr0, ->vmeta_comp | ||
2434 | | // RA is a number. | ||
2435 | | lfd f0, 0(RA) | ||
2436 | | bgt cr1, ->vmeta_comp | ||
2437 | | blt cr1, >4 | ||
2438 | | // RA is a number, RD is an integer. | ||
2439 | | tonum_i f1, CARG3 | ||
2440 | | b >5 | ||
2441 | | | ||
2442 | |8: // RA is an integer, RD is a number. | ||
2443 | | tonum_i f0, CARG2 | ||
2444 | |4: | ||
2445 | | lfd f1, 0(RD) | ||
2446 | |5: | ||
2447 | | fcmpu cr0, f0, f1 | ||
2448 | if (op == BC_ISLT) { | ||
2449 | | bge <2 | ||
2450 | } else if (op == BC_ISGE) { | ||
2451 | | blt <2 | ||
2452 | } else if (op == BC_ISLE) { | ||
2453 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq | ||
2454 | | bge <2 | ||
2455 | } else { | ||
2456 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq | ||
2457 | | blt <2 | ||
2458 | } | ||
2459 | | b <1 | ||
2083 | } else { | 2460 | } else { |
2084 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq | 2461 | | lwzx TMP0, BASE, RA |
2085 | | blt >1 | 2462 | | addi PC, PC, 4 |
2463 | | lfdx f0, BASE, RA | ||
2464 | | lwzx TMP1, BASE, RD | ||
2465 | | checknum cr0, TMP0 | ||
2466 | | lwz TMP2, -4(PC) | ||
2467 | | lfdx f1, BASE, RD | ||
2468 | | checknum cr1, TMP1 | ||
2469 | | decode_RD4 TMP2, TMP2 | ||
2470 | | bge cr0, ->vmeta_comp | ||
2471 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | ||
2472 | | bge cr1, ->vmeta_comp | ||
2473 | | fcmpu cr0, f0, f1 | ||
2474 | if (op == BC_ISLT) { | ||
2475 | | bge >1 | ||
2476 | } else if (op == BC_ISGE) { | ||
2477 | | blt >1 | ||
2478 | } else if (op == BC_ISLE) { | ||
2479 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq | ||
2480 | | bge >1 | ||
2481 | } else { | ||
2482 | | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq | ||
2483 | | blt >1 | ||
2484 | } | ||
2485 | | add PC, PC, TMP2 | ||
2486 | |1: | ||
2487 | | ins_next | ||
2086 | } | 2488 | } |
2087 | | add PC, PC, TMP2 | ||
2088 | |1: | ||
2089 | | ins_next | ||
2090 | break; | 2489 | break; |
2091 | 2490 | ||
2092 | case BC_ISEQV: case BC_ISNEV: | 2491 | case BC_ISEQV: case BC_ISNEV: |
2093 | vk = op == BC_ISEQV; | 2492 | vk = op == BC_ISEQV; |
2094 | | // RA = src1*8, RD = src2*8, JMP with RD = target | 2493 | | // RA = src1*8, RD = src2*8, JMP with RD = target |
2095 | | lwzux TMP0, RA, BASE | 2494 | if (LJ_DUALNUM) { |
2096 | | lwz TMP2, 0(PC) | 2495 | | lwzux TMP0, RA, BASE |
2097 | | lfd f0, 0(RA) | 2496 | | addi PC, PC, 4 |
2098 | | addi PC, PC, 4 | 2497 | | lwz CARG2, 4(RA) |
2099 | | lwzux TMP1, RD, BASE | 2498 | | lwzux TMP1, RD, BASE |
2100 | | checknum cr0, TMP0 | 2499 | | checknum cr0, TMP0 |
2101 | | decode_RD4 TMP2, TMP2 | 2500 | | lwz INS, -4(PC) |
2102 | | lfd f1, 0(RD) | 2501 | | checknum cr1, TMP1 |
2103 | | checknum cr1, TMP1 | 2502 | | decode_RD4 TMP2, INS |
2104 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 2503 | | lwz CARG3, 4(RD) |
2105 | | bge cr0, >5 | 2504 | | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt |
2106 | | bge cr1, >5 | 2505 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) |
2107 | | fcmpu cr0, f0, f1 | 2506 | if (vk) { |
2108 | if (vk) { | 2507 | | ble cr7, ->BC_ISEQN_Z |
2109 | | bne >1 | 2508 | } else { |
2110 | | add PC, PC, TMP2 | 2509 | | ble cr7, ->BC_ISNEN_Z |
2510 | } | ||
2111 | } else { | 2511 | } else { |
2112 | | beq >1 | 2512 | | lwzux TMP0, RA, BASE |
2113 | | add PC, PC, TMP2 | 2513 | | lwz TMP2, 0(PC) |
2514 | | lfd f0, 0(RA) | ||
2515 | | addi PC, PC, 4 | ||
2516 | | lwzux TMP1, RD, BASE | ||
2517 | | checknum cr0, TMP0 | ||
2518 | | decode_RD4 TMP2, TMP2 | ||
2519 | | lfd f1, 0(RD) | ||
2520 | | checknum cr1, TMP1 | ||
2521 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | ||
2522 | | bge cr0, >5 | ||
2523 | | bge cr1, >5 | ||
2524 | | fcmpu cr0, f0, f1 | ||
2525 | if (vk) { | ||
2526 | | bne >1 | ||
2527 | | add PC, PC, TMP2 | ||
2528 | } else { | ||
2529 | | beq >1 | ||
2530 | | add PC, PC, TMP2 | ||
2531 | } | ||
2532 | |1: | ||
2533 | | ins_next | ||
2114 | } | 2534 | } |
2115 | |1: | ||
2116 | | ins_next | ||
2117 | | | ||
2118 | |5: // Either or both types are not numbers. | 2535 | |5: // Either or both types are not numbers. |
2119 | | lwz CARG2, 4(RA) | 2536 | if (!LJ_DUALNUM) { |
2120 | | lwz CARG3, 4(RD) | 2537 | | lwz CARG2, 4(RA) |
2538 | | lwz CARG3, 4(RD) | ||
2539 | } | ||
2121 | | not TMP3, TMP0 | 2540 | | not TMP3, TMP0 |
2122 | | cmplw TMP0, TMP1 | 2541 | | cmplw TMP0, TMP1 |
2123 | | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? | 2542 | | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? |
@@ -2138,7 +2557,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2138 | | add PC, PC, TMP2 | 2557 | | add PC, PC, TMP2 |
2139 | |6: | 2558 | |6: |
2140 | } | 2559 | } |
2141 | | blt cr0, <1 // Done if 1 or 2. | 2560 | if (LJ_DUALNUM) { |
2561 | | bge cr0, >2 // Done if 1 or 2. | ||
2562 | |1: | ||
2563 | | ins_next | ||
2564 | |2: | ||
2565 | } else { | ||
2566 | | blt cr0, <1 // Done if 1 or 2. | ||
2567 | } | ||
2142 | | blt cr6, <1 // Done if not tab/ud. | 2568 | | blt cr6, <1 // Done if not tab/ud. |
2143 | | | 2569 | | |
2144 | | // Different tables or userdatas. Need to check __eq metamethod. | 2570 | | // Different tables or userdatas. Need to check __eq metamethod. |
@@ -2183,32 +2609,84 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2183 | case BC_ISEQN: case BC_ISNEN: | 2609 | case BC_ISEQN: case BC_ISNEN: |
2184 | vk = op == BC_ISEQN; | 2610 | vk = op == BC_ISEQN; |
2185 | | // RA = src*8, RD = num_const*8, JMP with RD = target | 2611 | | // RA = src*8, RD = num_const*8, JMP with RD = target |
2186 | | lwzx TMP0, BASE, RA | 2612 | if (LJ_DUALNUM) { |
2187 | | lfdx f0, BASE, RA | 2613 | | lwzux TMP0, RA, BASE |
2188 | | addi PC, PC, 4 | 2614 | | addi PC, PC, 4 |
2189 | | lfdx f1, KBASE, RD | 2615 | | lwz CARG2, 4(RA) |
2190 | | lwz INS, -4(PC) | 2616 | | lwzux TMP1, RD, KBASE |
2191 | | checknum TMP0; bge >5 | 2617 | | checknum cr0, TMP0 |
2192 | | fcmpu cr0, f0, f1 | 2618 | | lwz INS, -4(PC) |
2193 | | decode_RD4 TMP2, INS | 2619 | | checknum cr1, TMP1 |
2194 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 2620 | | decode_RD4 TMP2, INS |
2621 | | lwz CARG3, 4(RD) | ||
2622 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | ||
2623 | if (vk) { | ||
2624 | |->BC_ISEQN_Z: | ||
2625 | } else { | ||
2626 | |->BC_ISNEN_Z: | ||
2627 | } | ||
2628 | | bne cr0, >7 | ||
2629 | | bne cr1, >8 | ||
2630 | | cmpw CARG2, CARG3 | ||
2631 | |4: | ||
2632 | } else { | ||
2633 | if (vk) { | ||
2634 | |->BC_ISEQN_Z: // Dummy label. | ||
2635 | } else { | ||
2636 | |->BC_ISNEN_Z: // Dummy label. | ||
2637 | } | ||
2638 | | lwzx TMP0, BASE, RA | ||
2639 | | addi PC, PC, 4 | ||
2640 | | lfdx f0, BASE, RA | ||
2641 | | lwz INS, -4(PC) | ||
2642 | | lfdx f1, KBASE, RD | ||
2643 | | decode_RD4 TMP2, INS | ||
2644 | | checknum TMP0 | ||
2645 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | ||
2646 | | bge >3 | ||
2647 | | fcmpu cr0, f0, f1 | ||
2648 | } | ||
2195 | if (vk) { | 2649 | if (vk) { |
2196 | | bne >5 | 2650 | | bne >1 |
2197 | | add PC, PC, TMP2 | 2651 | | add PC, PC, TMP2 |
2198 | |5: | 2652 | |1: |
2653 | if (!LJ_HASFFI) { | ||
2654 | |3: | ||
2655 | } | ||
2199 | } else { | 2656 | } else { |
2200 | | beq >2 | 2657 | | beq >2 |
2201 | |1: | 2658 | |1: |
2659 | if (!LJ_HASFFI) { | ||
2660 | |3: | ||
2661 | } | ||
2202 | | add PC, PC, TMP2 | 2662 | | add PC, PC, TMP2 |
2203 | |2: | 2663 | |2: |
2204 | } | 2664 | } |
2205 | | ins_next | 2665 | | ins_next |
2206 | if (!vk) { | 2666 | if (LJ_HASFFI) { |
2207 | |5: | 2667 | |3: |
2208 | | decode_RD4 TMP2, INS | 2668 | | cmpwi TMP0, LJ_TCDATA |
2209 | | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) | 2669 | | beq ->vmeta_equal_cd |
2210 | | b <1 | 2670 | | b <1 |
2211 | } | 2671 | } |
2672 | if (LJ_DUALNUM) { | ||
2673 | |7: // RA is not an integer. | ||
2674 | | bge cr0, <3 | ||
2675 | | // RA is a number. | ||
2676 | | lfd f0, 0(RA) | ||
2677 | | blt cr1, >1 | ||
2678 | | // RA is a number, RD is an integer. | ||
2679 | | tonum_i f1, CARG3 | ||
2680 | | b >2 | ||
2681 | | | ||
2682 | |8: // RA is an integer, RD is a number. | ||
2683 | | tonum_i f0, CARG2 | ||
2684 | |1: | ||
2685 | | lfd f1, 0(RD) | ||
2686 | |2: | ||
2687 | | fcmpu cr0, f0, f1 | ||
2688 | | b <4 | ||
2689 | } | ||
2212 | break; | 2690 | break; |
2213 | 2691 | ||
2214 | case BC_ISEQP: case BC_ISNEP: | 2692 | case BC_ISEQP: case BC_ISNEP: |
@@ -2291,12 +2769,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2291 | | // RA = dst*8, RD = src*8 | 2769 | | // RA = dst*8, RD = src*8 |
2292 | | lwzux TMP1, RD, BASE | 2770 | | lwzux TMP1, RD, BASE |
2293 | | lwz TMP0, 4(RD) | 2771 | | lwz TMP0, 4(RD) |
2294 | | checknum TMP1; bge ->vmeta_unm | 2772 | | checknum TMP1 |
2773 | if (LJ_DUALNUM) { | ||
2774 | | bne >5 | ||
2775 | | nego. TMP0, TMP0 | ||
2776 | | bso >4 | ||
2777 | |1: | ||
2778 | | ins_next1 | ||
2779 | | stwux TISNUM, RA, BASE | ||
2780 | | stw TMP0, 4(RA) | ||
2781 | |3: | ||
2782 | | ins_next2 | ||
2783 | |4: // Potential overflow. | ||
2784 | | mcrxr cr0; ble <1 // Ignore unrelated overflow. | ||
2785 | | lus TMP1, 0x41e0 // 2^31. | ||
2786 | | li TMP0, 0 | ||
2787 | | b >7 | ||
2788 | } | ||
2789 | |5: | ||
2790 | | bge ->vmeta_unm | ||
2295 | | xoris TMP1, TMP1, 0x8000 | 2791 | | xoris TMP1, TMP1, 0x8000 |
2792 | |7: | ||
2296 | | ins_next1 | 2793 | | ins_next1 |
2297 | | stwux TMP1, RA, BASE | 2794 | | stwux TMP1, RA, BASE |
2298 | | stw TMP0, 4(RA) | 2795 | | stw TMP0, 4(RA) |
2299 | | ins_next2 | 2796 | if (LJ_DUALNUM) { |
2797 | | b <3 | ||
2798 | } else { | ||
2799 | | ins_next2 | ||
2800 | } | ||
2300 | break; | 2801 | break; |
2301 | case BC_LEN: | 2802 | case BC_LEN: |
2302 | | // RA = dst*8, RD = src*8 | 2803 | | // RA = dst*8, RD = src*8 |
@@ -2305,9 +2806,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2305 | | checkstr TMP0; bne >2 | 2806 | | checkstr TMP0; bne >2 |
2306 | | lwz CRET1, STR:CARG1->len | 2807 | | lwz CRET1, STR:CARG1->len |
2307 | |1: | 2808 | |1: |
2308 | | tonum_u f0, CRET1 // Result is a non-negative integer. | 2809 | if (LJ_DUALNUM) { |
2309 | | ins_next1 | 2810 | | ins_next1 |
2310 | | stfdx f0, BASE, RA | 2811 | | stwux TISNUM, RA, BASE |
2812 | | stw CRET1, 4(RA) | ||
2813 | } else { | ||
2814 | | tonum_u f0, CRET1 // Result is a non-negative integer. | ||
2815 | | ins_next1 | ||
2816 | | stfdx f0, BASE, RA | ||
2817 | } | ||
2311 | | ins_next2 | 2818 | | ins_next2 |
2312 | |2: | 2819 | |2: |
2313 | | checktab TMP0; bne ->vmeta_len | 2820 | | checktab TMP0; bne ->vmeta_len |
@@ -2332,78 +2839,197 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2332 | 2839 | ||
2333 | /* -- Binary ops -------------------------------------------------------- */ | 2840 | /* -- Binary ops -------------------------------------------------------- */ |
2334 | 2841 | ||
2335 | |.macro ins_arithpre, t0, t1 | 2842 | |.macro ins_arithpre |
2336 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 | 2843 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 |
2337 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | 2844 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); |
2338 | ||switch (vk) { | 2845 | ||switch (vk) { |
2339 | ||case 0: | 2846 | ||case 0: |
2340 | | lwzx CARG1, BASE, RB | 2847 | | lwzx TMP1, BASE, RB |
2341 | | lfdx t0, BASE, RB | 2848 | ||if (LJ_DUALNUM) { |
2342 | | lfdx t1, KBASE, RC | 2849 | | lwzx TMP2, KBASE, RC |
2343 | | checknum CARG1; bge ->vmeta_arith_vn | 2850 | ||} |
2851 | | lfdx f14, BASE, RB | ||
2852 | | lfdx f15, KBASE, RC | ||
2853 | ||if (LJ_DUALNUM) { | ||
2854 | | checknum cr0, TMP1 | ||
2855 | | checknum cr1, TMP2 | ||
2856 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
2857 | | bge ->vmeta_arith_vn | ||
2858 | ||} else { | ||
2859 | | checknum TMP1; bge ->vmeta_arith_vn | ||
2860 | ||} | ||
2344 | || break; | 2861 | || break; |
2345 | ||case 1: | 2862 | ||case 1: |
2346 | | lwzx CARG1, BASE, RB | 2863 | | lwzx TMP1, BASE, RB |
2347 | | lfdx t1, BASE, RB | 2864 | ||if (LJ_DUALNUM) { |
2348 | | lfdx t0, KBASE, RC | 2865 | | lwzx TMP2, KBASE, RC |
2349 | | checknum CARG1; bge ->vmeta_arith_nv | 2866 | ||} |
2867 | | lfdx f15, BASE, RB | ||
2868 | | lfdx f14, KBASE, RC | ||
2869 | ||if (LJ_DUALNUM) { | ||
2870 | | checknum cr0, TMP1 | ||
2871 | | checknum cr1, TMP2 | ||
2872 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
2873 | | bge ->vmeta_arith_nv | ||
2874 | ||} else { | ||
2875 | | checknum TMP1; bge ->vmeta_arith_nv | ||
2876 | ||} | ||
2350 | || break; | 2877 | || break; |
2351 | ||default: | 2878 | ||default: |
2352 | | lwzx CARG1, BASE, RB | 2879 | | lwzx TMP1, BASE, RB |
2353 | | lwzx CARG2, BASE, RC | 2880 | | lwzx TMP2, BASE, RC |
2354 | | lfdx t0, BASE, RB | 2881 | | lfdx f14, BASE, RB |
2355 | | lfdx t1, BASE, RC | 2882 | | lfdx f15, BASE, RC |
2356 | | checknum cr0, CARG1 | 2883 | | checknum cr0, TMP1 |
2357 | | checknum cr1, CARG2 | 2884 | | checknum cr1, TMP2 |
2358 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 2885 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
2359 | | bge ->vmeta_arith_vv | 2886 | | bge ->vmeta_arith_vv |
2360 | || break; | 2887 | || break; |
2361 | ||} | 2888 | ||} |
2362 | |.endmacro | 2889 | |.endmacro |
2363 | | | 2890 | | |
2364 | |.macro ins_arith, ins | 2891 | |.macro ins_arithfallback, ins |
2365 | | ins_arithpre f0, f1 | 2892 | ||switch (vk) { |
2893 | ||case 0: | ||
2894 | | ins ->vmeta_arith_vn2 | ||
2895 | || break; | ||
2896 | ||case 1: | ||
2897 | | ins ->vmeta_arith_nv2 | ||
2898 | || break; | ||
2899 | ||default: | ||
2900 | | ins ->vmeta_arith_vv2 | ||
2901 | || break; | ||
2902 | ||} | ||
2903 | |.endmacro | ||
2904 | | | ||
2905 | |.macro intmod, a, b, c | ||
2906 | |->BC_MODVNI_Z: | ||
2907 | | bl ->vm_modi | ||
2908 | |.endmacro | ||
2909 | | | ||
2910 | |.macro fpmod, a, b, c | ||
2911 | ||if (!LJ_DUALNUM) { | ||
2912 | |->BC_MODVNI_Z: | ||
2913 | ||} | ||
2914 | |->BC_MODVN_Z: | ||
2915 | | fdiv FARG1, b, c | ||
2916 | | // NYI: Use internal implementation of floor. | ||
2917 | | bl extern floor // floor(b/c) | ||
2918 | | fmul a, FARG1, c | ||
2919 | | fsub a, b, a // b - floor(b/c)*c | ||
2920 | |.endmacro | ||
2921 | | | ||
2922 | |.macro ins_arithfp, fpins | ||
2923 | | ins_arithpre | ||
2924 | |.if "fpins" == "fpmod_" | ||
2925 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | ||
2926 | |.else | ||
2927 | | fpins f0, f14, f15 | ||
2366 | | ins_next1 | 2928 | | ins_next1 |
2367 | | ins f0, f0, f1 | ||
2368 | | stfdx f0, BASE, RA | 2929 | | stfdx f0, BASE, RA |
2369 | | ins_next2 | 2930 | | ins_next2 |
2931 | |.endif | ||
2932 | |.endmacro | ||
2933 | | | ||
2934 | |.macro ins_arithdn, intins, fpins | ||
2935 | | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 | ||
2936 | ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | ||
2937 | ||switch (vk) { | ||
2938 | ||case 0: | ||
2939 | | lwzux TMP1, RB, BASE | ||
2940 | | lwzux TMP2, RC, KBASE | ||
2941 | | lwz CARG1, 4(RB) | ||
2942 | | checknum cr0, TMP1 | ||
2943 | | lwz CARG2, 4(RC) | ||
2944 | || break; | ||
2945 | ||case 1: | ||
2946 | | lwzux TMP1, RB, BASE | ||
2947 | | lwzux TMP2, RC, KBASE | ||
2948 | | lwz CARG2, 4(RB) | ||
2949 | | checknum cr0, TMP1 | ||
2950 | | lwz CARG1, 4(RC) | ||
2951 | || break; | ||
2952 | ||default: | ||
2953 | | lwzux TMP1, RB, BASE | ||
2954 | | lwzux TMP2, RC, BASE | ||
2955 | | lwz CARG1, 4(RB) | ||
2956 | | checknum cr0, TMP1 | ||
2957 | | lwz CARG2, 4(RC) | ||
2958 | || break; | ||
2959 | ||} | ||
2960 | | checknum cr1, TMP2 | ||
2961 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | ||
2962 | | bne >5 | ||
2963 | |.if "intins" == "intmod_" | ||
2964 | | b ->BC_MODVNI_Z // Avoid 3 copies. It's slow anyway. | ||
2965 | |.else | ||
2966 | | intins CARG1, CARG1, CARG2 | ||
2967 | | bso >4 | ||
2968 | |1: | ||
2969 | | ins_next1 | ||
2970 | | stwux TISNUM, RA, BASE | ||
2971 | | stw CARG1, 4(RA) | ||
2972 | |2: | ||
2973 | | ins_next2 | ||
2974 | |4: // Overflow. | ||
2975 | | mcrxr cr0; ble <1 // Ignore unrelated overflow. | ||
2976 | | ins_arithfallback b | ||
2977 | |.endif | ||
2978 | |5: // FP variant. | ||
2979 | ||if (vk == 1) { | ||
2980 | | lfd f15, 0(RB) | ||
2981 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
2982 | | lfd f14, 0(RC) | ||
2983 | ||} else { | ||
2984 | | lfd f14, 0(RB) | ||
2985 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ||
2986 | | lfd f15, 0(RC) | ||
2987 | ||} | ||
2988 | | ins_arithfallback bge | ||
2989 | |.if "fpins" == "fpmod_" | ||
2990 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | ||
2991 | |.else | ||
2992 | | fpins f0, f14, f15 | ||
2993 | | ins_next1 | ||
2994 | | stfdx f0, BASE, RA | ||
2995 | | b <2 | ||
2996 | |.endif | ||
2997 | |.endmacro | ||
2998 | | | ||
2999 | |.macro ins_arith, intins, fpins | ||
3000 | ||if (LJ_DUALNUM) { | ||
3001 | | ins_arithdn intins, fpins | ||
3002 | ||} else { | ||
3003 | | ins_arithfp fpins | ||
3004 | ||} | ||
2370 | |.endmacro | 3005 | |.endmacro |
2371 | 3006 | ||
2372 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: | 3007 | case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: |
2373 | | ins_arith fadd | 3008 | | ins_arith addo., fadd |
2374 | break; | 3009 | break; |
2375 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: | 3010 | case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: |
2376 | | ins_arith fsub | 3011 | | ins_arith subo., fsub |
2377 | break; | 3012 | break; |
2378 | case BC_MULVN: case BC_MULNV: case BC_MULVV: | 3013 | case BC_MULVN: case BC_MULNV: case BC_MULVV: |
2379 | | ins_arith fmul | 3014 | | ins_arith mullwo., fmul |
2380 | break; | 3015 | break; |
2381 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | 3016 | case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: |
2382 | | ins_arith fdiv | 3017 | | ins_arithfp fdiv |
2383 | break; | 3018 | break; |
2384 | case BC_MODVN: | 3019 | case BC_MODVN: |
2385 | | ins_arithpre f14, f15 | 3020 | | ins_arith intmod, fpmod |
2386 | |->BC_MODVN_Z: | ||
2387 | | fdiv FARG1, f14, f15 | ||
2388 | | // NYI: Use internal implementation of floor. | ||
2389 | | bl extern floor // floor(b/c) | ||
2390 | | fmul f0, FARG1, f15 | ||
2391 | | ins_next1 | ||
2392 | | fsub f0, f14, f0 // b - floor(b/c)*c | ||
2393 | | stfdx f0, BASE, RA | ||
2394 | | ins_next2 | ||
2395 | break; | 3021 | break; |
2396 | case BC_MODNV: case BC_MODVV: | 3022 | case BC_MODNV: case BC_MODVV: |
2397 | | ins_arithpre f14, f15 | 3023 | | ins_arith intmod_, fpmod_ |
2398 | | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | ||
2399 | break; | 3024 | break; |
2400 | case BC_POW: | 3025 | case BC_POW: |
2401 | | lwzx CARG1, BASE, RB | 3026 | | // NYI: (partial) integer arithmetic. |
3027 | | lwzx TMP1, BASE, RB | ||
2402 | | lfdx FARG1, BASE, RB | 3028 | | lfdx FARG1, BASE, RB |
2403 | | lwzx CARG2, BASE, RC | 3029 | | lwzx TMP2, BASE, RC |
2404 | | lfdx FARG2, BASE, RC | 3030 | | lfdx FARG2, BASE, RC |
2405 | | checknum cr0, CARG1 | 3031 | | checknum cr0, TMP1 |
2406 | | checknum cr1, CARG2 | 3032 | | checknum cr1, TMP2 |
2407 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 3033 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
2408 | | bge ->vmeta_arith_vv | 3034 | | bge ->vmeta_arith_vv |
2409 | | bl extern pow | 3035 | | bl extern pow |
@@ -2459,33 +3085,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2459 | break; | 3085 | break; |
2460 | case BC_KSHORT: | 3086 | case BC_KSHORT: |
2461 | | // RA = dst*8, RD = int16_literal*8 | 3087 | | // RA = dst*8, RD = int16_literal*8 |
2462 | | // NYI: which approach is faster? | 3088 | if (LJ_DUALNUM) { |
2463 | |.if 1 | 3089 | | slwi RD, RD, 13 |
2464 | | slwi RD, RD, 13 | 3090 | | srawi RD, RD, 16 |
2465 | | srawi RD, RD, 16 | 3091 | | ins_next1 |
2466 | | tonum_i f0, RD | 3092 | | stwux TISNUM, RA, BASE |
2467 | | ins_next1 | 3093 | | stw RD, 4(RA) |
2468 | | stfdx f0, BASE, RA | 3094 | | ins_next2 |
2469 | | ins_next2 | 3095 | } else { |
2470 | |.else | 3096 | | // NYI: which approach is faster? |
2471 | | slwi RD, RD, 13 | 3097 | |.if 1 |
2472 | | srawi TMP1, RD, 31 | 3098 | | slwi RD, RD, 13 |
2473 | | xor TMP2, TMP1, RD | 3099 | | srawi RD, RD, 16 |
2474 | | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) | 3100 | | tonum_i f0, RD |
2475 | | cntlzw TMP3, TMP2 | 3101 | | ins_next1 |
2476 | | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 | 3102 | | stfdx f0, BASE, RA |
2477 | | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa | 3103 | | ins_next2 |
2478 | | subfic TMP3, RD, 0 | 3104 | |.else |
2479 | | slwi TMP1, TMP1, 20 | 3105 | | slwi RD, RD, 13 |
2480 | | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) | 3106 | | srawi TMP1, RD, 31 |
2481 | | subfe TMP0, TMP0, TMP0 | 3107 | | xor TMP2, TMP1, RD |
2482 | | add RD, RD, TMP1 // hi = hi + exponent-1 | 3108 | | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) |
2483 | | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi | 3109 | | cntlzw TMP3, TMP2 |
2484 | | ins_next1 | 3110 | | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 |
2485 | | stwux RD, RA, BASE | 3111 | | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa |
2486 | | stw ZERO, 4(RA) | 3112 | | subfic TMP3, RD, 0 |
2487 | | ins_next2 | 3113 | | slwi TMP1, TMP1, 20 |
2488 | |.endif | 3114 | | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) |
3115 | | subfe TMP0, TMP0, TMP0 | ||
3116 | | add RD, RD, TMP1 // hi = hi + exponent-1 | ||
3117 | | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi | ||
3118 | | ins_next1 | ||
3119 | | stwux RD, RA, BASE | ||
3120 | | stw ZERO, 4(RA) | ||
3121 | | ins_next2 | ||
3122 | |.endif | ||
3123 | } | ||
2489 | break; | 3124 | break; |
2490 | case BC_KNUM: | 3125 | case BC_KNUM: |
2491 | | // RA = dst*8, RD = num_const*8 | 3126 | | // RA = dst*8, RD = num_const*8 |
@@ -2718,23 +3353,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2718 | | lwzux CARG1, RB, BASE | 3353 | | lwzux CARG1, RB, BASE |
2719 | | lwzux CARG2, RC, BASE | 3354 | | lwzux CARG2, RC, BASE |
2720 | | lwz TAB:RB, 4(RB) | 3355 | | lwz TAB:RB, 4(RB) |
2721 | | lfd f0, 0(RC) | 3356 | if (LJ_DUALNUM) { |
3357 | | lwz RC, 4(RC) | ||
3358 | } else { | ||
3359 | | lfd f0, 0(RC) | ||
3360 | } | ||
2722 | | checktab CARG1 | 3361 | | checktab CARG1 |
2723 | | checknum cr1, CARG2 | 3362 | | checknum cr1, CARG2 |
2724 | | bne ->vmeta_tgetv | 3363 | | bne ->vmeta_tgetv |
2725 | | bge cr1, >5 | 3364 | if (LJ_DUALNUM) { |
2726 | | // Convert number key to integer, check for integerness and range. | 3365 | | lwz TMP0, TAB:RB->asize |
2727 | | fctiwz f1, f0 | 3366 | | bne cr1, >5 |
2728 | | fadd f2, f0, TOBIT | 3367 | | lwz TMP1, TAB:RB->array |
2729 | | stfd f1, TMPD | 3368 | | cmplw TMP0, RC |
2730 | | lwz TMP0, TAB:RB->asize | 3369 | | slwi TMP2, RC, 3 |
2731 | | fsub f2, f2, TOBIT | 3370 | } else { |
2732 | | lwz TMP2, TMPD_LO | 3371 | | bge cr1, >5 |
2733 | | lwz TMP1, TAB:RB->array | 3372 | | // Convert number key to integer, check for integerness and range. |
2734 | | fcmpu cr1, f0, f2 | 3373 | | fctiwz f1, f0 |
2735 | | cmplw cr0, TMP0, TMP2 | 3374 | | fadd f2, f0, TOBIT |
2736 | | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq | 3375 | | stfd f1, TMPD |
2737 | | slwi TMP2, TMP2, 3 | 3376 | | lwz TMP0, TAB:RB->asize |
3377 | | fsub f2, f2, TOBIT | ||
3378 | | lwz TMP2, TMPD_LO | ||
3379 | | lwz TMP1, TAB:RB->array | ||
3380 | | fcmpu cr1, f0, f2 | ||
3381 | | cmplw cr0, TMP0, TMP2 | ||
3382 | | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq | ||
3383 | | slwi TMP2, TMP2, 3 | ||
3384 | } | ||
2738 | | ble ->vmeta_tgetv // Integer key and in array part? | 3385 | | ble ->vmeta_tgetv // Integer key and in array part? |
2739 | | lwzx TMP0, TMP1, TMP2 | 3386 | | lwzx TMP0, TMP1, TMP2 |
2740 | | lfdx f14, TMP1, TMP2 | 3387 | | lfdx f14, TMP1, TMP2 |
@@ -2755,7 +3402,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2755 | | | 3402 | | |
2756 | |5: | 3403 | |5: |
2757 | | checkstr CARG2; bne ->vmeta_tgetv | 3404 | | checkstr CARG2; bne ->vmeta_tgetv |
2758 | | lwz STR:RC, 4(RC) | 3405 | if (!LJ_DUALNUM) { |
3406 | | lwz STR:RC, 4(RC) | ||
3407 | } | ||
2759 | | b ->BC_TGETS_Z // String key? | 3408 | | b ->BC_TGETS_Z // String key? |
2760 | break; | 3409 | break; |
2761 | case BC_TGETS: | 3410 | case BC_TGETS: |
@@ -2838,23 +3487,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2838 | | lwzux CARG1, RB, BASE | 3487 | | lwzux CARG1, RB, BASE |
2839 | | lwzux CARG2, RC, BASE | 3488 | | lwzux CARG2, RC, BASE |
2840 | | lwz TAB:RB, 4(RB) | 3489 | | lwz TAB:RB, 4(RB) |
2841 | | lfd f0, 0(RC) | 3490 | if (LJ_DUALNUM) { |
3491 | | lwz RC, 4(RC) | ||
3492 | } else { | ||
3493 | | lfd f0, 0(RC) | ||
3494 | } | ||
2842 | | checktab CARG1 | 3495 | | checktab CARG1 |
2843 | | checknum cr1, CARG2 | 3496 | | checknum cr1, CARG2 |
2844 | | bne ->vmeta_tsetv | 3497 | | bne ->vmeta_tsetv |
2845 | | bge cr1, >5 | 3498 | if (LJ_DUALNUM) { |
2846 | | // Convert number key to integer, check for integerness and range. | 3499 | | lwz TMP0, TAB:RB->asize |
2847 | | fctiwz f1, f0 | 3500 | | bne cr1, >5 |
2848 | | fadd f2, f0, TOBIT | 3501 | | lwz TMP1, TAB:RB->array |
2849 | | stfd f1, TMPD | 3502 | | cmplw TMP0, RC |
2850 | | lwz TMP0, TAB:RB->asize | 3503 | | slwi TMP0, RC, 3 |
2851 | | fsub f2, f2, TOBIT | 3504 | } else { |
2852 | | lwz TMP2, TMPD_LO | 3505 | | bge cr1, >5 |
2853 | | lwz TMP1, TAB:RB->array | 3506 | | // Convert number key to integer, check for integerness and range. |
2854 | | fcmpu cr1, f0, f2 | 3507 | | fctiwz f1, f0 |
2855 | | cmplw cr0, TMP0, TMP2 | 3508 | | fadd f2, f0, TOBIT |
2856 | | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq | 3509 | | stfd f1, TMPD |
2857 | | slwi TMP0, TMP2, 3 | 3510 | | lwz TMP0, TAB:RB->asize |
3511 | | fsub f2, f2, TOBIT | ||
3512 | | lwz TMP2, TMPD_LO | ||
3513 | | lwz TMP1, TAB:RB->array | ||
3514 | | fcmpu cr1, f0, f2 | ||
3515 | | cmplw cr0, TMP0, TMP2 | ||
3516 | | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq | ||
3517 | | slwi TMP0, TMP2, 3 | ||
3518 | } | ||
2858 | | ble ->vmeta_tsetv // Integer key and in array part? | 3519 | | ble ->vmeta_tsetv // Integer key and in array part? |
2859 | | lwzx TMP2, TMP1, TMP0 | 3520 | | lwzx TMP2, TMP1, TMP0 |
2860 | | lbz TMP3, TAB:RB->marked | 3521 | | lbz TMP3, TAB:RB->marked |
@@ -2878,7 +3539,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2878 | | | 3539 | | |
2879 | |5: | 3540 | |5: |
2880 | | checkstr CARG2; bne ->vmeta_tsetv | 3541 | | checkstr CARG2; bne ->vmeta_tsetv |
2881 | | lwz STR:RC, 4(RC) | 3542 | if (!LJ_DUALNUM) { |
3543 | | lwz STR:RC, 4(RC) | ||
3544 | } | ||
2882 | | b ->BC_TSETS_Z // String key? | 3545 | | b ->BC_TSETS_Z // String key? |
2883 | | | 3546 | | |
2884 | |7: // Possible table write barrier for the value. Skip valiswhite check. | 3547 | |7: // Possible table write barrier for the value. Skip valiswhite check. |
@@ -3164,14 +3827,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3164 | | checknil TMP2 | 3827 | | checknil TMP2 |
3165 | | lwz INS, -4(PC) | 3828 | | lwz INS, -4(PC) |
3166 | | beq >4 | 3829 | | beq >4 |
3167 | | tonum_u f1, RC | 3830 | if (LJ_DUALNUM) { |
3831 | | stw RC, 4(RA) | ||
3832 | | stw TISNUM, 0(RA) | ||
3833 | } else { | ||
3834 | | tonum_u f1, RC | ||
3835 | } | ||
3168 | | addi RC, RC, 1 | 3836 | | addi RC, RC, 1 |
3169 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) | 3837 | | addis TMP3, PC, -(BCBIAS_J*4 >> 16) |
3170 | | stfd f0, 8(RA) | 3838 | | stfd f0, 8(RA) |
3171 | | decode_RD4 TMP1, INS | 3839 | | decode_RD4 TMP1, INS |
3172 | | stw RC, -4(RA) // Update control var. | 3840 | | stw RC, -4(RA) // Update control var. |
3173 | | add PC, TMP1, TMP3 | 3841 | | add PC, TMP1, TMP3 |
3174 | | stfd f1, 0(RA) | 3842 | if (!LJ_DUALNUM) { |
3843 | | stfd f1, 0(RA) | ||
3844 | } | ||
3175 | |3: | 3845 | |3: |
3176 | | ins_next | 3846 | | ins_next |
3177 | | | 3847 | | |
@@ -3424,28 +4094,96 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3424 | case BC_IFORL: | 4094 | case BC_IFORL: |
3425 | | // RA = base*8, RD = target (after end of loop or start of loop) | 4095 | | // RA = base*8, RD = target (after end of loop or start of loop) |
3426 | vk = (op == BC_IFORL || op == BC_JFORL); | 4096 | vk = (op == BC_IFORL || op == BC_JFORL); |
4097 | if (LJ_DUALNUM) { | ||
4098 | | // Integer loop. | ||
4099 | | lwzux TMP1, RA, BASE | ||
4100 | | lwz CARG1, FORL_IDX*8+4(RA) | ||
4101 | | cmplw cr0, TMP1, TISNUM | ||
4102 | if (vk) { | ||
4103 | | lwz CARG3, FORL_STEP*8+4(RA) | ||
4104 | | bne >9 | ||
4105 | | addo. CARG1, CARG1, CARG3 | ||
4106 | | cmpwi cr6, CARG3, 0 | ||
4107 | | lwz CARG2, FORL_STOP*8+4(RA) | ||
4108 | | bso >6 | ||
4109 | |4: | ||
4110 | | stw CARG1, FORL_IDX*8+4(RA) | ||
4111 | } else { | ||
4112 | | lwz TMP3, FORL_STEP*8(RA) | ||
4113 | | lwz CARG3, FORL_STEP*8+4(RA) | ||
4114 | | lwz TMP2, FORL_STOP*8(RA) | ||
4115 | | lwz CARG2, FORL_STOP*8+4(RA) | ||
4116 | | cmplw cr7, TMP3, TISNUM | ||
4117 | | cmplw cr1, TMP2, TISNUM | ||
4118 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq | ||
4119 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | ||
4120 | | cmpwi cr6, CARG3, 0 | ||
4121 | | bne >9 | ||
4122 | } | ||
4123 | | blt cr6, >5 | ||
4124 | | cmpw CARG1, CARG2 | ||
4125 | |1: | ||
4126 | | stw TISNUM, FORL_EXT*8(RA) | ||
4127 | if (op != BC_JFORL) { | ||
4128 | | srwi RD, RD, 1 | ||
4129 | } | ||
4130 | | stw CARG1, FORL_EXT*8+4(RA) | ||
4131 | if (op != BC_JFORL) { | ||
4132 | | add RD, PC, RD | ||
4133 | } | ||
4134 | if (op == BC_FORI) { | ||
4135 | | bgt >3 // See FP loop below. | ||
4136 | } else if (op == BC_JFORI) { | ||
4137 | | addis PC, RD, -(BCBIAS_J*4 >> 16) | ||
4138 | | ble =>BC_JLOOP | ||
4139 | } else if (op == BC_IFORL) { | ||
4140 | | bgt >2 | ||
4141 | | addis PC, RD, -(BCBIAS_J*4 >> 16) | ||
4142 | } else { | ||
4143 | | ble =>BC_JLOOP | ||
4144 | } | ||
4145 | |2: | ||
4146 | | ins_next | ||
4147 | |5: // Invert check for negative step. | ||
4148 | | cmpw CARG2, CARG1 | ||
4149 | | b <1 | ||
4150 | if (vk) { | ||
4151 | |6: // Potential overflow. | ||
4152 | | mcrxr cr0; ble <4 // Ignore unrelated overflow. | ||
4153 | | b <2 | ||
4154 | } | ||
4155 | } | ||
3427 | if (vk) { | 4156 | if (vk) { |
3428 | | lfdux f1, RA, BASE | 4157 | if (LJ_DUALNUM) { |
4158 | |9: // FP loop. | ||
4159 | | lfd f1, FORL_IDX*8(RA) | ||
4160 | } else { | ||
4161 | | lfdux f1, RA, BASE | ||
4162 | } | ||
3429 | | lfd f3, FORL_STEP*8(RA) | 4163 | | lfd f3, FORL_STEP*8(RA) |
3430 | | lfd f2, FORL_STOP*8(RA) | 4164 | | lfd f2, FORL_STOP*8(RA) |
3431 | | lwz TMP3, FORL_STEP*8(RA) | 4165 | | lwz TMP3, FORL_STEP*8(RA) |
3432 | | fadd f1, f1, f3 | 4166 | | fadd f1, f1, f3 |
3433 | | stfd f1, FORL_IDX*8(RA) | 4167 | | stfd f1, FORL_IDX*8(RA) |
3434 | } else { | 4168 | } else { |
3435 | | lwzux TMP1, RA, BASE | 4169 | if (LJ_DUALNUM) { |
4170 | |9: // FP loop. | ||
4171 | } else { | ||
4172 | | lwzux TMP1, RA, BASE | ||
4173 | | lwz TMP3, FORL_STEP*8(RA) | ||
4174 | | lwz TMP2, FORL_STOP*8(RA) | ||
4175 | | cmplw cr0, TMP1, TISNUM | ||
4176 | | cmplw cr7, TMP3, TISNUM | ||
4177 | | cmplw cr1, TMP2, TISNUM | ||
4178 | } | ||
3436 | | lfd f1, FORL_IDX*8(RA) | 4179 | | lfd f1, FORL_IDX*8(RA) |
3437 | | lwz TMP3, FORL_STEP*8(RA) | ||
3438 | | lfd f3, FORL_STEP*8(RA) | ||
3439 | | lwz TMP2, FORL_STOP*8(RA) | ||
3440 | | lfd f2, FORL_STOP*8(RA) | ||
3441 | | cmplw cr0, TMP1, TISNUM | ||
3442 | | cmplw cr7, TMP3, TISNUM | ||
3443 | | cmplw cr1, TMP2, TISNUM | ||
3444 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt | 4180 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt |
4181 | | lfd f3, FORL_STEP*8(RA) | ||
3445 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 4182 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
4183 | | lfd f2, FORL_STOP*8(RA) | ||
3446 | | bge ->vmeta_for | 4184 | | bge ->vmeta_for |
3447 | } | 4185 | } |
3448 | | cmpwi cr3, TMP3, 0 | 4186 | | cmpwi cr6, TMP3, 0 |
3449 | if (op != BC_JFORL) { | 4187 | if (op != BC_JFORL) { |
3450 | | srwi RD, RD, 1 | 4188 | | srwi RD, RD, 1 |
3451 | } | 4189 | } |
@@ -3457,22 +4195,30 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3457 | if (op == BC_JFORI) { | 4195 | if (op == BC_JFORI) { |
3458 | | addis PC, RD, -(BCBIAS_J*4 >> 16) | 4196 | | addis PC, RD, -(BCBIAS_J*4 >> 16) |
3459 | } | 4197 | } |
3460 | | blt cr3, >5 | 4198 | | blt cr6, >5 |
3461 | if (op == BC_FORI) { | 4199 | if (op == BC_FORI) { |
3462 | | bgt >3 | 4200 | | bgt >3 |
3463 | } else if (op == BC_IFORL) { | 4201 | } else if (op == BC_IFORL) { |
3464 | | bgt >2 | 4202 | if (LJ_DUALNUM) { |
4203 | | bgt <2 | ||
4204 | } else { | ||
4205 | | bgt >2 | ||
4206 | } | ||
3465 | |1: | 4207 | |1: |
3466 | | addis PC, RD, -(BCBIAS_J*4 >> 16) | 4208 | | addis PC, RD, -(BCBIAS_J*4 >> 16) |
3467 | } else { | 4209 | } else { |
3468 | | ble =>BC_JLOOP | 4210 | | ble =>BC_JLOOP |
3469 | } | 4211 | } |
3470 | |2: | 4212 | if (LJ_DUALNUM) { |
3471 | | ins_next | 4213 | | b <2 |
4214 | } else { | ||
4215 | |2: | ||
4216 | | ins_next | ||
4217 | } | ||
3472 | |5: // Negative step. | 4218 | |5: // Negative step. |
3473 | if (op == BC_FORI) { | 4219 | if (op == BC_FORI) { |
3474 | | bge <2 | 4220 | | bge <2 |
3475 | |3: | 4221 | |3: // Used by integer loop, too. |
3476 | | addis PC, RD, -(BCBIAS_J*4 >> 16) | 4222 | | addis PC, RD, -(BCBIAS_J*4 >> 16) |
3477 | } else if (op == BC_IFORL) { | 4223 | } else if (op == BC_IFORL) { |
3478 | | bge <1 | 4224 | | bge <1 |