about summary refs log tree commit diff
diff options
context:
space:
mode:
author Mike Pall <mike> 2013-04-22 22:32:41 +0200
committer Mike Pall <mike> 2013-04-22 22:32:41 +0200
commit a2c78810ca0162c06b3ae02b52d6b4c04a8d5be3 (patch)
tree d82fe00c6ca8ff6a2bfce89176e0d97b3095be38
parent 2ab5e7c5dce9e8bd19b7f4c9d7a90ef30af53d0a (diff)
download luajit-a2c78810ca0162c06b3ae02b52d6b4c04a8d5be3.tar.gz
luajit-a2c78810ca0162c06b3ae02b52d6b4c04a8d5be3.tar.bz2
luajit-a2c78810ca0162c06b3ae02b52d6b4c04a8d5be3.zip
Combine IR instruction dispatch for all assembler backends.
-rw-r--r-- src/lj_asm.c 118
-rw-r--r-- src/lj_asm_arm.h 298
-rw-r--r-- src/lj_asm_mips.h 181
-rw-r--r-- src/lj_asm_ppc.h 232
-rw-r--r-- src/lj_asm_x86.h 279
5 files changed, 454 insertions, 654 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 231e76fc..7ebde7b8 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1515,6 +1515,124 @@ static void asm_loop(ASMState *as)
1515#error "Missing assembler for target CPU" 1515#error "Missing assembler for target CPU"
1516#endif 1516#endif
1517 1517
1518/* -- Instruction dispatch ------------------------------------------------ */
1519
1520/* Assemble a single instruction. */
1521static void asm_ir(ASMState *as, IRIns *ir)
1522{
1523 switch ((IROp)ir->o) {
1524 /* Miscellaneous ops. */
1525 case IR_LOOP: asm_loop(as); break;
1526 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1527 case IR_USE:
1528 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1529 case IR_PHI: asm_phi(as, ir); break;
1530 case IR_HIOP: asm_hiop(as, ir); break;
1531 case IR_GCSTEP: asm_gcstep(as, ir); break;
1532
1533 /* Guarded assertions. */
1534 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1535 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1536 case IR_ABC:
1537 asm_comp(as, ir);
1538 break;
1539 case IR_EQ: case IR_NE:
1540 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1541 as->curins--;
1542 asm_href(as, ir-1, (IROp)ir->o);
1543 } else {
1544 asm_equal(as, ir);
1545 }
1546 break;
1547
1548 case IR_RETF: asm_retf(as, ir); break;
1549
1550 /* Bit ops. */
1551 case IR_BNOT: asm_bnot(as, ir); break;
1552 case IR_BSWAP: asm_bswap(as, ir); break;
1553 case IR_BAND: asm_band(as, ir); break;
1554 case IR_BOR: asm_bor(as, ir); break;
1555 case IR_BXOR: asm_bxor(as, ir); break;
1556 case IR_BSHL: asm_bshl(as, ir); break;
1557 case IR_BSHR: asm_bshr(as, ir); break;
1558 case IR_BSAR: asm_bsar(as, ir); break;
1559 case IR_BROL: asm_brol(as, ir); break;
1560 case IR_BROR: asm_bror(as, ir); break;
1561
1562 /* Arithmetic ops. */
1563 case IR_ADD: asm_add(as, ir); break;
1564 case IR_SUB: asm_sub(as, ir); break;
1565 case IR_MUL: asm_mul(as, ir); break;
1566 case IR_DIV: asm_div(as, ir); break;
1567 case IR_MOD: asm_mod(as, ir); break;
1568 case IR_POW: asm_pow(as, ir); break;
1569 case IR_NEG: asm_neg(as, ir); break;
1570 case IR_ABS: asm_abs(as, ir); break;
1571 case IR_ATAN2: asm_atan2(as, ir); break;
1572 case IR_LDEXP: asm_ldexp(as, ir); break;
1573 case IR_MIN: asm_min(as, ir); break;
1574 case IR_MAX: asm_max(as, ir); break;
1575 case IR_FPMATH: asm_fpmath(as, ir); break;
1576
1577 /* Overflow-checking arithmetic ops. */
1578 case IR_ADDOV: asm_addov(as, ir); break;
1579 case IR_SUBOV: asm_subov(as, ir); break;
1580 case IR_MULOV: asm_mulov(as, ir); break;
1581
1582 /* Memory references. */
1583 case IR_AREF: asm_aref(as, ir); break;
1584 case IR_HREF: asm_href(as, ir, 0); break;
1585 case IR_HREFK: asm_hrefk(as, ir); break;
1586 case IR_NEWREF: asm_newref(as, ir); break;
1587 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1588 case IR_FREF: asm_fref(as, ir); break;
1589 case IR_STRREF: asm_strref(as, ir); break;
1590
1591 /* Loads and stores. */
1592 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1593 asm_ahuvload(as, ir);
1594 break;
1595 case IR_FLOAD: asm_fload(as, ir); break;
1596 case IR_XLOAD: asm_xload(as, ir); break;
1597 case IR_SLOAD: asm_sload(as, ir); break;
1598
1599 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1600 case IR_FSTORE: asm_fstore(as, ir); break;
1601 case IR_XSTORE: asm_xstore(as, ir); break;
1602
1603 /* Allocations. */
1604 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1605 case IR_TNEW: asm_tnew(as, ir); break;
1606 case IR_TDUP: asm_tdup(as, ir); break;
1607 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1608
1609 /* Buffer operations. */
1610 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1611 case IR_BUFPUT: asm_bufput(as, ir); break;
1612 case IR_BUFSTR: asm_bufstr(as, ir); break;
1613
1614 /* Write barriers. */
1615 case IR_TBAR: asm_tbar(as, ir); break;
1616 case IR_OBAR: asm_obar(as, ir); break;
1617
1618 /* Type conversions. */
1619 case IR_TOBIT: asm_tobit(as, ir); break;
1620 case IR_CONV: asm_conv(as, ir); break;
1621 case IR_TOSTR: asm_tostr(as, ir); break;
1622 case IR_STRTO: asm_strto(as, ir); break;
1623
1624 /* Calls. */
1625 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1626 case IR_CALLXS: asm_callx(as, ir); break;
1627 case IR_CARG: break;
1628
1629 default:
1630 setintV(&as->J->errinfo, ir->o);
1631 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1632 break;
1633 }
1634}
1635
1518/* -- Head of trace ------------------------------------------------------- */ 1636/* -- Head of trace ------------------------------------------------------- */
1519 1637
1520/* Head of a root trace. */ 1638/* Head of a root trace. */
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 25a28bd7..039a2a9a 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -519,6 +519,8 @@ static void asm_tobit(ASMState *as, IRIns *ir)
519 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 519 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
520 emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); 520 emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
521} 521}
522#else
523#define asm_tobit(as, ir) lua_assert(0)
522#endif 524#endif
523 525
524static void asm_conv(ASMState *as, IRIns *ir) 526static void asm_conv(ASMState *as, IRIns *ir)
@@ -1038,7 +1040,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
1038 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1040 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
1039} 1041}
1040 1042
1041static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1043static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
1042{ 1044{
1043 if (ir->r != RID_SINK) { 1045 if (ir->r != RID_SINK) {
1044 Reg src = ra_alloc1(as, ir->op2, 1046 Reg src = ra_alloc1(as, ir->op2,
@@ -1048,6 +1050,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
1048 } 1050 }
1049} 1051}
1050 1052
1053#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1054
1051static void asm_ahuvload(ASMState *as, IRIns *ir) 1055static void asm_ahuvload(ASMState *as, IRIns *ir)
1052{ 1056{
1053 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1057 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1324,6 +1328,42 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
1324 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); 1328 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
1325 emit_dm(as, ai, (dest & 15), (left & 15)); 1329 emit_dm(as, ai, (dest & 15), (left & 15));
1326} 1330}
1331
1332static void asm_callround(ASMState *as, IRIns *ir, int id)
1333{
1334 /* The modified regs must match with the *.dasc implementation. */
1335 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1336 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1337 RegSet of;
1338 Reg dest, src;
1339 ra_evictset(as, drop);
1340 dest = ra_dest(as, ir, RSET_FPR);
1341 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1342 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1343 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1344 (void *)lj_vm_trunc_sf);
1345 /* Workaround to protect argument GPRs from being used for remat. */
1346 of = as->freeset;
1347 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1348 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1349 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1350 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1351 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1352}
1353
1354static void asm_fpmath(ASMState *as, IRIns *ir)
1355{
1356 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1357 return;
1358 if (ir->op2 <= IRFPM_TRUNC)
1359 asm_callround(as, ir, ir->op2);
1360 else if (ir->op2 == IRFPM_SQRT)
1361 asm_fpunary(as, ir, ARMI_VSQRT_D);
1362 else
1363 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1364}
1365#else
1366#define asm_fpmath(as, ir) lua_assert(0)
1327#endif 1367#endif
1328 1368
1329static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) 1369static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
@@ -1373,32 +1413,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
1373 asm_intop(as, ir, ai); 1413 asm_intop(as, ir, ai);
1374} 1414}
1375 1415
1376static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1377{
1378 if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
1379 uint32_t cc = (as->mcp[1] >> 28);
1380 as->flagmcp = NULL;
1381 if (cc <= CC_NE) {
1382 as->mcp++;
1383 ai |= ARMI_S;
1384 } else if (cc == CC_GE) {
1385 *++as->mcp ^= ((CC_GE^CC_PL) << 28);
1386 ai |= ARMI_S;
1387 } else if (cc == CC_LT) {
1388 *++as->mcp ^= ((CC_LT^CC_MI) << 28);
1389 ai |= ARMI_S;
1390 } /* else: other conds don't work with bit ops. */
1391 }
1392 if (ir->op2 == 0) {
1393 Reg dest = ra_dest(as, ir, RSET_GPR);
1394 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1395 emit_d(as, ai^m, dest);
1396 } else {
1397 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1398 asm_intop(as, ir, ai);
1399 }
1400}
1401
1402static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1416static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
1403{ 1417{
1404 Reg dest = ra_dest(as, ir, RSET_GPR); 1418 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1464,6 +1478,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
1464 asm_intmul(as, ir); 1478 asm_intmul(as, ir);
1465} 1479}
1466 1480
1481#define asm_addov(as, ir) asm_add(as, ir)
1482#define asm_subov(as, ir) asm_sub(as, ir)
1483#define asm_mulov(as, ir) asm_mul(as, ir)
1484
1485#if LJ_SOFTFP
1486#define asm_div(as, ir) lua_assert(0)
1487#define asm_pow(as, ir) lua_assert(0)
1488#define asm_abs(as, ir) lua_assert(0)
1489#define asm_atan2(as, ir) lua_assert(0)
1490#define asm_ldexp(as, ir) lua_assert(0)
1491#else
1492#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1493#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1494#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1495#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1496#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1497#endif
1498
1499#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1500
1467static void asm_neg(ASMState *as, IRIns *ir) 1501static void asm_neg(ASMState *as, IRIns *ir)
1468{ 1502{
1469#if !LJ_SOFTFP 1503#if !LJ_SOFTFP
@@ -1475,31 +1509,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
1475 asm_intneg(as, ir, ARMI_RSB); 1509 asm_intneg(as, ir, ARMI_RSB);
1476} 1510}
1477 1511
1478#if !LJ_SOFTFP 1512static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1479static void asm_callround(ASMState *as, IRIns *ir, int id)
1480{ 1513{
1481 /* The modified regs must match with the *.dasc implementation. */ 1514 if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
1482 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| 1515 uint32_t cc = (as->mcp[1] >> 28);
1483 RID2RSET(RID_R3)|RID2RSET(RID_R12); 1516 as->flagmcp = NULL;
1484 RegSet of; 1517 if (cc <= CC_NE) {
1485 Reg dest, src; 1518 as->mcp++;
1486 ra_evictset(as, drop); 1519 ai |= ARMI_S;
1487 dest = ra_dest(as, ir, RSET_FPR); 1520 } else if (cc == CC_GE) {
1488 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); 1521 *++as->mcp ^= ((CC_GE^CC_PL) << 28);
1489 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : 1522 ai |= ARMI_S;
1490 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : 1523 } else if (cc == CC_LT) {
1491 (void *)lj_vm_trunc_sf); 1524 *++as->mcp ^= ((CC_LT^CC_MI) << 28);
1492 /* Workaround to protect argument GPRs from being used for remat. */ 1525 ai |= ARMI_S;
1493 of = as->freeset; 1526 } /* else: other conds don't work with bit ops. */
1494 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); 1527 }
1495 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); 1528 if (ir->op2 == 0) {
1496 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ 1529 Reg dest = ra_dest(as, ir, RSET_GPR);
1497 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); 1530 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1498 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); 1531 emit_d(as, ai^m, dest);
1532 } else {
1533 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1534 asm_intop(as, ir, ai);
1535 }
1499} 1536}
1500#endif
1501 1537
1502static void asm_bitswap(ASMState *as, IRIns *ir) 1538#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
1539
1540static void asm_bswap(ASMState *as, IRIns *ir)
1503{ 1541{
1504 Reg dest = ra_dest(as, ir, RSET_GPR); 1542 Reg dest = ra_dest(as, ir, RSET_GPR);
1505 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1543 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1516,6 +1554,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1516 } 1554 }
1517} 1555}
1518 1556
1557#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
1558#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
1559#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
1560
1519static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1561static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1520{ 1562{
1521 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1563 if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1533,6 +1575,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1533 } 1575 }
1534} 1576}
1535 1577
1578#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
1579#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
1580#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
1581#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
1582#define asm_brol(as, ir) lua_assert(0)
1583
1536static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1584static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1537{ 1585{
1538 uint32_t kcmp = 0, kmov = 0; 1586 uint32_t kcmp = 0, kmov = 0;
@@ -1606,6 +1654,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
1606 asm_intmin_max(as, ir, cc); 1654 asm_intmin_max(as, ir, cc);
1607} 1655}
1608 1656
1657#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
1658#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
1659
1609/* -- Comparisons --------------------------------------------------------- */ 1660/* -- Comparisons --------------------------------------------------------- */
1610 1661
1611/* Map of comparisons to flags. ORDER IR. */ 1662/* Map of comparisons to flags. ORDER IR. */
@@ -1721,6 +1772,18 @@ notst:
1721 as->flagmcp = as->mcp; /* Allow elimination of the compare. */ 1772 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1722} 1773}
1723 1774
1775static void asm_comp(ASMState *as, IRIns *ir)
1776{
1777#if !LJ_SOFTFP
1778 if (irt_isnum(ir->t))
1779 asm_fpcomp(as, ir);
1780 else
1781#endif
1782 asm_intcomp(as, ir);
1783}
1784
1785#define asm_equal(as, ir) asm_comp(as, ir)
1786
1724#if LJ_HASFFI 1787#if LJ_HASFFI
1725/* 64 bit integer comparisons. */ 1788/* 64 bit integer comparisons. */
1726static void asm_int64comp(ASMState *as, IRIns *ir) 1789static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1795,7 +1858,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1795#endif 1858#endif
1796 } else if ((ir-1)->o == IR_XSTORE) { 1859 } else if ((ir-1)->o == IR_XSTORE) {
1797 if ((ir-1)->r != RID_SINK) 1860 if ((ir-1)->r != RID_SINK)
1798 asm_xstore(as, ir, 4); 1861 asm_xstore_(as, ir, 4);
1799 return; 1862 return;
1800 } 1863 }
1801 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1864 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
@@ -2064,141 +2127,6 @@ static void asm_tail_prep(ASMState *as)
2064 *p = 0; /* Prevent load/store merging. */ 2127 *p = 0; /* Prevent load/store merging. */
2065} 2128}
2066 2129
2067/* -- Instruction dispatch ------------------------------------------------ */
2068
2069/* Assemble a single instruction. */
2070static void asm_ir(ASMState *as, IRIns *ir)
2071{
2072 switch ((IROp)ir->o) {
2073 /* Miscellaneous ops. */
2074 case IR_LOOP: asm_loop(as); break;
2075 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2076 case IR_USE:
2077 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2078 case IR_PHI: asm_phi(as, ir); break;
2079 case IR_HIOP: asm_hiop(as, ir); break;
2080 case IR_GCSTEP: asm_gcstep(as, ir); break;
2081
2082 /* Guarded assertions. */
2083 case IR_EQ: case IR_NE:
2084 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2085 as->curins--;
2086 asm_href(as, ir-1, (IROp)ir->o);
2087 break;
2088 }
2089 /* fallthrough */
2090 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2091 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2092 case IR_ABC:
2093#if !LJ_SOFTFP
2094 if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
2095#endif
2096 asm_intcomp(as, ir);
2097 break;
2098
2099 case IR_RETF: asm_retf(as, ir); break;
2100
2101 /* Bit ops. */
2102 case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
2103 case IR_BSWAP: asm_bitswap(as, ir); break;
2104
2105 case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
2106 case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
2107 case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
2108
2109 case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
2110 case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
2111 case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
2112 case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
2113 case IR_BROL: lua_assert(0); break;
2114
2115 /* Arithmetic ops. */
2116 case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
2117 case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
2118 case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
2119 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2120 case IR_NEG: asm_neg(as, ir); break;
2121
2122#if LJ_SOFTFP
2123 case IR_DIV: case IR_POW: case IR_ABS:
2124 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
2125 lua_assert(0); /* Unused for LJ_SOFTFP. */
2126 break;
2127#else
2128 case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
2129 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2130 case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
2131 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2132 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2133 case IR_FPMATH:
2134 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2135 break;
2136 if (ir->op2 <= IRFPM_TRUNC)
2137 asm_callround(as, ir, ir->op2);
2138 else if (ir->op2 == IRFPM_SQRT)
2139 asm_fpunary(as, ir, ARMI_VSQRT_D);
2140 else
2141 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2142 break;
2143 case IR_TOBIT: asm_tobit(as, ir); break;
2144#endif
2145
2146 case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
2147 case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
2148
2149 /* Memory references. */
2150 case IR_AREF: asm_aref(as, ir); break;
2151 case IR_HREF: asm_href(as, ir, 0); break;
2152 case IR_HREFK: asm_hrefk(as, ir); break;
2153 case IR_NEWREF: asm_newref(as, ir); break;
2154 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2155 case IR_FREF: asm_fref(as, ir); break;
2156 case IR_STRREF: asm_strref(as, ir); break;
2157
2158 /* Loads and stores. */
2159 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2160 asm_ahuvload(as, ir);
2161 break;
2162 case IR_FLOAD: asm_fload(as, ir); break;
2163 case IR_XLOAD: asm_xload(as, ir); break;
2164 case IR_SLOAD: asm_sload(as, ir); break;
2165
2166 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2167 case IR_FSTORE: asm_fstore(as, ir); break;
2168 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2169
2170 /* Allocations. */
2171 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2172 case IR_TNEW: asm_tnew(as, ir); break;
2173 case IR_TDUP: asm_tdup(as, ir); break;
2174 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2175
2176 /* Buffer operations. */
2177 case IR_BUFHDR: asm_bufhdr(as, ir); break;
2178 case IR_BUFPUT: asm_bufput(as, ir); break;
2179 case IR_BUFSTR: asm_bufstr(as, ir); break;
2180
2181 /* Write barriers. */
2182 case IR_TBAR: asm_tbar(as, ir); break;
2183 case IR_OBAR: asm_obar(as, ir); break;
2184
2185 /* Type conversions. */
2186 case IR_CONV: asm_conv(as, ir); break;
2187 case IR_TOSTR: asm_tostr(as, ir); break;
2188 case IR_STRTO: asm_strto(as, ir); break;
2189
2190 /* Calls. */
2191 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2192 case IR_CALLXS: asm_callx(as, ir); break;
2193 case IR_CARG: break;
2194
2195 default:
2196 setintV(&as->J->errinfo, ir->o);
2197 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2198 break;
2199 }
2200}
2201
2202/* -- Trace setup --------------------------------------------------------- */ 2130/* -- Trace setup --------------------------------------------------------- */
2203 2131
2204/* Ensure there are enough stack slots for call arguments. */ 2132/* Ensure there are enough stack slots for call arguments. */
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index cbbd2966..122e5ecd 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -849,7 +849,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
849 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 849 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
850} 850}
851 851
852static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 852static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
853{ 853{
854 if (ir->r != RID_SINK) { 854 if (ir->r != RID_SINK) {
855 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 855 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
@@ -858,6 +858,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
858 } 858 }
859} 859}
860 860
861#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
862
861static void asm_ahuvload(ASMState *as, IRIns *ir) 863static void asm_ahuvload(ASMState *as, IRIns *ir)
862{ 864{
863 IRType1 t = ir->t; 865 IRType1 t = ir->t;
@@ -1083,6 +1085,18 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
1083 emit_fg(as, mi, dest, left); 1085 emit_fg(as, mi, dest, left);
1084} 1086}
1085 1087
1088static void asm_fpmath(ASMState *as, IRIns *ir)
1089{
1090 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1091 return;
1092 if (ir->op2 <= IRFPM_TRUNC)
1093 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1094 else if (ir->op2 == IRFPM_SQRT)
1095 asm_fpunary(as, ir, MIPSI_SQRT_D);
1096 else
1097 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1098}
1099
1086static void asm_add(ASMState *as, IRIns *ir) 1100static void asm_add(ASMState *as, IRIns *ir)
1087{ 1101{
1088 if (irt_isnum(ir->t)) { 1102 if (irt_isnum(ir->t)) {
@@ -1126,6 +1140,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1126 } 1140 }
1127} 1141}
1128 1142
1143#define asm_div(as, ir) asm_fparith(as, ir, MIPSI_DIV_D)
1144#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1145#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1146
1129static void asm_neg(ASMState *as, IRIns *ir) 1147static void asm_neg(ASMState *as, IRIns *ir)
1130{ 1148{
1131 if (irt_isnum(ir->t)) { 1149 if (irt_isnum(ir->t)) {
@@ -1137,6 +1155,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1137 } 1155 }
1138} 1156}
1139 1157
1158#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
1159#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1160#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1161
1140static void asm_arithov(ASMState *as, IRIns *ir) 1162static void asm_arithov(ASMState *as, IRIns *ir)
1141{ 1163{
1142 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); 1164 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
@@ -1170,6 +1192,9 @@ static void asm_arithov(ASMState *as, IRIns *ir)
1170 emit_move(as, RID_TMP, dest == left ? left : right); 1192 emit_move(as, RID_TMP, dest == left ? left : right);
1171} 1193}
1172 1194
1195#define asm_addov(as, ir) asm_arithov(as, ir)
1196#define asm_subov(as, ir) asm_arithov(as, ir)
1197
1173static void asm_mulov(ASMState *as, IRIns *ir) 1198static void asm_mulov(ASMState *as, IRIns *ir)
1174{ 1199{
1175#if LJ_DUALNUM 1200#if LJ_DUALNUM
@@ -1263,7 +1288,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1263} 1288}
1264#endif 1289#endif
1265 1290
1266static void asm_bitnot(ASMState *as, IRIns *ir) 1291static void asm_bnot(ASMState *as, IRIns *ir)
1267{ 1292{
1268 Reg left, right, dest = ra_dest(as, ir, RSET_GPR); 1293 Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
1269 IRIns *irl = IR(ir->op1); 1294 IRIns *irl = IR(ir->op1);
@@ -1277,7 +1302,7 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
1277 emit_dst(as, MIPSI_NOR, dest, left, right); 1302 emit_dst(as, MIPSI_NOR, dest, left, right);
1278} 1303}
1279 1304
1280static void asm_bitswap(ASMState *as, IRIns *ir) 1305static void asm_bswap(ASMState *as, IRIns *ir)
1281{ 1306{
1282 Reg dest = ra_dest(as, ir, RSET_GPR); 1307 Reg dest = ra_dest(as, ir, RSET_GPR);
1283 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1308 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1313,6 +1338,10 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1313 emit_dst(as, mi, dest, left, right); 1338 emit_dst(as, mi, dest, left, right);
1314} 1339}
1315 1340
1341#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
1342#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
1343#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
1344
1316static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 1345static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1317{ 1346{
1318 Reg dest = ra_dest(as, ir, RSET_GPR); 1347 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1326,7 +1355,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1326 } 1355 }
1327} 1356}
1328 1357
1329static void asm_bitror(ASMState *as, IRIns *ir) 1358#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
1359#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
1360#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
1361#define asm_brol(as, ir) lua_assert(0)
1362
1363static void asm_bror(ASMState *as, IRIns *ir)
1330{ 1364{
1331 if ((as->flags & JIT_F_MIPS32R2)) { 1365 if ((as->flags & JIT_F_MIPS32R2)) {
1332 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); 1366 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
@@ -1375,6 +1409,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1375 } 1409 }
1376} 1410}
1377 1411
1412#define asm_min(as, ir) asm_min_max(as, ir, 0)
1413#define asm_max(as, ir) asm_min_max(as, ir, 1)
1414
1378/* -- Comparisons --------------------------------------------------------- */ 1415/* -- Comparisons --------------------------------------------------------- */
1379 1416
1380static void asm_comp(ASMState *as, IRIns *ir) 1417static void asm_comp(ASMState *as, IRIns *ir)
@@ -1412,7 +1449,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
1412 } 1449 }
1413} 1450}
1414 1451
1415static void asm_compeq(ASMState *as, IRIns *ir) 1452static void asm_equal(ASMState *as, IRIns *ir)
1416{ 1453{
1417 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); 1454 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
1418 right = (left >> 8); left &= 255; 1455 right = (left >> 8); left &= 255;
@@ -1486,8 +1523,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1486 } else if ((ir-1)->o == IR_XSTORE) { 1523 } else if ((ir-1)->o == IR_XSTORE) {
1487 as->curins--; /* Handle both stores here. */ 1524 as->curins--; /* Handle both stores here. */
1488 if ((ir-1)->r != RID_SINK) { 1525 if ((ir-1)->r != RID_SINK) {
1489 asm_xstore(as, ir, LJ_LE ? 4 : 0); 1526 asm_xstore_(as, ir, LJ_LE ? 4 : 0);
1490 asm_xstore(as, ir-1, LJ_LE ? 0 : 4); 1527 asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
1491 } 1528 }
1492 return; 1529 return;
1493 } 1530 }
@@ -1683,136 +1720,6 @@ static void asm_tail_prep(ASMState *as)
1683 as->invmcp = as->loopref ? as->mcp : NULL; 1720 as->invmcp = as->loopref ? as->mcp : NULL;
1684} 1721}
1685 1722
1686/* -- Instruction dispatch ------------------------------------------------ */
1687
1688/* Assemble a single instruction. */
1689static void asm_ir(ASMState *as, IRIns *ir)
1690{
1691 switch ((IROp)ir->o) {
1692 /* Miscellaneous ops. */
1693 case IR_LOOP: asm_loop(as); break;
1694 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1695 case IR_USE:
1696 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1697 case IR_PHI: asm_phi(as, ir); break;
1698 case IR_HIOP: asm_hiop(as, ir); break;
1699 case IR_GCSTEP: asm_gcstep(as, ir); break;
1700
1701 /* Guarded assertions. */
1702 case IR_EQ: case IR_NE:
1703 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1704 as->curins--;
1705 asm_href(as, ir-1, (IROp)ir->o);
1706 break;
1707 }
1708 asm_compeq(as, ir);
1709 break;
1710 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1711 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1712 case IR_ABC:
1713 asm_comp(as, ir);
1714 break;
1715
1716 case IR_RETF: asm_retf(as, ir); break;
1717
1718 /* Bit ops. */
1719 case IR_BNOT: asm_bitnot(as, ir); break;
1720 case IR_BSWAP: asm_bitswap(as, ir); break;
1721
1722 case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
1723 case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
1724 case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
1725
1726 case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
1727 case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
1728 case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
1729 case IR_BROL: lua_assert(0); break;
1730 case IR_BROR: asm_bitror(as, ir); break;
1731
1732 /* Arithmetic ops. */
1733 case IR_ADD: asm_add(as, ir); break;
1734 case IR_SUB: asm_sub(as, ir); break;
1735 case IR_MUL: asm_mul(as, ir); break;
1736 case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
1737 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
1738 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
1739 case IR_NEG: asm_neg(as, ir); break;
1740
1741 case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
1742 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
1743 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
1744 case IR_MIN: asm_min_max(as, ir, 0); break;
1745 case IR_MAX: asm_min_max(as, ir, 1); break;
1746 case IR_FPMATH:
1747 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1748 break;
1749 if (ir->op2 <= IRFPM_TRUNC)
1750 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1751 else if (ir->op2 == IRFPM_SQRT)
1752 asm_fpunary(as, ir, MIPSI_SQRT_D);
1753 else
1754 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1755 break;
1756
1757 /* Overflow-checking arithmetic ops. */
1758 case IR_ADDOV: asm_arithov(as, ir); break;
1759 case IR_SUBOV: asm_arithov(as, ir); break;
1760 case IR_MULOV: asm_mulov(as, ir); break;
1761
1762 /* Memory references. */
1763 case IR_AREF: asm_aref(as, ir); break;
1764 case IR_HREF: asm_href(as, ir, 0); break;
1765 case IR_HREFK: asm_hrefk(as, ir); break;
1766 case IR_NEWREF: asm_newref(as, ir); break;
1767 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1768 case IR_FREF: asm_fref(as, ir); break;
1769 case IR_STRREF: asm_strref(as, ir); break;
1770
1771 /* Loads and stores. */
1772 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1773 asm_ahuvload(as, ir);
1774 break;
1775 case IR_FLOAD: asm_fload(as, ir); break;
1776 case IR_XLOAD: asm_xload(as, ir); break;
1777 case IR_SLOAD: asm_sload(as, ir); break;
1778
1779 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1780 case IR_FSTORE: asm_fstore(as, ir); break;
1781 case IR_XSTORE: asm_xstore(as, ir, 0); break;
1782
1783 /* Allocations. */
1784 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1785 case IR_TNEW: asm_tnew(as, ir); break;
1786 case IR_TDUP: asm_tdup(as, ir); break;
1787 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1788
1789 /* Buffer operations. */
1790 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1791 case IR_BUFPUT: asm_bufput(as, ir); break;
1792 case IR_BUFSTR: asm_bufstr(as, ir); break;
1793
1794 /* Write barriers. */
1795 case IR_TBAR: asm_tbar(as, ir); break;
1796 case IR_OBAR: asm_obar(as, ir); break;
1797
1798 /* Type conversions. */
1799 case IR_CONV: asm_conv(as, ir); break;
1800 case IR_TOBIT: asm_tobit(as, ir); break;
1801 case IR_TOSTR: asm_tostr(as, ir); break;
1802 case IR_STRTO: asm_strto(as, ir); break;
1803
1804 /* Calls. */
1805 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1806 case IR_CALLXS: asm_callx(as, ir); break;
1807 case IR_CARG: break;
1808
1809 default:
1810 setintV(&as->J->errinfo, ir->o);
1811 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1812 break;
1813 }
1814}
1815
1816/* -- Trace setup --------------------------------------------------------- */ 1723/* -- Trace setup --------------------------------------------------------- */
1817 1724
1818/* Ensure there are enough stack slots for call arguments. */ 1725/* Ensure there are enough stack slots for call arguments. */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 9c9c3ea4..d9174e7d 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -840,7 +840,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
840 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 840 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
841} 841}
842 842
843static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 843static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
844{ 844{
845 IRIns *irb; 845 IRIns *irb;
846 if (ir->r == RID_SINK) 846 if (ir->r == RID_SINK)
@@ -857,6 +857,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
857 } 857 }
858} 858}
859 859
/* Two-argument form of the store: forwards to asm_xstore_() with offset 0.
** asm_hiop() keeps calling asm_xstore_() directly so it can pass offsets 0 and 4.
** NOTE(review): every user of asm_xstore() must pass exactly two arguments;
** a three-argument call like asm_xstore(as, ir, 0) does not match this macro. */
860#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
861
860static void asm_ahuvload(ASMState *as, IRIns *ir) 862static void asm_ahuvload(ASMState *as, IRIns *ir)
861{ 863{
862 IRType1 t = ir->t; 864 IRType1 t = ir->t;
@@ -1120,6 +1122,16 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1120 emit_fb(as, pi, dest, left); 1122 emit_fb(as, pi, dest, left);
1121} 1123}
1122 1124
/* PPC handler for IR_FPMATH; ir->op2 holds the IRFPM_* function selector.
** Same logic as the IR_FPMATH case of the removed per-backend asm_ir(), with
** "return" taking the place of the old "break". */
1125static void asm_fpmath(ASMState *as, IRIns *ir)
1126{
/* EXP2: if asm_fpjoin_pow() reports success (nonzero) nothing else is emitted
** here. Presumably it fuses into a pow sequence -- helper not shown, confirm. */
1127 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1128 return;
/* SQRT is emitted inline only when the JIT_F_SQRT flag is set. */
1129 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1130 asm_fpunary(as, ir, PPCI_FSQRT);
1131 else
/* Everything else is a call; the call id is IRCALL_lj_vm_floor offset by op2. */
1132 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1133}
1134
1123static void asm_add(ASMState *as, IRIns *ir) 1135static void asm_add(ASMState *as, IRIns *ir)
1124{ 1136{
1125 if (irt_isnum(ir->t)) { 1137 if (irt_isnum(ir->t)) {
@@ -1217,6 +1229,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
1217 } 1229 }
1218} 1230}
1219 1231
/* Uniform asm_<op>(as, ir) names for DIV/MOD/POW. The expansions are the same
** calls the removed PPC asm_ir() switch made inline for these opcodes. */
1232#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1233#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1234#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1235
1220static void asm_neg(ASMState *as, IRIns *ir) 1236static void asm_neg(ASMState *as, IRIns *ir)
1221{ 1237{
1222 if (irt_isnum(ir->t)) { 1238 if (irt_isnum(ir->t)) {
@@ -1235,6 +1251,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1235 } 1251 }
1236} 1252}
1237 1253
/* ABS is emitted inline via asm_fpunary() with PPCI_FABS; ATAN2 and LDEXP
** become calls through asm_callid(). Same expansions as the removed switch. */
1254#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1255#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1256#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1257
1238static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1258static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1239{ 1259{
1240 Reg dest, left, right; 1260 Reg dest, left, right;
@@ -1250,6 +1270,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1250 emit_tab(as, pi|PPCF_DOT, dest, left, right); 1270 emit_tab(as, pi|PPCF_DOT, dest, left, right);
1251} 1271}
1252 1272
/* Overflow-checking ADD/SUB/MUL: each selects the opcode handed to
** asm_arithov() (PPCI_ADDO, PPCI_SUBFO, PPCI_MULLWO), as the old switch did. */
1273#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
1274#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
1275#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
1276
1253#if LJ_HASFFI 1277#if LJ_HASFFI
1254static void asm_add64(ASMState *as, IRIns *ir) 1278static void asm_add64(ASMState *as, IRIns *ir)
1255{ 1279{
@@ -1329,7 +1353,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1329} 1353}
1330#endif 1354#endif
1331 1355
1332static void asm_bitnot(ASMState *as, IRIns *ir) 1356static void asm_bnot(ASMState *as, IRIns *ir)
1333{ 1357{
1334 Reg dest, left, right; 1358 Reg dest, left, right;
1335 PPCIns pi = PPCI_NOR; 1359 PPCIns pi = PPCI_NOR;
@@ -1356,7 +1380,7 @@ nofuse:
1356 emit_asb(as, pi, dest, left, right); 1380 emit_asb(as, pi, dest, left, right);
1357} 1381}
1358 1382
1359static void asm_bitswap(ASMState *as, IRIns *ir) 1383static void asm_bswap(ASMState *as, IRIns *ir)
1360{ 1384{
1361 Reg dest = ra_dest(as, ir, RSET_GPR); 1385 Reg dest = ra_dest(as, ir, RSET_GPR);
1362 IRIns *irx; 1386 IRIns *irx;
@@ -1377,32 +1401,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1377 } 1401 }
1378} 1402}
1379 1403
/* Old location of asm_bitop() (left column only, i.e. removed by this commit).
** The same text is re-added further down, after asm_band(). */
1380static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1381{
1382 Reg dest = ra_dest(as, ir, RSET_GPR);
1383 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1384 if (irref_isk(ir->op2)) {
1385 int32_t k = IR(ir->op2)->i;
1386 Reg tmp = left;
1387 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1388 if (!checku16(k)) {
1389 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1390 if ((k & 0xffff) == 0) return;
1391 }
1392 emit_asi(as, pik, dest, left, k);
1393 return;
1394 }
1395 }
1396 /* May fail due to spills/restores above, but simplifies the logic. */
1397 if (as->flagmcp == as->mcp) {
1398 as->flagmcp = NULL;
1399 as->mcp++;
1400 pi |= PPCF_DOT;
1401 }
1402 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1403 emit_asb(as, pi, dest, left, right);
1404}
1405
1406/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ 1404/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1407static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) 1405static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1408{ 1406{
@@ -1433,7 +1431,7 @@ nofuse:
1433 *--as->mcp = pi | PPCF_T(left); 1431 *--as->mcp = pi | PPCF_T(left);
1434} 1432}
1435 1433
1436static void asm_bitand(ASMState *as, IRIns *ir) 1434static void asm_band(ASMState *as, IRIns *ir)
1437{ 1435{
1438 Reg dest, left, right; 1436 Reg dest, left, right;
1439 IRRef lref = ir->op1; 1437 IRRef lref = ir->op1;
@@ -1488,6 +1486,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1488 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1486 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1489} 1487}
1490 1488
/* Shared emitter for two-operand bit ops (used for BOR/BXOR on PPC).
** pi is the register-register opcode, pik the immediate-operand opcode.
** New location of the function; the text matches the copy removed above. */
1489static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1490{
1491 Reg dest = ra_dest(as, ir, RSET_GPR);
1492 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
/* Constant right operand: try to encode it with immediate instructions. */
1493 if (irref_isk(ir->op2)) {
1494 int32_t k = IR(ir->op2)->i;
1495 Reg tmp = left;
/* One-instruction cases (k fits 16 unsigned bits, or its low half is zero)
** always qualify. Otherwise the comma expression switches tmp to dest and the
** two-instruction form is taken only when as->sectref is zero. */
1496 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1497 if (!checku16(k)) {
/* High half: the XOR turns pik into its shifted-immediate variant (ORI -> ORIS;
** presumably the same bit difference maps XORI -> XORIS -- confirm). */
1498 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1499 if ((k & 0xffff) == 0) return;
1500 }
/* Low half. Assuming code is emitted backwards (as the `*--as->mcp` store
** elsewhere in this file suggests), this op runs first and writes dest, which
** the high-half op above then reads via tmp. */
1501 emit_asi(as, pik, dest, left, k);
1502 return;
1503 }
1504 }
1505 /* May fail due to spills/restores above, but simplifies the logic. */
/* If the flag marker sits exactly at the current position, step mcp past one
** instruction word and set PPCF_DOT on this op instead. Presumably this swaps
** a separate compare for the record form -- confirm against asm_comp(). */
1506 if (as->flagmcp == as->mcp) {
1507 as->flagmcp = NULL;
1508 as->mcp++;
1509 pi |= PPCF_DOT;
1510 }
/* Register-register form; right must not share the register chosen for left. */
1511 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1512 emit_asb(as, pi, dest, left, right);
1513}
1514
/* BOR/BXOR go through asm_bitop() with the register opcode and its immediate
** counterpart, exactly as the removed switch passed them. */
1515#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
1516#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
1517
1491static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1518static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1492{ 1519{
1493 Reg dest, left; 1520 Reg dest, left;
@@ -1513,6 +1540,14 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1513 } 1540 }
1514} 1541}
1515 1542
/* Shift/rotate entry points, all routed through asm_bitshift(as, ir, pi, pik).
** For BSHL/BSHR the pik argument is the plain value 0 or 1 rather than an
** opcode; its meaning is decided inside asm_bitshift() (body not shown here).
** asm_bror() only asserts: no code is emitted for BROR on this backend, same
** as the old switch. */
1543#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
1544#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
1545#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
1546#define asm_brol(as, ir) \
1547 asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
1548 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
1549#define asm_bror(as, ir) lua_assert(0)
1550
1516static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 1551static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1517{ 1552{
1518 if (irt_isnum(ir->t)) { 1553 if (irt_isnum(ir->t)) {
@@ -1543,6 +1578,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1543 } 1578 }
1544} 1579}
1545 1580
/* MIN/MAX share asm_min_max(); the last argument is its ismax flag. */
1581#define asm_min(as, ir) asm_min_max(as, ir, 0)
1582#define asm_max(as, ir) asm_min_max(as, ir, 1)
1583
1546/* -- Comparisons --------------------------------------------------------- */ 1584/* -- Comparisons --------------------------------------------------------- */
1547 1585
1548#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ 1586#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1619,6 +1657,8 @@ static void asm_comp(ASMState *as, IRIns *ir)
1619 } 1657 }
1620} 1658}
1621 1659
/* The shared dispatcher calls asm_equal() for EQ/NE that are not fused into an
** HREF; on PPC these use the general comparison emitter asm_comp(). */
1660#define asm_equal(as, ir) asm_comp(as, ir)
1661
1622#if LJ_HASFFI 1662#if LJ_HASFFI
1623/* 64 bit integer comparisons. */ 1663/* 64 bit integer comparisons. */
1624static void asm_comp64(ASMState *as, IRIns *ir) 1664static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1664,8 +1704,8 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1664 } else if ((ir-1)->o == IR_XSTORE) { 1704 } else if ((ir-1)->o == IR_XSTORE) {
1665 as->curins--; /* Handle both stores here. */ 1705 as->curins--; /* Handle both stores here. */
1666 if ((ir-1)->r != RID_SINK) { 1706 if ((ir-1)->r != RID_SINK) {
1667 asm_xstore(as, ir, 0); 1707 asm_xstore_(as, ir, 0);
1668 asm_xstore(as, ir-1, 4); 1708 asm_xstore_(as, ir-1, 4);
1669 } 1709 }
1670 return; 1710 return;
1671 } 1711 }
@@ -1871,134 +1911,6 @@ static void asm_tail_prep(ASMState *as)
1871 } 1911 }
1872} 1912}
1873 1913
1874/* -- Instruction dispatch ------------------------------------------------ */
1875
1876/* Assemble a single instruction. */
/* PPC-specific IR dispatcher, shown in the left column only: this commit
** removes it from lj_asm_ppc.h and moves the dispatch into a single asm_ir()
** in lj_asm.c. The per-opcode calls below are what the new asm_<op>() macros
** and functions in this file reproduce. */
1877static void asm_ir(ASMState *as, IRIns *ir)
1878{
1879 switch ((IROp)ir->o) {
1880 /* Miscellaneous ops. */
1881 case IR_LOOP: asm_loop(as); break;
1882 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1883 case IR_USE:
1884 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1885 case IR_PHI: asm_phi(as, ir); break;
1886 case IR_HIOP: asm_hiop(as, ir); break;
1887 case IR_GCSTEP: asm_gcstep(as, ir); break;
1888
1889 /* Guarded assertions. */
/* An EQ/NE whose op1 is the immediately preceding HREF is merged into that
** HREF: curins is stepped back so the HREF is not assembled a second time. */
1890 case IR_EQ: case IR_NE:
1891 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1892 as->curins--;
1893 asm_href(as, ir-1, (IROp)ir->o);
1894 break;
1895 }
1896 /* fallthrough */
1897 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1898 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1899 case IR_ABC:
1900 asm_comp(as, ir);
1901 break;
1902
1903 case IR_RETF: asm_retf(as, ir); break;
1904
1905 /* Bit ops. */
1906 case IR_BNOT: asm_bitnot(as, ir); break;
1907 case IR_BSWAP: asm_bitswap(as, ir); break;
1908
1909 case IR_BAND: asm_bitand(as, ir); break;
1910 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
1911 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
1912
1913 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
1914 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
1915 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
1916 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
1917 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
/* BROR only asserts here; no code is generated for it on PPC. */
1918 case IR_BROR: lua_assert(0); break;
1919
1920 /* Arithmetic ops. */
1921 case IR_ADD: asm_add(as, ir); break;
1922 case IR_SUB: asm_sub(as, ir); break;
1923 case IR_MUL: asm_mul(as, ir); break;
1924 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
1925 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
1926 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
1927 case IR_NEG: asm_neg(as, ir); break;
1928
1929 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
1930 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
1931 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
1932 case IR_MIN: asm_min_max(as, ir, 0); break;
1933 case IR_MAX: asm_min_max(as, ir, 1); break;
/* This inline FPMATH logic becomes the new PPC asm_fpmath() function. */
1934 case IR_FPMATH:
1935 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1936 break;
1937 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1938 asm_fpunary(as, ir, PPCI_FSQRT);
1939 else
1940 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1941 break;
1942
1943 /* Overflow-checking arithmetic ops. */
1944 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
1945 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
1946 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
1947
1948 /* Memory references. */
1949 case IR_AREF: asm_aref(as, ir); break;
1950 case IR_HREF: asm_href(as, ir, 0); break;
1951 case IR_HREFK: asm_hrefk(as, ir); break;
1952 case IR_NEWREF: asm_newref(as, ir); break;
1953 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1954 case IR_FREF: asm_fref(as, ir); break;
1955 case IR_STRREF: asm_strref(as, ir); break;
1956
1957 /* Loads and stores. */
1958 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1959 asm_ahuvload(as, ir);
1960 break;
1961 case IR_FLOAD: asm_fload(as, ir); break;
1962 case IR_XLOAD: asm_xload(as, ir); break;
1963 case IR_SLOAD: asm_sload(as, ir); break;
1964
1965 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1966 case IR_FSTORE: asm_fstore(as, ir); break;
/* Old three-argument asm_xstore() function; after this commit that function is
** named asm_xstore_() and asm_xstore() is a two-argument macro. */
1967 case IR_XSTORE: asm_xstore(as, ir, 0); break;
1968
1969 /* Allocations. */
1970 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1971 case IR_TNEW: asm_tnew(as, ir); break;
1972 case IR_TDUP: asm_tdup(as, ir); break;
1973 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1974
1975 /* Buffer operations. */
1976 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1977 case IR_BUFPUT: asm_bufput(as, ir); break;
1978 case IR_BUFSTR: asm_bufstr(as, ir); break;
1979
1980 /* Write barriers. */
1981 case IR_TBAR: asm_tbar(as, ir); break;
1982 case IR_OBAR: asm_obar(as, ir); break;
1983
1984 /* Type conversions. */
1985 case IR_CONV: asm_conv(as, ir); break;
1986 case IR_TOBIT: asm_tobit(as, ir); break;
1987 case IR_TOSTR: asm_tostr(as, ir); break;
1988 case IR_STRTO: asm_strto(as, ir); break;
1989
1990 /* Calls. */
1991 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1992 case IR_CALLXS: asm_callx(as, ir); break;
1993 case IR_CARG: break;
1994
/* Unhandled opcode: record it in errinfo and raise the NYI-IR trace error. */
1995 default:
1996 setintV(&as->J->errinfo, ir->o);
1997 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1998 break;
1999 }
2000}
2001
2002/* -- Trace setup --------------------------------------------------------- */ 1914/* -- Trace setup --------------------------------------------------------- */
2003 1915
2004/* Ensure there are enough stack slots for call arguments. */ 1916/* Ensure there are enough stack slots for call arguments. */
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 45fc7e85..2ab1dbf5 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1218,6 +1218,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1218 emit_mrm(as, xo, dest, RID_MRM); 1218 emit_mrm(as, xo, dest, RID_MRM);
1219} 1219}
1220 1220
/* On x86 both FLOAD and XLOAD are handled by asm_fxload(), as in the removed
** switch where the two cases shared one call. */
1221#define asm_fload(as, ir) asm_fxload(as, ir)
1222#define asm_xload(as, ir) asm_fxload(as, ir)
1223
1221static void asm_fxstore(ASMState *as, IRIns *ir) 1224static void asm_fxstore(ASMState *as, IRIns *ir)
1222{ 1225{
1223 RegSet allow = RSET_GPR; 1226 RegSet allow = RSET_GPR;
@@ -1281,6 +1284,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1281 } 1284 }
1282} 1285}
1283 1286
/* On x86 both FSTORE and XSTORE are handled by asm_fxstore().
** NOTE(review): asm_xstore() takes exactly two arguments here; a dispatcher
** call written as asm_xstore(as, ir, 0) would not match this macro. */
1287#define asm_fstore(as, ir) asm_fxstore(as, ir)
1288#define asm_xstore(as, ir) asm_fxstore(as, ir)
1289
1284#if LJ_64 1290#if LJ_64
1285static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) 1291static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1286{ 1292{
@@ -1666,6 +1672,9 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1666 } 1672 }
1667} 1673}
1668 1674
/* ATAN2 and LDEXP reuse asm_fpmath() on x86; the removed switch handled
** IR_FPMATH, IR_ATAN2 and IR_LDEXP with that one call. */
1675#define asm_atan2(as, ir) asm_fpmath(as, ir)
1676#define asm_ldexp(as, ir) asm_fpmath(as, ir)
1677
1669static void asm_fppowi(ASMState *as, IRIns *ir) 1678static void asm_fppowi(ASMState *as, IRIns *ir)
1670{ 1679{
1671 /* The modified regs must match with the *.dasc implementation. */ 1680 /* The modified regs must match with the *.dasc implementation. */
@@ -1679,6 +1688,17 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
1679 ra_left(as, RID_EAX, ir->op2); 1688 ra_left(as, RID_EAX, ir->op2);
1680} 1689}
1681 1690
/* x86 handler for IR_POW (body lifted from the removed switch case). */
1691static void asm_pow(ASMState *as, IRIns *ir)
1692{
/* Only with LJ_64 && LJ_HASFFI: a non-number result type becomes a call to
** lj_carith_powi64 (when irt_isi64) or lj_carith_powu64 (otherwise). */
1693#if LJ_64 && LJ_HASFFI
1694 if (!irt_isnum(ir->t))
1695 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1696 IRCALL_lj_carith_powu64);
1697 else
1698#endif
/* When the #if block is compiled in, this statement is the else branch;
** otherwise it is the whole function body. */
1699 asm_fppowi(as, ir);
1700}
1701
1682static int asm_swapops(ASMState *as, IRIns *ir) 1702static int asm_swapops(ASMState *as, IRIns *ir)
1683{ 1703{
1684 IRIns *irl = IR(ir->op1); 1704 IRIns *irl = IR(ir->op1);
@@ -1855,6 +1875,44 @@ static void asm_add(ASMState *as, IRIns *ir)
1855 asm_intarith(as, ir, XOg_ADD); 1875 asm_intarith(as, ir, XOg_ADD);
1856} 1876}
1857 1877
/* x86 handler for IR_SUB: XO_SUBSD via asm_fparith() for number results,
** XOg_SUB via asm_intarith() for everything else. */
1878static void asm_sub(ASMState *as, IRIns *ir)
1879{
1880 if (irt_isnum(ir->t))
1881 asm_fparith(as, ir, XO_SUBSD);
1882 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
1883 asm_intarith(as, ir, XOg_SUB);
1884}
1885
/* x86 handler for IR_MUL: XO_MULSD via asm_fparith() for number results,
** XOg_X_IMUL via asm_intarith() for everything else. */
1886static void asm_mul(ASMState *as, IRIns *ir)
1887{
1888 if (irt_isnum(ir->t))
1889 asm_fparith(as, ir, XO_MULSD);
1890 else
1891 asm_intarith(as, ir, XOg_X_IMUL);
1892}
1893
/* x86 handler for IR_DIV (body lifted from the removed switch case). */
1894static void asm_div(ASMState *as, IRIns *ir)
1895{
/* Only with LJ_64 && LJ_HASFFI: a non-number result type becomes a call to
** lj_carith_divi64 (when irt_isi64) or lj_carith_divu64 (otherwise). */
1896#if LJ_64 && LJ_HASFFI
1897 if (!irt_isnum(ir->t))
1898 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1899 IRCALL_lj_carith_divu64);
1900 else
1901#endif
/* Number division; also the only statement when the #if block is compiled out. */
1902 asm_fparith(as, ir, XO_DIVSD);
1903}
1904
/* x86 handler for IR_MOD (body lifted from the removed switch case). */
1905static void asm_mod(ASMState *as, IRIns *ir)
1906{
/* Unlike asm_div/asm_pow the test here is !irt_isint, not !irt_isnum: with
** LJ_64 && LJ_HASFFI any non-int type goes to lj_carith_modi64/modu64. */
1907#if LJ_64 && LJ_HASFFI
1908 if (!irt_isint(ir->t))
1909 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1910 IRCALL_lj_carith_modu64);
1911 else
1912#endif
/* Default path: call through IRCALL_lj_vm_modi. */
1913 asm_callid(as, ir, IRCALL_lj_vm_modi);
1914}
1915
1858static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) 1916static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1859{ 1917{
1860 Reg dest = ra_dest(as, ir, RSET_GPR); 1918 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1862,7 +1920,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1862 ra_left(as, dest, ir->op1); 1920 ra_left(as, dest, ir->op1);
1863} 1921}
1864 1922
/* New x86 handler for IR_NEG (right-hand column). The left-hand text on the
** first row is the old asm_min_max() signature, which appears to be renamed
** to asm_intmin_max() just below.
** Number results use asm_fparith() with XO_XORPS (presumably XOR against a
** sign mask set up inside asm_fparith -- confirm); other types use XOg_NEG. */
1865static void asm_min_max(ASMState *as, IRIns *ir, int cc) 1923static void asm_neg(ASMState *as, IRIns *ir)
1924{
1925 if (irt_isnum(ir->t))
1926 asm_fparith(as, ir, XO_XORPS);
1927 else
1928 asm_neg_not(as, ir, XOg_NEG);
1929}
1930
/* ABS is emitted via asm_fparith() with XO_ANDPS, same as the removed switch. */
1931#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
1932
1933static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1866{ 1934{
1867 Reg right, dest = ra_dest(as, ir, RSET_GPR); 1935 Reg right, dest = ra_dest(as, ir, RSET_GPR);
1868 IRRef lref = ir->op1, rref = ir->op2; 1936 IRRef lref = ir->op1, rref = ir->op2;
@@ -1873,7 +1941,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
1873 ra_left(as, dest, lref); 1941 ra_left(as, dest, lref);
1874} 1942}
1875 1943
/* New x86 handler for IR_MIN (right-hand column). The left-hand text on the
** first row is the old asm_bitswap() signature, whose replacement asm_bswap()
** is added further down.
** Number results use XO_MINSD; other types call asm_intmin_max() with CC_G,
** the same condition code the removed switch passed for MIN. */
1876static void asm_bitswap(ASMState *as, IRIns *ir) 1944static void asm_min(ASMState *as, IRIns *ir)
1945{
1946 if (irt_isnum(ir->t))
1947 asm_fparith(as, ir, XO_MINSD);
1948 else
1949 asm_intmin_max(as, ir, CC_G);
1950}
1951
/* x86 handler for IR_MAX: XO_MAXSD for number results; other types call
** asm_intmin_max() with CC_L, the same condition code the removed switch
** passed for MAX. */
1952static void asm_max(ASMState *as, IRIns *ir)
1953{
1954 if (irt_isnum(ir->t))
1955 asm_fparith(as, ir, XO_MAXSD);
1956 else
1957 asm_intmin_max(as, ir, CC_L);
1958}
1959
/* Overflow-checking ADD/SUB/MUL call asm_intarith() directly; in particular
** ADDOV does not go through asm_add(), unlike plain ADD in the old switch. */
1960/* Note: don't use LEA for overflow-checking arithmetic! */
1961#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
1962#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
1963#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
1964
/* BNOT shares asm_neg_not() with integer NEG; only the group-3 opcode differs. */
1965#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
1966
1967static void asm_bswap(ASMState *as, IRIns *ir)
1877{ 1968{
1878 Reg dest = ra_dest(as, ir, RSET_GPR); 1969 Reg dest = ra_dest(as, ir, RSET_GPR);
1879 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), 1970 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1881,6 +1972,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1881 ra_left(as, dest, ir->op1); 1972 ra_left(as, dest, ir->op1);
1882} 1973}
1883 1974
/* BAND/BOR/BXOR are plain integer arithmetic-group ops on x86, emitted by
** asm_intarith() exactly as in the removed switch. */
1975#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
1976#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
1977#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
1978
1884static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) 1979static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1885{ 1980{
1886 IRRef rref = ir->op2; 1981 IRRef rref = ir->op2;
@@ -1920,6 +2015,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1920 */ 2015 */
1921} 2016}
1922 2017
/* Shift/rotate entry points; each passes its x86 shift-group opcode to
** asm_bitshift(). Unlike the PPC backend, BROR emits code here (XOg_ROR). */
2018#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
2019#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
2020#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
2021#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
2022#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
2023
1923/* -- Comparisons --------------------------------------------------------- */ 2024/* -- Comparisons --------------------------------------------------------- */
1924 2025
1925/* Virtual flags for unordered FP comparisons. */ 2026/* Virtual flags for unordered FP comparisons. */
@@ -1946,8 +2047,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
1946}; 2047};
1947 2048
1948/* FP and integer comparisons. */ 2049/* FP and integer comparisons. */
1949static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) 2050static void asm_comp(ASMState *as, IRIns *ir)
1950{ 2051{
2052 uint32_t cc = asm_compmap[ir->o];
1951 if (irt_isnum(ir->t)) { 2053 if (irt_isnum(ir->t)) {
1952 IRRef lref = ir->op1; 2054 IRRef lref = ir->op1;
1953 IRRef rref = ir->op2; 2055 IRRef rref = ir->op2;
@@ -2102,6 +2204,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2102 } 2204 }
2103} 2205}
2104 2206
/* EQ/NE that are not fused into an HREF use asm_comp(), which after this
** commit takes no cc argument and looks it up from asm_compmap[ir->o]. */
2207#define asm_equal(as, ir) asm_comp(as, ir)
2208
2105#if LJ_32 && LJ_HASFFI 2209#if LJ_32 && LJ_HASFFI
2106/* 64 bit integer comparisons in 32 bit mode. */ 2210/* 64 bit integer comparisons in 32 bit mode. */
2107static void asm_comp_int64(ASMState *as, IRIns *ir) 2211static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2484,175 +2588,6 @@ static void asm_tail_prep(ASMState *as)
2484 } 2588 }
2485} 2589}
2486 2590
2487/* -- Instruction dispatch ------------------------------------------------ */
2488
2489/* Assemble a single instruction. */
/* x86-specific IR dispatcher, shown in the left column only: this commit
** removes it from lj_asm_x86.h and moves the dispatch into a single asm_ir()
** in lj_asm.c. The multi-line cases below (SUB, MUL, DIV, MOD, NEG, MIN, MAX,
** POW) are what the new asm_<op>() functions in this file contain. */
2490static void asm_ir(ASMState *as, IRIns *ir)
2491{
2492 switch ((IROp)ir->o) {
2493 /* Miscellaneous ops. */
2494 case IR_LOOP: asm_loop(as); break;
2495 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2496 case IR_USE:
2497 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2498 case IR_PHI: asm_phi(as, ir); break;
2499 case IR_HIOP: asm_hiop(as, ir); break;
2500 case IR_GCSTEP: asm_gcstep(as, ir); break;
2501
2502 /* Guarded assertions. */
/* An EQ/NE whose op1 is the immediately preceding HREF is merged into that
** HREF: curins is stepped back so the HREF is not assembled a second time. */
2503 case IR_EQ: case IR_NE:
2504 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2505 as->curins--;
2506 asm_href(as, ir-1, (IROp)ir->o);
2507 break;
2508 }
2509 /* fallthrough */
2510 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2511 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2512 case IR_ABC:
/* Old call shape: the cc lookup was done by the caller. The new asm_comp()
** performs the asm_compmap[ir->o] lookup itself. */
2513 asm_comp(as, ir, asm_compmap[ir->o]);
2514 break;
2515
2516 case IR_RETF: asm_retf(as, ir); break;
2517
2518 /* Bit ops. */
2519 case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
2520 case IR_BSWAP: asm_bitswap(as, ir); break;
2521
2522 case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
2523 case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
2524 case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
2525
2526 case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
2527 case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
2528 case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
2529 case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
2530 case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
2531
2532 /* Arithmetic ops. */
2533 case IR_ADD: asm_add(as, ir); break;
2534 case IR_SUB:
2535 if (irt_isnum(ir->t))
2536 asm_fparith(as, ir, XO_SUBSD);
2537 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2538 asm_intarith(as, ir, XOg_SUB);
2539 break;
2540 case IR_MUL:
2541 if (irt_isnum(ir->t))
2542 asm_fparith(as, ir, XO_MULSD);
2543 else
2544 asm_intarith(as, ir, XOg_X_IMUL);
2545 break;
2546 case IR_DIV:
2547#if LJ_64 && LJ_HASFFI
2548 if (!irt_isnum(ir->t))
2549 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
2550 IRCALL_lj_carith_divu64);
2551 else
2552#endif
2553 asm_fparith(as, ir, XO_DIVSD);
2554 break;
2555 case IR_MOD:
2556#if LJ_64 && LJ_HASFFI
2557 if (!irt_isint(ir->t))
2558 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
2559 IRCALL_lj_carith_modu64);
2560 else
2561#endif
2562 asm_callid(as, ir, IRCALL_lj_vm_modi);
2563 break;
2564
2565 case IR_NEG:
2566 if (irt_isnum(ir->t))
2567 asm_fparith(as, ir, XO_XORPS);
2568 else
2569 asm_neg_not(as, ir, XOg_NEG);
2570 break;
2571 case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
2572
/* asm_min_max() here is the old integer helper taking a condition code; the
** commit adds it back under the name asm_intmin_max(). */
2573 case IR_MIN:
2574 if (irt_isnum(ir->t))
2575 asm_fparith(as, ir, XO_MINSD);
2576 else
2577 asm_min_max(as, ir, CC_G);
2578 break;
2579 case IR_MAX:
2580 if (irt_isnum(ir->t))
2581 asm_fparith(as, ir, XO_MAXSD);
2582 else
2583 asm_min_max(as, ir, CC_L);
2584 break;
2585
2586 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
2587 asm_fpmath(as, ir);
2588 break;
2589 case IR_POW:
2590#if LJ_64 && LJ_HASFFI
2591 if (!irt_isnum(ir->t))
2592 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
2593 IRCALL_lj_carith_powu64);
2594 else
2595#endif
2596 asm_fppowi(as, ir);
2597 break;
2598
2599 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
2600 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
2601 case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
2602 case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
2603
2604 /* Memory references. */
2605 case IR_AREF: asm_aref(as, ir); break;
2606 case IR_HREF: asm_href(as, ir, 0); break;
2607 case IR_HREFK: asm_hrefk(as, ir); break;
2608 case IR_NEWREF: asm_newref(as, ir); break;
2609 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2610 case IR_FREF: asm_fref(as, ir); break;
2611 case IR_STRREF: asm_strref(as, ir); break;
2612
2613 /* Loads and stores. */
2614 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2615 asm_ahuvload(as, ir);
2616 break;
2617 case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
2618 case IR_SLOAD: asm_sload(as, ir); break;
2619
2620 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2621 case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
2622
2623 /* Allocations. */
2624 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2625 case IR_TNEW: asm_tnew(as, ir); break;
2626 case IR_TDUP: asm_tdup(as, ir); break;
2627 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2628
2629 /* Buffer operations. */
2630 case IR_BUFHDR: asm_bufhdr(as, ir); break;
2631 case IR_BUFPUT: asm_bufput(as, ir); break;
2632 case IR_BUFSTR: asm_bufstr(as, ir); break;
2633
2634 /* Write barriers. */
2635 case IR_TBAR: asm_tbar(as, ir); break;
2636 case IR_OBAR: asm_obar(as, ir); break;
2637
2638 /* Type conversions. */
2639 case IR_TOBIT: asm_tobit(as, ir); break;
2640 case IR_CONV: asm_conv(as, ir); break;
2641 case IR_TOSTR: asm_tostr(as, ir); break;
2642 case IR_STRTO: asm_strto(as, ir); break;
2643
2644 /* Calls. */
2645 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2646 case IR_CALLXS: asm_callx(as, ir); break;
2647 case IR_CARG: break;
2648
/* Unhandled opcode: record it in errinfo and raise the NYI-IR trace error. */
2649 default:
2650 setintV(&as->J->errinfo, ir->o);
2651 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2652 break;
2653 }
2654}
2655
2656/* -- Trace setup --------------------------------------------------------- */ 2591/* -- Trace setup --------------------------------------------------------- */
2657 2592
2658/* Ensure there are enough stack slots for call arguments. */ 2593/* Ensure there are enough stack slots for call arguments. */