Diffstat (limited to 'src/lj_asm_arm.h')
-rw-r--r--  src/lj_asm_arm.h  440
1 file changed, 148 insertions(+), 292 deletions(-)
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 72f205d9..8339367b 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
 #if LJ_SOFTFP
   Reg gpr = REGARG_FIRSTGPR;
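Note: both argument-count sites in this file (asm_gencall here and asm_setup_call_slots at the end of this diff) switch from CCI_NARGS to CCI_XNARGS. Presumably the X variant is the argument count extended for 64-bit values, which occupy two 32-bit slots on this target. A minimal runnable sketch of the idea, with hypothetical names (this is not the LuaJIT macro):

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for illustration only. */
    typedef enum { ARG_I32, ARG_I64, ARG_NUM } ArgType;

    /* A 64-bit integer or a double needs two 32-bit argument slots, so
    ** the backend must iterate over slots, not declared arguments. */
    static uint32_t count_arg_slots(const ArgType *args, uint32_t nargs)
    {
      uint32_t n, slots = 0;
      for (n = 0; n < nargs; n++)
        slots += (args[n] == ARG_I32) ? 1 : 2;
      return slots;
    }

    int main(void)
    {
      ArgType sig[3] = { ARG_I32, ARG_I64, ARG_NUM };
      printf("%u\n", count_arg_slots(sig, 3));  /* -> 5 */
      return 0;
    }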
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   UNUSED(ci);
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX*2];
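Note: the removed asm_call was pure target-independent glue, and the same holds for asm_conv64, asm_tostr and asm_newref removed further down; presumably they now live once in the shared lj_asm.c instead of being duplicated per backend. For reference, the removed wrapper, with a comment per step:

    static void asm_call(ASMState *as, IRIns *ir)
    {
      IRRef args[CCI_NARGS_MAX];
      const CCallInfo *ci = &lj_ir_callinfo[ir->op2];  /* Call info by id. */
      asm_collectargs(as, ir, ci, args);  /* Walk the CARG chain into args[]. */
      asm_setupresult(as, ir, ci);        /* Bind the result register(s). */
      asm_gencall(as, ci, args);          /* Emit the call and arg moves. */
    }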
@@ -529,6 +520,8 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
   emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
 }
+#else
+#define asm_tobit(as, ir) lua_assert(0)
 #endif
 
 static void asm_conv(ASMState *as, IRIns *ir)
@@ -601,31 +594,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if !LJ_SOFTFP && LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  CCallInfo ci;
-  IRRef args[2];
-  args[0] = (ir-1)->op1;
-  args[1] = ir->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = lj_ir_callinfo[id];
-#if !LJ_ABI_SOFTFP
-  ci.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
-#endif
-  asm_setupresult(as, ir, &ci);
-  asm_gencall(as, &ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,6 +657,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -714,7 +684,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     Reg src = ra_alloc1(as, ref, allow);
     emit_lso(as, ARMI_STR, src, RID_SP, 0);
   }
-  if ((ir+1)->o == IR_HIOP)
+  if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
     type = ra_alloc1(as, ref+1, allow);
   else
     type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -722,27 +692,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,20 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   /* NYI: Check that UREFO is still open and not aliasing a slot. */
@@ -1106,7 +1041,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1(as, ir->op2,
@@ -1116,6 +1051,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
 }
 
+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
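Note: the underscore variant keeps a single emitter parameterized by offset: the dispatcher-visible asm_xstore handles the low word at offset 0, while asm_hiop (changed later in this diff) calls asm_xstore_ with offset 4 for the high word of a 64-bit store. A runnable toy of that split, with hypothetical names:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* One store routine with an explicit offset serves both halves of a
    ** 64-bit value on a 32-bit target. */
    static void xstore_(uint8_t *base, int32_t ofs, uint32_t word)
    {
      memcpy(base + ofs, &word, 4);
    }
    #define xstore(base, w)  xstore_(base, 0, w)

    int main(void)
    {
      uint8_t mem[8];
      uint64_t v = 0x1122334455667788ULL, out;
      xstore(mem, (uint32_t)v);              /* IR_XSTORE: low word.  */
      xstore_(mem, 4, (uint32_t)(v >> 32));  /* IR_HIOP:   high word. */
      memcpy(&out, mem, 8);
      printf("%d\n", out == v);  /* -> 1 on a little-endian host. */
      return 0;
    }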
@@ -1273,19 +1210,16 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-              lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
+  IRRef args[4];
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1307,16 +1241,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir--;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id */
+    args[2] = ir->op2;      /* CTSize sz */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   {
-    uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
-    Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+    uint32_t k = emit_isk12(ARMI_MOV, id);
+    Reg r = k ? RID_R1 : ra_allock(as, id, allow);
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
   }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
                ra_releasetmp(as, ASMREF_TMP1));
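Note: for VLA/VLS types the size is unknown at assembly time, so allocation moves to a runtime call and the alignment, recovered from the CTInfo via ctype_align(), rides along as the fourth argument. A sketch of why the callee needs it, assuming the alignment is stored as a log2 value:

    #include <stddef.h>
    #include <stdio.h>

    /* Round a variable payload size up to the ctype's alignment.
    ** (Assumption: align_log2 is a log2 alignment, e.g. 4 -> 16 bytes.) */
    static size_t round_up(size_t sz, unsigned align_log2)
    {
      size_t a = (size_t)1 << align_log2;
      return (sz + a - 1) & ~(a - 1);
    }

    int main(void)
    {
      printf("%zu\n", round_up(100, 4));  /* 16-byte alignment -> 112 */
      return 0;
    }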
@@ -1393,24 +1339,41 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
   emit_dm(as, ai, (dest & 15), (left & 15));
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+static void asm_callround(ASMState *as, IRIns *ir, int id)
+{
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+                RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
+  ra_evictset(as, drop);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+                id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+                (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, ARMI_VSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
+#else
+#define asm_fpmath(as, ir) lua_assert(0)
 #endif
 
 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
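Note: floor/ceil/trunc now call the hand-written lj_vm_*_sf assembler routines, which take the double in the R0/R1 GPR pair and return it in RETLO/RETHI, i.e. a soft-float convention even on hard-float builds; the freeset/cost manipulation pins R0/R1 so rematerialization inside ra_alloc1() cannot clobber the outgoing argument pair. A runnable toy of a double traveling through two 32-bit registers:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Model of the assumed _sf convention: split the IEEE-754 bits of a
    ** double into the low/high words that would go into R0/R1. */
    static void split_double(double d, uint32_t *lo, uint32_t *hi)
    {
      uint64_t bits;
      memcpy(&bits, &d, 8);
      *lo = (uint32_t)bits;
      *hi = (uint32_t)(bits >> 32);
    }

    int main(void)
    {
      uint32_t lo, hi;
      split_double(2.5, &lo, &hi);
      printf("R0=%08x R1=%08x\n", (unsigned)lo, (unsigned)hi);
      return 0;  /* 2.5 is 0x4004000000000000: R0=00000000 R1=40040000. */
    }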
@@ -1460,32 +1423,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
   asm_intop(as, ir, ai);
 }
 
-static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
-{
-  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
-    uint32_t cc = (as->mcp[1] >> 28);
-    as->flagmcp = NULL;
-    if (cc <= CC_NE) {
-      as->mcp++;
-      ai |= ARMI_S;
-    } else if (cc == CC_GE) {
-      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-      ai |= ARMI_S;
-    } else if (cc == CC_LT) {
-      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-      ai |= ARMI_S;
-    }  /* else: other conds don't work with bit ops. */
-  }
-  if (ir->op2 == 0) {
-    Reg dest = ra_dest(as, ir, RSET_GPR);
-    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
-    emit_d(as, ai^m, dest);
-  } else {
-    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
-    asm_intop(as, ir, ai);
-  }
-}
-
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1551,6 +1488,26 @@ static void asm_mul(ASMState *as, IRIns *ir)
     asm_intmul(as, ir);
 }
 
+#define asm_addov(as, ir) asm_add(as, ir)
+#define asm_subov(as, ir) asm_sub(as, ir)
+#define asm_mulov(as, ir) asm_mul(as, ir)
+
+#if LJ_SOFTFP
+#define asm_div(as, ir) lua_assert(0)
+#define asm_pow(as, ir) lua_assert(0)
+#define asm_abs(as, ir) lua_assert(0)
+#define asm_atan2(as, ir) lua_assert(0)
+#define asm_ldexp(as, ir) lua_assert(0)
+#else
+#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
+#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
+#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
+#endif
+
+#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 #if !LJ_SOFTFP
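Note: these defines are the heart of the refactor. Every IR opcode now resolves to a name that a shared dispatcher can expand directly, which is what makes the hand-written asm_ir() switch (deleted at the end of this diff) redundant. A runnable toy of the pattern; the op list and dispatcher here are hypothetical, the real table presumably being generated centrally in lj_asm.c:

    #include <stdio.h>

    /* X-macro op list: one place defines the set of ops. */
    #define OPDEF(_) _(ADD) _(SUB) _(MUL)

    /* Per-"backend" emitters; a port only needs to supply these names. */
    #define asm_ADD(x, y) ((x) + (y))
    #define asm_SUB(x, y) ((x) - (y))
    #define asm_MUL(x, y) ((x) * (y))

    typedef enum { OP_ADD, OP_SUB, OP_MUL } Op;

    /* The shared dispatcher expands the op list into switch cases. */
    static int dispatch(Op op, int x, int y)
    {
      switch (op) {
    #define CASE(name) case OP_##name: return asm_##name(x, y);
      OPDEF(CASE)
    #undef CASE
      }
      return 0;
    }

    int main(void) { printf("%d\n", dispatch(OP_MUL, 6, 7)); return 0; }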
@@ -1562,41 +1519,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
   asm_intneg(as, ir, ARMI_RSB);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
+    uint32_t cc = (as->mcp[1] >> 28);
+    as->flagmcp = NULL;
+    if (cc <= CC_NE) {
+      as->mcp++;
+      ai |= ARMI_S;
+    } else if (cc == CC_GE) {
+      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+      ai |= ARMI_S;
+    } else if (cc == CC_LT) {
+      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+      ai |= ARMI_S;
+    }  /* else: other conds don't work with bit ops. */
+  }
+  if (ir->op2 == 0) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+    emit_d(as, ai^m, dest);
+  } else {
+    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+    asm_intop(as, ir, ai);
+  }
 }
 
-#if !LJ_SOFTFP
-static void asm_callround(ASMState *as, IRIns *ir, int id)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
-                RID2RSET(RID_R3)|RID2RSET(RID_R12);
-  RegSet of;
-  Reg dest, src;
-  ra_evictset(as, drop);
-  dest = ra_dest(as, ir, RSET_FPR);
-  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
-  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
-                id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
-                (void *)lj_vm_trunc_sf);
-  /* Workaround to protect argument GPRs from being used for remat. */
-  of = as->freeset;
-  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
-  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
-  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
-  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
-  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
-}
-#endif
+#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1613,6 +1564,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 {
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
@@ -1630,6 +1585,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
   }
 }
 
+#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir) lua_assert(0)
+
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   uint32_t kcmp = 0, kmov = 0;
@@ -1703,6 +1664,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
     asm_intmin_max(as, ir, cc);
 }
 
+#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
+#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Map of comparisons to flags. ORDER IR. */
@@ -1818,6 +1782,18 @@ notst:
   as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
 }
 
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t))
+    asm_fpcomp(as, ir);
+  else
+#endif
+  asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir) asm_comp(as, ir)
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_int64comp(ASMState *as, IRIns *ir)
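Note: asm_comp folds the FP/integer split into a single dispatcher-visible entry point. The #if placed between the if and the else does the work: on soft-float builds the conditional vanishes and asm_intcomp() becomes the entire body. A runnable toy of the idiom (SOFTFP here stands in for LJ_SOFTFP):

    #include <stdio.h>

    #define SOFTFP 1  /* Illustrative build flag. */

    static void comp(int isnum)
    {
    #if !SOFTFP
      if (isnum)
        printf("fp compare\n");
      else
    #endif
      printf("int compare\n");  /* Whole body when SOFTFP is set. */
    }

    int main(void) { comp(1); return 0; }  /* -> "int compare" */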
@@ -1892,7 +1868,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
   } else if ((ir-1)->o == IR_XSTORE) {
     if ((ir-1)->r != RID_SINK)
-      asm_xstore(as, ir, 4);
+      asm_xstore_(as, ir, 4);
     return;
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
@@ -1940,6 +1916,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
+  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
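Note: machine code is emitted backwards, so read asm_prof bottom-up: load the hookmask byte, TST it against HOOK_PROFILE, and guard on NE so the trace exits once the profiler sets the bit. The same logic forwards, as a runnable toy (the bit value is illustrative, not LuaJIT's):

    #include <stdint.h>
    #include <stdio.h>

    #define HOOK_PROFILE 0x80u  /* Illustrative bit value only. */

    /* Forward version of the emitted check: the guard fires (trace exit)
    ** when the profiler has flagged the hook mask. */
    static int profile_guard(const uint8_t *hookmask)
    {
      return (*hookmask & HOOK_PROFILE) != 0;  /* TST ...; guard on NE. */
    }

    int main(void)
    {
      uint8_t mask = HOOK_PROFILE;
      printf("exit trace: %d\n", profile_guard(&mask));  /* -> 1 */
      return 0;
    }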
@@ -1969,7 +1955,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
            (int32_t)offsetof(lua_State, maxstack));
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    int32_t i = i32ptr(&J2G(as->J)->cur_L);
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1977,7 +1963,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
-    emit_getgl(as, RID_TMP, jit_L);
+    emit_getgl(as, RID_TMP, cur_L);
   }
 }
 
@@ -2086,13 +2072,13 @@ static void asm_loop_fixup(ASMState *as)
 
 /* -- Head of trace ------------------------------------------------------- */
 
-/* Reload L register from g->jit_L. */
+/* Reload L register from g->cur_L. */
 static void asm_head_lreg(ASMState *as)
 {
   IRIns *ir = IR(ASMREF_L);
   if (ra_used(ir)) {
     Reg r = ra_dest(as, ir, RSET_GPR);
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
     ra_evictk(as);
   }
 }
@@ -2163,143 +2149,13 @@ static void asm_tail_prep(ASMState *as)
   *p = 0;  /* Prevent load/store merging. */
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-#if !LJ_SOFTFP
-    if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
-#endif
-    asm_intcomp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
-  case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
-  case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
-  case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
-  case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
-  case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
-  case IR_BROL: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
-  case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
-  case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-#if LJ_SOFTFP
-  case IR_DIV: case IR_POW: case IR_ABS:
-  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
-    lua_assert(0);  /* Unused for LJ_SOFTFP. */
-    break;
-#else
-  case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, ARMI_VSQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-#endif
-
-  case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
-  case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {