aboutsummaryrefslogtreecommitdiff
path: root/src/lj_asm_arm.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_asm_arm.h')
-rw-r--r--src/lj_asm_arm.h438
1 files changed, 144 insertions, 294 deletions
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 087530b2..9d055c81 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
338/* Generate a call to a C function. */ 338/* Generate a call to a C function. */
339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
340{ 340{
341 uint32_t n, nargs = CCI_NARGS(ci); 341 uint32_t n, nargs = CCI_XNARGS(ci);
342 int32_t ofs = 0; 342 int32_t ofs = 0;
343#if LJ_SOFTFP 343#if LJ_SOFTFP
344 Reg gpr = REGARG_FIRSTGPR; 344 Reg gpr = REGARG_FIRSTGPR;
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
453 UNUSED(ci); 453 UNUSED(ci);
454} 454}
455 455
456static void asm_call(ASMState *as, IRIns *ir)
457{
458 IRRef args[CCI_NARGS_MAX];
459 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
460 asm_collectargs(as, ir, ci, args);
461 asm_setupresult(as, ir, ci);
462 asm_gencall(as, ci, args);
463}
464
465static void asm_callx(ASMState *as, IRIns *ir) 456static void asm_callx(ASMState *as, IRIns *ir)
466{ 457{
467 IRRef args[CCI_NARGS_MAX*2]; 458 IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
490{ 481{
491 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 482 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
492 void *pc = ir_kptr(IR(ir->op2)); 483 void *pc = ir_kptr(IR(ir->op2));
493 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 484 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
494 as->topslot -= (BCReg)delta; 485 as->topslot -= (BCReg)delta;
495 if ((int32_t)as->topslot < 0) as->topslot = 0; 486 if ((int32_t)as->topslot < 0) as->topslot = 0;
496 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 487 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -601,31 +592,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
601 } 592 }
602} 593}
603 594
604#if !LJ_SOFTFP && LJ_HASFFI
605static void asm_conv64(ASMState *as, IRIns *ir)
606{
607 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
608 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
609 IRCallID id;
610 CCallInfo ci;
611 IRRef args[2];
612 args[0] = (ir-1)->op1;
613 args[1] = ir->op1;
614 if (st == IRT_NUM || st == IRT_FLOAT) {
615 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
616 ir--;
617 } else {
618 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
619 }
620 ci = lj_ir_callinfo[id];
621#if !LJ_ABI_SOFTFP
622 ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
623#endif
624 asm_setupresult(as, ir, &ci);
625 asm_gencall(as, &ci, args);
626}
627#endif
628
629static void asm_strto(ASMState *as, IRIns *ir) 595static void asm_strto(ASMState *as, IRIns *ir)
630{ 596{
631 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 597 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,6 +655,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
689 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); 655 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
690} 656}
691 657
658/* -- Memory references --------------------------------------------------- */
659
692/* Get pointer to TValue. */ 660/* Get pointer to TValue. */
693static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 661static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
694{ 662{
@@ -714,7 +682,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
714 Reg src = ra_alloc1(as, ref, allow); 682 Reg src = ra_alloc1(as, ref, allow);
715 emit_lso(as, ARMI_STR, src, RID_SP, 0); 683 emit_lso(as, ARMI_STR, src, RID_SP, 0);
716 } 684 }
717 if ((ir+1)->o == IR_HIOP) 685 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
718 type = ra_alloc1(as, ref+1, allow); 686 type = ra_alloc1(as, ref+1, allow);
719 else 687 else
720 type = ra_allock(as, irt_toitype(ir->t), allow); 688 type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -722,27 +690,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
722 } 690 }
723} 691}
724 692
725static void asm_tostr(ASMState *as, IRIns *ir)
726{
727 IRRef args[2];
728 args[0] = ASMREF_L;
729 as->gcsteps++;
730 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
731 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
732 args[1] = ASMREF_TMP1; /* const lua_Number * */
733 asm_setupresult(as, ir, ci); /* GCstr * */
734 asm_gencall(as, ci, args);
735 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
736 } else {
737 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
738 args[1] = ir->op1; /* int32_t k */
739 asm_setupresult(as, ir, ci); /* GCstr * */
740 asm_gencall(as, ci, args);
741 }
742}
743
744/* -- Memory references --------------------------------------------------- */
745
746static void asm_aref(ASMState *as, IRIns *ir) 693static void asm_aref(ASMState *as, IRIns *ir)
747{ 694{
748 Reg dest = ra_dest(as, ir, RSET_GPR); 695 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,20 +907,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
960 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); 907 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
961} 908}
962 909
963static void asm_newref(ASMState *as, IRIns *ir)
964{
965 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
966 IRRef args[3];
967 if (ir->r == RID_SINK)
968 return;
969 args[0] = ASMREF_L; /* lua_State *L */
970 args[1] = ir->op1; /* GCtab *t */
971 args[2] = ASMREF_TMP1; /* cTValue *key */
972 asm_setupresult(as, ir, ci); /* TValue * */
973 asm_gencall(as, ci, args);
974 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
975}
976
977static void asm_uref(ASMState *as, IRIns *ir) 910static void asm_uref(ASMState *as, IRIns *ir)
978{ 911{
979 Reg dest = ra_dest(as, ir, RSET_GPR); 912 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1064,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir)
1064 997
1065static void asm_fload(ASMState *as, IRIns *ir) 998static void asm_fload(ASMState *as, IRIns *ir)
1066{ 999{
1067 Reg dest = ra_dest(as, ir, RSET_GPR); 1000 if (ir->op1 == REF_NIL) {
1068 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 1001 lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */
1069 ARMIns ai = asm_fxloadins(ir); 1002 } else {
1070 int32_t ofs; 1003 Reg dest = ra_dest(as, ir, RSET_GPR);
1071 if (ir->op2 == IRFL_TAB_ARRAY) { 1004 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
1072 ofs = asm_fuseabase(as, ir->op1); 1005 ARMIns ai = asm_fxloadins(ir);
1073 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1006 int32_t ofs;
1074 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); 1007 if (ir->op2 == IRFL_TAB_ARRAY) {
1075 return; 1008 ofs = asm_fuseabase(as, ir->op1);
1009 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1010 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
1011 return;
1012 }
1076 } 1013 }
1014 ofs = field_ofs[ir->op2];
1015 if ((ai & 0x04000000))
1016 emit_lso(as, ai, dest, idx, ofs);
1017 else
1018 emit_lsox(as, ai, dest, idx, ofs);
1077 } 1019 }
1078 ofs = field_ofs[ir->op2];
1079 if ((ai & 0x04000000))
1080 emit_lso(as, ai, dest, idx, ofs);
1081 else
1082 emit_lsox(as, ai, dest, idx, ofs);
1083} 1020}
1084 1021
1085static void asm_fstore(ASMState *as, IRIns *ir) 1022static void asm_fstore(ASMState *as, IRIns *ir)
@@ -1105,7 +1042,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
1105 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1042 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
1106} 1043}
1107 1044
1108static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1045static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
1109{ 1046{
1110 if (ir->r != RID_SINK) { 1047 if (ir->r != RID_SINK) {
1111 Reg src = ra_alloc1(as, ir->op2, 1048 Reg src = ra_alloc1(as, ir->op2,
@@ -1115,6 +1052,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
1115 } 1052 }
1116} 1053}
1117 1054
1055#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1056
1118static void asm_ahuvload(ASMState *as, IRIns *ir) 1057static void asm_ahuvload(ASMState *as, IRIns *ir)
1119{ 1058{
1120 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1059 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1272,19 +1211,16 @@ dotypecheck:
1272static void asm_cnew(ASMState *as, IRIns *ir) 1211static void asm_cnew(ASMState *as, IRIns *ir)
1273{ 1212{
1274 CTState *cts = ctype_ctsG(J2G(as->J)); 1213 CTState *cts = ctype_ctsG(J2G(as->J));
1275 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1214 CTypeID id = (CTypeID)IR(ir->op1)->i;
1276 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1215 CTSize sz;
1277 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1216 CTInfo info = lj_ctype_info(cts, id, &sz);
1278 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1217 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1279 IRRef args[2]; 1218 IRRef args[4];
1280 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1219 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1281 RegSet drop = RSET_SCRATCH; 1220 RegSet drop = RSET_SCRATCH;
1282 lua_assert(sz != CTSIZE_INVALID); 1221 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1283 1222
1284 args[0] = ASMREF_L; /* lua_State *L */
1285 args[1] = ASMREF_TMP1; /* MSize size */
1286 as->gcsteps++; 1223 as->gcsteps++;
1287
1288 if (ra_hasreg(ir->r)) 1224 if (ra_hasreg(ir->r))
1289 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1225 rset_clear(drop, ir->r); /* Dest reg handled below. */
1290 ra_evictset(as, drop); 1226 ra_evictset(as, drop);
@@ -1306,16 +1242,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1306 if (ofs == sizeof(GCcdata)) break; 1242 if (ofs == sizeof(GCcdata)) break;
1307 ofs -= 4; ir--; 1243 ofs -= 4; ir--;
1308 } 1244 }
1245 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1246 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1247 args[0] = ASMREF_L; /* lua_State *L */
1248 args[1] = ir->op1; /* CTypeID id */
1249 args[2] = ir->op2; /* CTSize sz */
1250 args[3] = ASMREF_TMP1; /* CTSize align */
1251 asm_gencall(as, ci, args);
1252 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1253 return;
1309 } 1254 }
1255
1310 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1256 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1311 { 1257 {
1312 uint32_t k = emit_isk12(ARMI_MOV, ctypeid); 1258 uint32_t k = emit_isk12(ARMI_MOV, id);
1313 Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); 1259 Reg r = k ? RID_R1 : ra_allock(as, id, allow);
1314 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); 1260 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1315 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); 1261 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1316 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); 1262 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
1317 if (k) emit_d(as, ARMI_MOV^k, RID_R1); 1263 if (k) emit_d(as, ARMI_MOV^k, RID_R1);
1318 } 1264 }
1265 args[0] = ASMREF_L; /* lua_State *L */
1266 args[1] = ASMREF_TMP1; /* MSize size */
1319 asm_gencall(as, ci, args); 1267 asm_gencall(as, ci, args);
1320 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1268 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1321 ra_releasetmp(as, ASMREF_TMP1)); 1269 ra_releasetmp(as, ASMREF_TMP1));
@@ -1392,23 +1340,38 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
1392 emit_dm(as, ai, (dest & 15), (left & 15)); 1340 emit_dm(as, ai, (dest & 15), (left & 15));
1393} 1341}
1394 1342
1395static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1343static void asm_callround(ASMState *as, IRIns *ir, int id)
1396{ 1344{
1397 IRIns *irp = IR(ir->op1); 1345 /* The modified regs must match with the *.dasc implementation. */
1398 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1346 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1399 IRIns *irpp = IR(irp->op1); 1347 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1400 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1348 RegSet of;
1401 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1349 Reg dest, src;
1402 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1350 ra_evictset(as, drop);
1403 IRRef args[2]; 1351 dest = ra_dest(as, ir, RSET_FPR);
1404 args[0] = irpp->op1; 1352 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1405 args[1] = irp->op2; 1353 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1406 asm_setupresult(as, ir, ci); 1354 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1407 asm_gencall(as, ci, args); 1355 (void *)lj_vm_trunc_sf);
1408 return 1; 1356 /* Workaround to protect argument GPRs from being used for remat. */
1409 } 1357 of = as->freeset;
1410 } 1358 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1411 return 0; 1359 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1360 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1361 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1362 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1363}
1364
1365static void asm_fpmath(ASMState *as, IRIns *ir)
1366{
1367 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1368 return;
1369 if (ir->op2 <= IRFPM_TRUNC)
1370 asm_callround(as, ir, ir->op2);
1371 else if (ir->op2 == IRFPM_SQRT)
1372 asm_fpunary(as, ir, ARMI_VSQRT_D);
1373 else
1374 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1412} 1375}
1413#endif 1376#endif
1414 1377
@@ -1474,19 +1437,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
1474 asm_intop(as, ir, asm_drop_cmp0(as, ai)); 1437 asm_intop(as, ir, asm_drop_cmp0(as, ai));
1475} 1438}
1476 1439
1477static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1478{
1479 ai = asm_drop_cmp0(as, ai);
1480 if (ir->op2 == 0) {
1481 Reg dest = ra_dest(as, ir, RSET_GPR);
1482 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1483 emit_d(as, ai^m, dest);
1484 } else {
1485 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1486 asm_intop(as, ir, ai);
1487 }
1488}
1489
1490static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1440static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
1491{ 1441{
1492 Reg dest = ra_dest(as, ir, RSET_GPR); 1442 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1552,6 +1502,20 @@ static void asm_mul(ASMState *as, IRIns *ir)
1552 asm_intmul(as, ir); 1502 asm_intmul(as, ir);
1553} 1503}
1554 1504
1505#define asm_addov(as, ir) asm_add(as, ir)
1506#define asm_subov(as, ir) asm_sub(as, ir)
1507#define asm_mulov(as, ir) asm_mul(as, ir)
1508
1509#if !LJ_SOFTFP
1510#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1511#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1512#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1513#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1514#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1515#endif
1516
1517#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1518
1555static void asm_neg(ASMState *as, IRIns *ir) 1519static void asm_neg(ASMState *as, IRIns *ir)
1556{ 1520{
1557#if !LJ_SOFTFP 1521#if !LJ_SOFTFP
@@ -1563,41 +1527,22 @@ static void asm_neg(ASMState *as, IRIns *ir)
1563 asm_intneg(as, ir, ARMI_RSB); 1527 asm_intneg(as, ir, ARMI_RSB);
1564} 1528}
1565 1529
1566static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 1530static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1567{ 1531{
1568 const CCallInfo *ci = &lj_ir_callinfo[id]; 1532 ai = asm_drop_cmp0(as, ai);
1569 IRRef args[2]; 1533 if (ir->op2 == 0) {
1570 args[0] = ir->op1; 1534 Reg dest = ra_dest(as, ir, RSET_GPR);
1571 args[1] = ir->op2; 1535 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1572 asm_setupresult(as, ir, ci); 1536 emit_d(as, ai^m, dest);
1573 asm_gencall(as, ci, args); 1537 } else {
1538 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1539 asm_intop(as, ir, ai);
1540 }
1574} 1541}
1575 1542
1576#if !LJ_SOFTFP 1543#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
1577static void asm_callround(ASMState *as, IRIns *ir, int id)
1578{
1579 /* The modified regs must match with the *.dasc implementation. */
1580 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1581 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1582 RegSet of;
1583 Reg dest, src;
1584 ra_evictset(as, drop);
1585 dest = ra_dest(as, ir, RSET_FPR);
1586 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1587 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1588 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1589 (void *)lj_vm_trunc_sf);
1590 /* Workaround to protect argument GPRs from being used for remat. */
1591 of = as->freeset;
1592 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1593 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1594 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1595 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1596 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1597}
1598#endif
1599 1544
1600static void asm_bitswap(ASMState *as, IRIns *ir) 1545static void asm_bswap(ASMState *as, IRIns *ir)
1601{ 1546{
1602 Reg dest = ra_dest(as, ir, RSET_GPR); 1547 Reg dest = ra_dest(as, ir, RSET_GPR);
1603 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1548 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1614,6 +1559,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1614 } 1559 }
1615} 1560}
1616 1561
1562#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
1563#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
1564#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
1565
1617static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1566static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1618{ 1567{
1619 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1568 if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1631,6 +1580,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1631 } 1580 }
1632} 1581}
1633 1582
1583#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
1584#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
1585#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
1586#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
1587#define asm_brol(as, ir) lua_assert(0)
1588
1634static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1589static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1635{ 1590{
1636 uint32_t kcmp = 0, kmov = 0; 1591 uint32_t kcmp = 0, kmov = 0;
@@ -1704,6 +1659,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
1704 asm_intmin_max(as, ir, cc); 1659 asm_intmin_max(as, ir, cc);
1705} 1660}
1706 1661
1662#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
1663#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
1664
1707/* -- Comparisons --------------------------------------------------------- */ 1665/* -- Comparisons --------------------------------------------------------- */
1708 1666
1709/* Map of comparisons to flags. ORDER IR. */ 1667/* Map of comparisons to flags. ORDER IR. */
@@ -1819,6 +1777,18 @@ notst:
1819 as->flagmcp = as->mcp; /* Allow elimination of the compare. */ 1777 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1820} 1778}
1821 1779
1780static void asm_comp(ASMState *as, IRIns *ir)
1781{
1782#if !LJ_SOFTFP
1783 if (irt_isnum(ir->t))
1784 asm_fpcomp(as, ir);
1785 else
1786#endif
1787 asm_intcomp(as, ir);
1788}
1789
1790#define asm_equal(as, ir) asm_comp(as, ir)
1791
1822#if LJ_HASFFI 1792#if LJ_HASFFI
1823/* 64 bit integer comparisons. */ 1793/* 64 bit integer comparisons. */
1824static void asm_int64comp(ASMState *as, IRIns *ir) 1794static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1893,7 +1863,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1893#endif 1863#endif
1894 } else if ((ir-1)->o == IR_XSTORE) { 1864 } else if ((ir-1)->o == IR_XSTORE) {
1895 if ((ir-1)->r != RID_SINK) 1865 if ((ir-1)->r != RID_SINK)
1896 asm_xstore(as, ir, 4); 1866 asm_xstore_(as, ir, 4);
1897 return; 1867 return;
1898 } 1868 }
1899 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1869 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
@@ -1941,6 +1911,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1941#endif 1911#endif
1942} 1912}
1943 1913
1914/* -- Profiling ----------------------------------------------------------- */
1915
1916static void asm_prof(ASMState *as, IRIns *ir)
1917{
1918 UNUSED(ir);
1919 asm_guardcc(as, CC_NE);
1920 emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
1921 emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1922}
1923
1944/* -- Stack handling ------------------------------------------------------ */ 1924/* -- Stack handling ------------------------------------------------------ */
1945 1925
1946/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1926/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1970,7 +1950,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1970 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, 1950 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
1971 (int32_t)offsetof(lua_State, maxstack)); 1951 (int32_t)offsetof(lua_State, maxstack));
1972 if (irp) { /* Must not spill arbitrary registers in head of side trace. */ 1952 if (irp) { /* Must not spill arbitrary registers in head of side trace. */
1973 int32_t i = i32ptr(&J2G(as->J)->jit_L); 1953 int32_t i = i32ptr(&J2G(as->J)->cur_L);
1974 if (ra_hasspill(irp->s)) 1954 if (ra_hasspill(irp->s))
1975 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); 1955 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
1976 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); 1956 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1978,7 +1958,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1978 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ 1958 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
1979 emit_loadi(as, RID_TMP, (i & ~4095)); 1959 emit_loadi(as, RID_TMP, (i & ~4095));
1980 } else { 1960 } else {
1981 emit_getgl(as, RID_TMP, jit_L); 1961 emit_getgl(as, RID_TMP, cur_L);
1982 } 1962 }
1983} 1963}
1984 1964
@@ -2087,13 +2067,13 @@ static void asm_loop_fixup(ASMState *as)
2087 2067
2088/* -- Head of trace ------------------------------------------------------- */ 2068/* -- Head of trace ------------------------------------------------------- */
2089 2069
2090/* Reload L register from g->jit_L. */ 2070/* Reload L register from g->cur_L. */
2091static void asm_head_lreg(ASMState *as) 2071static void asm_head_lreg(ASMState *as)
2092{ 2072{
2093 IRIns *ir = IR(ASMREF_L); 2073 IRIns *ir = IR(ASMREF_L);
2094 if (ra_used(ir)) { 2074 if (ra_used(ir)) {
2095 Reg r = ra_dest(as, ir, RSET_GPR); 2075 Reg r = ra_dest(as, ir, RSET_GPR);
2096 emit_getgl(as, r, jit_L); 2076 emit_getgl(as, r, cur_L);
2097 ra_evictk(as); 2077 ra_evictk(as);
2098 } 2078 }
2099} 2079}
@@ -2164,143 +2144,13 @@ static void asm_tail_prep(ASMState *as)
2164 *p = 0; /* Prevent load/store merging. */ 2144 *p = 0; /* Prevent load/store merging. */
2165} 2145}
2166 2146
2167/* -- Instruction dispatch ------------------------------------------------ */
2168
2169/* Assemble a single instruction. */
2170static void asm_ir(ASMState *as, IRIns *ir)
2171{
2172 switch ((IROp)ir->o) {
2173 /* Miscellaneous ops. */
2174 case IR_LOOP: asm_loop(as); break;
2175 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2176 case IR_USE:
2177 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2178 case IR_PHI: asm_phi(as, ir); break;
2179 case IR_HIOP: asm_hiop(as, ir); break;
2180 case IR_GCSTEP: asm_gcstep(as, ir); break;
2181
2182 /* Guarded assertions. */
2183 case IR_EQ: case IR_NE:
2184 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2185 as->curins--;
2186 asm_href(as, ir-1, (IROp)ir->o);
2187 break;
2188 }
2189 /* fallthrough */
2190 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2191 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2192 case IR_ABC:
2193#if !LJ_SOFTFP
2194 if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
2195#endif
2196 asm_intcomp(as, ir);
2197 break;
2198
2199 case IR_RETF: asm_retf(as, ir); break;
2200
2201 /* Bit ops. */
2202 case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
2203 case IR_BSWAP: asm_bitswap(as, ir); break;
2204
2205 case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
2206 case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
2207 case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
2208
2209 case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
2210 case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
2211 case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
2212 case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
2213 case IR_BROL: lua_assert(0); break;
2214
2215 /* Arithmetic ops. */
2216 case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
2217 case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
2218 case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
2219 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2220 case IR_NEG: asm_neg(as, ir); break;
2221
2222#if LJ_SOFTFP
2223 case IR_DIV: case IR_POW: case IR_ABS:
2224 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
2225 lua_assert(0); /* Unused for LJ_SOFTFP. */
2226 break;
2227#else
2228 case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
2229 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2230 case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
2231 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2232 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2233 case IR_FPMATH:
2234 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2235 break;
2236 if (ir->op2 <= IRFPM_TRUNC)
2237 asm_callround(as, ir, ir->op2);
2238 else if (ir->op2 == IRFPM_SQRT)
2239 asm_fpunary(as, ir, ARMI_VSQRT_D);
2240 else
2241 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2242 break;
2243 case IR_TOBIT: asm_tobit(as, ir); break;
2244#endif
2245
2246 case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
2247 case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
2248
2249 /* Memory references. */
2250 case IR_AREF: asm_aref(as, ir); break;
2251 case IR_HREF: asm_href(as, ir, 0); break;
2252 case IR_HREFK: asm_hrefk(as, ir); break;
2253 case IR_NEWREF: asm_newref(as, ir); break;
2254 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2255 case IR_FREF: asm_fref(as, ir); break;
2256 case IR_STRREF: asm_strref(as, ir); break;
2257
2258 /* Loads and stores. */
2259 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2260 asm_ahuvload(as, ir);
2261 break;
2262 case IR_FLOAD: asm_fload(as, ir); break;
2263 case IR_XLOAD: asm_xload(as, ir); break;
2264 case IR_SLOAD: asm_sload(as, ir); break;
2265
2266 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2267 case IR_FSTORE: asm_fstore(as, ir); break;
2268 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2269
2270 /* Allocations. */
2271 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2272 case IR_TNEW: asm_tnew(as, ir); break;
2273 case IR_TDUP: asm_tdup(as, ir); break;
2274 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2275
2276 /* Write barriers. */
2277 case IR_TBAR: asm_tbar(as, ir); break;
2278 case IR_OBAR: asm_obar(as, ir); break;
2279
2280 /* Type conversions. */
2281 case IR_CONV: asm_conv(as, ir); break;
2282 case IR_TOSTR: asm_tostr(as, ir); break;
2283 case IR_STRTO: asm_strto(as, ir); break;
2284
2285 /* Calls. */
2286 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2287 case IR_CALLXS: asm_callx(as, ir); break;
2288 case IR_CARG: break;
2289
2290 default:
2291 setintV(&as->J->errinfo, ir->o);
2292 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2293 break;
2294 }
2295}
2296
2297/* -- Trace setup --------------------------------------------------------- */ 2147/* -- Trace setup --------------------------------------------------------- */
2298 2148
2299/* Ensure there are enough stack slots for call arguments. */ 2149/* Ensure there are enough stack slots for call arguments. */
2300static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2150static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2301{ 2151{
2302 IRRef args[CCI_NARGS_MAX*2]; 2152 IRRef args[CCI_NARGS_MAX*2];
2303 uint32_t i, nargs = (int)CCI_NARGS(ci); 2153 uint32_t i, nargs = CCI_XNARGS(ci);
2304 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; 2154 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
2305 asm_collectargs(as, ir, ci, args); 2155 asm_collectargs(as, ir, ci, args);
2306 for (i = 0; i < nargs; i++) { 2156 for (i = 0; i < nargs; i++) {