Diffstat (limited to 'src/lj_asm_arm.h')
-rw-r--r--  src/lj_asm_arm.h  464
1 file changed, 157 insertions(+), 307 deletions(-)
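
Summary of the change (inferred from the hunks below; this page shows no commit message): the ARM backend drops its private asm_ir() dispatch switch together with helpers that are presumably consolidated in the target-independent lj_asm.c (asm_call, asm_callid, asm_conv64, asm_tostr, asm_newref, asm_fpjoin_pow, asm_callround moves within the file), and instead exposes per-op asm_<name> functions and macros. Argument counting switches from CCI_NARGS to CCI_XNARGS, g->jit_L becomes g->cur_L, asm_retf accounts for LJ_FR2 two-slot frames, asm_cnew learns to allocate VLA/VLS/aligned cdata via lj_cdata_newv, and a profiling hook check (asm_prof) is added.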
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 961f7e39..37bfa40f 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
 #if LJ_SOFTFP
   Reg gpr = REGARG_FIRSTGPR;
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   UNUSED(ci);
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
@@ -601,31 +592,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if !LJ_SOFTFP && LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  CCallInfo ci;
-  IRRef args[2];
-  args[0] = (ir-1)->op1;
-  args[1] = ir->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = lj_ir_callinfo[id];
-#if !LJ_ABI_SOFTFP
-  ci.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
-#endif
-  asm_setupresult(as, ir, &ci);
-  asm_gencall(as, &ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,6 +655,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 {
@@ -714,7 +682,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
       Reg src = ra_alloc1(as, ref, allow);
       emit_lso(as, ARMI_STR, src, RID_SP, 0);
     }
-    if ((ir+1)->o == IR_HIOP)
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
       type = ra_alloc1(as, ref+1, allow);
     else
       type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -722,27 +690,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,20 +907,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t     */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1064,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir)
 
 static void asm_fload(ASMState *as, IRIns *ir)
 {
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
-  ARMIns ai = asm_fxloadins(ir);
-  int32_t ofs;
-  if (ir->op2 == IRFL_TAB_ARRAY) {
-    ofs = asm_fuseabase(as, ir->op1);
-    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
-      emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
-      return;
+  if (ir->op1 == REF_NIL) {
+    lua_assert(!ra_used(ir));  /* We can end up here if DCE is turned off. */
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    ARMIns ai = asm_fxloadins(ir);
+    int32_t ofs;
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+        emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
+        return;
+      }
     }
+    ofs = field_ofs[ir->op2];
+    if ((ai & 0x04000000))
+      emit_lso(as, ai, dest, idx, ofs);
+    else
+      emit_lsox(as, ai, dest, idx, ofs);
   }
-  ofs = field_ofs[ir->op2];
-  if ((ai & 0x04000000))
-    emit_lso(as, ai, dest, idx, ofs);
-  else
-    emit_lsox(as, ai, dest, idx, ofs);
 }
 
 static void asm_fstore(ASMState *as, IRIns *ir)
@@ -1105,7 +1042,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1(as, ir->op2,
@@ -1115,6 +1052,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
   }
 }
 
+#define asm_xstore(as, ir)  asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1272,19 +1211,16 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-              lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
+  IRRef args[4];
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size   */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1306,16 +1242,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir--;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   {
-    uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
-    Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+    uint32_t k = emit_isk12(ARMI_MOV, id);
+    Reg r = k ? RID_R1 : ra_allock(as, id, allow);
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
   }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
                ra_releasetmp(as, ASMREF_TMP1));
@@ -1392,23 +1340,38 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
   emit_dm(as, ai, (dest & 15), (left & 15));
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+static void asm_callround(ASMState *as, IRIns *ir, int id)
 {
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+                RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
+  ra_evictset(as, drop);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+                id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+                (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+    return;
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, ARMI_VSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 #endif
 
@@ -1459,32 +1422,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
   asm_intop(as, ir, ai);
 }
 
-static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
-{
-  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
-    uint32_t cc = (as->mcp[1] >> 28);
-    as->flagmcp = NULL;
-    if (cc <= CC_NE) {
-      as->mcp++;
-      ai |= ARMI_S;
-    } else if (cc == CC_GE) {
-      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-      ai |= ARMI_S;
-    } else if (cc == CC_LT) {
-      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-      ai |= ARMI_S;
-    }  /* else: other conds don't work with bit ops. */
-  }
-  if (ir->op2 == 0) {
-    Reg dest = ra_dest(as, ir, RSET_GPR);
-    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
-    emit_d(as, ai^m, dest);
-  } else {
-    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
-    asm_intop(as, ir, ai);
-  }
-}
-
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1550,6 +1487,20 @@ static void asm_mul(ASMState *as, IRIns *ir)
     asm_intmul(as, ir);
 }
 
+#define asm_addov(as, ir)   asm_add(as, ir)
+#define asm_subov(as, ir)   asm_sub(as, ir)
+#define asm_mulov(as, ir)   asm_mul(as, ir)
+
+#if !LJ_SOFTFP
+#define asm_div(as, ir)     asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_pow(as, ir)     asm_callid(as, ir, IRCALL_lj_vm_powi)
+#define asm_abs(as, ir)     asm_fpunary(as, ir, ARMI_VABS_D)
+#define asm_atan2(as, ir)   asm_callid(as, ir, IRCALL_atan2)
+#define asm_ldexp(as, ir)   asm_callid(as, ir, IRCALL_ldexp)
+#endif
+
+#define asm_mod(as, ir)     asm_callid(as, ir, IRCALL_lj_vm_modi)
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 #if !LJ_SOFTFP
@@ -1561,41 +1512,35 @@ static void asm_neg(ASMState *as, IRIns *ir)
   asm_intneg(as, ir, ARMI_RSB);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+  if (as->flagmcp == as->mcp) {  /* Try to drop cmp r, #0. */
+    uint32_t cc = (as->mcp[1] >> 28);
+    as->flagmcp = NULL;
+    if (cc <= CC_NE) {
+      as->mcp++;
+      ai |= ARMI_S;
+    } else if (cc == CC_GE) {
+      *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+      ai |= ARMI_S;
+    } else if (cc == CC_LT) {
+      *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+      ai |= ARMI_S;
+    }  /* else: other conds don't work with bit ops. */
+  }
+  if (ir->op2 == 0) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+    emit_d(as, ai^m, dest);
+  } else {
+    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+    asm_intop(as, ir, ai);
+  }
 }
 
-#if !LJ_SOFTFP
-static void asm_callround(ASMState *as, IRIns *ir, int id)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
-                RID2RSET(RID_R3)|RID2RSET(RID_R12);
-  RegSet of;
-  Reg dest, src;
-  ra_evictset(as, drop);
-  dest = ra_dest(as, ir, RSET_FPR);
-  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
-  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
-                id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
-                (void *)lj_vm_trunc_sf);
-  /* Workaround to protect argument GPRs from being used for remat. */
-  of = as->freeset;
-  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
-  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
-  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
-  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
-  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
-}
-#endif
+#define asm_bnot(as, ir)    asm_bitop(as, ir, ARMI_MVN)
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1612,6 +1557,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_band(as, ir)    asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir)     asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir)    asm_bitop(as, ir, ARMI_EOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 {
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
@@ -1629,6 +1578,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
   }
 }
 
+#define asm_bshl(as, ir)    asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir)    asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir)    asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir)    asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir)    lua_assert(0)
+
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   uint32_t kcmp = 0, kmov = 0;
@@ -1702,6 +1657,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
     asm_intmin_max(as, ir, cc);
 }
 
+#define asm_min(as, ir)     asm_min_max(as, ir, CC_GT, CC_HI)
+#define asm_max(as, ir)     asm_min_max(as, ir, CC_LT, CC_LO)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Map of comparisons to flags. ORDER IR. */
@@ -1817,6 +1775,18 @@ notst:
   as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
 }
 
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t))
+    asm_fpcomp(as, ir);
+  else
+#endif
+    asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir)   asm_comp(as, ir)
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1891,7 +1861,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
   } else if ((ir-1)->o == IR_XSTORE) {
     if ((ir-1)->r != RID_SINK)
-      asm_xstore(as, ir, 4);
+      asm_xstore_(as, ir, 4);
     return;
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
@@ -1939,6 +1909,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
+  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1968,7 +1948,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
            (int32_t)offsetof(lua_State, maxstack));
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    int32_t i = i32ptr(&J2G(as->J)->cur_L);
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1976,7 +1956,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
     emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
-    emit_getgl(as, RID_TMP, jit_L);
+    emit_getgl(as, RID_TMP, cur_L);
   }
 }
 
@@ -2085,13 +2065,13 @@ static void asm_loop_fixup(ASMState *as)
 
 /* -- Head of trace ------------------------------------------------------- */
 
-/* Reload L register from g->jit_L. */
+/* Reload L register from g->cur_L. */
 static void asm_head_lreg(ASMState *as)
 {
   IRIns *ir = IR(ASMREF_L);
   if (ra_used(ir)) {
     Reg r = ra_dest(as, ir, RSET_GPR);
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
     ra_evictk(as);
   }
 }
@@ -2162,143 +2142,13 @@ static void asm_tail_prep(ASMState *as)
   *p = 0;  /* Prevent load/store merging. */
 }
 
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
-  switch ((IROp)ir->o) {
-  /* Miscellaneous ops. */
-  case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
-  case IR_USE:
-    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
-  case IR_PHI: asm_phi(as, ir); break;
-  case IR_HIOP: asm_hiop(as, ir); break;
-  case IR_GCSTEP: asm_gcstep(as, ir); break;
-
-  /* Guarded assertions. */
-  case IR_EQ: case IR_NE:
-    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
-      as->curins--;
-      asm_href(as, ir-1, (IROp)ir->o);
-      break;
-    }
-    /* fallthrough */
-  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
-  case IR_ABC:
-#if !LJ_SOFTFP
-    if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
-#endif
-    asm_intcomp(as, ir);
-    break;
-
-  case IR_RETF: asm_retf(as, ir); break;
-
-  /* Bit ops. */
-  case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
-  case IR_BSWAP: asm_bitswap(as, ir); break;
-
-  case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
-  case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
-  case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
-
-  case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
-  case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
-  case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
-  case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
-  case IR_BROL: lua_assert(0); break;
-
-  /* Arithmetic ops. */
-  case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
-  case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
-  case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
-  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
-  case IR_NEG: asm_neg(as, ir); break;
-
-#if LJ_SOFTFP
-  case IR_DIV: case IR_POW: case IR_ABS:
-  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
-    lua_assert(0);  /* Unused for LJ_SOFTFP. */
-    break;
-#else
-  case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
-  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
-  case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
-  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
-  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
-  case IR_FPMATH:
-    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
-      break;
-    if (ir->op2 <= IRFPM_TRUNC)
-      asm_callround(as, ir, ir->op2);
-    else if (ir->op2 == IRFPM_SQRT)
-      asm_fpunary(as, ir, ARMI_VSQRT_D);
-    else
-      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
-    break;
-  case IR_TOBIT: asm_tobit(as, ir); break;
-#endif
-
-  case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
-  case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
-
-  /* Memory references. */
-  case IR_AREF: asm_aref(as, ir); break;
-  case IR_HREF: asm_href(as, ir, 0); break;
-  case IR_HREFK: asm_hrefk(as, ir); break;
-  case IR_NEWREF: asm_newref(as, ir); break;
-  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
-  case IR_FREF: asm_fref(as, ir); break;
-  case IR_STRREF: asm_strref(as, ir); break;
-
-  /* Loads and stores. */
-  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-    asm_ahuvload(as, ir);
-    break;
-  case IR_FLOAD: asm_fload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
-  case IR_SLOAD: asm_sload(as, ir); break;
-
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: asm_fstore(as, ir); break;
-  case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
-  /* Allocations. */
-  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
-  case IR_TNEW: asm_tnew(as, ir); break;
-  case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
-  /* Write barriers. */
-  case IR_TBAR: asm_tbar(as, ir); break;
-  case IR_OBAR: asm_obar(as, ir); break;
-
-  /* Type conversions. */
-  case IR_CONV: asm_conv(as, ir); break;
-  case IR_TOSTR: asm_tostr(as, ir); break;
-  case IR_STRTO: asm_strto(as, ir); break;
-
-  /* Calls. */
-  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
-  case IR_CALLXS: asm_callx(as, ir); break;
-  case IR_CARG: break;
-
-  default:
-    setintV(&as->J->errinfo, ir->o);
-    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-    break;
-  }
-}
-
 /* -- Trace setup --------------------------------------------------------- */
 
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   IRRef args[CCI_NARGS_MAX*2];
-  uint32_t i, nargs = (int)CCI_NARGS(ci);
+  uint32_t i, nargs = CCI_XNARGS(ci);
   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++) {
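
For context, the removal of asm_ir() in the last large hunk implies that IR dispatch now happens in target-independent code, which calls the per-op asm_<name> functions and macros this header now provides (asm_band, asm_bswap, asm_min, asm_equal, and so on). A rough sketch of what the shared dispatcher in lj_asm.c would then look like, assuming that split; the actual case list and any opcode fusion logic are not shown in this diff:

/* Hypothetical shape of the shared dispatcher after this change. */
static void asm_ir(ASMState *as, IRIns *ir)
{
  switch ((IROp)ir->o) {
  /* Each case resolves to a per-backend definition, e.g. on ARM
  ** asm_band(as, ir) expands to asm_bitop(as, ir, ARMI_AND) and
  ** asm_min(as, ir) to asm_min_max(as, ir, CC_GT, CC_HI).
  */
  case IR_BAND: asm_band(as, ir); break;
  case IR_BSWAP: asm_bswap(as, ir); break;
  case IR_MIN: asm_min(as, ir); break;
  case IR_EQ: case IR_NE: asm_equal(as, ir); break;
  /* ... one case per IR opcode ... */
  default:  /* Unhandled opcodes still abort the trace, as before. */
    setintV(&as->J->errinfo, ir->o);
    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
    break;
  }
}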