diff options
Diffstat (limited to 'src/lj_asm_arm.h')
-rw-r--r-- | src/lj_asm_arm.h | 438 |
1 files changed, 144 insertions, 294 deletions
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 087530b2..9d055c81 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h | |||
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) | |||
338 | /* Generate a call to a C function. */ | 338 | /* Generate a call to a C function. */ |
339 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | 339 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) |
340 | { | 340 | { |
341 | uint32_t n, nargs = CCI_NARGS(ci); | 341 | uint32_t n, nargs = CCI_XNARGS(ci); |
342 | int32_t ofs = 0; | 342 | int32_t ofs = 0; |
343 | #if LJ_SOFTFP | 343 | #if LJ_SOFTFP |
344 | Reg gpr = REGARG_FIRSTGPR; | 344 | Reg gpr = REGARG_FIRSTGPR; |
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
453 | UNUSED(ci); | 453 | UNUSED(ci); |
454 | } | 454 | } |
455 | 455 | ||
456 | static void asm_call(ASMState *as, IRIns *ir) | ||
457 | { | ||
458 | IRRef args[CCI_NARGS_MAX]; | ||
459 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
460 | asm_collectargs(as, ir, ci, args); | ||
461 | asm_setupresult(as, ir, ci); | ||
462 | asm_gencall(as, ci, args); | ||
463 | } | ||
464 | |||
465 | static void asm_callx(ASMState *as, IRIns *ir) | 456 | static void asm_callx(ASMState *as, IRIns *ir) |
466 | { | 457 | { |
467 | IRRef args[CCI_NARGS_MAX*2]; | 458 | IRRef args[CCI_NARGS_MAX*2]; |
@@ -490,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
490 | { | 481 | { |
491 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 482 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
492 | void *pc = ir_kptr(IR(ir->op2)); | 483 | void *pc = ir_kptr(IR(ir->op2)); |
493 | int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); | 484 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
494 | as->topslot -= (BCReg)delta; | 485 | as->topslot -= (BCReg)delta; |
495 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 486 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
496 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 487 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
@@ -601,31 +592,6 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
601 | } | 592 | } |
602 | } | 593 | } |
603 | 594 | ||
604 | #if !LJ_SOFTFP && LJ_HASFFI | ||
605 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
606 | { | ||
607 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
608 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
609 | IRCallID id; | ||
610 | CCallInfo ci; | ||
611 | IRRef args[2]; | ||
612 | args[0] = (ir-1)->op1; | ||
613 | args[1] = ir->op1; | ||
614 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
615 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
616 | ir--; | ||
617 | } else { | ||
618 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
619 | } | ||
620 | ci = lj_ir_callinfo[id]; | ||
621 | #if !LJ_ABI_SOFTFP | ||
622 | ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ | ||
623 | #endif | ||
624 | asm_setupresult(as, ir, &ci); | ||
625 | asm_gencall(as, &ci, args); | ||
626 | } | ||
627 | #endif | ||
628 | |||
629 | static void asm_strto(ASMState *as, IRIns *ir) | 595 | static void asm_strto(ASMState *as, IRIns *ir) |
630 | { | 596 | { |
631 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | 597 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; |
@@ -689,6 +655,8 @@ static void asm_strto(ASMState *as, IRIns *ir) | |||
689 | emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); | 655 | emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); |
690 | } | 656 | } |
691 | 657 | ||
658 | /* -- Memory references --------------------------------------------------- */ | ||
659 | |||
692 | /* Get pointer to TValue. */ | 660 | /* Get pointer to TValue. */ |
693 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 661 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) |
694 | { | 662 | { |
@@ -714,7 +682,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | |||
714 | Reg src = ra_alloc1(as, ref, allow); | 682 | Reg src = ra_alloc1(as, ref, allow); |
715 | emit_lso(as, ARMI_STR, src, RID_SP, 0); | 683 | emit_lso(as, ARMI_STR, src, RID_SP, 0); |
716 | } | 684 | } |
717 | if ((ir+1)->o == IR_HIOP) | 685 | if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) |
718 | type = ra_alloc1(as, ref+1, allow); | 686 | type = ra_alloc1(as, ref+1, allow); |
719 | else | 687 | else |
720 | type = ra_allock(as, irt_toitype(ir->t), allow); | 688 | type = ra_allock(as, irt_toitype(ir->t), allow); |
@@ -722,27 +690,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | |||
722 | } | 690 | } |
723 | } | 691 | } |
724 | 692 | ||
725 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
726 | { | ||
727 | IRRef args[2]; | ||
728 | args[0] = ASMREF_L; | ||
729 | as->gcsteps++; | ||
730 | if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { | ||
731 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; | ||
732 | args[1] = ASMREF_TMP1; /* const lua_Number * */ | ||
733 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
734 | asm_gencall(as, ci, args); | ||
735 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
736 | } else { | ||
737 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; | ||
738 | args[1] = ir->op1; /* int32_t k */ | ||
739 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
740 | asm_gencall(as, ci, args); | ||
741 | } | ||
742 | } | ||
743 | |||
744 | /* -- Memory references --------------------------------------------------- */ | ||
745 | |||
746 | static void asm_aref(ASMState *as, IRIns *ir) | 693 | static void asm_aref(ASMState *as, IRIns *ir) |
747 | { | 694 | { |
748 | Reg dest = ra_dest(as, ir, RSET_GPR); | 695 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -960,20 +907,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
960 | emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); | 907 | emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); |
961 | } | 908 | } |
962 | 909 | ||
963 | static void asm_newref(ASMState *as, IRIns *ir) | ||
964 | { | ||
965 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
966 | IRRef args[3]; | ||
967 | if (ir->r == RID_SINK) | ||
968 | return; | ||
969 | args[0] = ASMREF_L; /* lua_State *L */ | ||
970 | args[1] = ir->op1; /* GCtab *t */ | ||
971 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
972 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
973 | asm_gencall(as, ci, args); | ||
974 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
975 | } | ||
976 | |||
977 | static void asm_uref(ASMState *as, IRIns *ir) | 910 | static void asm_uref(ASMState *as, IRIns *ir) |
978 | { | 911 | { |
979 | Reg dest = ra_dest(as, ir, RSET_GPR); | 912 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1064,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir) | |||
1064 | 997 | ||
1065 | static void asm_fload(ASMState *as, IRIns *ir) | 998 | static void asm_fload(ASMState *as, IRIns *ir) |
1066 | { | 999 | { |
1067 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1000 | if (ir->op1 == REF_NIL) { |
1068 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | 1001 | lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ |
1069 | ARMIns ai = asm_fxloadins(ir); | 1002 | } else { |
1070 | int32_t ofs; | 1003 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1071 | if (ir->op2 == IRFL_TAB_ARRAY) { | 1004 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); |
1072 | ofs = asm_fuseabase(as, ir->op1); | 1005 | ARMIns ai = asm_fxloadins(ir); |
1073 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | 1006 | int32_t ofs; |
1074 | emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); | 1007 | if (ir->op2 == IRFL_TAB_ARRAY) { |
1075 | return; | 1008 | ofs = asm_fuseabase(as, ir->op1); |
1009 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
1010 | emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); | ||
1011 | return; | ||
1012 | } | ||
1076 | } | 1013 | } |
1014 | ofs = field_ofs[ir->op2]; | ||
1015 | if ((ai & 0x04000000)) | ||
1016 | emit_lso(as, ai, dest, idx, ofs); | ||
1017 | else | ||
1018 | emit_lsox(as, ai, dest, idx, ofs); | ||
1077 | } | 1019 | } |
1078 | ofs = field_ofs[ir->op2]; | ||
1079 | if ((ai & 0x04000000)) | ||
1080 | emit_lso(as, ai, dest, idx, ofs); | ||
1081 | else | ||
1082 | emit_lsox(as, ai, dest, idx, ofs); | ||
1083 | } | 1020 | } |
1084 | 1021 | ||
1085 | static void asm_fstore(ASMState *as, IRIns *ir) | 1022 | static void asm_fstore(ASMState *as, IRIns *ir) |
@@ -1105,7 +1042,7 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
1105 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 1042 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); |
1106 | } | 1043 | } |
1107 | 1044 | ||
1108 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 1045 | static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) |
1109 | { | 1046 | { |
1110 | if (ir->r != RID_SINK) { | 1047 | if (ir->r != RID_SINK) { |
1111 | Reg src = ra_alloc1(as, ir->op2, | 1048 | Reg src = ra_alloc1(as, ir->op2, |
@@ -1115,6 +1052,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | |||
1115 | } | 1052 | } |
1116 | } | 1053 | } |
1117 | 1054 | ||
1055 | #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) | ||
1056 | |||
1118 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 1057 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
1119 | { | 1058 | { |
1120 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); | 1059 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); |
@@ -1272,19 +1211,16 @@ dotypecheck: | |||
1272 | static void asm_cnew(ASMState *as, IRIns *ir) | 1211 | static void asm_cnew(ASMState *as, IRIns *ir) |
1273 | { | 1212 | { |
1274 | CTState *cts = ctype_ctsG(J2G(as->J)); | 1213 | CTState *cts = ctype_ctsG(J2G(as->J)); |
1275 | CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; | 1214 | CTypeID id = (CTypeID)IR(ir->op1)->i; |
1276 | CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? | 1215 | CTSize sz; |
1277 | lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; | 1216 | CTInfo info = lj_ctype_info(cts, id, &sz); |
1278 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; | 1217 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; |
1279 | IRRef args[2]; | 1218 | IRRef args[4]; |
1280 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); | 1219 | RegSet allow = (RSET_GPR & ~RSET_SCRATCH); |
1281 | RegSet drop = RSET_SCRATCH; | 1220 | RegSet drop = RSET_SCRATCH; |
1282 | lua_assert(sz != CTSIZE_INVALID); | 1221 | lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); |
1283 | 1222 | ||
1284 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1285 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1286 | as->gcsteps++; | 1223 | as->gcsteps++; |
1287 | |||
1288 | if (ra_hasreg(ir->r)) | 1224 | if (ra_hasreg(ir->r)) |
1289 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1225 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
1290 | ra_evictset(as, drop); | 1226 | ra_evictset(as, drop); |
@@ -1306,16 +1242,28 @@ static void asm_cnew(ASMState *as, IRIns *ir) | |||
1306 | if (ofs == sizeof(GCcdata)) break; | 1242 | if (ofs == sizeof(GCcdata)) break; |
1307 | ofs -= 4; ir--; | 1243 | ofs -= 4; ir--; |
1308 | } | 1244 | } |
1245 | } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ | ||
1246 | ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; | ||
1247 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1248 | args[1] = ir->op1; /* CTypeID id */ | ||
1249 | args[2] = ir->op2; /* CTSize sz */ | ||
1250 | args[3] = ASMREF_TMP1; /* CTSize align */ | ||
1251 | asm_gencall(as, ci, args); | ||
1252 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); | ||
1253 | return; | ||
1309 | } | 1254 | } |
1255 | |||
1310 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ | 1256 | /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ |
1311 | { | 1257 | { |
1312 | uint32_t k = emit_isk12(ARMI_MOV, ctypeid); | 1258 | uint32_t k = emit_isk12(ARMI_MOV, id); |
1313 | Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); | 1259 | Reg r = k ? RID_R1 : ra_allock(as, id, allow); |
1314 | emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); | 1260 | emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); |
1315 | emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); | 1261 | emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); |
1316 | emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); | 1262 | emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); |
1317 | if (k) emit_d(as, ARMI_MOV^k, RID_R1); | 1263 | if (k) emit_d(as, ARMI_MOV^k, RID_R1); |
1318 | } | 1264 | } |
1265 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1266 | args[1] = ASMREF_TMP1; /* MSize size */ | ||
1319 | asm_gencall(as, ci, args); | 1267 | asm_gencall(as, ci, args); |
1320 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), | 1268 | ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), |
1321 | ra_releasetmp(as, ASMREF_TMP1)); | 1269 | ra_releasetmp(as, ASMREF_TMP1)); |
@@ -1392,23 +1340,38 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) | |||
1392 | emit_dm(as, ai, (dest & 15), (left & 15)); | 1340 | emit_dm(as, ai, (dest & 15), (left & 15)); |
1393 | } | 1341 | } |
1394 | 1342 | ||
1395 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | 1343 | static void asm_callround(ASMState *as, IRIns *ir, int id) |
1396 | { | 1344 | { |
1397 | IRIns *irp = IR(ir->op1); | 1345 | /* The modified regs must match with the *.dasc implementation. */ |
1398 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | 1346 | RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| |
1399 | IRIns *irpp = IR(irp->op1); | 1347 | RID2RSET(RID_R3)|RID2RSET(RID_R12); |
1400 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | 1348 | RegSet of; |
1401 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | 1349 | Reg dest, src; |
1402 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | 1350 | ra_evictset(as, drop); |
1403 | IRRef args[2]; | 1351 | dest = ra_dest(as, ir, RSET_FPR); |
1404 | args[0] = irpp->op1; | 1352 | emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); |
1405 | args[1] = irp->op2; | 1353 | emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : |
1406 | asm_setupresult(as, ir, ci); | 1354 | id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : |
1407 | asm_gencall(as, ci, args); | 1355 | (void *)lj_vm_trunc_sf); |
1408 | return 1; | 1356 | /* Workaround to protect argument GPRs from being used for remat. */ |
1409 | } | 1357 | of = as->freeset; |
1410 | } | 1358 | as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); |
1411 | return 0; | 1359 | as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); |
1360 | src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ | ||
1361 | as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); | ||
1362 | emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); | ||
1363 | } | ||
1364 | |||
1365 | static void asm_fpmath(ASMState *as, IRIns *ir) | ||
1366 | { | ||
1367 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
1368 | return; | ||
1369 | if (ir->op2 <= IRFPM_TRUNC) | ||
1370 | asm_callround(as, ir, ir->op2); | ||
1371 | else if (ir->op2 == IRFPM_SQRT) | ||
1372 | asm_fpunary(as, ir, ARMI_VSQRT_D); | ||
1373 | else | ||
1374 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
1412 | } | 1375 | } |
1413 | #endif | 1376 | #endif |
1414 | 1377 | ||
@@ -1474,19 +1437,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) | |||
1474 | asm_intop(as, ir, asm_drop_cmp0(as, ai)); | 1437 | asm_intop(as, ir, asm_drop_cmp0(as, ai)); |
1475 | } | 1438 | } |
1476 | 1439 | ||
1477 | static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) | ||
1478 | { | ||
1479 | ai = asm_drop_cmp0(as, ai); | ||
1480 | if (ir->op2 == 0) { | ||
1481 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1482 | uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); | ||
1483 | emit_d(as, ai^m, dest); | ||
1484 | } else { | ||
1485 | /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ | ||
1486 | asm_intop(as, ir, ai); | ||
1487 | } | ||
1488 | } | ||
1489 | |||
1490 | static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) | 1440 | static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) |
1491 | { | 1441 | { |
1492 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1442 | Reg dest = ra_dest(as, ir, RSET_GPR); |
@@ -1552,6 +1502,20 @@ static void asm_mul(ASMState *as, IRIns *ir) | |||
1552 | asm_intmul(as, ir); | 1502 | asm_intmul(as, ir); |
1553 | } | 1503 | } |
1554 | 1504 | ||
1505 | #define asm_addov(as, ir) asm_add(as, ir) | ||
1506 | #define asm_subov(as, ir) asm_sub(as, ir) | ||
1507 | #define asm_mulov(as, ir) asm_mul(as, ir) | ||
1508 | |||
1509 | #if !LJ_SOFTFP | ||
1510 | #define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) | ||
1511 | #define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) | ||
1512 | #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) | ||
1513 | #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) | ||
1514 | #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) | ||
1515 | #endif | ||
1516 | |||
1517 | #define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) | ||
1518 | |||
1555 | static void asm_neg(ASMState *as, IRIns *ir) | 1519 | static void asm_neg(ASMState *as, IRIns *ir) |
1556 | { | 1520 | { |
1557 | #if !LJ_SOFTFP | 1521 | #if !LJ_SOFTFP |
@@ -1563,41 +1527,22 @@ static void asm_neg(ASMState *as, IRIns *ir) | |||
1563 | asm_intneg(as, ir, ARMI_RSB); | 1527 | asm_intneg(as, ir, ARMI_RSB); |
1564 | } | 1528 | } |
1565 | 1529 | ||
1566 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | 1530 | static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) |
1567 | { | 1531 | { |
1568 | const CCallInfo *ci = &lj_ir_callinfo[id]; | 1532 | ai = asm_drop_cmp0(as, ai); |
1569 | IRRef args[2]; | 1533 | if (ir->op2 == 0) { |
1570 | args[0] = ir->op1; | 1534 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1571 | args[1] = ir->op2; | 1535 | uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); |
1572 | asm_setupresult(as, ir, ci); | 1536 | emit_d(as, ai^m, dest); |
1573 | asm_gencall(as, ci, args); | 1537 | } else { |
1538 | /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ | ||
1539 | asm_intop(as, ir, ai); | ||
1540 | } | ||
1574 | } | 1541 | } |
1575 | 1542 | ||
1576 | #if !LJ_SOFTFP | 1543 | #define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) |
1577 | static void asm_callround(ASMState *as, IRIns *ir, int id) | ||
1578 | { | ||
1579 | /* The modified regs must match with the *.dasc implementation. */ | ||
1580 | RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| | ||
1581 | RID2RSET(RID_R3)|RID2RSET(RID_R12); | ||
1582 | RegSet of; | ||
1583 | Reg dest, src; | ||
1584 | ra_evictset(as, drop); | ||
1585 | dest = ra_dest(as, ir, RSET_FPR); | ||
1586 | emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); | ||
1587 | emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : | ||
1588 | id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : | ||
1589 | (void *)lj_vm_trunc_sf); | ||
1590 | /* Workaround to protect argument GPRs from being used for remat. */ | ||
1591 | of = as->freeset; | ||
1592 | as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); | ||
1593 | as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); | ||
1594 | src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ | ||
1595 | as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); | ||
1596 | emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); | ||
1597 | } | ||
1598 | #endif | ||
1599 | 1544 | ||
1600 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1545 | static void asm_bswap(ASMState *as, IRIns *ir) |
1601 | { | 1546 | { |
1602 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1547 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1603 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 1548 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); |
@@ -1614,6 +1559,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir) | |||
1614 | } | 1559 | } |
1615 | } | 1560 | } |
1616 | 1561 | ||
1562 | #define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) | ||
1563 | #define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) | ||
1564 | #define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) | ||
1565 | |||
1617 | static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) | 1566 | static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) |
1618 | { | 1567 | { |
1619 | if (irref_isk(ir->op2)) { /* Constant shifts. */ | 1568 | if (irref_isk(ir->op2)) { /* Constant shifts. */ |
@@ -1631,6 +1580,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) | |||
1631 | } | 1580 | } |
1632 | } | 1581 | } |
1633 | 1582 | ||
1583 | #define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) | ||
1584 | #define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) | ||
1585 | #define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) | ||
1586 | #define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) | ||
1587 | #define asm_brol(as, ir) lua_assert(0) | ||
1588 | |||
1634 | static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) | 1589 | static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) |
1635 | { | 1590 | { |
1636 | uint32_t kcmp = 0, kmov = 0; | 1591 | uint32_t kcmp = 0, kmov = 0; |
@@ -1704,6 +1659,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) | |||
1704 | asm_intmin_max(as, ir, cc); | 1659 | asm_intmin_max(as, ir, cc); |
1705 | } | 1660 | } |
1706 | 1661 | ||
1662 | #define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) | ||
1663 | #define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) | ||
1664 | |||
1707 | /* -- Comparisons --------------------------------------------------------- */ | 1665 | /* -- Comparisons --------------------------------------------------------- */ |
1708 | 1666 | ||
1709 | /* Map of comparisons to flags. ORDER IR. */ | 1667 | /* Map of comparisons to flags. ORDER IR. */ |
@@ -1819,6 +1777,18 @@ notst: | |||
1819 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ | 1777 | as->flagmcp = as->mcp; /* Allow elimination of the compare. */ |
1820 | } | 1778 | } |
1821 | 1779 | ||
1780 | static void asm_comp(ASMState *as, IRIns *ir) | ||
1781 | { | ||
1782 | #if !LJ_SOFTFP | ||
1783 | if (irt_isnum(ir->t)) | ||
1784 | asm_fpcomp(as, ir); | ||
1785 | else | ||
1786 | #endif | ||
1787 | asm_intcomp(as, ir); | ||
1788 | } | ||
1789 | |||
1790 | #define asm_equal(as, ir) asm_comp(as, ir) | ||
1791 | |||
1822 | #if LJ_HASFFI | 1792 | #if LJ_HASFFI |
1823 | /* 64 bit integer comparisons. */ | 1793 | /* 64 bit integer comparisons. */ |
1824 | static void asm_int64comp(ASMState *as, IRIns *ir) | 1794 | static void asm_int64comp(ASMState *as, IRIns *ir) |
@@ -1893,7 +1863,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1893 | #endif | 1863 | #endif |
1894 | } else if ((ir-1)->o == IR_XSTORE) { | 1864 | } else if ((ir-1)->o == IR_XSTORE) { |
1895 | if ((ir-1)->r != RID_SINK) | 1865 | if ((ir-1)->r != RID_SINK) |
1896 | asm_xstore(as, ir, 4); | 1866 | asm_xstore_(as, ir, 4); |
1897 | return; | 1867 | return; |
1898 | } | 1868 | } |
1899 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1869 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
@@ -1941,6 +1911,16 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1941 | #endif | 1911 | #endif |
1942 | } | 1912 | } |
1943 | 1913 | ||
1914 | /* -- Profiling ----------------------------------------------------------- */ | ||
1915 | |||
1916 | static void asm_prof(ASMState *as, IRIns *ir) | ||
1917 | { | ||
1918 | UNUSED(ir); | ||
1919 | asm_guardcc(as, CC_NE); | ||
1920 | emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP); | ||
1921 | emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); | ||
1922 | } | ||
1923 | |||
1944 | /* -- Stack handling ------------------------------------------------------ */ | 1924 | /* -- Stack handling ------------------------------------------------------ */ |
1945 | 1925 | ||
1946 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ | 1926 | /* Check Lua stack size for overflow. Use exit handler as fallback. */ |
@@ -1970,7 +1950,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1970 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, | 1950 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, |
1971 | (int32_t)offsetof(lua_State, maxstack)); | 1951 | (int32_t)offsetof(lua_State, maxstack)); |
1972 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ | 1952 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ |
1973 | int32_t i = i32ptr(&J2G(as->J)->jit_L); | 1953 | int32_t i = i32ptr(&J2G(as->J)->cur_L); |
1974 | if (ra_hasspill(irp->s)) | 1954 | if (ra_hasspill(irp->s)) |
1975 | emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); | 1955 | emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); |
1976 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); | 1956 | emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); |
@@ -1978,7 +1958,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1978 | emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ | 1958 | emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ |
1979 | emit_loadi(as, RID_TMP, (i & ~4095)); | 1959 | emit_loadi(as, RID_TMP, (i & ~4095)); |
1980 | } else { | 1960 | } else { |
1981 | emit_getgl(as, RID_TMP, jit_L); | 1961 | emit_getgl(as, RID_TMP, cur_L); |
1982 | } | 1962 | } |
1983 | } | 1963 | } |
1984 | 1964 | ||
@@ -2087,13 +2067,13 @@ static void asm_loop_fixup(ASMState *as) | |||
2087 | 2067 | ||
2088 | /* -- Head of trace ------------------------------------------------------- */ | 2068 | /* -- Head of trace ------------------------------------------------------- */ |
2089 | 2069 | ||
2090 | /* Reload L register from g->jit_L. */ | 2070 | /* Reload L register from g->cur_L. */ |
2091 | static void asm_head_lreg(ASMState *as) | 2071 | static void asm_head_lreg(ASMState *as) |
2092 | { | 2072 | { |
2093 | IRIns *ir = IR(ASMREF_L); | 2073 | IRIns *ir = IR(ASMREF_L); |
2094 | if (ra_used(ir)) { | 2074 | if (ra_used(ir)) { |
2095 | Reg r = ra_dest(as, ir, RSET_GPR); | 2075 | Reg r = ra_dest(as, ir, RSET_GPR); |
2096 | emit_getgl(as, r, jit_L); | 2076 | emit_getgl(as, r, cur_L); |
2097 | ra_evictk(as); | 2077 | ra_evictk(as); |
2098 | } | 2078 | } |
2099 | } | 2079 | } |
@@ -2164,143 +2144,13 @@ static void asm_tail_prep(ASMState *as) | |||
2164 | *p = 0; /* Prevent load/store merging. */ | 2144 | *p = 0; /* Prevent load/store merging. */ |
2165 | } | 2145 | } |
2166 | 2146 | ||
2167 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
2168 | |||
2169 | /* Assemble a single instruction. */ | ||
2170 | static void asm_ir(ASMState *as, IRIns *ir) | ||
2171 | { | ||
2172 | switch ((IROp)ir->o) { | ||
2173 | /* Miscellaneous ops. */ | ||
2174 | case IR_LOOP: asm_loop(as); break; | ||
2175 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
2176 | case IR_USE: | ||
2177 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
2178 | case IR_PHI: asm_phi(as, ir); break; | ||
2179 | case IR_HIOP: asm_hiop(as, ir); break; | ||
2180 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
2181 | |||
2182 | /* Guarded assertions. */ | ||
2183 | case IR_EQ: case IR_NE: | ||
2184 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
2185 | as->curins--; | ||
2186 | asm_href(as, ir-1, (IROp)ir->o); | ||
2187 | break; | ||
2188 | } | ||
2189 | /* fallthrough */ | ||
2190 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
2191 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
2192 | case IR_ABC: | ||
2193 | #if !LJ_SOFTFP | ||
2194 | if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; } | ||
2195 | #endif | ||
2196 | asm_intcomp(as, ir); | ||
2197 | break; | ||
2198 | |||
2199 | case IR_RETF: asm_retf(as, ir); break; | ||
2200 | |||
2201 | /* Bit ops. */ | ||
2202 | case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break; | ||
2203 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2204 | |||
2205 | case IR_BAND: asm_bitop(as, ir, ARMI_AND); break; | ||
2206 | case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break; | ||
2207 | case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break; | ||
2208 | |||
2209 | case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break; | ||
2210 | case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break; | ||
2211 | case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break; | ||
2212 | case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break; | ||
2213 | case IR_BROL: lua_assert(0); break; | ||
2214 | |||
2215 | /* Arithmetic ops. */ | ||
2216 | case IR_ADD: case IR_ADDOV: asm_add(as, ir); break; | ||
2217 | case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break; | ||
2218 | case IR_MUL: case IR_MULOV: asm_mul(as, ir); break; | ||
2219 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
2220 | case IR_NEG: asm_neg(as, ir); break; | ||
2221 | |||
2222 | #if LJ_SOFTFP | ||
2223 | case IR_DIV: case IR_POW: case IR_ABS: | ||
2224 | case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: | ||
2225 | lua_assert(0); /* Unused for LJ_SOFTFP. */ | ||
2226 | break; | ||
2227 | #else | ||
2228 | case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break; | ||
2229 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
2230 | case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break; | ||
2231 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
2232 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
2233 | case IR_FPMATH: | ||
2234 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
2235 | break; | ||
2236 | if (ir->op2 <= IRFPM_TRUNC) | ||
2237 | asm_callround(as, ir, ir->op2); | ||
2238 | else if (ir->op2 == IRFPM_SQRT) | ||
2239 | asm_fpunary(as, ir, ARMI_VSQRT_D); | ||
2240 | else | ||
2241 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
2242 | break; | ||
2243 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2244 | #endif | ||
2245 | |||
2246 | case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break; | ||
2247 | case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break; | ||
2248 | |||
2249 | /* Memory references. */ | ||
2250 | case IR_AREF: asm_aref(as, ir); break; | ||
2251 | case IR_HREF: asm_href(as, ir, 0); break; | ||
2252 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2253 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2254 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2255 | case IR_FREF: asm_fref(as, ir); break; | ||
2256 | case IR_STRREF: asm_strref(as, ir); break; | ||
2257 | |||
2258 | /* Loads and stores. */ | ||
2259 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2260 | asm_ahuvload(as, ir); | ||
2261 | break; | ||
2262 | case IR_FLOAD: asm_fload(as, ir); break; | ||
2263 | case IR_XLOAD: asm_xload(as, ir); break; | ||
2264 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2265 | |||
2266 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2267 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
2268 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
2269 | |||
2270 | /* Allocations. */ | ||
2271 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2272 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2273 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2274 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2275 | |||
2276 | /* Write barriers. */ | ||
2277 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2278 | case IR_OBAR: asm_obar(as, ir); break; | ||
2279 | |||
2280 | /* Type conversions. */ | ||
2281 | case IR_CONV: asm_conv(as, ir); break; | ||
2282 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2283 | case IR_STRTO: asm_strto(as, ir); break; | ||
2284 | |||
2285 | /* Calls. */ | ||
2286 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2287 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2288 | case IR_CARG: break; | ||
2289 | |||
2290 | default: | ||
2291 | setintV(&as->J->errinfo, ir->o); | ||
2292 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2293 | break; | ||
2294 | } | ||
2295 | } | ||
2296 | |||
2297 | /* -- Trace setup --------------------------------------------------------- */ | 2147 | /* -- Trace setup --------------------------------------------------------- */ |
2298 | 2148 | ||
2299 | /* Ensure there are enough stack slots for call arguments. */ | 2149 | /* Ensure there are enough stack slots for call arguments. */ |
2300 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2150 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
2301 | { | 2151 | { |
2302 | IRRef args[CCI_NARGS_MAX*2]; | 2152 | IRRef args[CCI_NARGS_MAX*2]; |
2303 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2153 | uint32_t i, nargs = CCI_XNARGS(ci); |
2304 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; | 2154 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; |
2305 | asm_collectargs(as, ir, ci, args); | 2155 | asm_collectargs(as, ir, ci, args); |
2306 | for (i = 0; i < nargs; i++) { | 2156 | for (i = 0; i < nargs; i++) { |