Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--    src/lj_asm.c    504
1 file changed, 436 insertions(+), 68 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index a5e0c01e..94d7bfc4 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -179,6 +179,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
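The two new macros simply fix RID_SP as the base register for the per-target load/store emitters. A hypothetical usage sketch (not part of this patch), assuming the existing ra_hasspill()/sps_scale() spill-slot helpers:

    /* Sketch: reload a spilled IR value from its stack slot into reg r. */
    if (ra_hasspill(ir->s))
      emit_spload(as, ir, r, sps_scale(ir->s));  /* r = [RID_SP + slot ofs] */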
@@ -336,7 +342,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
@@ -694,7 +700,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
     emit_loadu64(as, dest, ir_kint64(ir)->u64);
     return;
 #endif
-  } else {
+  } else if (ir->o != IR_KPRI) {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
                ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
     emit_loadi(as, dest, ir->i);
@@ -944,44 +950,6 @@ static void asm_snap_prep(ASMState *as)
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-                            const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -1066,6 +1034,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+          (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+        irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+        ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -1;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = strdata(s)[0];
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+        args[1] = ASMREF_TMP1;  /* TValue * */
+        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+        lua_assert(irt_isinteger(IR(irs->op1)->t));
+        args[1] = irs->op1;  /* int */
+        if (irs->op2 == IRTOSTR_INT)
+          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+        else
+          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -1)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t     */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+                            const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      asm_fppow(as, ir, irpp->op1, irp->op2);
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
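As an illustration of the fusion in asm_bufput(): a BUFPUT whose string operand is an otherwise unused TOSTR of an integer compiles to a single lj_strfmt_putint() call, never materializing the intermediate GCstr. A sketch with made-up IR references:

    /* 0010 str TOSTR  0009 int    -- no register, only user is 0011
    ** 0011 p32 BUFPUT 0008 0010
    ** ==> emitted as: sb = lj_strfmt_putint(sb, k)
    **     instead of: sb = lj_buf_putstr(sb, lj_strfmt_int(L, k))
    */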
@@ -1339,6 +1560,136 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    lua_assert(0);  /* Unused for LJ_SOFTFP. */
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
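asm_ir() centralizes the opcode dispatch that previously lived in each per-target file; the targets now only provide the asm_* leaf handlers. A simplified sketch of how the reverse assembly loop drives it (the real loop in lj_asm_trace() additionally handles snapshots and the DCE optimization flag):

    /* Simplified sketch: machine code is emitted back-to-front. */
    for (as->curins = as->T->nins - 1; as->curins > as->stopins; as->curins--) {
      IRIns *ir = IR(as->curins);
      if (!ra_used(ir) && !ir_sideeff(ir))
        continue;  /* Skip dead code. */
      asm_ir(as, ir);
    }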
@@ -1569,7 +1920,7 @@ static void asm_tail_link(ASMState *as)
     mres = (int32_t)(snap->nslots - baseslot);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
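The added LJ_FR2 term accounts for the second frame-link slot used by two-slot-frame (64-bit GC) builds. A worked example, assuming BC_CALLM with A=1 and C=2 on an LJ_FR2=1 build:

    /* mres -= 1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)
    **       = 1 + 1 + 1 + 2 = 5 stack slots. */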
@@ -1677,7 +2028,7 @@ static void asm_setup_regsp(ASMState *as)
       as->modset |= RSET_SCRATCH;
       continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1722,10 +2073,20 @@ static void asm_setup_regsp(ASMState *as)
       /* fallthrough */
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
         as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+        if (ir->op2 != REF_NIL && as->evenspill < 4)
+          as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+#else
+    case IR_CNEW:
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
         as->modset = RSET_SCRATCH;
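The added `if (0) { case IR_CNEW: ... }` construct is legal C: a case label is only a jump target, so on x86+FFI builds IR_CNEW jumps into the otherwise dead if-body, applies the larger evenspill requirement, and then falls through to the shared RID_RET hinting. A standalone illustration of the idiom (hypothetical ops, not LuaJIT code):

    #include <stdio.h>

    static void demo(int op)
    {
      switch (op) {
      if (0) {       /* Dead, except via the case label inside it. */
      case 2:
        printf("extra step for op 2\n");
      }              /* Then falls through to the shared tail. */
      case 1:
        printf("shared step for op %d\n", op);
        break;
      }
    }

demo(2) runs both steps; demo(1) runs only the shared one.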
@@ -1734,21 +2095,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
         as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+        as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-        ir->prev = REGSP_HINT(RID_XMM0);
         if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+          as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+        break;
 #else
         ir->prev = REGSP_HINT(RID_FPRET);
-        if (inloop)
-          as->modset |= RSET_SCRATCH;
-#endif
         continue;
+#endif
       }
       /* fallthrough for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1761,31 +2127,33 @@ static void asm_setup_regsp(ASMState *as)
       break;
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-        ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-        if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+        if (!(as->flags & JIT_F_SSE4_1)) {
+          ir->prev = REGSP_HINT(RID_XMM0);
+          if (inloop)
+            as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+          continue;
+        }
+        break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+        if (as->evenspill < 4)  /* Leave room to call pow(). */
           as->evenspill = 4;
-#endif
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-        continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-        ir->prev = REGSP_HINT(RID_XMM0);
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-        continue;
       }
+#endif
+      if (inloop)
+        as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-        as->modset |= RSET_SCRATCH;
       continue;
 #endif
 #if LJ_TARGET_X86ORX64
     /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
-    case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+    case IR_BSHL: case IR_BSHR: case IR_BSAR:
+      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
+        break;
+    case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
         IR(ir->op2)->r = REGSP_HINT(RID_ECX);
         if (inloop)