Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--  src/lj_asm.c  598
1 file changed, 347 insertions, 251 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index a4d0c606..f26a40a5 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -13,6 +13,7 @@
 #include "lj_gc.h"
 #include "lj_str.h"
 #include "lj_tab.h"
+#include "lj_frame.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_iropt.h"
@@ -81,6 +82,10 @@ typedef struct ASMState {
 
 #define IR(ref) (&as->ir[(ref)])
 
+#define ASMREF_TMP1 REF_TRUE  /* Temp. register. */
+#define ASMREF_TMP2 REF_FALSE  /* Temp. register. */
+#define ASMREF_L REF_NIL  /* Stores register for L. */
+
 /* Check for variant to invariant references. */
 #define iscrossref(as, ref) ((ref) < as->sectref)
 
@@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
   { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
     if (rex != 0x40) *--(p) = rex; }
 #define FORCE_REX 0x200
+#define REX_64 (FORCE_REX|0x080000)
 #else
 #define REXRB(p, rr, rb) ((void)0)
 #define FORCE_REX 0
+#define REX_64 0
 #endif
 
 #define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
 {
   uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
   if (rex != 0x40) {
+    rex |= (rr >> 16);
     if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
     *--p = (MCode)rex;
   }
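
[Editor's note] The REX_64 define above and the new `rex |= (rr >> 16);` line work together: OR-ing REX_64 into a register operand sets bit 19, and emit_op shifts the operand's high bits down onto the REX prefix, where 0x08 is REX.W (64 bit operand size). A sketch of the effect, using the MOVD emit that appears in asm_setupresult later in this patch; on x86, REX_64 is 0, so the same source line emits a plain 32 bit op:

  /* Sketch: request a 64 bit MOVD (i.e. MOVQ) on x64. */
  emit_rr(as, XO_MOVD, dest|REX_64, RID_RET);
  /* dest|REX_64 has bit 19 set; emit_op then computes              */
  /*   rex = 0x40 | ... | (rr >> 16)  -->  0x48..0x4f  (REX.W set)  */
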
@@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target)
 
 #define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f))
 
-/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
-#define emit_setargr(as, narg, r) \
-  emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4);
-#define emit_setargi(as, narg, imm) \
-  emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
-#define emit_setargp(as, narg, ptr) \
-  emit_setargi(as, (narg), ptr2addr((ptr)))
-
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
@@ -578,10 +578,6 @@ static void ra_setup(ASMState *as)
   memset(as->phireg, 0, sizeof(as->phireg));
   memset(as->cost, 0, sizeof(as->cost));
   as->cost[RID_ESP] = REGCOST(~0u, 0u);
-
-  /* Start slots for spill slot allocation. */
-  as->evenspill = (SPS_FIRST+1)&~1;
-  as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
 }
 
 /* Rematerialize constants. */
@@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
   } else if (ir->o == IR_BASE) {
     ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
     emit_getgl(as, r, jit_base);
+  } else if (ir->o == IR_KPRI) {  /* REF_NIL stores ASMREF_L register. */
+    lua_assert(irt_isnil(ir->t));
+    emit_getgl(as, r, jit_L);
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
                ir->o == IR_KPTR || ir->o == IR_KNULL);
@@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
   return sps_scale(slot);
 }
 
+/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
+static Reg ra_releasetmp(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  Reg r = ir->r;
+  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  ra_free(as, r);
+  ra_modified(as, r);
+  ir->r = RID_INIT;
+  return r;
+}
+
 /* Restore a register (marked as free). Rematerialize or force a spill. */
 static Reg ra_restore(ASMState *as, IRRef ref)
 {
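
[Editor's note] ra_releasetmp() pairs with the ASMREF_TMP1/ASMREF_TMP2/ASMREF_L anchors defined earlier: the otherwise-unused constant refs REF_TRUE, REF_FALSE and REF_NIL double as IR slots to which a temporary register (or the lua_State pointer L, rematerialized from g->jit_L by the IR_KPRI case above) can be allocated. Because the assembler emits machine code backwards, the temp's definition is emitted after the call and thus executes before it, and the register is then released for reuse. A minimal sketch of the pattern (the function name is hypothetical; the helpers are the ones this patch adds):

  static void tmp_pattern_sketch(ASMState *as, const CCallInfo *ci)
  {
    IRRef args[2];
    args[0] = ASMREF_L;     /* lua_State *L, reloaded from g->jit_L. */
    args[1] = ASMREF_TMP1;  /* Value materialized below. */
    asm_gencall(as, ci, args);
    /* Emitted after the call, i.e. executed before it: */
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), 42);  /* arbitrary value */
  }
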
@@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc)
 
 /* Arch-specific field offsets. */
 static const uint8_t field_ofs[IRFL__MAX+1] = {
-#define FLOFS(name, type, field) (uint8_t)offsetof(type, field),
+#define FLOFS(name, ofs) (uint8_t)(ofs),
 IRFLDEF(FLOFS)
 #undef FLOFS
   0
@@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
 {
   IRIns *irr;
   lua_assert(ir->o == IR_STRREF);
-  as->mrm.idx = as->mrm.base = RID_NONE;
+  as->mrm.base = as->mrm.idx = RID_NONE;
   as->mrm.scale = XM_SCALE1;
   as->mrm.ofs = sizeof(GCstr);
   if (irref_isk(ir->op1)) {
@@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
   }
 }
 
+static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow)
+{
+  if (ir->o == IR_KPTR) {
+    as->mrm.ofs = ir->i;
+    as->mrm.base = as->mrm.idx = RID_NONE;
+  } else {
+    lua_assert(ir->o == IR_STRREF);
+    asm_fusestrref(as, ir, allow);
+  }
+}
+
 /* Fuse load into memory operand. */
 static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
 {
@@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
     return RID_MRM;
   }
   if (ir->o == IR_KNUM) {
+    RegSet avail = as->freeset & ~as->modset & RSET_FPR;
     lua_assert(allow != RSET_EMPTY);
-    if (!(as->freeset & ~as->modset & RSET_FPR)) {
+    if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
       as->mrm.ofs = ptr2addr(ir_knum(ir));
       as->mrm.base = as->mrm.idx = RID_NONE;
       return RID_MRM;
@@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
     }
   } else if (ir->o == IR_FLOAD) {
-    /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */
-    if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) {
+    /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
+    if ((irt_isint(ir->t) || irt_isaddr(ir->t)) &&
+        noconflict(as, ref, IR_FSTORE)) {
       asm_fusefref(as, ir, xallow);
       return RID_MRM;
     }
@@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
     }
   } else if (ir->o == IR_XLOAD) {
-    /* Generic fusion is only ok for IRT_INT operand (but see asm_comp).
+    /* Generic fusion is only ok for 32 bit operand (but see asm_comp).
     ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
     */
-    if (irt_isint(ir->t)) {
-      asm_fusestrref(as, IR(ir->op1), xallow);
+    if (irt_isint(ir->t) || irt_isaddr(ir->t)) {
+      asm_fusexref(as, IR(ir->op1), xallow);
       return RID_MRM;
     }
   }
@@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   return ra_allocref(as, ref, allow);
 }
 
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  RegSet allow = RSET_ALL;
+  uint32_t n, nargs = CCI_NARGS(ci);
+  int32_t ofs = 0;
+  lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL)));  /* Avoid stack adj. */
+  emit_call(as, ci->func);
+  for (n = 0; n < nargs; n++) {  /* Setup args. */
+#if LJ_64
+#error "NYI: 64 bit mode call argument setup"
+#endif
+    IRIns *ir = IR(args[n]);
+    if (irt_isnum(ir->t)) {
+      if ((ofs & 4) && irref_isk(args[n])) {
+        /* Split stores for unaligned FP consts. */
+        emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
+        emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
+      } else {
+        Reg r;
+        if ((allow & RSET_FPR) == RSET_EMPTY)
+          lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+        r = ra_alloc1(as, args[n], allow & RSET_FPR);
+        allow &= ~RID2RSET(r);
+        emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
+      }
+      ofs += 8;
+    } else {
+      if ((ci->flags & CCI_FASTCALL) && n < 2) {
+        Reg r = n == 0 ? RID_ECX : RID_EDX;
+        if (args[n] < ASMREF_TMP1) {
+          emit_loadi(as, r, ir->i);
+        } else {
+          lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
+          allow &= ~RID2RSET(r);
+          if (ra_hasreg(ir->r))
+            emit_movrr(as, r, ir->r);
+          else
+            ra_allocref(as, args[n], RID2RSET(r));
+        }
+      } else {
+        if (args[n] < ASMREF_TMP1) {
+          emit_movmroi(as, RID_ESP, ofs, ir->i);
+        } else {
+          Reg r;
+          if ((allow & RSET_GPR) == RSET_EMPTY)
+            lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+          r = ra_alloc1(as, args[n], allow & RSET_GPR);
+          allow &= ~RID2RSET(r);
+          emit_movtomro(as, r, RID_ESP, ofs);
+        }
+        ofs += 4;
+      }
+    }
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  if ((ci->flags & CCI_NOFPRCLOBBER))
+    drop &= ~RSET_FPR;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);  /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    if (irt_isnum(ir->t)) {
+      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
+#if LJ_64
+      if ((ci->flags & CCI_CASTU64)) {
+        Reg dest = ir->r;
+        if (ra_hasreg(dest)) {
+          ra_free(as, dest);
+          ra_modified(as, dest);
+          emit_rr(as, XO_MOVD, dest|REX_64, RID_RET);  /* Really MOVQ. */
+        } else {
+          emit_movrmro(as, RID_RET, RID_ESP, ofs);
+        }
+      } else {
+        ra_destreg(as, ir, RID_FPRET);
+      }
+#else
+      /* Number result is in x87 st0 for x86 calling convention. */
+      Reg dest = ir->r;
+      if (ra_hasreg(dest)) {
+        ra_free(as, dest);
+        ra_modified(as, dest);
+        emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+      }
+      if ((ci->flags & CCI_CASTU64)) {
+        emit_movtomro(as, RID_RET, RID_ESP, ofs);
+        emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
+      } else {
+        emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+      }
+#endif
+    } else {
+      lua_assert(!irt_ispri(ir->t));
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+}
+
+/* Collect arguments from CALL* and ARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+                            const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_NARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX);
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2;
+  }
+  args[0] = ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
 /* -- Type conversions ---------------------------------------------------- */
 
 static void asm_tonum(ASMState *as, IRIns *ir)
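
[Editor's note] asm_collectargs() above walks the IR_CARG chain that the front end builds for multi-argument calls. A sketch of the IR shape it expects (the ref names are made up; the collection order matches the loop in asm_collectargs):

  /*  t1  CARG  a  b        -- op1 = first arg, op2 = second arg  */
  /*  t2  CARG  t1 c        -- chains a third argument            */
  /*  t3  CALLN t2 #ci      -- op2 indexes lj_ir_callinfo         */
  /*  => args[] = { a, b, c }                                     */

With CCI_L set in ci->flags, ASMREF_L is prepended first and one slot less is collected from the chain, so the C function receives lua_State *L as its implicit first argument.
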
@@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir)
 
 static void asm_strto(ASMState *as, IRIns *ir)
 {
-  Reg str;
-  int32_t ofs;
-  RegSet drop = RSET_SCRATCH;
   /* Force a spill slot for the destination register (if any). */
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
+  IRRef args[2];
+  RegSet drop = RSET_SCRATCH;
   if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
     rset_set(drop, ir->r);  /* WIN64 doesn't spill all FPRs. */
   ra_evictset(as, drop);
   asm_guardcc(as, CC_E);
   emit_rr(as, XO_TEST, RID_RET, RID_RET);
-  /* int lj_str_numconv(const char *s, TValue *n) */
-  emit_call(as, lj_str_numconv);
-  ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
-  if (ofs == 0) {
-    emit_setargr(as, 2, RID_ESP);
-  } else {
-    emit_setargr(as, 2, RID_RET);
-    emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs);
-  }
-  emit_setargr(as, 1, RID_RET);
-  str = ra_alloc1(as, ir->op1, RSET_GPR);
-  emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr));
+  args[0] = ir->op1;
+  args[1] = ASMREF_TMP1;
+  asm_gencall(as, ci, args);
+  /* Store the result to the spill slot or slots SPS_TEMP1/2. */
+  emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+            RID_ESP, sps_scale(ir->s));
 }
 
 static void asm_tostr(ASMState *as, IRIns *ir)
 {
   IRIns *irl = IR(ir->op1);
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+  IRRef args[2];
+  args[0] = ASMREF_L;
   as->gcsteps++;
   if (irt_isnum(irl->t)) {
-    /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */
-    emit_call(as, lj_str_fromnum);
-    emit_setargr(as, 1, RID_RET);
-    emit_getgl(as, RID_RET, jit_L);
-    emit_setargr(as, 2, RID_RET);
-    emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl));
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
+    args[1] = ASMREF_TMP1;
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+              RID_ESP, ra_spill(as, irl));
   } else {
-    /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */
-    emit_call(as, lj_str_fromint);
-    emit_setargr(as, 1, RID_RET);
-    emit_getgl(as, RID_RET, jit_L);
-    emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR));
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
+    args[1] = ir->op1;
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
   }
 }
 
@@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir)
     lua_assert(!irt_isnil(ir->t));
     return irt_type(ir->t)-IRT_FALSE;
   } else {
-    lua_assert(irt_isaddr(ir->t));
+    lua_assert(irt_isgcv(ir->t));
     lo = u32ptr(ir_kgc(ir));
     hi = lo - 0x04c11db7;
   }
@@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 
 static void asm_newref(ASMState *as, IRIns *ir)
 {
-  IRRef keyref = ir->op2;
-  IRIns *irkey = IR(keyref);
-  RegSet allow = RSET_GPR;
-  Reg tab, tmp;
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  tab = ra_alloc1(as, ir->op1, allow);
-  tmp = ra_scratch(as, rset_clear(allow, tab));
-  /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */
-  emit_call(as, lj_tab_newkey);
-  emit_setargr(as, 1, tmp);
-  emit_setargr(as, 2, tab);
-  emit_getgl(as, tmp, jit_L);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  IRIns *irkey;
+  Reg tmp;
+  args[0] = ASMREF_L;
+  args[1] = ir->op1;
+  args[2] = ASMREF_TMP1;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  irkey = IR(ir->op2);
   if (irt_isnum(irkey->t)) {
     /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(keyref)) {
-      emit_setargp(as, 3, ir_knum(irkey));
-    } else {
-      emit_setargr(as, 3, tmp);
+    if (irref_isk(ir->op2))
+      emit_loada(as, tmp, ir_knum(irkey));
+    else
       emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
-    }
   } else {
     /* Otherwise use g->tmptv to hold the TValue. */
-    lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t));
-    emit_setargr(as, 3, tmp);
-    if (!irref_isk(keyref)) {
-      Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp));
+    if (!irref_isk(ir->op2)) {
+      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
       emit_movtomro(as, src, tmp, 0);
     } else if (!irt_ispri(irkey->t)) {
       emit_movmroi(as, tmp, 0, irkey->i);
@@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir)
 
 /* -- Loads and stores ---------------------------------------------------- */
 
-static void asm_fload(ASMState *as, IRIns *ir)
+static void asm_fxload(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   x86Op xo;
-  asm_fusefref(as, ir, RSET_GPR);
+  if (ir->o == IR_FLOAD)
+    asm_fusefref(as, ir, RSET_GPR);
+  else
+    asm_fusexref(as, IR(ir->op1), RSET_GPR);
+  /* ir->op2 is ignored -- unaligned loads are ok on x86. */
   switch (irt_type(ir->t)) {
   case IRT_I8: xo = XO_MOVSXb; break;
   case IRT_U8: xo = XO_MOVZXb; break;
@@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_xload(ASMState *as, IRIns *ir)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  x86Op xo;
-  asm_fusestrref(as, IR(ir->op1), RSET_GPR);  /* For now only support STRREF. */
-  /* ir->op2 is ignored -- unaligned loads are ok on x86. */
-  switch (irt_type(ir->t)) {
-  case IRT_I8: xo = XO_MOVSXb; break;
-  case IRT_U8: xo = XO_MOVZXb; break;
-  case IRT_I16: xo = XO_MOVSXw; break;
-  case IRT_U16: xo = XO_MOVZXw; break;
-  default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break;
-  }
-  emit_mrm(as, xo, dest, RID_MRM);
-}
-
-/* -- String ops ---------------------------------------------------------- */
+/* -- Allocations --------------------------------------------------------- */
 
 static void asm_snew(ASMState *as, IRIns *ir)
 {
-  RegSet allow = RSET_GPR;
-  Reg left, right;
-  IRIns *irl;
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  irl = IR(ir->op1);
-  left = irl->r;
-  right = IR(ir->op2)->r;
-  if (ra_noreg(left)) {
-    lua_assert(irl->o == IR_STRREF);
-    /* Get register only for non-const STRREF. */
-    if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) {
-      if (ra_hasreg(right)) rset_clear(allow, right);
-      left = ra_allocref(as, ir->op1, allow);
-    }
-  }
-  if (ra_noreg(right) && !irref_isk(ir->op2)) {
-    if (ra_hasreg(left)) rset_clear(allow, left);
-    right = ra_allocref(as, ir->op2, allow);
-  }
-  /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */
-  emit_call(as, lj_str_new);
-  emit_setargr(as, 1, RID_RET);
-  emit_getgl(as, RID_RET, jit_L);
-  if (ra_noreg(left))  /* Use immediate for const STRREF. */
-    emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i +
-                 (int32_t)sizeof(GCstr));
-  else
-    emit_setargr(as, 2, left);
-  if (ra_noreg(right))
-    emit_setargi(as, 3, IR(ir->op2)->i);
-  else
-    emit_setargr(as, 3, right);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
+  IRRef args[3];
+  args[0] = ASMREF_L;
+  args[1] = ir->op1;
+  args[2] = ir->op2;
   as->gcsteps++;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
 }
 
-/* -- Table ops ----------------------------------------------------------- */
-
 static void asm_tnew(ASMState *as, IRIns *ir)
 {
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */
-  emit_call(as, lj_tab_new);
-  emit_setargr(as, 1, RID_RET);
-  emit_setargi(as, 2, ir->op1);
-  emit_setargi(as, 3, ir->op2);
-  emit_getgl(as, RID_RET, jit_L);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  args[1] = ASMREF_TMP1;
   as->gcsteps++;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+  emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24));
 }
 
 static void asm_tdup(ASMState *as, IRIns *ir)
 {
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */
-  emit_call(as, lj_tab_dup);
-  emit_setargr(as, 1, RID_RET);
-  emit_setargp(as, 2, ir_kgc(IR(ir->op1)));
-  emit_getgl(as, RID_RET, jit_L);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  args[1] = ir->op1;
   as->gcsteps++;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
 }
 
-static void asm_tlen(ASMState *as, IRIns *ir)
-{
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  emit_call(as, lj_tab_len);  /* MSize lj_tab_len(GCtab *t) */
-  emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR));
-}
+/* -- Write barriers ------------------------------------------------------ */
 
 static void asm_tbar(ASMState *as, IRIns *ir)
 {
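
[Editor's note] asm_tnew() now packs TNEW's two operands into a single 32 bit immediate for the new lj_tab_new1 helper: the array size (op1) in the low 24 bits and the hash bits (op2) in the high 8 bits, so the call fits a single emit_loadi() into ASMREF_TMP1. The helper itself is not part of this file; a plausible shape of the unpacking, assuming it simply forwards to lj_tab_new:

  GCtab *lj_tab_new1(lua_State *L, uint32_t ahsize)  /* sketch, not in patch */
  {
    /* asize in the low 24 bits, hbits in the high 8 bits. */
    return lj_tab_new(L, (int32_t)(ahsize & 0xffffff), ahsize >> 24);
  }
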
@@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir)
 
 static void asm_obar(ASMState *as, IRIns *ir)
 {
-  RegSet allow = RSET_GPR;
-  Reg obj, val;
-  GCobj *valp;
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
   MCLabel l_end;
-  int32_t ofs;
-  ra_evictset(as, RSET_SCRATCH);
-  if (irref_isk(ir->op2)) {
-    valp = ir_kgc(IR(ir->op2));
-    val = RID_NONE;
-  } else {
-    valp = NULL;
-    val = ra_alloc1(as, ir->op2, allow);
-    rset_clear(allow, val);
-  }
-  obj = ra_alloc1(as, ir->op1, allow);
-  l_end = emit_label(as);
+  Reg obj;
   /* No need for other object barriers (yet). */
   lua_assert(IR(ir->op1)->o == IR_UREFC);
-  ofs = -(int32_t)offsetof(GCupval, tv);
-  /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */
-  emit_call(as, lj_gc_barrieruv);
-  if (ofs == 0) {
-    emit_setargr(as, 2, obj);
-  } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) {
-    emit_setargr(as, 2, obj);
-    emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs);
-  } else {
-    emit_setargr(as, 2, RID_RET);
-    emit_rmro(as, XO_LEA, RID_RET, obj, ofs);
-  }
-  emit_setargp(as, 1, J2G(as->J));
-  if (valp)
-    emit_setargp(as, 3, valp);
-  else
-    emit_setargr(as, 3, val);
+  l_end = emit_label(as);
+  args[0] = ASMREF_TMP1;
+  args[1] = ir->op1;
+  asm_gencall(as, ci, args);
+  emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
+  obj = IR(ir->op1)->r;
   emit_sjcc(as, CC_Z, l_end);
   emit_i8(as, LJ_GC_WHITES);
-  if (valp)
-    emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked);
-  else
+  if (irref_isk(ir->op2)) {
+    GCobj *vp = ir_kgc(IR(ir->op2));
+    emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
+  } else {
+    Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
     emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
+  }
   emit_sjcc(as, CC_Z, l_end);
   emit_i8(as, LJ_GC_BLACK);
   emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
-            ofs + (int32_t)offsetof(GChead, marked));
+            (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
 }
 
 /* -- FP/int arithmetic and logic operations ------------------------------ */
@@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       }
     }
     emit_mrm(as, XO_UCOMISD, left, right);
-  } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) {
+  } else {
     IRRef lref = ir->op1, rref = ir->op2;
     IROp leftop = (IROp)(IR(lref)->o);
-    lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
+    lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E));
     /* Swap constants (only for ABC) and fusable loads to the right. */
     if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
       if ((cc & 0xc) == 0xc) cc ^= 3;  /* L <-> G, LE <-> GE */
@@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
     } else {
       Reg left;
       if (opisfusableload((IROp)irl->o) &&
-          ((irt_isi8(irl->t) && checki8(imm)) ||
-           (irt_isu8(irl->t) && checku8(imm)))) {
-        /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8
-        ** loads are handled here. The IRT_I16/IRT_U16 loads should never be
-        ** fused, since cmp word [mem], imm16 has a length-changing prefix.
+          ((irt_isu8(irl->t) && checku8(imm)) ||
+           ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
+           (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
+        /* Only the IRT_INT case is fused by asm_fuseload.
+        ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
+        ** are handled here.
+        ** Note that cmp word [mem], imm16 should not be generated,
+        ** since it has a length-changing prefix. Compares of a word
+        ** against a sign-extended imm8 are ok, however.
        */
        IRType1 origt = irl->t;  /* Temporarily flip types. */
        irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
@@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
        if (left == RID_MRM) {  /* Fusion succeeded? */
          asm_guardcc(as, cc);
          emit_i8(as, imm);
-          emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM);
+          emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
+                   XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM);
          return;
        }  /* Otherwise handle register case as usual. */
      } else {
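
[Editor's note] The switch to XO_ARITHiw8 avoids a subtle decoder penalty. A sketch of the relevant x86 encodings (assumption: standard CMP group-3 forms):

  /*  66 81 /7 iw   cmp word [mem], imm16  -- 66h prefix changes the      */
  /*                immediate length: length-changing prefix (LCP) stall  */
  /*  66 83 /7 ib   cmp word [mem], imm8   -- immediate stays one byte,   */
  /*                sign-extended: no LCP penalty                         */
  /*  80 /7 ib      cmp byte [mem], imm8   -- XO_ARITHib, for 8 bit loads */

This is why the fusion condition above only admits 16 bit compares whose immediate survives sign-extension from 8 bits.
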
@@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       asm_guardcc(as, cc);
       emit_mrm(as, XO_CMP, left, right);
     }
-  } else {  /* Handle ordered string compares. */
-    RegSet allow = RSET_GPR;
-    /* This assumes lj_str_cmp never uses any SSE registers. */
-    ra_evictset(as, (RSET_SCRATCH & RSET_GPR));
-    asm_guardcc(as, cc);
-    emit_rr(as, XO_TEST, RID_RET, RID_RET);
-    emit_call(as, lj_str_cmp);  /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */
-    if (irref_isk(ir->op1)) {
-      emit_setargi(as, 1, IR(ir->op1)->i);
-    } else {
-      Reg left = ra_alloc1(as, ir->op1, allow);
-      rset_clear(allow, left);
-      emit_setargr(as, 1, left);
-    }
-    if (irref_isk(ir->op2)) {
-      emit_setargi(as, 2, IR(ir->op2)->i);
-    } else {
-      Reg right = ra_alloc1(as, ir->op2, allow);
-      emit_setargr(as, 2, right);
-    }
   }
 }
 
@@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
 /* -- GC handling --------------------------------------------------------- */
 
 /* Sync all live GC values to Lua stack slots. */
-static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
+static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
 {
+  /* Some care must be taken when allocating registers here, since this is
+  ** not part of the fast path. All scratch registers are evicted in the
+  ** fast path, so it's easiest to force allocation from scratch registers
+  ** only. This avoids register allocation state unification.
+  */
+  RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
   IRRef2 *map = &as->T->snapmap[snap->mapofs];
   BCReg s, nslots = snap->nslots;
   for (s = 0; s < nslots; s++) {
@@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
 /* Check GC threshold and do one or more GC steps. */
 static void asm_gc_check(ASMState *as, SnapShot *snap)
 {
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
   MCLabel l_end;
-  const BCIns *pc;
-  Reg tmp, base;
+  Reg base, lstate, tmp;
   RegSet drop = RSET_SCRATCH;
-  /* Must evict BASE because the stack may be reallocated by the GC. */
-  if (ra_hasreg(IR(REF_BASE)->r))
-    drop |= RID2RSET(IR(REF_BASE)->r);
+  if (ra_hasreg(IR(REF_BASE)->r))  /* Stack may be reallocated by the GC. */
+    drop |= RID2RSET(IR(REF_BASE)->r);  /* Need to evict BASE, too. */
   ra_evictset(as, drop);
-  base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET));
   l_end = emit_label(as);
-  /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */
-  emit_call(as, lj_gc_step_jit);
-  emit_movtomro(as, base, RID_RET, offsetof(lua_State, base));
-  emit_setargr(as, 1, RID_RET);
-  emit_setargi(as, 3, (int32_t)as->gcsteps);
-  emit_getgl(as, RID_RET, jit_L);
-  pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots];
-  emit_setargp(as, 2, pc);
-  asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base));
-  if (as->curins == as->loopref)  /* BASE gets restored by LOOP anyway. */
-    ra_restore(as, REF_BASE);  /* Better do it inside the slow path. */
+  args[0] = ASMREF_L;
+  args[1] = ASMREF_TMP1;
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  emit_loadi(as, tmp, (int32_t)as->gcsteps);
+  /* We don't know spadj yet, so get the C frame from L->cframe. */
+  emit_movmroi(as, tmp, CFRAME_OFS_PC,
+               (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]);
+  emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
+  lstate = IR(ASMREF_L)->r;
+  emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
+  /* It's ok if lstate is already in a non-scratch reg. But all allocations
+  ** in the non-fast path must use a scratch reg. See comment above.
+  */
+  base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
+  emit_movtomro(as, base, lstate, offsetof(lua_State, base));
+  asm_gc_sync(as, snap, base);
+  /* BASE/L get restored anyway, better do it inside the slow path. */
+  if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
+  if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r))
+    ra_restore(as, ASMREF_L);
   /* Jump around GC step if GC total < GC threshold. */
   tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR);
   emit_sjcc(as, CC_B, l_end);
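
[Editor's note] Since spadj is not final at this point, the generated code locates the C frame through L->cframe instead of ESP. Roughly, the emitted sequence corresponds to this C (a sketch; CFRAME_OFS_PC and CFRAME_RAWMASK come from lj_frame.h, which this patch newly includes at the top of the file):

  void *cf = (void *)((intptr_t)L->cframe & CFRAME_RAWMASK);
  *(int32_t *)((char *)cf + CFRAME_OFS_PC) = snapshot_pc;  /* exit PC */
  lj_gc_step_jit(L, steps);  /* steps = as->gcsteps */

Storing the PC into the C frame replaces the old third call argument, so lj_gc_step_jit now only needs L and the step count.
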
@@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as)
 {
   int32_t spadj;
   emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
-  spadj = sps_adjust(as);
+  spadj = sps_adjust(as->evenspill);
   as->T->spadjust = (uint16_t)spadj;
   emit_addptr(as, RID_ESP, -spadj);
 }
@@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
-  ra_free(as, r);
-  if (r != RID_BASE) {
-    ra_scratch(as, RID2RSET(RID_BASE));
-    emit_rr(as, XO_MOV, r, RID_BASE);
+  lua_assert(!ra_hasspill(ir->s));
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (r != RID_BASE) {
+      ra_scratch(as, RID2RSET(RID_BASE));
+      emit_rr(as, XO_MOV, r, RID_BASE);
+    }
   }
 }
 
@@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as)
   }
 
   /* Calculate stack frame adjustment. */
-  spadj = sps_adjust(as);
+  spadj = sps_adjust(as->evenspill);
   spdelta = spadj - (int32_t)as->parent->spadjust;
   if (spdelta < 0) {  /* Don't shrink the stack frame. */
     spadj = (int32_t)as->parent->spadjust;
@@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as)
       GCfunc *fn = ir_kfunc(IR(ir->op2));
       if (isluafunc(fn)) {
         BCReg fs = s + funcproto(fn)->framesize;
-        newbase = s;
-        if (secondbase == ~(BCReg)0) secondbase = s;
         if (fs > topslot) topslot = fs;
+        if (s != 0) {
+          newbase = s;
+          if (secondbase == ~(BCReg)0) secondbase = s;
+        }
       }
     }
   }
@@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir)
 
     /* Loads and stores. */
     case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
-    case IR_FLOAD: asm_fload(as, ir); break;
+    case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
     case IR_SLOAD: asm_sload(as, ir); break;
-    case IR_XLOAD: asm_xload(as, ir); break;
 
     case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
     case IR_FSTORE: asm_fstore(as, ir); break;
 
-    /* String ops. */
+    /* Allocations. */
     case IR_SNEW: asm_snew(as, ir); break;
-
-    /* Table ops. */
     case IR_TNEW: asm_tnew(as, ir); break;
     case IR_TDUP: asm_tdup(as, ir); break;
-    case IR_TLEN: asm_tlen(as, ir); break;
+
+    /* Write barriers. */
     case IR_TBAR: asm_tbar(as, ir); break;
     case IR_OBAR: asm_obar(as, ir); break;
 
@@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
     case IR_TOSTR: asm_tostr(as, ir); break;
     case IR_STRTO: asm_strto(as, ir); break;
 
+    /* Calls. */
+    case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+    case IR_CARG: break;
+
     default:
       setintV(&as->J->errinfo, ir->o);
       lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
@@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
   IRRef i, nins;
   int inloop;
 
+  ra_setup(as);
+
   /* Clear reg/sp for constants. */
   for (i = T->nk; i < REF_BIAS; i++)
     IR(i)->prev = REGSP_INIT;
@@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
   as->curins = nins;
 
   inloop = 0;
+  as->evenspill = SPS_FIRST;
   for (i = REF_FIRST; i < nins; i++) {
     IRIns *ir = IR(i);
     switch (ir->o) {
@@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
       if (i == as->stopins+1 && ir->op1 == ir->op2)
         as->stopins++;
       break;
+    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+      const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+      /* NYI: not fastcall-aware, but doesn't matter (yet). */
+      if (CCI_NARGS(ci) > (uint32_t)as->evenspill)  /* Leave room for args. */
+        as->evenspill = (int32_t)CCI_NARGS(ci);
+#if LJ_64
+      ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
+#else
+      ir->prev = REGSP_HINT(RID_RET);
+#endif
+      if (inloop)
+        as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
+          (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
+      continue;
+      }
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR:
+    case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
     case IR_NEWREF:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
@@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
       if (inloop)
         as->modset = RSET_SCRATCH;
       break;
-    /* Ordered string compares evict all integer scratch registers. */
-    case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-      if (irt_isstr(ir->t) && inloop)
-        as->modset |= (RSET_SCRATCH & RSET_GPR);
-      break;
     /* Non-constant shift counts need to be in RID_ECX. */
     case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
@@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
     }
     ir->prev = REGSP_INIT;
   }
+  if ((as->evenspill & 1))
+    as->oddspill = as->evenspill++;
+  else
+    as->oddspill = 0;
 }
 
 /* -- Assembler core ------------------------------------------------------ */
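
[Editor's note] Spill slot setup moved from ra_setup() into asm_setup_regsp(): evenspill starts at SPS_FIRST, is grown past the outgoing call argument area (the CCI_NARGS check above), and is finally rounded up to even parity here, parking a leftover slot in oddspill. This keeps 8 byte FP spills aligned. A sketch of the allocation policy this enables (hypothetical helper; it mirrors the existing ra_spill logic):

  static int32_t spill_alloc_sketch(ASMState *as, int isnum)
  {
    int32_t slot;
    if (isnum) {                 /* 8 byte value: aligned even/odd pair. */
      slot = as->evenspill; as->evenspill += 2;
    } else if (as->oddspill) {   /* 4 byte value: reuse the parked odd slot. */
      slot = as->oddspill; as->oddspill = 0;
    } else {                     /* Split a fresh pair, park the odd half. */
      slot = as->evenspill; as->oddspill = slot+1; as->evenspill += 2;
    }
    return sps_scale(slot);
  }
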
@@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T)
   as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
 
   /* Setup register allocation. */
-  ra_setup(as);
   asm_setup_regsp(as, T);
 
   if (!as->loopref) {