author     Mike Pall <mike>  2010-02-24 07:09:34 +0100
committer  Mike Pall <mike>  2010-02-24 07:09:34 +0100
commit     4c9f71be5d4449b717870092839c47a1d5db0dca (patch)
tree       ace2d0a3cf4756cc5ac7607c21304f7f7baffd09 /src
parent     e46f4c8a11bcb2ba76d445e34030ab04ba12668d (diff)
download   luajit-4c9f71be5d4449b717870092839c47a1d5db0dca.tar.gz
           luajit-4c9f71be5d4449b717870092839c47a1d5db0dca.tar.bz2
           luajit-4c9f71be5d4449b717870092839c47a1d5db0dca.zip
Major 32/64 bit cleanups in assembler and exit handling.
Add 64 bit lightuserdata handling. Keep the tagged 64 bit value.
Allocate/save/restore 64 bit spill slots for 64 bit lightuserdata.
Fix code generation for 64 bit loads/stores/moves/compares.
Fix code generation for stack pointer adjustments.
Add fixed spill slot definitions for x64. Reduce reserved spill slots.
Disable STRREF + ADD fusion in 64 bit mode (avoid negative 32 bit ofs).
Diffstat (limited to 'src')
-rw-r--r--  src/lj_asm.c         218
-rw-r--r--  src/lj_ir.h           18
-rw-r--r--  src/lj_snap.c         10
-rw-r--r--  src/lj_target_x86.h   26
4 files changed, 193 insertions, 79 deletions
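
The invariant behind most of these changes: on x64 a lightuserdata keeps its full tagged 64 bit representation, with the 17 topmost bits holding the type tag and the low 47 bits the pointer. A standalone C sketch of the check that the new asm_load_lightud64() helper (in the lj_asm.c hunks below) emits as machine code; the helper name aside, nothing here is from the tree:

#include <stdint.h>

/* Illustrative only: mirrors the "sar r,47; cmp r,-2" guard emitted by
** asm_load_lightud64(). For a correctly tagged lightuserdata the 16 top
** bits are all ones and bit 47 is zero, so an arithmetic right shift by
** 47 must yield exactly -2.
*/
static int lightud_tag_ok(uint64_t tv)
{
  return ((int64_t)tv >> 47) == -2;
}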
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 116c6e1f..24467dbc 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -261,18 +261,16 @@ static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
 static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
 {
   MCode *p = as->mcp;
+  x86Op xo;
   if (checki8(i)) {
-    p -= 3;
-    p[2] = (MCode)i;
-    p[0] = (MCode)(xg >> 16);
+    *--p = (MCode)i;
+    xo = XG_TOXOi8(xg);
   } else {
-    p -= 6;
-    *(int32_t *)(p+2) = i;
-    p[0] = (MCode)(xg >> 8);
+    p -= 4;
+    *(int32_t *)p = i;
+    xo = XG_TOXOi(xg);
   }
-  p[1] = MODRM(XM_REG, xg, rb);
-  REXRB(p, 0, rb);
-  as->mcp = p;
+  as->mcp = emit_opm(xo, XM_REG, (Reg)(xg & 7) | (rb & REX_64), rb, p, 0);
 }
 
 /* op [base+ofs], i */
@@ -282,12 +280,12 @@ static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
   x86Op xo;
   if (checki8(i)) {
     emit_i8(as, i);
-    xo = (x86Op)(((xg >> 16) << 24)+0xfe);
+    xo = XG_TOXOi8(xg);
   } else {
     emit_i32(as, i);
-    xo = (x86Op)(((xg >> 8) << 24)+0xfe);
+    xo = XG_TOXOi(xg);
   }
-  emit_rmro(as, xo, (Reg)xg, rb, ofs);
+  emit_rmro(as, xo, (Reg)(xg & 7), rb, ofs);
 }
 
 #define emit_shifti(as, xg, r, i) \
@@ -346,18 +344,6 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
 
 /* -- Emit moves ---------------------------------------------------------- */
 
-/* Generic move between two regs. */
-static void emit_movrr(ASMState *as, Reg r1, Reg r2)
-{
-  emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), r1, r2);
-}
-
-/* Generic move from [base+ofs]. */
-static void emit_movrmro(ASMState *as, Reg rr, Reg rb, int32_t ofs)
-{
-  emit_rmro(as, rr < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), rr, rb, ofs);
-}
-
 /* mov [base+ofs], i */
 static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 {
@@ -623,7 +609,7 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
   int32_t slot = ir->s;
   if (!ra_hasspill(slot)) {
-    if (irt_isnum(ir->t)) {
+    if (irt_isnum(ir->t) || (LJ_64 && irt_islightud(ir->t))) {
       slot = as->evenspill;
       as->evenspill += 2;
     } else if (as->oddspill) {
@@ -653,6 +639,16 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
   return r;
 }
 
+/* Use 64 bit operations to handle 64 bit lightuserdata. */
+#define REX_64LU(ir, r) \
+  ((r) | ((LJ_64 && irt_islightud((ir)->t)) ? REX_64 : 0))
+
+/* Generic move between two regs. */
+static void ra_movrr(ASMState *as, IRIns *ir, Reg r1, Reg r2)
+{
+  emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), REX_64LU(ir, r1), r2);
+}
+
 /* Restore a register (marked as free). Rematerialize or force a spill. */
 static Reg ra_restore(ASMState *as, IRRef ref)
 {
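
A note on the pattern above, used throughout the rest of the patch: ORing REX_64 into a GPR id makes the emitter prepend a REX.W prefix, which widens the instruction to 64 bit. On 32 bit builds the irt_islightud() test in REX_64LU is constant-folded away. A hedged usage sketch, where r, src and ir stand for whatever the caller has at hand:

Reg r64 = REX_64LU(ir, r);      /* plain r, or r|REX_64 for lightuserdata */
emit_rr(as, XO_MOV, r64, src);  /* mov r, src -- 64 bit iff REX_64 is set */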
@@ -668,7 +664,8 @@ static Reg ra_restore(ASMState *as, IRRef ref)
     if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
       ra_modified(as, r);
       RA_DBGX((as, "restore $i $r", ir, r));
-      emit_movrmro(as, r, RID_ESP, ofs);
+      emit_rmro(as, r < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as),
+                REX_64LU(ir, r), RID_ESP, ofs);
     }
     return r;
   }
@@ -679,7 +676,7 @@ static void ra_save(ASMState *as, IRIns *ir, Reg r)
 {
   RA_DBGX((as, "save $i $r", ir, r));
   emit_rmro(as, r < RID_MAX_GPR ? XO_MOVto : XO_MOVSDto,
-            r, RID_ESP, sps_scale(ir->s));
+            REX_64LU(ir, r), RID_ESP, sps_scale(ir->s));
 }
 
 #define MINCOST(r) \
@@ -822,7 +819,8 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
   IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
-  IR(ref)->r = (uint8_t)up;
+  IRIns *ir = IR(ref);
+  ir->r = (uint8_t)up;
   as->cost[down] = 0;
   lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
   lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
@@ -831,7 +829,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
   ra_noweak(as, up);
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
-  emit_movrr(as, down, up);  /* Backwards code generation needs inverse move. */
+  ra_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
     lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
     ren = tref_ref(lj_ir_emit(as->J));
@@ -864,7 +862,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
   Reg dest = ra_dest(as, ir, RID2RSET(r));
   if (dest != r) {
     ra_scratch(as, RID2RSET(r));
-    emit_movrr(as, dest, r);
+    ra_movrr(as, ir, dest, r);
   }
 }
 
@@ -903,7 +901,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
       ra_modified(as, left);
       ra_rename(as, left, dest);
     } else {
-      emit_movrr(as, dest, left);
+      ra_movrr(as, ir, dest, left);
     }
   }
 }
@@ -1201,7 +1199,8 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
   } else {
     Reg r;
     /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */
-    if (mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
+    if (!LJ_64 &&  /* NYI: has bad effects with negative index on x64. */
+        mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
       as->mrm.ofs += IR(irr->op2)->i;
       r = ra_alloc1(as, irr->op1, allow);
     } else {
@@ -1325,7 +1324,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       allow &= ~RID2RSET(r);
       if (ra_hasreg(ir->r)) {
         ra_noweak(as, ir->r);
-        emit_movrr(as, r, ir->r);
+        ra_movrr(as, ir, r, ir->r);
       } else {
         ra_allocref(as, args[n], RID2RSET(r));
       }
@@ -1358,7 +1357,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     if (irt_isnum(ir->t)) {
-      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
+      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
 #if LJ_64
       if ((ci->flags & CCI_CASTU64)) {
         Reg dest = ir->r;
@@ -1367,7 +1366,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
         ra_modified(as, dest);
         emit_rr(as, XO_MOVD, dest|REX_64, RID_RET);  /* Really MOVQ. */
       } else {
-        emit_movrmro(as, RID_RET, RID_ESP, ofs);
+        emit_movtomro(as, RID_RET|REX_64, RID_ESP, ofs);
       }
     } else {
       ra_destreg(as, ir, RID_FPRET);
@@ -1493,8 +1492,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   args[0] = ir->op1;
   args[1] = ASMREF_TMP1;
   asm_gencall(as, ci, args);
-  /* Store the result to the spill slot or slots SPS_TEMP1/2. */
-  emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+  /* Store the result to the spill slot or temp slots. */
+  emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
             RID_ESP, sps_scale(ir->s));
 }
 
@@ -1509,7 +1508,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
     args[1] = ASMREF_TMP1;
     asm_setupresult(as, ir, ci);
     asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
              RID_ESP, ra_spill(as, irl));
   } else {
     const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
@@ -1627,6 +1626,10 @@ static void asm_href(ASMState *as, IRIns *ir)
       emit_i8(as, ~IRT_NUM);
       emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
     }
+#if LJ_64
+  } else if (irt_islightud(kt)) {
+    emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
+#endif
   } else {
     if (!irt_ispri(kt)) {
       lua_assert(irt_isaddr(kt));
@@ -1747,16 +1750,17 @@ static void asm_newref(ASMState *as, IRIns *ir)
     if (irref_isk(ir->op2))
       emit_loada(as, tmp, ir_knum(irkey));
     else
-      emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
+      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
   } else {
     /* Otherwise use g->tmptv to hold the TValue. */
     if (!irref_isk(ir->op2)) {
       Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, src, tmp, 0);
+      emit_movtomro(as, REX_64LU(irkey, src), tmp, 0);
     } else if (!irt_ispri(irkey->t)) {
       emit_movmroi(as, tmp, 0, irkey->i);
     }
-    emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
+    if (!(LJ_64 && irt_islightud(irkey->t)))
+      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
     emit_loada(as, tmp, &J2G(as->J)->tmptv);
   }
 }
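
The hunk above encodes the central store rule: a 64 bit lightuserdata already contains its tag bits, so it is written with a single 64 bit store, and the usual 32 bit type-tag store at offset 4 must be skipped or it would clobber the value. A minimal sketch of the two layouts (little-endian as on x86/x64; this union is illustrative, not LuaJIT's actual TValue):

#include <stdint.h>

typedef union {
  uint64_t u64;                           /* whole tagged value */
  struct { uint32_t lo; int32_t it; } s;  /* payload + 32 bit type tag */
} TV;

static void set_lightud(TV *tv, uint64_t tagged)
{
  tv->u64 = tagged;  /* one store, the tag travels with the value */
}

static void set_gcv(TV *tv, uint32_t gcr, int32_t itype)
{
  tv->s.lo = gcr;    /* two stores, as still emitted for GC objects */
  tv->s.it = itype;
}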
@@ -1822,6 +1826,11 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
+#if LJ_64
+  case IRT_LIGHTUD:
+    dest |= REX_64;
+    /* fallthrough */
+#endif
   default:
     lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
     xo = XO_MOV;
@@ -1848,6 +1857,9 @@ static void asm_fstore(ASMState *as, IRIns *ir)
   switch (irt_type(ir->t)) {
   case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break;
   case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
+#if LJ_64
+  case IRT_LIGHTUD: lua_assert(0);  /* NYI: mask 64 bit lightuserdata. */
+#endif
   default:
     lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
     xo = XO_MOVto;
@@ -1866,11 +1878,41 @@ static void asm_fstore(ASMState *as, IRIns *ir)
   }
 }
 
+#if LJ_64
+static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
+{
+  if (ra_used(ir) || typecheck) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (typecheck) {
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
+      asm_guardcc(as, CC_NE);
+      emit_i8(as, -2);
+      emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
+      emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+      emit_rr(as, XO_MOV, tmp|REX_64, dest);
+    }
+    return dest;
+  } else {
+    return RID_NONE;
+  }
+}
+#endif
+
 static void asm_ahuload(ASMState *as, IRIns *ir)
 {
-  RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
   lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t));
+#if LJ_64
+  if (irt_islightud(ir->t)) {
+    Reg dest = asm_load_lightud64(as, ir, 1);
+    if (ra_hasreg(dest)) {
+      asm_fuseahuref(as, ir->op1, RSET_GPR);
+      emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
+    }
+    return;
+  } else
+#endif
   if (ra_used(ir)) {
+    RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
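
Because the assembler generates code backwards, the emit_* calls in asm_load_lightud64() read in reverse. In execution order the lightuserdata load path above should come out roughly as follows (illustrative listing, not taken from a disassembly):

/* mov  rdest, [aref]    ; 64 bit load via dest|REX_64
** mov  rtmp, rdest
** sar  rtmp, 47         ; pull the tag bits down
** cmp  rtmp, -2         ; all tag bits set, bit 47 clear?
** jne  ->exit           ; type guard
*/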
@@ -1890,6 +1932,12 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
     Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     emit_mrm(as, XO_MOVSDto, src, RID_MRM);
+#if LJ_64
+  } else if (irt_islightud(ir->t)) {
+    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+    asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
+    emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
+#endif
   } else {
     IRIns *irr = IR(ir->op2);
     RegSet allow = RSET_GPR;
@@ -1925,6 +1973,15 @@ static void asm_sload(ASMState *as, IRIns *ir)
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
     emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
+#if LJ_64
+  } else if (irt_islightud(t)) {
+    Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
+    if (ra_hasreg(dest)) {
+      base = ra_alloc1(as, REF_BASE, RSET_GPR);
+      emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
+    }
+    return;
+#endif
   } else if (ra_used(ir)) {
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
@@ -1932,8 +1989,10 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if (irt_isint(t))
       emit_rmro(as, XO_CVTSD2SI, dest, base, ofs);
+    else if (irt_isnum(t))
+      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
     else
-      emit_movrmro(as, dest, base, ofs);
+      emit_rmro(as, XO_MOV, dest, base, ofs);
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
       return;  /* No type check: avoid base alloc. */
@@ -2117,7 +2176,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
   } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
   } else {  /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
+    int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
     Reg dest = ir->r;
     if (ra_hasreg(dest)) {
       ra_free(as, dest);
@@ -2521,6 +2580,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       Reg left = ra_alloc1(as, lref, RSET_GPR);
       Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
       asm_guardcc(as, cc);
+#if LJ_64
+      if (irt_islightud(ir->t))
+        left |= REX_64;
+#endif
       emit_mrm(as, XO_CMP, left, right);
     }
   }
@@ -2563,7 +2626,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   Reg r = allow ? rset_pickbot(allow) : RID_EAX;
   emit_jcc(as, CC_B, exitstub_addr(as->J, exitno));
   if (allow == RSET_EMPTY)  /* Restore temp. register. */
-    emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1));
+    emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
   else
     ra_modified(as, r);
   emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
@@ -2575,7 +2638,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
   emit_getgl(as, r, jit_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
-    emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1));
+    emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
 
 /* Restore Lua stack from on-trace state. */
@@ -2600,14 +2663,17 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
       lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
       if (!irref_isk(ref)) {
         Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
-        emit_movtomro(as, src, RID_BASE, ofs);
+        emit_movtomro(as, REX_64LU(ir, src), RID_BASE, ofs);
       } else if (!irt_ispri(ir->t)) {
         emit_movmroi(as, RID_BASE, ofs, ir->i);
       }
-      if (!(sn & (SNAP_CONT|SNAP_FRAME)))
-        emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
-      else if (s != 0)  /* Do not overwrite link to previous frame. */
-        emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
+      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+        if (s != 0)  /* Do not overwrite link to previous frame. */
+          emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
+      } else {
+        if (!(LJ_64 && irt_islightud(ir->t)))
+          emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
+      }
     }
     checkmclim(as);
   }
@@ -2668,7 +2734,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
            (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
   emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
   lstate = IR(ASMREF_L)->r;
-  emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
+  emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe));
   /* It's ok if lstate is already in a non-scratch reg. But all allocations
   ** in the non-fast path must use a scratch reg. See comment above.
   */
@@ -2830,7 +2896,7 @@ static void asm_phi(ASMState *as, IRIns *ir)
     r = ra_allocref(as, ir->op2, allow);
   } else {  /* Duplicate right PHI, need a copy (rare). */
     r = ra_scratch(as, allow);
-    emit_movrr(as, r, irr->r);
+    ra_movrr(as, irr, r, irr->r);
   }
   ir->r = (uint8_t)r;
   rset_set(as->phiset, r);
@@ -2912,6 +2978,14 @@ static void asm_loop(ASMState *as)
 
 /* -- Head of trace ------------------------------------------------------- */
 
+/* Calculate stack adjustment. */
+static int32_t asm_stack_adjust(ASMState *as)
+{
+  if (as->evenspill <= SPS_FIXED)
+    return 0;
+  return sps_scale((as->evenspill - SPS_FIXED + 3) & ~3);
+}
+
 /* Coalesce BASE register for a root trace. */
 static void asm_head_root_base(ASMState *as)
 {
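
asm_stack_adjust() above replaces the old sps_adjust() macro from lj_target_x86.h. Spill slots are 4 bytes wide, and rounding the used slot count up to a multiple of four keeps the adjusted stack 16 byte aligned. A worked restatement with sps_scale() inlined; the numbers assume the new x64 value SPS_FIXED == 2 from the lj_target_x86.h hunk below:

/* E.g. evenspill == 7:  (7 - 2 + 3) & ~3 == 8 slots -> 32 bytes. */
static int32_t stack_adjust(int32_t evenspill, int32_t sps_fixed)
{
  if (evenspill <= sps_fixed)
    return 0;
  return 4 * ((evenspill - sps_fixed + 3) & ~3);
}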
@@ -2932,9 +3006,9 @@ static void asm_head_root(ASMState *as)
   int32_t spadj;
   asm_head_root_base(as);
   emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
-  spadj = sps_adjust(as->evenspill);
+  spadj = asm_stack_adjust(as);
   as->T->spadjust = (uint16_t)spadj;
-  emit_addptr(as, RID_ESP, -spadj);
+  emit_addptr(as, RID_ESP|REX_64, -spadj);
   /* Root traces assume a checked stack for the starting proto. */
   as->T->topslot = gcref(as->T->startpt)->pt.framesize;
 }
@@ -3007,7 +3081,7 @@ static void asm_head_side(ASMState *as)
   }
 
   /* Calculate stack frame adjustment. */
-  spadj = sps_adjust(as->evenspill);
+  spadj = asm_stack_adjust(as);
   spdelta = spadj - (int32_t)as->parent->spadjust;
   if (spdelta < 0) {  /* Don't shrink the stack frame. */
     spadj = (int32_t)as->parent->spadjust;
@@ -3048,7 +3122,7 @@ static void asm_head_side(ASMState *as)
 
   /* Store trace number and adjust stack frame relative to the parent. */
   emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
-  emit_addptr(as, RID_ESP, -spdelta);
+  emit_addptr(as, RID_ESP|REX_64, -spdelta);
 
   /* Restore target registers from parent spill slots. */
   if (pass3) {
@@ -3061,7 +3135,8 @@ static void asm_head_side(ASMState *as)
       if (ra_hasspill(regsp_spill(rs))) {
         int32_t ofs = sps_scale(regsp_spill(rs));
         ra_free(as, r);
-        emit_movrmro(as, r, RID_ESP, ofs);
+        emit_rmro(as, r < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as),
+                  REX_64LU(ir, r), RID_ESP, ofs);
         checkmclim(as);
       }
     }
@@ -3078,7 +3153,7 @@ static void asm_head_side(ASMState *as)
       rset_clear(live, rp);
       rset_clear(allow, rp);
       ra_free(as, ir->r);
-      emit_movrr(as, ir->r, rp);
+      ra_movrr(as, ir, ir->r, rp);
       checkmclim(as);
     }
 
@@ -3150,7 +3225,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
   MCode *target, *q;
   int32_t spadj = as->T->spadjust;
   if (spadj == 0) {
-    p -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
+    p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
   } else {
     MCode *p1;
     /* Patch stack adjustment. */
@@ -3163,10 +3238,16 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
     *(int32_t *)p1 = spadj;
   }
   if ((as->flags & JIT_F_LEA_AGU)) {
+#if LJ_64
+    p1[-4] = 0x48;
+#endif
     p1[-3] = (MCode)XI_LEA;
     p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
     p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
   } else {
+#if LJ_64
+    p1[-3] = 0x48;
+#endif
     p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
     p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
   }
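
The extra byte accounted for here (and in lj_asm_trace() further down) is the REX.W prefix 0x48 that x64 needs on the patched stack adjustment. For reference, the byte sequences the p1[-n] stores above produce (standard x86 encodings):

/* x86:      8D 64 24 xx         lea esp, [esp+disp8]   (JIT_F_LEA_AGU)
** x64:  48  8D 64 24 xx         lea rsp, [rsp+disp8]
** x86:      81 C4 xx xx xx xx   add esp, imm32
** x64:  48  81 C4 xx xx xx xx   add rsp, imm32  (83 C4 xx for an imm8)
*/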
@@ -3365,12 +3446,13 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
     break;
   case IR_CALLN: case IR_CALLL: case IR_CALLS: {
     const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-    /* NYI: not fastcall-aware, but doesn't matter (yet). */
-    if (CCI_NARGS(ci) > (uint32_t)as->evenspill)  /* Leave room for args. */
-      as->evenspill = (int32_t)CCI_NARGS(ci);
 #if LJ_64
+    /* NYI: add stack slots for calls with more than 4/6 args. */
     ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
 #else
+    /* NYI: not fastcall-aware, but doesn't matter (yet). */
+    if (CCI_NARGS(ci) > (uint32_t)as->evenspill)  /* Leave room for args. */
+      as->evenspill = (int32_t)CCI_NARGS(ci);
     ir->prev = REGSP_HINT(RID_RET);
 #endif
     if (inloop)
@@ -3379,8 +3461,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
       continue;
     }
     /* C calls evict all scratch regs and return results in RID_RET. */
-  case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
-  case IR_NEWREF:
+  case IR_SNEW: case IR_NEWREF:
+#if !LJ_64
+    if (as->evenspill < 3)  /* lj_str_new and lj_tab_newkey need 3 args. */
+      as->evenspill = 3;
+#endif
+  case IR_TNEW: case IR_TDUP: case IR_TOSTR:
     ir->prev = REGSP_HINT(RID_RET);
     if (inloop)
       as->modset = RSET_SCRATCH;
@@ -3500,7 +3586,7 @@ void lj_asm_trace(jit_State *J, Trace *T)
 
   if (!as->loopref) {
     /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
-    as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
+    as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6 + (LJ_64 ? 1 : 0);
     as->invmcp = NULL;
     asm_tail_link(as);
   }
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 75519ed4..ca871238 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -362,6 +362,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 
 #define irt_isnil(t)       (irt_type(t) == IRT_NIL)
 #define irt_ispri(t)       ((uint32_t)irt_type(t) <= IRT_TRUE)
+#define irt_islightud(t)   (irt_type(t) == IRT_LIGHTUD)
 #define irt_isstr(t)       (irt_type(t) == IRT_STR)
 #define irt_isfunc(t)      (irt_type(t) == IRT_FUNC)
 #define irt_istab(t)       (irt_type(t) == IRT_TAB)
@@ -376,9 +377,20 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 #define irt_isgcv(t)       (irt_typerange((t), IRT_STR, IRT_UDATA))
 #define irt_isaddr(t)      (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
 
-#define itype2irt(tv) \
-  (~uitype(tv) < IRT_NUM ? cast(IRType, ~uitype(tv)) : IRT_NUM)
-#define irt_toitype(t)     ((int32_t)~(uint32_t)irt_type(t))
+static LJ_AINLINE IRType itype2irt(const TValue *tv)
+{
+  if (tvisnum(tv))
+    return IRT_NUM;
+#if LJ_64
+  else if (tvislightud(tv))
+    return IRT_LIGHTUD;
+#endif
+  else
+    return cast(IRType, ~uitype(tv));
+}
+
+#define irt_toitype(t) \
+  check_exp(!(LJ_64 && irt_islightud((t))), (int32_t)~(uint32_t)irt_type((t)))
 
 #define irt_isguard(t)     ((t).irt & IRT_GUARD)
 #define irt_ismarked(t)    ((t).irt & IRT_MARK)
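
Context for the new irt_toitype() assertion: LuaJIT's object tags are small negative numbers (LJ_TNIL == ~0u, LJ_TFALSE == ~1u, and so on in lj_obj.h), so IR types and object tags convert by bitwise complement. A 64 bit lightuserdata no longer has a separate 32 bit tag word, hence the check_exp() guard. A hedged sketch of the round trip, assuming that tag convention:

uint32_t tag = ~(uint32_t)IRT_NIL;  /* 0xffffffff, i.e. LJ_TNIL */
IRType irt = (IRType)~tag;          /* back to IRT_NIL == 0 */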
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 2b82e672..86890a26 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -279,6 +279,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
       setintV(o, *sps);
     } else if (irt_isnum(t)) {
       o->u64 = *(uint64_t *)sps;
+#if LJ_64
+    } else if (irt_islightud(t)) {
+      /* 64 bit lightuserdata which may escape already has the tag bits. */
+      o->u64 = *(uint64_t *)sps;
+#endif
     } else {
       lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
       setgcrefi(o->gcr, *sps);
@@ -291,6 +296,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
       setintV(o, ex->gpr[r-RID_MIN_GPR]);
     } else if (irt_isnum(t)) {
       setnumV(o, ex->fpr[r-RID_MIN_FPR]);
+#if LJ_64
+    } else if (irt_islightud(t)) {
+      /* 64 bit lightuserdata which may escape already has the tag bits. */
+      o->u64 = ex->gpr[r-RID_MIN_GPR];
+#endif
     } else {
       if (!irt_ispri(t))
         setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index cb1892d5..8e9a8788 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -96,20 +96,24 @@ enum {
 
 /* -- Spill slots --------------------------------------------------------- */
 
-/* Stack layout for the compiled machine code (after stack adjustment). */
-enum {
-  SPS_TEMP1,		/* Temps (3*dword) for calls and asm_x87load. */
-  SPS_TEMP2,
-  SPS_TEMP3,
-  SPS_FIRST,		/* First spill slot for general use. */
+/* Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+*/
+#if LJ_64
+#ifdef _WIN64
+#define SPS_FIXED	(5*2)
+#else
+#define SPS_FIXED	2
+#endif
+#else
+#define SPS_FIXED	6
+#endif
 
-  /* This definition must match with the *.dasc file(s). */
-  SPS_FIXED = 6		/* Available fixed spill slots in interpreter frame. */
-};
+/* First spill slot for general use. Reserve one 64 bit slot. */
+#define SPS_FIRST	2
 
 /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
 #define sps_scale(slot)	(4 * (int32_t)(slot))
-#define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3))
 
 /* -- Exit state ---------------------------------------------------------- */
 
@@ -241,6 +245,8 @@ typedef uint32_t x86Group;
 
 #define XG_(i8, i, g)	((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
 #define XG_ARITHi(g)	XG_(XI_ARITHi8, XI_ARITHi, g)
+#define XG_TOXOi(xg)	((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000)))
+#define XG_TOXOi8(xg)	((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000)))
 
 #define XO_ARITH(a)	((x86Op)(0x030000fe + ((a)<<27)))
 
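
A worked example of the two new conversion macros, assuming the standard group-1 immediate opcodes XI_ARITHi8 == 0x83 and XI_ARITHi == 0x81: XG_ packs both opcode bytes plus the ModRM group field into one 32 bit word, and XG_TOXOi/XG_TOXOi8 shift the wanted opcode byte into the top byte of an x86Op, whose low bytes carry LuaJIT's 0xfe marker:

/* xg = XG_ARITHi(XOg_ADD) = (0x83 << 16) + (0x81 << 8) + 0 = 0x00838100
** XG_TOXOi(xg)  = 0x000000fe + ((xg << 16) & 0xff000000) = 0x810000fe
**                 -> "81 /0 id", add r/m32, imm32
** XG_TOXOi8(xg) = 0x000000fe + ((xg << 8) & 0xff000000)  = 0x830000fe
**                 -> "83 /0 ib", add r/m32, imm8
*/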