commit    4c9f71be5d4449b717870092839c47a1d5db0dca (patch)
author    Mike Pall <mike>  2010-02-24 07:09:34 +0100
committer Mike Pall <mike>  2010-02-24 07:09:34 +0100
tree      ace2d0a3cf4756cc5ac7607c21304f7f7baffd09 /src
parent    e46f4c8a11bcb2ba76d445e34030ab04ba12668d (diff)
Major 32/64 bit cleanups in assembler and exit handling.
Add 64 bit lightuserdata handling. Keep the tagged 64 bit value.
Allocate/save/restore 64 bit spill slots for 64 bit lightuserdata.
Fix code generation for 64 bit loads/stores/moves/compares.
Fix code generation for stack pointer adjustments.
Add fixed spill slot definitions for x64. Reduce reserved spill slots.
Disable STRREF + ADD fusion in 64 bit mode (avoid negative 32 bit ofs).
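Editor's note: the key invariant behind the lightuserdata changes is that x64 keeps the full tagged 64 bit value in registers and spill slots. The guard emitted by asm_load_lightud64 in the patch below checks the tag in place (runtime order: MOV tmp, dest; SAR tmp, 47; CMP tmp, -2; the emitter works backwards). A minimal C sketch of that check, under the tagged layout the patch assumes; is_lightud64 is illustrative, not part of the source:

#include <stdint.h>

/* Sketch of the type guard asm_load_lightud64 emits: an arithmetic
** shift right by 47 must yield -2, i.e. the top 16 bits are all ones
** and bit 47 is clear, confining the pointer payload to 47 bits.
*/
static int is_lightud64(uint64_t v)
{
  return ((int64_t)v >> 47) == -2;
}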
Diffstat (limited to 'src')
-rw-r--r--  src/lj_asm.c        | 218
-rw-r--r--  src/lj_ir.h         |  18
-rw-r--r--  src/lj_snap.c       |  10
-rw-r--r--  src/lj_target_x86.h |  26
4 files changed, 193 insertions(+), 79 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 116c6e1f..24467dbc 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -261,18 +261,16 @@ static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
 static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
 {
   MCode *p = as->mcp;
+  x86Op xo;
   if (checki8(i)) {
-    p -= 3;
-    p[2] = (MCode)i;
-    p[0] = (MCode)(xg >> 16);
+    *--p = (MCode)i;
+    xo = XG_TOXOi8(xg);
   } else {
-    p -= 6;
-    *(int32_t *)(p+2) = i;
-    p[0] = (MCode)(xg >> 8);
+    p -= 4;
+    *(int32_t *)p = i;
+    xo = XG_TOXOi(xg);
   }
-  p[1] = MODRM(XM_REG, xg, rb);
-  REXRB(p, 0, rb);
-  as->mcp = p;
+  as->mcp = emit_opm(xo, XM_REG, (Reg)(xg & 7) | (rb & REX_64), rb, p, 0);
 }
 
 /* op [base+ofs], i */
@@ -282,12 +280,12 @@ static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
   x86Op xo;
   if (checki8(i)) {
     emit_i8(as, i);
-    xo = (x86Op)(((xg >> 16) << 24)+0xfe);
+    xo = XG_TOXOi8(xg);
   } else {
     emit_i32(as, i);
-    xo = (x86Op)(((xg >> 8) << 24)+0xfe);
+    xo = XG_TOXOi(xg);
   }
-  emit_rmro(as, xo, (Reg)xg, rb, ofs);
+  emit_rmro(as, xo, (Reg)(xg & 7), rb, ofs);
 }
 
 #define emit_shifti(as, xg, r, i) \
@@ -346,18 +344,6 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
 
 /* -- Emit moves ---------------------------------------------------------- */
 
-/* Generic move between two regs. */
-static void emit_movrr(ASMState *as, Reg r1, Reg r2)
-{
-  emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), r1, r2);
-}
-
-/* Generic move from [base+ofs]. */
-static void emit_movrmro(ASMState *as, Reg rr, Reg rb, int32_t ofs)
-{
-  emit_rmro(as, rr < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), rr, rb, ofs);
-}
-
 /* mov [base+ofs], i */
 static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 {
@@ -623,7 +609,7 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
 {
   int32_t slot = ir->s;
   if (!ra_hasspill(slot)) {
-    if (irt_isnum(ir->t)) {
+    if (irt_isnum(ir->t) || (LJ_64 && irt_islightud(ir->t))) {
       slot = as->evenspill;
       as->evenspill += 2;
     } else if (as->oddspill) {
@@ -653,6 +639,16 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
   return r;
 }
 
+/* Use 64 bit operations to handle 64 bit lightuserdata. */
+#define REX_64LU(ir, r) \
+  ((r) | ((LJ_64 && irt_islightud((ir)->t)) ? REX_64 : 0))
+
+/* Generic move between two regs. */
+static void ra_movrr(ASMState *as, IRIns *ir, Reg r1, Reg r2)
+{
+  emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), REX_64LU(ir, r1), r2);
+}
+
 /* Restore a register (marked as free). Rematerialize or force a spill. */
 static Reg ra_restore(ASMState *as, IRRef ref)
 {
@@ -668,7 +664,8 @@ static Reg ra_restore(ASMState *as, IRRef ref)
   if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */
     ra_modified(as, r);
     RA_DBGX((as, "restore $i $r", ir, r));
-    emit_movrmro(as, r, RID_ESP, ofs);
+    emit_rmro(as, r < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as),
+              REX_64LU(ir, r), RID_ESP, ofs);
   }
   return r;
 }
@@ -679,7 +676,7 @@ static void ra_save(ASMState *as, IRIns *ir, Reg r)
 {
   RA_DBGX((as, "save $i $r", ir, r));
   emit_rmro(as, r < RID_MAX_GPR ? XO_MOVto : XO_MOVSDto,
-            r, RID_ESP, sps_scale(ir->s));
+            REX_64LU(ir, r), RID_ESP, sps_scale(ir->s));
 }
 
 #define MINCOST(r) \
@@ -822,7 +819,8 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
   IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
-  IR(ref)->r = (uint8_t)up;
+  IRIns *ir = IR(ref);
+  ir->r = (uint8_t)up;
   as->cost[down] = 0;
   lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
   lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
@@ -831,7 +829,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   rset_clear(as->freeset, up);  /* ... and 'up' is now allocated. */
   ra_noweak(as, up);
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
-  emit_movrr(as, down, up); /* Backwards code generation needs inverse move. */
+  ra_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
     lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
     ren = tref_ref(lj_ir_emit(as->J));
@@ -864,7 +862,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
   Reg dest = ra_dest(as, ir, RID2RSET(r));
   if (dest != r) {
     ra_scratch(as, RID2RSET(r));
-    emit_movrr(as, dest, r);
+    ra_movrr(as, ir, dest, r);
   }
 }
 
@@ -903,7 +901,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
       ra_modified(as, left);
       ra_rename(as, left, dest);
     } else {
-      emit_movrr(as, dest, left);
+      ra_movrr(as, ir, dest, left);
     }
   }
 }
@@ -1201,7 +1199,8 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
   } else {
     Reg r;
     /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */
-    if (mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
+    if (!LJ_64 &&  /* NYI: has bad effects with negative index on x64. */
+        mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
       as->mrm.ofs += IR(irr->op2)->i;
       r = ra_alloc1(as, irr->op1, allow);
     } else {
@@ -1325,7 +1324,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
       allow &= ~RID2RSET(r);
       if (ra_hasreg(ir->r)) {
         ra_noweak(as, ir->r);
-        emit_movrr(as, r, ir->r);
+        ra_movrr(as, ir, r, ir->r);
       } else {
         ra_allocref(as, args[n], RID2RSET(r));
       }
@@ -1358,7 +1357,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   ra_evictset(as, drop); /* Evictions must be performed first. */
   if (ra_used(ir)) {
     if (irt_isnum(ir->t)) {
-      int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
+      int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
 #if LJ_64
       if ((ci->flags & CCI_CASTU64)) {
         Reg dest = ir->r;
@@ -1367,7 +1366,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
         ra_modified(as, dest);
         emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */
       } else {
-        emit_movrmro(as, RID_RET, RID_ESP, ofs);
+        emit_movtomro(as, RID_RET|REX_64, RID_ESP, ofs);
       }
     } else {
       ra_destreg(as, ir, RID_FPRET);
@@ -1493,8 +1492,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
   args[0] = ir->op1;
   args[1] = ASMREF_TMP1;
   asm_gencall(as, ci, args);
-  /* Store the result to the spill slot or slots SPS_TEMP1/2. */
-  emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+  /* Store the result to the spill slot or temp slots. */
+  emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
            RID_ESP, sps_scale(ir->s));
 }
 
@@ -1509,7 +1508,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
     args[1] = ASMREF_TMP1;
     asm_setupresult(as, ir, ci);
     asm_gencall(as, ci, args);
-    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
              RID_ESP, ra_spill(as, irl));
   } else {
     const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
@@ -1627,6 +1626,10 @@ static void asm_href(ASMState *as, IRIns *ir)
       emit_i8(as, ~IRT_NUM);
       emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
     }
+#if LJ_64
+  } else if (irt_islightud(kt)) {
+    emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
+#endif
   } else {
     if (!irt_ispri(kt)) {
       lua_assert(irt_isaddr(kt));
@@ -1747,16 +1750,17 @@ static void asm_newref(ASMState *as, IRIns *ir)
     if (irref_isk(ir->op2))
       emit_loada(as, tmp, ir_knum(irkey));
     else
-      emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
+      emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
   } else {
     /* Otherwise use g->tmptv to hold the TValue. */
     if (!irref_isk(ir->op2)) {
       Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
-      emit_movtomro(as, src, tmp, 0);
+      emit_movtomro(as, REX_64LU(irkey, src), tmp, 0);
     } else if (!irt_ispri(irkey->t)) {
       emit_movmroi(as, tmp, 0, irkey->i);
     }
-    emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
+    if (!(LJ_64 && irt_islightud(irkey->t)))
+      emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
     emit_loada(as, tmp, &J2G(as->J)->tmptv);
   }
 }
@@ -1822,6 +1826,11 @@ static void asm_fxload(ASMState *as, IRIns *ir)
   case IRT_U8: xo = XO_MOVZXb; break;
   case IRT_I16: xo = XO_MOVSXw; break;
   case IRT_U16: xo = XO_MOVZXw; break;
+#if LJ_64
+  case IRT_LIGHTUD:
+    dest |= REX_64;
+    /* fallthrough */
+#endif
   default:
     lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
     xo = XO_MOV;
@@ -1848,6 +1857,9 @@ static void asm_fstore(ASMState *as, IRIns *ir)
     switch (irt_type(ir->t)) {
     case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break;
     case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
+#if LJ_64
+    case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
+#endif
     default:
       lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
       xo = XO_MOVto;
@@ -1866,11 +1878,41 @@ static void asm_fstore(ASMState *as, IRIns *ir)
   }
 }
 
+#if LJ_64
+static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
+{
+  if (ra_used(ir) || typecheck) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (typecheck) {
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
+      asm_guardcc(as, CC_NE);
+      emit_i8(as, -2);
+      emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
+      emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+      emit_rr(as, XO_MOV, tmp|REX_64, dest);
+    }
+    return dest;
+  } else {
+    return RID_NONE;
+  }
+}
+#endif
+
 static void asm_ahuload(ASMState *as, IRIns *ir)
 {
-  RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
   lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t));
+#if LJ_64
+  if (irt_islightud(ir->t)) {
+    Reg dest = asm_load_lightud64(as, ir, 1);
+    if (ra_hasreg(dest)) {
+      asm_fuseahuref(as, ir->op1, RSET_GPR);
+      emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
+    }
+    return;
+  } else
+#endif
   if (ra_used(ir)) {
+    RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
@@ -1890,6 +1932,12 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
     Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     emit_mrm(as, XO_MOVSDto, src, RID_MRM);
+#if LJ_64
+  } else if (irt_islightud(ir->t)) {
+    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+    asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
+    emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
+#endif
   } else {
     IRIns *irr = IR(ir->op2);
     RegSet allow = RSET_GPR;
@@ -1925,6 +1973,15 @@ static void asm_sload(ASMState *as, IRIns *ir)
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
     emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
     t.irt = IRT_NUM; /* Continue with a regular number type check. */
+#if LJ_64
+  } else if (irt_islightud(t)) {
+    Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
+    if (ra_hasreg(dest)) {
+      base = ra_alloc1(as, REF_BASE, RSET_GPR);
+      emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
+    }
+    return;
+#endif
   } else if (ra_used(ir)) {
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
@@ -1932,8 +1989,10 @@ static void asm_sload(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
     if (irt_isint(t))
       emit_rmro(as, XO_CVTSD2SI, dest, base, ofs);
+    else if (irt_isnum(t))
+      emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
     else
-      emit_movrmro(as, dest, base, ofs);
+      emit_rmro(as, XO_MOV, dest, base, ofs);
   } else {
     if (!(ir->op2 & IRSLOAD_TYPECHECK))
       return; /* No type check: avoid base alloc. */
@@ -2117,7 +2176,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
   } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
     /* Rejoined to pow(). */
   } else { /* Handle x87 ops. */
-    int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
+    int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
     Reg dest = ir->r;
     if (ra_hasreg(dest)) {
       ra_free(as, dest);
@@ -2521,6 +2580,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       Reg left = ra_alloc1(as, lref, RSET_GPR);
       Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
       asm_guardcc(as, cc);
+#if LJ_64
+      if (irt_islightud(ir->t))
+        left |= REX_64;
+#endif
       emit_mrm(as, XO_CMP, left, right);
     }
   }
@@ -2563,7 +2626,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   Reg r = allow ? rset_pickbot(allow) : RID_EAX;
   emit_jcc(as, CC_B, exitstub_addr(as->J, exitno));
   if (allow == RSET_EMPTY) /* Restore temp. register. */
-    emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1));
+    emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
   else
     ra_modified(as, r);
   emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
@@ -2575,7 +2638,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
   emit_getgl(as, r, jit_L);
   if (allow == RSET_EMPTY) /* Spill temp. register. */
-    emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1));
+    emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
 }
 
 /* Restore Lua stack from on-trace state. */
@@ -2600,14 +2663,17 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
       lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
       if (!irref_isk(ref)) {
        Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
-       emit_movtomro(as, src, RID_BASE, ofs);
+       emit_movtomro(as, REX_64LU(ir, src), RID_BASE, ofs);
       } else if (!irt_ispri(ir->t)) {
        emit_movmroi(as, RID_BASE, ofs, ir->i);
       }
-      if (!(sn & (SNAP_CONT|SNAP_FRAME)))
-       emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
-      else if (s != 0)  /* Do not overwrite link to previous frame. */
-       emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
+      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+       if (s != 0)  /* Do not overwrite link to previous frame. */
+         emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
+      } else {
+       if (!(LJ_64 && irt_islightud(ir->t)))
+         emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
+      }
     }
     checkmclim(as);
   }
@@ -2668,7 +2734,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
             (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
   emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
   lstate = IR(ASMREF_L)->r;
-  emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
+  emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe));
   /* It's ok if lstate is already in a non-scratch reg. But all allocations
   ** in the non-fast path must use a scratch reg. See comment above.
   */
@@ -2830,7 +2896,7 @@ static void asm_phi(ASMState *as, IRIns *ir)
     r = ra_allocref(as, ir->op2, allow);
   } else { /* Duplicate right PHI, need a copy (rare). */
     r = ra_scratch(as, allow);
-    emit_movrr(as, r, irr->r);
+    ra_movrr(as, irr, r, irr->r);
   }
   ir->r = (uint8_t)r;
   rset_set(as->phiset, r);
@@ -2912,6 +2978,14 @@ static void asm_loop(ASMState *as)
 
 /* -- Head of trace ------------------------------------------------------- */
 
+/* Calculate stack adjustment. */
+static int32_t asm_stack_adjust(ASMState *as)
+{
+  if (as->evenspill <= SPS_FIXED)
+    return 0;
+  return sps_scale((as->evenspill - SPS_FIXED + 3) & ~3);
+}
+
 /* Coalesce BASE register for a root trace. */
 static void asm_head_root_base(ASMState *as)
 {
@@ -2932,9 +3006,9 @@ static void asm_head_root(ASMState *as)
   int32_t spadj;
   asm_head_root_base(as);
   emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
-  spadj = sps_adjust(as->evenspill);
+  spadj = asm_stack_adjust(as);
   as->T->spadjust = (uint16_t)spadj;
-  emit_addptr(as, RID_ESP, -spadj);
+  emit_addptr(as, RID_ESP|REX_64, -spadj);
   /* Root traces assume a checked stack for the starting proto. */
   as->T->topslot = gcref(as->T->startpt)->pt.framesize;
 }
@@ -3007,7 +3081,7 @@ static void asm_head_side(ASMState *as)
   }
 
   /* Calculate stack frame adjustment. */
-  spadj = sps_adjust(as->evenspill);
+  spadj = asm_stack_adjust(as);
   spdelta = spadj - (int32_t)as->parent->spadjust;
   if (spdelta < 0) { /* Don't shrink the stack frame. */
     spadj = (int32_t)as->parent->spadjust;
@@ -3048,7 +3122,7 @@ static void asm_head_side(ASMState *as)
 
   /* Store trace number and adjust stack frame relative to the parent. */
   emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
-  emit_addptr(as, RID_ESP, -spdelta);
+  emit_addptr(as, RID_ESP|REX_64, -spdelta);
 
   /* Restore target registers from parent spill slots. */
   if (pass3) {
@@ -3061,7 +3135,8 @@ static void asm_head_side(ASMState *as)
       if (ra_hasspill(regsp_spill(rs))) {
        int32_t ofs = sps_scale(regsp_spill(rs));
        ra_free(as, r);
-       emit_movrmro(as, r, RID_ESP, ofs);
+       emit_rmro(as, r < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as),
+                 REX_64LU(ir, r), RID_ESP, ofs);
        checkmclim(as);
       }
     }
@@ -3078,7 +3153,7 @@ static void asm_head_side(ASMState *as)
     rset_clear(live, rp);
     rset_clear(allow, rp);
     ra_free(as, ir->r);
-    emit_movrr(as, ir->r, rp);
+    ra_movrr(as, ir, ir->r, rp);
     checkmclim(as);
   }
 
@@ -3150,7 +3225,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
   MCode *target, *q;
   int32_t spadj = as->T->spadjust;
   if (spadj == 0) {
-    p -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
+    p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
   } else {
     MCode *p1;
     /* Patch stack adjustment. */
@@ -3163,10 +3238,16 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
       *(int32_t *)p1 = spadj;
     }
     if ((as->flags & JIT_F_LEA_AGU)) {
+#if LJ_64
+      p1[-4] = 0x48;
+#endif
       p1[-3] = (MCode)XI_LEA;
       p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
       p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
     } else {
+#if LJ_64
+      p1[-3] = 0x48;
+#endif
       p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
       p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
     }
@@ -3365,12 +3446,13 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
       break;
     case IR_CALLN: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-      /* NYI: not fastcall-aware, but doesn't matter (yet). */
-      if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */
-        as->evenspill = (int32_t)CCI_NARGS(ci);
 #if LJ_64
+      /* NYI: add stack slots for calls with more than 4/6 args. */
       ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
 #else
+      /* NYI: not fastcall-aware, but doesn't matter (yet). */
+      if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */
+        as->evenspill = (int32_t)CCI_NARGS(ci);
       ir->prev = REGSP_HINT(RID_RET);
 #endif
       if (inloop)
@@ -3379,8 +3461,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
       continue;
       }
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
-    case IR_NEWREF:
+    case IR_SNEW: case IR_NEWREF:
+#if !LJ_64
+      if (as->evenspill < 3) /* lj_str_new and lj_tab_newkey need 3 args. */
+        as->evenspill = 3;
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_TOSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
        as->modset = RSET_SCRATCH;
@@ -3500,7 +3586,7 @@ void lj_asm_trace(jit_State *J, Trace *T)
 
   if (!as->loopref) {
     /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
-    as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
+    as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6 + (LJ_64 ? 1 : 0);
     as->invmcp = NULL;
     asm_tail_link(as);
   }
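Editor's note: the new asm_stack_adjust above pairs with the SPS_* definitions changed in lj_target_x86.h further down. A hedged sketch of the computation with sps_scale inlined as 4 bytes per slot; stack_adjust is illustrative, and the 16 byte alignment rationale is an assumption drawn from the rounding, not stated in the patch:

#include <stdint.h>

/* Frame size for spill slots: slots are 4 bytes wide and the frame is
** rounded up to a multiple of 4 slots (16 bytes), presumably to keep
** the stack pointer 16 byte aligned across calls.
*/
static int32_t stack_adjust(int32_t evenspill, int32_t sps_fixed)
{
  if (evenspill <= sps_fixed)
    return 0;  /* Everything fits into the interpreter frame's fixed slots. */
  return 4 * ((evenspill - sps_fixed + 3) & ~3);
}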
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 75519ed4..ca871238 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -362,6 +362,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 
 #define irt_isnil(t)       (irt_type(t) == IRT_NIL)
 #define irt_ispri(t)       ((uint32_t)irt_type(t) <= IRT_TRUE)
+#define irt_islightud(t)   (irt_type(t) == IRT_LIGHTUD)
 #define irt_isstr(t)       (irt_type(t) == IRT_STR)
 #define irt_isfunc(t)      (irt_type(t) == IRT_FUNC)
 #define irt_istab(t)       (irt_type(t) == IRT_TAB)
@@ -376,9 +377,20 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 #define irt_isgcv(t)       (irt_typerange((t), IRT_STR, IRT_UDATA))
 #define irt_isaddr(t)      (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
 
-#define itype2irt(tv) \
-  (~uitype(tv) < IRT_NUM ? cast(IRType, ~uitype(tv)) : IRT_NUM)
-#define irt_toitype(t) ((int32_t)~(uint32_t)irt_type(t))
+static LJ_AINLINE IRType itype2irt(const TValue *tv)
+{
+  if (tvisnum(tv))
+    return IRT_NUM;
+#if LJ_64
+  else if (tvislightud(tv))
+    return IRT_LIGHTUD;
+#endif
+  else
+    return cast(IRType, ~uitype(tv));
+}
+
+#define irt_toitype(t) \
+  check_exp(!(LJ_64 && irt_islightud((t))), (int32_t)~(uint32_t)irt_type((t)))
 
 #define irt_isguard(t)     ((t).irt & IRT_GUARD)
 #define irt_ismarked(t)    ((t).irt & IRT_MARK)
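Editor's note: the itype2irt rewrite encodes why lightuserdata is special on x64. IR types and the interpreter's 32 bit object tags are bitwise complements of each other, but a 64 bit lightuserdata carries its tag inside the value and has no 32 bit itype, hence the new check_exp() guard in irt_toitype. A small sketch of the mapping under that reading; irt2itype is illustrative:

#include <stdint.h>

/* IR types are the complement of the 32 bit object tags, so both
** itype2irt and irt_toitype convert with a bitwise NOT, e.g.
** IRT_NIL (0) <-> itype 0xffffffff. A 64 bit lightuserdata must be
** filtered out before this conversion is valid.
*/
static uint32_t irt2itype(uint32_t irtype)
{
  return ~irtype;
}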
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 2b82e672..86890a26 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -279,6 +279,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
        setintV(o, *sps);
       } else if (irt_isnum(t)) {
        o->u64 = *(uint64_t *)sps;
+#if LJ_64
+      } else if (irt_islightud(t)) {
+       /* 64 bit lightuserdata which may escape already has the tag bits. */
+       o->u64 = *(uint64_t *)sps;
+#endif
       } else {
        lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
        setgcrefi(o->gcr, *sps);
@@ -291,6 +296,11 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
        setintV(o, ex->gpr[r-RID_MIN_GPR]);
       } else if (irt_isnum(t)) {
        setnumV(o, ex->fpr[r-RID_MIN_FPR]);
+#if LJ_64
+      } else if (irt_islightud(t)) {
+       /* 64 bit lightuserdata which may escape already has the tag bits. */
+       o->u64 = ex->gpr[r-RID_MIN_GPR];
+#endif
       } else {
        if (!irt_ispri(t))
          setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
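Editor's note: both lj_snap.c branches are the same raw copy, whether the value sits in spill slots or a GPR. This only works because ra_spill in lj_asm.c now gives 64 bit lightuserdata an even spill slot pair, so the two 4 byte slots are adjacent, and because the tag bits travel inside the value, so no retagging is needed on exit. A sketch under those assumptions; restore_lightud is illustrative:

#include <stdint.h>
#include <string.h>

/* Restore a 64 bit lightuserdata into a TValue-sized slot: one 64 bit
** copy, tag bits included, from two adjacent 4 byte spill slots.
*/
static void restore_lightud(uint64_t *o, const uint32_t *sps)
{
  memcpy(o, sps, sizeof(uint64_t));  /* same effect as o->u64 = *(uint64_t *)sps */
}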
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index cb1892d5..8e9a8788 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -96,20 +96,24 @@ enum {
 
 /* -- Spill slots --------------------------------------------------------- */
 
-/* Stack layout for the compiled machine code (after stack adjustment). */
-enum {
-  SPS_TEMP1,           /* Temps (3*dword) for calls and asm_x87load. */
-  SPS_TEMP2,
-  SPS_TEMP3,
-  SPS_FIRST,           /* First spill slot for general use. */
+/* Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+*/
+#if LJ_64
+#ifdef _WIN64
+#define SPS_FIXED      (5*2)
+#else
+#define SPS_FIXED      2
+#endif
+#else
+#define SPS_FIXED      6
+#endif
 
-  /* This definition must match with the *.dasc file(s). */
-  SPS_FIXED = 6        /* Available fixed spill slots in interpreter frame. */
-};
+/* First spill slot for general use. Reserve one 64 bit slot. */
+#define SPS_FIRST      2
 
 /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
 #define sps_scale(slot)        (4 * (int32_t)(slot))
-#define sps_adjust(slot)       (sps_scale(((slot)-SPS_FIXED+3)&~3))
 
 /* -- Exit state ---------------------------------------------------------- */
 
@@ -241,6 +245,8 @@ typedef uint32_t x86Group;
 
 #define XG_(i8, i, g)  ((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
 #define XG_ARITHi(g)   XG_(XI_ARITHi8, XI_ARITHi, g)
+#define XG_TOXOi(xg)   ((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000)))
+#define XG_TOXOi8(xg)  ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000)))
 
 #define XO_ARITH(a)    ((x86Op)(0x030000fe + ((a)<<27)))
 
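Editor's note: the two XG_TOXOi* macros close the loop with the emit_gri rewrite at the top of the patch. An x86Group packs two immediate-form opcode bytes plus the ModRM /digit; the helpers hoist the selected opcode byte into the top byte of an x86Op so the group emitters can reuse the generic emit_opm/emit_rmro path instead of writing raw bytes. A commented restatement of the encoding; the reading of the 0xfe filler as the one byte opcode convention is an assumption inferred from the XO_* encodings:

typedef uint32_t x86Group;
typedef uint32_t x86Op;

/* Bits 16-23: opcode taking an 8 bit immediate; bits 8-15: opcode
** taking a 32 bit immediate; bits 0-7: the /digit for ModRM.reg.
*/
#define XG_(i8, i, g)  ((x86Group)(((i8) << 16) + ((i) << 8) + (g)))

/* Shift the chosen opcode byte into bits 24-31 of an x86Op; the low
** 0x0000fe filler appears to mark a one byte opcode, as in XO_ARITH.
*/
#define XG_TOXOi(xg)   ((x86Op)(0x000000fe + (((xg) << 16) & 0xff000000)))
#define XG_TOXOi8(xg)  ((x86Op)(0x000000fe + (((xg) << 8) & 0xff000000)))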