diff options
author | Mike Pall <mike> | 2016-05-23 01:49:00 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2016-05-23 01:49:00 +0200 |
commit | 6c8258d74b7d4ae7f288897518f23c809b9395f2 (patch) | |
tree | 7479dce31b17ed704f20ee3920b6de6696521f26 | |
parent | 8f868a9d02340bae8b3b4a703118b324213f5c6d (diff) | |
download | luajit-6c8258d74b7d4ae7f288897518f23c809b9395f2.tar.gz luajit-6c8258d74b7d4ae7f288897518f23c809b9395f2.tar.bz2 luajit-6c8258d74b7d4ae7f288897518f23c809b9395f2.zip |
LJ_FR2: Add support for trace recording and snapshots.
Contributed by Peter Cawley.
-rw-r--r-- | src/jit/dump.lua | 10 | ||||
-rw-r--r-- | src/lj_arch.h | 2 | ||||
-rw-r--r-- | src/lj_asm.c | 6 | ||||
-rw-r--r-- | src/lj_asm_x86.h | 22 | ||||
-rw-r--r-- | src/lj_crecord.c | 30 | ||||
-rw-r--r-- | src/lj_def.h | 2 | ||||
-rw-r--r-- | src/lj_ffrecord.c | 49 | ||||
-rw-r--r-- | src/lj_ir.h | 2 | ||||
-rw-r--r-- | src/lj_jit.h | 14 | ||||
-rw-r--r-- | src/lj_record.c | 231 | ||||
-rw-r--r-- | src/lj_snap.c | 61 |
11 files changed, 291 insertions, 138 deletions
diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 9a722f73..a635af10 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua | |||
@@ -310,15 +310,17 @@ local function fmtfunc(func, pc) | |||
310 | end | 310 | end |
311 | end | 311 | end |
312 | 312 | ||
313 | local function formatk(tr, idx) | 313 | local function formatk(tr, idx, sn) |
314 | local k, t, slot = tracek(tr, idx) | 314 | local k, t, slot = tracek(tr, idx) |
315 | local tn = type(k) | 315 | local tn = type(k) |
316 | local s | 316 | local s |
317 | if tn == "number" then | 317 | if tn == "number" then |
318 | if k == 2^52+2^51 then | 318 | if band(sn or 0, 0x30000) ~= 0 then |
319 | s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" | ||
320 | elseif k == 2^52+2^51 then | ||
319 | s = "bias" | 321 | s = "bias" |
320 | else | 322 | else |
321 | s = format("%+.14g", k) | 323 | s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k) |
322 | end | 324 | end |
323 | elseif tn == "string" then | 325 | elseif tn == "string" then |
324 | s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) | 326 | s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) |
@@ -354,7 +356,7 @@ local function printsnap(tr, snap) | |||
354 | n = n + 1 | 356 | n = n + 1 |
355 | local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS | 357 | local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS |
356 | if ref < 0 then | 358 | if ref < 0 then |
357 | out:write(formatk(tr, ref)) | 359 | out:write(formatk(tr, ref, sn)) |
358 | elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM | 360 | elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM |
359 | out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) | 361 | out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) |
360 | else | 362 | else |
diff --git a/src/lj_arch.h b/src/lj_arch.h index 612c7303..72622a21 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
@@ -453,7 +453,7 @@ | |||
453 | #endif | 453 | #endif |
454 | 454 | ||
455 | /* Disable or enable the JIT compiler. */ | 455 | /* Disable or enable the JIT compiler. */ |
456 | #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64 | 456 | #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64 |
457 | #define LJ_HASJIT 0 | 457 | #define LJ_HASJIT 0 |
458 | #else | 458 | #else |
459 | #define LJ_HASJIT 1 | 459 | #define LJ_HASJIT 1 |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 9f784cc8..5dd7ca3a 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -1893,7 +1893,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) | |||
1893 | SnapEntry sn = map[n-1]; | 1893 | SnapEntry sn = map[n-1]; |
1894 | if ((sn & SNAP_FRAME)) { | 1894 | if ((sn & SNAP_FRAME)) { |
1895 | *gotframe = 1; | 1895 | *gotframe = 1; |
1896 | return snap_slot(sn); | 1896 | return snap_slot(sn) - LJ_FR2; |
1897 | } | 1897 | } |
1898 | } | 1898 | } |
1899 | return 0; | 1899 | return 0; |
@@ -1913,7 +1913,7 @@ static void asm_tail_link(ASMState *as) | |||
1913 | 1913 | ||
1914 | if (as->T->link == 0) { | 1914 | if (as->T->link == 0) { |
1915 | /* Setup fixed registers for exit to interpreter. */ | 1915 | /* Setup fixed registers for exit to interpreter. */ |
1916 | const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); | 1916 | const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]); |
1917 | int32_t mres; | 1917 | int32_t mres; |
1918 | if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ | 1918 | if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ |
1919 | BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; | 1919 | BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; |
@@ -1922,7 +1922,7 @@ static void asm_tail_link(ASMState *as) | |||
1922 | } | 1922 | } |
1923 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); | 1923 | ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); |
1924 | ra_allockreg(as, i32ptr(pc), RID_LPC); | 1924 | ra_allockreg(as, i32ptr(pc), RID_LPC); |
1925 | mres = (int32_t)(snap->nslots - baseslot); | 1925 | mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); |
1926 | switch (bc_op(*pc)) { | 1926 | switch (bc_op(*pc)) { |
1927 | case BC_CALLM: case BC_CALLMT: | 1927 | case BC_CALLM: case BC_CALLMT: |
1928 | mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; | 1928 | mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 0361a965..83fe22b2 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -348,7 +348,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
348 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && | 348 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && |
349 | noconflict(as, ref, IR_RETF, 0)) { | 349 | noconflict(as, ref, IR_RETF, 0)) { |
350 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); | 350 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); |
351 | as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); | 351 | as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + |
352 | (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | ||
352 | as->mrm.idx = RID_NONE; | 353 | as->mrm.idx = RID_NONE; |
353 | return RID_MRM; | 354 | return RID_MRM; |
354 | } | 355 | } |
@@ -655,6 +656,9 @@ static void asm_callx(ASMState *as, IRIns *ir) | |||
655 | static void asm_retf(ASMState *as, IRIns *ir) | 656 | static void asm_retf(ASMState *as, IRIns *ir) |
656 | { | 657 | { |
657 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | 658 | Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); |
659 | #if LJ_FR2 | ||
660 | Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); | ||
661 | #endif | ||
658 | void *pc = ir_kptr(IR(ir->op2)); | 662 | void *pc = ir_kptr(IR(ir->op2)); |
659 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); | 663 | int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); |
660 | as->topslot -= (BCReg)delta; | 664 | as->topslot -= (BCReg)delta; |
@@ -663,7 +667,12 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
663 | emit_setgl(as, base, jit_base); | 667 | emit_setgl(as, base, jit_base); |
664 | emit_addptr(as, base, -8*delta); | 668 | emit_addptr(as, base, -8*delta); |
665 | asm_guardcc(as, CC_NE); | 669 | asm_guardcc(as, CC_NE); |
670 | #if LJ_FR2 | ||
671 | emit_rmro(as, XO_CMP, rpc, base, -8); | ||
672 | emit_loadu64(as, rpc, u64ptr(pc)); | ||
673 | #else | ||
666 | emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); | 674 | emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); |
675 | #endif | ||
667 | } | 676 | } |
668 | 677 | ||
669 | /* -- Type conversions ---------------------------------------------------- */ | 678 | /* -- Type conversions ---------------------------------------------------- */ |
@@ -1397,7 +1406,8 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
1397 | 1406 | ||
1398 | static void asm_sload(ASMState *as, IRIns *ir) | 1407 | static void asm_sload(ASMState *as, IRIns *ir) |
1399 | { | 1408 | { |
1400 | int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | 1409 | int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + |
1410 | (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | ||
1401 | IRType1 t = ir->t; | 1411 | IRType1 t = ir->t; |
1402 | Reg base; | 1412 | Reg base; |
1403 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1413 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ |
@@ -2383,13 +2393,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
2383 | static void asm_stack_restore(ASMState *as, SnapShot *snap) | 2393 | static void asm_stack_restore(ASMState *as, SnapShot *snap) |
2384 | { | 2394 | { |
2385 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2395 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
2386 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; | 2396 | #if !LJ_FR2 || defined(LUA_USE_ASSERT) |
2397 | SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; | ||
2398 | #endif | ||
2387 | MSize n, nent = snap->nent; | 2399 | MSize n, nent = snap->nent; |
2388 | /* Store the value of all modified slots to the Lua stack. */ | 2400 | /* Store the value of all modified slots to the Lua stack. */ |
2389 | for (n = 0; n < nent; n++) { | 2401 | for (n = 0; n < nent; n++) { |
2390 | SnapEntry sn = map[n]; | 2402 | SnapEntry sn = map[n]; |
2391 | BCReg s = snap_slot(sn); | 2403 | BCReg s = snap_slot(sn); |
2392 | int32_t ofs = 8*((int32_t)s-1); | 2404 | int32_t ofs = 8*((int32_t)s-1-LJ_FR2); |
2393 | IRRef ref = snap_ref(sn); | 2405 | IRRef ref = snap_ref(sn); |
2394 | IRIns *ir = IR(ref); | 2406 | IRIns *ir = IR(ref); |
2395 | if ((sn & SNAP_NORESTORE)) | 2407 | if ((sn & SNAP_NORESTORE)) |
@@ -2407,8 +2419,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2407 | emit_movmroi(as, RID_BASE, ofs, ir->i); | 2419 | emit_movmroi(as, RID_BASE, ofs, ir->i); |
2408 | } | 2420 | } |
2409 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 2421 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
2422 | #if !LJ_FR2 | ||
2410 | if (s != 0) /* Do not overwrite link to previous frame. */ | 2423 | if (s != 0) /* Do not overwrite link to previous frame. */ |
2411 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); | 2424 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); |
2425 | #endif | ||
2412 | } else { | 2426 | } else { |
2413 | if (!(LJ_64 && irt_islightud(ir->t))) | 2427 | if (!(LJ_64 && irt_islightud(ir->t))) |
2414 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | 2428 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); |
diff --git a/src/lj_crecord.c b/src/lj_crecord.c index c0f7e3d7..d568b20a 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c | |||
@@ -712,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) | |||
712 | return tr; | 712 | return tr; |
713 | } | 713 | } |
714 | 714 | ||
715 | /* Tailcall to function. */ | ||
716 | static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv) | ||
717 | { | ||
718 | TRef kfunc = lj_ir_kfunc(J, funcV(tv)); | ||
719 | #if LJ_FR2 | ||
720 | J->base[-2] = kfunc; | ||
721 | J->base[-1] = TREF_FRAME; | ||
722 | #else | ||
723 | J->base[-1] = kfunc | TREF_FRAME; | ||
724 | #endif | ||
725 | rd->nres = -1; /* Pending tailcall. */ | ||
726 | } | ||
727 | |||
715 | /* Record ctype __index/__newindex metamethods. */ | 728 | /* Record ctype __index/__newindex metamethods. */ |
716 | static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, | 729 | static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, |
717 | RecordFFData *rd) | 730 | RecordFFData *rd) |
@@ -721,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, | |||
721 | if (!tv) | 734 | if (!tv) |
722 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 735 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
723 | if (tvisfunc(tv)) { | 736 | if (tvisfunc(tv)) { |
724 | J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; | 737 | crec_tailcall(J, rd, tv); |
725 | rd->nres = -1; /* Pending tailcall. */ | ||
726 | } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { | 738 | } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { |
727 | /* Specialize to result of __index lookup. */ | 739 | /* Specialize to result of __index lookup. */ |
728 | cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); | 740 | cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); |
@@ -1119,20 +1131,20 @@ static void crec_snap_caller(jit_State *J) | |||
1119 | lua_State *L = J->L; | 1131 | lua_State *L = J->L; |
1120 | TValue *base = L->base, *top = L->top; | 1132 | TValue *base = L->base, *top = L->top; |
1121 | const BCIns *pc = J->pc; | 1133 | const BCIns *pc = J->pc; |
1122 | TRef ftr = J->base[-1]; | 1134 | TRef ftr = J->base[-1-LJ_FR2]; |
1123 | ptrdiff_t delta; | 1135 | ptrdiff_t delta; |
1124 | if (!frame_islua(base-1) || J->framedepth <= 0) | 1136 | if (!frame_islua(base-1) || J->framedepth <= 0) |
1125 | lj_trace_err(J, LJ_TRERR_NYICALL); | 1137 | lj_trace_err(J, LJ_TRERR_NYICALL); |
1126 | J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); | 1138 | J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); |
1127 | L->top = base; L->base = base - delta; | 1139 | L->top = base; L->base = base - delta; |
1128 | J->base[-1] = TREF_FALSE; | 1140 | J->base[-1-LJ_FR2] = TREF_FALSE; |
1129 | J->base -= delta; J->baseslot -= (BCReg)delta; | 1141 | J->base -= delta; J->baseslot -= (BCReg)delta; |
1130 | J->maxslot = (BCReg)delta; J->framedepth--; | 1142 | J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--; |
1131 | lj_snap_add(J); | 1143 | lj_snap_add(J); |
1132 | L->base = base; L->top = top; | 1144 | L->base = base; L->top = top; |
1133 | J->framedepth++; J->maxslot = 1; | 1145 | J->framedepth++; J->maxslot = 1; |
1134 | J->base += delta; J->baseslot += (BCReg)delta; | 1146 | J->base += delta; J->baseslot += (BCReg)delta; |
1135 | J->base[-1] = ftr; J->pc = pc; | 1147 | J->base[-1-LJ_FR2] = ftr; J->pc = pc; |
1136 | } | 1148 | } |
1137 | 1149 | ||
1138 | /* Record function call. */ | 1150 | /* Record function call. */ |
@@ -1224,8 +1236,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) | |||
1224 | tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); | 1236 | tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); |
1225 | if (tv) { | 1237 | if (tv) { |
1226 | if (tvisfunc(tv)) { | 1238 | if (tvisfunc(tv)) { |
1227 | J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; | 1239 | crec_tailcall(J, rd, tv); |
1228 | rd->nres = -1; /* Pending tailcall. */ | ||
1229 | return; | 1240 | return; |
1230 | } | 1241 | } |
1231 | } else if (mm == MM_new) { | 1242 | } else if (mm == MM_new) { |
@@ -1373,8 +1384,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, | |||
1373 | } | 1384 | } |
1374 | if (tv) { | 1385 | if (tv) { |
1375 | if (tvisfunc(tv)) { | 1386 | if (tvisfunc(tv)) { |
1376 | J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; | 1387 | crec_tailcall(J, rd, tv); |
1377 | rd->nres = -1; /* Pending tailcall. */ | ||
1378 | return 0; | 1388 | return 0; |
1379 | } /* NYI: non-function metamethods. */ | 1389 | } /* NYI: non-function metamethods. */ |
1380 | } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ | 1390 | } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ |
diff --git a/src/lj_def.h b/src/lj_def.h index 29d3fdda..9413399d 100644 --- a/src/lj_def.h +++ b/src/lj_def.h | |||
@@ -95,6 +95,8 @@ typedef unsigned int uintptr_t; | |||
95 | #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) | 95 | #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) |
96 | #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) | 96 | #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) |
97 | #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) | 97 | #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) |
98 | #define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) | ||
99 | #define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) | ||
98 | 100 | ||
99 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) | 101 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) |
100 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) | 102 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index ae567622..64a9a65d 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
@@ -102,35 +102,41 @@ static void recff_stitch(jit_State *J) | |||
102 | ASMFunction cont = lj_cont_stitch; | 102 | ASMFunction cont = lj_cont_stitch; |
103 | lua_State *L = J->L; | 103 | lua_State *L = J->L; |
104 | TValue *base = L->base; | 104 | TValue *base = L->base; |
105 | BCReg nslot = J->maxslot + 1 + LJ_FR2; | ||
106 | TValue *nframe = base + 1 + LJ_FR2; | ||
105 | const BCIns *pc = frame_pc(base-1); | 107 | const BCIns *pc = frame_pc(base-1); |
106 | TValue *pframe = frame_prevl(base-1); | 108 | TValue *pframe = frame_prevl(base-1); |
107 | 109 | ||
108 | lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */ | ||
109 | /* Move func + args up in Lua stack and insert continuation. */ | 110 | /* Move func + args up in Lua stack and insert continuation. */ |
110 | memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1)); | 111 | memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); |
111 | setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT); | 112 | setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT); |
112 | setcont(base, cont); | 113 | setcont(base-LJ_FR2, cont); |
113 | setframe_pc(base, pc); | 114 | setframe_pc(base, pc); |
114 | setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */ | 115 | setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */ |
115 | L->base += 2; | 116 | L->base += 2 + LJ_FR2; |
116 | L->top += 2; | 117 | L->top += 2 + LJ_FR2; |
117 | 118 | ||
118 | /* Ditto for the IR. */ | 119 | /* Ditto for the IR. */ |
119 | memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1)); | 120 | memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); |
121 | #if LJ_FR2 | ||
122 | J->base[2] = TREF_FRAME; | ||
123 | J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); | ||
124 | J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; | ||
125 | #else | ||
120 | J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; | 126 | J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; |
121 | J->base[-1] = lj_ir_ktrace(J); | 127 | #endif |
122 | J->ktrace = tref_ref(J->base[-1]); | 128 | J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); |
123 | J->base += 2; | 129 | J->base += 2 + LJ_FR2; |
124 | J->baseslot += 2; | 130 | J->baseslot += 2 + LJ_FR2; |
125 | J->framedepth++; | 131 | J->framedepth++; |
126 | 132 | ||
127 | lj_record_stop(J, LJ_TRLINK_STITCH, 0); | 133 | lj_record_stop(J, LJ_TRLINK_STITCH, 0); |
128 | 134 | ||
129 | /* Undo Lua stack changes. */ | 135 | /* Undo Lua stack changes. */ |
130 | memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1)); | 136 | memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); |
131 | setframe_pc(base-1, pc); | 137 | setframe_pc(base-1, pc); |
132 | L->base -= 2; | 138 | L->base -= 2 + LJ_FR2; |
133 | L->top -= 2; | 139 | L->top -= 2 + LJ_FR2; |
134 | } | 140 | } |
135 | 141 | ||
136 | /* Fallback handler for fast functions that are not recorded (yet). */ | 142 | /* Fallback handler for fast functions that are not recorded (yet). */ |
@@ -373,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm) | |||
373 | int errcode; | 379 | int errcode; |
374 | TValue argv0; | 380 | TValue argv0; |
375 | /* Temporarily insert metamethod below object. */ | 381 | /* Temporarily insert metamethod below object. */ |
376 | J->base[1] = J->base[0]; | 382 | J->base[1+LJ_FR2] = J->base[0]; |
377 | J->base[0] = ix.mobj; | 383 | J->base[0] = ix.mobj; |
378 | copyTV(J->L, &argv0, &rd->argv[0]); | 384 | copyTV(J->L, &argv0, &rd->argv[0]); |
379 | copyTV(J->L, &rd->argv[1], &rd->argv[0]); | 385 | copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]); |
380 | copyTV(J->L, &rd->argv[0], &ix.mobjv); | 386 | copyTV(J->L, &rd->argv[0], &ix.mobjv); |
381 | /* Need to protect lj_record_tailcall because it may throw. */ | 387 | /* Need to protect lj_record_tailcall because it may throw. */ |
382 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); | 388 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); |
@@ -443,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) | |||
443 | static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) | 449 | static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) |
444 | { | 450 | { |
445 | if (J->maxslot >= 1) { | 451 | if (J->maxslot >= 1) { |
452 | #if LJ_FR2 | ||
453 | /* Shift function arguments up. */ | ||
454 | memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); | ||
455 | #endif | ||
446 | lj_record_call(J, 0, J->maxslot - 1); | 456 | lj_record_call(J, 0, J->maxslot - 1); |
447 | rd->nres = -1; /* Pending call. */ | 457 | rd->nres = -1; /* Pending call. */ |
448 | } /* else: Interpreter will throw. */ | 458 | } /* else: Interpreter will throw. */ |
@@ -462,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) | |||
462 | TValue argv0, argv1; | 472 | TValue argv0, argv1; |
463 | TRef tmp; | 473 | TRef tmp; |
464 | int errcode; | 474 | int errcode; |
465 | lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ | ||
466 | /* Swap function and traceback. */ | 475 | /* Swap function and traceback. */ |
467 | tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp; | 476 | tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp; |
468 | copyTV(J->L, &argv0, &rd->argv[0]); | 477 | copyTV(J->L, &argv0, &rd->argv[0]); |
469 | copyTV(J->L, &argv1, &rd->argv[1]); | 478 | copyTV(J->L, &argv1, &rd->argv[1]); |
470 | copyTV(J->L, &rd->argv[0], &argv1); | 479 | copyTV(J->L, &rd->argv[0], &argv1); |
471 | copyTV(J->L, &rd->argv[1], &argv0); | 480 | copyTV(J->L, &rd->argv[1], &argv0); |
481 | #if LJ_FR2 | ||
482 | /* Shift function arguments up. */ | ||
483 | memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); | ||
484 | #endif | ||
472 | /* Need to protect lj_record_call because it may throw. */ | 485 | /* Need to protect lj_record_call because it may throw. */ |
473 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); | 486 | errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); |
474 | /* Always undo Lua stack swap to avoid confusing the interpreter. */ | 487 | /* Always undo Lua stack swap to avoid confusing the interpreter. */ |
diff --git a/src/lj_ir.h b/src/lj_ir.h index 3de57046..4e9c85c7 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -220,7 +220,7 @@ IRFLDEF(FLENUM) | |||
220 | 220 | ||
221 | /* SLOAD mode bits, stored in op2. */ | 221 | /* SLOAD mode bits, stored in op2. */ |
222 | #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ | 222 | #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ |
223 | #define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ | 223 | #define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ |
224 | #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ | 224 | #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ |
225 | #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ | 225 | #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ |
226 | #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ | 226 | #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ |
diff --git a/src/lj_jit.h b/src/lj_jit.h index 55fbea8b..f460a0ab 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -179,14 +179,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); | |||
179 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) | 179 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) |
180 | #define SNAP_TR(slot, tr) \ | 180 | #define SNAP_TR(slot, tr) \ |
181 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) | 181 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) |
182 | #if !LJ_FR2 | ||
182 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) | 183 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) |
184 | #endif | ||
183 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) | 185 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) |
184 | #define snap_ref(sn) ((sn) & 0xffff) | 186 | #define snap_ref(sn) ((sn) & 0xffff) |
185 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) | 187 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) |
186 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) | 188 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) |
187 | #define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) | ||
188 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) | 189 | #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) |
189 | 190 | ||
191 | static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) | ||
192 | { | ||
193 | #if LJ_FR2 | ||
194 | uint64_t pcbase; | ||
195 | memcpy(&pcbase, sn, sizeof(uint64_t)); | ||
196 | return (const BCIns *)(pcbase >> 8); | ||
197 | #else | ||
198 | return (const BCIns *)(uintptr_t)*sn; | ||
199 | #endif | ||
200 | } | ||
201 | |||
190 | /* Snapshot and exit numbers. */ | 202 | /* Snapshot and exit numbers. */ |
191 | typedef uint32_t SnapNo; | 203 | typedef uint32_t SnapNo; |
192 | typedef uint32_t ExitNo; | 204 | typedef uint32_t ExitNo; |
diff --git a/src/lj_record.c b/src/lj_record.c index 3b754897..f0481050 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -87,30 +87,48 @@ static void rec_check_slots(jit_State *J) | |||
87 | BCReg s, nslots = J->baseslot + J->maxslot; | 87 | BCReg s, nslots = J->baseslot + J->maxslot; |
88 | int32_t depth = 0; | 88 | int32_t depth = 0; |
89 | cTValue *base = J->L->base - J->baseslot; | 89 | cTValue *base = J->L->base - J->baseslot; |
90 | lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); | 90 | lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); |
91 | lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); | 91 | lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); |
92 | lua_assert(nslots < LJ_MAX_JSLOTS); | 92 | lua_assert(nslots < LJ_MAX_JSLOTS); |
93 | for (s = 0; s < nslots; s++) { | 93 | for (s = 0; s < nslots; s++) { |
94 | TRef tr = J->slot[s]; | 94 | TRef tr = J->slot[s]; |
95 | if (tr) { | 95 | if (tr) { |
96 | cTValue *tv = &base[s]; | 96 | cTValue *tv = &base[s]; |
97 | IRRef ref = tref_ref(tr); | 97 | IRRef ref = tref_ref(tr); |
98 | IRIns *ir; | 98 | IRIns *ir = NULL; /* Silence compiler. */ |
99 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); | 99 | if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { |
100 | ir = IR(ref); | 100 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); |
101 | lua_assert(irt_t(ir->t) == tref_t(tr)); | 101 | ir = IR(ref); |
102 | lua_assert(irt_t(ir->t) == tref_t(tr)); | ||
103 | } | ||
102 | if (s == 0) { | 104 | if (s == 0) { |
103 | lua_assert(tref_isfunc(tr)); | 105 | lua_assert(tref_isfunc(tr)); |
106 | #if LJ_FR2 | ||
107 | } else if (s == 1) { | ||
108 | lua_assert(0); | ||
109 | #endif | ||
104 | } else if ((tr & TREF_FRAME)) { | 110 | } else if ((tr & TREF_FRAME)) { |
105 | GCfunc *fn = gco2func(frame_gc(tv)); | 111 | GCfunc *fn = gco2func(frame_gc(tv)); |
106 | BCReg delta = (BCReg)(tv - frame_prev(tv)); | 112 | BCReg delta = (BCReg)(tv - frame_prev(tv)); |
113 | #if LJ_FR2 | ||
114 | if (ref) | ||
115 | lua_assert(ir_knum(ir)->u64 == tv->u64); | ||
116 | tr = J->slot[s-1]; | ||
117 | ir = IR(tref_ref(tr)); | ||
118 | #endif | ||
107 | lua_assert(tref_isfunc(tr)); | 119 | lua_assert(tref_isfunc(tr)); |
108 | if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); | 120 | if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); |
109 | lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); | 121 | lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) |
122 | : (s == delta + LJ_FR2)); | ||
110 | depth++; | 123 | depth++; |
111 | } else if ((tr & TREF_CONT)) { | 124 | } else if ((tr & TREF_CONT)) { |
125 | #if LJ_FR2 | ||
126 | if (ref) | ||
127 | lua_assert(ir_knum(ir)->u64 == tv->u64); | ||
128 | #else | ||
112 | lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); | 129 | lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); |
113 | lua_assert((J->slot[s+1] & TREF_FRAME)); | 130 | #endif |
131 | lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); | ||
114 | depth++; | 132 | depth++; |
115 | } else { | 133 | } else { |
116 | if (tvisnumber(tv)) | 134 | if (tvisnumber(tv)) |
@@ -162,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot) | |||
162 | /* Get TRef for current function. */ | 180 | /* Get TRef for current function. */ |
163 | static TRef getcurrf(jit_State *J) | 181 | static TRef getcurrf(jit_State *J) |
164 | { | 182 | { |
165 | if (J->base[-1]) | 183 | if (J->base[-1-LJ_FR2]) |
166 | return J->base[-1]; | 184 | return J->base[-1-LJ_FR2]; |
167 | lua_assert(J->baseslot == 1); | 185 | lua_assert(J->baseslot == 1+LJ_FR2); |
168 | return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); | 186 | return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); |
169 | } | 187 | } |
170 | 188 | ||
171 | /* Compare for raw object equality. | 189 | /* Compare for raw object equality. |
@@ -509,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | |||
509 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) | 527 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) |
510 | { | 528 | { |
511 | BCReg ra = bc_a(iterins); | 529 | BCReg ra = bc_a(iterins); |
512 | lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ | ||
513 | if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ | 530 | if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ |
514 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ | 531 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ |
515 | J->maxslot = ra-1+bc_b(J->pc[-1]); | 532 | J->maxslot = ra-1+bc_b(J->pc[-1]); |
@@ -678,22 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
678 | { | 695 | { |
679 | RecordIndex ix; | 696 | RecordIndex ix; |
680 | TValue *functv = &J->L->base[func]; | 697 | TValue *functv = &J->L->base[func]; |
681 | TRef *fbase = &J->base[func]; | 698 | TRef kfunc, *fbase = &J->base[func]; |
682 | ptrdiff_t i; | 699 | ptrdiff_t i; |
683 | lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ | 700 | (void)getslot(J, func); /* Ensure func has a reference. */ |
684 | for (i = 0; i <= nargs; i++) | 701 | for (i = 1; i <= nargs; i++) |
685 | (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ | 702 | (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */ |
686 | if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ | 703 | if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ |
687 | ix.tab = fbase[0]; | 704 | ix.tab = fbase[0]; |
688 | copyTV(J->L, &ix.tabv, functv); | 705 | copyTV(J->L, &ix.tabv, functv); |
689 | if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) | 706 | if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) |
690 | lj_trace_err(J, LJ_TRERR_NOMM); | 707 | lj_trace_err(J, LJ_TRERR_NOMM); |
691 | for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ | 708 | for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ |
692 | fbase[i] = fbase[i-1]; | 709 | fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; |
710 | #if LJ_FR2 | ||
711 | fbase[2] = fbase[0]; | ||
712 | #endif | ||
693 | fbase[0] = ix.mobj; /* Replace function. */ | 713 | fbase[0] = ix.mobj; /* Replace function. */ |
694 | functv = &ix.mobjv; | 714 | functv = &ix.mobjv; |
695 | } | 715 | } |
696 | fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); | 716 | kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); |
717 | #if LJ_FR2 | ||
718 | fbase[0] = kfunc; | ||
719 | fbase[1] = TREF_FRAME; | ||
720 | #else | ||
721 | fbase[0] = kfunc | TREF_FRAME; | ||
722 | #endif | ||
697 | J->maxslot = (BCReg)nargs; | 723 | J->maxslot = (BCReg)nargs; |
698 | } | 724 | } |
699 | 725 | ||
@@ -703,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
703 | rec_call_setup(J, func, nargs); | 729 | rec_call_setup(J, func, nargs); |
704 | /* Bump frame. */ | 730 | /* Bump frame. */ |
705 | J->framedepth++; | 731 | J->framedepth++; |
706 | J->base += func+1; | 732 | J->base += func+1+LJ_FR2; |
707 | J->baseslot += func+1; | 733 | J->baseslot += func+1+LJ_FR2; |
708 | } | 734 | } |
709 | 735 | ||
710 | /* Record tail call. */ | 736 | /* Record tail call. */ |
@@ -720,7 +746,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) | |||
720 | func += cbase; | 746 | func += cbase; |
721 | } | 747 | } |
722 | /* Move func + args down. */ | 748 | /* Move func + args down. */ |
723 | memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); | 749 | if (LJ_FR2 && J->baseslot == 2) |
750 | J->base[func+1] = 0; | ||
751 | memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); | ||
724 | /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ | 752 | /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ |
725 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ | 753 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ |
726 | if (++J->tailcalled > J->loopunroll) | 754 | if (++J->tailcalled > J->loopunroll) |
@@ -763,7 +791,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
763 | BCReg cbase = (BCReg)frame_delta(frame); | 791 | BCReg cbase = (BCReg)frame_delta(frame); |
764 | if (--J->framedepth < 0) | 792 | if (--J->framedepth < 0) |
765 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 793 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
766 | lua_assert(J->baseslot > 1); | 794 | lua_assert(J->baseslot > 1+LJ_FR2); |
767 | gotresults++; | 795 | gotresults++; |
768 | rbase += cbase; | 796 | rbase += cbase; |
769 | J->baseslot -= (BCReg)cbase; | 797 | J->baseslot -= (BCReg)cbase; |
@@ -787,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
787 | BCReg cbase = (BCReg)frame_delta(frame); | 815 | BCReg cbase = (BCReg)frame_delta(frame); |
788 | if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ | 816 | if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ |
789 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 817 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
790 | lua_assert(J->baseslot > 1); | 818 | lua_assert(J->baseslot > 1+LJ_FR2); |
791 | rbase += cbase; | 819 | rbase += cbase; |
792 | J->baseslot -= (BCReg)cbase; | 820 | J->baseslot -= (BCReg)cbase; |
793 | J->base -= cbase; | 821 | J->base -= cbase; |
@@ -797,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
797 | BCIns callins = *(frame_pc(frame)-1); | 825 | BCIns callins = *(frame_pc(frame)-1); |
798 | ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; | 826 | ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; |
799 | BCReg cbase = bc_a(callins); | 827 | BCReg cbase = bc_a(callins); |
800 | GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2))); | 828 | GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2))); |
801 | lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */ | ||
802 | if ((pt->flags & PROTO_NOJIT)) | 829 | if ((pt->flags & PROTO_NOJIT)) |
803 | lj_trace_err(J, LJ_TRERR_CJITOFF); | 830 | lj_trace_err(J, LJ_TRERR_CJITOFF); |
804 | if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { | 831 | if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { |
@@ -811,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
811 | lj_snap_add(J); | 838 | lj_snap_add(J); |
812 | } | 839 | } |
813 | for (i = 0; i < nresults; i++) /* Adjust results. */ | 840 | for (i = 0; i < nresults; i++) /* Adjust results. */ |
814 | J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; | 841 | J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL; |
815 | J->maxslot = cbase+(BCReg)nresults; | 842 | J->maxslot = cbase+(BCReg)nresults; |
816 | if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ | 843 | if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ |
817 | J->framedepth--; | 844 | J->framedepth--; |
818 | lua_assert(J->baseslot > cbase+1); | 845 | lua_assert(J->baseslot > cbase+1+LJ_FR2); |
819 | J->baseslot -= cbase+1; | 846 | J->baseslot -= cbase+1+LJ_FR2; |
820 | J->base -= cbase+1; | 847 | J->base -= cbase+1+LJ_FR2; |
821 | } else if (J->parent == 0 && J->exitno == 0 && | 848 | } else if (J->parent == 0 && J->exitno == 0 && |
822 | !bc_isret(bc_op(J->cur.startins))) { | 849 | !bc_isret(bc_op(J->cur.startins))) { |
823 | /* Return to lower frame would leave the loop in a root trace. */ | 850 | /* Return to lower frame would leave the loop in a root trace. */ |
@@ -827,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
827 | } else { /* Return to lower frame. Guard for the target we return to. */ | 854 | } else { /* Return to lower frame. Guard for the target we return to. */ |
828 | TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); | 855 | TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); |
829 | TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); | 856 | TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); |
830 | emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); | 857 | emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); |
831 | J->retdepth++; | 858 | J->retdepth++; |
832 | J->needsnap = 1; | 859 | J->needsnap = 1; |
833 | lua_assert(J->baseslot == 1); | 860 | lua_assert(J->baseslot == 1+LJ_FR2); |
834 | /* Shift result slots up and clear the slots of the new frame below. */ | 861 | /* Shift result slots up and clear the slots of the new frame below. */ |
835 | memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); | 862 | memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); |
836 | memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); | 863 | memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); |
837 | } | 864 | } |
838 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ | 865 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ |
839 | ASMFunction cont = frame_contf(frame); | 866 | ASMFunction cont = frame_contf(frame); |
@@ -842,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
842 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 869 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
843 | J->baseslot -= (BCReg)cbase; | 870 | J->baseslot -= (BCReg)cbase; |
844 | J->base -= cbase; | 871 | J->base -= cbase; |
845 | J->maxslot = cbase-2; | 872 | J->maxslot = cbase-(2<<LJ_FR2); |
846 | if (cont == lj_cont_ra) { | 873 | if (cont == lj_cont_ra) { |
847 | /* Copy result to destination slot. */ | 874 | /* Copy result to destination slot. */ |
848 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); | 875 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); |
849 | J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; | 876 | J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; |
850 | if (dst >= J->maxslot) J->maxslot = dst+1; | 877 | if (dst >= J->maxslot) { |
878 | J->maxslot = dst+1; | ||
879 | } | ||
851 | } else if (cont == lj_cont_nop) { | 880 | } else if (cont == lj_cont_nop) { |
852 | /* Nothing to do here. */ | 881 | /* Nothing to do here. */ |
853 | } else if (cont == lj_cont_cat) { | 882 | } else if (cont == lj_cont_cat) { |
854 | BCReg bslot = bc_b(*(frame_contpc(frame)-1)); | 883 | BCReg bslot = bc_b(*(frame_contpc(frame)-1)); |
855 | TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; | 884 | TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; |
856 | if (bslot != cbase-2) { /* Concatenate the remainder. */ | 885 | if (bslot != J->maxslot) { /* Concatenate the remainder. */ |
857 | TValue *b = J->L->base, save; /* Simulate lower frame and result. */ | 886 | TValue *b = J->L->base, save; /* Simulate lower frame and result. */ |
858 | J->base[cbase-2] = tr; | 887 | J->base[J->maxslot] = tr; |
859 | copyTV(J->L, &save, b-2); | 888 | copyTV(J->L, &save, b-(2<<LJ_FR2)); |
860 | if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2); | 889 | if (gotresults) |
890 | copyTV(J->L, b-(2<<LJ_FR2), b+rbase); | ||
891 | else | ||
892 | setnilV(b-(2<<LJ_FR2)); | ||
861 | J->L->base = b - cbase; | 893 | J->L->base = b - cbase; |
862 | tr = rec_cat(J, bslot, cbase-2); | 894 | tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2)); |
863 | b = J->L->base + cbase; /* Undo. */ | 895 | b = J->L->base + cbase; /* Undo. */ |
864 | J->L->base = b; | 896 | J->L->base = b; |
865 | copyTV(J->L, b-2, &save); | 897 | copyTV(J->L, b-(2<<LJ_FR2), &save); |
866 | } | 898 | } |
867 | if (tr) { /* Store final result. */ | 899 | if (tr) { /* Store final result. */ |
868 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); | 900 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); |
869 | J->base[dst] = tr; | 901 | J->base[dst] = tr; |
870 | if (dst >= J->maxslot) J->maxslot = dst+1; | 902 | if (dst >= J->maxslot) { |
903 | J->maxslot = dst+1; | ||
904 | } | ||
871 | } /* Otherwise continue with another __concat call. */ | 905 | } /* Otherwise continue with another __concat call. */ |
872 | } else { | 906 | } else { |
873 | /* Result type already specialized. */ | 907 | /* Result type already specialized. */ |
@@ -876,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
876 | } else { | 910 | } else { |
877 | lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ | 911 | lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ |
878 | } | 912 | } |
879 | lua_assert(J->baseslot >= 1); | 913 | lua_assert(J->baseslot >= 1+LJ_FR2); |
880 | } | 914 | } |
881 | 915 | ||
882 | /* -- Metamethod handling ------------------------------------------------- */ | 916 | /* -- Metamethod handling ------------------------------------------------- */ |
@@ -885,11 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
885 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) | 919 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) |
886 | { | 920 | { |
887 | BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; | 921 | BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; |
922 | #if LJ_FR2 | ||
923 | J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); | ||
924 | J->base[top+1] = TREF_CONT; | ||
925 | #else | ||
888 | J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; | 926 | J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; |
927 | #endif | ||
889 | J->framedepth++; | 928 | J->framedepth++; |
890 | for (s = J->maxslot; s < top; s++) | 929 | for (s = J->maxslot; s < top; s++) |
891 | J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ | 930 | J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ |
892 | return top+1; | 931 | return top+1+LJ_FR2; |
893 | } | 932 | } |
894 | 933 | ||
895 | /* Record metamethod lookup. */ | 934 | /* Record metamethod lookup. */ |
@@ -967,9 +1006,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) | |||
967 | BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); | 1006 | BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); |
968 | TRef *base = J->base + func; | 1007 | TRef *base = J->base + func; |
969 | TValue *basev = J->L->base + func; | 1008 | TValue *basev = J->L->base + func; |
970 | base[1] = ix->tab; base[2] = ix->key; | 1009 | base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key; |
971 | copyTV(J->L, basev+1, &ix->tabv); | 1010 | copyTV(J->L, basev+1+LJ_FR2, &ix->tabv); |
972 | copyTV(J->L, basev+2, &ix->keyv); | 1011 | copyTV(J->L, basev+2+LJ_FR2, &ix->keyv); |
973 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ | 1012 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ |
974 | if (mm != MM_unm) { | 1013 | if (mm != MM_unm) { |
975 | ix->tab = ix->key; | 1014 | ix->tab = ix->key; |
@@ -980,8 +1019,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) | |||
980 | lj_trace_err(J, LJ_TRERR_NOMM); | 1019 | lj_trace_err(J, LJ_TRERR_NOMM); |
981 | } | 1020 | } |
982 | ok: | 1021 | ok: |
983 | lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ | ||
984 | base[0] = ix->mobj; | 1022 | base[0] = ix->mobj; |
1023 | #if LJ_FR2 | ||
1024 | base[1] = 0; | ||
1025 | #endif | ||
985 | copyTV(J->L, basev+0, &ix->mobjv); | 1026 | copyTV(J->L, basev+0, &ix->mobjv); |
986 | lj_record_call(J, func, 2); | 1027 | lj_record_call(J, func, 2); |
987 | return 0; /* No result yet. */ | 1028 | return 0; /* No result yet. */ |
@@ -997,8 +1038,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
997 | BCReg func = rec_mm_prep(J, lj_cont_ra); | 1038 | BCReg func = rec_mm_prep(J, lj_cont_ra); |
998 | TRef *base = J->base + func; | 1039 | TRef *base = J->base + func; |
999 | TValue *basev = J->L->base + func; | 1040 | TValue *basev = J->L->base + func; |
1000 | lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ | ||
1001 | base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); | 1041 | base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); |
1042 | base += LJ_FR2; | ||
1043 | basev += LJ_FR2; | ||
1002 | base[1] = tr; copyTV(J->L, basev+1, tv); | 1044 | base[1] = tr; copyTV(J->L, basev+1, tv); |
1003 | #if LJ_52 | 1045 | #if LJ_52 |
1004 | base[2] = tr; copyTV(J->L, basev+2, tv); | 1046 | base[2] = tr; copyTV(J->L, basev+2, tv); |
@@ -1018,11 +1060,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
1018 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) | 1060 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) |
1019 | { | 1061 | { |
1020 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); | 1062 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); |
1021 | TRef *base = J->base + func; | 1063 | TRef *base = J->base + func + LJ_FR2; |
1022 | TValue *tv = J->L->base + func; | 1064 | TValue *tv = J->L->base + func + LJ_FR2; |
1023 | lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ | 1065 | base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key; |
1024 | base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; | 1066 | copyTV(J->L, tv-LJ_FR2, &ix->mobjv); |
1025 | copyTV(J->L, tv+0, &ix->mobjv); | ||
1026 | copyTV(J->L, tv+1, &ix->valv); | 1067 | copyTV(J->L, tv+1, &ix->valv); |
1027 | copyTV(J->L, tv+2, &ix->keyv); | 1068 | copyTV(J->L, tv+2, &ix->keyv); |
1028 | lj_record_call(J, func, 2); | 1069 | lj_record_call(J, func, 2); |
@@ -1339,11 +1380,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1339 | handlemm: | 1380 | handlemm: |
1340 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ | 1381 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ |
1341 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); | 1382 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); |
1342 | TRef *base = J->base + func; | 1383 | TRef *base = J->base + func + LJ_FR2; |
1343 | TValue *tv = J->L->base + func; | 1384 | TValue *tv = J->L->base + func + LJ_FR2; |
1344 | lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ | 1385 | base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; |
1345 | base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; | 1386 | setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv)); |
1346 | setfuncV(J->L, tv+0, funcV(&ix->mobjv)); | ||
1347 | copyTV(J->L, tv+1, &ix->tabv); | 1387 | copyTV(J->L, tv+1, &ix->tabv); |
1348 | copyTV(J->L, tv+2, &ix->keyv); | 1388 | copyTV(J->L, tv+2, &ix->keyv); |
1349 | if (ix->val) { | 1389 | if (ix->val) { |
@@ -1533,7 +1573,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) | |||
1533 | goto noconstify; | 1573 | goto noconstify; |
1534 | kfunc = lj_ir_kfunc(J, J->fn); | 1574 | kfunc = lj_ir_kfunc(J, J->fn); |
1535 | emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); | 1575 | emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); |
1536 | J->base[-1] = TREF_FRAME | kfunc; | 1576 | #if LJ_FR2 |
1577 | J->base[-2] = kfunc; | ||
1578 | #else | ||
1579 | J->base[-1] = kfunc | TREF_FRAME; | ||
1580 | #endif | ||
1537 | fn = kfunc; | 1581 | fn = kfunc; |
1538 | } | 1582 | } |
1539 | tr = lj_record_constify(J, uvval(uvp)); | 1583 | tr = lj_record_constify(J, uvval(uvp)); |
@@ -1644,11 +1688,14 @@ static void rec_func_setup(jit_State *J) | |||
1644 | static void rec_func_vararg(jit_State *J) | 1688 | static void rec_func_vararg(jit_State *J) |
1645 | { | 1689 | { |
1646 | GCproto *pt = J->pt; | 1690 | GCproto *pt = J->pt; |
1647 | BCReg s, fixargs, vframe = J->maxslot+1; | 1691 | BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; |
1648 | lua_assert((pt->flags & PROTO_VARARG)); | 1692 | lua_assert((pt->flags & PROTO_VARARG)); |
1649 | if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) | 1693 | if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) |
1650 | lj_trace_err(J, LJ_TRERR_STACKOV); | 1694 | lj_trace_err(J, LJ_TRERR_STACKOV); |
1651 | J->base[vframe-1] = J->base[-1]; /* Copy function up. */ | 1695 | J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ |
1696 | #if LJ_FR2 | ||
1697 | J->base[vframe-1] = TREF_FRAME; | ||
1698 | #endif | ||
1652 | /* Copy fixarg slots up and set their original slots to nil. */ | 1699 | /* Copy fixarg slots up and set their original slots to nil. */ |
1653 | fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; | 1700 | fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; |
1654 | for (s = 0; s < fixargs; s++) { | 1701 | for (s = 0; s < fixargs; s++) { |
@@ -1710,7 +1757,7 @@ static int select_detect(jit_State *J) | |||
1710 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | 1757 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) |
1711 | { | 1758 | { |
1712 | int32_t numparams = J->pt->numparams; | 1759 | int32_t numparams = J->pt->numparams; |
1713 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; | 1760 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; |
1714 | lua_assert(frame_isvarg(J->L->base-1)); | 1761 | lua_assert(frame_isvarg(J->L->base-1)); |
1715 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ | 1762 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ |
1716 | ptrdiff_t i; | 1763 | ptrdiff_t i; |
@@ -1722,10 +1769,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1722 | J->maxslot = dst + (BCReg)nresults; | 1769 | J->maxslot = dst + (BCReg)nresults; |
1723 | } | 1770 | } |
1724 | for (i = 0; i < nresults; i++) | 1771 | for (i = 0; i < nresults; i++) |
1725 | J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; | 1772 | J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; |
1726 | } else { /* Unknown number of varargs passed to trace. */ | 1773 | } else { /* Unknown number of varargs passed to trace. */ |
1727 | TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); | 1774 | TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME); |
1728 | int32_t frofs = 8*(1+numparams)+FRAME_VARG; | 1775 | int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG; |
1729 | if (nresults >= 0) { /* Known fixed number of results. */ | 1776 | if (nresults >= 0) { /* Known fixed number of results. */ |
1730 | ptrdiff_t i; | 1777 | ptrdiff_t i; |
1731 | if (nvararg > 0) { | 1778 | if (nvararg > 0) { |
@@ -1739,7 +1786,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1739 | vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); | 1786 | vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); |
1740 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); | 1787 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); |
1741 | for (i = 0; i < nload; i++) { | 1788 | for (i = 0; i < nload; i++) { |
1742 | IRType t = itype2irt(&J->L->base[i-1-nvararg]); | 1789 | IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); |
1743 | TRef aref = emitir(IRT(IR_AREF, IRT_PGC), | 1790 | TRef aref = emitir(IRT(IR_AREF, IRT_PGC), |
1744 | vbase, lj_ir_kint(J, (int32_t)i)); | 1791 | vbase, lj_ir_kint(J, (int32_t)i)); |
1745 | TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | 1792 | TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); |
@@ -1787,14 +1834,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | |||
1787 | if (idx != 0 && idx <= nvararg) { | 1834 | if (idx != 0 && idx <= nvararg) { |
1788 | IRType t; | 1835 | IRType t; |
1789 | TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); | 1836 | TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); |
1790 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); | 1837 | vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, |
1791 | t = itype2irt(&J->L->base[idx-2-nvararg]); | 1838 | lj_ir_kint(J, frofs-(8<<LJ_FR2))); |
1839 | t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); | ||
1792 | aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); | 1840 | aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); |
1793 | tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | 1841 | tr = emitir(IRTG(IR_VLOAD, t), aref, 0); |
1794 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | 1842 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ |
1795 | } | 1843 | } |
1796 | J->base[dst-2] = tr; | 1844 | J->base[dst-2-LJ_FR2] = tr; |
1797 | J->maxslot = dst-1; | 1845 | J->maxslot = dst-1-LJ_FR2; |
1798 | J->bcskip = 2; /* Skip CALLM + select. */ | 1846 | J->bcskip = 2; /* Skip CALLM + select. */ |
1799 | } else { | 1847 | } else { |
1800 | nyivarg: | 1848 | nyivarg: |
@@ -1887,7 +1935,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) | |||
1887 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); | 1935 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); |
1888 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 1936 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
1889 | /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ | 1937 | /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ |
1938 | #if LJ_FR2 | ||
1939 | SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; | ||
1940 | uint64_t pcbase; | ||
1941 | memcpy(&pcbase, flink, sizeof(uint64_t)); | ||
1942 | pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); | ||
1943 | memcpy(flink, &pcbase, sizeof(uint64_t)); | ||
1944 | #else | ||
1890 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); | 1945 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); |
1946 | #endif | ||
1891 | J->needsnap = 1; | 1947 | J->needsnap = 1; |
1892 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); | 1948 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); |
1893 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ | 1949 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ |
@@ -2185,7 +2241,13 @@ void lj_record_ins(jit_State *J) | |||
2185 | 2241 | ||
2186 | case BC_MOV: | 2242 | case BC_MOV: |
2187 | /* Clear gap of method call to avoid resurrecting previous refs. */ | 2243 | /* Clear gap of method call to avoid resurrecting previous refs. */ |
2188 | if (ra > J->maxslot) J->base[ra-1] = 0; | 2244 | if (ra > J->maxslot) { |
2245 | #if LJ_FR2 | ||
2246 | memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); | ||
2247 | #else | ||
2248 | J->base[ra-1] = 0; | ||
2249 | #endif | ||
2250 | } | ||
2189 | break; | 2251 | break; |
2190 | case BC_KSTR: case BC_KNUM: case BC_KPRI: | 2252 | case BC_KSTR: case BC_KNUM: case BC_KPRI: |
2191 | break; | 2253 | break; |
@@ -2254,14 +2316,14 @@ void lj_record_ins(jit_State *J) | |||
2254 | /* -- Calls and vararg handling ----------------------------------------- */ | 2316 | /* -- Calls and vararg handling ----------------------------------------- */ |
2255 | 2317 | ||
2256 | case BC_ITERC: | 2318 | case BC_ITERC: |
2257 | J->base[ra] = getslot(J, ra-3-LJ_FR2); | 2319 | J->base[ra] = getslot(J, ra-3); |
2258 | J->base[ra+1] = getslot(J, ra-2-LJ_FR2); | 2320 | J->base[ra+1+LJ_FR2] = getslot(J, ra-2); |
2259 | J->base[ra+2] = getslot(J, ra-1-LJ_FR2); | 2321 | J->base[ra+2+LJ_FR2] = getslot(J, ra-1); |
2260 | { /* Do the actual copy now because lj_record_call needs the values. */ | 2322 | { /* Do the actual copy now because lj_record_call needs the values. */ |
2261 | TValue *b = &J->L->base[ra]; | 2323 | TValue *b = &J->L->base[ra]; |
2262 | copyTV(J->L, b, b-3-LJ_FR2); | 2324 | copyTV(J->L, b, b-3); |
2263 | copyTV(J->L, b+1, b-2-LJ_FR2); | 2325 | copyTV(J->L, b+1+LJ_FR2, b-2); |
2264 | copyTV(J->L, b+2, b-1-LJ_FR2); | 2326 | copyTV(J->L, b+2+LJ_FR2, b-1); |
2265 | } | 2327 | } |
2266 | lj_record_call(J, ra, (ptrdiff_t)rc-1); | 2328 | lj_record_call(J, ra, (ptrdiff_t)rc-1); |
2267 | break; | 2329 | break; |
@@ -2384,7 +2446,12 @@ void lj_record_ins(jit_State *J) | |||
2384 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ | 2446 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ |
2385 | if (bcmode_a(op) == BCMdst && rc) { | 2447 | if (bcmode_a(op) == BCMdst && rc) { |
2386 | J->base[ra] = rc; | 2448 | J->base[ra] = rc; |
2387 | if (ra >= J->maxslot) J->maxslot = ra+1; | 2449 | if (ra >= J->maxslot) { |
2450 | #if LJ_FR2 | ||
2451 | if (ra > J->maxslot) J->base[ra-1] = 0; | ||
2452 | #endif | ||
2453 | J->maxslot = ra+1; | ||
2454 | } | ||
2388 | } | 2455 | } |
2389 | 2456 | ||
2390 | #undef rav | 2457 | #undef rav |
@@ -2469,7 +2536,7 @@ void lj_record_setup(jit_State *J) | |||
2469 | J->scev.idx = REF_NIL; | 2536 | J->scev.idx = REF_NIL; |
2470 | setmref(J->scev.pc, NULL); | 2537 | setmref(J->scev.pc, NULL); |
2471 | 2538 | ||
2472 | J->baseslot = 1; /* Invoking function is at base[-1]. */ | 2539 | J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */ |
2473 | J->base = J->slot + J->baseslot; | 2540 | J->base = J->slot + J->baseslot; |
2474 | J->maxslot = 0; | 2541 | J->maxslot = 0; |
2475 | J->framedepth = 0; | 2542 | J->framedepth = 0; |
diff --git a/src/lj_snap.c b/src/lj_snap.c index 6199b1f0..33c058be 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
@@ -68,10 +68,18 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
68 | for (s = 0; s < nslots; s++) { | 68 | for (s = 0; s < nslots; s++) { |
69 | TRef tr = J->slot[s]; | 69 | TRef tr = J->slot[s]; |
70 | IRRef ref = tref_ref(tr); | 70 | IRRef ref = tref_ref(tr); |
71 | #if LJ_FR2 | ||
72 | if (s == 1) continue; | ||
73 | if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { | ||
74 | TValue *base = J->L->base - J->baseslot; | ||
75 | tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); | ||
76 | ref = tref_ref(tr); | ||
77 | } | ||
78 | #endif | ||
71 | if (ref) { | 79 | if (ref) { |
72 | SnapEntry sn = SNAP_TR(s, tr); | 80 | SnapEntry sn = SNAP_TR(s, tr); |
73 | IRIns *ir = &J->cur.ir[ref]; | 81 | IRIns *ir = &J->cur.ir[ref]; |
74 | if (!(sn & (SNAP_CONT|SNAP_FRAME)) && | 82 | if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && |
75 | ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { | 83 | ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { |
76 | /* No need to snapshot unmodified non-inherited slots. */ | 84 | /* No need to snapshot unmodified non-inherited slots. */ |
77 | if (!(ir->op2 & IRSLOAD_INHERIT)) | 85 | if (!(ir->op2 & IRSLOAD_INHERIT)) |
@@ -90,34 +98,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
90 | } | 98 | } |
91 | 99 | ||
92 | /* Add frame links at the end of the snapshot. */ | 100 | /* Add frame links at the end of the snapshot. */ |
93 | static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) | 101 | static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) |
94 | { | 102 | { |
95 | cTValue *frame = J->L->base - 1; | 103 | cTValue *frame = J->L->base - 1; |
96 | cTValue *lim = J->L->base - J->baseslot; | 104 | cTValue *lim = J->L->base - J->baseslot + LJ_FR2; |
97 | GCfunc *fn = frame_func(frame); | 105 | GCfunc *fn = frame_func(frame); |
98 | cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; | 106 | cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; |
107 | #if LJ_FR2 | ||
108 | uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); | ||
109 | lua_assert(2 <= J->baseslot && J->baseslot <= 257); | ||
110 | memcpy(map, &pcbase, sizeof(uint64_t)); | ||
111 | #else | ||
99 | MSize f = 0; | 112 | MSize f = 0; |
100 | lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ | ||
101 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ | 113 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ |
114 | #endif | ||
102 | while (frame > lim) { /* Backwards traversal of all frames above base. */ | 115 | while (frame > lim) { /* Backwards traversal of all frames above base. */ |
103 | if (frame_islua(frame)) { | 116 | if (frame_islua(frame)) { |
117 | #if !LJ_FR2 | ||
104 | map[f++] = SNAP_MKPC(frame_pc(frame)); | 118 | map[f++] = SNAP_MKPC(frame_pc(frame)); |
119 | #endif | ||
105 | frame = frame_prevl(frame); | 120 | frame = frame_prevl(frame); |
106 | } else if (frame_iscont(frame)) { | 121 | } else if (frame_iscont(frame)) { |
122 | #if !LJ_FR2 | ||
107 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | 123 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
108 | map[f++] = SNAP_MKPC(frame_contpc(frame)); | 124 | map[f++] = SNAP_MKPC(frame_contpc(frame)); |
125 | #endif | ||
109 | frame = frame_prevd(frame); | 126 | frame = frame_prevd(frame); |
110 | } else { | 127 | } else { |
111 | lua_assert(!frame_isc(frame)); | 128 | lua_assert(!frame_isc(frame)); |
129 | #if !LJ_FR2 | ||
112 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | 130 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
131 | #endif | ||
113 | frame = frame_prevd(frame); | 132 | frame = frame_prevd(frame); |
114 | continue; | 133 | continue; |
115 | } | 134 | } |
116 | if (frame + funcproto(frame_func(frame))->framesize > ftop) | 135 | if (frame + funcproto(frame_func(frame))->framesize > ftop) |
117 | ftop = frame + funcproto(frame_func(frame))->framesize; | 136 | ftop = frame + funcproto(frame_func(frame))->framesize; |
118 | } | 137 | } |
138 | *topslot = (uint8_t)(ftop - lim); | ||
139 | #if LJ_FR2 | ||
140 | lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); | ||
141 | return 2; | ||
142 | #else | ||
119 | lua_assert(f == (MSize)(1 + J->framedepth)); | 143 | lua_assert(f == (MSize)(1 + J->framedepth)); |
120 | return (BCReg)(ftop - lim); | 144 | return f; |
145 | #endif | ||
121 | } | 146 | } |
122 | 147 | ||
123 | /* Take a snapshot of the current stack. */ | 148 | /* Take a snapshot of the current stack. */ |
@@ -127,16 +152,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | |||
127 | MSize nent; | 152 | MSize nent; |
128 | SnapEntry *p; | 153 | SnapEntry *p; |
129 | /* Conservative estimate. */ | 154 | /* Conservative estimate. */ |
130 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); | 155 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); |
131 | p = &J->cur.snapmap[nsnapmap]; | 156 | p = &J->cur.snapmap[nsnapmap]; |
132 | nent = snapshot_slots(J, p, nslots); | 157 | nent = snapshot_slots(J, p, nslots); |
133 | snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); | 158 | snap->nent = (uint8_t)nent; |
159 | nent += snapshot_framelinks(J, p + nent, &snap->topslot); | ||
134 | snap->mapofs = (uint16_t)nsnapmap; | 160 | snap->mapofs = (uint16_t)nsnapmap; |
135 | snap->ref = (IRRef1)J->cur.nins; | 161 | snap->ref = (IRRef1)J->cur.nins; |
136 | snap->nent = (uint8_t)nent; | ||
137 | snap->nslots = (uint8_t)nslots; | 162 | snap->nslots = (uint8_t)nslots; |
138 | snap->count = 0; | 163 | snap->count = 0; |
139 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); | 164 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nent); |
140 | } | 165 | } |
141 | 166 | ||
142 | /* Add or merge a snapshot. */ | 167 | /* Add or merge a snapshot. */ |
@@ -284,8 +309,8 @@ void lj_snap_shrink(jit_State *J) | |||
284 | MSize n, m, nlim, nent = snap->nent; | 309 | MSize n, m, nlim, nent = snap->nent; |
285 | uint8_t udf[SNAP_USEDEF_SLOTS]; | 310 | uint8_t udf[SNAP_USEDEF_SLOTS]; |
286 | BCReg maxslot = J->maxslot; | 311 | BCReg maxslot = J->maxslot; |
287 | BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); | ||
288 | BCReg baseslot = J->baseslot; | 312 | BCReg baseslot = J->baseslot; |
313 | BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); | ||
289 | maxslot += baseslot; | 314 | maxslot += baseslot; |
290 | minslot += baseslot; | 315 | minslot += baseslot; |
291 | snap->nslots = (uint8_t)maxslot; | 316 | snap->nslots = (uint8_t)maxslot; |
@@ -794,11 +819,13 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
794 | SnapShot *snap = &T->snap[snapno]; | 819 | SnapShot *snap = &T->snap[snapno]; |
795 | MSize n, nent = snap->nent; | 820 | MSize n, nent = snap->nent; |
796 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 821 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
797 | SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; | 822 | SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; |
823 | #if !LJ_FR2 | ||
798 | ptrdiff_t ftsz0; | 824 | ptrdiff_t ftsz0; |
825 | #endif | ||
799 | TValue *frame; | 826 | TValue *frame; |
800 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 827 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
801 | const BCIns *pc = snap_pc(map[nent]); | 828 | const BCIns *pc = snap_pc(&map[nent]); |
802 | lua_State *L = J->L; | 829 | lua_State *L = J->L; |
803 | 830 | ||
804 | /* Set interpreter PC to the next PC to get correct error messages. */ | 831 | /* Set interpreter PC to the next PC to get correct error messages. */ |
@@ -811,8 +838,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
811 | } | 838 | } |
812 | 839 | ||
813 | /* Fill stack slots with data from the registers and spill slots. */ | 840 | /* Fill stack slots with data from the registers and spill slots. */ |
814 | frame = L->base-1; | 841 | frame = L->base-1-LJ_FR2; |
842 | #if !LJ_FR2 | ||
815 | ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ | 843 | ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ |
844 | #endif | ||
816 | for (n = 0; n < nent; n++) { | 845 | for (n = 0; n < nent; n++) { |
817 | SnapEntry sn = map[n]; | 846 | SnapEntry sn = map[n]; |
818 | if (!(sn & SNAP_NORESTORE)) { | 847 | if (!(sn & SNAP_NORESTORE)) { |
@@ -835,14 +864,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
835 | TValue tmp; | 864 | TValue tmp; |
836 | snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); | 865 | snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); |
837 | o->u32.hi = tmp.u32.lo; | 866 | o->u32.hi = tmp.u32.lo; |
867 | #if !LJ_FR2 | ||
838 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 868 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
839 | lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */ | ||
840 | /* Overwrite tag with frame link. */ | 869 | /* Overwrite tag with frame link. */ |
841 | setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); | 870 | setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); |
842 | L->base = o+1; | 871 | L->base = o+1; |
872 | #endif | ||
843 | } | 873 | } |
844 | } | 874 | } |
845 | } | 875 | } |
876 | #if LJ_FR2 | ||
877 | L->base += (map[nent+LJ_BE] & 0xff); | ||
878 | #endif | ||
846 | lua_assert(map + nent == flinks); | 879 | lua_assert(map + nent == flinks); |
847 | 880 | ||
848 | /* Compute current stack top. */ | 881 | /* Compute current stack top. */ |