aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2016-05-23 01:49:00 +0200
committer Mike Pall <mike> 2016-05-23 01:49:00 +0200
commit 6c8258d74b7d4ae7f288897518f23c809b9395f2 (patch)
tree 7479dce31b17ed704f20ee3920b6de6696521f26
parent 8f868a9d02340bae8b3b4a703118b324213f5c6d (diff)
download luajit-6c8258d74b7d4ae7f288897518f23c809b9395f2.tar.gz
download luajit-6c8258d74b7d4ae7f288897518f23c809b9395f2.tar.bz2
download luajit-6c8258d74b7d4ae7f288897518f23c809b9395f2.zip
LJ_FR2: Add support for trace recording and snapshots.
Contributed by Peter Cawley.
-rw-r--r-- src/jit/dump.lua 10
-rw-r--r-- src/lj_arch.h 2
-rw-r--r-- src/lj_asm.c 6
-rw-r--r-- src/lj_asm_x86.h 22
-rw-r--r-- src/lj_crecord.c 30
-rw-r--r-- src/lj_def.h 2
-rw-r--r-- src/lj_ffrecord.c 49
-rw-r--r-- src/lj_ir.h 2
-rw-r--r-- src/lj_jit.h 14
-rw-r--r-- src/lj_record.c 231
-rw-r--r-- src/lj_snap.c 61
11 files changed, 291 insertions, 138 deletions
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 9a722f73..a635af10 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -310,15 +310,17 @@ local function fmtfunc(func, pc)
310 end 310 end
311end 311end
312 312
313local function formatk(tr, idx) 313local function formatk(tr, idx, sn)
314 local k, t, slot = tracek(tr, idx) 314 local k, t, slot = tracek(tr, idx)
315 local tn = type(k) 315 local tn = type(k)
316 local s 316 local s
317 if tn == "number" then 317 if tn == "number" then
318 if k == 2^52+2^51 then 318 if band(sn or 0, 0x30000) ~= 0 then
319 s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
320 elseif k == 2^52+2^51 then
319 s = "bias" 321 s = "bias"
320 else 322 else
321 s = format("%+.14g", k) 323 s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
322 end 324 end
323 elseif tn == "string" then 325 elseif tn == "string" then
324 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) 326 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@@ -354,7 +356,7 @@ local function printsnap(tr, snap)
354 n = n + 1 356 n = n + 1
355 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS 357 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
356 if ref < 0 then 358 if ref < 0 then
357 out:write(formatk(tr, ref)) 359 out:write(formatk(tr, ref, sn))
358 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM 360 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
359 out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) 361 out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
360 else 362 else
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 612c7303..72622a21 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -453,7 +453,7 @@
453#endif 453#endif
454 454
455/* Disable or enable the JIT compiler. */ 455/* Disable or enable the JIT compiler. */
456#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_FR2 || LJ_GC64 456#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64
457#define LJ_HASJIT 0 457#define LJ_HASJIT 0
458#else 458#else
459#define LJ_HASJIT 1 459#define LJ_HASJIT 1
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9f784cc8..5dd7ca3a 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1893,7 +1893,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
1893 SnapEntry sn = map[n-1]; 1893 SnapEntry sn = map[n-1];
1894 if ((sn & SNAP_FRAME)) { 1894 if ((sn & SNAP_FRAME)) {
1895 *gotframe = 1; 1895 *gotframe = 1;
1896 return snap_slot(sn); 1896 return snap_slot(sn) - LJ_FR2;
1897 } 1897 }
1898 } 1898 }
1899 return 0; 1899 return 0;
@@ -1913,7 +1913,7 @@ static void asm_tail_link(ASMState *as)
1913 1913
1914 if (as->T->link == 0) { 1914 if (as->T->link == 0) {
1915 /* Setup fixed registers for exit to interpreter. */ 1915 /* Setup fixed registers for exit to interpreter. */
1916 const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); 1916 const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
1917 int32_t mres; 1917 int32_t mres;
1918 if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ 1918 if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
1919 BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; 1919 BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
@@ -1922,7 +1922,7 @@ static void asm_tail_link(ASMState *as)
1922 } 1922 }
1923 ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); 1923 ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
1924 ra_allockreg(as, i32ptr(pc), RID_LPC); 1924 ra_allockreg(as, i32ptr(pc), RID_LPC);
1925 mres = (int32_t)(snap->nslots - baseslot); 1925 mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
1926 switch (bc_op(*pc)) { 1926 switch (bc_op(*pc)) {
1927 case BC_CALLM: case BC_CALLMT: 1927 case BC_CALLM: case BC_CALLMT:
1928 mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; 1928 mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 0361a965..83fe22b2 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -348,7 +348,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
348 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && 348 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
349 noconflict(as, ref, IR_RETF, 0)) { 349 noconflict(as, ref, IR_RETF, 0)) {
350 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); 350 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
351 as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); 351 as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
352 (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
352 as->mrm.idx = RID_NONE; 353 as->mrm.idx = RID_NONE;
353 return RID_MRM; 354 return RID_MRM;
354 } 355 }
@@ -655,6 +656,9 @@ static void asm_callx(ASMState *as, IRIns *ir)
655static void asm_retf(ASMState *as, IRIns *ir) 656static void asm_retf(ASMState *as, IRIns *ir)
656{ 657{
657 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 658 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
659#if LJ_FR2
660 Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base));
661#endif
658 void *pc = ir_kptr(IR(ir->op2)); 662 void *pc = ir_kptr(IR(ir->op2));
659 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); 663 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
660 as->topslot -= (BCReg)delta; 664 as->topslot -= (BCReg)delta;
@@ -663,7 +667,12 @@ static void asm_retf(ASMState *as, IRIns *ir)
663 emit_setgl(as, base, jit_base); 667 emit_setgl(as, base, jit_base);
664 emit_addptr(as, base, -8*delta); 668 emit_addptr(as, base, -8*delta);
665 asm_guardcc(as, CC_NE); 669 asm_guardcc(as, CC_NE);
670#if LJ_FR2
671 emit_rmro(as, XO_CMP, rpc, base, -8);
672 emit_loadu64(as, rpc, u64ptr(pc));
673#else
666 emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); 674 emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
675#endif
667} 676}
668 677
669/* -- Type conversions ---------------------------------------------------- */ 678/* -- Type conversions ---------------------------------------------------- */
@@ -1397,7 +1406,8 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1397 1406
1398static void asm_sload(ASMState *as, IRIns *ir) 1407static void asm_sload(ASMState *as, IRIns *ir)
1399{ 1408{
1400 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); 1409 int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
1410 (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1401 IRType1 t = ir->t; 1411 IRType1 t = ir->t;
1402 Reg base; 1412 Reg base;
1403 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1413 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
@@ -2383,13 +2393,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2383static void asm_stack_restore(ASMState *as, SnapShot *snap) 2393static void asm_stack_restore(ASMState *as, SnapShot *snap)
2384{ 2394{
2385 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2395 SnapEntry *map = &as->T->snapmap[snap->mapofs];
2386 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; 2396#if !LJ_FR2 || defined(LUA_USE_ASSERT)
2397 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
2398#endif
2387 MSize n, nent = snap->nent; 2399 MSize n, nent = snap->nent;
2388 /* Store the value of all modified slots to the Lua stack. */ 2400 /* Store the value of all modified slots to the Lua stack. */
2389 for (n = 0; n < nent; n++) { 2401 for (n = 0; n < nent; n++) {
2390 SnapEntry sn = map[n]; 2402 SnapEntry sn = map[n];
2391 BCReg s = snap_slot(sn); 2403 BCReg s = snap_slot(sn);
2392 int32_t ofs = 8*((int32_t)s-1); 2404 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
2393 IRRef ref = snap_ref(sn); 2405 IRRef ref = snap_ref(sn);
2394 IRIns *ir = IR(ref); 2406 IRIns *ir = IR(ref);
2395 if ((sn & SNAP_NORESTORE)) 2407 if ((sn & SNAP_NORESTORE))
@@ -2407,8 +2419,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2407 emit_movmroi(as, RID_BASE, ofs, ir->i); 2419 emit_movmroi(as, RID_BASE, ofs, ir->i);
2408 } 2420 }
2409 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2421 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
2422#if !LJ_FR2
2410 if (s != 0) /* Do not overwrite link to previous frame. */ 2423 if (s != 0) /* Do not overwrite link to previous frame. */
2411 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); 2424 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
2425#endif
2412 } else { 2426 } else {
2413 if (!(LJ_64 && irt_islightud(ir->t))) 2427 if (!(LJ_64 && irt_islightud(ir->t)))
2414 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); 2428 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index c0f7e3d7..d568b20a 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -712,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
712 return tr; 712 return tr;
713} 713}
714 714
715/* Tailcall to function. */
716static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv)
717{
718 TRef kfunc = lj_ir_kfunc(J, funcV(tv));
719#if LJ_FR2
720 J->base[-2] = kfunc;
721 J->base[-1] = TREF_FRAME;
722#else
723 J->base[-1] = kfunc | TREF_FRAME;
724#endif
725 rd->nres = -1; /* Pending tailcall. */
726}
727
715/* Record ctype __index/__newindex metamethods. */ 728/* Record ctype __index/__newindex metamethods. */
716static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, 729static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
717 RecordFFData *rd) 730 RecordFFData *rd)
@@ -721,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
721 if (!tv) 734 if (!tv)
722 lj_trace_err(J, LJ_TRERR_BADTYPE); 735 lj_trace_err(J, LJ_TRERR_BADTYPE);
723 if (tvisfunc(tv)) { 736 if (tvisfunc(tv)) {
724 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 737 crec_tailcall(J, rd, tv);
725 rd->nres = -1; /* Pending tailcall. */
726 } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { 738 } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) {
727 /* Specialize to result of __index lookup. */ 739 /* Specialize to result of __index lookup. */
728 cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); 740 cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]);
@@ -1119,20 +1131,20 @@ static void crec_snap_caller(jit_State *J)
1119 lua_State *L = J->L; 1131 lua_State *L = J->L;
1120 TValue *base = L->base, *top = L->top; 1132 TValue *base = L->base, *top = L->top;
1121 const BCIns *pc = J->pc; 1133 const BCIns *pc = J->pc;
1122 TRef ftr = J->base[-1]; 1134 TRef ftr = J->base[-1-LJ_FR2];
1123 ptrdiff_t delta; 1135 ptrdiff_t delta;
1124 if (!frame_islua(base-1) || J->framedepth <= 0) 1136 if (!frame_islua(base-1) || J->framedepth <= 0)
1125 lj_trace_err(J, LJ_TRERR_NYICALL); 1137 lj_trace_err(J, LJ_TRERR_NYICALL);
1126 J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); 1138 J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
1127 L->top = base; L->base = base - delta; 1139 L->top = base; L->base = base - delta;
1128 J->base[-1] = TREF_FALSE; 1140 J->base[-1-LJ_FR2] = TREF_FALSE;
1129 J->base -= delta; J->baseslot -= (BCReg)delta; 1141 J->base -= delta; J->baseslot -= (BCReg)delta;
1130 J->maxslot = (BCReg)delta; J->framedepth--; 1142 J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--;
1131 lj_snap_add(J); 1143 lj_snap_add(J);
1132 L->base = base; L->top = top; 1144 L->base = base; L->top = top;
1133 J->framedepth++; J->maxslot = 1; 1145 J->framedepth++; J->maxslot = 1;
1134 J->base += delta; J->baseslot += (BCReg)delta; 1146 J->base += delta; J->baseslot += (BCReg)delta;
1135 J->base[-1] = ftr; J->pc = pc; 1147 J->base[-1-LJ_FR2] = ftr; J->pc = pc;
1136} 1148}
1137 1149
1138/* Record function call. */ 1150/* Record function call. */
@@ -1224,8 +1236,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
1224 tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); 1236 tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm);
1225 if (tv) { 1237 if (tv) {
1226 if (tvisfunc(tv)) { 1238 if (tvisfunc(tv)) {
1227 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 1239 crec_tailcall(J, rd, tv);
1228 rd->nres = -1; /* Pending tailcall. */
1229 return; 1240 return;
1230 } 1241 }
1231 } else if (mm == MM_new) { 1242 } else if (mm == MM_new) {
@@ -1373,8 +1384,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
1373 } 1384 }
1374 if (tv) { 1385 if (tv) {
1375 if (tvisfunc(tv)) { 1386 if (tvisfunc(tv)) {
1376 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 1387 crec_tailcall(J, rd, tv);
1377 rd->nres = -1; /* Pending tailcall. */
1378 return 0; 1388 return 0;
1379 } /* NYI: non-function metamethods. */ 1389 } /* NYI: non-function metamethods. */
1380 } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ 1390 } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */
diff --git a/src/lj_def.h b/src/lj_def.h
index 29d3fdda..9413399d 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -95,6 +95,8 @@ typedef unsigned int uintptr_t;
95#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) 95#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
96#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) 96#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
97#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) 97#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
98#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p))
99#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p))
98 100
99#define checki8(x) ((x) == (int32_t)(int8_t)(x)) 101#define checki8(x) ((x) == (int32_t)(int8_t)(x))
100#define checku8(x) ((x) == (int32_t)(uint8_t)(x)) 102#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index ae567622..64a9a65d 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -102,35 +102,41 @@ static void recff_stitch(jit_State *J)
102 ASMFunction cont = lj_cont_stitch; 102 ASMFunction cont = lj_cont_stitch;
103 lua_State *L = J->L; 103 lua_State *L = J->L;
104 TValue *base = L->base; 104 TValue *base = L->base;
105 BCReg nslot = J->maxslot + 1 + LJ_FR2;
106 TValue *nframe = base + 1 + LJ_FR2;
105 const BCIns *pc = frame_pc(base-1); 107 const BCIns *pc = frame_pc(base-1);
106 TValue *pframe = frame_prevl(base-1); 108 TValue *pframe = frame_prevl(base-1);
107 109
108 lua_assert(!LJ_FR2); /* TODO_FR2: handle frame shift. */
109 /* Move func + args up in Lua stack and insert continuation. */ 110 /* Move func + args up in Lua stack and insert continuation. */
110 memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1)); 111 memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
111 setframe_ftsz(base+1, ((char *)(base+1) - (char *)pframe) + FRAME_CONT); 112 setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
112 setcont(base, cont); 113 setcont(base-LJ_FR2, cont);
113 setframe_pc(base, pc); 114 setframe_pc(base, pc);
114 setnilV(base-1); /* Incorrect, but rec_check_slots() won't run anymore. */ 115 setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */
115 L->base += 2; 116 L->base += 2 + LJ_FR2;
116 L->top += 2; 117 L->top += 2 + LJ_FR2;
117 118
118 /* Ditto for the IR. */ 119 /* Ditto for the IR. */
119 memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1)); 120 memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot);
121#if LJ_FR2
122 J->base[2] = TREF_FRAME;
123 J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
124 J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT;
125#else
120 J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; 126 J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
121 J->base[-1] = lj_ir_ktrace(J); 127#endif
122 J->ktrace = tref_ref(J->base[-1]); 128 J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J)));
123 J->base += 2; 129 J->base += 2 + LJ_FR2;
124 J->baseslot += 2; 130 J->baseslot += 2 + LJ_FR2;
125 J->framedepth++; 131 J->framedepth++;
126 132
127 lj_record_stop(J, LJ_TRLINK_STITCH, 0); 133 lj_record_stop(J, LJ_TRLINK_STITCH, 0);
128 134
129 /* Undo Lua stack changes. */ 135 /* Undo Lua stack changes. */
130 memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1)); 136 memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
131 setframe_pc(base-1, pc); 137 setframe_pc(base-1, pc);
132 L->base -= 2; 138 L->base -= 2 + LJ_FR2;
133 L->top -= 2; 139 L->top -= 2 + LJ_FR2;
134} 140}
135 141
136/* Fallback handler for fast functions that are not recorded (yet). */ 142/* Fallback handler for fast functions that are not recorded (yet). */
@@ -373,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm)
373 int errcode; 379 int errcode;
374 TValue argv0; 380 TValue argv0;
375 /* Temporarily insert metamethod below object. */ 381 /* Temporarily insert metamethod below object. */
376 J->base[1] = J->base[0]; 382 J->base[1+LJ_FR2] = J->base[0];
377 J->base[0] = ix.mobj; 383 J->base[0] = ix.mobj;
378 copyTV(J->L, &argv0, &rd->argv[0]); 384 copyTV(J->L, &argv0, &rd->argv[0]);
379 copyTV(J->L, &rd->argv[1], &rd->argv[0]); 385 copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]);
380 copyTV(J->L, &rd->argv[0], &ix.mobjv); 386 copyTV(J->L, &rd->argv[0], &ix.mobjv);
381 /* Need to protect lj_record_tailcall because it may throw. */ 387 /* Need to protect lj_record_tailcall because it may throw. */
382 errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); 388 errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp);
@@ -443,6 +449,10 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
443static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) 449static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
444{ 450{
445 if (J->maxslot >= 1) { 451 if (J->maxslot >= 1) {
452#if LJ_FR2
453 /* Shift function arguments up. */
454 memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot);
455#endif
446 lj_record_call(J, 0, J->maxslot - 1); 456 lj_record_call(J, 0, J->maxslot - 1);
447 rd->nres = -1; /* Pending call. */ 457 rd->nres = -1; /* Pending call. */
448 } /* else: Interpreter will throw. */ 458 } /* else: Interpreter will throw. */
@@ -462,13 +472,16 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
462 TValue argv0, argv1; 472 TValue argv0, argv1;
463 TRef tmp; 473 TRef tmp;
464 int errcode; 474 int errcode;
465 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
466 /* Swap function and traceback. */ 475 /* Swap function and traceback. */
467 tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp; 476 tmp = J->base[0]; J->base[0] = J->base[1]; J->base[1] = tmp;
468 copyTV(J->L, &argv0, &rd->argv[0]); 477 copyTV(J->L, &argv0, &rd->argv[0]);
469 copyTV(J->L, &argv1, &rd->argv[1]); 478 copyTV(J->L, &argv1, &rd->argv[1]);
470 copyTV(J->L, &rd->argv[0], &argv1); 479 copyTV(J->L, &rd->argv[0], &argv1);
471 copyTV(J->L, &rd->argv[1], &argv0); 480 copyTV(J->L, &rd->argv[1], &argv0);
481#if LJ_FR2
482 /* Shift function arguments up. */
483 memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1));
484#endif
472 /* Need to protect lj_record_call because it may throw. */ 485 /* Need to protect lj_record_call because it may throw. */
473 errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); 486 errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp);
474 /* Always undo Lua stack swap to avoid confusing the interpreter. */ 487 /* Always undo Lua stack swap to avoid confusing the interpreter. */
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 3de57046..4e9c85c7 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -220,7 +220,7 @@ IRFLDEF(FLENUM)
220 220
221/* SLOAD mode bits, stored in op2. */ 221/* SLOAD mode bits, stored in op2. */
222#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ 222#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
223#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ 223#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
224#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ 224#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */
225#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ 225#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
226#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ 226#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 55fbea8b..f460a0ab 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -179,14 +179,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
179#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) 179#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
180#define SNAP_TR(slot, tr) \ 180#define SNAP_TR(slot, tr) \
181 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) 181 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
182#if !LJ_FR2
182#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) 183#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
184#endif
183#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) 185#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
184#define snap_ref(sn) ((sn) & 0xffff) 186#define snap_ref(sn) ((sn) & 0xffff)
185#define snap_slot(sn) ((BCReg)((sn) >> 24)) 187#define snap_slot(sn) ((BCReg)((sn) >> 24))
186#define snap_isframe(sn) ((sn) & SNAP_FRAME) 188#define snap_isframe(sn) ((sn) & SNAP_FRAME)
187#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
188#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) 189#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
189 190
191static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
192{
193#if LJ_FR2
194 uint64_t pcbase;
195 memcpy(&pcbase, sn, sizeof(uint64_t));
196 return (const BCIns *)(pcbase >> 8);
197#else
198 return (const BCIns *)(uintptr_t)*sn;
199#endif
200}
201
190/* Snapshot and exit numbers. */ 202/* Snapshot and exit numbers. */
191typedef uint32_t SnapNo; 203typedef uint32_t SnapNo;
192typedef uint32_t ExitNo; 204typedef uint32_t ExitNo;
diff --git a/src/lj_record.c b/src/lj_record.c
index 3b754897..f0481050 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -87,30 +87,48 @@ static void rec_check_slots(jit_State *J)
87 BCReg s, nslots = J->baseslot + J->maxslot; 87 BCReg s, nslots = J->baseslot + J->maxslot;
88 int32_t depth = 0; 88 int32_t depth = 0;
89 cTValue *base = J->L->base - J->baseslot; 89 cTValue *base = J->L->base - J->baseslot;
90 lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); 90 lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS);
91 lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); 91 lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME));
92 lua_assert(nslots < LJ_MAX_JSLOTS); 92 lua_assert(nslots < LJ_MAX_JSLOTS);
93 for (s = 0; s < nslots; s++) { 93 for (s = 0; s < nslots; s++) {
94 TRef tr = J->slot[s]; 94 TRef tr = J->slot[s];
95 if (tr) { 95 if (tr) {
96 cTValue *tv = &base[s]; 96 cTValue *tv = &base[s];
97 IRRef ref = tref_ref(tr); 97 IRRef ref = tref_ref(tr);
98 IRIns *ir; 98 IRIns *ir = NULL; /* Silence compiler. */
99 lua_assert(ref >= J->cur.nk && ref < J->cur.nins); 99 if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
100 ir = IR(ref); 100 lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
101 lua_assert(irt_t(ir->t) == tref_t(tr)); 101 ir = IR(ref);
102 lua_assert(irt_t(ir->t) == tref_t(tr));
103 }
102 if (s == 0) { 104 if (s == 0) {
103 lua_assert(tref_isfunc(tr)); 105 lua_assert(tref_isfunc(tr));
106#if LJ_FR2
107 } else if (s == 1) {
108 lua_assert(0);
109#endif
104 } else if ((tr & TREF_FRAME)) { 110 } else if ((tr & TREF_FRAME)) {
105 GCfunc *fn = gco2func(frame_gc(tv)); 111 GCfunc *fn = gco2func(frame_gc(tv));
106 BCReg delta = (BCReg)(tv - frame_prev(tv)); 112 BCReg delta = (BCReg)(tv - frame_prev(tv));
113#if LJ_FR2
114 if (ref)
115 lua_assert(ir_knum(ir)->u64 == tv->u64);
116 tr = J->slot[s-1];
117 ir = IR(tref_ref(tr));
118#endif
107 lua_assert(tref_isfunc(tr)); 119 lua_assert(tref_isfunc(tr));
108 if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); 120 if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
109 lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); 121 lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
122 : (s == delta + LJ_FR2));
110 depth++; 123 depth++;
111 } else if ((tr & TREF_CONT)) { 124 } else if ((tr & TREF_CONT)) {
125#if LJ_FR2
126 if (ref)
127 lua_assert(ir_knum(ir)->u64 == tv->u64);
128#else
112 lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); 129 lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
113 lua_assert((J->slot[s+1] & TREF_FRAME)); 130#endif
131 lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME));
114 depth++; 132 depth++;
115 } else { 133 } else {
116 if (tvisnumber(tv)) 134 if (tvisnumber(tv))
@@ -162,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot)
162/* Get TRef for current function. */ 180/* Get TRef for current function. */
163static TRef getcurrf(jit_State *J) 181static TRef getcurrf(jit_State *J)
164{ 182{
165 if (J->base[-1]) 183 if (J->base[-1-LJ_FR2])
166 return J->base[-1]; 184 return J->base[-1-LJ_FR2];
167 lua_assert(J->baseslot == 1); 185 lua_assert(J->baseslot == 1+LJ_FR2);
168 return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); 186 return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
169} 187}
170 188
171/* Compare for raw object equality. 189/* Compare for raw object equality.
@@ -509,7 +527,6 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
509static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) 527static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
510{ 528{
511 BCReg ra = bc_a(iterins); 529 BCReg ra = bc_a(iterins);
512 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
513 if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ 530 if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
514 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ 531 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
515 J->maxslot = ra-1+bc_b(J->pc[-1]); 532 J->maxslot = ra-1+bc_b(J->pc[-1]);
@@ -678,22 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
678{ 695{
679 RecordIndex ix; 696 RecordIndex ix;
680 TValue *functv = &J->L->base[func]; 697 TValue *functv = &J->L->base[func];
681 TRef *fbase = &J->base[func]; 698 TRef kfunc, *fbase = &J->base[func];
682 ptrdiff_t i; 699 ptrdiff_t i;
683 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ 700 (void)getslot(J, func); /* Ensure func has a reference. */
684 for (i = 0; i <= nargs; i++) 701 for (i = 1; i <= nargs; i++)
685 (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ 702 (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */
686 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ 703 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
687 ix.tab = fbase[0]; 704 ix.tab = fbase[0];
688 copyTV(J->L, &ix.tabv, functv); 705 copyTV(J->L, &ix.tabv, functv);
689 if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) 706 if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
690 lj_trace_err(J, LJ_TRERR_NOMM); 707 lj_trace_err(J, LJ_TRERR_NOMM);
691 for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ 708 for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */
692 fbase[i] = fbase[i-1]; 709 fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1];
710#if LJ_FR2
711 fbase[2] = fbase[0];
712#endif
693 fbase[0] = ix.mobj; /* Replace function. */ 713 fbase[0] = ix.mobj; /* Replace function. */
694 functv = &ix.mobjv; 714 functv = &ix.mobjv;
695 } 715 }
696 fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); 716 kfunc = rec_call_specialize(J, funcV(functv), fbase[0]);
717#if LJ_FR2
718 fbase[0] = kfunc;
719 fbase[1] = TREF_FRAME;
720#else
721 fbase[0] = kfunc | TREF_FRAME;
722#endif
697 J->maxslot = (BCReg)nargs; 723 J->maxslot = (BCReg)nargs;
698} 724}
699 725
@@ -703,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
703 rec_call_setup(J, func, nargs); 729 rec_call_setup(J, func, nargs);
704 /* Bump frame. */ 730 /* Bump frame. */
705 J->framedepth++; 731 J->framedepth++;
706 J->base += func+1; 732 J->base += func+1+LJ_FR2;
707 J->baseslot += func+1; 733 J->baseslot += func+1+LJ_FR2;
708} 734}
709 735
710/* Record tail call. */ 736/* Record tail call. */
@@ -720,7 +746,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
720 func += cbase; 746 func += cbase;
721 } 747 }
722 /* Move func + args down. */ 748 /* Move func + args down. */
723 memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); 749 if (LJ_FR2 && J->baseslot == 2)
750 J->base[func+1] = 0;
751 memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2));
724 /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ 752 /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
725 /* Tailcalls can form a loop, so count towards the loop unroll limit. */ 753 /* Tailcalls can form a loop, so count towards the loop unroll limit. */
726 if (++J->tailcalled > J->loopunroll) 754 if (++J->tailcalled > J->loopunroll)
@@ -763,7 +791,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
763 BCReg cbase = (BCReg)frame_delta(frame); 791 BCReg cbase = (BCReg)frame_delta(frame);
764 if (--J->framedepth < 0) 792 if (--J->framedepth < 0)
765 lj_trace_err(J, LJ_TRERR_NYIRETL); 793 lj_trace_err(J, LJ_TRERR_NYIRETL);
766 lua_assert(J->baseslot > 1); 794 lua_assert(J->baseslot > 1+LJ_FR2);
767 gotresults++; 795 gotresults++;
768 rbase += cbase; 796 rbase += cbase;
769 J->baseslot -= (BCReg)cbase; 797 J->baseslot -= (BCReg)cbase;
@@ -787,7 +815,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
787 BCReg cbase = (BCReg)frame_delta(frame); 815 BCReg cbase = (BCReg)frame_delta(frame);
788 if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ 816 if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
789 lj_trace_err(J, LJ_TRERR_NYIRETL); 817 lj_trace_err(J, LJ_TRERR_NYIRETL);
790 lua_assert(J->baseslot > 1); 818 lua_assert(J->baseslot > 1+LJ_FR2);
791 rbase += cbase; 819 rbase += cbase;
792 J->baseslot -= (BCReg)cbase; 820 J->baseslot -= (BCReg)cbase;
793 J->base -= cbase; 821 J->base -= cbase;
@@ -797,8 +825,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
797 BCIns callins = *(frame_pc(frame)-1); 825 BCIns callins = *(frame_pc(frame)-1);
798 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; 826 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
799 BCReg cbase = bc_a(callins); 827 BCReg cbase = bc_a(callins);
800 GCproto *pt = funcproto(frame_func(frame - (cbase+1-LJ_FR2))); 828 GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2)));
801 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame teardown. */
802 if ((pt->flags & PROTO_NOJIT)) 829 if ((pt->flags & PROTO_NOJIT))
803 lj_trace_err(J, LJ_TRERR_CJITOFF); 830 lj_trace_err(J, LJ_TRERR_CJITOFF);
804 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { 831 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
@@ -811,13 +838,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
811 lj_snap_add(J); 838 lj_snap_add(J);
812 } 839 }
813 for (i = 0; i < nresults; i++) /* Adjust results. */ 840 for (i = 0; i < nresults; i++) /* Adjust results. */
814 J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; 841 J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
815 J->maxslot = cbase+(BCReg)nresults; 842 J->maxslot = cbase+(BCReg)nresults;
816 if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ 843 if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
817 J->framedepth--; 844 J->framedepth--;
818 lua_assert(J->baseslot > cbase+1); 845 lua_assert(J->baseslot > cbase+1+LJ_FR2);
819 J->baseslot -= cbase+1; 846 J->baseslot -= cbase+1+LJ_FR2;
820 J->base -= cbase+1; 847 J->base -= cbase+1+LJ_FR2;
821 } else if (J->parent == 0 && J->exitno == 0 && 848 } else if (J->parent == 0 && J->exitno == 0 &&
822 !bc_isret(bc_op(J->cur.startins))) { 849 !bc_isret(bc_op(J->cur.startins))) {
823 /* Return to lower frame would leave the loop in a root trace. */ 850 /* Return to lower frame would leave the loop in a root trace. */
@@ -827,13 +854,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
827 } else { /* Return to lower frame. Guard for the target we return to. */ 854 } else { /* Return to lower frame. Guard for the target we return to. */
828 TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); 855 TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
829 TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); 856 TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
830 emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); 857 emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
831 J->retdepth++; 858 J->retdepth++;
832 J->needsnap = 1; 859 J->needsnap = 1;
833 lua_assert(J->baseslot == 1); 860 lua_assert(J->baseslot == 1+LJ_FR2);
834 /* Shift result slots up and clear the slots of the new frame below. */ 861 /* Shift result slots up and clear the slots of the new frame below. */
835 memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); 862 memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
836 memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); 863 memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
837 } 864 }
838 } else if (frame_iscont(frame)) { /* Return to continuation frame. */ 865 } else if (frame_iscont(frame)) { /* Return to continuation frame. */
839 ASMFunction cont = frame_contf(frame); 866 ASMFunction cont = frame_contf(frame);
@@ -842,32 +869,39 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
842 lj_trace_err(J, LJ_TRERR_NYIRETL); 869 lj_trace_err(J, LJ_TRERR_NYIRETL);
843 J->baseslot -= (BCReg)cbase; 870 J->baseslot -= (BCReg)cbase;
844 J->base -= cbase; 871 J->base -= cbase;
845 J->maxslot = cbase-2; 872 J->maxslot = cbase-(2<<LJ_FR2);
846 if (cont == lj_cont_ra) { 873 if (cont == lj_cont_ra) {
847 /* Copy result to destination slot. */ 874 /* Copy result to destination slot. */
848 BCReg dst = bc_a(*(frame_contpc(frame)-1)); 875 BCReg dst = bc_a(*(frame_contpc(frame)-1));
849 J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; 876 J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
850 if (dst >= J->maxslot) J->maxslot = dst+1; 877 if (dst >= J->maxslot) {
878 J->maxslot = dst+1;
879 }
851 } else if (cont == lj_cont_nop) { 880 } else if (cont == lj_cont_nop) {
852 /* Nothing to do here. */ 881 /* Nothing to do here. */
853 } else if (cont == lj_cont_cat) { 882 } else if (cont == lj_cont_cat) {
854 BCReg bslot = bc_b(*(frame_contpc(frame)-1)); 883 BCReg bslot = bc_b(*(frame_contpc(frame)-1));
855 TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; 884 TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
856 if (bslot != cbase-2) { /* Concatenate the remainder. */ 885 if (bslot != J->maxslot) { /* Concatenate the remainder. */
857 TValue *b = J->L->base, save; /* Simulate lower frame and result. */ 886 TValue *b = J->L->base, save; /* Simulate lower frame and result. */
858 J->base[cbase-2] = tr; 887 J->base[J->maxslot] = tr;
859 copyTV(J->L, &save, b-2); 888 copyTV(J->L, &save, b-(2<<LJ_FR2));
860 if (gotresults) copyTV(J->L, b-2, b+rbase); else setnilV(b-2); 889 if (gotresults)
890 copyTV(J->L, b-(2<<LJ_FR2), b+rbase);
891 else
892 setnilV(b-(2<<LJ_FR2));
861 J->L->base = b - cbase; 893 J->L->base = b - cbase;
862 tr = rec_cat(J, bslot, cbase-2); 894 tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2));
863 b = J->L->base + cbase; /* Undo. */ 895 b = J->L->base + cbase; /* Undo. */
864 J->L->base = b; 896 J->L->base = b;
865 copyTV(J->L, b-2, &save); 897 copyTV(J->L, b-(2<<LJ_FR2), &save);
866 } 898 }
867 if (tr) { /* Store final result. */ 899 if (tr) { /* Store final result. */
868 BCReg dst = bc_a(*(frame_contpc(frame)-1)); 900 BCReg dst = bc_a(*(frame_contpc(frame)-1));
869 J->base[dst] = tr; 901 J->base[dst] = tr;
870 if (dst >= J->maxslot) J->maxslot = dst+1; 902 if (dst >= J->maxslot) {
903 J->maxslot = dst+1;
904 }
871 } /* Otherwise continue with another __concat call. */ 905 } /* Otherwise continue with another __concat call. */
872 } else { 906 } else {
873 /* Result type already specialized. */ 907 /* Result type already specialized. */
@@ -876,7 +910,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
876 } else { 910 } else {
877 lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ 911 lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
878 } 912 }
879 lua_assert(J->baseslot >= 1); 913 lua_assert(J->baseslot >= 1+LJ_FR2);
880} 914}
881 915
882/* -- Metamethod handling ------------------------------------------------- */ 916/* -- Metamethod handling ------------------------------------------------- */
@@ -885,11 +919,16 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
885static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) 919static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
886{ 920{
887 BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; 921 BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
922#if LJ_FR2
923 J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
924 J->base[top+1] = TREF_CONT;
925#else
888 J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; 926 J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
927#endif
889 J->framedepth++; 928 J->framedepth++;
890 for (s = J->maxslot; s < top; s++) 929 for (s = J->maxslot; s < top; s++)
891 J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ 930 J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */
892 return top+1; 931 return top+1+LJ_FR2;
893} 932}
894 933
895/* Record metamethod lookup. */ 934/* Record metamethod lookup. */
@@ -967,9 +1006,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
967 BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); 1006 BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
968 TRef *base = J->base + func; 1007 TRef *base = J->base + func;
969 TValue *basev = J->L->base + func; 1008 TValue *basev = J->L->base + func;
970 base[1] = ix->tab; base[2] = ix->key; 1009 base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key;
971 copyTV(J->L, basev+1, &ix->tabv); 1010 copyTV(J->L, basev+1+LJ_FR2, &ix->tabv);
972 copyTV(J->L, basev+2, &ix->keyv); 1011 copyTV(J->L, basev+2+LJ_FR2, &ix->keyv);
973 if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ 1012 if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */
974 if (mm != MM_unm) { 1013 if (mm != MM_unm) {
975 ix->tab = ix->key; 1014 ix->tab = ix->key;
@@ -980,8 +1019,10 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
980 lj_trace_err(J, LJ_TRERR_NOMM); 1019 lj_trace_err(J, LJ_TRERR_NOMM);
981 } 1020 }
982ok: 1021ok:
983 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
984 base[0] = ix->mobj; 1022 base[0] = ix->mobj;
1023#if LJ_FR2
1024 base[1] = 0;
1025#endif
985 copyTV(J->L, basev+0, &ix->mobjv); 1026 copyTV(J->L, basev+0, &ix->mobjv);
986 lj_record_call(J, func, 2); 1027 lj_record_call(J, func, 2);
987 return 0; /* No result yet. */ 1028 return 0; /* No result yet. */
@@ -997,8 +1038,9 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
997 BCReg func = rec_mm_prep(J, lj_cont_ra); 1038 BCReg func = rec_mm_prep(J, lj_cont_ra);
998 TRef *base = J->base + func; 1039 TRef *base = J->base + func;
999 TValue *basev = J->L->base + func; 1040 TValue *basev = J->L->base + func;
1000 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */
1001 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); 1041 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
1042 base += LJ_FR2;
1043 basev += LJ_FR2;
1002 base[1] = tr; copyTV(J->L, basev+1, tv); 1044 base[1] = tr; copyTV(J->L, basev+1, tv);
1003#if LJ_52 1045#if LJ_52
1004 base[2] = tr; copyTV(J->L, basev+2, tv); 1046 base[2] = tr; copyTV(J->L, basev+2, tv);
@@ -1018,11 +1060,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
1018static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) 1060static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
1019{ 1061{
1020 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); 1062 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
1021 TRef *base = J->base + func; 1063 TRef *base = J->base + func + LJ_FR2;
1022 TValue *tv = J->L->base + func; 1064 TValue *tv = J->L->base + func + LJ_FR2;
1023 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ 1065 base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
1024 base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; 1066 copyTV(J->L, tv-LJ_FR2, &ix->mobjv);
1025 copyTV(J->L, tv+0, &ix->mobjv);
1026 copyTV(J->L, tv+1, &ix->valv); 1067 copyTV(J->L, tv+1, &ix->valv);
1027 copyTV(J->L, tv+2, &ix->keyv); 1068 copyTV(J->L, tv+2, &ix->keyv);
1028 lj_record_call(J, func, 2); 1069 lj_record_call(J, func, 2);
@@ -1339,11 +1380,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1339 handlemm: 1380 handlemm:
1340 if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ 1381 if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
1341 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); 1382 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
1342 TRef *base = J->base + func; 1383 TRef *base = J->base + func + LJ_FR2;
1343 TValue *tv = J->L->base + func; 1384 TValue *tv = J->L->base + func + LJ_FR2;
1344 lua_assert(!LJ_FR2); /* TODO_FR2: handle different frame setup. */ 1385 base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
1345 base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; 1386 setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv));
1346 setfuncV(J->L, tv+0, funcV(&ix->mobjv));
1347 copyTV(J->L, tv+1, &ix->tabv); 1387 copyTV(J->L, tv+1, &ix->tabv);
1348 copyTV(J->L, tv+2, &ix->keyv); 1388 copyTV(J->L, tv+2, &ix->keyv);
1349 if (ix->val) { 1389 if (ix->val) {
@@ -1533,7 +1573,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
1533 goto noconstify; 1573 goto noconstify;
1534 kfunc = lj_ir_kfunc(J, J->fn); 1574 kfunc = lj_ir_kfunc(J, J->fn);
1535 emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); 1575 emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
1536 J->base[-1] = TREF_FRAME | kfunc; 1576#if LJ_FR2
1577 J->base[-2] = kfunc;
1578#else
1579 J->base[-1] = kfunc | TREF_FRAME;
1580#endif
1537 fn = kfunc; 1581 fn = kfunc;
1538 } 1582 }
1539 tr = lj_record_constify(J, uvval(uvp)); 1583 tr = lj_record_constify(J, uvval(uvp));
@@ -1644,11 +1688,14 @@ static void rec_func_setup(jit_State *J)
1644static void rec_func_vararg(jit_State *J) 1688static void rec_func_vararg(jit_State *J)
1645{ 1689{
1646 GCproto *pt = J->pt; 1690 GCproto *pt = J->pt;
1647 BCReg s, fixargs, vframe = J->maxslot+1; 1691 BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
1648 lua_assert((pt->flags & PROTO_VARARG)); 1692 lua_assert((pt->flags & PROTO_VARARG));
1649 if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) 1693 if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
1650 lj_trace_err(J, LJ_TRERR_STACKOV); 1694 lj_trace_err(J, LJ_TRERR_STACKOV);
1651 J->base[vframe-1] = J->base[-1]; /* Copy function up. */ 1695 J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
1696#if LJ_FR2
1697 J->base[vframe-1] = TREF_FRAME;
1698#endif
1652 /* Copy fixarg slots up and set their original slots to nil. */ 1699 /* Copy fixarg slots up and set their original slots to nil. */
1653 fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; 1700 fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
1654 for (s = 0; s < fixargs; s++) { 1701 for (s = 0; s < fixargs; s++) {
@@ -1710,7 +1757,7 @@ static int select_detect(jit_State *J)
1710static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) 1757static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1711{ 1758{
1712 int32_t numparams = J->pt->numparams; 1759 int32_t numparams = J->pt->numparams;
1713 ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; 1760 ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
1714 lua_assert(frame_isvarg(J->L->base-1)); 1761 lua_assert(frame_isvarg(J->L->base-1));
1715 if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ 1762 if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
1716 ptrdiff_t i; 1763 ptrdiff_t i;
@@ -1722,10 +1769,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1722 J->maxslot = dst + (BCReg)nresults; 1769 J->maxslot = dst + (BCReg)nresults;
1723 } 1770 }
1724 for (i = 0; i < nresults; i++) 1771 for (i = 0; i < nresults; i++)
1725 J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; 1772 J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
1726 } else { /* Unknown number of varargs passed to trace. */ 1773 } else { /* Unknown number of varargs passed to trace. */
1727 TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); 1774 TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME);
1728 int32_t frofs = 8*(1+numparams)+FRAME_VARG; 1775 int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG;
1729 if (nresults >= 0) { /* Known fixed number of results. */ 1776 if (nresults >= 0) { /* Known fixed number of results. */
1730 ptrdiff_t i; 1777 ptrdiff_t i;
1731 if (nvararg > 0) { 1778 if (nvararg > 0) {
@@ -1739,7 +1786,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1739 vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); 1786 vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
1740 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); 1787 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
1741 for (i = 0; i < nload; i++) { 1788 for (i = 0; i < nload; i++) {
1742 IRType t = itype2irt(&J->L->base[i-1-nvararg]); 1789 IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
1743 TRef aref = emitir(IRT(IR_AREF, IRT_PGC), 1790 TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
1744 vbase, lj_ir_kint(J, (int32_t)i)); 1791 vbase, lj_ir_kint(J, (int32_t)i));
1745 TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); 1792 TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
@@ -1787,14 +1834,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1787 if (idx != 0 && idx <= nvararg) { 1834 if (idx != 0 && idx <= nvararg) {
1788 IRType t; 1835 IRType t;
1789 TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); 1836 TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
1790 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8)); 1837 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
1791 t = itype2irt(&J->L->base[idx-2-nvararg]); 1838 lj_ir_kint(J, frofs-(8<<LJ_FR2)));
1839 t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
1792 aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); 1840 aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
1793 tr = emitir(IRTG(IR_VLOAD, t), aref, 0); 1841 tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
1794 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ 1842 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
1795 } 1843 }
1796 J->base[dst-2] = tr; 1844 J->base[dst-2-LJ_FR2] = tr;
1797 J->maxslot = dst-1; 1845 J->maxslot = dst-1-LJ_FR2;
1798 J->bcskip = 2; /* Skip CALLM + select. */ 1846 J->bcskip = 2; /* Skip CALLM + select. */
1799 } else { 1847 } else {
1800 nyivarg: 1848 nyivarg:
@@ -1887,7 +1935,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
1887 const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); 1935 const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
1888 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; 1936 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
1889 /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ 1937 /* Set PC to opposite target to avoid re-recording the comp. in side trace. */
1938#if LJ_FR2
1939 SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent];
1940 uint64_t pcbase;
1941 memcpy(&pcbase, flink, sizeof(uint64_t));
1942 pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8);
1943 memcpy(flink, &pcbase, sizeof(uint64_t));
1944#else
1890 J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); 1945 J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
1946#endif
1891 J->needsnap = 1; 1947 J->needsnap = 1;
1892 if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); 1948 if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
1893 lj_snap_shrink(J); /* Shrink last snapshot if possible. */ 1949 lj_snap_shrink(J); /* Shrink last snapshot if possible. */
@@ -2185,7 +2241,13 @@ void lj_record_ins(jit_State *J)
2185 2241
2186 case BC_MOV: 2242 case BC_MOV:
2187 /* Clear gap of method call to avoid resurrecting previous refs. */ 2243 /* Clear gap of method call to avoid resurrecting previous refs. */
2188 if (ra > J->maxslot) J->base[ra-1] = 0; 2244 if (ra > J->maxslot) {
2245#if LJ_FR2
2246 memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef));
2247#else
2248 J->base[ra-1] = 0;
2249#endif
2250 }
2189 break; 2251 break;
2190 case BC_KSTR: case BC_KNUM: case BC_KPRI: 2252 case BC_KSTR: case BC_KNUM: case BC_KPRI:
2191 break; 2253 break;
@@ -2254,14 +2316,14 @@ void lj_record_ins(jit_State *J)
2254 /* -- Calls and vararg handling ----------------------------------------- */ 2316 /* -- Calls and vararg handling ----------------------------------------- */
2255 2317
2256 case BC_ITERC: 2318 case BC_ITERC:
2257 J->base[ra] = getslot(J, ra-3-LJ_FR2); 2319 J->base[ra] = getslot(J, ra-3);
2258 J->base[ra+1] = getslot(J, ra-2-LJ_FR2); 2320 J->base[ra+1+LJ_FR2] = getslot(J, ra-2);
2259 J->base[ra+2] = getslot(J, ra-1-LJ_FR2); 2321 J->base[ra+2+LJ_FR2] = getslot(J, ra-1);
2260 { /* Do the actual copy now because lj_record_call needs the values. */ 2322 { /* Do the actual copy now because lj_record_call needs the values. */
2261 TValue *b = &J->L->base[ra]; 2323 TValue *b = &J->L->base[ra];
2262 copyTV(J->L, b, b-3-LJ_FR2); 2324 copyTV(J->L, b, b-3);
2263 copyTV(J->L, b+1, b-2-LJ_FR2); 2325 copyTV(J->L, b+1+LJ_FR2, b-2);
2264 copyTV(J->L, b+2, b-1-LJ_FR2); 2326 copyTV(J->L, b+2+LJ_FR2, b-1);
2265 } 2327 }
2266 lj_record_call(J, ra, (ptrdiff_t)rc-1); 2328 lj_record_call(J, ra, (ptrdiff_t)rc-1);
2267 break; 2329 break;
@@ -2384,7 +2446,12 @@ void lj_record_ins(jit_State *J)
2384 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ 2446 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
2385 if (bcmode_a(op) == BCMdst && rc) { 2447 if (bcmode_a(op) == BCMdst && rc) {
2386 J->base[ra] = rc; 2448 J->base[ra] = rc;
2387 if (ra >= J->maxslot) J->maxslot = ra+1; 2449 if (ra >= J->maxslot) {
2450#if LJ_FR2
2451 if (ra > J->maxslot) J->base[ra-1] = 0;
2452#endif
2453 J->maxslot = ra+1;
2454 }
2388 } 2455 }
2389 2456
2390#undef rav 2457#undef rav
@@ -2469,7 +2536,7 @@ void lj_record_setup(jit_State *J)
2469 J->scev.idx = REF_NIL; 2536 J->scev.idx = REF_NIL;
2470 setmref(J->scev.pc, NULL); 2537 setmref(J->scev.pc, NULL);
2471 2538
2472 J->baseslot = 1; /* Invoking function is at base[-1]. */ 2539 J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */
2473 J->base = J->slot + J->baseslot; 2540 J->base = J->slot + J->baseslot;
2474 J->maxslot = 0; 2541 J->maxslot = 0;
2475 J->framedepth = 0; 2542 J->framedepth = 0;
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 6199b1f0..33c058be 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -68,10 +68,18 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
68 for (s = 0; s < nslots; s++) { 68 for (s = 0; s < nslots; s++) {
69 TRef tr = J->slot[s]; 69 TRef tr = J->slot[s];
70 IRRef ref = tref_ref(tr); 70 IRRef ref = tref_ref(tr);
71#if LJ_FR2
72 if (s == 1) continue;
73 if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
74 TValue *base = J->L->base - J->baseslot;
75 tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
76 ref = tref_ref(tr);
77 }
78#endif
71 if (ref) { 79 if (ref) {
72 SnapEntry sn = SNAP_TR(s, tr); 80 SnapEntry sn = SNAP_TR(s, tr);
73 IRIns *ir = &J->cur.ir[ref]; 81 IRIns *ir = &J->cur.ir[ref];
74 if (!(sn & (SNAP_CONT|SNAP_FRAME)) && 82 if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
75 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { 83 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
76 /* No need to snapshot unmodified non-inherited slots. */ 84 /* No need to snapshot unmodified non-inherited slots. */
77 if (!(ir->op2 & IRSLOAD_INHERIT)) 85 if (!(ir->op2 & IRSLOAD_INHERIT))
@@ -90,34 +98,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
90} 98}
91 99
92/* Add frame links at the end of the snapshot. */ 100/* Add frame links at the end of the snapshot. */
93static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) 101static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
94{ 102{
95 cTValue *frame = J->L->base - 1; 103 cTValue *frame = J->L->base - 1;
96 cTValue *lim = J->L->base - J->baseslot; 104 cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
97 GCfunc *fn = frame_func(frame); 105 GCfunc *fn = frame_func(frame);
98 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; 106 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
107#if LJ_FR2
108 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
109 lua_assert(2 <= J->baseslot && J->baseslot <= 257);
110 memcpy(map, &pcbase, sizeof(uint64_t));
111#else
99 MSize f = 0; 112 MSize f = 0;
100 lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
101 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ 113 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
114#endif
102 while (frame > lim) { /* Backwards traversal of all frames above base. */ 115 while (frame > lim) { /* Backwards traversal of all frames above base. */
103 if (frame_islua(frame)) { 116 if (frame_islua(frame)) {
117#if !LJ_FR2
104 map[f++] = SNAP_MKPC(frame_pc(frame)); 118 map[f++] = SNAP_MKPC(frame_pc(frame));
119#endif
105 frame = frame_prevl(frame); 120 frame = frame_prevl(frame);
106 } else if (frame_iscont(frame)) { 121 } else if (frame_iscont(frame)) {
122#if !LJ_FR2
107 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); 123 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
108 map[f++] = SNAP_MKPC(frame_contpc(frame)); 124 map[f++] = SNAP_MKPC(frame_contpc(frame));
125#endif
109 frame = frame_prevd(frame); 126 frame = frame_prevd(frame);
110 } else { 127 } else {
111 lua_assert(!frame_isc(frame)); 128 lua_assert(!frame_isc(frame));
129#if !LJ_FR2
112 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); 130 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
131#endif
113 frame = frame_prevd(frame); 132 frame = frame_prevd(frame);
114 continue; 133 continue;
115 } 134 }
116 if (frame + funcproto(frame_func(frame))->framesize > ftop) 135 if (frame + funcproto(frame_func(frame))->framesize > ftop)
117 ftop = frame + funcproto(frame_func(frame))->framesize; 136 ftop = frame + funcproto(frame_func(frame))->framesize;
118 } 137 }
138 *topslot = (uint8_t)(ftop - lim);
139#if LJ_FR2
140 lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
141 return 2;
142#else
119 lua_assert(f == (MSize)(1 + J->framedepth)); 143 lua_assert(f == (MSize)(1 + J->framedepth));
120 return (BCReg)(ftop - lim); 144 return f;
145#endif
121} 146}
122 147
123/* Take a snapshot of the current stack. */ 148/* Take a snapshot of the current stack. */
@@ -127,16 +152,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
127 MSize nent; 152 MSize nent;
128 SnapEntry *p; 153 SnapEntry *p;
129 /* Conservative estimate. */ 154 /* Conservative estimate. */
130 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); 155 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
131 p = &J->cur.snapmap[nsnapmap]; 156 p = &J->cur.snapmap[nsnapmap];
132 nent = snapshot_slots(J, p, nslots); 157 nent = snapshot_slots(J, p, nslots);
133 snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); 158 snap->nent = (uint8_t)nent;
159 nent += snapshot_framelinks(J, p + nent, &snap->topslot);
134 snap->mapofs = (uint16_t)nsnapmap; 160 snap->mapofs = (uint16_t)nsnapmap;
135 snap->ref = (IRRef1)J->cur.nins; 161 snap->ref = (IRRef1)J->cur.nins;
136 snap->nent = (uint8_t)nent;
137 snap->nslots = (uint8_t)nslots; 162 snap->nslots = (uint8_t)nslots;
138 snap->count = 0; 163 snap->count = 0;
139 J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); 164 J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
140} 165}
141 166
142/* Add or merge a snapshot. */ 167/* Add or merge a snapshot. */
@@ -284,8 +309,8 @@ void lj_snap_shrink(jit_State *J)
284 MSize n, m, nlim, nent = snap->nent; 309 MSize n, m, nlim, nent = snap->nent;
285 uint8_t udf[SNAP_USEDEF_SLOTS]; 310 uint8_t udf[SNAP_USEDEF_SLOTS];
286 BCReg maxslot = J->maxslot; 311 BCReg maxslot = J->maxslot;
287 BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
288 BCReg baseslot = J->baseslot; 312 BCReg baseslot = J->baseslot;
313 BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
289 maxslot += baseslot; 314 maxslot += baseslot;
290 minslot += baseslot; 315 minslot += baseslot;
291 snap->nslots = (uint8_t)maxslot; 316 snap->nslots = (uint8_t)maxslot;
@@ -794,11 +819,13 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
794 SnapShot *snap = &T->snap[snapno]; 819 SnapShot *snap = &T->snap[snapno];
795 MSize n, nent = snap->nent; 820 MSize n, nent = snap->nent;
796 SnapEntry *map = &T->snapmap[snap->mapofs]; 821 SnapEntry *map = &T->snapmap[snap->mapofs];
797 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; 822 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
823#if !LJ_FR2
798 ptrdiff_t ftsz0; 824 ptrdiff_t ftsz0;
825#endif
799 TValue *frame; 826 TValue *frame;
800 BloomFilter rfilt = snap_renamefilter(T, snapno); 827 BloomFilter rfilt = snap_renamefilter(T, snapno);
801 const BCIns *pc = snap_pc(map[nent]); 828 const BCIns *pc = snap_pc(&map[nent]);
802 lua_State *L = J->L; 829 lua_State *L = J->L;
803 830
804 /* Set interpreter PC to the next PC to get correct error messages. */ 831 /* Set interpreter PC to the next PC to get correct error messages. */
@@ -811,8 +838,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
811 } 838 }
812 839
813 /* Fill stack slots with data from the registers and spill slots. */ 840 /* Fill stack slots with data from the registers and spill slots. */
814 frame = L->base-1; 841 frame = L->base-1-LJ_FR2;
842#if !LJ_FR2
815 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ 843 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
844#endif
816 for (n = 0; n < nent; n++) { 845 for (n = 0; n < nent; n++) {
817 SnapEntry sn = map[n]; 846 SnapEntry sn = map[n];
818 if (!(sn & SNAP_NORESTORE)) { 847 if (!(sn & SNAP_NORESTORE)) {
@@ -835,14 +864,18 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
835 TValue tmp; 864 TValue tmp;
836 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); 865 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
837 o->u32.hi = tmp.u32.lo; 866 o->u32.hi = tmp.u32.lo;
867#if !LJ_FR2
838 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { 868 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
839 lua_assert(!LJ_FR2); /* TODO_FR2: store 64 bit PCs. */
840 /* Overwrite tag with frame link. */ 869 /* Overwrite tag with frame link. */
841 setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); 870 setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
842 L->base = o+1; 871 L->base = o+1;
872#endif
843 } 873 }
844 } 874 }
845 } 875 }
876#if LJ_FR2
877 L->base += (map[nent+LJ_BE] & 0xff);
878#endif
846 lua_assert(map + nent == flinks); 879 lua_assert(map + nent == flinks);
847 880
848 /* Compute current stack top. */ 881 /* Compute current stack top. */