diff options
| author | Mike Pall <mike> | 2010-02-04 03:08:29 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2010-02-04 03:08:29 +0100 |
| commit | 7256690364a2c9a5e9269ffd89bc132ee188480d (patch) | |
| tree | 20d8a6a37fb64492e12407ee412b4c2f10137e93 | |
| parent | 78f5e2ffd34e01626b910341a7808ea4be8a6d0d (diff) | |
| download | luajit-7256690364a2c9a5e9269ffd89bc132ee188480d.tar.gz luajit-7256690364a2c9a5e9269ffd89bc132ee188480d.tar.bz2 luajit-7256690364a2c9a5e9269ffd89bc132ee188480d.zip | |
Add shadow frame link stack for trace recorder.
Simplifies snapshots. Prerequisite for pre-call snapshots.
Increases consistency for fast function calls, too.
| -rw-r--r-- | src/lj_asm.c | 6 | ||||
| -rw-r--r-- | src/lj_def.h | 1 | ||||
| -rw-r--r-- | src/lj_jit.h | 3 | ||||
| -rw-r--r-- | src/lj_opt_loop.c | 12 | ||||
| -rw-r--r-- | src/lj_record.c | 63 | ||||
| -rw-r--r-- | src/lj_snap.c | 53 |
6 files changed, 82 insertions, 56 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 38bbb1c2..5e7ff7f5 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -3042,7 +3042,7 @@ static void asm_tail_sync(ASMState *as) | |||
| 3042 | SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ | 3042 | SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ |
| 3043 | MSize n, nent = snap->nent; | 3043 | MSize n, nent = snap->nent; |
| 3044 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 3044 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
| 3045 | SnapEntry *flinks = map + nent + snap->nframelinks; | 3045 | SnapEntry *flinks = map + nent + 1; |
| 3046 | BCReg newbase = 0; | 3046 | BCReg newbase = 0; |
| 3047 | BCReg nslots, topslot = 0; | 3047 | BCReg nslots, topslot = 0; |
| 3048 | 3048 | ||
| @@ -3116,11 +3116,11 @@ static void asm_tail_sync(ASMState *as) | |||
| 3116 | if (!(sn & (SNAP_CONT|SNAP_FRAME))) | 3116 | if (!(sn & (SNAP_CONT|SNAP_FRAME))) |
| 3117 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | 3117 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); |
| 3118 | else if (s != 0) /* Do not overwrite link to previous frame. */ | 3118 | else if (s != 0) /* Do not overwrite link to previous frame. */ |
| 3119 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); | 3119 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks++)); |
| 3120 | } | 3120 | } |
| 3121 | checkmclim(as); | 3121 | checkmclim(as); |
| 3122 | } | 3122 | } |
| 3123 | lua_assert(map + nent == flinks-1); | 3123 | lua_assert(map + nent + 1 + snap->depth == flinks); |
| 3124 | } | 3124 | } |
| 3125 | 3125 | ||
| 3126 | /* Fixup the tail code. */ | 3126 | /* Fixup the tail code. */ |
diff --git a/src/lj_def.h b/src/lj_def.h index 872a7830..64b08f7b 100644 --- a/src/lj_def.h +++ b/src/lj_def.h | |||
| @@ -66,6 +66,7 @@ typedef unsigned __int32 uintptr_t; | |||
| 66 | 66 | ||
| 67 | /* JIT compiler limits. */ | 67 | /* JIT compiler limits. */ |
| 68 | #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ | 68 | #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ |
| 69 | #define LJ_MAX_JFRAME 20 /* Max. # of frames for a trace. */ | ||
| 69 | #define LJ_MAX_PHI 32 /* Max. # of PHIs for a loop. */ | 70 | #define LJ_MAX_PHI 32 /* Max. # of PHIs for a loop. */ |
| 70 | #define LJ_MAX_EXITSTUBGR 8 /* Max. # of exit stub groups. */ | 71 | #define LJ_MAX_EXITSTUBGR 8 /* Max. # of exit stub groups. */ |
| 71 | 72 | ||
diff --git a/src/lj_jit.h b/src/lj_jit.h index 1e029182..229642a5 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
| @@ -114,7 +114,7 @@ typedef struct SnapShot { | |||
| 114 | IRRef1 ref; /* First IR ref for this snapshot. */ | 114 | IRRef1 ref; /* First IR ref for this snapshot. */ |
| 115 | uint8_t nslots; /* Number of valid slots. */ | 115 | uint8_t nslots; /* Number of valid slots. */ |
| 116 | uint8_t nent; /* Number of compressed entries. */ | 116 | uint8_t nent; /* Number of compressed entries. */ |
| 117 | uint8_t nframelinks; /* Number of frame links. */ | 117 | uint8_t depth; /* Number of frame links. */ |
| 118 | uint8_t count; /* Count of taken exits for this snapshot. */ | 118 | uint8_t count; /* Count of taken exits for this snapshot. */ |
| 119 | } SnapShot; | 119 | } SnapShot; |
| 120 | 120 | ||
| @@ -252,6 +252,7 @@ typedef struct jit_State { | |||
| 252 | 252 | ||
| 253 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ | 253 | IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ |
| 254 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ | 254 | TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ |
| 255 | SnapEntry frame[LJ_MAX_JFRAME+2]; /* Frame link stack. */ | ||
| 255 | 256 | ||
| 256 | int32_t param[JIT_P__MAX]; /* JIT engine parameters. */ | 257 | int32_t param[JIT_P__MAX]; /* JIT engine parameters. */ |
| 257 | 258 | ||
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index e5ad5b43..e0e6990e 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
| @@ -167,7 +167,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, | |||
| 167 | SnapEntry *loopmap, IRRef1 *subst) | 167 | SnapEntry *loopmap, IRRef1 *subst) |
| 168 | { | 168 | { |
| 169 | SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; | 169 | SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; |
| 170 | MSize nmapofs, nframelinks; | 170 | MSize nmapofs, depth; |
| 171 | MSize on, ln, nn, onent = osnap->nent; | 171 | MSize on, ln, nn, onent = osnap->nent; |
| 172 | BCReg nslots = osnap->nslots; | 172 | BCReg nslots = osnap->nslots; |
| 173 | SnapShot *snap = &J->cur.snap[J->cur.nsnap]; | 173 | SnapShot *snap = &J->cur.snap[J->cur.nsnap]; |
| @@ -179,11 +179,11 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, | |||
| 179 | nmapofs = snap->mapofs; | 179 | nmapofs = snap->mapofs; |
| 180 | } | 180 | } |
| 181 | J->guardemit.irt = 0; | 181 | J->guardemit.irt = 0; |
| 182 | nframelinks = osnap->nframelinks; | 182 | depth = osnap->depth; |
| 183 | /* Setup new snapshot. */ | 183 | /* Setup new snapshot. */ |
| 184 | snap->mapofs = (uint16_t)nmapofs; | 184 | snap->mapofs = (uint16_t)nmapofs; |
| 185 | snap->ref = (IRRef1)J->cur.nins; | 185 | snap->ref = (IRRef1)J->cur.nins; |
| 186 | snap->nframelinks = (uint8_t)nframelinks; | 186 | snap->depth = (uint8_t)depth; |
| 187 | snap->nslots = nslots; | 187 | snap->nslots = nslots; |
| 188 | snap->count = 0; | 188 | snap->count = 0; |
| 189 | nmap = &J->cur.snapmap[nmapofs]; | 189 | nmap = &J->cur.snapmap[nmapofs]; |
| @@ -205,10 +205,10 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, | |||
| 205 | while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */ | 205 | while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */ |
| 206 | nmap[nn++] = loopmap[ln++]; | 206 | nmap[nn++] = loopmap[ln++]; |
| 207 | snap->nent = (uint8_t)nn; | 207 | snap->nent = (uint8_t)nn; |
| 208 | J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks); | 208 | J->cur.nsnapmap = (uint16_t)(nmapofs + nn + 1 + depth); |
| 209 | omap += onent; | 209 | omap += onent; |
| 210 | nmap += nn; | 210 | nmap += nn; |
| 211 | for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */ | 211 | for (nn = 0; nn <= depth; nn++) /* Copy PC + frame links. */ |
| 212 | nmap[nn] = omap[nn]; | 212 | nmap[nn] = omap[nn]; |
| 213 | } | 213 | } |
| 214 | 214 | ||
| @@ -314,7 +314,7 @@ static void loop_undo(jit_State *J, IRRef ins, MSize nsnap) | |||
| 314 | SnapShot *snap = &J->cur.snap[nsnap-1]; | 314 | SnapShot *snap = &J->cur.snap[nsnap-1]; |
| 315 | SnapEntry *map = J->cur.snapmap; | 315 | SnapEntry *map = J->cur.snapmap; |
| 316 | map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */ | 316 | map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */ |
| 317 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks); | 317 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + 1 + snap->depth); |
| 318 | J->cur.nsnap = nsnap; | 318 | J->cur.nsnap = nsnap; |
| 319 | J->guardemit.irt = 0; | 319 | J->guardemit.irt = 0; |
| 320 | lj_ir_rollback(J, ins); | 320 | lj_ir_rollback(J, ins); |
diff --git a/src/lj_record.c b/src/lj_record.c index 824d2fd1..f6d13264 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
| @@ -101,20 +101,45 @@ static void rec_check_ir(jit_State *J) | |||
| 101 | } | 101 | } |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | /* Compare frame stack of the recorder and the VM. Assertion-only check: walks the VM frames below L->base and asserts that each frame link equals the matching J->frame[] entry. */ | ||
| 105 | static void rec_check_frames(jit_State *J) | ||
| 106 | { | ||
| 107 | cTValue *frame = J->L->base - 1; /* Frame slot of the current function. */ | ||
| 108 | cTValue *lim = J->L->base - J->baseslot; /* Walk stops once frame is no longer above this. */ | ||
| 109 | int32_t depth = J->framedepth; /* Index one past the newest J->frame[] entry. */ | ||
| 110 | while (frame > lim) { | ||
| 111 | depth--; | ||
| 112 | lua_assert(depth >= 0); /* The VM must not have more frames than were recorded. */ | ||
| 113 | lua_assert((SnapEntry)frame_ftsz(frame) == J->frame[depth]); | ||
| 114 | if (frame_iscont(frame)) { /* Continuation: a second J->frame[] entry is compared against frame-1. */ | ||
| 115 | depth--; | ||
| 116 | lua_assert(depth >= 0); | ||
| 117 | lua_assert((SnapEntry)frame_ftsz(frame-1) == J->frame[depth]); | ||
| 118 | } | ||
| 119 | frame = frame_prev(frame); | ||
| 120 | } | ||
| 121 | lua_assert(depth == 0); /* Every J->frame[] entry must have been matched. */ | ||
| 122 | } | ||
| 123 | |||
| 104 | /* Sanity check the slots. */ | 124 | /* Sanity check the slots: validate each slot's IR reference and type, and check that the number of frame/continuation slots above slot #0 equals J->framedepth. */ |
| 105 | static void rec_check_slots(jit_State *J) | 125 | static void rec_check_slots(jit_State *J) |
| 106 | { | 126 | { |
| 107 | BCReg s, nslots = J->baseslot + J->maxslot; | 127 | BCReg s, nslots = J->baseslot + J->maxslot; |
| 128 | int32_t depth = 0; /* Must start at zero: incremented per frame/continuation slot below. */ | ||
| 108 | lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); | 129 | lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); |
| 109 | lua_assert(nslots < LJ_MAX_JSLOTS); | 130 | lua_assert(nslots < LJ_MAX_JSLOTS); |
| 110 | for (s = 0; s < nslots; s++) { | 131 | for (s = 0; s < nslots; s++) { |
| 111 | TRef tr = J->slot[s]; | 132 | TRef tr = J->slot[s]; |
| 133 | if (s != 0 && (tr & (TREF_CONT|TREF_FRAME))) /* Slot #0 is excluded from the count. */ | ||
| 134 | depth++; | ||
| 112 | if (tr) { | 135 | if (tr) { |
| 113 | IRRef ref = tref_ref(tr); | 136 | IRRef ref = tref_ref(tr); |
| 114 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); | 137 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); |
| 115 | lua_assert(irt_t(IR(ref)->t) == tref_t(tr)); | 138 | lua_assert(irt_t(IR(ref)->t) == tref_t(tr)); |
| 116 | } | 139 | } |
| 117 | } | 140 | } |
| 141 | lua_assert(J->framedepth == depth); /* Recorded depth must match the marked slots. */ | ||
| 142 | rec_check_frames(J); | ||
| 118 | } | 143 | } |
| 119 | #endif | 144 | #endif |
| 120 | 145 | ||
| @@ -854,6 +879,7 @@ typedef struct RecordFFData { | |||
| 854 | ptrdiff_t nres; /* Number of returned results (defaults to 1). */ | 879 | ptrdiff_t nres; /* Number of returned results (defaults to 1). */ |
| 855 | ptrdiff_t cres; /* Wanted number of call results. */ | 880 | ptrdiff_t cres; /* Wanted number of call results. */ |
| 856 | uint32_t data; /* Per-ffid auxiliary data (opcode, literal etc.). */ | 881 | uint32_t data; /* Per-ffid auxiliary data (opcode, literal etc.). */ |
| 882 | int metacall; /* True if function was resolved via __call. */ | ||
| 857 | } RecordFFData; | 883 | } RecordFFData; |
| 858 | 884 | ||
| 859 | /* Type of handler to record a fast function. */ | 885 | /* Type of handler to record a fast function. */ |
| @@ -1020,9 +1046,14 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1020 | ix.tab = tr; | 1046 | ix.tab = tr; |
| 1021 | copyTV(J->L, &ix.tabv, &rd->argv[0]); | 1047 | copyTV(J->L, &ix.tabv, &rd->argv[0]); |
| 1022 | if (rec_mm_lookup(J, &ix, MM_tostring)) { /* Has __tostring metamethod? */ | 1048 | if (rec_mm_lookup(J, &ix, MM_tostring)) { /* Has __tostring metamethod? */ |
| 1049 | if (rd->metacall) /* Must not use kludge. */ | ||
| 1050 | recff_err_nyi(J, rd); | ||
| 1023 | res[0] = ix.mobj; | 1051 | res[0] = ix.mobj; |
| 1024 | copyTV(J->L, rd->argv - 1, &ix.mobjv); | 1052 | copyTV(J->L, rd->argv - 1, &ix.mobjv); /* Kludge. */ |
| 1025 | if (!rec_call(J, (BCReg)(res - J->base), 1, 1)) /* Pending call? */ | 1053 | J->framedepth--; |
| 1054 | if (rec_call(J, (BCReg)(res - J->base), 1, 1)) | ||
| 1055 | J->framedepth++; | ||
| 1056 | else | ||
| 1026 | rd->cres = CALLRES_PENDING; | 1057 | rd->cres = CALLRES_PENDING; |
| 1027 | /* Otherwise res[0] already contains the result. */ | 1058 | /* Otherwise res[0] already contains the result. */ |
| 1028 | } else if (tref_isnumber(tr)) { | 1059 | } else if (tref_isnumber(tr)) { |
| @@ -1067,6 +1098,8 @@ static void recff_pcall(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1067 | { | 1098 | { |
| 1068 | if (rd->nargs >= 1) { | 1099 | if (rd->nargs >= 1) { |
| 1069 | BCReg parg = (BCReg)(arg - J->base); | 1100 | BCReg parg = (BCReg)(arg - J->base); |
| 1101 | J->pc = (const BCIns *)(sizeof(TValue) - 4 + | ||
| 1102 | (hook_active(J2G(J)) ? FRAME_PCALLH : FRAME_PCALL)); | ||
| 1070 | if (rec_call(J, parg, CALLRES_MULTI, rd->nargs - 1)) { /* Resolved call. */ | 1103 | if (rec_call(J, parg, CALLRES_MULTI, rd->nargs - 1)) { /* Resolved call. */ |
| 1071 | res[0] = TREF_TRUE; /* Prepend true result. No need to move results. */ | 1104 | res[0] = TREF_TRUE; /* Prepend true result. No need to move results. */ |
| 1072 | rd->nres = (ptrdiff_t)J->maxslot - (ptrdiff_t)parg + 1; | 1105 | rd->nres = (ptrdiff_t)J->maxslot - (ptrdiff_t)parg + 1; |
| @@ -1108,6 +1141,8 @@ static void recff_xpcall(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1108 | copyTV(J->L, &rd->argv[0], &argv1); | 1141 | copyTV(J->L, &rd->argv[0], &argv1); |
| 1109 | copyTV(J->L, &rd->argv[1], &argv0); | 1142 | copyTV(J->L, &rd->argv[1], &argv0); |
| 1110 | oargv = savestack(J->L, rd->argv); | 1143 | oargv = savestack(J->L, rd->argv); |
| 1144 | J->pc = (const BCIns *)(2*sizeof(TValue) - 4 + | ||
| 1145 | (hook_active(J2G(J)) ? FRAME_PCALLH : FRAME_PCALL)); | ||
| 1111 | /* Need to protect rec_call because the recorder may throw. */ | 1146 | /* Need to protect rec_call because the recorder may throw. */ |
| 1112 | rx.parg = parg; | 1147 | rx.parg = parg; |
| 1113 | rx.nargs = rd->nargs - 2; | 1148 | rx.nargs = rd->nargs - 2; |
| @@ -1549,7 +1584,7 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) | |||
| 1549 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ | 1584 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ |
| 1550 | ASMFunction cont = frame_contf(frame); | 1585 | ASMFunction cont = frame_contf(frame); |
| 1551 | BCReg cbase = (BCReg)frame_delta(frame); | 1586 | BCReg cbase = (BCReg)frame_delta(frame); |
| 1552 | if (J->framedepth-- <= 0) | 1587 | if ((J->framedepth -= 2) <= 0) |
| 1553 | lj_trace_err(J, LJ_TRERR_NYIRETL); | 1588 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
| 1554 | J->baseslot -= (BCReg)cbase; | 1589 | J->baseslot -= (BCReg)cbase; |
| 1555 | J->base -= cbase; | 1590 | J->base -= cbase; |
| @@ -1602,6 +1637,7 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs) | |||
| 1602 | if (tref_isfunc(res[0])) { /* Regular function call. */ | 1637 | if (tref_isfunc(res[0])) { /* Regular function call. */ |
| 1603 | rd.fn = funcV(tv); | 1638 | rd.fn = funcV(tv); |
| 1604 | rd.argv = tv+1; | 1639 | rd.argv = tv+1; |
| 1640 | rd.metacall = 0; | ||
| 1605 | } else { /* Otherwise resolve __call metamethod for called object. */ | 1641 | } else { /* Otherwise resolve __call metamethod for called object. */ |
| 1606 | RecordIndex ix; | 1642 | RecordIndex ix; |
| 1607 | ptrdiff_t i; | 1643 | ptrdiff_t i; |
| @@ -1615,13 +1651,21 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs) | |||
| 1615 | res[0] = ix.mobj; | 1651 | res[0] = ix.mobj; |
| 1616 | rd.fn = funcV(&ix.mobjv); | 1652 | rd.fn = funcV(&ix.mobjv); |
| 1617 | rd.argv = tv; /* The called object is the 1st arg. */ | 1653 | rd.argv = tv; /* The called object is the 1st arg. */ |
| 1654 | rd.metacall = 1; | ||
| 1618 | } | 1655 | } |
| 1619 | 1656 | ||
| 1620 | /* Specialize to the runtime value of the called function. */ | 1657 | /* Specialize to the runtime value of the called function. */ |
| 1621 | trfunc = lj_ir_kfunc(J, rd.fn); | 1658 | trfunc = lj_ir_kfunc(J, rd.fn); |
| 1622 | emitir(IRTG(IR_EQ, IRT_FUNC), res[0], trfunc); | 1659 | emitir(IRTG(IR_EQ, IRT_FUNC), res[0], trfunc); |
| 1623 | res[0] = trfunc | TREF_FRAME; | 1660 | res[0] = trfunc | TREF_FRAME; |
| 1624 | J->framedepth++; | 1661 | |
| 1662 | /* Add frame links. */ | ||
| 1663 | J->frame[J->framedepth++] = SNAP_MKPC(J->pc+1); | ||
| 1664 | if (cres == CALLRES_CONT) /* Continuations need an extra frame stack slot. */ | ||
| 1665 | J->frame[J->framedepth++] = SNAP_MKFTSZ((func+1)*sizeof(TValue)+FRAME_CONT); | ||
| 1666 | /* NYI: func is wrong if any fast function ever sets up a continuation. */ | ||
| 1667 | if (J->framedepth > LJ_MAX_JFRAME) | ||
| 1668 | lj_trace_err(J, LJ_TRERR_STACKOV); | ||
| 1625 | 1669 | ||
| 1626 | if (isluafunc(rd.fn)) { /* Record call to Lua function. */ | 1670 | if (isluafunc(rd.fn)) { /* Record call to Lua function. */ |
| 1627 | GCproto *pt = funcproto(rd.fn); | 1671 | GCproto *pt = funcproto(rd.fn); |
| @@ -1659,6 +1703,7 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs) | |||
| 1659 | return 0; /* No result yet. */ | 1703 | return 0; /* No result yet. */ |
| 1660 | } else { /* Record call to C function or fast function. */ | 1704 | } else { /* Record call to C function or fast function. */ |
| 1661 | uint32_t m = 0; | 1705 | uint32_t m = 0; |
| 1706 | BCReg oldmaxslot = J->maxslot; | ||
| 1662 | res[1+nargs] = 0; | 1707 | res[1+nargs] = 0; |
| 1663 | rd.nargs = nargs; | 1708 | rd.nargs = nargs; |
| 1664 | if (rd.fn->c.ffid < sizeof(recff_idmap)/sizeof(recff_idmap[0])) | 1709 | if (rd.fn->c.ffid < sizeof(recff_idmap)/sizeof(recff_idmap[0])) |
| @@ -1682,10 +1727,12 @@ static int rec_call(jit_State *J, BCReg func, ptrdiff_t cres, ptrdiff_t nargs) | |||
| 1682 | rec_ret(J, func, rd.nres); | 1727 | rec_ret(J, func, rd.nres); |
| 1683 | } else if (cres == CALLRES_CONT) { | 1728 | } else if (cres == CALLRES_CONT) { |
| 1684 | /* Note: immediately resolved continuations must not change J->maxslot. */ | 1729 | /* Note: immediately resolved continuations must not change J->maxslot. */ |
| 1730 | J->maxslot = oldmaxslot; | ||
| 1731 | J->framedepth--; | ||
| 1685 | res[rd.nres] = TREF_NIL; /* Turn 0 results into nil result. */ | 1732 | res[rd.nres] = TREF_NIL; /* Turn 0 results into nil result. */ |
| 1686 | } else { | 1733 | } else { |
| 1687 | J->framedepth++; | ||
| 1688 | lua_assert(cres == CALLRES_PENDING); | 1734 | lua_assert(cres == CALLRES_PENDING); |
| 1735 | J->framedepth++; | ||
| 1689 | return 0; /* Pending call, no result yet. */ | 1736 | return 0; /* Pending call, no result yet. */ |
| 1690 | } | 1737 | } |
| 1691 | return 1; /* Result resolved immediately. */ | 1738 | return 1; /* Result resolved immediately. */ |
| @@ -2213,13 +2260,13 @@ static void rec_setup_side(jit_State *J, Trace *T) | |||
| 2213 | } | 2260 | } |
| 2214 | setslot: | 2261 | setslot: |
| 2215 | J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ | 2262 | J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ |
| 2216 | if ((sn & SNAP_FRAME) && s != 0) { | 2263 | if ((sn & SNAP_FRAME) && s != 0) |
| 2217 | J->baseslot = s+1; | 2264 | J->baseslot = s+1; |
| 2218 | J->framedepth++; | ||
| 2219 | } | ||
| 2220 | } | 2265 | } |
| 2221 | J->base = J->slot + J->baseslot; | 2266 | J->base = J->slot + J->baseslot; |
| 2222 | J->maxslot = snap->nslots - J->baseslot; | 2267 | J->maxslot = snap->nslots - J->baseslot; |
| 2268 | J->framedepth = snap->depth; /* Copy frames from snapshot. */ | ||
| 2269 | memcpy(J->frame, &map[nent+1], sizeof(SnapEntry)*(size_t)snap->depth); | ||
| 2223 | lj_snap_add(J); | 2270 | lj_snap_add(J); |
| 2224 | } | 2271 | } |
| 2225 | 2272 | ||
diff --git a/src/lj_snap.c b/src/lj_snap.c index 8a53e3f6..95dc77da 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
| @@ -68,49 +68,26 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
| 68 | return n; | 68 | return n; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | /* Add frame links at the end of the snapshot. */ | ||
| 72 | static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) | ||
| 73 | { | ||
| 74 | cTValue *frame = J->L->base - 1; | ||
| 75 | cTValue *lim = J->L->base - J->baseslot; | ||
| 76 | MSize f = 0; | ||
| 77 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ | ||
| 78 | while (frame > lim) { /* Backwards traversal of all frames above base. */ | ||
| 79 | if (frame_islua(frame)) { | ||
| 80 | map[f++] = SNAP_MKPC(frame_pc(frame)); | ||
| 81 | frame = frame_prevl(frame); | ||
| 82 | } else if (frame_ispcall(frame)) { | ||
| 83 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | ||
| 84 | frame = frame_prevd(frame); | ||
| 85 | } else if (frame_iscont(frame)) { | ||
| 86 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | ||
| 87 | map[f++] = SNAP_MKPC(frame_contpc(frame)); | ||
| 88 | frame = frame_prevd(frame); | ||
| 89 | } else { | ||
| 90 | lua_assert(0); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | return f; | ||
| 94 | } | ||
| 95 | |||
| 96 | /* Take a snapshot of the current stack. */ | 71 | /* Take a snapshot of the current stack. The map gets nent compressed slot entries, then the current PC, then J->framedepth frame links copied from J->frame[]. */ |
| 97 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | 72 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) |
| 98 | { | 73 | { |
| 99 | BCReg nslots = J->baseslot + J->maxslot; | 74 | BCReg nslots = J->baseslot + J->maxslot; |
| 100 | MSize nent, nframelinks; | 75 | MSize nent; |
| 101 | SnapEntry *p; | 76 | SnapEntry *p; |
| 102 | /* Conservative estimate. Continuation frames need 2 slots. */ | 77 | /* Conservative estimate: one entry per slot, one per frame link, plus the PC. */ |
| 103 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1); | 78 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); |
| 104 | p = &J->cur.snapmap[nsnapmap]; | 79 | p = &J->cur.snapmap[nsnapmap]; |
| 105 | nent = snapshot_slots(J, p, nslots); | 80 | nent = snapshot_slots(J, p, nslots); |
| 106 | nframelinks = snapshot_framelinks(J, p + nent); | ||
| 107 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks); | ||
| 108 | snap->mapofs = (uint16_t)nsnapmap; | 81 | snap->mapofs = (uint16_t)nsnapmap; |
| 109 | snap->ref = (IRRef1)J->cur.nins; | 82 | snap->ref = (IRRef1)J->cur.nins; |
| 110 | snap->nent = (uint8_t)nent; | 83 | snap->nent = (uint8_t)nent; |
| 111 | snap->nframelinks = (uint8_t)nframelinks; | 84 | snap->depth = (uint8_t)J->framedepth; /* Frame links only; the PC entry is counted separately. */ |
| 112 | snap->nslots = (uint8_t)nslots; | 85 | snap->nslots = (uint8_t)nslots; |
| 113 | snap->count = 0; | 86 | snap->count = 0; |
| 87 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); /* End of map: slot entries + PC + frame links. */ | ||
| 88 | /* Add frame links at the end of the snapshot. */ | ||
| 89 | p[nent] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ | ||
| 90 | memcpy(&p[nent+1], J->frame, sizeof(SnapEntry)*(size_t)J->framedepth); /* Links follow the PC in J->frame[] order. */ | ||
| 114 | } | 91 | } |
| 115 | 92 | ||
| 116 | /* Add or merge a snapshot. */ | 93 | /* Add or merge a snapshot. */ |
| @@ -141,14 +118,14 @@ void lj_snap_shrink(jit_State *J) | |||
| 141 | lua_assert(nslots < snap->nslots); | 118 | lua_assert(nslots < snap->nslots); |
| 142 | snap->nslots = (uint8_t)nslots; | 119 | snap->nslots = (uint8_t)nslots; |
| 143 | if (nent > 0 && snap_slot(map[nent-1]) >= nslots) { | 120 | if (nent > 0 && snap_slot(map[nent-1]) >= nslots) { |
| 144 | MSize s, delta, nframelinks = snap->nframelinks; | 121 | MSize s, delta, depth = snap->depth; |
| 145 | for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--) | 122 | for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--) |
| 146 | ; | 123 | ; |
| 147 | delta = snap->nent - nent; | 124 | delta = snap->nent - nent; |
| 148 | snap->nent = (uint8_t)nent; | 125 | snap->nent = (uint8_t)nent; |
| 149 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks); | 126 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + 1 + depth); |
| 150 | map += nent; | 127 | map += nent; |
| 151 | for (s = 0; s < nframelinks; s++) /* Move frame links down. */ | 128 | for (s = 0; s <= depth; s++) /* Move PC + frame links down. */ |
| 152 | map[s] = map[s+delta]; | 129 | map[s] = map[s+delta]; |
| 153 | } | 130 | } |
| 154 | } | 131 | } |
| @@ -210,7 +187,7 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
| 210 | SnapShot *snap = &T->snap[snapno]; | 187 | SnapShot *snap = &T->snap[snapno]; |
| 211 | MSize n, nent = snap->nent; | 188 | MSize n, nent = snap->nent; |
| 212 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 189 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
| 213 | SnapEntry *flinks = map + nent + snap->nframelinks; | 190 | SnapEntry *flinks = map + nent; |
| 214 | int32_t ftsz0; | 191 | int32_t ftsz0; |
| 215 | BCReg nslots = snap->nslots; | 192 | BCReg nslots = snap->nslots; |
| 216 | TValue *frame; | 193 | TValue *frame; |
| @@ -224,6 +201,7 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
| 224 | } | 201 | } |
| 225 | 202 | ||
| 226 | /* Fill stack slots with data from the registers and spill slots. */ | 203 | /* Fill stack slots with data from the registers and spill slots. */ |
| 204 | J->pc = snap_pc(*flinks++); | ||
| 227 | frame = L->base-1; | 205 | frame = L->base-1; |
| 228 | ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ | 206 | ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ |
| 229 | for (n = 0; n < nent; n++) { | 207 | for (n = 0; n < nent; n++) { |
| @@ -236,7 +214,7 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
| 236 | lj_ir_kvalue(L, o, ir); | 214 | lj_ir_kvalue(L, o, ir); |
| 237 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 215 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
| 238 | /* Overwrite tag with frame link. */ | 216 | /* Overwrite tag with frame link. */ |
| 239 | o->fr.tp.ftsz = s != 0 ? (int32_t)*--flinks : ftsz0; | 217 | o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks++ : ftsz0; |
| 240 | if ((sn & SNAP_FRAME)) { | 218 | if ((sn & SNAP_FRAME)) { |
| 241 | GCfunc *fn = ir_kfunc(ir); | 219 | GCfunc *fn = ir_kfunc(ir); |
| 242 | if (isluafunc(fn)) { | 220 | if (isluafunc(fn)) { |
| @@ -291,8 +269,7 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
| 291 | } | 269 | } |
| 292 | } | 270 | } |
| 293 | L->top = curr_topL(L); | 271 | L->top = curr_topL(L); |
| 294 | J->pc = snap_pc(*--flinks); | 272 | lua_assert(map + nent + 1 + snap->depth == flinks); |
| 295 | lua_assert(map + nent == flinks); | ||
| 296 | } | 273 | } |
| 297 | 274 | ||
| 298 | #undef IR | 275 | #undef IR |
