aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Pall <mike>2013-12-25 02:55:25 +0100
committerMike Pall <mike>2013-12-25 02:55:25 +0100
commitb5d741fa7e11a2a58df65f3c71489c58f8758f75 (patch)
tree3c33ec24c8fd363ca2ce797c998b21a8a0e39a5b
parent6e02c210c485791a5451cc74731acf319b2067bb (diff)
downloadluajit-b5d741fa7e11a2a58df65f3c71489c58f8758f75.tar.gz
luajit-b5d741fa7e11a2a58df65f3c71489c58f8758f75.tar.bz2
luajit-b5d741fa7e11a2a58df65f3c71489c58f8758f75.zip
Add trace stitching.
-rw-r--r--src/lib_base.c4
-rw-r--r--src/lib_jit.c2
-rw-r--r--src/lj_dispatch.c23
-rw-r--r--src/lj_dispatch.h6
-rw-r--r--src/lj_ffrecord.c130
-rw-r--r--src/lj_jit.h4
-rw-r--r--src/lj_record.c57
-rw-r--r--src/lj_record.h1
-rw-r--r--src/lj_snap.c3
-rw-r--r--src/lj_trace.c30
-rw-r--r--src/lj_trace.h1
-rw-r--r--src/lj_traceerr.h3
-rw-r--r--src/lj_vm.h1
-rw-r--r--src/vm_arm.dasc50
-rw-r--r--src/vm_mips.dasc58
-rw-r--r--src/vm_ppc.dasc55
-rw-r--r--src/vm_x86.dasc63
17 files changed, 422 insertions, 69 deletions
diff --git a/src/lib_base.c b/src/lib_base.c
index a19926a7..495e1ab1 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -101,7 +101,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
101#endif 101#endif
102 102
103LJLIB_PUSH(lastcl) 103LJLIB_PUSH(lastcl)
104LJLIB_ASM(pairs) 104LJLIB_ASM(pairs) LJLIB_REC(xpairs 0)
105{ 105{
106 return ffh_pairs(L, MM_pairs); 106 return ffh_pairs(L, MM_pairs);
107} 107}
@@ -114,7 +114,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.)
114} 114}
115 115
116LJLIB_PUSH(lastcl) 116LJLIB_PUSH(lastcl)
117LJLIB_ASM(ipairs) LJLIB_REC(.) 117LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1)
118{ 118{
119 return ffh_pairs(L, MM_ipairs); 119 return ffh_pairs(L, MM_ipairs);
120} 120}
diff --git a/src/lib_jit.c b/src/lib_jit.c
index a87e1833..1ee04b7d 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -284,7 +284,7 @@ static GCtrace *jit_checktrace(lua_State *L)
284/* Names of link types. ORDER LJ_TRLINK */ 284/* Names of link types. ORDER LJ_TRLINK */
285static const char *const jit_trlinkname[] = { 285static const char *const jit_trlinkname[] = {
286 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", 286 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
287 "interpreter", "return" 287 "interpreter", "return", "stitch"
288}; 288};
289 289
290/* local info = jit.util.traceinfo(tr) */ 290/* local info = jit.util.traceinfo(tr) */
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index 0146d8cd..b76e8048 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -42,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
42#include <math.h> 42#include <math.h>
43LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, 43LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
44 lua_State *co); 44 lua_State *co);
45#if !LJ_HASJIT
46#define lj_dispatch_stitch lj_dispatch_ins
47#endif
48#if !LJ_HASPROFILE
49#define lj_dispatch_profile lj_dispatch_ins
50#endif
45 51
46#define GOTFUNC(name) (ASMFunction)name, 52#define GOTFUNC(name) (ASMFunction)name,
47static const ASMFunction dispatch_got[] = { 53static const ASMFunction dispatch_got[] = {
@@ -511,6 +517,23 @@ out:
511 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ 517 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */
512} 518}
513 519
520#if LJ_HASJIT
521/* Stitch a new trace. */
522void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
523{
524 ERRNO_SAVE
525 lua_State *L = J->L;
526 void *cf = cframe_raw(L->cframe);
527 const BCIns *oldpc = cframe_pc(cf);
528 setcframe_pc(cf, pc);
529 /* Before dispatch, have to bias PC by 1. */
530 L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
531 lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */
532 setcframe_pc(cf, oldpc);
533 ERRNO_RESTORE
534}
535#endif
536
514#if LJ_HASPROFILE 537#if LJ_HASPROFILE
515/* Profile dispatch. */ 538/* Profile dispatch. */
516void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc) 539void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 811a0ae4..447eb2d8 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -29,7 +29,8 @@
29 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ 29 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
30 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ 30 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
31 _(pow) _(fmod) _(ldexp) \ 31 _(pow) _(fmod) _(ldexp) \
32 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_profile) _(lj_err_throw)\ 32 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
33 _(lj_dispatch_profile) _(lj_err_throw) \
33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 34 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 35 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 36 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
@@ -110,6 +111,9 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
110/* Instruction dispatch callback for hooks or when recording. */ 111/* Instruction dispatch callback for hooks or when recording. */
111LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); 112LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
112LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); 113LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
114#if LJ_HASJIT
115LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
116#endif
113#if LJ_HASPROFILE 117#if LJ_HASPROFILE
114LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc); 118LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
115#endif 119#endif
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 42aae8b5..6a156c7c 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -96,28 +96,81 @@ static ptrdiff_t results_wanted(jit_State *J)
96 return -1; 96 return -1;
97} 97}
98 98
99/* Throw error for unsupported variant of fast function. */ 99/* Trace stitching: add continuation below frame to start a new trace. */
100LJ_NORET static void recff_nyiu(jit_State *J) 100static void recff_stitch(jit_State *J)
101{ 101{
102 setfuncV(J->L, &J->errinfo, J->fn); 102 ASMFunction cont = lj_cont_stitch;
103 lj_trace_err_info(J, LJ_TRERR_NYIFFU); 103 TraceNo traceno = J->cur.traceno;
104 lua_State *L = J->L;
105 TValue *base = L->base;
106 const BCIns *pc = frame_pc(base-1);
107 TValue *pframe = frame_prevl(base-1);
108 TRef trcont;
109
110 /* Move func + args up in Lua stack and insert continuation. */
111 memmove(&base[1], &base[-1], sizeof(TValue)*(J->maxslot+1));
112 setframe_ftsz(base+1, (int)((char *)(base+1) - (char *)pframe) + FRAME_CONT);
113 setcont(base, cont);
114 setframe_pc(base, pc);
115 if (LJ_DUALNUM) setintV(base-1, traceno); else base[-1].u64 = traceno;
116 L->base += 2;
117 L->top += 2;
118
119 /* Ditto for the IR. */
120 memmove(&J->base[1], &J->base[-1], sizeof(TRef)*(J->maxslot+1));
121#if LJ_64
122 trcont = lj_ir_kptr(J, (void *)((int64_t)cont-(int64_t)lj_vm_asm_begin));
123#else
124 trcont = lj_ir_kptr(J, (void *)cont);
125#endif
126 J->base[0] = trcont | TREF_CONT;
127 J->base[-1] = LJ_DUALNUM ? lj_ir_kint(J,traceno) : lj_ir_knum_u64(J,traceno);
128 J->maxslot += 2;
129 J->framedepth++;
130
131 lj_record_stop(J, LJ_TRLINK_STITCH, 0);
132
133 /* Undo Lua stack changes. */
134 memmove(&base[-1], &base[1], sizeof(TValue)*(J->maxslot+1));
135 setframe_pc(base-1, pc);
136 L->base -= 2;
137 L->top -= 2;
104} 138}
105 139
106/* Fallback handler for all fast functions that are not recorded (yet). */ 140/* Fallback handler for fast functions that are not recorded (yet). */
107static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) 141static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
108{ 142{
109 setfuncV(J->L, &J->errinfo, J->fn); 143 if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
110 lj_trace_err_info(J, LJ_TRERR_NYIFF); 144 lj_trace_err_info(J, LJ_TRERR_TRACEUV);
111 UNUSED(rd); 145 } else {
146 /* Can only stitch from Lua call. */
147 if (J->framedepth && frame_islua(J->L->base-1)) {
148 BCOp op = bc_op(*frame_pc(J->L->base-1));
149 /* Stitched trace cannot start with *M op with variable # of args. */
150 if (!(op == BC_CALLM || op == BC_RETM || op == BC_TSETM)) {
151 switch (J->fn->c.ffid) {
152 case FF_error:
153 case FF_debug_sethook:
154 case FF_jit_flush:
155 break; /* Don't stitch across special builtins. */
156 default:
157 recff_stitch(J); /* Use trace stitching. */
158 rd->nres = -1;
159 return;
160 }
161 }
162 }
163 /* Otherwise stop trace and return to interpreter. */
164 lj_record_stop(J, LJ_TRLINK_RETURN, 0);
165 rd->nres = -1;
166 }
112} 167}
113 168
114/* C functions can have arbitrary side-effects and are not recorded (yet). */ 169/* Fallback handler for unsupported variants of fast functions. */
115static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) 170#define recff_nyiu recff_nyi
116{ 171
117 setfuncV(J->L, &J->errinfo, J->fn); 172/* Must stop the trace for classic C functions with arbitrary side-effects. */
118 lj_trace_err_info(J, LJ_TRERR_NYICF); 173#define recff_c recff_nyi
119 UNUSED(rd);
120}
121 174
122/* Emit BUFHDR for the global temporary buffer. */ 175/* Emit BUFHDR for the global temporary buffer. */
123static TRef recff_bufhdr(jit_State *J) 176static TRef recff_bufhdr(jit_State *J)
@@ -268,7 +321,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
268 J->base[i] = J->base[start+i]; 321 J->base[i] = J->base[start+i];
269 } /* else: Interpreter will throw. */ 322 } /* else: Interpreter will throw. */
270 } else { 323 } else {
271 recff_nyiu(J); 324 recff_nyiu(J, rd);
325 return;
272 } 326 }
273 } /* else: Interpreter will throw. */ 327 } /* else: Interpreter will throw. */
274} 328}
@@ -279,14 +333,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
279 TRef base = J->base[1]; 333 TRef base = J->base[1];
280 if (tr && !tref_isnil(base)) { 334 if (tr && !tref_isnil(base)) {
281 base = lj_opt_narrow_toint(J, base); 335 base = lj_opt_narrow_toint(J, base);
282 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) 336 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
283 recff_nyiu(J); 337 recff_nyiu(J, rd);
338 return;
339 }
284 } 340 }
285 if (tref_isnumber_str(tr)) { 341 if (tref_isnumber_str(tr)) {
286 if (tref_isstr(tr)) { 342 if (tref_isstr(tr)) {
287 TValue tmp; 343 TValue tmp;
288 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) 344 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
289 recff_nyiu(J); /* Would need an inverted STRTO for this case. */ 345 recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */
346 return;
347 }
290 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); 348 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
291 } 349 }
292#if LJ_HASFFI 350#if LJ_HASFFI
@@ -348,7 +406,8 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
348 } else if (tref_ispri(tr)) { 406 } else if (tref_ispri(tr)) {
349 J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0])); 407 J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
350 } else { 408 } else {
351 recff_nyiu(J); 409 recff_nyiu(J, rd);
410 return;
352 } 411 }
353 } 412 }
354} 413}
@@ -370,14 +429,14 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
370 } /* else: Interpreter will throw. */ 429 } /* else: Interpreter will throw. */
371} 430}
372 431
373static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) 432static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
374{ 433{
375 if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) { 434 if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) {
376 TRef tab = J->base[0]; 435 TRef tab = J->base[0];
377 if (tref_istab(tab)) { 436 if (tref_istab(tab)) {
378 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); 437 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
379 J->base[1] = tab; 438 J->base[1] = tab;
380 J->base[2] = lj_ir_kint(J, 0); 439 J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
381 rd->nres = 3; 440 rd->nres = 3;
382 } /* else: Interpreter will throw. */ 441 } /* else: Interpreter will throw. */
383 } 442 }
@@ -431,8 +490,7 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
431 J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV); 490 J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
432 return; 491 return;
433 } 492 }
434 recff_nyiu(J); 493 recff_nyiu(J, rd);
435 UNUSED(rd);
436} 494}
437 495
438/* -- Math library fast functions ----------------------------------------- */ 496/* -- Math library fast functions ----------------------------------------- */
@@ -672,8 +730,7 @@ static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
672 TRef tr = recff_bit64_tohex(J, rd, hdr); 730 TRef tr = recff_bit64_tohex(J, rd, hdr);
673 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); 731 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
674#else 732#else
675 UNUSED(rd); 733 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
676 recff_nyiu(J); /* Don't bother working around this NYI. */
677#endif 734#endif
678} 735}
679 736
@@ -891,7 +948,8 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
891 J->base[0] = TREF_NIL; 948 J->base[0] = TREF_NIL;
892 } 949 }
893 } else { /* Search for pattern. */ 950 } else { /* Search for pattern. */
894 recff_nyiu(J); 951 recff_nyiu(J, rd);
952 return;
895 } 953 }
896} 954}
897 955
@@ -931,7 +989,8 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
931 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); 989 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
932 lj_needsplit(J); 990 lj_needsplit(J);
933#else 991#else
934 recff_nyiu(J); /* Don't bother working around this NYI. */ 992 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
993 return;
935#endif 994#endif
936 } 995 }
937 break; 996 break;
@@ -946,8 +1005,10 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
946 if (LJ_SOFTFP) lj_needsplit(J); 1005 if (LJ_SOFTFP) lj_needsplit(J);
947 break; 1006 break;
948 case STRFMT_STR: 1007 case STRFMT_STR:
949 if (!tref_isstr(tra)) 1008 if (!tref_isstr(tra)) {
950 recff_nyiu(J); /* NYI: __tostring and non-string types for %s. */ 1009 recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */
1010 return;
1011 }
951 if (sf == STRFMT_STR) /* Shortcut for plain %s. */ 1012 if (sf == STRFMT_STR) /* Shortcut for plain %s. */
952 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra); 1013 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, tra);
953 else if ((sf & STRFMT_T_QUOTED)) 1014 else if ((sf & STRFMT_T_QUOTED))
@@ -966,8 +1027,8 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
966 case STRFMT_PTR: /* NYI */ 1027 case STRFMT_PTR: /* NYI */
967 case STRFMT_ERR: 1028 case STRFMT_ERR:
968 default: 1029 default:
969 recff_nyiu(J); 1030 recff_nyiu(J, rd);
970 break; 1031 return;
971 } 1032 }
972 } 1033 }
973 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); 1034 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
@@ -991,7 +1052,8 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
991 ix.idxchain = 0; 1052 ix.idxchain = 0;
992 lj_record_idx(J, &ix); /* Set new value. */ 1053 lj_record_idx(J, &ix); /* Set new value. */
993 } else { /* Complex case: insert in the middle. */ 1054 } else { /* Complex case: insert in the middle. */
994 recff_nyiu(J); 1055 recff_nyiu(J, rd);
1056 return;
995 } 1057 }
996 } /* else: Interpreter will throw. */ 1058 } /* else: Interpreter will throw. */
997} 1059}
diff --git a/src/lj_jit.h b/src/lj_jit.h
index cfb04aa7..52a216cc 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -97,6 +97,7 @@
97 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ 97 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
98 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ 98 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
99 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ 99 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
100 _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
100 \ 101 \
101 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ 102 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
102 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ 103 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -202,7 +203,8 @@ typedef enum {
202 LJ_TRLINK_UPREC, /* Up-recursion. */ 203 LJ_TRLINK_UPREC, /* Up-recursion. */
203 LJ_TRLINK_DOWNREC, /* Down-recursion. */ 204 LJ_TRLINK_DOWNREC, /* Down-recursion. */
204 LJ_TRLINK_INTERP, /* Fallback to interpreter. */ 205 LJ_TRLINK_INTERP, /* Fallback to interpreter. */
205 LJ_TRLINK_RETURN /* Return to interpreter. */ 206 LJ_TRLINK_RETURN, /* Return to interpreter. */
207 LJ_TRLINK_STITCH /* Trace stitching. */
206} TraceLink; 208} TraceLink;
207 209
208/* Trace object. */ 210/* Trace object. */
diff --git a/src/lj_record.c b/src/lj_record.c
index ce9e20de..4ab474ad 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -233,7 +233,7 @@ static void canonicalize_slots(jit_State *J)
233} 233}
234 234
235/* Stop recording. */ 235/* Stop recording. */
236static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) 236void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
237{ 237{
238 lj_trace_end(J); 238 lj_trace_end(J);
239 J->cur.linktype = (uint8_t)linktype; 239 J->cur.linktype = (uint8_t)linktype;
@@ -501,8 +501,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
501static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) 501static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
502{ 502{
503 BCReg ra = bc_a(iterins); 503 BCReg ra = bc_a(iterins);
504 lua_assert(J->base[ra] != 0); 504 if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
505 if (!tref_isnil(J->base[ra])) { /* Looping back? */
506 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ 505 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
507 J->maxslot = ra-1+bc_b(J->pc[-1]); 506 J->maxslot = ra-1+bc_b(J->pc[-1]);
508 J->pc += bc_j(iterins)+1; 507 J->pc += bc_j(iterins)+1;
@@ -540,12 +539,12 @@ static int innerloopleft(jit_State *J, const BCIns *pc)
540/* Handle the case when an interpreted loop op is hit. */ 539/* Handle the case when an interpreted loop op is hit. */
541static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) 540static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
542{ 541{
543 if (J->parent == 0) { 542 if (J->parent == 0 && J->exitno == 0) {
544 if (pc == J->startpc && J->framedepth + J->retdepth == 0) { 543 if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
545 /* Same loop? */ 544 /* Same loop? */
546 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ 545 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
547 lj_trace_err(J, LJ_TRERR_LLEAVE); 546 lj_trace_err(J, LJ_TRERR_LLEAVE);
548 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ 547 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
549 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ 548 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
550 /* It's usually better to abort here and wait until the inner loop 549 /* It's usually better to abort here and wait until the inner loop
551 ** is traced. But if the inner loop repeatedly didn't loop back, 550 ** is traced. But if the inner loop repeatedly didn't loop back,
@@ -570,15 +569,15 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
570/* Handle the case when an already compiled loop op is hit. */ 569/* Handle the case when an already compiled loop op is hit. */
571static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) 570static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
572{ 571{
573 if (J->parent == 0) { /* Root trace hit an inner loop. */ 572 if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */
574 /* Better let the inner loop spawn a side trace back here. */ 573 /* Better let the inner loop spawn a side trace back here. */
575 lj_trace_err(J, LJ_TRERR_LINNER); 574 lj_trace_err(J, LJ_TRERR_LINNER);
576 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ 575 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
577 J->instunroll = 0; /* Cannot continue across a compiled loop op. */ 576 J->instunroll = 0; /* Cannot continue across a compiled loop op. */
578 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 577 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
579 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ 578 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */
580 else 579 else
581 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ 580 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
582 } /* Side trace continues across a loop that's left or not entered. */ 581 } /* Side trace continues across a loop that's left or not entered. */
583} 582}
584 583
@@ -643,6 +642,18 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
643 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ 642 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
644 return tr; 643 return tr;
645 } 644 }
645 } else {
646 /* Don't specialize to non-monomorphic builtins. */
647 switch (fn->c.ffid) {
648 case FF_coroutine_wrap_aux:
649 case FF_string_gmatch_aux:
650 /* NYI: io_file_iter doesn't have an ffid, yet. */
651 /* NYI: specialize to ffid? Not strictly necessary, trace will stop. */
652 return tr;
653 default:
654 /* NYI: don't specialize to non-monomorphic C functions. */
655 break;
656 }
646 } 657 }
647 /* Otherwise specialize to the function (closure) value itself. */ 658 /* Otherwise specialize to the function (closure) value itself. */
648 kfunc = lj_ir_kfunc(J, fn); 659 kfunc = lj_ir_kfunc(J, fn);
@@ -750,12 +761,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
750 /* Return to lower frame via interpreter for unhandled cases. */ 761 /* Return to lower frame via interpreter for unhandled cases. */
751 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && 762 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
752 (!frame_islua(frame) || 763 (!frame_islua(frame) ||
753 (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { 764 (J->parent == 0 && J->exitno == 0 &&
765 !bc_isret(bc_op(J->cur.startins))))) {
754 /* NYI: specialize to frame type and return directly, not via RET*. */ 766 /* NYI: specialize to frame type and return directly, not via RET*. */
755 for (i = -1; i < (ptrdiff_t)rbase; i++) 767 for (i = -1; i < (ptrdiff_t)rbase; i++)
756 J->base[i] = 0; /* Purge dead slots. */ 768 J->base[i] = 0; /* Purge dead slots. */
757 J->maxslot = rbase + (BCReg)gotresults; 769 J->maxslot = rbase + (BCReg)gotresults;
758 rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ 770 lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
759 return; 771 return;
760 } 772 }
761 if (frame_isvarg(frame)) { 773 if (frame_isvarg(frame)) {
@@ -779,7 +791,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
779 if (check_downrec_unroll(J, pt)) { 791 if (check_downrec_unroll(J, pt)) {
780 J->maxslot = (BCReg)(rbase + gotresults); 792 J->maxslot = (BCReg)(rbase + gotresults);
781 lj_snap_purge(J); 793 lj_snap_purge(J);
782 rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ 794 lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */
783 return; 795 return;
784 } 796 }
785 lj_snap_add(J); 797 lj_snap_add(J);
@@ -792,7 +804,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
792 lua_assert(J->baseslot > cbase+1); 804 lua_assert(J->baseslot > cbase+1);
793 J->baseslot -= cbase+1; 805 J->baseslot -= cbase+1;
794 J->base -= cbase+1; 806 J->base -= cbase+1;
795 } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { 807 } else if (J->parent == 0 && J->exitno == 0 &&
808 !bc_isret(bc_op(J->cur.startins))) {
796 /* Return to lower frame would leave the loop in a root trace. */ 809 /* Return to lower frame would leave the loop in a root trace. */
797 lj_trace_err(J, LJ_TRERR_LLEAVE); 810 lj_trace_err(J, LJ_TRERR_LLEAVE);
798 } else { /* Return to lower frame. Guard for the target we return to. */ 811 } else { /* Return to lower frame. Guard for the target we return to. */
@@ -1480,9 +1493,9 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
1480 if (count + J->tailcalled > J->param[JIT_P_recunroll]) { 1493 if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
1481 J->pc++; 1494 J->pc++;
1482 if (J->framedepth + J->retdepth == 0) 1495 if (J->framedepth + J->retdepth == 0)
1483 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ 1496 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */
1484 else 1497 else
1485 rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ 1498 lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
1486 } 1499 }
1487 } else { 1500 } else {
1488 if (count > J->param[JIT_P_callunroll]) { 1501 if (count > J->param[JIT_P_callunroll]) {
@@ -1556,9 +1569,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
1556 } 1569 }
1557 J->instunroll = 0; /* Cannot continue across a compiled function. */ 1570 J->instunroll = 0; /* Cannot continue across a compiled function. */
1558 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 1571 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
1559 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ 1572 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */
1560 else 1573 else
1561 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ 1574 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
1562} 1575}
1563 1576
1564/* -- Vararg handling ----------------------------------------------------- */ 1577/* -- Vararg handling ----------------------------------------------------- */
@@ -2165,7 +2178,7 @@ void lj_record_ins(jit_State *J)
2165 case BC_JFORI: 2178 case BC_JFORI:
2166 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); 2179 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
2167 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ 2180 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
2168 rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); 2181 lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
2169 /* Continue tracing if the loop is not entered. */ 2182 /* Continue tracing if the loop is not entered. */
2170 break; 2183 break;
2171 2184
@@ -2299,6 +2312,12 @@ static const BCIns *rec_setup_root(jit_State *J)
2299 J->maxslot = J->pt->numparams; 2312 J->maxslot = J->pt->numparams;
2300 pc++; 2313 pc++;
2301 break; 2314 break;
2315 case BC_CALLM:
2316 case BC_CALL:
2317 case BC_ITERC:
2318 /* No bytecode range check for stitched traces. */
2319 pc++;
2320 break;
2302 default: 2321 default:
2303 lua_assert(0); 2322 lua_assert(0);
2304 break; 2323 break;
@@ -2366,7 +2385,7 @@ void lj_record_setup(jit_State *J)
2366 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || 2385 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
2367 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + 2386 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
2368 J->param[JIT_P_tryside]) { 2387 J->param[JIT_P_tryside]) {
2369 rec_stop(J, LJ_TRLINK_INTERP, 0); 2388 lj_record_stop(J, LJ_TRLINK_INTERP, 0);
2370 } 2389 }
2371 } else { /* Root trace. */ 2390 } else { /* Root trace. */
2372 J->cur.root = 0; 2391 J->cur.root = 0;
@@ -2378,6 +2397,8 @@ void lj_record_setup(jit_State *J)
2378 lj_snap_add(J); 2397 lj_snap_add(J);
2379 if (bc_op(J->cur.startins) == BC_FORL) 2398 if (bc_op(J->cur.startins) == BC_FORL)
2380 rec_for_loop(J, J->pc-1, &J->scev, 1); 2399 rec_for_loop(J, J->pc-1, &J->scev, 1);
2400 else if (bc_op(J->cur.startins) == BC_ITERC)
2401 J->startpc = NULL;
2381 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) 2402 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
2382 lj_trace_err(J, LJ_TRERR_STACKOV); 2403 lj_trace_err(J, LJ_TRERR_STACKOV);
2383 } 2404 }
diff --git a/src/lj_record.h b/src/lj_record.h
index 287b2604..5f08c18d 100644
--- a/src/lj_record.h
+++ b/src/lj_record.h
@@ -28,6 +28,7 @@ typedef struct RecordIndex {
28 28
29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, 29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
30 cTValue *av, cTValue *bv); 30 cTValue *av, cTValue *bv);
31LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
31LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); 32LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
32 33
33LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); 34LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
diff --git a/src/lj_snap.c b/src/lj_snap.c
index a25e4601..0c6cd776 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -97,7 +97,8 @@ static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
97{ 97{
98 cTValue *frame = J->L->base - 1; 98 cTValue *frame = J->L->base - 1;
99 cTValue *lim = J->L->base - J->baseslot; 99 cTValue *lim = J->L->base - J->baseslot;
100 cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; 100 GCfunc *fn = frame_func(frame);
101 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
101 MSize f = 0; 102 MSize f = 0;
102 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ 103 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
103 while (frame > lim) { /* Backwards traversal of all frames above base. */ 104 while (frame > lim) { /* Backwards traversal of all frames above base. */
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 2b8d931f..fa15e23d 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -360,7 +360,7 @@ static void trace_start(jit_State *J)
360 TraceNo traceno; 360 TraceNo traceno;
361 361
362 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ 362 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
363 if (J->parent == 0) { 363 if (J->parent == 0 && J->exitno == 0) {
364 /* Lazy bytecode patching to disable hotcount events. */ 364 /* Lazy bytecode patching to disable hotcount events. */
365 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || 365 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
366 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); 366 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF);
@@ -453,6 +453,12 @@ static void trace_stop(jit_State *J)
453 root->nextside = (TraceNo1)traceno; 453 root->nextside = (TraceNo1)traceno;
454 } 454 }
455 break; 455 break;
456 case BC_CALLM:
457 case BC_CALL:
458 case BC_ITERC:
459 /* Trace stitching: patch link of previous trace. */
460 traceref(J, J->exitno)->link = traceno;
461 break;
456 default: 462 default:
457 lua_assert(0); 463 lua_assert(0);
458 break; 464 break;
@@ -502,8 +508,12 @@ static int trace_abort(jit_State *J)
502 return 1; /* Retry ASM with new MCode area. */ 508 return 1; /* Retry ASM with new MCode area. */
503 } 509 }
504 /* Penalize or blacklist starting bytecode instruction. */ 510 /* Penalize or blacklist starting bytecode instruction. */
505 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) 511 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
506 penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); 512 if (J->exitno == 0)
513 penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e);
514 else
515 traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */
516 }
507 517
508 /* Is there anything to abort? */ 518 /* Is there anything to abort? */
509 traceno = J->cur.traceno; 519 traceno = J->cur.traceno;
@@ -680,6 +690,20 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
680 } 690 }
681} 691}
682 692
693/* Stitch a new trace to the previous trace. */
694void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
695{
696 /* Only start a new trace if not recording or inside __gc call or vmevent. */
697 if (J->state == LJ_TRACE_IDLE &&
698 !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
699 J->parent = 0; /* Have to treat it like a root trace. */
700 /* J->exitno is set to the invoking trace. */
701 J->state = LJ_TRACE_START;
702 lj_trace_ins(J, pc);
703 }
704}
705
706
683/* Tiny struct to pass data to protected call. */ 707/* Tiny struct to pass data to protected call. */
684typedef struct ExitDataCP { 708typedef struct ExitDataCP {
685 jit_State *J; 709 jit_State *J;
diff --git a/src/lj_trace.h b/src/lj_trace.h
index e30d3d59..be55e9d1 100644
--- a/src/lj_trace.h
+++ b/src/lj_trace.h
@@ -34,6 +34,7 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
34/* Event handling. */ 34/* Event handling. */
35LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); 35LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
36LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); 36LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
37LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
37LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); 38LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
38 39
39/* Signal asynchronous abort of trace or end of trace. */ 40/* Signal asynchronous abort of trace or end of trace. */
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index 8f463ca6..2546fc8f 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -7,6 +7,7 @@
7 7
8/* Recording. */ 8/* Recording. */
9TREDEF(RECERR, "error thrown or hook called during recording") 9TREDEF(RECERR, "error thrown or hook called during recording")
10TREDEF(TRACEUV, "trace too short")
10TREDEF(TRACEOV, "trace too long") 11TREDEF(TRACEOV, "trace too long")
11TREDEF(STACKOV, "trace too deep") 12TREDEF(STACKOV, "trace too deep")
12TREDEF(SNAPOV, "too many snapshots") 13TREDEF(SNAPOV, "too many snapshots")
@@ -23,8 +24,6 @@ TREDEF(BADTYPE, "bad argument type")
23TREDEF(CJITOFF, "JIT compilation disabled for function") 24TREDEF(CJITOFF, "JIT compilation disabled for function")
24TREDEF(CUNROLL, "call unroll limit reached") 25TREDEF(CUNROLL, "call unroll limit reached")
25TREDEF(DOWNREC, "down-recursion, restarting") 26TREDEF(DOWNREC, "down-recursion, restarting")
26TREDEF(NYICF, "NYI: C function %p")
27TREDEF(NYIFF, "NYI: FastFunc %s")
28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") 27TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
29TREDEF(NYIRETL, "NYI: return to lower frame") 28TREDEF(NYIRETL, "NYI: return to lower frame")
30 29
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 5893d0b2..4a1c2f2d 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -107,6 +107,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
107LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ 107LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
108LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ 108LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
109LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ 109LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */
110LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */
110 111
111enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ 112enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
112 113
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index b728d52f..559d20bd 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2082,6 +2082,55 @@ static void build_subroutines(BuildCtx *ctx)
2082 | ldr INS, [PC, #-4] 2082 | ldr INS, [PC, #-4]
2083 | bx CRET1 2083 | bx CRET1
2084 | 2084 |
2085 |->cont_stitch: // Trace stitching.
2086 |.if JIT
2087 | // RA = resultptr, CARG4 = meta base
2088 | ldr RB, SAVE_MULTRES
2089 | ldr INS, [PC, #-4]
2090 | ldr CARG3, [CARG4, #-24] // Save previous trace number.
2091 | subs RB, RB, #8
2092 | decode_RA8 RC, INS // Call base.
2093 | beq >2
2094 |1: // Move results down.
2095 | ldrd CARG12, [RA]
2096 | add RA, RA, #8
2097 | subs RB, RB, #8
2098 | strd CARG12, [BASE, RC]
2099 | add RC, RC, #8
2100 | bne <1
2101 |2:
2102 | decode_RA8 RA, INS
2103 | decode_RB8 RB, INS
2104 | add RA, RA, RB
2105 | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
2106 |3:
2107 | cmp RA, RC
2108 | mvn CARG2, #~LJ_TNIL
2109 | bhi >9 // More results wanted?
2110 |
2111 | ldr TRACE:RA, [CARG1, CARG3, lsl #2]
2112 | ldrh RC, TRACE:RA->link
2113 | cmp RC, CARG3
2114 | beq ->cont_nop // Blacklisted.
2115 | cmp RC, #0
2116 | bne =>BC_JLOOP // Jump to stitched trace.
2117 |
2118 | // Stitch a new trace to the previous trace.
2119 | str CARG3, [DISPATCH, #DISPATCH_J(exitno)]
2120 | str L, [DISPATCH, #DISPATCH_J(L)]
2121 | str BASE, L->base
2122 | sub CARG1, DISPATCH, #-GG_DISP2J
2123 | mov CARG2, PC
2124 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2125 | ldr BASE, L->base
2126 | b ->cont_nop
2127 |
2128 |9: // Fill up results with nil.
2129 | strd CARG12, [BASE, RC]
2130 | add RC, RC, #8
2131 | b <3
2132 |.endif
2133 |
2085 |->vm_profhook: // Dispatch target for profiler hook. 2134 |->vm_profhook: // Dispatch target for profiler hook.
2086#if LJ_HASPROFILE 2135#if LJ_HASPROFILE
2087 | mov CARG1, L 2136 | mov CARG1, L
@@ -2166,6 +2215,7 @@ static void build_subroutines(BuildCtx *ctx)
2166 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. 2215 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
2167 | subhs RC, RC, #8 2216 | subhs RC, RC, #8
2168 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 2217 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
2218 | ldrhs CARG3, [BASE, FRAME_FUNC]
2169 | bx OP 2219 | bx OP
2170 | 2220 |
2171 |3: // Rethrow error from the right C frame. 2221 |3: // Rethrow error from the right C frame.
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index f45a5c49..094ffe38 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -2011,6 +2011,60 @@ static void build_subroutines(BuildCtx *ctx)
2011 | jr CRET1 2011 | jr CRET1
2012 |. lw INS, -4(PC) 2012 |. lw INS, -4(PC)
2013 | 2013 |
2014 |->cont_stitch: // Trace stitching.
2015 |.if JIT
2016 | // RA = resultptr, RB = meta base
2017 | lw INS, -4(PC)
2018 | lw TMP3, -24+LO(RB) // Save previous trace number.
2019 | decode_RA8a RC, INS
2020 | addiu AT, MULTRES, -8
2021 | decode_RA8b RC
2022 | beqz AT, >2
2023 |. addu RC, BASE, RC // Call base.
2024 |1: // Move results down.
2025 | ldc1 f0, 0(RA)
2026 | addiu AT, AT, -8
2027 | addiu RA, RA, 8
2028 | sdc1 f0, 0(RC)
2029 | bnez AT, <1
2030 |. addiu RC, RC, 8
2031 |2:
2032 | decode_RA8a RA, INS
2033 | decode_RB8a RB, INS
2034 | decode_RA8b RA
2035 | decode_RB8b RB
2036 | addu RA, RA, RB
2037 | lw TMP1, DISPATCH_J(trace)(DISPATCH)
2038 | addu RA, BASE, RA
2039 |3:
2040 | sltu AT, RC, RA
2041 | bnez AT, >9 // More results wanted?
2042 |. sll TMP2, TMP3, 2
2043 |
2044 | addu TMP2, TMP1, TMP2
2045 | lw TRACE:TMP2, 0(TMP2)
2046 | lhu RD, TRACE:TMP2->link
2047 | beq RD, TMP3, ->cont_nop // Blacklisted.
2048 |. load_got lj_dispatch_stitch
2049 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2050 |. sll RD, RD, 3
2051 |
2052 | // Stitch a new trace to the previous trace.
2053 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2054 | sw L, DISPATCH_J(L)(DISPATCH)
2055 | sw BASE, L->base
2056 | addiu CARG1, DISPATCH, GG_DISP2J
2057 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2058 |. move CARG2, PC
2059 | b ->cont_nop
2060 |. lw BASE, L->base
2061 |
2062 |9:
2063 | sw TISNIL, HI(RC)
2064 | b <3
2065 |. addiu RC, RC, 8
2066 |.endif
2067 |
2014 |->vm_profhook: // Dispatch target for profiler hook. 2068 |->vm_profhook: // Dispatch target for profiler hook.
2015#if LJ_HASPROFILE 2069#if LJ_HASPROFILE
2016 | load_got lj_dispatch_profile 2070 | load_got lj_dispatch_profile
@@ -2091,13 +2145,13 @@ static void build_subroutines(BuildCtx *ctx)
2091 | sw BASE, L->base 2145 | sw BASE, L->base
2092 |1: 2146 |1:
2093 | bltz CRET1, >3 // Check for error from exit. 2147 | bltz CRET1, >3 // Check for error from exit.
2094 |. lw LFUNC:TMP1, FRAME_FUNC(BASE) 2148 |. lw LFUNC:RB, FRAME_FUNC(BASE)
2095 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2149 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2096 | sll MULTRES, CRET1, 3 2150 | sll MULTRES, CRET1, 3
2097 | li TISNIL, LJ_TNIL 2151 | li TISNIL, LJ_TNIL
2098 | sw MULTRES, SAVE_MULTRES 2152 | sw MULTRES, SAVE_MULTRES
2099 | mtc1 TMP3, TOBIT 2153 | mtc1 TMP3, TOBIT
2100 | lw TMP1, LFUNC:TMP1->pc 2154 | lw TMP1, LFUNC:RB->pc
2101 | sw r0, DISPATCH_GL(jit_base)(DISPATCH) 2155 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2102 | lw KBASE, PC2PROTO(k)(TMP1) 2156 | lw KBASE, PC2PROTO(k)(TMP1)
2103 | cvt.d.s TOBIT, TOBIT 2157 | cvt.d.s TOBIT, TOBIT
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 91de682d..c21f5c43 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -2505,6 +2505,55 @@ static void build_subroutines(BuildCtx *ctx)
2505 | mtctr CRET1 2505 | mtctr CRET1
2506 | bctr 2506 | bctr
2507 | 2507 |
2508 |->cont_stitch: // Trace stitching.
2509 |.if JIT
2510 | // RA = resultptr, RB = meta base
2511 | lwz INS, -4(PC)
2512 | lwz TMP3, -20(RB) // Save previous trace number.
2513 | addic. TMP1, MULTRES, -8
2514 | decode_RA8 RC, INS // Call base.
2515 | beq >2
2516 |1: // Move results down.
2517 | lfd f0, 0(RA)
2518 | addic. TMP1, TMP1, -8
2519 | addi RA, RA, 8
2520 | stfdx f0, BASE, RC
2521 | addi RC, RC, 8
2522 | bne <1
2523 |2:
2524 | decode_RA8 RA, INS
2525 | decode_RB8 RB, INS
2526 | add RA, RA, RB
2527 | lwz TMP1, DISPATCH_J(trace)(DISPATCH)
2528 |3:
2529 | cmplw RA, RC
2530 | bgt >9 // More results wanted?
2531 |
2532 | slwi TMP2, TMP3, 2
2533 | lwzx TRACE:TMP2, TMP1, TMP2
2534 | lhz RD, TRACE:TMP2->link
2535 | cmpw RD, TMP3
2536 | cmpwi cr1, RD, 0
2537 | beq ->cont_nop // Blacklisted.
2538 | slwi RD, RD, 3
2539 | bne cr1, =>BC_JLOOP // Jump to stitched trace.
2540 |
2541 | // Stitch a new trace to the previous trace.
2542 | stw TMP3, DISPATCH_J(exitno)(DISPATCH)
2543 | stp L, DISPATCH_J(L)(DISPATCH)
2544 | stp BASE, L->base
2545 | addi CARG1, DISPATCH, GG_DISP2J
2546 | mr CARG2, PC
2547 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2548 | lp BASE, L->base
2549 | b ->cont_nop
2550 |
2551 |9:
2552 | stwx TISNIL, BASE, RC
2553 | addi RC, RC, 8
2554 | b <3
2555 |.endif
2556 |
2508 |->vm_profhook: // Dispatch target for profiler hook. 2557 |->vm_profhook: // Dispatch target for profiler hook.
2509#if LJ_HASPROFILE 2558#if LJ_HASPROFILE
2510 | mr CARG1, L 2559 | mr CARG1, L
@@ -2557,7 +2606,7 @@ static void build_subroutines(BuildCtx *ctx)
2557 | sub CARG3, TMP0, CARG3 // Compute exit number. 2606 | sub CARG3, TMP0, CARG3 // Compute exit number.
2558 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 2607 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
2559 | srwi CARG3, CARG3, 2 2608 | srwi CARG3, CARG3, 2
2560 | stw L, DISPATCH_J(L)(DISPATCH) 2609 | stp L, DISPATCH_J(L)(DISPATCH)
2561 | subi CARG3, CARG3, 2 2610 | subi CARG3, CARG3, 2
2562 | stp BASE, L->base 2611 | stp BASE, L->base
2563 | stw CARG4, DISPATCH_J(parent)(DISPATCH) 2612 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
@@ -2589,11 +2638,11 @@ static void build_subroutines(BuildCtx *ctx)
2589 |1: 2638 |1:
2590 | cmpwi CARG1, 0 2639 | cmpwi CARG1, 0
2591 | blt >3 // Check for error from exit. 2640 | blt >3 // Check for error from exit.
2592 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 2641 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2593 | slwi MULTRES, CARG1, 3 2642 | slwi MULTRES, CARG1, 3
2594 | li TMP2, 0 2643 | li TMP2, 0
2595 | stw MULTRES, SAVE_MULTRES 2644 | stw MULTRES, SAVE_MULTRES
2596 | lwz TMP1, LFUNC:TMP1->pc 2645 | lwz TMP1, LFUNC:RB->pc
2597 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) 2646 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
2598 | lwz KBASE, PC2PROTO(k)(TMP1) 2647 | lwz KBASE, PC2PROTO(k)(TMP1)
2599 | // Setup type comparison constants. 2648 | // Setup type comparison constants.
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 8ed6efd1..c2f03d80 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -2659,6 +2659,67 @@ static void build_subroutines(BuildCtx *ctx)
2659 | add NARGS:RD, 1 2659 | add NARGS:RD, 1
2660 | jmp RBa 2660 | jmp RBa
2661 | 2661 |
2662 |->cont_stitch: // Trace stitching.
2663 |.if JIT
2664 | // BASE = base, RC = result, RB = mbase
2665 | mov RA, [RB-24] // Save previous trace number.
2666 | mov TMP1, RA
2667 | mov TMP3, DISPATCH // Need one more register.
2668 | mov DISPATCH, MULTRES
2669 | movzx RA, PC_RA
2670 | lea RA, [BASE+RA*8] // Call base.
2671 | sub DISPATCH, 1
2672 | jz >2
2673 |1: // Move results down.
2674 |.if X64
2675 | mov RBa, [RC]
2676 | mov [RA], RBa
2677 |.else
2678 | mov RB, [RC]
2679 | mov [RA], RB
2680 | mov RB, [RC+4]
2681 | mov [RA+4], RB
2682 |.endif
2683 | add RC, 8
2684 | add RA, 8
2685 | sub DISPATCH, 1
2686 | jnz <1
2687 |2:
2688 | movzx RC, PC_RA
2689 | movzx RB, PC_RB
2690 | add RC, RB
2691 | lea RC, [BASE+RC*8-8]
2692 |3:
2693 | cmp RC, RA
2694 | ja >9 // More results wanted?
2695 |
2696 | mov DISPATCH, TMP3
2697 | mov RB, TMP1 // Get previous trace number.
2698 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2699 | mov TRACE:RD, [RA+RB*4]
2700 | movzx RD, word TRACE:RD->link
2701 | cmp RD, RB
2702 | je ->cont_nop // Blacklisted.
2703 | test RD, RD
2704 | jne =>BC_JLOOP // Jump to stitched trace.
2705 |
2706 | // Stitch a new trace to the previous trace.
2707 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2708 | mov L:RB, SAVE_L
2709 | mov L:RB->base, BASE
2710 | mov FCARG2, PC
2711 | lea FCARG1, [DISPATCH+GG_DISP2J]
2712 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2713 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2714 | mov BASE, L:RB->base
2715 | jmp ->cont_nop
2716 |
2717 |9: // Fill up results with nil.
2718 | mov dword [RA+4], LJ_TNIL
2719 | add RA, 8
2720 | jmp <3
2721 |.endif
2722 |
2662 |->vm_profhook: // Dispatch target for profiler hook. 2723 |->vm_profhook: // Dispatch target for profiler hook.
2663#if LJ_HASPROFILE 2724#if LJ_HASPROFILE
2664 | mov L:RB, SAVE_L 2725 | mov L:RB, SAVE_L
@@ -5382,7 +5443,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5382 | ins_A // RA = base, RD = target (loop extent) 5443 | ins_A // RA = base, RD = target (loop extent)
5383 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5444 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5384 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 5445 | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
5385 |.if JIT 5446 |.if JIT
5386 | hotloop RB 5447 | hotloop RB
5387 |.endif 5448 |.endif
5388 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5449 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.