diff options
-rw-r--r-- | src/jit/dump.lua | 17 | ||||
-rw-r--r-- | src/lib_base.c | 2 | ||||
-rw-r--r-- | src/lj_asm.c | 12 | ||||
-rw-r--r-- | src/lj_asm_arm.h | 2 | ||||
-rw-r--r-- | src/lj_asm_arm64.h | 9 | ||||
-rw-r--r-- | src/lj_asm_mips.h | 2 | ||||
-rw-r--r-- | src/lj_asm_ppc.h | 5 | ||||
-rw-r--r-- | src/lj_asm_x86.h | 13 | ||||
-rw-r--r-- | src/lj_dispatch.c | 8 | ||||
-rw-r--r-- | src/lj_ffrecord.c | 34 | ||||
-rw-r--r-- | src/lj_ir.h | 2 | ||||
-rw-r--r-- | src/lj_ircall.h | 2 | ||||
-rw-r--r-- | src/lj_jit.h | 6 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 9 | ||||
-rw-r--r-- | src/lj_opt_mem.c | 5 | ||||
-rw-r--r-- | src/lj_record.c | 130 | ||||
-rw-r--r-- | src/lj_record.h | 1 | ||||
-rw-r--r-- | src/lj_snap.c | 10 | ||||
-rw-r--r-- | src/lj_trace.c | 22 | ||||
-rw-r--r-- | src/lj_vm.h | 2 | ||||
-rw-r--r-- | src/vm_arm.dasc | 79 | ||||
-rw-r--r-- | src/vm_arm64.dasc | 79 | ||||
-rw-r--r-- | src/vm_mips.dasc | 97 | ||||
-rw-r--r-- | src/vm_mips64.dasc | 92 | ||||
-rw-r--r-- | src/vm_ppc.dasc | 9 | ||||
-rw-r--r-- | src/vm_x64.dasc | 80 | ||||
-rw-r--r-- | src/vm_x86.dasc | 99 |
27 files changed, 781 insertions, 47 deletions
diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 5fb1e144..9eda08c4 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua | |||
@@ -219,8 +219,10 @@ local function colorize_text(s) | |||
219 | return s | 219 | return s |
220 | end | 220 | end |
221 | 221 | ||
222 | local function colorize_ansi(s, t) | 222 | local function colorize_ansi(s, t, extra) |
223 | return format(colortype_ansi[t], s) | 223 | local out = format(colortype_ansi[t], s) |
224 | if extra then out = "\027[3m"..out end | ||
225 | return out | ||
224 | end | 226 | end |
225 | 227 | ||
226 | local irtype_ansi = setmetatable({}, | 228 | local irtype_ansi = setmetatable({}, |
@@ -229,9 +231,10 @@ local irtype_ansi = setmetatable({}, | |||
229 | 231 | ||
230 | local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", } | 232 | local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", } |
231 | 233 | ||
232 | local function colorize_html(s, t) | 234 | local function colorize_html(s, t, extra) |
233 | s = gsub(s, "[<>&]", html_escape) | 235 | s = gsub(s, "[<>&]", html_escape) |
234 | return format('<span class="irt_%s">%s</span>', irtype_text[t], s) | 236 | return format('<span class="irt_%s%s">%s</span>', |
237 | irtype_text[t], extra and " irt_extra" or "", s) | ||
235 | end | 238 | end |
236 | 239 | ||
237 | local irtype_html = setmetatable({}, | 240 | local irtype_html = setmetatable({}, |
@@ -256,6 +259,7 @@ span.irt_tab { color: #c00000; } | |||
256 | span.irt_udt, span.irt_lud { color: #00c0c0; } | 259 | span.irt_udt, span.irt_lud { color: #00c0c0; } |
257 | span.irt_num { color: #4040c0; } | 260 | span.irt_num { color: #4040c0; } |
258 | span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } | 261 | span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } |
262 | span.irt_extra { font-style: italic; } | ||
259 | </style> | 263 | </style> |
260 | ]] | 264 | ]] |
261 | 265 | ||
@@ -271,6 +275,7 @@ local litname = { | |||
271 | if band(mode, 8) ~= 0 then s = s.."C" end | 275 | if band(mode, 8) ~= 0 then s = s.."C" end |
272 | if band(mode, 16) ~= 0 then s = s.."R" end | 276 | if band(mode, 16) ~= 0 then s = s.."R" end |
273 | if band(mode, 32) ~= 0 then s = s.."I" end | 277 | if band(mode, 32) ~= 0 then s = s.."I" end |
278 | if band(mode, 64) ~= 0 then s = s.."K" end | ||
274 | t[mode] = s | 279 | t[mode] = s |
275 | return s | 280 | return s |
276 | end}), | 281 | end}), |
@@ -350,7 +355,7 @@ local function formatk(tr, idx, sn) | |||
350 | else | 355 | else |
351 | s = tostring(k) -- For primitives. | 356 | s = tostring(k) -- For primitives. |
352 | end | 357 | end |
353 | s = colorize(format("%-4s", s), t) | 358 | s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0) |
354 | if slot then | 359 | if slot then |
355 | s = format("%s @%d", s, slot) | 360 | s = format("%s @%d", s, slot) |
356 | end | 361 | end |
@@ -370,7 +375,7 @@ local function printsnap(tr, snap) | |||
370 | out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) | 375 | out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) |
371 | else | 376 | else |
372 | local m, ot, op1, op2 = traceir(tr, ref) | 377 | local m, ot, op1, op2 = traceir(tr, ref) |
373 | out:write(colorize(format("%04d", ref), band(ot, 31))) | 378 | out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0)) |
374 | end | 379 | end |
375 | out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME | 380 | out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME |
376 | else | 381 | else |
diff --git a/src/lib_base.c b/src/lib_base.c index f16c66f5..55e3c6b8 100644 --- a/src/lib_base.c +++ b/src/lib_base.c | |||
@@ -76,7 +76,7 @@ LJLIB_ASM_(type) LJLIB_REC(.) | |||
76 | /* This solves a circular dependency problem -- change FF_next_N as needed. */ | 76 | /* This solves a circular dependency problem -- change FF_next_N as needed. */ |
77 | LJ_STATIC_ASSERT((int)FF_next == FF_next_N); | 77 | LJ_STATIC_ASSERT((int)FF_next == FF_next_N); |
78 | 78 | ||
79 | LJLIB_ASM(next) | 79 | LJLIB_ASM(next) LJLIB_REC(.) |
80 | { | 80 | { |
81 | lj_lib_checktab(L, 1); | 81 | lj_lib_checktab(L, 1); |
82 | lj_err_msg(L, LJ_ERR_NEXTIDX); | 82 | lj_err_msg(L, LJ_ERR_NEXTIDX); |
diff --git a/src/lj_asm.c b/src/lj_asm.c index d377eb4d..cc788407 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -2225,7 +2225,17 @@ static void asm_setup_regsp(ASMState *as) | |||
2225 | as->modset |= RSET_SCRATCH; | 2225 | as->modset |= RSET_SCRATCH; |
2226 | continue; | 2226 | continue; |
2227 | } | 2227 | } |
2228 | case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: { | 2228 | case IR_CALLL: |
2229 | /* lj_vm_next needs two TValues on the stack. */ | ||
2230 | #if LJ_TARGET_X64 && LJ_ABI_WIN | ||
2231 | if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4) | ||
2232 | as->evenspill = SPS_FIRST + 4; | ||
2233 | #else | ||
2234 | if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4) | ||
2235 | as->evenspill = 4; | ||
2236 | #endif | ||
2237 | /* fallthrough */ | ||
2238 | case IR_CALLN: case IR_CALLA: case IR_CALLS: { | ||
2229 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | 2239 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; |
2230 | ir->prev = asm_setup_call_slots(as, ir, ci); | 2240 | ir->prev = asm_setup_call_slots(as, ir, ci); |
2231 | if (inloop) | 2241 | if (inloop) |
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index e53f9b08..cc608c0d 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h | |||
@@ -2064,6 +2064,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2064 | } else if ((sn & SNAP_SOFTFPNUM)) { | 2064 | } else if ((sn & SNAP_SOFTFPNUM)) { |
2065 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); | 2065 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); |
2066 | #endif | 2066 | #endif |
2067 | } else if ((sn & SNAP_KEYINDEX)) { | ||
2068 | type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd); | ||
2067 | } else { | 2069 | } else { |
2068 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); | 2070 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); |
2069 | } | 2071 | } |
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 3cedd021..5decfff4 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
@@ -1814,7 +1814,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
1814 | IRIns *ir = IR(ref); | 1814 | IRIns *ir = IR(ref); |
1815 | if ((sn & SNAP_NORESTORE)) | 1815 | if ((sn & SNAP_NORESTORE)) |
1816 | continue; | 1816 | continue; |
1817 | if (irt_isnum(ir->t)) { | 1817 | if ((sn & SNAP_KEYINDEX)) { |
1818 | RegSet allow = rset_exclude(RSET_GPR, RID_BASE); | ||
1819 | Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) : | ||
1820 | ra_alloc1(as, ref, allow); | ||
1821 | rset_clear(allow, r); | ||
1822 | emit_lso(as, A64I_STRw, r, RID_BASE, ofs); | ||
1823 | emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4); | ||
1824 | } else if (irt_isnum(ir->t)) { | ||
1818 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 1825 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
1819 | emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); | 1826 | emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); |
1820 | } else { | 1827 | } else { |
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 7f7dc6a0..ba05f193 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h | |||
@@ -2568,6 +2568,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2568 | } else if ((sn & SNAP_SOFTFPNUM)) { | 2568 | } else if ((sn & SNAP_SOFTFPNUM)) { |
2569 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); | 2569 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); |
2570 | #endif | 2570 | #endif |
2571 | } else if ((sn & SNAP_KEYINDEX)) { | ||
2572 | type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); | ||
2571 | } else { | 2573 | } else { |
2572 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 2574 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); |
2573 | } | 2575 | } |
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index f99561b3..ac5d88ce 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
@@ -1103,7 +1103,8 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1103 | lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), | 1103 | lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), |
1104 | "inconsistent SLOAD variant"); | 1104 | "inconsistent SLOAD variant"); |
1105 | lj_assertA(LJ_DUALNUM || | 1105 | lj_assertA(LJ_DUALNUM || |
1106 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)), | 1106 | !irt_isint(t) || |
1107 | (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), | ||
1107 | "bad SLOAD type"); | 1108 | "bad SLOAD type"); |
1108 | #if LJ_SOFTFP | 1109 | #if LJ_SOFTFP |
1109 | lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), | 1110 | lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), |
@@ -2096,6 +2097,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2096 | } else if ((sn & SNAP_SOFTFPNUM)) { | 2097 | } else if ((sn & SNAP_SOFTFPNUM)) { |
2097 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); | 2098 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); |
2098 | #endif | 2099 | #endif |
2100 | } else if ((sn & SNAP_KEYINDEX)) { | ||
2101 | type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); | ||
2099 | } else { | 2102 | } else { |
2100 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | 2103 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); |
2101 | } | 2104 | } |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 48c31fe3..5eb18365 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -1700,7 +1700,8 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1700 | lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), | 1700 | lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), |
1701 | "inconsistent SLOAD variant"); | 1701 | "inconsistent SLOAD variant"); |
1702 | lj_assertA(LJ_DUALNUM || | 1702 | lj_assertA(LJ_DUALNUM || |
1703 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)), | 1703 | !irt_isint(t) || |
1704 | (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), | ||
1704 | "bad SLOAD type"); | 1705 | "bad SLOAD type"); |
1705 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | 1706 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { |
1706 | Reg left = ra_scratch(as, RSET_FPR); | 1707 | Reg left = ra_scratch(as, RSET_FPR); |
@@ -2727,7 +2728,15 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
2727 | IRIns *ir = IR(ref); | 2728 | IRIns *ir = IR(ref); |
2728 | if ((sn & SNAP_NORESTORE)) | 2729 | if ((sn & SNAP_NORESTORE)) |
2729 | continue; | 2730 | continue; |
2730 | if (irt_isnum(ir->t)) { | 2731 | if ((sn & SNAP_KEYINDEX)) { |
2732 | emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX); | ||
2733 | if (irref_isk(ref)) { | ||
2734 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
2735 | } else { | ||
2736 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
2737 | emit_movtomro(as, src, RID_BASE, ofs); | ||
2738 | } | ||
2739 | } else if (irt_isnum(ir->t)) { | ||
2731 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 2740 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
2732 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | 2741 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); |
2733 | } else { | 2742 | } else { |
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index bf8d8812..7b73d3dd 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c | |||
@@ -68,6 +68,8 @@ void lj_dispatch_init(GG_State *GG) | |||
68 | /* The JIT engine is off by default. luaopen_jit() turns it on. */ | 68 | /* The JIT engine is off by default. luaopen_jit() turns it on. */ |
69 | disp[BC_FORL] = disp[BC_IFORL]; | 69 | disp[BC_FORL] = disp[BC_IFORL]; |
70 | disp[BC_ITERL] = disp[BC_IITERL]; | 70 | disp[BC_ITERL] = disp[BC_IITERL]; |
71 | /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */ | ||
72 | disp[BC_ITERN] = &lj_vm_IITERN; | ||
71 | disp[BC_LOOP] = disp[BC_ILOOP]; | 73 | disp[BC_LOOP] = disp[BC_ILOOP]; |
72 | disp[BC_FUNCF] = disp[BC_IFUNCF]; | 74 | disp[BC_FUNCF] = disp[BC_IFUNCF]; |
73 | disp[BC_FUNCV] = disp[BC_IFUNCV]; | 75 | disp[BC_FUNCV] = disp[BC_IFUNCV]; |
@@ -118,19 +120,21 @@ void lj_dispatch_update(global_State *g) | |||
118 | mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; | 120 | mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; |
119 | if (oldmode != mode) { /* Mode changed? */ | 121 | if (oldmode != mode) { /* Mode changed? */ |
120 | ASMFunction *disp = G2GG(g)->dispatch; | 122 | ASMFunction *disp = G2GG(g)->dispatch; |
121 | ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv; | 123 | ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv; |
122 | g->dispatchmode = mode; | 124 | g->dispatchmode = mode; |
123 | 125 | ||
124 | /* Hotcount if JIT is on, but not while recording. */ | 126 | /* Hotcount if JIT is on, but not while recording. */ |
125 | if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) { | 127 | if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) { |
126 | f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]); | 128 | f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]); |
127 | f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]); | 129 | f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]); |
130 | f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]); | ||
128 | f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]); | 131 | f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]); |
129 | f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]); | 132 | f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]); |
130 | f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]); | 133 | f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]); |
131 | } else { /* Otherwise use the non-hotcounting instructions. */ | 134 | } else { /* Otherwise use the non-hotcounting instructions. */ |
132 | f_forl = disp[GG_LEN_DDISP+BC_IFORL]; | 135 | f_forl = disp[GG_LEN_DDISP+BC_IFORL]; |
133 | f_iterl = disp[GG_LEN_DDISP+BC_IITERL]; | 136 | f_iterl = disp[GG_LEN_DDISP+BC_IITERL]; |
137 | f_itern = &lj_vm_IITERN; | ||
134 | f_loop = disp[GG_LEN_DDISP+BC_ILOOP]; | 138 | f_loop = disp[GG_LEN_DDISP+BC_ILOOP]; |
135 | f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]); | 139 | f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]); |
136 | f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]); | 140 | f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]); |
@@ -138,6 +142,7 @@ void lj_dispatch_update(global_State *g) | |||
138 | /* Init static counting instruction dispatch first (may be copied below). */ | 142 | /* Init static counting instruction dispatch first (may be copied below). */ |
139 | disp[GG_LEN_DDISP+BC_FORL] = f_forl; | 143 | disp[GG_LEN_DDISP+BC_FORL] = f_forl; |
140 | disp[GG_LEN_DDISP+BC_ITERL] = f_iterl; | 144 | disp[GG_LEN_DDISP+BC_ITERL] = f_iterl; |
145 | disp[GG_LEN_DDISP+BC_ITERN] = f_itern; | ||
141 | disp[GG_LEN_DDISP+BC_LOOP] = f_loop; | 146 | disp[GG_LEN_DDISP+BC_LOOP] = f_loop; |
142 | 147 | ||
143 | /* Set dynamic instruction dispatch. */ | 148 | /* Set dynamic instruction dispatch. */ |
@@ -165,6 +170,7 @@ void lj_dispatch_update(global_State *g) | |||
165 | /* Otherwise set dynamic counting ins. */ | 170 | /* Otherwise set dynamic counting ins. */ |
166 | disp[BC_FORL] = f_forl; | 171 | disp[BC_FORL] = f_forl; |
167 | disp[BC_ITERL] = f_iterl; | 172 | disp[BC_ITERL] = f_iterl; |
173 | disp[BC_ITERN] = f_itern; | ||
168 | disp[BC_LOOP] = f_loop; | 174 | disp[BC_LOOP] = f_loop; |
169 | /* Set dynamic return dispatch. */ | 175 | /* Set dynamic return dispatch. */ |
170 | if ((mode & DISPMODE_RET)) { | 176 | if ((mode & DISPMODE_RET)) { |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 24432d84..01e53fb6 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
@@ -521,6 +521,40 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) | |||
521 | recff_nyiu(J, rd); | 521 | recff_nyiu(J, rd); |
522 | } | 522 | } |
523 | 523 | ||
524 | static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd) | ||
525 | { | ||
526 | #if LJ_BE | ||
527 | /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, | ||
528 | ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. | ||
529 | */ | ||
530 | recff_nyi(J, rd); | ||
531 | #else | ||
532 | TRef tab = J->base[0]; | ||
533 | if (tref_istab(tab)) { | ||
534 | RecordIndex ix; | ||
535 | cTValue *keyv; | ||
536 | ix.tab = tab; | ||
537 | if (tref_isnil(J->base[1])) { /* Shortcut for start of traversal. */ | ||
538 | ix.key = lj_ir_kint(J, 0); | ||
539 | keyv = niltvg(J2G(J)); | ||
540 | } else { | ||
541 | TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); | ||
542 | ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp); | ||
543 | keyv = &rd->argv[1]; | ||
544 | } | ||
545 | copyTV(J->L, &ix.tabv, &rd->argv[0]); | ||
546 | ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv); | ||
547 | /* Omit the value, if not used by the caller. */ | ||
548 | ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) && | ||
549 | bc_b(frame_pc(J->L->base-1)[-1]) <= 2); | ||
550 | ix.mobj = 0; /* We don't need the next index. */ | ||
551 | rd->nres = lj_record_next(J, &ix); | ||
552 | J->base[0] = ix.key; | ||
553 | J->base[1] = ix.val; | ||
554 | } /* else: Interpreter will throw. */ | ||
555 | #endif | ||
556 | } | ||
557 | |||
524 | /* -- Math library fast functions ----------------------------------------- */ | 558 | /* -- Math library fast functions ----------------------------------------- */ |
525 | 559 | ||
526 | static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) | 560 | static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) |
diff --git a/src/lj_ir.h b/src/lj_ir.h index 6a161933..2b127f6c 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -236,6 +236,7 @@ IRFLDEF(FLENUM) | |||
236 | #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ | 236 | #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ |
237 | #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ | 237 | #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ |
238 | #define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ | 238 | #define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ |
239 | #define IRSLOAD_KEYINDEX 0x40 /* Table traversal key index. */ | ||
239 | 240 | ||
240 | /* XLOAD mode bits, stored in op2. */ | 241 | /* XLOAD mode bits, stored in op2. */ |
241 | #define IRXLOAD_READONLY 0x01 /* Load from read-only data. */ | 242 | #define IRXLOAD_READONLY 0x01 /* Load from read-only data. */ |
@@ -495,6 +496,7 @@ typedef uint32_t TRef; | |||
495 | #define TREF_REFMASK 0x0000ffff | 496 | #define TREF_REFMASK 0x0000ffff |
496 | #define TREF_FRAME 0x00010000 | 497 | #define TREF_FRAME 0x00010000 |
497 | #define TREF_CONT 0x00020000 | 498 | #define TREF_CONT 0x00020000 |
499 | #define TREF_KEYINDEX 0x00100000 | ||
498 | 500 | ||
499 | #define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) | 501 | #define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) |
500 | 502 | ||
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index c837b18d..9e7013ba 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
@@ -187,6 +187,8 @@ typedef struct CCallInfo { | |||
187 | _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \ | 187 | _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \ |
188 | _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ | 188 | _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ |
189 | _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \ | 189 | _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \ |
190 | _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \ | ||
191 | _(ANY, lj_vm_next, 2, FL, PTR, 0) \ | ||
190 | _(ANY, lj_tab_len, 1, FL, INT, 0) \ | 192 | _(ANY, lj_tab_len, 1, FL, INT, 0) \ |
191 | _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ | 193 | _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ |
192 | _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ | 194 | _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ |
diff --git a/src/lj_jit.h b/src/lj_jit.h index 34ddf907..c9fe8319 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -150,6 +150,7 @@ typedef enum { | |||
150 | LJ_TRACE_IDLE, /* Trace compiler idle. */ | 150 | LJ_TRACE_IDLE, /* Trace compiler idle. */ |
151 | LJ_TRACE_ACTIVE = 0x10, | 151 | LJ_TRACE_ACTIVE = 0x10, |
152 | LJ_TRACE_RECORD, /* Bytecode recording active. */ | 152 | LJ_TRACE_RECORD, /* Bytecode recording active. */ |
153 | LJ_TRACE_RECORD_1ST, /* Record 1st instruction, too. */ | ||
153 | LJ_TRACE_START, /* New trace started. */ | 154 | LJ_TRACE_START, /* New trace started. */ |
154 | LJ_TRACE_END, /* End of trace. */ | 155 | LJ_TRACE_END, /* End of trace. */ |
155 | LJ_TRACE_ASM, /* Assemble trace. */ | 156 | LJ_TRACE_ASM, /* Assemble trace. */ |
@@ -200,12 +201,15 @@ typedef uint32_t SnapEntry; | |||
200 | #define SNAP_CONT 0x020000 /* Continuation slot. */ | 201 | #define SNAP_CONT 0x020000 /* Continuation slot. */ |
201 | #define SNAP_NORESTORE 0x040000 /* No need to restore slot. */ | 202 | #define SNAP_NORESTORE 0x040000 /* No need to restore slot. */ |
202 | #define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */ | 203 | #define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */ |
204 | #define SNAP_KEYINDEX 0x100000 /* Traversal key index. */ | ||
203 | LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); | 205 | LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); |
204 | LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); | 206 | LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); |
207 | LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX); | ||
205 | 208 | ||
206 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) | 209 | #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) |
207 | #define SNAP_TR(slot, tr) \ | 210 | #define SNAP_TR(slot, tr) \ |
208 | (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) | 211 | (((SnapEntry)(slot) << 24) + \ |
212 | ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK))) | ||
209 | #if !LJ_FR2 | 213 | #if !LJ_FR2 |
210 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) | 214 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) |
211 | #endif | 215 | #endif |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 41e0d1ca..2f903e27 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -2320,6 +2320,15 @@ LJFOLDF(fload_sbuf) | |||
2320 | return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD; | 2320 | return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD; |
2321 | } | 2321 | } |
2322 | 2322 | ||
2323 | /* The fast function ID of function objects is immutable. */ | ||
2324 | LJFOLD(FLOAD KGC IRFL_FUNC_FFID) | ||
2325 | LJFOLDF(fload_func_ffid_kgc) | ||
2326 | { | ||
2327 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | ||
2328 | return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid); | ||
2329 | return NEXTFOLD; | ||
2330 | } | ||
2331 | |||
2323 | /* The C type ID of cdata objects is immutable. */ | 2332 | /* The C type ID of cdata objects is immutable. */ |
2324 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) | 2333 | LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) |
2325 | LJFOLDF(fload_cdata_typeid_kgc) | 2334 | LJFOLDF(fload_cdata_typeid_kgc) |
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 81184f14..d6a419e4 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c | |||
@@ -364,7 +364,10 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) | |||
364 | /* Different value: try to eliminate the redundant store. */ | 364 | /* Different value: try to eliminate the redundant store. */ |
365 | if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ | 365 | if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ |
366 | IRIns *ir; | 366 | IRIns *ir; |
367 | /* Check for any intervening guards (includes conflicting loads). */ | 367 | /* Check for any intervening guards (includes conflicting loads). |
368 | ** Note that lj_tab_keyindex and lj_vm_next don't need guards, | ||
369 | ** since they are followed by at least one guarded VLOAD. | ||
370 | */ | ||
368 | for (ir = IR(J->cur.nins-1); ir > store; ir--) | 371 | for (ir = IR(J->cur.nins-1); ir > store; ir--) |
369 | if (irt_isguard(ir->t) || ir->o == IR_ALEN) | 372 | if (irt_isguard(ir->t) || ir->o == IR_ALEN) |
370 | goto doemit; /* No elimination possible. */ | 373 | goto doemit; /* No elimination possible. */ |
diff --git a/src/lj_record.c b/src/lj_record.c index a1471aae..e51c98ba 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -156,6 +156,9 @@ static void rec_check_slots(jit_State *J) | |||
156 | lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME), | 156 | lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME), |
157 | "cont slot %d not followed by frame", s); | 157 | "cont slot %d not followed by frame", s); |
158 | depth++; | 158 | depth++; |
159 | } else if ((tr & TREF_KEYINDEX)) { | ||
160 | lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d", | ||
161 | s, tref_type(tr)); | ||
159 | } else { | 162 | } else { |
160 | /* Number repr. may differ, but other types must be the same. */ | 163 | /* Number repr. may differ, but other types must be the same. */ |
161 | lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) : | 164 | lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) : |
@@ -283,9 +286,9 @@ static void canonicalize_slots(jit_State *J) | |||
283 | if (LJ_DUALNUM) return; | 286 | if (LJ_DUALNUM) return; |
284 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { | 287 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { |
285 | TRef tr = J->slot[s]; | 288 | TRef tr = J->slot[s]; |
286 | if (tref_isinteger(tr)) { | 289 | if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) { |
287 | IRIns *ir = IR(tref_ref(tr)); | 290 | IRIns *ir = IR(tref_ref(tr)); |
288 | if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) | 291 | if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY)))) |
289 | J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); | 292 | J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); |
290 | } | 293 | } |
291 | } | 294 | } |
@@ -606,6 +609,7 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) | |||
606 | { | 609 | { |
607 | if (J->parent == 0 && J->exitno == 0) { | 610 | if (J->parent == 0 && J->exitno == 0) { |
608 | if (pc == J->startpc && J->framedepth + J->retdepth == 0) { | 611 | if (pc == J->startpc && J->framedepth + J->retdepth == 0) { |
612 | if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */ | ||
609 | /* Same loop? */ | 613 | /* Same loop? */ |
610 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ | 614 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ |
611 | lj_trace_err(J, LJ_TRERR_LLEAVE); | 615 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
@@ -646,6 +650,68 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) | |||
646 | } /* Side trace continues across a loop that's left or not entered. */ | 650 | } /* Side trace continues across a loop that's left or not entered. */ |
647 | } | 651 | } |
648 | 652 | ||
653 | /* Record ITERN. */ | ||
654 | static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb) | ||
655 | { | ||
656 | #if LJ_BE | ||
657 | /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, | ||
658 | ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. | ||
659 | */ | ||
660 | UNUSED(ra); UNUSED(rb); | ||
661 | setintV(&J->errinfo, (int32_t)BC_ITERN); | ||
662 | lj_trace_err_info(J, LJ_TRERR_NYIBC); | ||
663 | #else | ||
664 | RecordIndex ix; | ||
665 | /* Since ITERN is recorded at the start, we need our own loop detection. */ | ||
666 | if (J->pc == J->startpc && J->cur.nins > REF_FIRST && | ||
667 | J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) { | ||
668 | lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ | ||
669 | return LOOPEV_ENTER; | ||
670 | } | ||
671 | J->maxslot = ra; | ||
672 | lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */ | ||
673 | ix.tab = getslot(J, ra-2); | ||
674 | ix.key = J->base[ra-1] ? J->base[ra-1] : | ||
675 | sloadt(J, (int32_t)(ra-1), IRT_INT, IRSLOAD_KEYINDEX); | ||
676 | copyTV(J->L, &ix.tabv, &J->L->base[ra-2]); | ||
677 | copyTV(J->L, &ix.keyv, &J->L->base[ra-1]); | ||
678 | ix.idxchain = (rb < 3); /* Omit value type check, if unused. */ | ||
679 | ix.mobj = 1; /* We need the next index, too. */ | ||
680 | J->maxslot = ra + lj_record_next(J, &ix); | ||
681 | J->needsnap = 1; | ||
682 | if (!tref_isnil(ix.key)) { /* Looping back? */ | ||
683 | J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. */ | ||
684 | J->base[ra] = ix.key; | ||
685 | J->base[ra+1] = ix.val; | ||
686 | J->pc += bc_j(J->pc[1])+2; | ||
687 | return LOOPEV_ENTER; | ||
688 | } else { | ||
689 | J->maxslot = ra-3; | ||
690 | J->pc += 2; | ||
691 | return LOOPEV_LEAVE; | ||
692 | } | ||
693 | #endif | ||
694 | } | ||
695 | |||
696 | /* Record ISNEXT. */ | ||
697 | static void rec_isnext(jit_State *J, BCReg ra) | ||
698 | { | ||
699 | cTValue *b = &J->L->base[ra-3]; | ||
700 | if (tvisfunc(b) && funcV(b)->c.ffid == FF_next && | ||
701 | tvistab(b+1) && tvisnil(b+2)) { | ||
702 | /* These checks are folded away for a compiled pairs(). */ | ||
703 | TRef func = getslot(J, ra-3); | ||
704 | TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID); | ||
705 | emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next)); | ||
706 | (void)getslot(J, ra-2); /* Type check for table. */ | ||
707 | (void)getslot(J, ra-1); /* Type check for nil key. */ | ||
708 | J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX; | ||
709 | J->maxslot = ra; | ||
710 | } else { /* Abort trace. Interpreter will despecialize bytecode. */ | ||
711 | lj_trace_err(J, LJ_TRERR_RECERR); | ||
712 | } | ||
713 | } | ||
714 | |||
649 | /* -- Record profiler hook checks ----------------------------------------- */ | 715 | /* -- Record profiler hook checks ----------------------------------------- */ |
650 | 716 | ||
651 | #if LJ_HASPROFILE | 717 | #if LJ_HASPROFILE |
@@ -716,7 +782,7 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) | |||
716 | /* NYI: io_file_iter doesn't have an ffid, yet. */ | 782 | /* NYI: io_file_iter doesn't have an ffid, yet. */ |
717 | { /* Specialize to the ffid. */ | 783 | { /* Specialize to the ffid. */ |
718 | TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID); | 784 | TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID); |
719 | emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid)); | 785 | emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid)); |
720 | } | 786 | } |
721 | return tr; | 787 | return tr; |
722 | default: | 788 | default: |
@@ -1565,6 +1631,47 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1565 | } | 1631 | } |
1566 | } | 1632 | } |
1567 | 1633 | ||
1634 | /* Determine result type of table traversal: scan from position idx (array part first, then hash part) for the first non-nil value and return the key IR type in the low 8 bits plus the value IR type shifted left by 8. Both are IRT_NIL if nothing is left. */ | ||
1635 | static IRType rec_next_types(GCtab *t, uint32_t idx) | ||
1636 | { | ||
1637 | for (; idx < t->asize; idx++) { /* Array part: positions below asize. */ | ||
1638 | cTValue *a = arrayslot(t, idx); | ||
1639 | if (LJ_LIKELY(!tvisnil(a))) | ||
1640 | return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8); /* Array keys are typed IRT_INT with LJ_DUALNUM, else IRT_NUM. */ | ||
1641 | } | ||
1642 | idx -= t->asize; /* Rebase idx so it indexes the hash node array. */ | ||
1643 | for (; idx <= t->hmask; idx++) { /* Hash part: nodes idx..hmask, inclusive bound. */ | ||
1644 | Node *n = &noderef(t->node)[idx]; | ||
1645 | if (!tvisnil(&n->val)) | ||
1646 | return itype2irt(&n->key) + (itype2irt(&n->val) << 8); | ||
1647 | } | ||
1648 | return IRT_NIL + (IRT_NIL << 8); /* No non-nil value found from idx onwards. */ | ||
1649 | } | ||
1650 | |||
1651 | /* Record a table traversal step aka next(). Emits a call to lj_vm_next plus typed loads of its result. Returns 1 if only ix->key was loaded (ix->val is set to TREF_NIL), or 2 if ix->val was loaded as well. */ | ||
1652 | int lj_record_next(jit_State *J, RecordIndex *ix) | ||
1653 | { | ||
1654 | IRType t, tkey, tval; | ||
1655 | TRef trvk; | ||
1656 | t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo); /* Predict key/value types from the table's current contents; keyv.u32.lo is presumably the current traversal index -- confirm. */ | ||
1657 | tkey = (t & 0xff); tval = (t >> 8); /* Unpack: key type in the low byte, value type above it. */ | ||
1658 | trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key); | ||
1659 | if (ix->mobj || tkey == IRT_NIL) { /* Index result is needed when ix->mobj is set on entry (presumably a caller request flag -- confirm) or when no further entry is predicted. */ | ||
1660 | TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk); /* NOTE(review): HIOP presumably yields the next index returned alongside the result pointer -- confirm. */ | ||
1661 | /* Always check for invalid key from next() for nil result. */ | ||
1662 | if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1)); | ||
1663 | ix->mobj = idx; /* Hand the index TRef back to the caller through ix->mobj. */ | ||
1664 | } | ||
1665 | ix->key = lj_record_vload(J, trvk, 1, tkey); /* Key is loaded from element 1 of the result, with the predicted type. */ | ||
1666 | if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */ | ||
1667 | ix->val = TREF_NIL; | ||
1668 | return 1; | ||
1669 | } else { /* Need value. */ | ||
1670 | ix->val = lj_record_vload(J, trvk, 0, tval); /* Value is loaded from element 0 of the result. */ | ||
1671 | return 2; | ||
1672 | } | ||
1673 | } | ||
1674 | |||
1568 | static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i) | 1675 | static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i) |
1569 | { | 1676 | { |
1570 | RecordIndex ix; | 1677 | RecordIndex ix; |
@@ -2440,6 +2547,9 @@ void lj_record_ins(jit_State *J) | |||
2440 | case BC_ITERL: | 2547 | case BC_ITERL: |
2441 | rec_loop_interp(J, pc, rec_iterl(J, *pc)); | 2548 | rec_loop_interp(J, pc, rec_iterl(J, *pc)); |
2442 | break; | 2549 | break; |
2550 | case BC_ITERN: | ||
2551 | rec_loop_interp(J, pc, rec_itern(J, ra, rb)); | ||
2552 | break; | ||
2443 | case BC_LOOP: | 2553 | case BC_LOOP: |
2444 | rec_loop_interp(J, pc, rec_loop(J, ra, 1)); | 2554 | rec_loop_interp(J, pc, rec_loop(J, ra, 1)); |
2445 | break; | 2555 | break; |
@@ -2468,6 +2578,10 @@ void lj_record_ins(jit_State *J) | |||
2468 | J->maxslot = ra; /* Shrink used slots. */ | 2578 | J->maxslot = ra; /* Shrink used slots. */ |
2469 | break; | 2579 | break; |
2470 | 2580 | ||
2581 | case BC_ISNEXT: | ||
2582 | rec_isnext(J, ra); | ||
2583 | break; | ||
2584 | |||
2471 | /* -- Function headers -------------------------------------------------- */ | 2585 | /* -- Function headers -------------------------------------------------- */ |
2472 | 2586 | ||
2473 | case BC_FUNCF: | 2587 | case BC_FUNCF: |
@@ -2497,8 +2611,6 @@ void lj_record_ins(jit_State *J) | |||
2497 | break; | 2611 | break; |
2498 | } | 2612 | } |
2499 | /* fallthrough */ | 2613 | /* fallthrough */ |
2500 | case BC_ITERN: | ||
2501 | case BC_ISNEXT: | ||
2502 | case BC_UCLO: | 2614 | case BC_UCLO: |
2503 | case BC_FNEW: | 2615 | case BC_FNEW: |
2504 | setintV(&J->errinfo, (int32_t)op); | 2616 | setintV(&J->errinfo, (int32_t)op); |
@@ -2550,6 +2662,13 @@ static const BCIns *rec_setup_root(jit_State *J) | |||
2550 | lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1"); | 2662 | lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1"); |
2551 | J->bc_min = pc; | 2663 | J->bc_min = pc; |
2552 | break; | 2664 | break; |
2665 | case BC_ITERN: | ||
2666 | lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN"); | ||
2667 | J->maxslot = ra; | ||
2668 | J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns); | ||
2669 | J->bc_min = pc+2 + bc_j(pc[1]); | ||
2670 | J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */ | ||
2671 | break; | ||
2553 | case BC_LOOP: | 2672 | case BC_LOOP: |
2554 | /* Only check BC range for real loops, but not for "repeat until true". */ | 2673 | /* Only check BC range for real loops, but not for "repeat until true". */ |
2555 | pcj = pc + bc_j(ins); | 2674 | pcj = pc + bc_j(ins); |
@@ -2657,6 +2776,7 @@ void lj_record_setup(jit_State *J) | |||
2657 | J->pc = rec_setup_root(J); | 2776 | J->pc = rec_setup_root(J); |
2658 | /* Note: the loop instruction itself is recorded at the end and not | 2777 | /* Note: the loop instruction itself is recorded at the end and not |
2659 | ** at the start! So snapshot #0 needs to point to the *next* instruction. | 2778 | ** at the start! So snapshot #0 needs to point to the *next* instruction. |
2779 | ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST. | ||
2660 | */ | 2780 | */ |
2661 | lj_snap_add(J); | 2781 | lj_snap_add(J); |
2662 | if (bc_op(J->cur.startins) == BC_FORL) | 2782 | if (bc_op(J->cur.startins) == BC_FORL) |
diff --git a/src/lj_record.h b/src/lj_record.h index 3bf461c8..01cc6041 100644 --- a/src/lj_record.h +++ b/src/lj_record.h | |||
@@ -38,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults); | |||
38 | 38 | ||
39 | LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm); | 39 | LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm); |
40 | LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); | 40 | LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); |
41 | LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix); | ||
41 | 42 | ||
42 | LJ_FUNC void lj_record_ins(jit_State *J); | 43 | LJ_FUNC void lj_record_ins(jit_State *J); |
43 | LJ_FUNC void lj_record_setup(jit_State *J); | 44 | LJ_FUNC void lj_record_setup(jit_State *J); |
diff --git a/src/lj_snap.c b/src/lj_snap.c index 40bfad92..97097a5b 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
@@ -463,7 +463,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) | |||
463 | MSize j; | 463 | MSize j; |
464 | for (j = 0; j < nmax; j++) | 464 | for (j = 0; j < nmax; j++) |
465 | if (snap_ref(map[j]) == ref) | 465 | if (snap_ref(map[j]) == ref) |
466 | return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME); | 466 | return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME); |
467 | return 0; | 467 | return 0; |
468 | } | 468 | } |
469 | 469 | ||
@@ -538,10 +538,12 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
538 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; | 538 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; |
539 | if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; | 539 | if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; |
540 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); | 540 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); |
541 | if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX; | ||
541 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); | 542 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); |
542 | } | 543 | } |
543 | setslot: | 544 | setslot: |
544 | J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ | 545 | /* Same as TREF_* flags. */ |
546 | J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME)); | ||
545 | J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); | 547 | J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); |
546 | if ((sn & SNAP_FRAME)) | 548 | if ((sn & SNAP_FRAME)) |
547 | J->baseslot = s+1; | 549 | J->baseslot = s+1; |
@@ -961,6 +963,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
961 | setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); | 963 | setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); |
962 | L->base = o+1; | 964 | L->base = o+1; |
963 | #endif | 965 | #endif |
966 | } else if ((sn & SNAP_KEYINDEX)) { | ||
967 | /* An IRT_INT key index slot is restored as a number. Undo this. */ | ||
968 | o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o))); | ||
969 | o->u32.hi = LJ_KEYINDEX; | ||
964 | } | 970 | } |
965 | } | 971 | } |
966 | } | 972 | } |
diff --git a/src/lj_trace.c b/src/lj_trace.c index a0ff8864..be886f35 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
@@ -215,8 +215,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T) | |||
215 | break; | 215 | break; |
216 | case BC_JITERL: | 216 | case BC_JITERL: |
217 | case BC_JLOOP: | 217 | case BC_JLOOP: |
218 | lj_assertJ(op == BC_ITERL || op == BC_LOOP || bc_isret(op), | 218 | lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP || |
219 | "bad original bytecode %d", op); | 219 | bc_isret(op), "bad original bytecode %d", op); |
220 | *pc = T->startins; | 220 | *pc = T->startins; |
221 | break; | 221 | break; |
222 | case BC_JMP: | 222 | case BC_JMP: |
@@ -411,7 +411,7 @@ static void trace_start(jit_State *J) | |||
411 | TraceNo traceno; | 411 | TraceNo traceno; |
412 | 412 | ||
413 | if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ | 413 | if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ |
414 | if (J->parent == 0 && J->exitno == 0) { | 414 | if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) { |
415 | /* Lazy bytecode patching to disable hotcount events. */ | 415 | /* Lazy bytecode patching to disable hotcount events. */ |
416 | lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || | 416 | lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || |
417 | bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF, | 417 | bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF, |
@@ -496,6 +496,7 @@ static void trace_stop(jit_State *J) | |||
496 | J->cur.nextroot = pt->trace; | 496 | J->cur.nextroot = pt->trace; |
497 | pt->trace = (TraceNo1)traceno; | 497 | pt->trace = (TraceNo1)traceno; |
498 | break; | 498 | break; |
499 | case BC_ITERN: | ||
499 | case BC_RET: | 500 | case BC_RET: |
500 | case BC_RET0: | 501 | case BC_RET0: |
501 | case BC_RET1: | 502 | case BC_RET1: |
@@ -575,7 +576,8 @@ static int trace_abort(jit_State *J) | |||
575 | return 1; /* Retry ASM with new MCode area. */ | 576 | return 1; /* Retry ASM with new MCode area. */ |
576 | } | 577 | } |
577 | /* Penalize or blacklist starting bytecode instruction. */ | 578 | /* Penalize or blacklist starting bytecode instruction. */ |
578 | if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { | 579 | if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins)) && |
580 | bc_op(J->cur.startins) != BC_ITERN) { | ||
579 | if (J->exitno == 0) { | 581 | if (J->exitno == 0) { |
580 | BCIns *startpc = mref(J->cur.startpc, BCIns); | 582 | BCIns *startpc = mref(J->cur.startpc, BCIns); |
581 | if (e == LJ_TRERR_RETRY) | 583 | if (e == LJ_TRERR_RETRY) |
@@ -651,8 +653,13 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) | |||
651 | J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ | 653 | J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ |
652 | trace_start(J); | 654 | trace_start(J); |
653 | lj_dispatch_update(J2G(J)); | 655 | lj_dispatch_update(J2G(J)); |
654 | break; | 656 | if (J->state != LJ_TRACE_RECORD_1ST) |
657 | break; | ||
658 | /* fallthrough */ | ||
655 | 659 | ||
660 | case LJ_TRACE_RECORD_1ST: | ||
661 | J->state = LJ_TRACE_RECORD; | ||
662 | /* fallthrough */ | ||
656 | case LJ_TRACE_RECORD: | 663 | case LJ_TRACE_RECORD: |
657 | trace_pendpatch(J, 0); | 664 | trace_pendpatch(J, 0); |
658 | setvmstate(J2G(J), RECORD); | 665 | setvmstate(J2G(J), RECORD); |
@@ -899,13 +906,14 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) | |||
899 | } | 906 | } |
900 | if (bc_op(*pc) == BC_JLOOP) { | 907 | if (bc_op(*pc) == BC_JLOOP) { |
901 | BCIns *retpc = &traceref(J, bc_d(*pc))->startins; | 908 | BCIns *retpc = &traceref(J, bc_d(*pc))->startins; |
902 | if (bc_isret(bc_op(*retpc))) { | 909 | int isret = bc_isret(bc_op(*retpc)); |
910 | if (isret || bc_op(*retpc) == BC_ITERN) { | ||
903 | if (J->state == LJ_TRACE_RECORD) { | 911 | if (J->state == LJ_TRACE_RECORD) { |
904 | J->patchins = *pc; | 912 | J->patchins = *pc; |
905 | J->patchpc = (BCIns *)pc; | 913 | J->patchpc = (BCIns *)pc; |
906 | *J->patchpc = *retpc; | 914 | *J->patchpc = *retpc; |
907 | J->bcskip = 1; | 915 | J->bcskip = 1; |
908 | } else { | 916 | } else if (isret) { |
909 | pc = retpc; | 917 | pc = retpc; |
910 | setcframe_pc(cf, pc); | 918 | setcframe_pc(cf, pc); |
911 | } | 919 | } |
diff --git a/src/lj_vm.h b/src/lj_vm.h index 84348e7a..81ee8e28 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
@@ -51,6 +51,7 @@ LJ_ASMF void lj_vm_inshook(void); | |||
51 | LJ_ASMF void lj_vm_rethook(void); | 51 | LJ_ASMF void lj_vm_rethook(void); |
52 | LJ_ASMF void lj_vm_callhook(void); | 52 | LJ_ASMF void lj_vm_callhook(void); |
53 | LJ_ASMF void lj_vm_profhook(void); | 53 | LJ_ASMF void lj_vm_profhook(void); |
54 | LJ_ASMF void lj_vm_IITERN(void); | ||
54 | 55 | ||
55 | /* Trace exit handling. */ | 56 | /* Trace exit handling. */ |
56 | LJ_ASMF void lj_vm_exit_handler(void); | 57 | LJ_ASMF void lj_vm_exit_handler(void); |
@@ -98,6 +99,7 @@ LJ_ASMF double lj_vm_trunc_sf(double); | |||
98 | #if LJ_HASFFI | 99 | #if LJ_HASFFI |
99 | LJ_ASMF int lj_vm_errno(void); | 100 | LJ_ASMF int lj_vm_errno(void); |
100 | #endif | 101 | #endif |
102 | LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); | ||
101 | #endif | 103 | #endif |
102 | 104 | ||
103 | /* Continuations for metamethods. */ | 105 | /* Continuations for metamethods. */ |
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 0e80bf00..3a73e00b 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc | |||
@@ -2424,6 +2424,64 @@ static void build_subroutines(BuildCtx *ctx) | |||
2424 | |//-- Miscellaneous functions -------------------------------------------- | 2424 | |//-- Miscellaneous functions -------------------------------------------- |
2425 | |//----------------------------------------------------------------------- | 2425 | |//----------------------------------------------------------------------- |
2426 | | | 2426 | | |
2427 | |.define NEXT_TAB, TAB:CARG1 | ||
2428 | |.define NEXT_RES, CARG1 | ||
2429 | |.define NEXT_IDX, CARG2 | ||
2430 | |.define NEXT_TMP0, CARG3 | ||
2431 | |.define NEXT_TMP1, CARG4 | ||
2432 | |.define NEXT_LIM, r12 | ||
2433 | |.define NEXT_RES_PTR, sp | ||
2434 | |.define NEXT_RES_VAL, [sp] | ||
2435 | |.define NEXT_RES_KEY_I, [sp, #8] | ||
2436 | |.define NEXT_RES_KEY_IT, [sp, #12] | ||
2437 | | | ||
2438 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2439 | |// Next idx returned in CRET2. | ||
2440 | |->vm_next: | ||
2441 | |.if JIT | ||
2442 | | ldr NEXT_TMP0, NEXT_TAB->array | ||
2443 | | ldr NEXT_LIM, NEXT_TAB->asize | ||
2444 | | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3 | ||
2445 | |1: // Traverse array part. | ||
2446 | | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM | ||
2447 | | bhs >5 | ||
2448 | | ldr NEXT_TMP1, [NEXT_TMP0, #4] | ||
2449 | | str NEXT_IDX, NEXT_RES_KEY_I | ||
2450 | | add NEXT_TMP0, NEXT_TMP0, #8 | ||
2451 | | add NEXT_IDX, NEXT_IDX, #1 | ||
2452 | | checktp NEXT_TMP1, LJ_TNIL | ||
2453 | | beq <1 // Skip holes in array part. | ||
2454 | | ldr NEXT_TMP0, [NEXT_TMP0, #-8] | ||
2455 | | mov NEXT_RES, NEXT_RES_PTR | ||
2456 | | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too. | ||
2457 | | mvn NEXT_TMP0, #~LJ_TISNUM | ||
2458 | | str NEXT_TMP0, NEXT_RES_KEY_IT | ||
2459 | | bx lr | ||
2460 | | | ||
2461 | |5: // Traverse hash part. | ||
2462 | | ldr NEXT_TMP0, NEXT_TAB->hmask | ||
2463 | | ldr NODE:NEXT_RES, NEXT_TAB->node | ||
2464 | | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1 | ||
2465 | | add NEXT_LIM, NEXT_LIM, NEXT_TMP0 | ||
2466 | | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3 | ||
2467 | |6: | ||
2468 | | cmp NEXT_IDX, NEXT_LIM | ||
2469 | | bhi >9 | ||
2470 | | ldr NEXT_TMP1, NODE:NEXT_RES->val.it | ||
2471 | | checktp NEXT_TMP1, LJ_TNIL | ||
2472 | | add NEXT_IDX, NEXT_IDX, #1 | ||
2473 | | bxne lr | ||
2474 | | // Skip holes in hash part. | ||
2475 | | add NEXT_RES, NEXT_RES, #sizeof(Node) | ||
2476 | | b <6 | ||
2477 | | | ||
2478 | |9: // End of iteration. Set the key to nil (not the value). | ||
2479 | | mvn NEXT_TMP0, #0 | ||
2480 | | mov NEXT_RES, NEXT_RES_PTR | ||
2481 | | str NEXT_TMP0, NEXT_RES_KEY_IT | ||
2482 | | bx lr | ||
2483 | |.endif | ||
2484 | | | ||
2427 | |//----------------------------------------------------------------------- | 2485 | |//----------------------------------------------------------------------- |
2428 | |//-- FFI helper functions ----------------------------------------------- | 2486 | |//-- FFI helper functions ----------------------------------------------- |
2429 | |//----------------------------------------------------------------------- | 2487 | |//----------------------------------------------------------------------- |
@@ -3914,10 +3972,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3914 | break; | 3972 | break; |
3915 | 3973 | ||
3916 | case BC_ITERN: | 3974 | case BC_ITERN: |
3917 | | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3918 | |.if JIT | 3975 | |.if JIT |
3919 | | // NYI: add hotloop, record BC_ITERN. | 3976 | | hotloop |
3920 | |.endif | 3977 | |.endif |
3978 | |->vm_IITERN: | ||
3979 | | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3921 | | add RA, BASE, RA | 3980 | | add RA, BASE, RA |
3922 | | ldr TAB:RB, [RA, #-16] | 3981 | | ldr TAB:RB, [RA, #-16] |
3923 | | ldr CARG1, [RA, #-8] // Get index from control var. | 3982 | | ldr CARG1, [RA, #-8] // Get index from control var. |
@@ -3992,9 +4051,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3992 | | mov OP, #BC_ITERC | 4051 | | mov OP, #BC_ITERC |
3993 | | strb CARG1, [PC, #-4] | 4052 | | strb CARG1, [PC, #-4] |
3994 | | sub PC, RC, #0x20000 | 4053 | | sub PC, RC, #0x20000 |
4054 | |.if JIT | ||
4055 | | ldrb CARG1, [PC] | ||
4056 | | cmp CARG1, #BC_ITERN | ||
4057 | | bne >6 | ||
4058 | |.endif | ||
3995 | | strb OP, [PC] // Subsumes ins_next1. | 4059 | | strb OP, [PC] // Subsumes ins_next1. |
3996 | | ins_next2 | 4060 | | ins_next2 |
3997 | | b <1 | 4061 | | b <1 |
4062 | |.if JIT | ||
4063 | |6: // Unpatch JLOOP. | ||
4064 | | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] | ||
4065 | | ldrh CARG2, [PC, #2] | ||
4066 | | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] | ||
4067 | | // Subsumes ins_next1 and ins_next2. | ||
4068 | | ldr INS, TRACE:CARG1->startins | ||
4069 | | bfi INS, OP, #0, #8 | ||
4070 | | str INS, [PC], #4 | ||
4071 | | b <1 | ||
4072 | |.endif | ||
3998 | break; | 4073 | break; |
3999 | 4074 | ||
4000 | case BC_VARG: | 4075 | case BC_VARG: |
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 2a2e3a9a..1abc6ecc 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
@@ -2064,6 +2064,63 @@ static void build_subroutines(BuildCtx *ctx) | |||
2064 | |//-- Miscellaneous functions -------------------------------------------- | 2064 | |//-- Miscellaneous functions -------------------------------------------- |
2065 | |//----------------------------------------------------------------------- | 2065 | |//----------------------------------------------------------------------- |
2066 | | | 2066 | | |
2067 | |.define NEXT_TAB, TAB:CARG1 | ||
2068 | |.define NEXT_RES, CARG1 | ||
2069 | |.define NEXT_IDX, CARG2w | ||
2070 | |.define NEXT_LIM, CARG3w | ||
2071 | |.define NEXT_TMP0, TMP0 | ||
2072 | |.define NEXT_TMP0w, TMP0w | ||
2073 | |.define NEXT_TMP1, TMP1 | ||
2074 | |.define NEXT_TMP1w, TMP1w | ||
2075 | |.define NEXT_RES_PTR, sp | ||
2076 | |.define NEXT_RES_VAL, [sp] | ||
2077 | |.define NEXT_RES_KEY, [sp, #8] | ||
2078 | | | ||
2079 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2080 | |// Next idx returned in CRET2w. | ||
2081 | |->vm_next: | ||
2082 | |.if JIT | ||
2083 | | ldr NEXT_LIM, NEXT_TAB->asize | ||
2084 | | ldr NEXT_TMP1, NEXT_TAB->array | ||
2085 | |1: // Traverse array part. | ||
2086 | | subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM | ||
2087 | | bhs >5 // Index points after array part? | ||
2088 | | ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3] | ||
2089 | | cmn NEXT_TMP0, #-LJ_TNIL | ||
2090 | | cinc NEXT_IDX, NEXT_IDX, eq | ||
2091 | | beq <1 // Skip holes in array part. | ||
2092 | | str NEXT_TMP0, NEXT_RES_VAL | ||
2093 | | movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
2094 | | stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY | ||
2095 | | add NEXT_IDX, NEXT_IDX, #1 | ||
2096 | | mov NEXT_RES, NEXT_RES_PTR | ||
2097 | |4: | ||
2098 | | ret | ||
2099 | | | ||
2100 | |5: // Traverse hash part. | ||
2101 | | ldr NEXT_TMP1w, NEXT_TAB->hmask | ||
2102 | | ldr NODE:NEXT_RES, NEXT_TAB->node | ||
2103 | | add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1 | ||
2104 | | add NEXT_LIM, NEXT_LIM, NEXT_TMP1w | ||
2105 | | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3 | ||
2106 | |6: | ||
2107 | | cmp NEXT_IDX, NEXT_LIM | ||
2108 | | bhi >9 | ||
2109 | | ldr NEXT_TMP0, NODE:NEXT_RES->val | ||
2110 | | cmn NEXT_TMP0, #-LJ_TNIL | ||
2111 | | add NEXT_IDX, NEXT_IDX, #1 | ||
2112 | | bne <4 | ||
2113 | | // Skip holes in hash part. | ||
2114 | | add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node) | ||
2115 | | b <6 | ||
2116 | | | ||
2117 | |9: // End of iteration. Set the key to nil (not the value). | ||
2118 | | movn NEXT_TMP0, #0 | ||
2119 | | str NEXT_TMP0, NEXT_RES_KEY | ||
2120 | | mov NEXT_RES, NEXT_RES_PTR | ||
2121 | | ret | ||
2122 | |.endif | ||
2123 | | | ||
2067 | |//----------------------------------------------------------------------- | 2124 | |//----------------------------------------------------------------------- |
2068 | |//-- FFI helper functions ----------------------------------------------- | 2125 | |//-- FFI helper functions ----------------------------------------------- |
2069 | |//----------------------------------------------------------------------- | 2126 | |//----------------------------------------------------------------------- |
@@ -3320,10 +3377,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3320 | break; | 3377 | break; |
3321 | 3378 | ||
3322 | case BC_ITERN: | 3379 | case BC_ITERN: |
3323 | | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3324 | |.if JIT | 3380 | |.if JIT |
3325 | | // NYI: add hotloop, record BC_ITERN. | 3381 | | hotloop |
3326 | |.endif | 3382 | |.endif |
3383 | |->vm_IITERN: | ||
3384 | | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
3327 | | add RA, BASE, RA, lsl #3 | 3385 | | add RA, BASE, RA, lsl #3 |
3328 | | ldr TAB:RB, [RA, #-16] | 3386 | | ldr TAB:RB, [RA, #-16] |
3329 | | ldrh TMP3w, [PC, # OFS_RD] | 3387 | | ldrh TMP3w, [PC, # OFS_RD] |
@@ -3390,11 +3448,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3390 | | ins_next | 3448 | | ins_next |
3391 | | | 3449 | | |
3392 | |5: // Despecialize bytecode if any of the checks fail. | 3450 | |5: // Despecialize bytecode if any of the checks fail. |
3451 | |.if JIT | ||
3452 | | ldrb TMP2w, [RC, # OFS_OP] | ||
3453 | |.endif | ||
3393 | | mov TMP0, #BC_JMP | 3454 | | mov TMP0, #BC_JMP |
3394 | | mov TMP1, #BC_ITERC | 3455 | | mov TMP1, #BC_ITERC |
3395 | | strb TMP0w, [PC, #-4+OFS_OP] | 3456 | | strb TMP0w, [PC, #-4+OFS_OP] |
3457 | |.if JIT | ||
3458 | | cmp TMP2w, #BC_ITERN | ||
3459 | | bne >6 | ||
3460 | |.endif | ||
3396 | | strb TMP1w, [RC, # OFS_OP] | 3461 | | strb TMP1w, [RC, # OFS_OP] |
3397 | | b <1 | 3462 | | b <1 |
3463 | |.if JIT | ||
3464 | |6: // Unpatch JLOOP. | ||
3465 | | ldr RA, [GL, #GL_J(trace)] | ||
3466 | | ldrh TMP2w, [RC, # OFS_RD] | ||
3467 | | ldr TRACE:RA, [RA, TMP2, lsl #3] | ||
3468 | | ldr TMP2w, TRACE:RA->startins | ||
3469 | | bfxil TMP2w, TMP1w, #0, #8 | ||
3470 | | str TMP2w, [RC] | ||
3471 | | b <1 | ||
3472 | |.endif | ||
3398 | break; | 3473 | break; |
3399 | 3474 | ||
3400 | case BC_VARG: | 3475 | case BC_VARG: |
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 3b0ea4a2..f70c613e 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc | |||
@@ -190,7 +190,7 @@ | |||
190 | |//----------------------------------------------------------------------- | 190 | |//----------------------------------------------------------------------- |
191 | | | 191 | | |
192 | |// Trap for not-yet-implemented parts. | 192 | |// Trap for not-yet-implemented parts. |
193 | |.macro NYI; .long 0xf0f0f0f0; .endmacro | 193 | |.macro NYI; .long 0xec1cf0f0; .endmacro |
194 | | | 194 | | |
195 | |// Macros to mark delay slots. | 195 | |// Macros to mark delay slots. |
196 | |.macro ., a; a; .endmacro | 196 | |.macro ., a; a; .endmacro |
@@ -2798,6 +2798,73 @@ static void build_subroutines(BuildCtx *ctx) | |||
2798 | |//-- Miscellaneous functions -------------------------------------------- | 2798 | |//-- Miscellaneous functions -------------------------------------------- |
2799 | |//----------------------------------------------------------------------- | 2799 | |//----------------------------------------------------------------------- |
2800 | | | 2800 | | |
2801 | |.define NEXT_TAB, TAB:CARG1 | ||
2802 | |.define NEXT_IDX, CARG2 | ||
2803 | |.define NEXT_ASIZE, CARG3 | ||
2804 | |.define NEXT_NIL, CARG4 | ||
2805 | |.define NEXT_TMP0, r12 | ||
2806 | |.define NEXT_TMP1, r13 | ||
2807 | |.define NEXT_TMP2, r14 | ||
2808 | |.define NEXT_RES_VK, CRET1 | ||
2809 | |.define NEXT_RES_IDX, CRET2 | ||
2810 | |.define NEXT_RES_PTR, sp | ||
2811 | |.define NEXT_RES_VAL_I, 0(sp) | ||
2812 | |.define NEXT_RES_VAL_IT, 4(sp) | ||
2813 | |.define NEXT_RES_KEY_I, 8(sp) | ||
2814 | |.define NEXT_RES_KEY_IT, 12(sp) | ||
2815 | | | ||
2816 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2817 | |// Next idx returned in CRET2. | ||
2818 | |->vm_next: | ||
2819 | |.if JIT and ENDIAN_LE | ||
2820 | | lw NEXT_ASIZE, NEXT_TAB->asize | ||
2821 | | lw NEXT_TMP0, NEXT_TAB->array | ||
2822 | | li NEXT_NIL, LJ_TNIL | ||
2823 | |1: // Traverse array part. | ||
2824 | | sltu AT, NEXT_IDX, NEXT_ASIZE | ||
2825 | | sll NEXT_TMP1, NEXT_IDX, 3 | ||
2826 | | beqz AT, >5 | ||
2827 | |. addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 | ||
2828 | | lw NEXT_TMP2, 4(NEXT_TMP1) | ||
2829 | | sw NEXT_IDX, NEXT_RES_KEY_I | ||
2830 | | beq NEXT_TMP2, NEXT_NIL, <1 | ||
2831 | |. addiu NEXT_IDX, NEXT_IDX, 1 | ||
2832 | | lw NEXT_TMP0, 0(NEXT_TMP1) | ||
2833 | | li AT, LJ_TISNUM | ||
2834 | | sw NEXT_TMP2, NEXT_RES_VAL_IT | ||
2835 | | sw AT, NEXT_RES_KEY_IT | ||
2836 | | sw NEXT_TMP0, NEXT_RES_VAL_I | ||
2837 | | move NEXT_RES_VK, NEXT_RES_PTR | ||
2838 | | jr ra | ||
2839 | |. move NEXT_RES_IDX, NEXT_IDX | ||
2840 | | | ||
2841 | |5: // Traverse hash part. | ||
2842 | | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE | ||
2843 | | lw NODE:NEXT_RES_VK, NEXT_TAB->node | ||
2844 | | sll NEXT_TMP2, NEXT_RES_IDX, 5 | ||
2845 | | lw NEXT_TMP0, NEXT_TAB->hmask | ||
2846 | | sll AT, NEXT_RES_IDX, 3 | ||
2847 | | subu AT, NEXT_TMP2, AT | ||
2848 | | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT | ||
2849 | |6: | ||
2850 | | sltu AT, NEXT_TMP0, NEXT_RES_IDX | ||
2851 | | bnez AT, >8 | ||
2852 | |. nop | ||
2853 | | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it | ||
2854 | | bne NEXT_TMP2, NEXT_NIL, >9 | ||
2855 | |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 | ||
2856 | | // Skip holes in hash part. | ||
2857 | | b <6 | ||
2858 | |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) | ||
2859 | | | ||
2860 | |8: // End of iteration. Set the key to nil (not the value). | ||
2861 | | sw NEXT_NIL, NEXT_RES_KEY_IT | ||
2862 | | move NEXT_RES_VK, NEXT_RES_PTR | ||
2863 | |9: | ||
2864 | | jr ra | ||
2865 | |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE | ||
2866 | |.endif | ||
2867 | | | ||
2801 | |//----------------------------------------------------------------------- | 2868 | |//----------------------------------------------------------------------- |
2802 | |//-- FFI helper functions ----------------------------------------------- | 2869 | |//-- FFI helper functions ----------------------------------------------- |
2803 | |//----------------------------------------------------------------------- | 2870 | |//----------------------------------------------------------------------- |
@@ -4521,10 +4588,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4521 | break; | 4588 | break; |
4522 | 4589 | ||
4523 | case BC_ITERN: | 4590 | case BC_ITERN: |
4524 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | 4591 | |.if JIT and ENDIAN_LE |
4525 | |.if JIT | 4592 | | hotloop |
4526 | | // NYI: add hotloop, record BC_ITERN. | ||
4527 | |.endif | 4593 | |.endif |
4594 | |->vm_IITERN: | ||
4595 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | ||
4528 | | addu RA, BASE, RA | 4596 | | addu RA, BASE, RA |
4529 | | lw TAB:RB, -16+LO(RA) | 4597 | | lw TAB:RB, -16+LO(RA) |
4530 | | lw RC, -8+LO(RA) // Get index from control var. | 4598 | | lw RC, -8+LO(RA) // Get index from control var. |
@@ -4614,9 +4682,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4614 | | li TMP3, BC_JMP | 4682 | | li TMP3, BC_JMP |
4615 | | li TMP1, BC_ITERC | 4683 | | li TMP1, BC_ITERC |
4616 | | sb TMP3, -4+OFS_OP(PC) | 4684 | | sb TMP3, -4+OFS_OP(PC) |
4617 | | addu PC, TMP0, TMP2 | 4685 | | addu PC, TMP0, TMP2 |
4686 | |.if JIT | ||
4687 | | lb TMP0, OFS_OP(PC) | ||
4688 | | li AT, BC_ITERN | ||
4689 | | bne TMP0, AT, >6 | ||
4690 | |. lhu TMP2, OFS_RD(PC) | ||
4691 | |.endif | ||
4618 | | b <1 | 4692 | | b <1 |
4619 | |. sb TMP1, OFS_OP(PC) | 4693 | |. sb TMP1, OFS_OP(PC) |
4694 | |.if JIT | ||
4695 | |6: // Unpatch JLOOP. | ||
4696 | | lw TMP0, DISPATCH_J(trace)(DISPATCH) | ||
4697 | | sll TMP2, TMP2, 2 | ||
4698 | | addu TMP0, TMP0, TMP2 | ||
4699 | | lw TRACE:TMP2, 0(TMP0) | ||
4700 | | lw TMP0, TRACE:TMP2->startins | ||
4701 | | li AT, -256 | ||
4702 | | and TMP0, TMP0, AT | ||
4703 | | or TMP0, TMP0, TMP1 | ||
4704 | | b <1 | ||
4705 | |. sw TMP0, 0(PC) | ||
4706 | |.endif | ||
4620 | break; | 4707 | break; |
4621 | 4708 | ||
4622 | case BC_VARG: | 4709 | case BC_VARG: |
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 0d28326a..5c5d761c 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc | |||
@@ -193,7 +193,7 @@ | |||
193 | |//----------------------------------------------------------------------- | 193 | |//----------------------------------------------------------------------- |
194 | | | 194 | | |
195 | |// Trap for not-yet-implemented parts. | 195 | |// Trap for not-yet-implemented parts. |
196 | |.macro NYI; .long 0xf0f0f0f0; .endmacro | 196 | |.macro NYI; .long 0xec1cf0f0; .endmacro |
197 | | | 197 | | |
198 | |// Macros to mark delay slots. | 198 | |// Macros to mark delay slots. |
199 | |.macro ., a; a; .endmacro | 199 | |.macro ., a; a; .endmacro |
@@ -2904,6 +2904,70 @@ static void build_subroutines(BuildCtx *ctx) | |||
2904 | |//-- Miscellaneous functions -------------------------------------------- | 2904 | |//-- Miscellaneous functions -------------------------------------------- |
2905 | |//----------------------------------------------------------------------- | 2905 | |//----------------------------------------------------------------------- |
2906 | | | 2906 | | |
2907 | |.define NEXT_TAB, TAB:CARG1 | ||
2908 | |.define NEXT_IDX, CARG2 | ||
2909 | |.define NEXT_ASIZE, CARG3 | ||
2910 | |.define NEXT_NIL, CARG4 | ||
2911 | |.define NEXT_TMP0, r12 | ||
2912 | |.define NEXT_TMP1, r13 | ||
2913 | |.define NEXT_TMP2, r14 | ||
2914 | |.define NEXT_RES_VK, CRET1 | ||
2915 | |.define NEXT_RES_IDX, CRET2 | ||
2916 | |.define NEXT_RES_PTR, sp | ||
2917 | |.define NEXT_RES_VAL, 0(sp) | ||
2918 | |.define NEXT_RES_KEY, 8(sp) | ||
2919 | | | ||
2920 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2921 | |// Next idx returned in CRET2. | ||
2922 | |->vm_next: | ||
2923 | |.if JIT and ENDIAN_LE | ||
2924 | | lw NEXT_ASIZE, NEXT_TAB->asize | ||
2925 | | ld NEXT_TMP0, NEXT_TAB->array | ||
2926 | | li NEXT_NIL, LJ_TNIL | ||
2927 | |1: // Traverse array part. | ||
2928 | | sltu AT, NEXT_IDX, NEXT_ASIZE | ||
2929 | | sll NEXT_TMP1, NEXT_IDX, 3 | ||
2930 | | beqz AT, >5 | ||
2931 | |. daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 | ||
2932 | | li AT, LJ_TISNUM | ||
2933 | | ld NEXT_TMP2, 0(NEXT_TMP1) | ||
2934 | | dsll AT, AT, 47 | ||
2935 | | or NEXT_TMP1, NEXT_IDX, AT | ||
2936 | | beq NEXT_TMP2, NEXT_NIL, <1 | ||
2937 | |. addiu NEXT_IDX, NEXT_IDX, 1 | ||
2938 | | sd NEXT_TMP2, NEXT_RES_VAL | ||
2939 | | sd NEXT_TMP1, NEXT_RES_KEY | ||
2940 | | move NEXT_RES_VK, NEXT_RES_PTR | ||
2941 | | jr ra | ||
2942 | |. move NEXT_RES_IDX, NEXT_IDX | ||
2943 | | | ||
2944 | |5: // Traverse hash part. | ||
2945 | | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE | ||
2946 | | ld NODE:NEXT_RES_VK, NEXT_TAB->node | ||
2947 | | sll NEXT_TMP2, NEXT_RES_IDX, 5 | ||
2948 | | lw NEXT_TMP0, NEXT_TAB->hmask | ||
2949 | | sll AT, NEXT_RES_IDX, 3 | ||
2950 | | subu AT, NEXT_TMP2, AT | ||
2951 | | daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT | ||
2952 | |6: | ||
2953 | | sltu AT, NEXT_TMP0, NEXT_RES_IDX | ||
2954 | | bnez AT, >8 | ||
2955 | |. nop | ||
2956 | | ld NEXT_TMP2, NODE:NEXT_RES_VK->val | ||
2957 | | bne NEXT_TMP2, NEXT_NIL, >9 | ||
2958 | |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 | ||
2959 | | // Skip holes in hash part. | ||
2960 | | b <6 | ||
2961 | |. daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) | ||
2962 | | | ||
2963 | |8: // End of iteration. Set the key to nil (not the value). | ||
2964 | | sd NEXT_NIL, NEXT_RES_KEY | ||
2965 | | move NEXT_RES_VK, NEXT_RES_PTR | ||
2966 | |9: | ||
2967 | | jr ra | ||
2968 | |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE | ||
2969 | |.endif | ||
2970 | | | ||
2907 | |//----------------------------------------------------------------------- | 2971 | |//----------------------------------------------------------------------- |
2908 | |//-- FFI helper functions ----------------------------------------------- | 2972 | |//-- FFI helper functions ----------------------------------------------- |
2909 | |//----------------------------------------------------------------------- | 2973 | |//----------------------------------------------------------------------- |
@@ -4700,10 +4764,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4700 | break; | 4764 | break; |
4701 | 4765 | ||
4702 | case BC_ITERN: | 4766 | case BC_ITERN: |
4703 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | 4767 | |.if JIT and ENDIAN_LE |
4704 | |.if JIT | 4768 | | hotloop |
4705 | | // NYI: add hotloop, record BC_ITERN. | ||
4706 | |.endif | 4769 | |.endif |
4770 | |->vm_IITERN: | ||
4771 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | ||
4707 | | daddu RA, BASE, RA | 4772 | | daddu RA, BASE, RA |
4708 | | ld TAB:RB, -16(RA) | 4773 | | ld TAB:RB, -16(RA) |
4709 | | lw RC, -8+LO(RA) // Get index from control var. | 4774 | | lw RC, -8+LO(RA) // Get index from control var. |
@@ -4789,8 +4854,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4789 | | li TMP1, BC_ITERC | 4854 | | li TMP1, BC_ITERC |
4790 | | sb TMP3, -4+OFS_OP(PC) | 4855 | | sb TMP3, -4+OFS_OP(PC) |
4791 | | daddu PC, TMP0, TMP2 | 4856 | | daddu PC, TMP0, TMP2 |
4857 | |.if JIT | ||
4858 | | lb TMP0, OFS_OP(PC) | ||
4859 | | li AT, BC_ITERN | ||
4860 | | bne TMP0, AT, >6 | ||
4861 | |. lhu TMP2, OFS_RD(PC) | ||
4862 | |.endif | ||
4792 | | b <1 | 4863 | | b <1 |
4793 | |. sb TMP1, OFS_OP(PC) | 4864 | |. sb TMP1, OFS_OP(PC) |
4865 | |.if JIT | ||
4866 | |6: // Unpatch JLOOP. | ||
4867 | | ld TMP0, DISPATCH_J(trace)(DISPATCH) | ||
4868 | | sll TMP2, TMP2, 3 | ||
4869 | | daddu TMP0, TMP0, TMP2 | ||
4870 | | ld TRACE:TMP2, 0(TMP0) | ||
4871 | | lw TMP0, TRACE:TMP2->startins | ||
4872 | | li AT, -256 | ||
4873 | | and TMP0, TMP0, AT | ||
4874 | | or TMP0, TMP0, TMP1 | ||
4875 | | b <1 | ||
4876 | |. sw TMP0, 0(PC) | ||
4877 | |.endif | ||
4794 | break; | 4878 | break; |
4795 | 4879 | ||
4796 | case BC_VARG: | 4880 | case BC_VARG: |
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index d4133a65..d9e19298 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc | |||
@@ -3163,6 +3163,11 @@ static void build_subroutines(BuildCtx *ctx) | |||
3163 | | blr | 3163 | | blr |
3164 | |.endif | 3164 | |.endif |
3165 | | | 3165 | | |
3166 | |->vm_next: | ||
3167 | |.if JIT | ||
3168 | | NYI // On big-endian. | ||
3169 | |.endif | ||
3170 | | | ||
3166 | |//----------------------------------------------------------------------- | 3171 | |//----------------------------------------------------------------------- |
3167 | |//-- FFI helper functions ----------------------------------------------- | 3172 | |//-- FFI helper functions ----------------------------------------------- |
3168 | |//----------------------------------------------------------------------- | 3173 | |//----------------------------------------------------------------------- |
@@ -5112,8 +5117,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5112 | case BC_ITERN: | 5117 | case BC_ITERN: |
5113 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | 5118 | | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) |
5114 | |.if JIT | 5119 | |.if JIT |
5115 | | // NYI: add hotloop, record BC_ITERN. | 5120 | | // NYI on big-endian |
5116 | |.endif | 5121 | |.endif |
5122 | |->vm_IITERN: | ||
5117 | | add RA, BASE, RA | 5123 | | add RA, BASE, RA |
5118 | | lwz TAB:RB, -12(RA) | 5124 | | lwz TAB:RB, -12(RA) |
5119 | | lwz RC, -4(RA) // Get index from control var. | 5125 | | lwz RC, -4(RA) // Get index from control var. |
@@ -5244,6 +5250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
5244 | | li TMP1, BC_ITERC | 5250 | | li TMP1, BC_ITERC |
5245 | | stb TMP0, -1(PC) | 5251 | | stb TMP0, -1(PC) |
5246 | | addis PC, TMP3, -(BCBIAS_J*4 >> 16) | 5252 | | addis PC, TMP3, -(BCBIAS_J*4 >> 16) |
5253 | | // NYI on big-endian: unpatch JLOOP. | ||
5247 | | stb TMP1, 3(PC) | 5254 | | stb TMP1, 3(PC) |
5248 | | b <1 | 5255 | | b <1 |
5249 | break; | 5256 | break; |
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index d2119bc4..fdffd4b6 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc | |||
@@ -2633,6 +2633,67 @@ static void build_subroutines(BuildCtx *ctx) | |||
2633 | | .if X64WIN; pop rsi; .endif | 2633 | | .if X64WIN; pop rsi; .endif |
2634 | | ret | 2634 | | ret |
2635 | | | 2635 | | |
2636 | |.define NEXT_TAB, TAB:CARG1 | ||
2637 | |.define NEXT_IDX, CARG2d | ||
2638 | |.define NEXT_IDXa, CARG2 | ||
2639 | |.define NEXT_PTR, RC | ||
2640 | |.define NEXT_PTRd, RCd | ||
2641 | |.define NEXT_TMP, CARG3 | ||
2642 | |.define NEXT_ASIZE, CARG4d | ||
2643 | |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
2644 | |.if X64WIN | ||
2645 | |.define NEXT_RES_PTR, [rsp+aword*5] | ||
2646 | |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | ||
2647 | |.else | ||
2648 | |.define NEXT_RES_PTR, [rsp+aword*1] | ||
2649 | |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
2650 | |.endif | ||
2651 | | | ||
2652 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
2653 | |// Next idx returned in edx. | ||
2654 | |->vm_next: | ||
2655 | |.if JIT | ||
2656 | | mov NEXT_ASIZE, NEXT_TAB->asize | ||
2657 | |1: // Traverse array part. | ||
2658 | | cmp NEXT_IDX, NEXT_ASIZE; jae >5 | ||
2659 | | mov NEXT_TMP, NEXT_TAB->array | ||
2660 | | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8] | ||
2661 | | cmp NEXT_TMP, LJ_TNIL; je >2 | ||
2662 | | lea NEXT_PTR, NEXT_RES_PTR | ||
2663 | | mov qword [NEXT_PTR], NEXT_TMP | ||
2664 | |.if DUALNUM | ||
2665 | | setint NEXT_TMP, NEXT_IDXa | ||
2666 | | mov qword [NEXT_PTR+qword*1], NEXT_TMP | ||
2667 | |.else | ||
2668 | | cvtsi2sd xmm0, NEXT_IDX | ||
2669 | | movsd qword [NEXT_PTR+qword*1], xmm0 | ||
2670 | |.endif | ||
2671 | | NEXT_RES_IDX 1 | ||
2672 | | ret | ||
2673 | |2: // Skip holes in array part. | ||
2674 | | add NEXT_IDX, 1 | ||
2675 | | jmp <1 | ||
2676 | | | ||
2677 | |5: // Traverse hash part. | ||
2678 | | sub NEXT_IDX, NEXT_ASIZE | ||
2679 | |6: | ||
2680 | | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 | ||
2681 | | imul NEXT_PTRd, NEXT_IDX, #NODE | ||
2682 | | add NODE:NEXT_PTR, NEXT_TAB->node | ||
2683 | | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7 | ||
2684 | | NEXT_RES_IDXL NEXT_ASIZE+1 | ||
2685 | | ret | ||
2686 | |7: // Skip holes in hash part. | ||
2687 | | add NEXT_IDX, 1 | ||
2688 | | jmp <6 | ||
2689 | | | ||
2690 | |9: // End of iteration. Set the key to nil (not the value). | ||
2691 | | NEXT_RES_IDX NEXT_ASIZE | ||
2692 | | lea NEXT_PTR, NEXT_RES_PTR | ||
2693 | | mov qword [NEXT_PTR+qword*1], LJ_TNIL | ||
2694 | | ret | ||
2695 | |.endif | ||
2696 | | | ||
2636 | |//----------------------------------------------------------------------- | 2697 | |//----------------------------------------------------------------------- |
2637 | |//-- Assertions --------------------------------------------------------- | 2698 | |//-- Assertions --------------------------------------------------------- |
2638 | |//----------------------------------------------------------------------- | 2699 | |//----------------------------------------------------------------------- |
@@ -4044,10 +4105,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4044 | break; | 4105 | break; |
4045 | 4106 | ||
4046 | case BC_ITERN: | 4107 | case BC_ITERN: |
4047 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
4048 | |.if JIT | 4108 | |.if JIT |
4049 | | // NYI: add hotloop, record BC_ITERN. | 4109 | | hotloop RBd |
4050 | |.endif | 4110 | |.endif |
4111 | |->vm_IITERN: | ||
4112 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
4051 | | mov TAB:RB, [BASE+RA*8-16] | 4113 | | mov TAB:RB, [BASE+RA*8-16] |
4052 | | cleartp TAB:RB | 4114 | | cleartp TAB:RB |
4053 | | mov RCd, [BASE+RA*8-8] // Get index from control var. | 4115 | | mov RCd, [BASE+RA*8-8] // Get index from control var. |
@@ -4118,8 +4180,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4118 | |5: // Despecialize bytecode if any of the checks fail. | 4180 | |5: // Despecialize bytecode if any of the checks fail. |
4119 | | mov PC_OP, BC_JMP | 4181 | | mov PC_OP, BC_JMP |
4120 | | branchPC RD | 4182 | | branchPC RD |
4183 | |.if JIT | ||
4184 | | cmp byte [PC], BC_ITERN | ||
4185 | | jne >6 | ||
4186 | |.endif | ||
4121 | | mov byte [PC], BC_ITERC | 4187 | | mov byte [PC], BC_ITERC |
4122 | | jmp <1 | 4188 | | jmp <1 |
4189 | |.if JIT | ||
4190 | |6: // Unpatch JLOOP. | ||
4191 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | ||
4192 | | movzx RCd, word [PC+2] | ||
4193 | | mov TRACE:RA, [RA+RC*8] | ||
4194 | | mov eax, TRACE:RA->startins | ||
4195 | | mov al, BC_ITERC | ||
4196 | | mov dword [PC], eax | ||
4197 | | jmp <1 | ||
4198 | |.endif | ||
4123 | break; | 4199 | break; |
4124 | 4200 | ||
4125 | case BC_VARG: | 4201 | case BC_VARG: |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 718cb8f0..cbf0810c 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
@@ -3120,6 +3120,86 @@ static void build_subroutines(BuildCtx *ctx) | |||
3120 | | ret | 3120 | | ret |
3121 | |.endif | 3121 | |.endif |
3122 | | | 3122 | | |
3123 | |.define NEXT_TAB, TAB:FCARG1 | ||
3124 | |.define NEXT_IDX, FCARG2 | ||
3125 | |.define NEXT_PTR, RCa | ||
3126 | |.define NEXT_PTRd, RC | ||
3127 | |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
3128 | |.if X64 | ||
3129 | |.define NEXT_TMP, CARG3d | ||
3130 | |.define NEXT_TMPq, CARG3 | ||
3131 | |.define NEXT_ASIZE, CARG4d | ||
3132 | |.macro NEXT_ENTER; .endmacro | ||
3133 | |.macro NEXT_LEAVE; ret; .endmacro | ||
3134 | |.if X64WIN | ||
3135 | |.define NEXT_RES_PTR, [rsp+aword*5] | ||
3136 | |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | ||
3137 | |.else | ||
3138 | |.define NEXT_RES_PTR, [rsp+aword*1] | ||
3139 | |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro | ||
3140 | |.endif | ||
3141 | |.else | ||
3142 | |.define NEXT_ASIZE, esi | ||
3143 | |.define NEXT_TMP, edi | ||
3144 | |.macro NEXT_ENTER; push esi; push edi; .endmacro | ||
3145 | |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro | ||
3146 | |.define NEXT_RES_PTR, [esp+dword*3] | ||
3147 | |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | ||
3148 | |.endif | ||
3149 | | | ||
3150 | |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | ||
3151 | |// Next idx returned in edx. | ||
3152 | |->vm_next: | ||
3153 | |.if JIT | ||
3154 | | NEXT_ENTER | ||
3155 | | mov NEXT_ASIZE, NEXT_TAB->asize | ||
3156 | |1: // Traverse array part. | ||
3157 | | cmp NEXT_IDX, NEXT_ASIZE; jae >5 | ||
3158 | | mov NEXT_TMP, NEXT_TAB->array | ||
3159 | | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 | ||
3160 | | lea NEXT_PTR, NEXT_RES_PTR | ||
3161 | |.if X64 | ||
3162 | | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] | ||
3163 | | mov qword [NEXT_PTR], NEXT_TMPq | ||
3164 | |.else | ||
3165 | | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] | ||
3166 | | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8] | ||
3167 | | mov dword [NEXT_PTR+4], NEXT_ASIZE | ||
3168 | | mov dword [NEXT_PTR], NEXT_TMP | ||
3169 | |.endif | ||
3170 | |.if DUALNUM | ||
3171 | | mov dword [NEXT_PTR+dword*3], LJ_TISNUM | ||
3172 | | mov dword [NEXT_PTR+dword*2], NEXT_IDX | ||
3173 | |.else | ||
3174 | | cvtsi2sd xmm0, NEXT_IDX | ||
3175 | | movsd qword [NEXT_PTR+dword*2], xmm0 | ||
3176 | |.endif | ||
3177 | | NEXT_RES_IDX 1 | ||
3178 | | NEXT_LEAVE | ||
3179 | |2: // Skip holes in array part. | ||
3180 | | add NEXT_IDX, 1 | ||
3181 | | jmp <1 | ||
3182 | | | ||
3183 | |5: // Traverse hash part. | ||
3184 | | sub NEXT_IDX, NEXT_ASIZE | ||
3185 | |6: | ||
3186 | | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 | ||
3187 | | imul NEXT_PTRd, NEXT_IDX, #NODE | ||
3188 | | add NODE:NEXT_PTRd, dword NEXT_TAB->node | ||
3189 | | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 | ||
3190 | | NEXT_RES_IDXL NEXT_ASIZE+1 | ||
3191 | | NEXT_LEAVE | ||
3192 | |7: // Skip holes in hash part. | ||
3193 | | add NEXT_IDX, 1 | ||
3194 | | jmp <6 | ||
3195 | | | ||
3196 | |9: // End of iteration. Set the key to nil (not the value). | ||
3197 | | NEXT_RES_IDX NEXT_ASIZE | ||
3198 | | lea NEXT_PTR, NEXT_RES_PTR | ||
3199 | | mov dword [NEXT_PTR+dword*3], LJ_TNIL | ||
3200 | | NEXT_LEAVE | ||
3201 | |.endif | ||
3202 | | | ||
3123 | |//----------------------------------------------------------------------- | 3203 | |//----------------------------------------------------------------------- |
3124 | |//-- Assertions --------------------------------------------------------- | 3204 | |//-- Assertions --------------------------------------------------------- |
3125 | |//----------------------------------------------------------------------- | 3205 | |//----------------------------------------------------------------------- |
@@ -4771,10 +4851,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4771 | break; | 4851 | break; |
4772 | 4852 | ||
4773 | case BC_ITERN: | 4853 | case BC_ITERN: |
4774 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
4775 | |.if JIT | 4854 | |.if JIT |
4776 | | // NYI: add hotloop, record BC_ITERN. | 4855 | | hotloop RB |
4777 | |.endif | 4856 | |.endif |
4857 | |->vm_IITERN: | ||
4858 | | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | ||
4778 | | mov TMP1, KBASE // Need two more free registers. | 4859 | | mov TMP1, KBASE // Need two more free registers. |
4779 | | mov TMP2, DISPATCH | 4860 | | mov TMP2, DISPATCH |
4780 | | mov TAB:RB, [BASE+RA*8-16] | 4861 | | mov TAB:RB, [BASE+RA*8-16] |
@@ -4868,8 +4949,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4868 | |5: // Despecialize bytecode if any of the checks fail. | 4949 | |5: // Despecialize bytecode if any of the checks fail. |
4869 | | mov PC_OP, BC_JMP | 4950 | | mov PC_OP, BC_JMP |
4870 | | branchPC RD | 4951 | | branchPC RD |
4952 | |.if JIT | ||
4953 | | cmp byte [PC], BC_ITERN | ||
4954 | | jne >6 | ||
4955 | |.endif | ||
4871 | | mov byte [PC], BC_ITERC | 4956 | | mov byte [PC], BC_ITERC |
4872 | | jmp <1 | 4957 | | jmp <1 |
4958 | |.if JIT | ||
4959 | |6: // Unpatch JLOOP. | ||
4960 | | mov RA, [DISPATCH+DISPATCH_J(trace)] | ||
4961 | | movzx RC, word [PC+2] | ||
4962 | | mov TRACE:RA, [RA+RC*4] | ||
4963 | | mov eax, TRACE:RA->startins | ||
4964 | | mov al, BC_ITERC | ||
4965 | | mov dword [PC], eax | ||
4966 | | jmp <1 | ||
4967 | |.endif | ||
4873 | break; | 4968 | break; |
4874 | 4969 | ||
4875 | case BC_VARG: | 4970 | case BC_VARG: |