aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Pall <mike>2011-03-10 01:57:24 +0100
committerMike Pall <mike>2011-03-10 01:57:24 +0100
commitbfce3c1127fd57fe0c935c92bcf45b4737041edd (patch)
tree2bd2d9e08c70608de63c7a69df7f00cfab07f6be /src
parent3f26e3a89d54dfb761ca02fc89aaf15326f5f514 (diff)
downloadluajit-bfce3c1127fd57fe0c935c92bcf45b4737041edd.tar.gz
luajit-bfce3c1127fd57fe0c935c92bcf45b4737041edd.tar.bz2
luajit-bfce3c1127fd57fe0c935c92bcf45b4737041edd.zip
DUALNUM: Handle integer type in JIT compiler.
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.dep9
-rw-r--r--src/lj_asm.c35
-rw-r--r--src/lj_crecord.c35
-rw-r--r--src/lj_ffrecord.c39
-rw-r--r--src/lj_ir.c26
-rw-r--r--src/lj_ir.h30
-rw-r--r--src/lj_iropt.h12
-rw-r--r--src/lj_meta.c28
-rw-r--r--src/lj_meta.h2
-rw-r--r--src/lj_obj.h2
-rw-r--r--src/lj_opt_fold.c5
-rw-r--r--src/lj_opt_loop.c9
-rw-r--r--src/lj_opt_narrow.c233
-rw-r--r--src/lj_record.c280
-rw-r--r--src/lj_snap.c3
-rw-r--r--src/lj_trace.c12
16 files changed, 484 insertions, 276 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 1684ebd7..8458ec78 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -128,15 +128,16 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
128 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h 128 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
129lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 129lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
130 lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 130 lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
131 lj_dispatch.h lj_traceerr.h 131 lj_dispatch.h lj_traceerr.h lj_vm.h
132lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ 132lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
133 lj_arch.h 133 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \
134 lj_vm.h
134lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 135lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
135 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \ 136 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \
136 lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h 137 lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
137lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 138lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
138 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ 139 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
139 lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 140 lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
140 lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h 141 lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
141lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 142lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
142 lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 143 lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 5f3c5fab..d395010d 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2059,7 +2059,7 @@ static void asm_href(ASMState *as, IRIns *ir)
2059 } else { 2059 } else {
2060 emit_sjcc(as, CC_P, l_next); 2060 emit_sjcc(as, CC_P, l_next);
2061 emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); 2061 emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
2062 emit_sjcc(as, CC_A, l_next); 2062 emit_sjcc(as, CC_AE, l_next);
2063 /* The type check avoids NaN penalties and complaints from Valgrind. */ 2063 /* The type check avoids NaN penalties and complaints from Valgrind. */
2064#if LJ_64 2064#if LJ_64
2065 emit_u32(as, LJ_TISNUM); 2065 emit_u32(as, LJ_TISNUM);
@@ -2388,7 +2388,8 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
2388 2388
2389static void asm_ahuvload(ASMState *as, IRIns *ir) 2389static void asm_ahuvload(ASMState *as, IRIns *ir)
2390{ 2390{
2391 lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t)); 2391 lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
2392 (LJ_DUALNUM && irt_isint(ir->t)));
2392#if LJ_64 2393#if LJ_64
2393 if (irt_islightud(ir->t)) { 2394 if (irt_islightud(ir->t)) {
2394 Reg dest = asm_load_lightud64(as, ir, 1); 2395 Reg dest = asm_load_lightud64(as, ir, 1);
@@ -2409,8 +2410,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
2409 } 2410 }
2410 /* Always do the type check, even if the load result is unused. */ 2411 /* Always do the type check, even if the load result is unused. */
2411 as->mrm.ofs += 4; 2412 as->mrm.ofs += 4;
2412 asm_guardcc(as, irt_isnum(ir->t) ? CC_A : CC_NE); 2413 asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
2413 if (LJ_64 && irt_isnum(ir->t)) { 2414 if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
2415 lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
2414 emit_u32(as, LJ_TISNUM); 2416 emit_u32(as, LJ_TISNUM);
2415 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); 2417 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
2416 } else { 2418 } else {
@@ -2443,7 +2445,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
2443 if (ra_hasreg(src)) { 2445 if (ra_hasreg(src)) {
2444 emit_mrm(as, XO_MOVto, src, RID_MRM); 2446 emit_mrm(as, XO_MOVto, src, RID_MRM);
2445 } else if (!irt_ispri(irr->t)) { 2447 } else if (!irt_ispri(irr->t)) {
2446 lua_assert(irt_isaddr(ir->t)); 2448 lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
2447 emit_i32(as, irr->i); 2449 emit_i32(as, irr->i);
2448 emit_mrm(as, XO_MOVmi, 0, RID_MRM); 2450 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
2449 } 2451 }
@@ -2460,8 +2462,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
2460 Reg base; 2462 Reg base;
2461 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 2463 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
2462 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 2464 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
2463 lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); 2465 lua_assert(LJ_DUALNUM ||
2464 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t)) { 2466 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
2467 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
2465 Reg left = ra_scratch(as, RSET_FPR); 2468 Reg left = ra_scratch(as, RSET_FPR);
2466 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 2469 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
2467 base = ra_alloc1(as, REF_BASE, RSET_GPR); 2470 base = ra_alloc1(as, REF_BASE, RSET_GPR);
@@ -2481,12 +2484,14 @@ static void asm_sload(ASMState *as, IRIns *ir)
2481 Reg dest = ra_dest(as, ir, allow); 2484 Reg dest = ra_dest(as, ir, allow);
2482 base = ra_alloc1(as, REF_BASE, RSET_GPR); 2485 base = ra_alloc1(as, REF_BASE, RSET_GPR);
2483 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 2486 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
2484 if ((ir->op2 & IRSLOAD_CONVERT)) 2487 if ((ir->op2 & IRSLOAD_CONVERT)) {
2485 emit_rmro(as, XO_CVTSD2SI, dest, base, ofs); 2488 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
2486 else if (irt_isnum(t)) 2489 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
2490 } else if (irt_isnum(t)) {
2487 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); 2491 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
2488 else 2492 } else {
2489 emit_rmro(as, XO_MOV, dest, base, ofs); 2493 emit_rmro(as, XO_MOV, dest, base, ofs);
2494 }
2490 } else { 2495 } else {
2491 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 2496 if (!(ir->op2 & IRSLOAD_TYPECHECK))
2492 return; /* No type check: avoid base alloc. */ 2497 return; /* No type check: avoid base alloc. */
@@ -2494,8 +2499,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
2494 } 2499 }
2495 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 2500 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
2496 /* Need type check, even if the load result is unused. */ 2501 /* Need type check, even if the load result is unused. */
2497 asm_guardcc(as, irt_isnum(t) ? CC_A : CC_NE); 2502 asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
2498 if (LJ_64 && irt_isnum(t)) { 2503 if (LJ_64 && irt_type(t) >= IRT_NUM) {
2504 lua_assert(irt_isinteger(t) || irt_isnum(t));
2499 emit_u32(as, LJ_TISNUM); 2505 emit_u32(as, LJ_TISNUM);
2500 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); 2506 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
2501 } else { 2507 } else {
@@ -3408,7 +3414,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
3408 Reg src = ra_alloc1(as, ref, RSET_FPR); 3414 Reg src = ra_alloc1(as, ref, RSET_FPR);
3409 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); 3415 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
3410 } else { 3416 } else {
3411 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); 3417 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
3418 (LJ_DUALNUM && irt_isinteger(ir->t)));
3412 if (!irref_isk(ref)) { 3419 if (!irref_isk(ref)) {
3413 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); 3420 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
3414 emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); 3421 emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index c93cece3..9482cc18 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -185,6 +185,8 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
185 (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); 185 (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
186 else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ 186 else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */
187 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0); 187 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0);
188 else if (ssize <= 4)
189 sp = lj_opt_narrow_toint(J, sp);
188 xstore: 190 xstore:
189 if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); 191 if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J);
190 if (dp == 0) return sp; 192 if (dp == 0) return sp;
@@ -355,10 +357,10 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
355 CType *s; 357 CType *s;
356 if (LJ_LIKELY(tref_isinteger(sp))) { 358 if (LJ_LIKELY(tref_isinteger(sp))) {
357 sid = CTID_INT32; 359 sid = CTID_INT32;
358 svisnz = (void *)(intptr_t)(numV(sval) != 0); 360 svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval));
359 } else if (tref_isnum(sp)) { 361 } else if (tref_isnum(sp)) {
360 sid = CTID_DOUBLE; 362 sid = CTID_DOUBLE;
361 svisnz = (void *)(intptr_t)(numV(sval) != 0); 363 svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval));
362 } else if (tref_isbool(sp)) { 364 } else if (tref_isbool(sp)) {
363 sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0); 365 sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0);
364 sid = CTID_BOOL; 366 sid = CTID_BOOL;
@@ -443,16 +445,16 @@ static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr)
443static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) 445static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
444{ 446{
445 IRIns *ir = IR(tref_ref(tr)); 447 IRIns *ir = IR(tref_ref(tr));
446 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && 448 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && irref_isk(ir->op2) &&
447 ir->o == IR_ADD && irref_isk(ir->op2)) { 449 (ir->o == IR_ADD || ir->o == IR_ADDOV || ir->o == IR_SUBOV)) {
448 IRIns *irk = IR(ir->op2); 450 IRIns *irk = IR(ir->op2);
449 tr = ir->op1; 451 ptrdiff_t k;
450#if LJ_64 452 if (LJ_64 && irk->o == IR_KINT64)
451 if (irk->o == IR_KINT64) 453 k = (ptrdiff_t)ir_kint64(irk)->u64 * sz;
452 *ofsp += (ptrdiff_t)ir_kint64(irk)->u64 * sz;
453 else 454 else
454#endif 455 k = (ptrdiff_t)irk->i * sz;
455 *ofsp += (ptrdiff_t)irk->i * sz; 456 if (ir->o == IR_SUBOV) *ofsp -= k; else *ofsp += k;
457 tr = ir->op1; /* Not a TRef, but the caller doesn't care. */
456 } 458 }
457 return tr; 459 return tr;
458} 460}
@@ -477,16 +479,7 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
477 479
478 idx = J->base[1]; 480 idx = J->base[1];
479 if (tref_isnumber(idx)) { 481 if (tref_isnumber(idx)) {
480 /* The size of a ptrdiff_t is target-specific. */ 482 idx = lj_opt_narrow_cindex(J, idx);
481#if LJ_64
482 if (tref_isnum(idx))
483 idx = emitconv(idx, IRT_I64, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY);
484 else
485 idx = emitconv(idx, IRT_I64, IRT_INT, IRCONV_SEXT);
486#else
487 if (tref_isnum(idx))
488 idx = emitconv(idx, IRT_INT, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY);
489#endif
490 integer_key: 483 integer_key:
491 if (ctype_ispointer(ct->info)) { 484 if (ctype_ispointer(ct->info)) {
492 CTSize sz; 485 CTSize sz;
@@ -635,7 +628,7 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
635 TRef sp, dp; 628 TRef sp, dp;
636 TValue tv; 629 TValue tv;
637 TValue *sval = &tv; 630 TValue *sval = &tv;
638 setnumV(&tv, 0); 631 setintV(&tv, 0);
639 if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ 632 if (!gcref(df->name)) continue; /* Ignore unnamed fields. */
640 dc = ctype_rawchild(cts, df); /* Field type. */ 633 dc = ctype_rawchild(cts, df); /* Field type. */
641 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) 634 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)))
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 631321d9..8077bf84 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -63,9 +63,9 @@ typedef void (LJ_FASTCALL *RecordFunc)(jit_State *J, RecordFFData *rd);
63/* Get runtime value of int argument. */ 63/* Get runtime value of int argument. */
64static int32_t argv2int(jit_State *J, TValue *o) 64static int32_t argv2int(jit_State *J, TValue *o)
65{ 65{
66 if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o))) 66 if (!tvisnumber(o) && !(tvisstr(o) && lj_str_tonumber(strV(o), o)))
67 lj_trace_err(J, LJ_TRERR_BADTYPE); 67 lj_trace_err(J, LJ_TRERR_BADTYPE);
68 return lj_num2bit(numV(o)); 68 return tvisint(o) ? intV(o) : lj_num2int(numV(o));
69} 69}
70 70
71/* Get runtime value of string argument. */ 71/* Get runtime value of string argument. */
@@ -75,9 +75,12 @@ static GCstr *argv2str(jit_State *J, TValue *o)
75 return strV(o); 75 return strV(o);
76 } else { 76 } else {
77 GCstr *s; 77 GCstr *s;
78 if (!tvisnum(o)) 78 if (!tvisnumber(o))
79 lj_trace_err(J, LJ_TRERR_BADTYPE); 79 lj_trace_err(J, LJ_TRERR_BADTYPE);
80 s = lj_str_fromnum(J->L, &o->n); 80 if (tvisint(o))
81 s = lj_str_fromint(J->L, intV(o));
82 else
83 s = lj_str_fromnum(J->L, &o->n);
81 setstrV(J->L, o, s); 84 setstrV(J->L, o, s);
82 return s; 85 return s;
83 } 86 }
@@ -128,7 +131,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
128{ 131{
129 /* Arguments already specialized. Result is a constant string. Neat, huh? */ 132 /* Arguments already specialized. Result is a constant string. Neat, huh? */
130 uint32_t t; 133 uint32_t t;
131 if (tvisnum(&rd->argv[0])) 134 if (tvisnumber(&rd->argv[0]))
132 t = ~LJ_TNUMX; 135 t = ~LJ_TNUMX;
133 else if (LJ_64 && tvislightud(&rd->argv[0])) 136 else if (LJ_64 && tvislightud(&rd->argv[0]))
134 t = ~LJ_TLIGHTUD; 137 t = ~LJ_TLIGHTUD;
@@ -255,7 +258,7 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
255 TRef tr = J->base[0]; 258 TRef tr = J->base[0];
256 TRef base = J->base[1]; 259 TRef base = J->base[1];
257 if (tr && base) { 260 if (tr && base) {
258 base = lj_ir_toint(J, base); 261 base = lj_opt_narrow_toint(J, base);
259 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) 262 if (!tref_isk(base) || IR(tref_ref(base))->i != 10)
260 recff_nyiu(J); 263 recff_nyiu(J);
261 } 264 }
@@ -332,12 +335,12 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
332 RecordIndex ix; 335 RecordIndex ix;
333 ix.tab = J->base[0]; 336 ix.tab = J->base[0];
334 if (tref_istab(ix.tab)) { 337 if (tref_istab(ix.tab)) {
335 if (!tvisnum(&rd->argv[1])) /* No support for string coercion. */ 338 if (!tvisnumber(&rd->argv[1])) /* No support for string coercion. */
336 lj_trace_err(J, LJ_TRERR_BADTYPE); 339 lj_trace_err(J, LJ_TRERR_BADTYPE);
337 setnumV(&ix.keyv, numV(&rd->argv[1])+(lua_Number)1); 340 setintV(&ix.keyv, numberVint(&rd->argv[1])+1);
338 settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); 341 settabV(J->L, &ix.tabv, tabV(&rd->argv[0]));
339 ix.val = 0; ix.idxchain = 0; 342 ix.val = 0; ix.idxchain = 0;
340 ix.key = lj_ir_toint(J, J->base[1]); 343 ix.key = lj_opt_narrow_toint(J, J->base[1]);
341 J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); 344 J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1));
342 J->base[1] = lj_record_idx(J, &ix); 345 J->base[1] = lj_record_idx(J, &ix);
343 rd->nres = tref_isnil(J->base[1]) ? 0 : 2; 346 rd->nres = tref_isnil(J->base[1]) ? 0 : 2;
@@ -525,26 +528,26 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
525/* Record unary bit.tobit, bit.bnot, bit.bswap. */ 528/* Record unary bit.tobit, bit.bnot, bit.bswap. */
526static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) 529static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
527{ 530{
528 TRef tr = lj_ir_tobit(J, J->base[0]); 531 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
529 J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); 532 J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0);
530} 533}
531 534
532/* Record N-ary bit.band, bit.bor, bit.bxor. */ 535/* Record N-ary bit.band, bit.bor, bit.bxor. */
533static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) 536static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
534{ 537{
535 TRef tr = lj_ir_tobit(J, J->base[0]); 538 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
536 uint32_t op = rd->data; 539 uint32_t op = rd->data;
537 BCReg i; 540 BCReg i;
538 for (i = 1; J->base[i] != 0; i++) 541 for (i = 1; J->base[i] != 0; i++)
539 tr = emitir(IRTI(op), tr, lj_ir_tobit(J, J->base[i])); 542 tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i]));
540 J->base[0] = tr; 543 J->base[0] = tr;
541} 544}
542 545
543/* Record bit shifts. */ 546/* Record bit shifts. */
544static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) 547static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
545{ 548{
546 TRef tr = lj_ir_tobit(J, J->base[0]); 549 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
547 TRef tsh = lj_ir_tobit(J, J->base[1]); 550 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
548 if (!(rd->data < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 551 if (!(rd->data < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
549 !tref_isk(tsh)) 552 !tref_isk(tsh))
550 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); 553 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
@@ -570,25 +573,25 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
570 int32_t start, end; 573 int32_t start, end;
571 if (rd->data) { /* string.sub(str, start [,end]) */ 574 if (rd->data) { /* string.sub(str, start [,end]) */
572 start = argv2int(J, &rd->argv[1]); 575 start = argv2int(J, &rd->argv[1]);
573 trstart = lj_ir_toint(J, J->base[1]); 576 trstart = lj_opt_narrow_toint(J, J->base[1]);
574 trend = J->base[2]; 577 trend = J->base[2];
575 if (tref_isnil(trend)) { 578 if (tref_isnil(trend)) {
576 trend = lj_ir_kint(J, -1); 579 trend = lj_ir_kint(J, -1);
577 end = -1; 580 end = -1;
578 } else { 581 } else {
579 trend = lj_ir_toint(J, trend); 582 trend = lj_opt_narrow_toint(J, trend);
580 end = argv2int(J, &rd->argv[2]); 583 end = argv2int(J, &rd->argv[2]);
581 } 584 }
582 } else { /* string.byte(str, [,start [,end]]) */ 585 } else { /* string.byte(str, [,start [,end]]) */
583 if (J->base[1]) { 586 if (J->base[1]) {
584 start = argv2int(J, &rd->argv[1]); 587 start = argv2int(J, &rd->argv[1]);
585 trstart = lj_ir_toint(J, J->base[1]); 588 trstart = lj_opt_narrow_toint(J, J->base[1]);
586 trend = J->base[2]; 589 trend = J->base[2];
587 if (tref_isnil(trend)) { 590 if (tref_isnil(trend)) {
588 trend = trstart; 591 trend = trstart;
589 end = start; 592 end = start;
590 } else { 593 } else {
591 trend = lj_ir_toint(J, trend); 594 trend = lj_opt_narrow_toint(J, trend);
592 end = argv2int(J, &rd->argv[2]); 595 end = argv2int(J, &rd->argv[2]);
593 } 596 }
594 } else { 597 } else {
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 1d57938e..721cfd0f 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -426,32 +426,6 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
426 return tr; 426 return tr;
427} 427}
428 428
429/* Convert from number or string to bitop operand (overflow wrapped). */
430TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr)
431{
432 if (!tref_isinteger(tr)) {
433 if (tref_isstr(tr))
434 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
435 else if (!tref_isnum(tr))
436 lj_trace_err(J, LJ_TRERR_BADTYPE);
437 tr = emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J));
438 }
439 return tr;
440}
441
442/* Convert from number or string to integer (overflow undefined). */
443TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr)
444{
445 if (!tref_isinteger(tr)) {
446 if (tref_isstr(tr))
447 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
448 else if (!tref_isnum(tr))
449 lj_trace_err(J, LJ_TRERR_BADTYPE);
450 tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
451 }
452 return tr;
453}
454
455/* -- Miscellaneous IR ops ------------------------------------------------ */ 429/* -- Miscellaneous IR ops ------------------------------------------------ */
456 430
457/* Evaluate numeric comparison. */ 431/* Evaluate numeric comparison. */
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 060cf562..c46bbbe0 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -124,7 +124,7 @@
124 _(XBAR, S , ___, ___) \ 124 _(XBAR, S , ___, ___) \
125 \ 125 \
126 /* Type conversions. */ \ 126 /* Type conversions. */ \
127 _(CONV, N , ref, lit) \ 127 _(CONV, NW, ref, lit) \
128 _(TOBIT, N , ref, ref) \ 128 _(TOBIT, N , ref, ref) \
129 _(TOSTR, N , ref, ___) \ 129 _(TOSTR, N , ref, ___) \
130 _(STRTO, N , ref, ___) \ 130 _(STRTO, N , ref, ___) \
@@ -345,8 +345,8 @@ typedef enum {
345#define IRM_AW (IRM_A|IRM_W) 345#define IRM_AW (IRM_A|IRM_W)
346#define IRM_LW (IRM_L|IRM_W) 346#define IRM_LW (IRM_L|IRM_W)
347 347
348#define irm_op1(m) (cast(IRMode, (m)&3)) 348#define irm_op1(m) ((IRMode)((m)&3))
349#define irm_op2(m) (cast(IRMode, ((m)>>2)&3)) 349#define irm_op2(m) ((IRMode)(((m)>>2)&3))
350#define irm_iscomm(m) ((m) & IRM_C) 350#define irm_iscomm(m) ((m) & IRM_C)
351#define irm_kind(m) ((m) & IRM_S) 351#define irm_kind(m) ((m) & IRM_S)
352 352
@@ -401,8 +401,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
401#define IRTG(o, t) (IRT((o), IRT_GUARD|(t))) 401#define IRTG(o, t) (IRT((o), IRT_GUARD|(t)))
402#define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT)) 402#define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT))
403 403
404#define irt_t(t) (cast(IRType, (t).irt)) 404#define irt_t(t) ((IRType)(t).irt)
405#define irt_type(t) (cast(IRType, (t).irt & IRT_TYPE)) 405#define irt_type(t) ((IRType)((t).irt & IRT_TYPE))
406#define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0) 406#define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0)
407#define irt_typerange(t, first, last) \ 407#define irt_typerange(t, first, last) \
408 ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first)) 408 ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first))
@@ -441,18 +441,30 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
441 441
442static LJ_AINLINE IRType itype2irt(const TValue *tv) 442static LJ_AINLINE IRType itype2irt(const TValue *tv)
443{ 443{
444 if (tvisnum(tv)) 444 if (tvisint(tv))
445 return IRT_INT;
446 else if (tvisnum(tv))
445 return IRT_NUM; 447 return IRT_NUM;
446#if LJ_64 448#if LJ_64
447 else if (tvislightud(tv)) 449 else if (tvislightud(tv))
448 return IRT_LIGHTUD; 450 return IRT_LIGHTUD;
449#endif 451#endif
450 else 452 else
451 return cast(IRType, ~itype(tv)); 453 return (IRType)~itype(tv);
452} 454}
453 455
454#define irt_toitype(t) \ 456static LJ_AINLINE uint32_t irt_toitype_(IRType t)
455 check_exp(!(LJ_64 && irt_islightud((t))), ~(uint32_t)irt_type((t))) 457{
458 lua_assert(!LJ_64 || t != IRT_LIGHTUD);
459 if (LJ_DUALNUM && t > IRT_NUM) {
460 return LJ_TISNUM;
461 } else {
462 lua_assert(t <= IRT_NUM);
463 return ~(uint32_t)t;
464 }
465}
466
467#define irt_toitype(t) irt_toitype_(irt_type((t)))
456 468
457#define irt_isguard(t) ((t).irt & IRT_GUARD) 469#define irt_isguard(t) ((t).irt & IRT_GUARD)
458#define irt_ismarked(t) ((t).irt & IRT_MARK) 470#define irt_ismarked(t) ((t).irt & IRT_MARK)
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index db99c118..1c94e91c 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -84,8 +84,6 @@ LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
84/* Convert IR operand types. */ 84/* Convert IR operand types. */
85LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); 85LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr);
86LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); 86LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr);
87LJ_FUNC TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr);
88LJ_FUNC TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr);
89 87
90/* Miscellaneous IR ops. */ 88/* Miscellaneous IR ops. */
91LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); 89LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op);
@@ -134,9 +132,17 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J);
134 132
135/* Narrowing. */ 133/* Narrowing. */
136LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); 134LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J);
135LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef key);
136LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr);
137LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr);
138#if LJ_HASFFI
139LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key);
140#endif
141LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
142 TValue *vb, TValue *vc, IROp op);
137LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); 143LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc);
138LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); 144LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc);
139LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase); 145LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
140 146
141/* Optimization passes. */ 147/* Optimization passes. */
142LJ_FUNC void lj_opt_dce(jit_State *J); 148LJ_FUNC void lj_opt_dce(jit_State *J);
diff --git a/src/lj_meta.c b/src/lj_meta.c
index 23f11f58..48cee510 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -393,13 +393,27 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o)
393 lj_err_msg(L, LJ_ERR_FORLIM); 393 lj_err_msg(L, LJ_ERR_FORLIM);
394 if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2)))) 394 if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2))))
395 lj_err_msg(L, LJ_ERR_FORSTEP); 395 lj_err_msg(L, LJ_ERR_FORSTEP);
396#if LJ_DUALNUM 396 if (LJ_DUALNUM) {
397 /* Ensure all slots are integers or all slots are numbers. */ 397 /* Ensure all slots are integers or all slots are numbers. */
398 if (!(tvisint(o) && tvisint(o+1) && tvisint(o+2))) { 398 int32_t k[3];
399 if (tvisint(o)) setnumV(o, (lua_Number)intV(o)); 399 int nint = 0;
400 if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1)); 400 ptrdiff_t i;
401 if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2)); 401 for (i = 0; i <= 2; i++) {
402 if (tvisint(o+i)) {
403 k[i] = intV(o+i); nint++;
404 } else {
405 k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i));
406 }
407 }
408 if (nint == 3) { /* Narrow to integers. */
409 setintV(o, k[0]);
410 setintV(o+1, k[1]);
411 setintV(o+2, k[2]);
412 } else if (nint != 0) { /* Widen to numbers. */
413 if (tvisint(o)) setnumV(o, (lua_Number)intV(o));
414 if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1));
415 if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2));
416 }
402 } 417 }
403#endif
404} 418}
405 419
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 687e6c08..32b3dec3 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -29,6 +29,6 @@ LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
29LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); 29LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
30LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); 30LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
31LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); 31LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
32LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *base); 32LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
33 33
34#endif 34#endif
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 88289f3e..19a2345f 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -325,8 +325,6 @@ typedef struct GCproto {
325#define proto_kgc(pt, idx) \ 325#define proto_kgc(pt, idx) \
326 check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ 326 check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \
327 gcref(mref((pt)->k, GCRef)[(idx)])) 327 gcref(mref((pt)->k, GCRef)[(idx)]))
328#define proto_knum(pt, idx) \
329 check_exp((uintptr_t)(idx) < (pt)->sizekn, mref((pt)->k, lua_Number)[(idx)])
330#define proto_knumtv(pt, idx) \ 328#define proto_knumtv(pt, idx) \
331 check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)]) 329 check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)])
332#define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto))) 330#define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto)))
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 471a4b29..e2d5c517 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -558,7 +558,10 @@ LJFOLD(CONV KINT IRCONV_I64_INT)
558LJFOLD(CONV KINT IRCONV_U64_INT) 558LJFOLD(CONV KINT IRCONV_U64_INT)
559LJFOLDF(kfold_conv_kint_i64) 559LJFOLDF(kfold_conv_kint_i64)
560{ 560{
561 return INT64FOLD((uint64_t)(int64_t)fleft->i); 561 if ((fins->op2 & IRCONV_SEXT))
562 return INT64FOLD((uint64_t)(int64_t)fleft->i);
563 else
564 return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
562} 565}
563 566
564LJFOLD(CONV KINT64 IRCONV_NUM_I64) 567LJFOLD(CONV KINT64 IRCONV_NUM_I64)
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index 559e579e..6dd06636 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -300,8 +300,11 @@ static void loop_unroll(jit_State *J)
300 } 300 }
301 /* Check all loop-carried dependencies for type instability. */ 301 /* Check all loop-carried dependencies for type instability. */
302 if (!irt_sametype(t, irr->t)) { 302 if (!irt_sametype(t, irr->t)) {
303 if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num case. */ 303 if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num. */
304 subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); 304 subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT));
305 else if (irt_isnum(irr->t) && irt_isinteger(t)) /* Fix num->int. */
306 subst[ins] = tref_ref(emitir(IRTGI(IR_CONV), ref,
307 IRCONV_INT_NUM|IRCONV_CHECK));
305 else if (!(irt_isinteger(t) && irt_isinteger(irr->t))) 308 else if (!(irt_isinteger(t) && irt_isinteger(irr->t)))
306 lj_trace_err(J, LJ_TRERR_TYPEINS); 309 lj_trace_err(J, LJ_TRERR_TYPEINS);
307 } 310 }
@@ -355,8 +358,8 @@ int lj_opt_loop(jit_State *J)
355 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); 358 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt);
356 if (LJ_UNLIKELY(errcode)) { 359 if (LJ_UNLIKELY(errcode)) {
357 lua_State *L = J->L; 360 lua_State *L = J->L;
358 if (errcode == LUA_ERRRUN && tvisnum(L->top-1)) { /* Trace error? */ 361 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */
359 int32_t e = lj_num2int(numV(L->top-1)); 362 int32_t e = numberVint(L->top-1);
360 switch ((TraceError)e) { 363 switch ((TraceError)e) {
361 case LJ_TRERR_TYPEINS: /* Type instability. */ 364 case LJ_TRERR_TYPEINS: /* Type instability. */
362 case LJ_TRERR_GFAIL: /* Guard would always fail. */ 365 case LJ_TRERR_GFAIL: /* Guard would always fail. */
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 0a2bb6cd..1727e9b5 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -1,5 +1,6 @@
1/* 1/*
2** NARROW: Narrowing of numbers to integers (double to int32_t). 2** NARROW: Narrowing of numbers to integers (double to int32_t).
3** STRIPOV: Stripping of overflow checks.
3** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h 4** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
4*/ 5*/
5 6
@@ -16,6 +17,7 @@
16#include "lj_jit.h" 17#include "lj_jit.h"
17#include "lj_iropt.h" 18#include "lj_iropt.h"
18#include "lj_trace.h" 19#include "lj_trace.h"
20#include "lj_vm.h"
19 21
20/* Rationale for narrowing optimizations: 22/* Rationale for narrowing optimizations:
21** 23**
@@ -57,24 +59,34 @@
57** 59**
58** A better solution is to keep all numbers as FP values and only narrow 60** A better solution is to keep all numbers as FP values and only narrow
59** when it's beneficial to do so. LuaJIT uses predictive narrowing for 61** when it's beneficial to do so. LuaJIT uses predictive narrowing for
60** induction variables and demand-driven narrowing for index expressions 62** induction variables and demand-driven narrowing for index expressions,
61** and bit operations. Additionally it can eliminate or hoists most of the 63** integer arguments and bit operations. Additionally it can eliminate or
62** resulting overflow checks. Regular arithmetic computations are never 64** hoist most of the resulting overflow checks. Regular arithmetic
63** narrowed to integers. 65** computations are never narrowed to integers.
64** 66**
65** The integer type in the IR has convenient wrap-around semantics and 67** The integer type in the IR has convenient wrap-around semantics and
66** ignores overflow. Extra operations have been added for 68** ignores overflow. Extra operations have been added for
67** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. 69** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type.
68** Apart from reducing overall complexity of the compiler, this also 70** Apart from reducing overall complexity of the compiler, this also
69** nicely solves the problem where you want to apply algebraic 71** nicely solves the problem where you want to apply algebraic
70** simplifications to ADD, but not to ADDOV. And the assembler can use lea 72** simplifications to ADD, but not to ADDOV. And the x86/x64 assembler can
71** instead of an add for integer ADD, but not for ADDOV (lea does not 73** use lea instead of an add for integer ADD, but not for ADDOV (lea does
72** affect the flags, but it helps to avoid register moves). 74** not affect the flags, but it helps to avoid register moves).
73** 75**
74** Note that all of the above has to be reconsidered if LuaJIT is to be 76**
75** ported to architectures with slow FP operations or with no hardware FPU 77** All of the above has to be reconsidered for architectures with slow FP
76** at all. In the latter case an integer-only port may be the best overall 78** operations or without a hardware FPU. The dual-number mode of LuaJIT
77** solution (if this still meets user demands). 79** addresses this issue. Arithmetic operations are performed on integers
80** as far as possible and overflow checks are added as needed.
81**
82** This implies that narrowing for integer arguments and bit operations
83** should also strip overflow checks, e.g. replace ADDOV with ADD. The
84** original overflow guards are weak and can be eliminated by DCE, if
85** there's no other use.
86**
87** A slight twist is that it's usually beneficial to use overflow-checked
88** integer arithmetics if all inputs are already integers. This is the only
89** change that affects the single-number mode, too.
78*/ 90*/
79 91
80/* Some local macros to save typing. Undef'd at the end. */ 92/* Some local macros to save typing. Undef'd at the end. */
@@ -94,10 +106,10 @@
94** already takes care of eliminating simple redundant conversions like 106** already takes care of eliminating simple redundant conversions like
95** CONV.int.num(CONV.num.int(x)) ==> x. 107** CONV.int.num(CONV.num.int(x)) ==> x.
96** 108**
97** But the surrounding code is FP-heavy and all arithmetic operations are 109** But the surrounding code is FP-heavy and arithmetic operations are
98** performed on FP numbers. Consider a common example such as 'x=t[i+1]', 110** performed on FP numbers (for the single-number mode). Consider a common
99** with 'i' already an integer (due to induction variable narrowing). The 111** example such as 'x=t[i+1]', with 'i' already an integer (due to induction
100** index expression would be recorded as 112** variable narrowing). The index expression would be recorded as
101** CONV.int.num(ADD(CONV.num.int(i), 1)) 113** CONV.int.num(ADD(CONV.num.int(i), 1))
102** which is clearly suboptimal. 114** which is clearly suboptimal.
103** 115**
@@ -113,6 +125,9 @@
113** FP ops remain in the IR and are eliminated by DCE since all references to 125** FP ops remain in the IR and are eliminated by DCE since all references to
114** them are gone. 126** them are gone.
115** 127**
128** [In dual-number mode the trace recorder already emits ADDOV etc., but
129** this can be further reduced. See below.]
130**
116** Special care has to be taken to avoid narrowing across an operation 131** Special care has to be taken to avoid narrowing across an operation
117** which is potentially operating on non-integral operands. One obvious 132** which is potentially operating on non-integral operands. One obvious
118** case is when an expression contains a non-integral constant, but ends 133** case is when an expression contains a non-integral constant, but ends
@@ -221,6 +236,26 @@ static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode)
221 bp->mode = mode; 236 bp->mode = mode;
222} 237}
223 238
239/* Backpropagate overflow stripping. */
240static void narrow_stripov_backprop(NarrowConv *nc, IRRef ref, int depth)
241{
242 jit_State *J = nc->J;
243 IRIns *ir = IR(ref);
244 if (ir->o == IR_ADDOV || ir->o == IR_SUBOV ||
245 (ir->o == IR_MULOV && (nc->mode & IRCONV_CONVMASK) == IRCONV_ANY)) {
246 BPropEntry *bp = narrow_bpc_get(nc->J, ref, IRCONV_TOBIT);
247 if (bp) {
248 ref = bp->val;
249 } else if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) {
250 narrow_stripov_backprop(nc, ir->op1, depth);
251 narrow_stripov_backprop(nc, ir->op2, depth);
252 *nc->sp++ = NARROWINS(IRT(ir->o - IR_ADDOV + IR_ADD, IRT_INT), ref);
253 return;
254 }
255 }
256 *nc->sp++ = NARROWINS(NARROW_REF, ref);
257}
258
224/* Backpropagate narrowing conversion. Return number of needed conversions. */ 259/* Backpropagate narrowing conversion. Return number of needed conversions. */
225static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) 260static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
226{ 261{
@@ -230,24 +265,26 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
230 265
231 /* Check the easy cases first. */ 266 /* Check the easy cases first. */
232 if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) { 267 if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) {
233 if (nc->t == IRT_I64) 268 if ((nc->mode & IRCONV_CONVMASK) <= IRCONV_ANY)
234 *nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1); /* Reduce to sign-ext. */ 269 narrow_stripov_backprop(nc, ir->op1, depth+1);
235 else 270 else
236 *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */ 271 *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */
272 if (nc->t == IRT_I64)
273 *nc->sp++ = NARROWINS(NARROW_SEXT, 0); /* Sign-extend integer. */
237 return 0; 274 return 0;
238 } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ 275 } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
239 lua_Number n = ir_knum(ir)->n; 276 lua_Number n = ir_knum(ir)->n;
240 if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { 277 if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
241 /* Allows a wider range of constants. */ 278 /* Allows a wider range of constants. */
242 int64_t k64 = (int64_t)n; 279 int64_t k64 = (int64_t)n;
243 if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */ 280 if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */
244 *nc->sp++ = NARROWINS(NARROW_INT, 0); 281 *nc->sp++ = NARROWINS(NARROW_INT, 0);
245 *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ 282 *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */
246 return 0; 283 return 0;
247 } 284 }
248 } else { 285 } else {
249 int32_t k = lj_num2int(n); 286 int32_t k = lj_num2int(n);
250 if (n == cast_num(k)) { /* Only if constant is really an integer. */ 287 if (n == (lua_Number)k) { /* Only if constant is really an integer. */
251 *nc->sp++ = NARROWINS(NARROW_INT, 0); 288 *nc->sp++ = NARROWINS(NARROW_INT, 0);
252 *nc->sp++ = (NarrowIns)k; 289 *nc->sp++ = (NarrowIns)k;
253 return 0; 290 return 0;
@@ -287,7 +324,8 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
287 mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX; 324 mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX;
288 bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); 325 bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
289 if (bp) { 326 if (bp) {
290 *nc->sp++ = NARROWINS(NARROW_SEXT, bp->val); 327 *nc->sp++ = NARROWINS(NARROW_REF, bp->val);
328 *nc->sp++ = NARROWINS(NARROW_SEXT, 0);
291 return 0; 329 return 0;
292 } 330 }
293 } 331 }
@@ -326,8 +364,9 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
326 } else if (op == NARROW_CONV) { 364 } else if (op == NARROW_CONV) {
327 *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ 365 *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
328 } else if (op == NARROW_SEXT) { 366 } else if (op == NARROW_SEXT) {
329 *sp++ = emitir(IRT(IR_CONV, IRT_I64), ref, 367 lua_assert(sp >= nc->stack+1);
330 (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); 368 sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1],
369 (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
331 } else if (op == NARROW_INT) { 370 } else if (op == NARROW_INT) {
332 lua_assert(next < last); 371 lua_assert(next < last);
333 *sp++ = nc->t == IRT_I64 ? 372 *sp++ = nc->t == IRT_I64 ?
@@ -340,7 +379,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
340 /* Omit some overflow checks for array indexing. See comments above. */ 379 /* Omit some overflow checks for array indexing. See comments above. */
341 if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { 380 if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
342 if (next == last && irref_isk(narrow_ref(sp[0])) && 381 if (next == last && irref_isk(narrow_ref(sp[0])) &&
343 (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000) 382 (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000u < 0x80000000u)
344 guardot = 0; 383 guardot = 0;
345 else /* Otherwise cache a stronger check. */ 384 else /* Otherwise cache a stronger check. */
346 mode += IRCONV_CHECK-IRCONV_INDEX; 385 mode += IRCONV_CHECK-IRCONV_INDEX;
@@ -377,12 +416,123 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
377 return NEXTFOLD; 416 return NEXTFOLD;
378} 417}
379 418
419/* -- Narrowing of implicit conversions ----------------------------------- */
420
421/* Recursively strip overflow checks. */
422static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
423{
424 IRRef ref = tref_ref(tr);
425 IRIns *ir = IR(ref);
426 int op = ir->o;
427 if (op >= IR_ADDOV && op <= lastop) {
428 BPropEntry *bp = narrow_bpc_get(J, ref, mode);
429 if (bp) {
430 return TREF(bp->val, irt_t(IR(bp->val)->t));
431 } else {
432 IRRef op1 = ir->op1, op2 = ir->op2; /* The IR may be reallocated. */
433 op1 = narrow_stripov(J, op1, lastop, mode);
434 op2 = narrow_stripov(J, op2, lastop, mode);
435 tr = emitir(IRT(op - IR_ADDOV + IR_ADD,
436 ((mode & IRCONV_DSTMASK) >> IRCONV_DSH)), op1, op2);
437 narrow_bpc_set(J, ref, tref_ref(tr), mode);
438 }
439 } else if (LJ_64 && (mode & IRCONV_SEXT) && !irt_is64(ir->t)) {
440 tr = emitir(IRT(IR_CONV, IRT_INTP), tr, mode);
441 }
442 return tr;
443}
444
445/* Narrow array index. */
446TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
447{
448 IRIns *ir;
449 lua_assert(tref_isnumber(tr));
450 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
451 return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
452 /* Omit some overflow checks for array indexing. See comments above. */
453 ir = IR(tref_ref(tr));
454 if ((ir->o == IR_ADDOV || ir->o == IR_SUBOV) && irref_isk(ir->op2) &&
455 (uint32_t)IR(ir->op2)->i + 0x40000000u < 0x80000000u)
456 return emitir(IRTI(ir->o - IR_ADDOV + IR_ADD), ir->op1, ir->op2);
457 return tr;
458}
459
460/* Narrow conversion to integer operand (overflow undefined). */
461TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr)
462{
463 if (tref_isstr(tr))
464 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
465 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
466 return emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
467 if (!tref_isinteger(tr))
468 lj_trace_err(J, LJ_TRERR_BADTYPE);
469 /*
470 ** Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV.
471 ** Use IRCONV_TOBIT for the cache entries, since the semantics are the same.
472 */
473 return narrow_stripov(J, tr, IR_MULOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT);
474}
475
476/* Narrow conversion to bitop operand (overflow wrapped). */
477TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
478{
479 if (tref_isstr(tr))
480 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
481 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
482 return emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J));
483 if (!tref_isinteger(tr))
484 lj_trace_err(J, LJ_TRERR_BADTYPE);
485 /*
486 ** Wrapped overflow semantics allow stripping of ADDOV and SUBOV.
487 ** MULOV cannot be stripped due to precision widening.
488 */
489 return narrow_stripov(J, tr, IR_SUBOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT);
490}
491
492#if LJ_HASFFI
493/* Narrow C array index (overflow undefined). */
494TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
495{
496 lua_assert(tref_isnumber(tr));
497 if (tref_isnum(tr))
498 return emitir(IRTI(IR_CONV), tr,
499 (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
500 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
501 return narrow_stripov(J, tr, IR_MULOV,
502 LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
503 ((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT));
504}
505#endif
506
380/* -- Narrowing of arithmetic operators ----------------------------------- */ 507/* -- Narrowing of arithmetic operators ----------------------------------- */
381 508
382/* Check whether a number fits into an int32_t (-0 is ok, too). */ 509/* Check whether a number fits into an int32_t (-0 is ok, too). */
383static int numisint(lua_Number n) 510static int numisint(lua_Number n)
384{ 511{
385 return (n == cast_num(lj_num2int(n))); 512 return (n == (lua_Number)lj_num2int(n));
513}
514
515/* Narrowing of arithmetic operations. */
516TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
517 TValue *vb, TValue *vc, IROp op)
518{
519 if (tref_isstr(rb)) {
520 rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0);
521 lj_str_tonum(strV(vb), vb);
522 }
523 if (tref_isstr(rc)) {
524 rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
525 lj_str_tonum(strV(vc), vc);
526 }
527 /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
528 if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
529 tref_isinteger(rb) && tref_isinteger(rc) &&
530 numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
531 (int)op - (int)IR_ADD)))
532 return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc);
533 if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT);
534 if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
535 return emitir(IRTN(op), rb, rc);
386} 536}
387 537
388/* Narrowing of modulo operator. */ 538/* Narrowing of modulo operator. */
@@ -409,16 +559,15 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
409/* Narrowing of power operator or math.pow. */ 559/* Narrowing of power operator or math.pow. */
410TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) 560TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
411{ 561{
412 lua_Number n;
413 if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) 562 if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
414 lj_trace_err(J, LJ_TRERR_BADTYPE); 563 lj_trace_err(J, LJ_TRERR_BADTYPE);
415 n = numV(vc);
416 /* Narrowing must be unconditional to preserve (-x)^i semantics. */ 564 /* Narrowing must be unconditional to preserve (-x)^i semantics. */
417 if (numisint(n)) { 565 if (tvisint(vc) || numisint(numV(vc))) {
418 int checkrange = 0; 566 int checkrange = 0;
419 /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ 567 /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
420 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { 568 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
421 if (!(n >= -65536.0 && n <= 65536.0)) goto split_pow; 569 int32_t k = numberVint(vc);
570 if (!(k >= -65536 && k <= 65536)) goto split_pow;
422 checkrange = 1; 571 checkrange = 1;
423 } 572 }
424 if (!tref_isinteger(rc)) { 573 if (!tref_isinteger(rc)) {
@@ -448,20 +597,28 @@ split_pow:
448 597
449/* -- Predictive narrowing of induction variables ------------------------- */ 598/* -- Predictive narrowing of induction variables ------------------------- */
450 599
600/* Narrow a single runtime value. */
601static int narrow_forl(jit_State *J, cTValue *o)
602{
603 if (tvisint(o)) return 1;
604 if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o));
605 return 0;
606}
607
451/* Narrow the FORL index type by looking at the runtime values. */ 608/* Narrow the FORL index type by looking at the runtime values. */
452IRType lj_opt_narrow_forl(cTValue *forbase) 609IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
453{ 610{
454 lua_assert(tvisnum(&forbase[FORL_IDX]) && 611 lua_assert(tvisnumber(&tv[FORL_IDX]) &&
455 tvisnum(&forbase[FORL_STOP]) && 612 tvisnumber(&tv[FORL_STOP]) &&
456 tvisnum(&forbase[FORL_STEP])); 613 tvisnumber(&tv[FORL_STEP]));
457 /* Narrow only if the runtime values of start/stop/step are all integers. */ 614 /* Narrow only if the runtime values of start/stop/step are all integers. */
458 if (numisint(numV(&forbase[FORL_IDX])) && 615 if (narrow_forl(J, &tv[FORL_IDX]) &&
459 numisint(numV(&forbase[FORL_STOP])) && 616 narrow_forl(J, &tv[FORL_STOP]) &&
460 numisint(numV(&forbase[FORL_STEP]))) { 617 narrow_forl(J, &tv[FORL_STEP])) {
461 /* And if the loop index can't possibly overflow. */ 618 /* And if the loop index can't possibly overflow. */
462 lua_Number step = numV(&forbase[FORL_STEP]); 619 lua_Number step = numberVnum(&tv[FORL_STEP]);
463 lua_Number sum = numV(&forbase[FORL_STOP]) + step; 620 lua_Number sum = numberVnum(&tv[FORL_STOP]) + step;
464 if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0) 621 if (0 <= step ? (sum <= 2147483647.0) : (sum >= -2147483648.0))
465 return IRT_INT; 622 return IRT_INT;
466 } 623 }
467 return IRT_NUM; 624 return IRT_NUM;
diff --git a/src/lj_record.c b/src/lj_record.c
index 2bfd2608..613e458e 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -13,6 +13,7 @@
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_str.h"
15#include "lj_tab.h" 15#include "lj_tab.h"
16#include "lj_meta.h"
16#include "lj_frame.h" 17#include "lj_frame.h"
17#include "lj_bc.h" 18#include "lj_bc.h"
18#include "lj_ff.h" 19#include "lj_ff.h"
@@ -102,7 +103,7 @@ static void rec_check_slots(jit_State *J)
102 lua_assert((J->slot[s+1] & TREF_FRAME)); 103 lua_assert((J->slot[s+1] & TREF_FRAME));
103 depth++; 104 depth++;
104 } else { 105 } else {
105 if (tvisnum(tv)) 106 if (tvisnumber(tv))
106 lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ 107 lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */
107 else 108 else
108 lua_assert(itype2irt(tv) == tref_type(tr)); 109 lua_assert(itype2irt(tv) == tref_type(tr));
@@ -197,6 +198,7 @@ typedef enum {
197static void canonicalize_slots(jit_State *J) 198static void canonicalize_slots(jit_State *J)
198{ 199{
199 BCReg s; 200 BCReg s;
201 if (LJ_DUALNUM) return;
200 for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { 202 for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
201 TRef tr = J->slot[s]; 203 TRef tr = J->slot[s];
202 if (tref_isinteger(tr)) { 204 if (tref_isinteger(tr)) {
@@ -254,16 +256,16 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
254 } 256 }
255 if (op == BC_KSHORT) { 257 if (op == BC_KSHORT) {
256 int32_t k = (int32_t)(int16_t)bc_d(ins); 258 int32_t k = (int32_t)(int16_t)bc_d(ins);
257 return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, cast_num(k)); 259 return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, (lua_Number)k);
258 } else { 260 } else {
259 lua_Number n = proto_knum(J->pt, bc_d(ins)); 261 cTValue *tv = proto_knumtv(J->pt, bc_d(ins));
260 if (t == IRT_INT) { 262 if (t == IRT_INT) {
261 int32_t k = lj_num2int(n); 263 int32_t k = numberVint(tv);
262 if (n == cast_num(k)) /* -0 is ok here. */ 264 if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */
263 return lj_ir_kint(J, k); 265 return lj_ir_kint(J, k);
264 return 0; /* Type mismatch. */ 266 return 0; /* Type mismatch. */
265 } else { 267 } else {
266 return lj_ir_knum(J, n); 268 return lj_ir_knum(J, numberVnum(tv));
267 } 269 }
268 } 270 }
269 } 271 }
@@ -273,41 +275,47 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
273 return 0; /* No assignment to this slot found? */ 275 return 0; /* No assignment to this slot found? */
274} 276}
275 277
278/* Load and optionally convert a FORI argument from a slot. */
279static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode)
280{
281 int conv = (tvisint(&J->L->base[slot]) != (t==IRT_INT)) ? IRSLOAD_CONVERT : 0;
282 return sloadt(J, (int32_t)slot,
283 t + (((mode & IRSLOAD_TYPECHECK) ||
284 (conv && t == IRT_INT && !(mode >> 16))) ?
285 IRT_GUARD : 0),
286 mode + conv);
287}
288
276/* Peek before FORI to find a const initializer. Otherwise load from slot. */ 289/* Peek before FORI to find a const initializer. Otherwise load from slot. */
277static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, IRType t) 290static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot,
291 IRType t, int mode)
278{ 292{
279 TRef tr = J->base[slot]; 293 TRef tr = J->base[slot];
280 if (!tr) { 294 if (!tr) {
281 tr = find_kinit(J, fori, slot, t); 295 tr = find_kinit(J, fori, slot, t);
282 if (!tr) 296 if (!tr)
283 tr = sloadt(J, (int32_t)slot, 297 tr = fori_load(J, slot, t, mode);
284 t == IRT_INT ? (IRT_INT|IRT_GUARD) : t,
285 t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_READONLY|IRSLOAD_INHERIT) :
286 (IRSLOAD_READONLY|IRSLOAD_INHERIT));
287 } 298 }
288 return tr; 299 return tr;
289} 300}
290 301
291/* In-place coercion of FORI arguments. */ 302/* Return the direction of the FOR loop iterator.
292static lua_Number for_coerce(jit_State *J, TValue *o) 303** It's important to exactly reproduce the semantics of the interpreter.
304*/
305static int rec_for_direction(cTValue *o)
293{ 306{
294 if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o))) 307 return (tvisint(o) ? intV(o) : (int32_t)o->u32.hi) >= 0;
295 lj_trace_err(J, LJ_TRERR_BADTYPE);
296 return numV(o);
297} 308}
298 309
299/* Simulate the runtime behavior of the FOR loop iterator. 310/* Simulate the runtime behavior of the FOR loop iterator. */
300** It's important to exactly reproduce the semantics of the interpreter. 311static LoopEvent rec_for_iter(IROp *op, cTValue *o, int isforl)
301*/
302static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl)
303{ 312{
304 TValue *forbase = &J->L->base[ra]; 313 lua_Number stopv = numberVnum(&o[FORL_STOP]);
305 lua_Number stopv = for_coerce(J, &forbase[FORL_STOP]); 314 lua_Number idxv = numberVnum(&o[FORL_IDX]);
306 lua_Number idxv = for_coerce(J, &forbase[FORL_IDX]); 315 lua_Number stepv = numberVnum(&o[FORL_STEP]);
307 lua_Number stepv = for_coerce(J, &forbase[FORL_STEP]);
308 if (isforl) 316 if (isforl)
309 idxv += stepv; 317 idxv += stepv;
310 if ((int32_t)forbase[FORL_STEP].u32.hi >= 0) { 318 if (rec_for_direction(&o[FORL_STEP])) {
311 if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; } 319 if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; }
312 *op = IR_GT; return LOOPEV_LEAVE; 320 *op = IR_GT; return LOOPEV_LEAVE;
313 } else { 321 } else {
@@ -316,44 +324,123 @@ static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl)
316 } 324 }
317} 325}
318 326
327/* Record checks for FOR loop overflow and step direction. */
328static void rec_for_check(jit_State *J, IRType t, int dir, TRef stop, TRef step)
329{
330 if (!tref_isk(step)) {
331 /* Non-constant step: need a guard for the direction. */
332 TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J);
333 emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero);
334 /* Add hoistable overflow checks for a narrowed FORL index. */
335 if (t == IRT_INT) {
336 if (tref_isk(stop)) {
337 /* Constant stop: optimize check away or to a range check for step. */
338 int32_t k = IR(tref_ref(stop))->i;
339 if (dir) {
340 if (k > 0)
341 emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k));
342 } else {
343 if (k < 0)
344 emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k));
345 }
346 } else {
347 /* Stop+step variable: need full overflow check. */
348 TRef tr = emitir(IRTGI(IR_ADDOV), step, stop);
349 emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */
350 }
351 }
352 } else if (t == IRT_INT && !tref_isk(stop)) {
353 /* Constant step: optimize overflow check to a range check for stop. */
354 int32_t k = IR(tref_ref(step))->i;
355 k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k;
356 emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k));
357 }
358}
359
360/* Record a FORL instruction. */
361static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev,
362 int init)
363{
364 BCReg ra = bc_a(*fori);
365 cTValue *tv = &J->L->base[ra];
366 TRef idx = J->base[ra+FORL_IDX];
367 IRType t = idx ? tref_type(idx) :
368 (init || LJ_DUALNUM) ? lj_opt_narrow_forl(J, tv) : IRT_NUM;
369 int mode = IRSLOAD_INHERIT +
370 ((!LJ_DUALNUM || tvisint(tv) == (t == IRT_INT)) ? IRSLOAD_READONLY : 0);
371 TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode);
372 TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode);
373 int tc, dir = rec_for_direction(&tv[FORL_STEP]);
374 lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
375 scev->t.irt = t;
376 scev->dir = dir;
377 scev->stop = tref_ref(stop);
378 scev->step = tref_ref(step);
379 if (init)
380 rec_for_check(J, t, dir, stop, step);
381 scev->start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT));
382 tc = (LJ_DUALNUM &&
383 !(scev->start && irref_isk(scev->stop) && irref_isk(scev->step))) ?
384 IRSLOAD_TYPECHECK : 0;
385 if (tc) {
386 J->base[ra+FORL_STOP] = stop;
387 J->base[ra+FORL_STEP] = step;
388 }
389 if (!idx)
390 idx = fori_load(J, ra+FORL_IDX, t,
391 IRSLOAD_INHERIT + tc + (J->scev.start << 16));
392 if (!init)
393 J->base[ra+FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step);
394 J->base[ra+FORL_EXT] = idx;
395 scev->idx = tref_ref(idx);
396 J->maxslot = ra+FORL_EXT+1;
397}
398
319/* Record FORL/JFORL or FORI/JFORI. */ 399/* Record FORL/JFORL or FORI/JFORI. */
320static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) 400static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
321{ 401{
322 BCReg ra = bc_a(*fori); 402 BCReg ra = bc_a(*fori);
323 IROp op; 403 TValue *tv = &J->L->base[ra];
324 LoopEvent ev = for_iter(J, &op, ra, isforl);
325 TRef *tr = &J->base[ra]; 404 TRef *tr = &J->base[ra];
326 TRef idx, stop; 405 IROp op;
406 LoopEvent ev;
407 TRef stop;
327 IRType t; 408 IRType t;
328 if (isforl) { /* Handle FORL/JFORL opcodes. */ 409 if (isforl) { /* Handle FORL/JFORL opcodes. */
329 TRef step; 410 TRef idx = tr[FORL_IDX];
330 idx = tr[FORL_IDX];
331 if (tref_ref(idx) == J->scev.idx) { 411 if (tref_ref(idx) == J->scev.idx) {
332 t = J->scev.t.irt; 412 t = J->scev.t.irt;
333 stop = J->scev.stop; 413 stop = J->scev.stop;
334 step = J->scev.step; 414 idx = emitir(IRT(IR_ADD, t), idx, J->scev.step);
415 tr[FORL_EXT] = tr[FORL_IDX] = idx;
335 } else { 416 } else {
336 if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0); 417 ScEvEntry scev;
337 t = tref_type(idx); 418 rec_for_loop(J, fori, &scev, 0);
338 stop = fori_arg(J, fori, ra+FORL_STOP, t); 419 t = scev.t.irt;
339 step = fori_arg(J, fori, ra+FORL_STEP, t); 420 stop = scev.stop;
340 } 421 }
341 tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step);
342 } else { /* Handle FORI/JFORI opcodes. */ 422 } else { /* Handle FORI/JFORI opcodes. */
343 BCReg i; 423 BCReg i;
344 t = IRT_NUM; 424 lj_meta_for(J->L, tv);
425 t = lj_opt_narrow_forl(J, tv);
345 for (i = FORL_IDX; i <= FORL_STEP; i++) { 426 for (i = FORL_IDX; i <= FORL_STEP; i++) {
346 lua_assert(J->base[ra+i] != 0); /* Assumes the slots are already set. */ 427 lua_assert(tref_isnumber_str(tr[i]));
347 tr[i] = lj_ir_tonum(J, J->base[ra+i]); 428 if (tref_isstr(tr[i]))
429 tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0);
430 if (t == IRT_INT) {
431 if (!tref_isinteger(tr[i]))
432 tr[i] = emitir(IRTI(IR_CONV), tr[i], IRCONV_INT_NUM|IRCONV_CHECK);
433 } else {
434 if (!tref_isnum(tr[i]))
435 tr[i] = emitir(IRTN(IR_CONV), tr[i], IRCONV_NUM_INT);
436 }
348 } 437 }
349 idx = tr[FORL_IDX]; 438 tr[FORL_EXT] = tr[FORL_IDX];
350 stop = tr[FORL_STOP]; 439 stop = tr[FORL_STOP];
351 if (!tref_isk(tr[FORL_STEP])) /* Non-const step: need direction guard. */ 440 rec_for_check(J, t, rec_for_direction(&tv[FORL_STEP]), stop, tr[FORL_STEP]);
352 emitir(IRTG(((op-IR_LT)>>1)+IR_LT, IRT_NUM),
353 tr[FORL_STEP], lj_ir_knum_zero(J));
354 } 441 }
355 442
356 tr[FORL_EXT] = idx; 443 ev = rec_for_iter(&op, tv, isforl);
357 if (ev == LOOPEV_LEAVE) { 444 if (ev == LOOPEV_LEAVE) {
358 J->maxslot = ra+FORL_EXT+1; 445 J->maxslot = ra+FORL_EXT+1;
359 J->pc = fori+1; 446 J->pc = fori+1;
@@ -363,7 +450,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
363 } 450 }
364 lj_snap_add(J); 451 lj_snap_add(J);
365 452
366 emitir(IRTG(op, t), idx, stop); 453 emitir(IRTG(op, t), tr[FORL_IDX], stop);
367 454
368 if (ev == LOOPEV_LEAVE) { 455 if (ev == LOOPEV_LEAVE) {
369 J->maxslot = ra; 456 J->maxslot = ra;
@@ -870,7 +957,7 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
870 if (ref == J->scev.idx) { 957 if (ref == J->scev.idx) {
871 int32_t stop; 958 int32_t stop;
872 lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); 959 lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD);
873 stop = lj_num2int(numV(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP])); 960 stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]);
874 /* Runtime value for stop of loop is within bounds? */ 961 /* Runtime value for stop of loop is within bounds? */
875 if ((int64_t)stop + ofs < (int64_t)asize) { 962 if ((int64_t)stop + ofs < (int64_t)asize) {
876 /* Emit invariant bounds check for stop. */ 963 /* Emit invariant bounds check for stop. */
@@ -897,15 +984,12 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
897 /* Integer keys are looked up in the array part first. */ 984 /* Integer keys are looked up in the array part first. */
898 key = ix->key; 985 key = ix->key;
899 if (tref_isnumber(key)) { 986 if (tref_isnumber(key)) {
900 lua_Number n = numV(&ix->keyv); 987 int32_t k = numberVint(&ix->keyv);
901 int32_t k = lj_num2int(n); 988 if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k)
902 lua_assert(tvisnum(&ix->keyv)); 989 k = LJ_MAX_ASIZE;
903 /* Potential array key? */ 990 if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */
904 if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) { 991 TRef ikey = lj_opt_narrow_index(J, key);
905 TRef asizeref, ikey = key; 992 TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
906 if (!tref_isinteger(ikey))
907 ikey = emitir(IRTGI(IR_CONV), ikey, IRCONV_INT_NUM|IRCONV_INDEX);
908 asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
909 if ((MSize)k < t->asize) { /* Currently an array key? */ 993 if ((MSize)k < t->asize) { /* Currently an array key? */
910 TRef arrayref; 994 TRef arrayref;
911 rec_idx_abc(J, asizeref, ikey, t->asize); 995 rec_idx_abc(J, asizeref, ikey, t->asize);
@@ -1081,7 +1165,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1081 } else { 1165 } else {
1082 keybarrier = 0; /* Previous non-nil value kept the key alive. */ 1166 keybarrier = 0; /* Previous non-nil value kept the key alive. */
1083 } 1167 }
1084 if (tref_isinteger(ix->val)) /* Convert int to number before storing. */ 1168 /* Convert int to number before storing. */
1169 if (!LJ_DUALNUM && tref_isinteger(ix->val))
1085 ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); 1170 ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT);
1086 emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); 1171 emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val);
1087 if (keybarrier || tref_isgcv(ix->val)) 1172 if (keybarrier || tref_isgcv(ix->val))
@@ -1135,7 +1220,8 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
1135 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ 1220 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */
1136 return res; 1221 return res;
1137 } else { /* Upvalue store. */ 1222 } else { /* Upvalue store. */
1138 if (tref_isinteger(val)) /* Convert int to number before storing. */ 1223 /* Convert int to number before storing. */
1224 if (!LJ_DUALNUM && tref_isinteger(val))
1139 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); 1225 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
1140 emitir(IRT(IR_USTORE, tref_type(val)), uref, val); 1226 emitir(IRT(IR_USTORE, tref_type(val)), uref, val);
1141 if (needbarrier && tref_isgcv(val)) 1227 if (needbarrier && tref_isgcv(val))
@@ -1455,16 +1541,15 @@ void lj_record_ins(jit_State *J)
1455 case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */ 1541 case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */
1456 case BCMvar: 1542 case BCMvar:
1457 copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; 1543 copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break;
1458 case BCMnum: { lua_Number n = proto_knum(J->pt, rb);
1459 setnumV(rbv, n); ix.tab = rb = lj_ir_knumint(J, n); } break;
1460 default: break; /* Handled later. */ 1544 default: break; /* Handled later. */
1461 } 1545 }
1462 switch (bcmode_c(op)) { 1546 switch (bcmode_c(op)) {
1463 case BCMvar: 1547 case BCMvar:
1464 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; 1548 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
1465 case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; 1549 case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
1466 case BCMnum: { lua_Number n = proto_knum(J->pt, rc); 1550 case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
1467 setnumV(rcv, n); ix.key = rc = lj_ir_knumint(J, n); } break; 1551 copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
1552 lj_ir_knumint(J, numV(tv)); } break;
1468 case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); 1553 case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc));
1469 setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; 1554 setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break;
1470 default: break; /* Handled later. */ 1555 default: break; /* Handled later. */
@@ -1502,9 +1587,11 @@ void lj_record_ins(jit_State *J)
1502 irop = (int)op - (int)BC_ISLT + (int)IR_LT; 1587 irop = (int)op - (int)BC_ISLT + (int)IR_LT;
1503 if (ta == IRT_NUM) { 1588 if (ta == IRT_NUM) {
1504 if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ 1589 if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */
1505 if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 5; 1590 if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop))
1591 irop ^= 5;
1506 } else if (ta == IRT_INT) { 1592 } else if (ta == IRT_INT) {
1507 if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; 1593 if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop))
1594 irop ^= 1;
1508 } else if (ta == IRT_STR) { 1595 } else if (ta == IRT_STR) {
1509 if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; 1596 if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
1510 ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); 1597 ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc);
@@ -1599,13 +1686,11 @@ void lj_record_ins(jit_State *J)
1599 case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: 1686 case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN:
1600 case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { 1687 case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: {
1601 MMS mm = bcmode_mm(op); 1688 MMS mm = bcmode_mm(op);
1602 if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) { 1689 if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
1603 rb = lj_ir_tonum(J, rb); 1690 rc = lj_opt_narrow_arith(J, rb, rc, &ix.tabv, &ix.keyv,
1604 rc = lj_ir_tonum(J, rc); 1691 (int)mm - (int)MM_add + (int)IR_ADD);
1605 rc = emitir(IRTN((int)mm - (int)MM_add + (int)IR_ADD), rb, rc); 1692 else
1606 } else {
1607 rc = rec_mm_arith(J, &ix, mm); 1693 rc = rec_mm_arith(J, &ix, mm);
1608 }
1609 break; 1694 break;
1610 } 1695 }
1611 1696
@@ -1827,59 +1912,6 @@ void lj_record_ins(jit_State *J)
1827 1912
1828/* -- Recording setup ----------------------------------------------------- */ 1913/* -- Recording setup ----------------------------------------------------- */
1829 1914
1830/* Setup recording for a FORL loop. */
1831static void rec_setup_forl(jit_State *J, const BCIns *fori)
1832{
1833 BCReg ra = bc_a(*fori);
1834 cTValue *forbase = &J->L->base[ra];
1835 IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase)
1836 : IRT_NUM;
1837 TRef start;
1838 TRef stop = fori_arg(J, fori, ra+FORL_STOP, t);
1839 TRef step = fori_arg(J, fori, ra+FORL_STEP, t);
1840 int dir = (0 <= numV(&forbase[FORL_STEP]));
1841 lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
1842 J->scev.t.irt = t;
1843 J->scev.dir = dir;
1844 J->scev.stop = tref_ref(stop);
1845 J->scev.step = tref_ref(step);
1846 if (!tref_isk(step)) {
1847 /* Non-constant step: need a guard for the direction. */
1848 TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J);
1849 emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero);
1850 /* Add hoistable overflow checks for a narrowed FORL index. */
1851 if (t == IRT_INT) {
1852 if (tref_isk(stop)) {
1853 /* Constant stop: optimize check away or to a range check for step. */
1854 int32_t k = IR(tref_ref(stop))->i;
1855 if (dir) {
1856 if (k > 0)
1857 emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k));
1858 } else {
1859 if (k < 0)
1860 emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k));
1861 }
1862 } else {
1863 /* Stop+step variable: need full overflow check. */
1864 TRef tr = emitir(IRTGI(IR_ADDOV), step, stop);
1865 emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */
1866 }
1867 }
1868 } else if (t == IRT_INT && !tref_isk(stop)) {
1869 /* Constant step: optimize overflow check to a range check for stop. */
1870 int32_t k = IR(tref_ref(step))->i;
1871 k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k;
1872 emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k));
1873 }
1874 J->scev.start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT));
1875 start = sloadt(J, (int32_t)(ra+FORL_IDX),
1876 (t == IRT_INT && !J->scev.start) ? (IRT_INT|IRT_GUARD) : t,
1877 t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_INHERIT) : IRSLOAD_INHERIT);
1878 J->base[ra+FORL_EXT] = start;
1879 J->scev.idx = tref_ref(start);
1880 J->maxslot = ra+FORL_EXT+1;
1881}
1882
1883/* Setup recording for a root trace started by a hot loop. */ 1915/* Setup recording for a root trace started by a hot loop. */
1884static const BCIns *rec_setup_root(jit_State *J) 1916static const BCIns *rec_setup_root(jit_State *J)
1885{ 1917{
@@ -2033,7 +2065,7 @@ void lj_record_setup(jit_State *J)
2033 if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI && 2065 if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI &&
2034 bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { 2066 bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {
2035 lj_snap_add(J); 2067 lj_snap_add(J);
2036 rec_setup_forl(J, J->pc-1); 2068 rec_for_loop(J, J->pc-1, &J->scev, 1);
2037 goto sidecheck; 2069 goto sidecheck;
2038 } 2070 }
2039 } else { 2071 } else {
@@ -2054,7 +2086,7 @@ void lj_record_setup(jit_State *J)
2054 */ 2086 */
2055 lj_snap_add(J); 2087 lj_snap_add(J);
2056 if (bc_op(J->cur.startins) == BC_FORL) 2088 if (bc_op(J->cur.startins) == BC_FORL)
2057 rec_setup_forl(J, J->pc-1); 2089 rec_for_loop(J, J->pc-1, &J->scev, 1);
2058 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) 2090 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
2059 lj_trace_err(J, LJ_TRERR_STACKOV); 2091 lj_trace_err(J, LJ_TRERR_STACKOV);
2060 } 2092 }
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 59435b20..70628a0e 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -68,7 +68,8 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
68 if (!(ir->op2 & IRSLOAD_INHERIT)) 68 if (!(ir->op2 & IRSLOAD_INHERIT))
69 continue; 69 continue;
70 /* No need to restore readonly slots and unmodified non-parent slots. */ 70 /* No need to restore readonly slots and unmodified non-parent slots. */
71 if ((ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) 71 if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
72 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
72 sn |= SNAP_NORESTORE; 73 sn |= SNAP_NORESTORE;
73 } 74 }
74 map[n++] = sn; 75 map[n++] = sn;
diff --git a/src/lj_trace.c b/src/lj_trace.c
index b67e8f75..69124542 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -495,8 +495,8 @@ static int trace_abort(jit_State *J)
495 495
496 J->postproc = LJ_POST_NONE; 496 J->postproc = LJ_POST_NONE;
497 lj_mcode_abort(J); 497 lj_mcode_abort(J);
498 if (tvisnum(L->top-1)) 498 if (tvisnumber(L->top-1))
499 e = (TraceError)lj_num2int(numV(L->top-1)); 499 e = (TraceError)numberVint(L->top-1);
500 if (e == LJ_TRERR_MCODELM) { 500 if (e == LJ_TRERR_MCODELM) {
501 J->state = LJ_TRACE_ASM; 501 J->state = LJ_TRACE_ASM;
502 return 1; /* Retry ASM with new MCode area. */ 502 return 1; /* Retry ASM with new MCode area. */
@@ -703,8 +703,12 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
703 setintV(L->top++, J->exitno); 703 setintV(L->top++, J->exitno);
704 setintV(L->top++, RID_NUM_GPR); 704 setintV(L->top++, RID_NUM_GPR);
705 setintV(L->top++, RID_NUM_FPR); 705 setintV(L->top++, RID_NUM_FPR);
706 for (i = 0; i < RID_NUM_GPR; i++) 706 for (i = 0; i < RID_NUM_GPR; i++) {
707 setnumV(L->top++, cast_num(ex->gpr[i])); 707 if (sizeof(ex->gpr[i]) == sizeof(int32_t))
708 setintV(L->top++, (int32_t)ex->gpr[i]);
709 else
710 setnumV(L->top++, (lua_Number)ex->gpr[i]);
711 }
708 for (i = 0; i < RID_NUM_FPR; i++) { 712 for (i = 0; i < RID_NUM_FPR; i++) {
709 setnumV(L->top, ex->fpr[i]); 713 setnumV(L->top, ex->fpr[i]);
710 if (LJ_UNLIKELY(tvisnan(L->top))) 714 if (LJ_UNLIKELY(tvisnan(L->top)))