diff options
author | Mike Pall <mike> | 2011-03-10 01:57:24 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2011-03-10 01:57:24 +0100 |
commit | bfce3c1127fd57fe0c935c92bcf45b4737041edd (patch) | |
tree | 2bd2d9e08c70608de63c7a69df7f00cfab07f6be /src | |
parent | 3f26e3a89d54dfb761ca02fc89aaf15326f5f514 (diff) | |
download | luajit-bfce3c1127fd57fe0c935c92bcf45b4737041edd.tar.gz luajit-bfce3c1127fd57fe0c935c92bcf45b4737041edd.tar.bz2 luajit-bfce3c1127fd57fe0c935c92bcf45b4737041edd.zip |
DUALNUM: Handle integer type in JIT compiler.
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.dep | 9 | ||||
-rw-r--r-- | src/lj_asm.c | 35 | ||||
-rw-r--r-- | src/lj_crecord.c | 35 | ||||
-rw-r--r-- | src/lj_ffrecord.c | 39 | ||||
-rw-r--r-- | src/lj_ir.c | 26 | ||||
-rw-r--r-- | src/lj_ir.h | 30 | ||||
-rw-r--r-- | src/lj_iropt.h | 12 | ||||
-rw-r--r-- | src/lj_meta.c | 28 | ||||
-rw-r--r-- | src/lj_meta.h | 2 | ||||
-rw-r--r-- | src/lj_obj.h | 2 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 5 | ||||
-rw-r--r-- | src/lj_opt_loop.c | 9 | ||||
-rw-r--r-- | src/lj_opt_narrow.c | 233 | ||||
-rw-r--r-- | src/lj_record.c | 280 | ||||
-rw-r--r-- | src/lj_snap.c | 3 | ||||
-rw-r--r-- | src/lj_trace.c | 12 |
16 files changed, 484 insertions, 276 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep index 1684ebd7..8458ec78 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
@@ -128,15 +128,16 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
128 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h | 128 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h |
129 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ | 129 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ |
130 | lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 130 | lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ |
131 | lj_dispatch.h lj_traceerr.h | 131 | lj_dispatch.h lj_traceerr.h lj_vm.h |
132 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ | 132 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ |
133 | lj_arch.h | 133 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \ |
134 | lj_vm.h | ||
134 | lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 135 | lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
135 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \ | 136 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \ |
136 | lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h | 137 | lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h |
137 | lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 138 | lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
138 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ | 139 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ |
139 | lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ | 140 | lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ |
140 | lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h | 141 | lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h |
141 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 142 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
142 | lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 143 | lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 5f3c5fab..d395010d 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -2059,7 +2059,7 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
2059 | } else { | 2059 | } else { |
2060 | emit_sjcc(as, CC_P, l_next); | 2060 | emit_sjcc(as, CC_P, l_next); |
2061 | emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); | 2061 | emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); |
2062 | emit_sjcc(as, CC_A, l_next); | 2062 | emit_sjcc(as, CC_AE, l_next); |
2063 | /* The type check avoids NaN penalties and complaints from Valgrind. */ | 2063 | /* The type check avoids NaN penalties and complaints from Valgrind. */ |
2064 | #if LJ_64 | 2064 | #if LJ_64 |
2065 | emit_u32(as, LJ_TISNUM); | 2065 | emit_u32(as, LJ_TISNUM); |
@@ -2388,7 +2388,8 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) | |||
2388 | 2388 | ||
2389 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 2389 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
2390 | { | 2390 | { |
2391 | lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t)); | 2391 | lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || |
2392 | (LJ_DUALNUM && irt_isint(ir->t))); | ||
2392 | #if LJ_64 | 2393 | #if LJ_64 |
2393 | if (irt_islightud(ir->t)) { | 2394 | if (irt_islightud(ir->t)) { |
2394 | Reg dest = asm_load_lightud64(as, ir, 1); | 2395 | Reg dest = asm_load_lightud64(as, ir, 1); |
@@ -2409,8 +2410,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
2409 | } | 2410 | } |
2410 | /* Always do the type check, even if the load result is unused. */ | 2411 | /* Always do the type check, even if the load result is unused. */ |
2411 | as->mrm.ofs += 4; | 2412 | as->mrm.ofs += 4; |
2412 | asm_guardcc(as, irt_isnum(ir->t) ? CC_A : CC_NE); | 2413 | asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); |
2413 | if (LJ_64 && irt_isnum(ir->t)) { | 2414 | if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { |
2415 | lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); | ||
2414 | emit_u32(as, LJ_TISNUM); | 2416 | emit_u32(as, LJ_TISNUM); |
2415 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); | 2417 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); |
2416 | } else { | 2418 | } else { |
@@ -2443,7 +2445,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
2443 | if (ra_hasreg(src)) { | 2445 | if (ra_hasreg(src)) { |
2444 | emit_mrm(as, XO_MOVto, src, RID_MRM); | 2446 | emit_mrm(as, XO_MOVto, src, RID_MRM); |
2445 | } else if (!irt_ispri(irr->t)) { | 2447 | } else if (!irt_ispri(irr->t)) { |
2446 | lua_assert(irt_isaddr(ir->t)); | 2448 | lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); |
2447 | emit_i32(as, irr->i); | 2449 | emit_i32(as, irr->i); |
2448 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | 2450 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); |
2449 | } | 2451 | } |
@@ -2460,8 +2462,9 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
2460 | Reg base; | 2462 | Reg base; |
2461 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 2463 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ |
2462 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 2464 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); |
2463 | lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); | 2465 | lua_assert(LJ_DUALNUM || |
2464 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t)) { | 2466 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); |
2467 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | ||
2465 | Reg left = ra_scratch(as, RSET_FPR); | 2468 | Reg left = ra_scratch(as, RSET_FPR); |
2466 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ | 2469 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ |
2467 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | 2470 | base = ra_alloc1(as, REF_BASE, RSET_GPR); |
@@ -2481,12 +2484,14 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
2481 | Reg dest = ra_dest(as, ir, allow); | 2484 | Reg dest = ra_dest(as, ir, allow); |
2482 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | 2485 | base = ra_alloc1(as, REF_BASE, RSET_GPR); |
2483 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 2486 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); |
2484 | if ((ir->op2 & IRSLOAD_CONVERT)) | 2487 | if ((ir->op2 & IRSLOAD_CONVERT)) { |
2485 | emit_rmro(as, XO_CVTSD2SI, dest, base, ofs); | 2488 | t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ |
2486 | else if (irt_isnum(t)) | 2489 | emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); |
2490 | } else if (irt_isnum(t)) { | ||
2487 | emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); | 2491 | emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); |
2488 | else | 2492 | } else { |
2489 | emit_rmro(as, XO_MOV, dest, base, ofs); | 2493 | emit_rmro(as, XO_MOV, dest, base, ofs); |
2494 | } | ||
2490 | } else { | 2495 | } else { |
2491 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) | 2496 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) |
2492 | return; /* No type check: avoid base alloc. */ | 2497 | return; /* No type check: avoid base alloc. */ |
@@ -2494,8 +2499,9 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
2494 | } | 2499 | } |
2495 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 2500 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
2496 | /* Need type check, even if the load result is unused. */ | 2501 | /* Need type check, even if the load result is unused. */ |
2497 | asm_guardcc(as, irt_isnum(t) ? CC_A : CC_NE); | 2502 | asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); |
2498 | if (LJ_64 && irt_isnum(t)) { | 2503 | if (LJ_64 && irt_type(t) >= IRT_NUM) { |
2504 | lua_assert(irt_isinteger(t) || irt_isnum(t)); | ||
2499 | emit_u32(as, LJ_TISNUM); | 2505 | emit_u32(as, LJ_TISNUM); |
2500 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); | 2506 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); |
2501 | } else { | 2507 | } else { |
@@ -3408,7 +3414,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
3408 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 3414 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
3409 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | 3415 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); |
3410 | } else { | 3416 | } else { |
3411 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); | 3417 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || |
3418 | (LJ_DUALNUM && irt_isinteger(ir->t))); | ||
3412 | if (!irref_isk(ref)) { | 3419 | if (!irref_isk(ref)) { |
3413 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | 3420 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); |
3414 | emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); | 3421 | emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); |
diff --git a/src/lj_crecord.c b/src/lj_crecord.c index c93cece3..9482cc18 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c | |||
@@ -185,6 +185,8 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, | |||
185 | (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); | 185 | (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); |
186 | else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ | 186 | else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ |
187 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0); | 187 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0); |
188 | else if (ssize <= 4) | ||
189 | sp = lj_opt_narrow_toint(J, sp); | ||
188 | xstore: | 190 | xstore: |
189 | if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); | 191 | if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); |
190 | if (dp == 0) return sp; | 192 | if (dp == 0) return sp; |
@@ -355,10 +357,10 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) | |||
355 | CType *s; | 357 | CType *s; |
356 | if (LJ_LIKELY(tref_isinteger(sp))) { | 358 | if (LJ_LIKELY(tref_isinteger(sp))) { |
357 | sid = CTID_INT32; | 359 | sid = CTID_INT32; |
358 | svisnz = (void *)(intptr_t)(numV(sval) != 0); | 360 | svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval)); |
359 | } else if (tref_isnum(sp)) { | 361 | } else if (tref_isnum(sp)) { |
360 | sid = CTID_DOUBLE; | 362 | sid = CTID_DOUBLE; |
361 | svisnz = (void *)(intptr_t)(numV(sval) != 0); | 363 | svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval)); |
362 | } else if (tref_isbool(sp)) { | 364 | } else if (tref_isbool(sp)) { |
363 | sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0); | 365 | sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0); |
364 | sid = CTID_BOOL; | 366 | sid = CTID_BOOL; |
@@ -443,16 +445,16 @@ static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) | |||
443 | static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) | 445 | static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) |
444 | { | 446 | { |
445 | IRIns *ir = IR(tref_ref(tr)); | 447 | IRIns *ir = IR(tref_ref(tr)); |
446 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && | 448 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && irref_isk(ir->op2) && |
447 | ir->o == IR_ADD && irref_isk(ir->op2)) { | 449 | (ir->o == IR_ADD || ir->o == IR_ADDOV || ir->o == IR_SUBOV)) { |
448 | IRIns *irk = IR(ir->op2); | 450 | IRIns *irk = IR(ir->op2); |
449 | tr = ir->op1; | 451 | ptrdiff_t k; |
450 | #if LJ_64 | 452 | if (LJ_64 && irk->o == IR_KINT64) |
451 | if (irk->o == IR_KINT64) | 453 | k = (ptrdiff_t)ir_kint64(irk)->u64 * sz; |
452 | *ofsp += (ptrdiff_t)ir_kint64(irk)->u64 * sz; | ||
453 | else | 454 | else |
454 | #endif | 455 | k = (ptrdiff_t)irk->i * sz; |
455 | *ofsp += (ptrdiff_t)irk->i * sz; | 456 | if (ir->o == IR_SUBOV) *ofsp -= k; else *ofsp += k; |
457 | tr = ir->op1; /* Not a TRef, but the caller doesn't care. */ | ||
456 | } | 458 | } |
457 | return tr; | 459 | return tr; |
458 | } | 460 | } |
@@ -477,16 +479,7 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) | |||
477 | 479 | ||
478 | idx = J->base[1]; | 480 | idx = J->base[1]; |
479 | if (tref_isnumber(idx)) { | 481 | if (tref_isnumber(idx)) { |
480 | /* The size of a ptrdiff_t is target-specific. */ | 482 | idx = lj_opt_narrow_cindex(J, idx); |
481 | #if LJ_64 | ||
482 | if (tref_isnum(idx)) | ||
483 | idx = emitconv(idx, IRT_I64, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY); | ||
484 | else | ||
485 | idx = emitconv(idx, IRT_I64, IRT_INT, IRCONV_SEXT); | ||
486 | #else | ||
487 | if (tref_isnum(idx)) | ||
488 | idx = emitconv(idx, IRT_INT, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY); | ||
489 | #endif | ||
490 | integer_key: | 483 | integer_key: |
491 | if (ctype_ispointer(ct->info)) { | 484 | if (ctype_ispointer(ct->info)) { |
492 | CTSize sz; | 485 | CTSize sz; |
@@ -635,7 +628,7 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
635 | TRef sp, dp; | 628 | TRef sp, dp; |
636 | TValue tv; | 629 | TValue tv; |
637 | TValue *sval = &tv; | 630 | TValue *sval = &tv; |
638 | setnumV(&tv, 0); | 631 | setintV(&tv, 0); |
639 | if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ | 632 | if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ |
640 | dc = ctype_rawchild(cts, df); /* Field type. */ | 633 | dc = ctype_rawchild(cts, df); /* Field type. */ |
641 | if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) | 634 | if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 631321d9..8077bf84 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
@@ -63,9 +63,9 @@ typedef void (LJ_FASTCALL *RecordFunc)(jit_State *J, RecordFFData *rd); | |||
63 | /* Get runtime value of int argument. */ | 63 | /* Get runtime value of int argument. */ |
64 | static int32_t argv2int(jit_State *J, TValue *o) | 64 | static int32_t argv2int(jit_State *J, TValue *o) |
65 | { | 65 | { |
66 | if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o))) | 66 | if (!tvisnumber(o) && !(tvisstr(o) && lj_str_tonumber(strV(o), o))) |
67 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 67 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
68 | return lj_num2bit(numV(o)); | 68 | return tvisint(o) ? intV(o) : lj_num2int(numV(o)); |
69 | } | 69 | } |
70 | 70 | ||
71 | /* Get runtime value of string argument. */ | 71 | /* Get runtime value of string argument. */ |
@@ -75,9 +75,12 @@ static GCstr *argv2str(jit_State *J, TValue *o) | |||
75 | return strV(o); | 75 | return strV(o); |
76 | } else { | 76 | } else { |
77 | GCstr *s; | 77 | GCstr *s; |
78 | if (!tvisnum(o)) | 78 | if (!tvisnumber(o)) |
79 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 79 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
80 | s = lj_str_fromnum(J->L, &o->n); | 80 | if (tvisint(o)) |
81 | s = lj_str_fromint(J->L, intV(o)); | ||
82 | else | ||
83 | s = lj_str_fromnum(J->L, &o->n); | ||
81 | setstrV(J->L, o, s); | 84 | setstrV(J->L, o, s); |
82 | return s; | 85 | return s; |
83 | } | 86 | } |
@@ -128,7 +131,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd) | |||
128 | { | 131 | { |
129 | /* Arguments already specialized. Result is a constant string. Neat, huh? */ | 132 | /* Arguments already specialized. Result is a constant string. Neat, huh? */ |
130 | uint32_t t; | 133 | uint32_t t; |
131 | if (tvisnum(&rd->argv[0])) | 134 | if (tvisnumber(&rd->argv[0])) |
132 | t = ~LJ_TNUMX; | 135 | t = ~LJ_TNUMX; |
133 | else if (LJ_64 && tvislightud(&rd->argv[0])) | 136 | else if (LJ_64 && tvislightud(&rd->argv[0])) |
134 | t = ~LJ_TLIGHTUD; | 137 | t = ~LJ_TLIGHTUD; |
@@ -255,7 +258,7 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) | |||
255 | TRef tr = J->base[0]; | 258 | TRef tr = J->base[0]; |
256 | TRef base = J->base[1]; | 259 | TRef base = J->base[1]; |
257 | if (tr && base) { | 260 | if (tr && base) { |
258 | base = lj_ir_toint(J, base); | 261 | base = lj_opt_narrow_toint(J, base); |
259 | if (!tref_isk(base) || IR(tref_ref(base))->i != 10) | 262 | if (!tref_isk(base) || IR(tref_ref(base))->i != 10) |
260 | recff_nyiu(J); | 263 | recff_nyiu(J); |
261 | } | 264 | } |
@@ -332,12 +335,12 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) | |||
332 | RecordIndex ix; | 335 | RecordIndex ix; |
333 | ix.tab = J->base[0]; | 336 | ix.tab = J->base[0]; |
334 | if (tref_istab(ix.tab)) { | 337 | if (tref_istab(ix.tab)) { |
335 | if (!tvisnum(&rd->argv[1])) /* No support for string coercion. */ | 338 | if (!tvisnumber(&rd->argv[1])) /* No support for string coercion. */ |
336 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 339 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
337 | setnumV(&ix.keyv, numV(&rd->argv[1])+(lua_Number)1); | 340 | setintV(&ix.keyv, numberVint(&rd->argv[1])+1); |
338 | settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); | 341 | settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); |
339 | ix.val = 0; ix.idxchain = 0; | 342 | ix.val = 0; ix.idxchain = 0; |
340 | ix.key = lj_ir_toint(J, J->base[1]); | 343 | ix.key = lj_opt_narrow_toint(J, J->base[1]); |
341 | J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); | 344 | J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); |
342 | J->base[1] = lj_record_idx(J, &ix); | 345 | J->base[1] = lj_record_idx(J, &ix); |
343 | rd->nres = tref_isnil(J->base[1]) ? 0 : 2; | 346 | rd->nres = tref_isnil(J->base[1]) ? 0 : 2; |
@@ -525,26 +528,26 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) | |||
525 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ | 528 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ |
526 | static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) | 529 | static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) |
527 | { | 530 | { |
528 | TRef tr = lj_ir_tobit(J, J->base[0]); | 531 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
529 | J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); | 532 | J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); |
530 | } | 533 | } |
531 | 534 | ||
532 | /* Record N-ary bit.band, bit.bor, bit.bxor. */ | 535 | /* Record N-ary bit.band, bit.bor, bit.bxor. */ |
533 | static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) | 536 | static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) |
534 | { | 537 | { |
535 | TRef tr = lj_ir_tobit(J, J->base[0]); | 538 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
536 | uint32_t op = rd->data; | 539 | uint32_t op = rd->data; |
537 | BCReg i; | 540 | BCReg i; |
538 | for (i = 1; J->base[i] != 0; i++) | 541 | for (i = 1; J->base[i] != 0; i++) |
539 | tr = emitir(IRTI(op), tr, lj_ir_tobit(J, J->base[i])); | 542 | tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); |
540 | J->base[0] = tr; | 543 | J->base[0] = tr; |
541 | } | 544 | } |
542 | 545 | ||
543 | /* Record bit shifts. */ | 546 | /* Record bit shifts. */ |
544 | static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) | 547 | static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) |
545 | { | 548 | { |
546 | TRef tr = lj_ir_tobit(J, J->base[0]); | 549 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
547 | TRef tsh = lj_ir_tobit(J, J->base[1]); | 550 | TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); |
548 | if (!(rd->data < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | 551 | if (!(rd->data < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && |
549 | !tref_isk(tsh)) | 552 | !tref_isk(tsh)) |
550 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); | 553 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); |
@@ -570,25 +573,25 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) | |||
570 | int32_t start, end; | 573 | int32_t start, end; |
571 | if (rd->data) { /* string.sub(str, start [,end]) */ | 574 | if (rd->data) { /* string.sub(str, start [,end]) */ |
572 | start = argv2int(J, &rd->argv[1]); | 575 | start = argv2int(J, &rd->argv[1]); |
573 | trstart = lj_ir_toint(J, J->base[1]); | 576 | trstart = lj_opt_narrow_toint(J, J->base[1]); |
574 | trend = J->base[2]; | 577 | trend = J->base[2]; |
575 | if (tref_isnil(trend)) { | 578 | if (tref_isnil(trend)) { |
576 | trend = lj_ir_kint(J, -1); | 579 | trend = lj_ir_kint(J, -1); |
577 | end = -1; | 580 | end = -1; |
578 | } else { | 581 | } else { |
579 | trend = lj_ir_toint(J, trend); | 582 | trend = lj_opt_narrow_toint(J, trend); |
580 | end = argv2int(J, &rd->argv[2]); | 583 | end = argv2int(J, &rd->argv[2]); |
581 | } | 584 | } |
582 | } else { /* string.byte(str, [,start [,end]]) */ | 585 | } else { /* string.byte(str, [,start [,end]]) */ |
583 | if (J->base[1]) { | 586 | if (J->base[1]) { |
584 | start = argv2int(J, &rd->argv[1]); | 587 | start = argv2int(J, &rd->argv[1]); |
585 | trstart = lj_ir_toint(J, J->base[1]); | 588 | trstart = lj_opt_narrow_toint(J, J->base[1]); |
586 | trend = J->base[2]; | 589 | trend = J->base[2]; |
587 | if (tref_isnil(trend)) { | 590 | if (tref_isnil(trend)) { |
588 | trend = trstart; | 591 | trend = trstart; |
589 | end = start; | 592 | end = start; |
590 | } else { | 593 | } else { |
591 | trend = lj_ir_toint(J, trend); | 594 | trend = lj_opt_narrow_toint(J, trend); |
592 | end = argv2int(J, &rd->argv[2]); | 595 | end = argv2int(J, &rd->argv[2]); |
593 | } | 596 | } |
594 | } else { | 597 | } else { |
diff --git a/src/lj_ir.c b/src/lj_ir.c index 1d57938e..721cfd0f 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c | |||
@@ -426,32 +426,6 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr) | |||
426 | return tr; | 426 | return tr; |
427 | } | 427 | } |
428 | 428 | ||
429 | /* Convert from number or string to bitop operand (overflow wrapped). */ | ||
430 | TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr) | ||
431 | { | ||
432 | if (!tref_isinteger(tr)) { | ||
433 | if (tref_isstr(tr)) | ||
434 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
435 | else if (!tref_isnum(tr)) | ||
436 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
437 | tr = emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J)); | ||
438 | } | ||
439 | return tr; | ||
440 | } | ||
441 | |||
442 | /* Convert from number or string to integer (overflow undefined). */ | ||
443 | TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr) | ||
444 | { | ||
445 | if (!tref_isinteger(tr)) { | ||
446 | if (tref_isstr(tr)) | ||
447 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
448 | else if (!tref_isnum(tr)) | ||
449 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
450 | tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); | ||
451 | } | ||
452 | return tr; | ||
453 | } | ||
454 | |||
455 | /* -- Miscellaneous IR ops ------------------------------------------------ */ | 429 | /* -- Miscellaneous IR ops ------------------------------------------------ */ |
456 | 430 | ||
457 | /* Evaluate numeric comparison. */ | 431 | /* Evaluate numeric comparison. */ |
diff --git a/src/lj_ir.h b/src/lj_ir.h index 060cf562..c46bbbe0 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -124,7 +124,7 @@ | |||
124 | _(XBAR, S , ___, ___) \ | 124 | _(XBAR, S , ___, ___) \ |
125 | \ | 125 | \ |
126 | /* Type conversions. */ \ | 126 | /* Type conversions. */ \ |
127 | _(CONV, N , ref, lit) \ | 127 | _(CONV, NW, ref, lit) \ |
128 | _(TOBIT, N , ref, ref) \ | 128 | _(TOBIT, N , ref, ref) \ |
129 | _(TOSTR, N , ref, ___) \ | 129 | _(TOSTR, N , ref, ___) \ |
130 | _(STRTO, N , ref, ___) \ | 130 | _(STRTO, N , ref, ___) \ |
@@ -345,8 +345,8 @@ typedef enum { | |||
345 | #define IRM_AW (IRM_A|IRM_W) | 345 | #define IRM_AW (IRM_A|IRM_W) |
346 | #define IRM_LW (IRM_L|IRM_W) | 346 | #define IRM_LW (IRM_L|IRM_W) |
347 | 347 | ||
348 | #define irm_op1(m) (cast(IRMode, (m)&3)) | 348 | #define irm_op1(m) ((IRMode)((m)&3)) |
349 | #define irm_op2(m) (cast(IRMode, ((m)>>2)&3)) | 349 | #define irm_op2(m) ((IRMode)(((m)>>2)&3)) |
350 | #define irm_iscomm(m) ((m) & IRM_C) | 350 | #define irm_iscomm(m) ((m) & IRM_C) |
351 | #define irm_kind(m) ((m) & IRM_S) | 351 | #define irm_kind(m) ((m) & IRM_S) |
352 | 352 | ||
@@ -401,8 +401,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1; | |||
401 | #define IRTG(o, t) (IRT((o), IRT_GUARD|(t))) | 401 | #define IRTG(o, t) (IRT((o), IRT_GUARD|(t))) |
402 | #define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT)) | 402 | #define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT)) |
403 | 403 | ||
404 | #define irt_t(t) (cast(IRType, (t).irt)) | 404 | #define irt_t(t) ((IRType)(t).irt) |
405 | #define irt_type(t) (cast(IRType, (t).irt & IRT_TYPE)) | 405 | #define irt_type(t) ((IRType)((t).irt & IRT_TYPE)) |
406 | #define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0) | 406 | #define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0) |
407 | #define irt_typerange(t, first, last) \ | 407 | #define irt_typerange(t, first, last) \ |
408 | ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first)) | 408 | ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first)) |
@@ -441,18 +441,30 @@ typedef struct IRType1 { uint8_t irt; } IRType1; | |||
441 | 441 | ||
442 | static LJ_AINLINE IRType itype2irt(const TValue *tv) | 442 | static LJ_AINLINE IRType itype2irt(const TValue *tv) |
443 | { | 443 | { |
444 | if (tvisnum(tv)) | 444 | if (tvisint(tv)) |
445 | return IRT_INT; | ||
446 | else if (tvisnum(tv)) | ||
445 | return IRT_NUM; | 447 | return IRT_NUM; |
446 | #if LJ_64 | 448 | #if LJ_64 |
447 | else if (tvislightud(tv)) | 449 | else if (tvislightud(tv)) |
448 | return IRT_LIGHTUD; | 450 | return IRT_LIGHTUD; |
449 | #endif | 451 | #endif |
450 | else | 452 | else |
451 | return cast(IRType, ~itype(tv)); | 453 | return (IRType)~itype(tv); |
452 | } | 454 | } |
453 | 455 | ||
454 | #define irt_toitype(t) \ | 456 | static LJ_AINLINE uint32_t irt_toitype_(IRType t) |
455 | check_exp(!(LJ_64 && irt_islightud((t))), ~(uint32_t)irt_type((t))) | 457 | { |
458 | lua_assert(!LJ_64 || t != IRT_LIGHTUD); | ||
459 | if (LJ_DUALNUM && t > IRT_NUM) { | ||
460 | return LJ_TISNUM; | ||
461 | } else { | ||
462 | lua_assert(t <= IRT_NUM); | ||
463 | return ~(uint32_t)t; | ||
464 | } | ||
465 | } | ||
466 | |||
467 | #define irt_toitype(t) irt_toitype_(irt_type((t))) | ||
456 | 468 | ||
457 | #define irt_isguard(t) ((t).irt & IRT_GUARD) | 469 | #define irt_isguard(t) ((t).irt & IRT_GUARD) |
458 | #define irt_ismarked(t) ((t).irt & IRT_MARK) | 470 | #define irt_ismarked(t) ((t).irt & IRT_MARK) |
diff --git a/src/lj_iropt.h b/src/lj_iropt.h index db99c118..1c94e91c 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h | |||
@@ -84,8 +84,6 @@ LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); | |||
84 | /* Convert IR operand types. */ | 84 | /* Convert IR operand types. */ |
85 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); | 85 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); |
86 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); | 86 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); |
87 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr); | ||
88 | LJ_FUNC TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr); | ||
89 | 87 | ||
90 | /* Miscellaneous IR ops. */ | 88 | /* Miscellaneous IR ops. */ |
91 | LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); | 89 | LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); |
@@ -134,9 +132,17 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J); | |||
134 | 132 | ||
135 | /* Narrowing. */ | 133 | /* Narrowing. */ |
136 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); | 134 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); |
135 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef key); | ||
136 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr); | ||
137 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr); | ||
138 | #if LJ_HASFFI | ||
139 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key); | ||
140 | #endif | ||
141 | LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, | ||
142 | TValue *vb, TValue *vc, IROp op); | ||
137 | LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); | 143 | LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); |
138 | LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); | 144 | LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); |
139 | LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase); | 145 | LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); |
140 | 146 | ||
141 | /* Optimization passes. */ | 147 | /* Optimization passes. */ |
142 | LJ_FUNC void lj_opt_dce(jit_State *J); | 148 | LJ_FUNC void lj_opt_dce(jit_State *J); |
diff --git a/src/lj_meta.c b/src/lj_meta.c index 23f11f58..48cee510 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c | |||
@@ -393,13 +393,27 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o) | |||
393 | lj_err_msg(L, LJ_ERR_FORLIM); | 393 | lj_err_msg(L, LJ_ERR_FORLIM); |
394 | if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2)))) | 394 | if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2)))) |
395 | lj_err_msg(L, LJ_ERR_FORSTEP); | 395 | lj_err_msg(L, LJ_ERR_FORSTEP); |
396 | #if LJ_DUALNUM | 396 | if (LJ_DUALNUM) { |
397 | /* Ensure all slots are integers or all slots are numbers. */ | 397 | /* Ensure all slots are integers or all slots are numbers. */ |
398 | if (!(tvisint(o) && tvisint(o+1) && tvisint(o+2))) { | 398 | int32_t k[3]; |
399 | if (tvisint(o)) setnumV(o, (lua_Number)intV(o)); | 399 | int nint = 0; |
400 | if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1)); | 400 | ptrdiff_t i; |
401 | if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2)); | 401 | for (i = 0; i <= 2; i++) { |
402 | if (tvisint(o+i)) { | ||
403 | k[i] = intV(o+i); nint++; | ||
404 | } else { | ||
405 | k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); | ||
406 | } | ||
407 | } | ||
408 | if (nint == 3) { /* Narrow to integers. */ | ||
409 | setintV(o, k[0]); | ||
410 | setintV(o+1, k[1]); | ||
411 | setintV(o+2, k[2]); | ||
412 | } else if (nint != 0) { /* Widen to numbers. */ | ||
413 | if (tvisint(o)) setnumV(o, (lua_Number)intV(o)); | ||
414 | if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1)); | ||
415 | if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2)); | ||
416 | } | ||
402 | } | 417 | } |
403 | #endif | ||
404 | } | 418 | } |
405 | 419 | ||
diff --git a/src/lj_meta.h b/src/lj_meta.h index 687e6c08..32b3dec3 100644 --- a/src/lj_meta.h +++ b/src/lj_meta.h | |||
@@ -29,6 +29,6 @@ LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); | |||
29 | LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); | 29 | LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); |
30 | LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); | 30 | LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); |
31 | LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); | 31 | LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); |
32 | LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *base); | 32 | LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); |
33 | 33 | ||
34 | #endif | 34 | #endif |
diff --git a/src/lj_obj.h b/src/lj_obj.h index 88289f3e..19a2345f 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
@@ -325,8 +325,6 @@ typedef struct GCproto { | |||
325 | #define proto_kgc(pt, idx) \ | 325 | #define proto_kgc(pt, idx) \ |
326 | check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ | 326 | check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ |
327 | gcref(mref((pt)->k, GCRef)[(idx)])) | 327 | gcref(mref((pt)->k, GCRef)[(idx)])) |
328 | #define proto_knum(pt, idx) \ | ||
329 | check_exp((uintptr_t)(idx) < (pt)->sizekn, mref((pt)->k, lua_Number)[(idx)]) | ||
330 | #define proto_knumtv(pt, idx) \ | 328 | #define proto_knumtv(pt, idx) \ |
331 | check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)]) | 329 | check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)]) |
332 | #define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto))) | 330 | #define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto))) |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 471a4b29..e2d5c517 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -558,7 +558,10 @@ LJFOLD(CONV KINT IRCONV_I64_INT) | |||
558 | LJFOLD(CONV KINT IRCONV_U64_INT) | 558 | LJFOLD(CONV KINT IRCONV_U64_INT) |
559 | LJFOLDF(kfold_conv_kint_i64) | 559 | LJFOLDF(kfold_conv_kint_i64) |
560 | { | 560 | { |
561 | return INT64FOLD((uint64_t)(int64_t)fleft->i); | 561 | if ((fins->op2 & IRCONV_SEXT)) |
562 | return INT64FOLD((uint64_t)(int64_t)fleft->i); | ||
563 | else | ||
564 | return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i); | ||
562 | } | 565 | } |
563 | 566 | ||
564 | LJFOLD(CONV KINT64 IRCONV_NUM_I64) | 567 | LJFOLD(CONV KINT64 IRCONV_NUM_I64) |
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 559e579e..6dd06636 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
@@ -300,8 +300,11 @@ static void loop_unroll(jit_State *J) | |||
300 | } | 300 | } |
301 | /* Check all loop-carried dependencies for type instability. */ | 301 | /* Check all loop-carried dependencies for type instability. */ |
302 | if (!irt_sametype(t, irr->t)) { | 302 | if (!irt_sametype(t, irr->t)) { |
303 | if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num case. */ | 303 | if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num. */ |
304 | subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); | 304 | subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); |
305 | else if (irt_isnum(irr->t) && irt_isinteger(t)) /* Fix num->int. */ | ||
306 | subst[ins] = tref_ref(emitir(IRTGI(IR_CONV), ref, | ||
307 | IRCONV_INT_NUM|IRCONV_CHECK)); | ||
305 | else if (!(irt_isinteger(t) && irt_isinteger(irr->t))) | 308 | else if (!(irt_isinteger(t) && irt_isinteger(irr->t))) |
306 | lj_trace_err(J, LJ_TRERR_TYPEINS); | 309 | lj_trace_err(J, LJ_TRERR_TYPEINS); |
307 | } | 310 | } |
@@ -355,8 +358,8 @@ int lj_opt_loop(jit_State *J) | |||
355 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); | 358 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); |
356 | if (LJ_UNLIKELY(errcode)) { | 359 | if (LJ_UNLIKELY(errcode)) { |
357 | lua_State *L = J->L; | 360 | lua_State *L = J->L; |
358 | if (errcode == LUA_ERRRUN && tvisnum(L->top-1)) { /* Trace error? */ | 361 | if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ |
359 | int32_t e = lj_num2int(numV(L->top-1)); | 362 | int32_t e = numberVint(L->top-1); |
360 | switch ((TraceError)e) { | 363 | switch ((TraceError)e) { |
361 | case LJ_TRERR_TYPEINS: /* Type instability. */ | 364 | case LJ_TRERR_TYPEINS: /* Type instability. */ |
362 | case LJ_TRERR_GFAIL: /* Guard would always fail. */ | 365 | case LJ_TRERR_GFAIL: /* Guard would always fail. */ |
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 0a2bb6cd..1727e9b5 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | ** NARROW: Narrowing of numbers to integers (double to int32_t). | 2 | ** NARROW: Narrowing of numbers to integers (double to int32_t). |
3 | ** STRIPOV: Stripping of overflow checks. | ||
3 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h | 4 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h |
4 | */ | 5 | */ |
5 | 6 | ||
@@ -16,6 +17,7 @@ | |||
16 | #include "lj_jit.h" | 17 | #include "lj_jit.h" |
17 | #include "lj_iropt.h" | 18 | #include "lj_iropt.h" |
18 | #include "lj_trace.h" | 19 | #include "lj_trace.h" |
20 | #include "lj_vm.h" | ||
19 | 21 | ||
20 | /* Rationale for narrowing optimizations: | 22 | /* Rationale for narrowing optimizations: |
21 | ** | 23 | ** |
@@ -57,24 +59,34 @@ | |||
57 | ** | 59 | ** |
58 | ** A better solution is to keep all numbers as FP values and only narrow | 60 | ** A better solution is to keep all numbers as FP values and only narrow |
59 | ** when it's beneficial to do so. LuaJIT uses predictive narrowing for | 61 | ** when it's beneficial to do so. LuaJIT uses predictive narrowing for |
60 | ** induction variables and demand-driven narrowing for index expressions | 62 | ** induction variables and demand-driven narrowing for index expressions, |
61 | ** and bit operations. Additionally it can eliminate or hoists most of the | 63 | ** integer arguments and bit operations. Additionally it can eliminate or |
62 | ** resulting overflow checks. Regular arithmetic computations are never | 64 | ** hoist most of the resulting overflow checks. Regular arithmetic |
63 | ** narrowed to integers. | 65 | ** computations are never narrowed to integers. |
64 | ** | 66 | ** |
65 | ** The integer type in the IR has convenient wrap-around semantics and | 67 | ** The integer type in the IR has convenient wrap-around semantics and |
66 | ** ignores overflow. Extra operations have been added for | 68 | ** ignores overflow. Extra operations have been added for |
67 | ** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. | 69 | ** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. |
68 | ** Apart from reducing overall complexity of the compiler, this also | 70 | ** Apart from reducing overall complexity of the compiler, this also |
69 | ** nicely solves the problem where you want to apply algebraic | 71 | ** nicely solves the problem where you want to apply algebraic |
70 | ** simplifications to ADD, but not to ADDOV. And the assembler can use lea | 72 | ** simplifications to ADD, but not to ADDOV. And the x86/x64 assembler can |
71 | ** instead of an add for integer ADD, but not for ADDOV (lea does not | 73 | ** use lea instead of an add for integer ADD, but not for ADDOV (lea does |
72 | ** affect the flags, but it helps to avoid register moves). | 74 | ** not affect the flags, but it helps to avoid register moves). |
73 | ** | 75 | ** |
74 | ** Note that all of the above has to be reconsidered if LuaJIT is to be | 76 | ** |
75 | ** ported to architectures with slow FP operations or with no hardware FPU | 77 | ** All of the above has to be reconsidered for architectures with slow FP |
76 | ** at all. In the latter case an integer-only port may be the best overall | 78 | ** operations or without a hardware FPU. The dual-number mode of LuaJIT |
77 | ** solution (if this still meets user demands). | 79 | ** addresses this issue. Arithmetic operations are performed on integers |
80 | ** as far as possible and overflow checks are added as needed. | ||
81 | ** | ||
82 | ** This implies that narrowing for integer arguments and bit operations | ||
83 | ** should also strip overflow checks, e.g. replace ADDOV with ADD. The | ||
84 | ** original overflow guards are weak and can be eliminated by DCE, if | ||
85 | ** there's no other use. | ||
86 | ** | ||
87 | ** A slight twist is that it's usually beneficial to use overflow-checked | ||
| | 88 | ** integer arithmetic if all inputs are already integers. This is the only |
89 | ** change that affects the single-number mode, too. | ||
78 | */ | 90 | */ |
79 | 91 | ||
80 | /* Some local macros to save typing. Undef'd at the end. */ | 92 | /* Some local macros to save typing. Undef'd at the end. */ |
@@ -94,10 +106,10 @@ | |||
94 | ** already takes care of eliminating simple redundant conversions like | 106 | ** already takes care of eliminating simple redundant conversions like |
95 | ** CONV.int.num(CONV.num.int(x)) ==> x. | 107 | ** CONV.int.num(CONV.num.int(x)) ==> x. |
96 | ** | 108 | ** |
97 | ** But the surrounding code is FP-heavy and all arithmetic operations are | 109 | ** But the surrounding code is FP-heavy and arithmetic operations are |
98 | ** performed on FP numbers. Consider a common example such as 'x=t[i+1]', | 110 | ** performed on FP numbers (for the single-number mode). Consider a common |
99 | ** with 'i' already an integer (due to induction variable narrowing). The | 111 | ** example such as 'x=t[i+1]', with 'i' already an integer (due to induction |
100 | ** index expression would be recorded as | 112 | ** variable narrowing). The index expression would be recorded as |
101 | ** CONV.int.num(ADD(CONV.num.int(i), 1)) | 113 | ** CONV.int.num(ADD(CONV.num.int(i), 1)) |
102 | ** which is clearly suboptimal. | 114 | ** which is clearly suboptimal. |
103 | ** | 115 | ** |
@@ -113,6 +125,9 @@ | |||
113 | ** FP ops remain in the IR and are eliminated by DCE since all references to | 125 | ** FP ops remain in the IR and are eliminated by DCE since all references to |
114 | ** them are gone. | 126 | ** them are gone. |
115 | ** | 127 | ** |
128 | ** [In dual-number mode the trace recorder already emits ADDOV etc., but | ||
129 | ** this can be further reduced. See below.] | ||
130 | ** | ||
116 | ** Special care has to be taken to avoid narrowing across an operation | 131 | ** Special care has to be taken to avoid narrowing across an operation |
117 | ** which is potentially operating on non-integral operands. One obvious | 132 | ** which is potentially operating on non-integral operands. One obvious |
118 | ** case is when an expression contains a non-integral constant, but ends | 133 | ** case is when an expression contains a non-integral constant, but ends |
@@ -221,6 +236,26 @@ static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode) | |||
221 | bp->mode = mode; | 236 | bp->mode = mode; |
222 | } | 237 | } |
223 | 238 | ||
239 | /* Backpropagate overflow stripping. */ | ||
240 | static void narrow_stripov_backprop(NarrowConv *nc, IRRef ref, int depth) | ||
241 | { | ||
242 | jit_State *J = nc->J; | ||
243 | IRIns *ir = IR(ref); | ||
244 | if (ir->o == IR_ADDOV || ir->o == IR_SUBOV || | ||
245 | (ir->o == IR_MULOV && (nc->mode & IRCONV_CONVMASK) == IRCONV_ANY)) { | ||
246 | BPropEntry *bp = narrow_bpc_get(nc->J, ref, IRCONV_TOBIT); | ||
247 | if (bp) { | ||
248 | ref = bp->val; | ||
249 | } else if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) { | ||
250 | narrow_stripov_backprop(nc, ir->op1, depth); | ||
251 | narrow_stripov_backprop(nc, ir->op2, depth); | ||
252 | *nc->sp++ = NARROWINS(IRT(ir->o - IR_ADDOV + IR_ADD, IRT_INT), ref); | ||
253 | return; | ||
254 | } | ||
255 | } | ||
256 | *nc->sp++ = NARROWINS(NARROW_REF, ref); | ||
257 | } | ||
258 | |||
224 | /* Backpropagate narrowing conversion. Return number of needed conversions. */ | 259 | /* Backpropagate narrowing conversion. Return number of needed conversions. */ |
225 | static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) | 260 | static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) |
226 | { | 261 | { |
@@ -230,24 +265,26 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) | |||
230 | 265 | ||
231 | /* Check the easy cases first. */ | 266 | /* Check the easy cases first. */ |
232 | if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) { | 267 | if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) { |
233 | if (nc->t == IRT_I64) | 268 | if ((nc->mode & IRCONV_CONVMASK) <= IRCONV_ANY) |
234 | *nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1); /* Reduce to sign-ext. */ | 269 | narrow_stripov_backprop(nc, ir->op1, depth+1); |
235 | else | 270 | else |
236 | *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */ | 271 | *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */ |
272 | if (nc->t == IRT_I64) | ||
273 | *nc->sp++ = NARROWINS(NARROW_SEXT, 0); /* Sign-extend integer. */ | ||
237 | return 0; | 274 | return 0; |
238 | } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ | 275 | } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ |
239 | lua_Number n = ir_knum(ir)->n; | 276 | lua_Number n = ir_knum(ir)->n; |
240 | if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { | 277 | if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { |
241 | /* Allows a wider range of constants. */ | 278 | /* Allows a wider range of constants. */ |
242 | int64_t k64 = (int64_t)n; | 279 | int64_t k64 = (int64_t)n; |
243 | if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */ | 280 | if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */ |
244 | *nc->sp++ = NARROWINS(NARROW_INT, 0); | 281 | *nc->sp++ = NARROWINS(NARROW_INT, 0); |
245 | *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ | 282 | *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ |
246 | return 0; | 283 | return 0; |
247 | } | 284 | } |
248 | } else { | 285 | } else { |
249 | int32_t k = lj_num2int(n); | 286 | int32_t k = lj_num2int(n); |
250 | if (n == cast_num(k)) { /* Only if constant is really an integer. */ | 287 | if (n == (lua_Number)k) { /* Only if constant is really an integer. */ |
251 | *nc->sp++ = NARROWINS(NARROW_INT, 0); | 288 | *nc->sp++ = NARROWINS(NARROW_INT, 0); |
252 | *nc->sp++ = (NarrowIns)k; | 289 | *nc->sp++ = (NarrowIns)k; |
253 | return 0; | 290 | return 0; |
@@ -287,7 +324,8 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) | |||
287 | mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX; | 324 | mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX; |
288 | bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); | 325 | bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); |
289 | if (bp) { | 326 | if (bp) { |
290 | *nc->sp++ = NARROWINS(NARROW_SEXT, bp->val); | 327 | *nc->sp++ = NARROWINS(NARROW_REF, bp->val); |
328 | *nc->sp++ = NARROWINS(NARROW_SEXT, 0); | ||
291 | return 0; | 329 | return 0; |
292 | } | 330 | } |
293 | } | 331 | } |
@@ -326,8 +364,9 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
326 | } else if (op == NARROW_CONV) { | 364 | } else if (op == NARROW_CONV) { |
327 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ | 365 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ |
328 | } else if (op == NARROW_SEXT) { | 366 | } else if (op == NARROW_SEXT) { |
329 | *sp++ = emitir(IRT(IR_CONV, IRT_I64), ref, | 367 | lua_assert(sp >= nc->stack+1); |
330 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); | 368 | sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], |
369 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); | ||
331 | } else if (op == NARROW_INT) { | 370 | } else if (op == NARROW_INT) { |
332 | lua_assert(next < last); | 371 | lua_assert(next < last); |
333 | *sp++ = nc->t == IRT_I64 ? | 372 | *sp++ = nc->t == IRT_I64 ? |
@@ -340,7 +379,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
340 | /* Omit some overflow checks for array indexing. See comments above. */ | 379 | /* Omit some overflow checks for array indexing. See comments above. */ |
341 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { | 380 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { |
342 | if (next == last && irref_isk(narrow_ref(sp[0])) && | 381 | if (next == last && irref_isk(narrow_ref(sp[0])) && |
343 | (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000) | 382 | (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000u < 0x80000000u) |
344 | guardot = 0; | 383 | guardot = 0; |
345 | else /* Otherwise cache a stronger check. */ | 384 | else /* Otherwise cache a stronger check. */ |
346 | mode += IRCONV_CHECK-IRCONV_INDEX; | 385 | mode += IRCONV_CHECK-IRCONV_INDEX; |
@@ -377,12 +416,123 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J) | |||
377 | return NEXTFOLD; | 416 | return NEXTFOLD; |
378 | } | 417 | } |
379 | 418 | ||
419 | /* -- Narrowing of implicit conversions ----------------------------------- */ | ||
420 | |||
421 | /* Recursively strip overflow checks. */ | ||
422 | static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) | ||
423 | { | ||
424 | IRRef ref = tref_ref(tr); | ||
425 | IRIns *ir = IR(ref); | ||
426 | int op = ir->o; | ||
427 | if (op >= IR_ADDOV && op <= lastop) { | ||
428 | BPropEntry *bp = narrow_bpc_get(J, ref, mode); | ||
429 | if (bp) { | ||
430 | return TREF(bp->val, irt_t(IR(bp->val)->t)); | ||
431 | } else { | ||
432 | IRRef op1 = ir->op1, op2 = ir->op2; /* The IR may be reallocated. */ | ||
433 | op1 = narrow_stripov(J, op1, lastop, mode); | ||
434 | op2 = narrow_stripov(J, op2, lastop, mode); | ||
435 | tr = emitir(IRT(op - IR_ADDOV + IR_ADD, | ||
436 | ((mode & IRCONV_DSTMASK) >> IRCONV_DSH)), op1, op2); | ||
437 | narrow_bpc_set(J, ref, tref_ref(tr), mode); | ||
438 | } | ||
439 | } else if (LJ_64 && (mode & IRCONV_SEXT) && !irt_is64(ir->t)) { | ||
440 | tr = emitir(IRT(IR_CONV, IRT_INTP), tr, mode); | ||
441 | } | ||
442 | return tr; | ||
443 | } | ||
444 | |||
445 | /* Narrow array index. */ | ||
446 | TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) | ||
447 | { | ||
448 | IRIns *ir; | ||
449 | lua_assert(tref_isnumber(tr)); | ||
450 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ | ||
451 | return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); | ||
452 | /* Omit some overflow checks for array indexing. See comments above. */ | ||
453 | ir = IR(tref_ref(tr)); | ||
454 | if ((ir->o == IR_ADDOV || ir->o == IR_SUBOV) && irref_isk(ir->op2) && | ||
455 | (uint32_t)IR(ir->op2)->i + 0x40000000u < 0x80000000u) | ||
456 | return emitir(IRTI(ir->o - IR_ADDOV + IR_ADD), ir->op1, ir->op2); | ||
457 | return tr; | ||
458 | } | ||
459 | |||
460 | /* Narrow conversion to integer operand (overflow undefined). */ | ||
461 | TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr) | ||
462 | { | ||
463 | if (tref_isstr(tr)) | ||
464 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
465 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ | ||
466 | return emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); | ||
467 | if (!tref_isinteger(tr)) | ||
468 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
469 | /* | ||
470 | ** Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. | ||
471 | ** Use IRCONV_TOBIT for the cache entries, since the semantics are the same. | ||
472 | */ | ||
473 | return narrow_stripov(J, tr, IR_MULOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT); | ||
474 | } | ||
475 | |||
476 | /* Narrow conversion to bitop operand (overflow wrapped). */ | ||
477 | TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) | ||
478 | { | ||
479 | if (tref_isstr(tr)) | ||
480 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
481 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ | ||
482 | return emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J)); | ||
483 | if (!tref_isinteger(tr)) | ||
484 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
485 | /* | ||
486 | ** Wrapped overflow semantics allow stripping of ADDOV and SUBOV. | ||
487 | ** MULOV cannot be stripped due to precision widening. | ||
488 | */ | ||
489 | return narrow_stripov(J, tr, IR_SUBOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT); | ||
490 | } | ||
491 | |||
492 | #if LJ_HASFFI | ||
493 | /* Narrow C array index (overflow undefined). */ | ||
494 | TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) | ||
495 | { | ||
496 | lua_assert(tref_isnumber(tr)); | ||
497 | if (tref_isnum(tr)) | ||
498 | return emitir(IRTI(IR_CONV), tr, | ||
499 | (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY); | ||
500 | /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ | ||
501 | return narrow_stripov(J, tr, IR_MULOV, | ||
502 | LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : | ||
503 | ((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT)); | ||
504 | } | ||
505 | #endif | ||
506 | |||
380 | /* -- Narrowing of arithmetic operators ----------------------------------- */ | 507 | /* -- Narrowing of arithmetic operators ----------------------------------- */ |
381 | 508 | ||
382 | /* Check whether a number fits into an int32_t (-0 is ok, too). */ | 509 | /* Check whether a number fits into an int32_t (-0 is ok, too). */ |
383 | static int numisint(lua_Number n) | 510 | static int numisint(lua_Number n) |
384 | { | 511 | { |
385 | return (n == cast_num(lj_num2int(n))); | 512 | return (n == (lua_Number)lj_num2int(n)); |
513 | } | ||
514 | |||
515 | /* Narrowing of arithmetic operations. */ | ||
516 | TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, | ||
517 | TValue *vb, TValue *vc, IROp op) | ||
518 | { | ||
519 | if (tref_isstr(rb)) { | ||
520 | rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0); | ||
521 | lj_str_tonum(strV(vb), vb); | ||
522 | } | ||
523 | if (tref_isstr(rc)) { | ||
524 | rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); | ||
525 | lj_str_tonum(strV(vc), vc); | ||
526 | } | ||
527 | /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ | ||
528 | if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && | ||
529 | tref_isinteger(rb) && tref_isinteger(rc) && | ||
530 | numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), | ||
531 | (int)op - (int)IR_ADD))) | ||
532 | return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); | ||
533 | if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); | ||
534 | if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); | ||
535 | return emitir(IRTN(op), rb, rc); | ||
386 | } | 536 | } |
387 | 537 | ||
388 | /* Narrowing of modulo operator. */ | 538 | /* Narrowing of modulo operator. */ |
@@ -409,16 +559,15 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc) | |||
409 | /* Narrowing of power operator or math.pow. */ | 559 | /* Narrowing of power operator or math.pow. */ |
410 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) | 560 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) |
411 | { | 561 | { |
412 | lua_Number n; | ||
413 | if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) | 562 | if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) |
414 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 563 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
415 | n = numV(vc); | ||
416 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ | 564 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ |
417 | if (numisint(n)) { | 565 | if (tvisint(vc) || numisint(numV(vc))) { |
418 | int checkrange = 0; | 566 | int checkrange = 0; |
419 | /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ | 567 | /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ |
420 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { | 568 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { |
421 | if (!(n >= -65536.0 && n <= 65536.0)) goto split_pow; | 569 | int32_t k = numberVint(vc); |
570 | if (!(k >= -65536 && k <= 65536)) goto split_pow; | ||
422 | checkrange = 1; | 571 | checkrange = 1; |
423 | } | 572 | } |
424 | if (!tref_isinteger(rc)) { | 573 | if (!tref_isinteger(rc)) { |
@@ -448,20 +597,28 @@ split_pow: | |||
448 | 597 | ||
449 | /* -- Predictive narrowing of induction variables ------------------------- */ | 598 | /* -- Predictive narrowing of induction variables ------------------------- */ |
450 | 599 | ||
600 | /* Narrow a single runtime value. */ | ||
601 | static int narrow_forl(jit_State *J, cTValue *o) | ||
602 | { | ||
603 | if (tvisint(o)) return 1; | ||
604 | if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); | ||
605 | return 0; | ||
606 | } | ||
607 | |||
451 | /* Narrow the FORL index type by looking at the runtime values. */ | 608 | /* Narrow the FORL index type by looking at the runtime values. */ |
452 | IRType lj_opt_narrow_forl(cTValue *forbase) | 609 | IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) |
453 | { | 610 | { |
454 | lua_assert(tvisnum(&forbase[FORL_IDX]) && | 611 | lua_assert(tvisnumber(&tv[FORL_IDX]) && |
455 | tvisnum(&forbase[FORL_STOP]) && | 612 | tvisnumber(&tv[FORL_STOP]) && |
456 | tvisnum(&forbase[FORL_STEP])); | 613 | tvisnumber(&tv[FORL_STEP])); |
457 | /* Narrow only if the runtime values of start/stop/step are all integers. */ | 614 | /* Narrow only if the runtime values of start/stop/step are all integers. */ |
458 | if (numisint(numV(&forbase[FORL_IDX])) && | 615 | if (narrow_forl(J, &tv[FORL_IDX]) && |
459 | numisint(numV(&forbase[FORL_STOP])) && | 616 | narrow_forl(J, &tv[FORL_STOP]) && |
460 | numisint(numV(&forbase[FORL_STEP]))) { | 617 | narrow_forl(J, &tv[FORL_STEP])) { |
461 | /* And if the loop index can't possibly overflow. */ | 618 | /* And if the loop index can't possibly overflow. */ |
462 | lua_Number step = numV(&forbase[FORL_STEP]); | 619 | lua_Number step = numberVnum(&tv[FORL_STEP]); |
463 | lua_Number sum = numV(&forbase[FORL_STOP]) + step; | 620 | lua_Number sum = numberVnum(&tv[FORL_STOP]) + step; |
464 | if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0) | 621 | if (0 <= step ? (sum <= 2147483647.0) : (sum >= -2147483648.0)) |
465 | return IRT_INT; | 622 | return IRT_INT; |
466 | } | 623 | } |
467 | return IRT_NUM; | 624 | return IRT_NUM; |
diff --git a/src/lj_record.c b/src/lj_record.c index 2bfd2608..613e458e 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include "lj_err.h" | 13 | #include "lj_err.h" |
14 | #include "lj_str.h" | 14 | #include "lj_str.h" |
15 | #include "lj_tab.h" | 15 | #include "lj_tab.h" |
16 | #include "lj_meta.h" | ||
16 | #include "lj_frame.h" | 17 | #include "lj_frame.h" |
17 | #include "lj_bc.h" | 18 | #include "lj_bc.h" |
18 | #include "lj_ff.h" | 19 | #include "lj_ff.h" |
@@ -102,7 +103,7 @@ static void rec_check_slots(jit_State *J) | |||
102 | lua_assert((J->slot[s+1] & TREF_FRAME)); | 103 | lua_assert((J->slot[s+1] & TREF_FRAME)); |
103 | depth++; | 104 | depth++; |
104 | } else { | 105 | } else { |
105 | if (tvisnum(tv)) | 106 | if (tvisnumber(tv)) |
106 | lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ | 107 | lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ |
107 | else | 108 | else |
108 | lua_assert(itype2irt(tv) == tref_type(tr)); | 109 | lua_assert(itype2irt(tv) == tref_type(tr)); |
@@ -197,6 +198,7 @@ typedef enum { | |||
197 | static void canonicalize_slots(jit_State *J) | 198 | static void canonicalize_slots(jit_State *J) |
198 | { | 199 | { |
199 | BCReg s; | 200 | BCReg s; |
201 | if (LJ_DUALNUM) return; | ||
200 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { | 202 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { |
201 | TRef tr = J->slot[s]; | 203 | TRef tr = J->slot[s]; |
202 | if (tref_isinteger(tr)) { | 204 | if (tref_isinteger(tr)) { |
@@ -254,16 +256,16 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) | |||
254 | } | 256 | } |
255 | if (op == BC_KSHORT) { | 257 | if (op == BC_KSHORT) { |
256 | int32_t k = (int32_t)(int16_t)bc_d(ins); | 258 | int32_t k = (int32_t)(int16_t)bc_d(ins); |
257 | return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, cast_num(k)); | 259 | return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, (lua_Number)k); |
258 | } else { | 260 | } else { |
259 | lua_Number n = proto_knum(J->pt, bc_d(ins)); | 261 | cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); |
260 | if (t == IRT_INT) { | 262 | if (t == IRT_INT) { |
261 | int32_t k = lj_num2int(n); | 263 | int32_t k = numberVint(tv); |
262 | if (n == cast_num(k)) /* -0 is ok here. */ | 264 | if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ |
263 | return lj_ir_kint(J, k); | 265 | return lj_ir_kint(J, k); |
264 | return 0; /* Type mismatch. */ | 266 | return 0; /* Type mismatch. */ |
265 | } else { | 267 | } else { |
266 | return lj_ir_knum(J, n); | 268 | return lj_ir_knum(J, numberVnum(tv)); |
267 | } | 269 | } |
268 | } | 270 | } |
269 | } | 271 | } |
@@ -273,41 +275,47 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) | |||
273 | return 0; /* No assignment to this slot found? */ | 275 | return 0; /* No assignment to this slot found? */ |
274 | } | 276 | } |
275 | 277 | ||
278 | /* Load and optionally convert a FORI argument from a slot. */ | ||
279 | static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode) | ||
280 | { | ||
281 | int conv = (tvisint(&J->L->base[slot]) != (t==IRT_INT)) ? IRSLOAD_CONVERT : 0; | ||
282 | return sloadt(J, (int32_t)slot, | ||
283 | t + (((mode & IRSLOAD_TYPECHECK) || | ||
284 | (conv && t == IRT_INT && !(mode >> 16))) ? | ||
285 | IRT_GUARD : 0), | ||
286 | mode + conv); | ||
287 | } | ||
288 | |||
276 | /* Peek before FORI to find a const initializer. Otherwise load from slot. */ | 289 | /* Peek before FORI to find a const initializer. Otherwise load from slot. */ |
277 | static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, IRType t) | 290 | static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, |
291 | IRType t, int mode) | ||
278 | { | 292 | { |
279 | TRef tr = J->base[slot]; | 293 | TRef tr = J->base[slot]; |
280 | if (!tr) { | 294 | if (!tr) { |
281 | tr = find_kinit(J, fori, slot, t); | 295 | tr = find_kinit(J, fori, slot, t); |
282 | if (!tr) | 296 | if (!tr) |
283 | tr = sloadt(J, (int32_t)slot, | 297 | tr = fori_load(J, slot, t, mode); |
284 | t == IRT_INT ? (IRT_INT|IRT_GUARD) : t, | ||
285 | t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_READONLY|IRSLOAD_INHERIT) : | ||
286 | (IRSLOAD_READONLY|IRSLOAD_INHERIT)); | ||
287 | } | 298 | } |
288 | return tr; | 299 | return tr; |
289 | } | 300 | } |
290 | 301 | ||
291 | /* In-place coercion of FORI arguments. */ | 302 | /* Return the direction of the FOR loop iterator. |
292 | static lua_Number for_coerce(jit_State *J, TValue *o) | 303 | ** It's important to exactly reproduce the semantics of the interpreter. |
304 | */ | ||
305 | static int rec_for_direction(cTValue *o) | ||
293 | { | 306 | { |
294 | if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o))) | 307 | return (tvisint(o) ? intV(o) : (int32_t)o->u32.hi) >= 0; |
295 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
296 | return numV(o); | ||
297 | } | 308 | } |
298 | 309 | ||
299 | /* Simulate the runtime behavior of the FOR loop iterator. | 310 | /* Simulate the runtime behavior of the FOR loop iterator. */ |
300 | ** It's important to exactly reproduce the semantics of the interpreter. | 311 | static LoopEvent rec_for_iter(IROp *op, cTValue *o, int isforl) |
301 | */ | ||
302 | static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl) | ||
303 | { | 312 | { |
304 | TValue *forbase = &J->L->base[ra]; | 313 | lua_Number stopv = numberVnum(&o[FORL_STOP]); |
305 | lua_Number stopv = for_coerce(J, &forbase[FORL_STOP]); | 314 | lua_Number idxv = numberVnum(&o[FORL_IDX]); |
306 | lua_Number idxv = for_coerce(J, &forbase[FORL_IDX]); | 315 | lua_Number stepv = numberVnum(&o[FORL_STEP]); |
307 | lua_Number stepv = for_coerce(J, &forbase[FORL_STEP]); | ||
308 | if (isforl) | 316 | if (isforl) |
309 | idxv += stepv; | 317 | idxv += stepv; |
310 | if ((int32_t)forbase[FORL_STEP].u32.hi >= 0) { | 318 | if (rec_for_direction(&o[FORL_STEP])) { |
311 | if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; } | 319 | if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; } |
312 | *op = IR_GT; return LOOPEV_LEAVE; | 320 | *op = IR_GT; return LOOPEV_LEAVE; |
313 | } else { | 321 | } else { |
@@ -316,44 +324,123 @@ static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl) | |||
316 | } | 324 | } |
317 | } | 325 | } |
318 | 326 | ||
327 | /* Record checks for FOR loop overflow and step direction. */ | ||
328 | static void rec_for_check(jit_State *J, IRType t, int dir, TRef stop, TRef step) | ||
329 | { | ||
330 | if (!tref_isk(step)) { | ||
331 | /* Non-constant step: need a guard for the direction. */ | ||
332 | TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J); | ||
333 | emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero); | ||
334 | /* Add hoistable overflow checks for a narrowed FORL index. */ | ||
335 | if (t == IRT_INT) { | ||
336 | if (tref_isk(stop)) { | ||
337 | /* Constant stop: optimize check away or to a range check for step. */ | ||
338 | int32_t k = IR(tref_ref(stop))->i; | ||
339 | if (dir) { | ||
340 | if (k > 0) | ||
341 | emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k)); | ||
342 | } else { | ||
343 | if (k < 0) | ||
344 | emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k)); | ||
345 | } | ||
346 | } else { | ||
347 | /* Stop+step variable: need full overflow check. */ | ||
348 | TRef tr = emitir(IRTGI(IR_ADDOV), step, stop); | ||
349 | emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */ | ||
350 | } | ||
351 | } | ||
352 | } else if (t == IRT_INT && !tref_isk(stop)) { | ||
353 | /* Constant step: optimize overflow check to a range check for stop. */ | ||
354 | int32_t k = IR(tref_ref(step))->i; | ||
355 | k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; | ||
356 | emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k)); | ||
357 | } | ||
358 | } | ||
359 | |||
360 | /* Record a FORL instruction. */ | ||
361 | static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, | ||
362 | int init) | ||
363 | { | ||
364 | BCReg ra = bc_a(*fori); | ||
365 | cTValue *tv = &J->L->base[ra]; | ||
366 | TRef idx = J->base[ra+FORL_IDX]; | ||
367 | IRType t = idx ? tref_type(idx) : | ||
368 | (init || LJ_DUALNUM) ? lj_opt_narrow_forl(J, tv) : IRT_NUM; | ||
369 | int mode = IRSLOAD_INHERIT + | ||
370 | ((!LJ_DUALNUM || tvisint(tv) == (t == IRT_INT)) ? IRSLOAD_READONLY : 0); | ||
371 | TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); | ||
372 | TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); | ||
373 | int tc, dir = rec_for_direction(&tv[FORL_STEP]); | ||
374 | lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); | ||
375 | scev->t.irt = t; | ||
376 | scev->dir = dir; | ||
377 | scev->stop = tref_ref(stop); | ||
378 | scev->step = tref_ref(step); | ||
379 | if (init) | ||
380 | rec_for_check(J, t, dir, stop, step); | ||
381 | scev->start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT)); | ||
382 | tc = (LJ_DUALNUM && | ||
383 | !(scev->start && irref_isk(scev->stop) && irref_isk(scev->step))) ? | ||
384 | IRSLOAD_TYPECHECK : 0; | ||
385 | if (tc) { | ||
386 | J->base[ra+FORL_STOP] = stop; | ||
387 | J->base[ra+FORL_STEP] = step; | ||
388 | } | ||
389 | if (!idx) | ||
390 | idx = fori_load(J, ra+FORL_IDX, t, | ||
391 | IRSLOAD_INHERIT + tc + (J->scev.start << 16)); | ||
392 | if (!init) | ||
393 | J->base[ra+FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); | ||
394 | J->base[ra+FORL_EXT] = idx; | ||
395 | scev->idx = tref_ref(idx); | ||
396 | J->maxslot = ra+FORL_EXT+1; | ||
397 | } | ||
398 | |||
319 | /* Record FORL/JFORL or FORI/JFORI. */ | 399 | /* Record FORL/JFORL or FORI/JFORI. */ |
320 | static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | 400 | static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) |
321 | { | 401 | { |
322 | BCReg ra = bc_a(*fori); | 402 | BCReg ra = bc_a(*fori); |
323 | IROp op; | 403 | TValue *tv = &J->L->base[ra]; |
324 | LoopEvent ev = for_iter(J, &op, ra, isforl); | ||
325 | TRef *tr = &J->base[ra]; | 404 | TRef *tr = &J->base[ra]; |
326 | TRef idx, stop; | 405 | IROp op; |
406 | LoopEvent ev; | ||
407 | TRef stop; | ||
327 | IRType t; | 408 | IRType t; |
328 | if (isforl) { /* Handle FORL/JFORL opcodes. */ | 409 | if (isforl) { /* Handle FORL/JFORL opcodes. */ |
329 | TRef step; | 410 | TRef idx = tr[FORL_IDX]; |
330 | idx = tr[FORL_IDX]; | ||
331 | if (tref_ref(idx) == J->scev.idx) { | 411 | if (tref_ref(idx) == J->scev.idx) { |
332 | t = J->scev.t.irt; | 412 | t = J->scev.t.irt; |
333 | stop = J->scev.stop; | 413 | stop = J->scev.stop; |
334 | step = J->scev.step; | 414 | idx = emitir(IRT(IR_ADD, t), idx, J->scev.step); |
415 | tr[FORL_EXT] = tr[FORL_IDX] = idx; | ||
335 | } else { | 416 | } else { |
336 | if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0); | 417 | ScEvEntry scev; |
337 | t = tref_type(idx); | 418 | rec_for_loop(J, fori, &scev, 0); |
338 | stop = fori_arg(J, fori, ra+FORL_STOP, t); | 419 | t = scev.t.irt; |
339 | step = fori_arg(J, fori, ra+FORL_STEP, t); | 420 | stop = scev.stop; |
340 | } | 421 | } |
341 | tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); | ||
342 | } else { /* Handle FORI/JFORI opcodes. */ | 422 | } else { /* Handle FORI/JFORI opcodes. */ |
343 | BCReg i; | 423 | BCReg i; |
344 | t = IRT_NUM; | 424 | lj_meta_for(J->L, tv); |
425 | t = lj_opt_narrow_forl(J, tv); | ||
345 | for (i = FORL_IDX; i <= FORL_STEP; i++) { | 426 | for (i = FORL_IDX; i <= FORL_STEP; i++) { |
346 | lua_assert(J->base[ra+i] != 0); /* Assumes the slots are already set. */ | 427 | lua_assert(tref_isnumber_str(tr[i])); |
347 | tr[i] = lj_ir_tonum(J, J->base[ra+i]); | 428 | if (tref_isstr(tr[i])) |
429 | tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); | ||
430 | if (t == IRT_INT) { | ||
431 | if (!tref_isinteger(tr[i])) | ||
432 | tr[i] = emitir(IRTI(IR_CONV), tr[i], IRCONV_INT_NUM|IRCONV_CHECK); | ||
433 | } else { | ||
434 | if (!tref_isnum(tr[i])) | ||
435 | tr[i] = emitir(IRTN(IR_CONV), tr[i], IRCONV_NUM_INT); | ||
436 | } | ||
348 | } | 437 | } |
349 | idx = tr[FORL_IDX]; | 438 | tr[FORL_EXT] = tr[FORL_IDX]; |
350 | stop = tr[FORL_STOP]; | 439 | stop = tr[FORL_STOP]; |
351 | if (!tref_isk(tr[FORL_STEP])) /* Non-const step: need direction guard. */ | 440 | rec_for_check(J, t, rec_for_direction(&tv[FORL_STEP]), stop, tr[FORL_STEP]); |
352 | emitir(IRTG(((op-IR_LT)>>1)+IR_LT, IRT_NUM), | ||
353 | tr[FORL_STEP], lj_ir_knum_zero(J)); | ||
354 | } | 441 | } |
355 | 442 | ||
356 | tr[FORL_EXT] = idx; | 443 | ev = rec_for_iter(&op, tv, isforl); |
357 | if (ev == LOOPEV_LEAVE) { | 444 | if (ev == LOOPEV_LEAVE) { |
358 | J->maxslot = ra+FORL_EXT+1; | 445 | J->maxslot = ra+FORL_EXT+1; |
359 | J->pc = fori+1; | 446 | J->pc = fori+1; |
@@ -363,7 +450,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | |||
363 | } | 450 | } |
364 | lj_snap_add(J); | 451 | lj_snap_add(J); |
365 | 452 | ||
366 | emitir(IRTG(op, t), idx, stop); | 453 | emitir(IRTG(op, t), tr[FORL_IDX], stop); |
367 | 454 | ||
368 | if (ev == LOOPEV_LEAVE) { | 455 | if (ev == LOOPEV_LEAVE) { |
369 | J->maxslot = ra; | 456 | J->maxslot = ra; |
@@ -870,7 +957,7 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | |||
870 | if (ref == J->scev.idx) { | 957 | if (ref == J->scev.idx) { |
871 | int32_t stop; | 958 | int32_t stop; |
872 | lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); | 959 | lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); |
873 | stop = lj_num2int(numV(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP])); | 960 | stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); |
874 | /* Runtime value for stop of loop is within bounds? */ | 961 | /* Runtime value for stop of loop is within bounds? */ |
875 | if ((int64_t)stop + ofs < (int64_t)asize) { | 962 | if ((int64_t)stop + ofs < (int64_t)asize) { |
876 | /* Emit invariant bounds check for stop. */ | 963 | /* Emit invariant bounds check for stop. */ |
@@ -897,15 +984,12 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
897 | /* Integer keys are looked up in the array part first. */ | 984 | /* Integer keys are looked up in the array part first. */ |
898 | key = ix->key; | 985 | key = ix->key; |
899 | if (tref_isnumber(key)) { | 986 | if (tref_isnumber(key)) { |
900 | lua_Number n = numV(&ix->keyv); | 987 | int32_t k = numberVint(&ix->keyv); |
901 | int32_t k = lj_num2int(n); | 988 | if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) |
902 | lua_assert(tvisnum(&ix->keyv)); | 989 | k = LJ_MAX_ASIZE; |
903 | /* Potential array key? */ | 990 | if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ |
904 | if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) { | 991 | TRef ikey = lj_opt_narrow_index(J, key); |
905 | TRef asizeref, ikey = key; | 992 | TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); |
906 | if (!tref_isinteger(ikey)) | ||
907 | ikey = emitir(IRTGI(IR_CONV), ikey, IRCONV_INT_NUM|IRCONV_INDEX); | ||
908 | asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); | ||
909 | if ((MSize)k < t->asize) { /* Currently an array key? */ | 993 | if ((MSize)k < t->asize) { /* Currently an array key? */ |
910 | TRef arrayref; | 994 | TRef arrayref; |
911 | rec_idx_abc(J, asizeref, ikey, t->asize); | 995 | rec_idx_abc(J, asizeref, ikey, t->asize); |
@@ -1081,7 +1165,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
1081 | } else { | 1165 | } else { |
1082 | keybarrier = 0; /* Previous non-nil value kept the key alive. */ | 1166 | keybarrier = 0; /* Previous non-nil value kept the key alive. */ |
1083 | } | 1167 | } |
1084 | if (tref_isinteger(ix->val)) /* Convert int to number before storing. */ | 1168 | /* Convert int to number before storing. */ |
1169 | if (!LJ_DUALNUM && tref_isinteger(ix->val)) | ||
1085 | ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); | 1170 | ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); |
1086 | emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); | 1171 | emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); |
1087 | if (keybarrier || tref_isgcv(ix->val)) | 1172 | if (keybarrier || tref_isgcv(ix->val)) |
@@ -1135,7 +1220,8 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) | |||
1135 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ | 1220 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ |
1136 | return res; | 1221 | return res; |
1137 | } else { /* Upvalue store. */ | 1222 | } else { /* Upvalue store. */ |
1138 | if (tref_isinteger(val)) /* Convert int to number before storing. */ | 1223 | /* Convert int to number before storing. */ |
1224 | if (!LJ_DUALNUM && tref_isinteger(val)) | ||
1139 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); | 1225 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); |
1140 | emitir(IRT(IR_USTORE, tref_type(val)), uref, val); | 1226 | emitir(IRT(IR_USTORE, tref_type(val)), uref, val); |
1141 | if (needbarrier && tref_isgcv(val)) | 1227 | if (needbarrier && tref_isgcv(val)) |
@@ -1455,16 +1541,15 @@ void lj_record_ins(jit_State *J) | |||
1455 | case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */ | 1541 | case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */ |
1456 | case BCMvar: | 1542 | case BCMvar: |
1457 | copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; | 1543 | copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; |
1458 | case BCMnum: { lua_Number n = proto_knum(J->pt, rb); | ||
1459 | setnumV(rbv, n); ix.tab = rb = lj_ir_knumint(J, n); } break; | ||
1460 | default: break; /* Handled later. */ | 1544 | default: break; /* Handled later. */ |
1461 | } | 1545 | } |
1462 | switch (bcmode_c(op)) { | 1546 | switch (bcmode_c(op)) { |
1463 | case BCMvar: | 1547 | case BCMvar: |
1464 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; | 1548 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; |
1465 | case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; | 1549 | case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; |
1466 | case BCMnum: { lua_Number n = proto_knum(J->pt, rc); | 1550 | case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); |
1467 | setnumV(rcv, n); ix.key = rc = lj_ir_knumint(J, n); } break; | 1551 | copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : |
1552 | lj_ir_knumint(J, numV(tv)); } break; | ||
1468 | case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); | 1553 | case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); |
1469 | setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; | 1554 | setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; |
1470 | default: break; /* Handled later. */ | 1555 | default: break; /* Handled later. */ |
@@ -1502,9 +1587,11 @@ void lj_record_ins(jit_State *J) | |||
1502 | irop = (int)op - (int)BC_ISLT + (int)IR_LT; | 1587 | irop = (int)op - (int)BC_ISLT + (int)IR_LT; |
1503 | if (ta == IRT_NUM) { | 1588 | if (ta == IRT_NUM) { |
1504 | if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ | 1589 | if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ |
1505 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 5; | 1590 | if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop)) |
1591 | irop ^= 5; | ||
1506 | } else if (ta == IRT_INT) { | 1592 | } else if (ta == IRT_INT) { |
1507 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; | 1593 | if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop)) |
1594 | irop ^= 1; | ||
1508 | } else if (ta == IRT_STR) { | 1595 | } else if (ta == IRT_STR) { |
1509 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; | 1596 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; |
1510 | ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); | 1597 | ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); |
@@ -1599,13 +1686,11 @@ void lj_record_ins(jit_State *J) | |||
1599 | case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: | 1686 | case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: |
1600 | case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { | 1687 | case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { |
1601 | MMS mm = bcmode_mm(op); | 1688 | MMS mm = bcmode_mm(op); |
1602 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) { | 1689 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) |
1603 | rb = lj_ir_tonum(J, rb); | 1690 | rc = lj_opt_narrow_arith(J, rb, rc, &ix.tabv, &ix.keyv, |
1604 | rc = lj_ir_tonum(J, rc); | 1691 | (int)mm - (int)MM_add + (int)IR_ADD); |
1605 | rc = emitir(IRTN((int)mm - (int)MM_add + (int)IR_ADD), rb, rc); | 1692 | else |
1606 | } else { | ||
1607 | rc = rec_mm_arith(J, &ix, mm); | 1693 | rc = rec_mm_arith(J, &ix, mm); |
1608 | } | ||
1609 | break; | 1694 | break; |
1610 | } | 1695 | } |
1611 | 1696 | ||
@@ -1827,59 +1912,6 @@ void lj_record_ins(jit_State *J) | |||
1827 | 1912 | ||
1828 | /* -- Recording setup ----------------------------------------------------- */ | 1913 | /* -- Recording setup ----------------------------------------------------- */ |
1829 | 1914 | ||
1830 | /* Setup recording for a FORL loop. */ | ||
1831 | static void rec_setup_forl(jit_State *J, const BCIns *fori) | ||
1832 | { | ||
1833 | BCReg ra = bc_a(*fori); | ||
1834 | cTValue *forbase = &J->L->base[ra]; | ||
1835 | IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase) | ||
1836 | : IRT_NUM; | ||
1837 | TRef start; | ||
1838 | TRef stop = fori_arg(J, fori, ra+FORL_STOP, t); | ||
1839 | TRef step = fori_arg(J, fori, ra+FORL_STEP, t); | ||
1840 | int dir = (0 <= numV(&forbase[FORL_STEP])); | ||
1841 | lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); | ||
1842 | J->scev.t.irt = t; | ||
1843 | J->scev.dir = dir; | ||
1844 | J->scev.stop = tref_ref(stop); | ||
1845 | J->scev.step = tref_ref(step); | ||
1846 | if (!tref_isk(step)) { | ||
1847 | /* Non-constant step: need a guard for the direction. */ | ||
1848 | TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J); | ||
1849 | emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero); | ||
1850 | /* Add hoistable overflow checks for a narrowed FORL index. */ | ||
1851 | if (t == IRT_INT) { | ||
1852 | if (tref_isk(stop)) { | ||
1853 | /* Constant stop: optimize check away or to a range check for step. */ | ||
1854 | int32_t k = IR(tref_ref(stop))->i; | ||
1855 | if (dir) { | ||
1856 | if (k > 0) | ||
1857 | emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k)); | ||
1858 | } else { | ||
1859 | if (k < 0) | ||
1860 | emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k)); | ||
1861 | } | ||
1862 | } else { | ||
1863 | /* Stop+step variable: need full overflow check. */ | ||
1864 | TRef tr = emitir(IRTGI(IR_ADDOV), step, stop); | ||
1865 | emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */ | ||
1866 | } | ||
1867 | } | ||
1868 | } else if (t == IRT_INT && !tref_isk(stop)) { | ||
1869 | /* Constant step: optimize overflow check to a range check for stop. */ | ||
1870 | int32_t k = IR(tref_ref(step))->i; | ||
1871 | k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; | ||
1872 | emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k)); | ||
1873 | } | ||
1874 | J->scev.start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT)); | ||
1875 | start = sloadt(J, (int32_t)(ra+FORL_IDX), | ||
1876 | (t == IRT_INT && !J->scev.start) ? (IRT_INT|IRT_GUARD) : t, | ||
1877 | t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_INHERIT) : IRSLOAD_INHERIT); | ||
1878 | J->base[ra+FORL_EXT] = start; | ||
1879 | J->scev.idx = tref_ref(start); | ||
1880 | J->maxslot = ra+FORL_EXT+1; | ||
1881 | } | ||
1882 | |||
1883 | /* Setup recording for a root trace started by a hot loop. */ | 1915 | /* Setup recording for a root trace started by a hot loop. */ |
1884 | static const BCIns *rec_setup_root(jit_State *J) | 1916 | static const BCIns *rec_setup_root(jit_State *J) |
1885 | { | 1917 | { |
@@ -2033,7 +2065,7 @@ void lj_record_setup(jit_State *J) | |||
2033 | if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI && | 2065 | if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI && |
2034 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { | 2066 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { |
2035 | lj_snap_add(J); | 2067 | lj_snap_add(J); |
2036 | rec_setup_forl(J, J->pc-1); | 2068 | rec_for_loop(J, J->pc-1, &J->scev, 1); |
2037 | goto sidecheck; | 2069 | goto sidecheck; |
2038 | } | 2070 | } |
2039 | } else { | 2071 | } else { |
@@ -2054,7 +2086,7 @@ void lj_record_setup(jit_State *J) | |||
2054 | */ | 2086 | */ |
2055 | lj_snap_add(J); | 2087 | lj_snap_add(J); |
2056 | if (bc_op(J->cur.startins) == BC_FORL) | 2088 | if (bc_op(J->cur.startins) == BC_FORL) |
2057 | rec_setup_forl(J, J->pc-1); | 2089 | rec_for_loop(J, J->pc-1, &J->scev, 1); |
2058 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) | 2090 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) |
2059 | lj_trace_err(J, LJ_TRERR_STACKOV); | 2091 | lj_trace_err(J, LJ_TRERR_STACKOV); |
2060 | } | 2092 | } |
diff --git a/src/lj_snap.c b/src/lj_snap.c index 59435b20..70628a0e 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
@@ -68,7 +68,8 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
68 | if (!(ir->op2 & IRSLOAD_INHERIT)) | 68 | if (!(ir->op2 & IRSLOAD_INHERIT)) |
69 | continue; | 69 | continue; |
70 | /* No need to restore readonly slots and unmodified non-parent slots. */ | 70 | /* No need to restore readonly slots and unmodified non-parent slots. */ |
71 | if ((ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | 71 | if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && |
72 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | ||
72 | sn |= SNAP_NORESTORE; | 73 | sn |= SNAP_NORESTORE; |
73 | } | 74 | } |
74 | map[n++] = sn; | 75 | map[n++] = sn; |
diff --git a/src/lj_trace.c b/src/lj_trace.c index b67e8f75..69124542 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
@@ -495,8 +495,8 @@ static int trace_abort(jit_State *J) | |||
495 | 495 | ||
496 | J->postproc = LJ_POST_NONE; | 496 | J->postproc = LJ_POST_NONE; |
497 | lj_mcode_abort(J); | 497 | lj_mcode_abort(J); |
498 | if (tvisnum(L->top-1)) | 498 | if (tvisnumber(L->top-1)) |
499 | e = (TraceError)lj_num2int(numV(L->top-1)); | 499 | e = (TraceError)numberVint(L->top-1); |
500 | if (e == LJ_TRERR_MCODELM) { | 500 | if (e == LJ_TRERR_MCODELM) { |
501 | J->state = LJ_TRACE_ASM; | 501 | J->state = LJ_TRACE_ASM; |
502 | return 1; /* Retry ASM with new MCode area. */ | 502 | return 1; /* Retry ASM with new MCode area. */ |
@@ -703,8 +703,12 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) | |||
703 | setintV(L->top++, J->exitno); | 703 | setintV(L->top++, J->exitno); |
704 | setintV(L->top++, RID_NUM_GPR); | 704 | setintV(L->top++, RID_NUM_GPR); |
705 | setintV(L->top++, RID_NUM_FPR); | 705 | setintV(L->top++, RID_NUM_FPR); |
706 | for (i = 0; i < RID_NUM_GPR; i++) | 706 | for (i = 0; i < RID_NUM_GPR; i++) { |
707 | setnumV(L->top++, cast_num(ex->gpr[i])); | 707 | if (sizeof(ex->gpr[i]) == sizeof(int32_t)) |
708 | setintV(L->top++, (int32_t)ex->gpr[i]); | ||
709 | else | ||
710 | setnumV(L->top++, (lua_Number)ex->gpr[i]); | ||
711 | } | ||
708 | for (i = 0; i < RID_NUM_FPR; i++) { | 712 | for (i = 0; i < RID_NUM_FPR; i++) { |
709 | setnumV(L->top, ex->fpr[i]); | 713 | setnumV(L->top, ex->fpr[i]); |
710 | if (LJ_UNLIKELY(tvisnan(L->top))) | 714 | if (LJ_UNLIKELY(tvisnan(L->top))) |