diff options
| author | Mike Pall <mike> | 2011-03-10 01:57:24 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2011-03-10 01:57:24 +0100 |
| commit | bfce3c1127fd57fe0c935c92bcf45b4737041edd (patch) | |
| tree | 2bd2d9e08c70608de63c7a69df7f00cfab07f6be | |
| parent | 3f26e3a89d54dfb761ca02fc89aaf15326f5f514 (diff) | |
| download | luajit-bfce3c1127fd57fe0c935c92bcf45b4737041edd.tar.gz luajit-bfce3c1127fd57fe0c935c92bcf45b4737041edd.tar.bz2 luajit-bfce3c1127fd57fe0c935c92bcf45b4737041edd.zip | |
DUALNUM: Handle integer type in JIT compiler.
| -rw-r--r-- | src/Makefile.dep | 9 | ||||
| -rw-r--r-- | src/lj_asm.c | 35 | ||||
| -rw-r--r-- | src/lj_crecord.c | 35 | ||||
| -rw-r--r-- | src/lj_ffrecord.c | 39 | ||||
| -rw-r--r-- | src/lj_ir.c | 26 | ||||
| -rw-r--r-- | src/lj_ir.h | 30 | ||||
| -rw-r--r-- | src/lj_iropt.h | 12 | ||||
| -rw-r--r-- | src/lj_meta.c | 28 | ||||
| -rw-r--r-- | src/lj_meta.h | 2 | ||||
| -rw-r--r-- | src/lj_obj.h | 2 | ||||
| -rw-r--r-- | src/lj_opt_fold.c | 5 | ||||
| -rw-r--r-- | src/lj_opt_loop.c | 9 | ||||
| -rw-r--r-- | src/lj_opt_narrow.c | 233 | ||||
| -rw-r--r-- | src/lj_record.c | 280 | ||||
| -rw-r--r-- | src/lj_snap.c | 3 | ||||
| -rw-r--r-- | src/lj_trace.c | 12 |
16 files changed, 484 insertions, 276 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep index 1684ebd7..8458ec78 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
| @@ -128,15 +128,16 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
| 128 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h | 128 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h |
| 129 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ | 129 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ |
| 130 | lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 130 | lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ |
| 131 | lj_dispatch.h lj_traceerr.h | 131 | lj_dispatch.h lj_traceerr.h lj_vm.h |
| 132 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ | 132 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ |
| 133 | lj_arch.h | 133 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \ |
| 134 | lj_vm.h | ||
| 134 | lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 135 | lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
| 135 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \ | 136 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \ |
| 136 | lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h | 137 | lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h |
| 137 | lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 138 | lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
| 138 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ | 139 | lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ |
| 139 | lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ | 140 | lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ |
| 140 | lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h | 141 | lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h |
| 141 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 142 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
| 142 | lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 143 | lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 5f3c5fab..d395010d 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -2059,7 +2059,7 @@ static void asm_href(ASMState *as, IRIns *ir) | |||
| 2059 | } else { | 2059 | } else { |
| 2060 | emit_sjcc(as, CC_P, l_next); | 2060 | emit_sjcc(as, CC_P, l_next); |
| 2061 | emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); | 2061 | emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); |
| 2062 | emit_sjcc(as, CC_A, l_next); | 2062 | emit_sjcc(as, CC_AE, l_next); |
| 2063 | /* The type check avoids NaN penalties and complaints from Valgrind. */ | 2063 | /* The type check avoids NaN penalties and complaints from Valgrind. */ |
| 2064 | #if LJ_64 | 2064 | #if LJ_64 |
| 2065 | emit_u32(as, LJ_TISNUM); | 2065 | emit_u32(as, LJ_TISNUM); |
| @@ -2388,7 +2388,8 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) | |||
| 2388 | 2388 | ||
| 2389 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 2389 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
| 2390 | { | 2390 | { |
| 2391 | lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t)); | 2391 | lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || |
| 2392 | (LJ_DUALNUM && irt_isint(ir->t))); | ||
| 2392 | #if LJ_64 | 2393 | #if LJ_64 |
| 2393 | if (irt_islightud(ir->t)) { | 2394 | if (irt_islightud(ir->t)) { |
| 2394 | Reg dest = asm_load_lightud64(as, ir, 1); | 2395 | Reg dest = asm_load_lightud64(as, ir, 1); |
| @@ -2409,8 +2410,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
| 2409 | } | 2410 | } |
| 2410 | /* Always do the type check, even if the load result is unused. */ | 2411 | /* Always do the type check, even if the load result is unused. */ |
| 2411 | as->mrm.ofs += 4; | 2412 | as->mrm.ofs += 4; |
| 2412 | asm_guardcc(as, irt_isnum(ir->t) ? CC_A : CC_NE); | 2413 | asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); |
| 2413 | if (LJ_64 && irt_isnum(ir->t)) { | 2414 | if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { |
| 2415 | lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); | ||
| 2414 | emit_u32(as, LJ_TISNUM); | 2416 | emit_u32(as, LJ_TISNUM); |
| 2415 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); | 2417 | emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); |
| 2416 | } else { | 2418 | } else { |
| @@ -2443,7 +2445,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
| 2443 | if (ra_hasreg(src)) { | 2445 | if (ra_hasreg(src)) { |
| 2444 | emit_mrm(as, XO_MOVto, src, RID_MRM); | 2446 | emit_mrm(as, XO_MOVto, src, RID_MRM); |
| 2445 | } else if (!irt_ispri(irr->t)) { | 2447 | } else if (!irt_ispri(irr->t)) { |
| 2446 | lua_assert(irt_isaddr(ir->t)); | 2448 | lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); |
| 2447 | emit_i32(as, irr->i); | 2449 | emit_i32(as, irr->i); |
| 2448 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); | 2450 | emit_mrm(as, XO_MOVmi, 0, RID_MRM); |
| 2449 | } | 2451 | } |
| @@ -2460,8 +2462,9 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
| 2460 | Reg base; | 2462 | Reg base; |
| 2461 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 2463 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ |
| 2462 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 2464 | lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); |
| 2463 | lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); | 2465 | lua_assert(LJ_DUALNUM || |
| 2464 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t)) { | 2466 | !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); |
| 2467 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { | ||
| 2465 | Reg left = ra_scratch(as, RSET_FPR); | 2468 | Reg left = ra_scratch(as, RSET_FPR); |
| 2466 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ | 2469 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ |
| 2467 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | 2470 | base = ra_alloc1(as, REF_BASE, RSET_GPR); |
| @@ -2481,12 +2484,14 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
| 2481 | Reg dest = ra_dest(as, ir, allow); | 2484 | Reg dest = ra_dest(as, ir, allow); |
| 2482 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | 2485 | base = ra_alloc1(as, REF_BASE, RSET_GPR); |
| 2483 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); | 2486 | lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); |
| 2484 | if ((ir->op2 & IRSLOAD_CONVERT)) | 2487 | if ((ir->op2 & IRSLOAD_CONVERT)) { |
| 2485 | emit_rmro(as, XO_CVTSD2SI, dest, base, ofs); | 2488 | t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ |
| 2486 | else if (irt_isnum(t)) | 2489 | emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); |
| 2490 | } else if (irt_isnum(t)) { | ||
| 2487 | emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); | 2491 | emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); |
| 2488 | else | 2492 | } else { |
| 2489 | emit_rmro(as, XO_MOV, dest, base, ofs); | 2493 | emit_rmro(as, XO_MOV, dest, base, ofs); |
| 2494 | } | ||
| 2490 | } else { | 2495 | } else { |
| 2491 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) | 2496 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) |
| 2492 | return; /* No type check: avoid base alloc. */ | 2497 | return; /* No type check: avoid base alloc. */ |
| @@ -2494,8 +2499,9 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
| 2494 | } | 2499 | } |
| 2495 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 2500 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
| 2496 | /* Need type check, even if the load result is unused. */ | 2501 | /* Need type check, even if the load result is unused. */ |
| 2497 | asm_guardcc(as, irt_isnum(t) ? CC_A : CC_NE); | 2502 | asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); |
| 2498 | if (LJ_64 && irt_isnum(t)) { | 2503 | if (LJ_64 && irt_type(t) >= IRT_NUM) { |
| 2504 | lua_assert(irt_isinteger(t) || irt_isnum(t)); | ||
| 2499 | emit_u32(as, LJ_TISNUM); | 2505 | emit_u32(as, LJ_TISNUM); |
| 2500 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); | 2506 | emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); |
| 2501 | } else { | 2507 | } else { |
| @@ -3408,7 +3414,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
| 3408 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 3414 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
| 3409 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | 3415 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); |
| 3410 | } else { | 3416 | } else { |
| 3411 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); | 3417 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || |
| 3418 | (LJ_DUALNUM && irt_isinteger(ir->t))); | ||
| 3412 | if (!irref_isk(ref)) { | 3419 | if (!irref_isk(ref)) { |
| 3413 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | 3420 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); |
| 3414 | emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); | 3421 | emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); |
diff --git a/src/lj_crecord.c b/src/lj_crecord.c index c93cece3..9482cc18 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c | |||
| @@ -185,6 +185,8 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, | |||
| 185 | (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); | 185 | (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); |
| 186 | else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ | 186 | else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ |
| 187 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0); | 187 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0); |
| 188 | else if (ssize <= 4) | ||
| 189 | sp = lj_opt_narrow_toint(J, sp); | ||
| 188 | xstore: | 190 | xstore: |
| 189 | if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); | 191 | if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); |
| 190 | if (dp == 0) return sp; | 192 | if (dp == 0) return sp; |
| @@ -355,10 +357,10 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) | |||
| 355 | CType *s; | 357 | CType *s; |
| 356 | if (LJ_LIKELY(tref_isinteger(sp))) { | 358 | if (LJ_LIKELY(tref_isinteger(sp))) { |
| 357 | sid = CTID_INT32; | 359 | sid = CTID_INT32; |
| 358 | svisnz = (void *)(intptr_t)(numV(sval) != 0); | 360 | svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval)); |
| 359 | } else if (tref_isnum(sp)) { | 361 | } else if (tref_isnum(sp)) { |
| 360 | sid = CTID_DOUBLE; | 362 | sid = CTID_DOUBLE; |
| 361 | svisnz = (void *)(intptr_t)(numV(sval) != 0); | 363 | svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval)); |
| 362 | } else if (tref_isbool(sp)) { | 364 | } else if (tref_isbool(sp)) { |
| 363 | sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0); | 365 | sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0); |
| 364 | sid = CTID_BOOL; | 366 | sid = CTID_BOOL; |
| @@ -443,16 +445,16 @@ static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) | |||
| 443 | static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) | 445 | static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) |
| 444 | { | 446 | { |
| 445 | IRIns *ir = IR(tref_ref(tr)); | 447 | IRIns *ir = IR(tref_ref(tr)); |
| 446 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && | 448 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && irref_isk(ir->op2) && |
| 447 | ir->o == IR_ADD && irref_isk(ir->op2)) { | 449 | (ir->o == IR_ADD || ir->o == IR_ADDOV || ir->o == IR_SUBOV)) { |
| 448 | IRIns *irk = IR(ir->op2); | 450 | IRIns *irk = IR(ir->op2); |
| 449 | tr = ir->op1; | 451 | ptrdiff_t k; |
| 450 | #if LJ_64 | 452 | if (LJ_64 && irk->o == IR_KINT64) |
| 451 | if (irk->o == IR_KINT64) | 453 | k = (ptrdiff_t)ir_kint64(irk)->u64 * sz; |
| 452 | *ofsp += (ptrdiff_t)ir_kint64(irk)->u64 * sz; | ||
| 453 | else | 454 | else |
| 454 | #endif | 455 | k = (ptrdiff_t)irk->i * sz; |
| 455 | *ofsp += (ptrdiff_t)irk->i * sz; | 456 | if (ir->o == IR_SUBOV) *ofsp -= k; else *ofsp += k; |
| 457 | tr = ir->op1; /* Not a TRef, but the caller doesn't care. */ | ||
| 456 | } | 458 | } |
| 457 | return tr; | 459 | return tr; |
| 458 | } | 460 | } |
| @@ -477,16 +479,7 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) | |||
| 477 | 479 | ||
| 478 | idx = J->base[1]; | 480 | idx = J->base[1]; |
| 479 | if (tref_isnumber(idx)) { | 481 | if (tref_isnumber(idx)) { |
| 480 | /* The size of a ptrdiff_t is target-specific. */ | 482 | idx = lj_opt_narrow_cindex(J, idx); |
| 481 | #if LJ_64 | ||
| 482 | if (tref_isnum(idx)) | ||
| 483 | idx = emitconv(idx, IRT_I64, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY); | ||
| 484 | else | ||
| 485 | idx = emitconv(idx, IRT_I64, IRT_INT, IRCONV_SEXT); | ||
| 486 | #else | ||
| 487 | if (tref_isnum(idx)) | ||
| 488 | idx = emitconv(idx, IRT_INT, IRT_NUM, IRCONV_TRUNC|IRCONV_ANY); | ||
| 489 | #endif | ||
| 490 | integer_key: | 483 | integer_key: |
| 491 | if (ctype_ispointer(ct->info)) { | 484 | if (ctype_ispointer(ct->info)) { |
| 492 | CTSize sz; | 485 | CTSize sz; |
| @@ -635,7 +628,7 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) | |||
| 635 | TRef sp, dp; | 628 | TRef sp, dp; |
| 636 | TValue tv; | 629 | TValue tv; |
| 637 | TValue *sval = &tv; | 630 | TValue *sval = &tv; |
| 638 | setnumV(&tv, 0); | 631 | setintV(&tv, 0); |
| 639 | if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ | 632 | if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ |
| 640 | dc = ctype_rawchild(cts, df); /* Field type. */ | 633 | dc = ctype_rawchild(cts, df); /* Field type. */ |
| 641 | if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) | 634 | if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 631321d9..8077bf84 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
| @@ -63,9 +63,9 @@ typedef void (LJ_FASTCALL *RecordFunc)(jit_State *J, RecordFFData *rd); | |||
| 63 | /* Get runtime value of int argument. */ | 63 | /* Get runtime value of int argument. */ |
| 64 | static int32_t argv2int(jit_State *J, TValue *o) | 64 | static int32_t argv2int(jit_State *J, TValue *o) |
| 65 | { | 65 | { |
| 66 | if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o))) | 66 | if (!tvisnumber(o) && !(tvisstr(o) && lj_str_tonumber(strV(o), o))) |
| 67 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 67 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
| 68 | return lj_num2bit(numV(o)); | 68 | return tvisint(o) ? intV(o) : lj_num2int(numV(o)); |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | /* Get runtime value of string argument. */ | 71 | /* Get runtime value of string argument. */ |
| @@ -75,9 +75,12 @@ static GCstr *argv2str(jit_State *J, TValue *o) | |||
| 75 | return strV(o); | 75 | return strV(o); |
| 76 | } else { | 76 | } else { |
| 77 | GCstr *s; | 77 | GCstr *s; |
| 78 | if (!tvisnum(o)) | 78 | if (!tvisnumber(o)) |
| 79 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 79 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
| 80 | s = lj_str_fromnum(J->L, &o->n); | 80 | if (tvisint(o)) |
| 81 | s = lj_str_fromint(J->L, intV(o)); | ||
| 82 | else | ||
| 83 | s = lj_str_fromnum(J->L, &o->n); | ||
| 81 | setstrV(J->L, o, s); | 84 | setstrV(J->L, o, s); |
| 82 | return s; | 85 | return s; |
| 83 | } | 86 | } |
| @@ -128,7 +131,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd) | |||
| 128 | { | 131 | { |
| 129 | /* Arguments already specialized. Result is a constant string. Neat, huh? */ | 132 | /* Arguments already specialized. Result is a constant string. Neat, huh? */ |
| 130 | uint32_t t; | 133 | uint32_t t; |
| 131 | if (tvisnum(&rd->argv[0])) | 134 | if (tvisnumber(&rd->argv[0])) |
| 132 | t = ~LJ_TNUMX; | 135 | t = ~LJ_TNUMX; |
| 133 | else if (LJ_64 && tvislightud(&rd->argv[0])) | 136 | else if (LJ_64 && tvislightud(&rd->argv[0])) |
| 134 | t = ~LJ_TLIGHTUD; | 137 | t = ~LJ_TLIGHTUD; |
| @@ -255,7 +258,7 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) | |||
| 255 | TRef tr = J->base[0]; | 258 | TRef tr = J->base[0]; |
| 256 | TRef base = J->base[1]; | 259 | TRef base = J->base[1]; |
| 257 | if (tr && base) { | 260 | if (tr && base) { |
| 258 | base = lj_ir_toint(J, base); | 261 | base = lj_opt_narrow_toint(J, base); |
| 259 | if (!tref_isk(base) || IR(tref_ref(base))->i != 10) | 262 | if (!tref_isk(base) || IR(tref_ref(base))->i != 10) |
| 260 | recff_nyiu(J); | 263 | recff_nyiu(J); |
| 261 | } | 264 | } |
| @@ -332,12 +335,12 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) | |||
| 332 | RecordIndex ix; | 335 | RecordIndex ix; |
| 333 | ix.tab = J->base[0]; | 336 | ix.tab = J->base[0]; |
| 334 | if (tref_istab(ix.tab)) { | 337 | if (tref_istab(ix.tab)) { |
| 335 | if (!tvisnum(&rd->argv[1])) /* No support for string coercion. */ | 338 | if (!tvisnumber(&rd->argv[1])) /* No support for string coercion. */ |
| 336 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 339 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
| 337 | setnumV(&ix.keyv, numV(&rd->argv[1])+(lua_Number)1); | 340 | setintV(&ix.keyv, numberVint(&rd->argv[1])+1); |
| 338 | settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); | 341 | settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); |
| 339 | ix.val = 0; ix.idxchain = 0; | 342 | ix.val = 0; ix.idxchain = 0; |
| 340 | ix.key = lj_ir_toint(J, J->base[1]); | 343 | ix.key = lj_opt_narrow_toint(J, J->base[1]); |
| 341 | J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); | 344 | J->base[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); |
| 342 | J->base[1] = lj_record_idx(J, &ix); | 345 | J->base[1] = lj_record_idx(J, &ix); |
| 343 | rd->nres = tref_isnil(J->base[1]) ? 0 : 2; | 346 | rd->nres = tref_isnil(J->base[1]) ? 0 : 2; |
| @@ -525,26 +528,26 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) | |||
| 525 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ | 528 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ |
| 526 | static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) | 529 | static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) |
| 527 | { | 530 | { |
| 528 | TRef tr = lj_ir_tobit(J, J->base[0]); | 531 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
| 529 | J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); | 532 | J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); |
| 530 | } | 533 | } |
| 531 | 534 | ||
| 532 | /* Record N-ary bit.band, bit.bor, bit.bxor. */ | 535 | /* Record N-ary bit.band, bit.bor, bit.bxor. */ |
| 533 | static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) | 536 | static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) |
| 534 | { | 537 | { |
| 535 | TRef tr = lj_ir_tobit(J, J->base[0]); | 538 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
| 536 | uint32_t op = rd->data; | 539 | uint32_t op = rd->data; |
| 537 | BCReg i; | 540 | BCReg i; |
| 538 | for (i = 1; J->base[i] != 0; i++) | 541 | for (i = 1; J->base[i] != 0; i++) |
| 539 | tr = emitir(IRTI(op), tr, lj_ir_tobit(J, J->base[i])); | 542 | tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); |
| 540 | J->base[0] = tr; | 543 | J->base[0] = tr; |
| 541 | } | 544 | } |
| 542 | 545 | ||
| 543 | /* Record bit shifts. */ | 546 | /* Record bit shifts. */ |
| 544 | static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) | 547 | static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) |
| 545 | { | 548 | { |
| 546 | TRef tr = lj_ir_tobit(J, J->base[0]); | 549 | TRef tr = lj_opt_narrow_tobit(J, J->base[0]); |
| 547 | TRef tsh = lj_ir_tobit(J, J->base[1]); | 550 | TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); |
| 548 | if (!(rd->data < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | 551 | if (!(rd->data < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && |
| 549 | !tref_isk(tsh)) | 552 | !tref_isk(tsh)) |
| 550 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); | 553 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); |
| @@ -570,25 +573,25 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) | |||
| 570 | int32_t start, end; | 573 | int32_t start, end; |
| 571 | if (rd->data) { /* string.sub(str, start [,end]) */ | 574 | if (rd->data) { /* string.sub(str, start [,end]) */ |
| 572 | start = argv2int(J, &rd->argv[1]); | 575 | start = argv2int(J, &rd->argv[1]); |
| 573 | trstart = lj_ir_toint(J, J->base[1]); | 576 | trstart = lj_opt_narrow_toint(J, J->base[1]); |
| 574 | trend = J->base[2]; | 577 | trend = J->base[2]; |
| 575 | if (tref_isnil(trend)) { | 578 | if (tref_isnil(trend)) { |
| 576 | trend = lj_ir_kint(J, -1); | 579 | trend = lj_ir_kint(J, -1); |
| 577 | end = -1; | 580 | end = -1; |
| 578 | } else { | 581 | } else { |
| 579 | trend = lj_ir_toint(J, trend); | 582 | trend = lj_opt_narrow_toint(J, trend); |
| 580 | end = argv2int(J, &rd->argv[2]); | 583 | end = argv2int(J, &rd->argv[2]); |
| 581 | } | 584 | } |
| 582 | } else { /* string.byte(str, [,start [,end]]) */ | 585 | } else { /* string.byte(str, [,start [,end]]) */ |
| 583 | if (J->base[1]) { | 586 | if (J->base[1]) { |
| 584 | start = argv2int(J, &rd->argv[1]); | 587 | start = argv2int(J, &rd->argv[1]); |
| 585 | trstart = lj_ir_toint(J, J->base[1]); | 588 | trstart = lj_opt_narrow_toint(J, J->base[1]); |
| 586 | trend = J->base[2]; | 589 | trend = J->base[2]; |
| 587 | if (tref_isnil(trend)) { | 590 | if (tref_isnil(trend)) { |
| 588 | trend = trstart; | 591 | trend = trstart; |
| 589 | end = start; | 592 | end = start; |
| 590 | } else { | 593 | } else { |
| 591 | trend = lj_ir_toint(J, trend); | 594 | trend = lj_opt_narrow_toint(J, trend); |
| 592 | end = argv2int(J, &rd->argv[2]); | 595 | end = argv2int(J, &rd->argv[2]); |
| 593 | } | 596 | } |
| 594 | } else { | 597 | } else { |
diff --git a/src/lj_ir.c b/src/lj_ir.c index 1d57938e..721cfd0f 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c | |||
| @@ -426,32 +426,6 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr) | |||
| 426 | return tr; | 426 | return tr; |
| 427 | } | 427 | } |
| 428 | 428 | ||
| 429 | /* Convert from number or string to bitop operand (overflow wrapped). */ | ||
| 430 | TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr) | ||
| 431 | { | ||
| 432 | if (!tref_isinteger(tr)) { | ||
| 433 | if (tref_isstr(tr)) | ||
| 434 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
| 435 | else if (!tref_isnum(tr)) | ||
| 436 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
| 437 | tr = emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J)); | ||
| 438 | } | ||
| 439 | return tr; | ||
| 440 | } | ||
| 441 | |||
| 442 | /* Convert from number or string to integer (overflow undefined). */ | ||
| 443 | TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr) | ||
| 444 | { | ||
| 445 | if (!tref_isinteger(tr)) { | ||
| 446 | if (tref_isstr(tr)) | ||
| 447 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
| 448 | else if (!tref_isnum(tr)) | ||
| 449 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
| 450 | tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); | ||
| 451 | } | ||
| 452 | return tr; | ||
| 453 | } | ||
| 454 | |||
| 455 | /* -- Miscellaneous IR ops ------------------------------------------------ */ | 429 | /* -- Miscellaneous IR ops ------------------------------------------------ */ |
| 456 | 430 | ||
| 457 | /* Evaluate numeric comparison. */ | 431 | /* Evaluate numeric comparison. */ |
diff --git a/src/lj_ir.h b/src/lj_ir.h index 060cf562..c46bbbe0 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
| @@ -124,7 +124,7 @@ | |||
| 124 | _(XBAR, S , ___, ___) \ | 124 | _(XBAR, S , ___, ___) \ |
| 125 | \ | 125 | \ |
| 126 | /* Type conversions. */ \ | 126 | /* Type conversions. */ \ |
| 127 | _(CONV, N , ref, lit) \ | 127 | _(CONV, NW, ref, lit) \ |
| 128 | _(TOBIT, N , ref, ref) \ | 128 | _(TOBIT, N , ref, ref) \ |
| 129 | _(TOSTR, N , ref, ___) \ | 129 | _(TOSTR, N , ref, ___) \ |
| 130 | _(STRTO, N , ref, ___) \ | 130 | _(STRTO, N , ref, ___) \ |
| @@ -345,8 +345,8 @@ typedef enum { | |||
| 345 | #define IRM_AW (IRM_A|IRM_W) | 345 | #define IRM_AW (IRM_A|IRM_W) |
| 346 | #define IRM_LW (IRM_L|IRM_W) | 346 | #define IRM_LW (IRM_L|IRM_W) |
| 347 | 347 | ||
| 348 | #define irm_op1(m) (cast(IRMode, (m)&3)) | 348 | #define irm_op1(m) ((IRMode)((m)&3)) |
| 349 | #define irm_op2(m) (cast(IRMode, ((m)>>2)&3)) | 349 | #define irm_op2(m) ((IRMode)(((m)>>2)&3)) |
| 350 | #define irm_iscomm(m) ((m) & IRM_C) | 350 | #define irm_iscomm(m) ((m) & IRM_C) |
| 351 | #define irm_kind(m) ((m) & IRM_S) | 351 | #define irm_kind(m) ((m) & IRM_S) |
| 352 | 352 | ||
| @@ -401,8 +401,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1; | |||
| 401 | #define IRTG(o, t) (IRT((o), IRT_GUARD|(t))) | 401 | #define IRTG(o, t) (IRT((o), IRT_GUARD|(t))) |
| 402 | #define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT)) | 402 | #define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT)) |
| 403 | 403 | ||
| 404 | #define irt_t(t) (cast(IRType, (t).irt)) | 404 | #define irt_t(t) ((IRType)(t).irt) |
| 405 | #define irt_type(t) (cast(IRType, (t).irt & IRT_TYPE)) | 405 | #define irt_type(t) ((IRType)((t).irt & IRT_TYPE)) |
| 406 | #define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0) | 406 | #define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0) |
| 407 | #define irt_typerange(t, first, last) \ | 407 | #define irt_typerange(t, first, last) \ |
| 408 | ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first)) | 408 | ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first)) |
| @@ -441,18 +441,30 @@ typedef struct IRType1 { uint8_t irt; } IRType1; | |||
| 441 | 441 | ||
| 442 | static LJ_AINLINE IRType itype2irt(const TValue *tv) | 442 | static LJ_AINLINE IRType itype2irt(const TValue *tv) |
| 443 | { | 443 | { |
| 444 | if (tvisnum(tv)) | 444 | if (tvisint(tv)) |
| 445 | return IRT_INT; | ||
| 446 | else if (tvisnum(tv)) | ||
| 445 | return IRT_NUM; | 447 | return IRT_NUM; |
| 446 | #if LJ_64 | 448 | #if LJ_64 |
| 447 | else if (tvislightud(tv)) | 449 | else if (tvislightud(tv)) |
| 448 | return IRT_LIGHTUD; | 450 | return IRT_LIGHTUD; |
| 449 | #endif | 451 | #endif |
| 450 | else | 452 | else |
| 451 | return cast(IRType, ~itype(tv)); | 453 | return (IRType)~itype(tv); |
| 452 | } | 454 | } |
| 453 | 455 | ||
| 454 | #define irt_toitype(t) \ | 456 | static LJ_AINLINE uint32_t irt_toitype_(IRType t) |
| 455 | check_exp(!(LJ_64 && irt_islightud((t))), ~(uint32_t)irt_type((t))) | 457 | { |
| 458 | lua_assert(!LJ_64 || t != IRT_LIGHTUD); | ||
| 459 | if (LJ_DUALNUM && t > IRT_NUM) { | ||
| 460 | return LJ_TISNUM; | ||
| 461 | } else { | ||
| 462 | lua_assert(t <= IRT_NUM); | ||
| 463 | return ~(uint32_t)t; | ||
| 464 | } | ||
| 465 | } | ||
| 466 | |||
| 467 | #define irt_toitype(t) irt_toitype_(irt_type((t))) | ||
| 456 | 468 | ||
| 457 | #define irt_isguard(t) ((t).irt & IRT_GUARD) | 469 | #define irt_isguard(t) ((t).irt & IRT_GUARD) |
| 458 | #define irt_ismarked(t) ((t).irt & IRT_MARK) | 470 | #define irt_ismarked(t) ((t).irt & IRT_MARK) |
diff --git a/src/lj_iropt.h b/src/lj_iropt.h index db99c118..1c94e91c 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h | |||
| @@ -84,8 +84,6 @@ LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); | |||
| 84 | /* Convert IR operand types. */ | 84 | /* Convert IR operand types. */ |
| 85 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); | 85 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); |
| 86 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); | 86 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); |
| 87 | LJ_FUNC TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr); | ||
| 88 | LJ_FUNC TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr); | ||
| 89 | 87 | ||
| 90 | /* Miscellaneous IR ops. */ | 88 | /* Miscellaneous IR ops. */ |
| 91 | LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); | 89 | LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); |
| @@ -134,9 +132,17 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J); | |||
| 134 | 132 | ||
| 135 | /* Narrowing. */ | 133 | /* Narrowing. */ |
| 136 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); | 134 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); |
| 135 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef key); | ||
| 136 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr); | ||
| 137 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr); | ||
| 138 | #if LJ_HASFFI | ||
| 139 | LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key); | ||
| 140 | #endif | ||
| 141 | LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, | ||
| 142 | TValue *vb, TValue *vc, IROp op); | ||
| 137 | LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); | 143 | LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); |
| 138 | LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); | 144 | LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); |
| 139 | LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase); | 145 | LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); |
| 140 | 146 | ||
| 141 | /* Optimization passes. */ | 147 | /* Optimization passes. */ |
| 142 | LJ_FUNC void lj_opt_dce(jit_State *J); | 148 | LJ_FUNC void lj_opt_dce(jit_State *J); |
diff --git a/src/lj_meta.c b/src/lj_meta.c index 23f11f58..48cee510 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c | |||
| @@ -393,13 +393,27 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o) | |||
| 393 | lj_err_msg(L, LJ_ERR_FORLIM); | 393 | lj_err_msg(L, LJ_ERR_FORLIM); |
| 394 | if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2)))) | 394 | if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2)))) |
| 395 | lj_err_msg(L, LJ_ERR_FORSTEP); | 395 | lj_err_msg(L, LJ_ERR_FORSTEP); |
| 396 | #if LJ_DUALNUM | 396 | if (LJ_DUALNUM) { |
| 397 | /* Ensure all slots are integers or all slots are numbers. */ | 397 | /* Ensure all slots are integers or all slots are numbers. */ |
| 398 | if (!(tvisint(o) && tvisint(o+1) && tvisint(o+2))) { | 398 | int32_t k[3]; |
| 399 | if (tvisint(o)) setnumV(o, (lua_Number)intV(o)); | 399 | int nint = 0; |
| 400 | if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1)); | 400 | ptrdiff_t i; |
| 401 | if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2)); | 401 | for (i = 0; i <= 2; i++) { |
| 402 | if (tvisint(o+i)) { | ||
| 403 | k[i] = intV(o+i); nint++; | ||
| 404 | } else { | ||
| 405 | k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); | ||
| 406 | } | ||
| 407 | } | ||
| 408 | if (nint == 3) { /* Narrow to integers. */ | ||
| 409 | setintV(o, k[0]); | ||
| 410 | setintV(o+1, k[1]); | ||
| 411 | setintV(o+2, k[2]); | ||
| 412 | } else if (nint != 0) { /* Widen to numbers. */ | ||
| 413 | if (tvisint(o)) setnumV(o, (lua_Number)intV(o)); | ||
| 414 | if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1)); | ||
| 415 | if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2)); | ||
| 416 | } | ||
| 402 | } | 417 | } |
| 403 | #endif | ||
| 404 | } | 418 | } |
| 405 | 419 | ||
diff --git a/src/lj_meta.h b/src/lj_meta.h index 687e6c08..32b3dec3 100644 --- a/src/lj_meta.h +++ b/src/lj_meta.h | |||
| @@ -29,6 +29,6 @@ LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); | |||
| 29 | LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); | 29 | LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); |
| 30 | LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); | 30 | LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); |
| 31 | LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); | 31 | LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); |
| 32 | LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *base); | 32 | LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); |
| 33 | 33 | ||
| 34 | #endif | 34 | #endif |
diff --git a/src/lj_obj.h b/src/lj_obj.h index 88289f3e..19a2345f 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
| @@ -325,8 +325,6 @@ typedef struct GCproto { | |||
| 325 | #define proto_kgc(pt, idx) \ | 325 | #define proto_kgc(pt, idx) \ |
| 326 | check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ | 326 | check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \ |
| 327 | gcref(mref((pt)->k, GCRef)[(idx)])) | 327 | gcref(mref((pt)->k, GCRef)[(idx)])) |
| 328 | #define proto_knum(pt, idx) \ | ||
| 329 | check_exp((uintptr_t)(idx) < (pt)->sizekn, mref((pt)->k, lua_Number)[(idx)]) | ||
| 330 | #define proto_knumtv(pt, idx) \ | 328 | #define proto_knumtv(pt, idx) \ |
| 331 | check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)]) | 329 | check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)]) |
| 332 | #define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto))) | 330 | #define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto))) |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 471a4b29..e2d5c517 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
| @@ -558,7 +558,10 @@ LJFOLD(CONV KINT IRCONV_I64_INT) | |||
| 558 | LJFOLD(CONV KINT IRCONV_U64_INT) | 558 | LJFOLD(CONV KINT IRCONV_U64_INT) |
| 559 | LJFOLDF(kfold_conv_kint_i64) | 559 | LJFOLDF(kfold_conv_kint_i64) |
| 560 | { | 560 | { |
| 561 | return INT64FOLD((uint64_t)(int64_t)fleft->i); | 561 | if ((fins->op2 & IRCONV_SEXT)) |
| 562 | return INT64FOLD((uint64_t)(int64_t)fleft->i); | ||
| 563 | else | ||
| 564 | return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i); | ||
| 562 | } | 565 | } |
| 563 | 566 | ||
| 564 | LJFOLD(CONV KINT64 IRCONV_NUM_I64) | 567 | LJFOLD(CONV KINT64 IRCONV_NUM_I64) |
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 559e579e..6dd06636 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
| @@ -300,8 +300,11 @@ static void loop_unroll(jit_State *J) | |||
| 300 | } | 300 | } |
| 301 | /* Check all loop-carried dependencies for type instability. */ | 301 | /* Check all loop-carried dependencies for type instability. */ |
| 302 | if (!irt_sametype(t, irr->t)) { | 302 | if (!irt_sametype(t, irr->t)) { |
| 303 | if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num case. */ | 303 | if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num. */ |
| 304 | subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); | 304 | subst[ins] = tref_ref(emitir(IRTN(IR_CONV), ref, IRCONV_NUM_INT)); |
| 305 | else if (irt_isnum(irr->t) && irt_isinteger(t)) /* Fix num->int. */ | ||
| 306 | subst[ins] = tref_ref(emitir(IRTGI(IR_CONV), ref, | ||
| 307 | IRCONV_INT_NUM|IRCONV_CHECK)); | ||
| 305 | else if (!(irt_isinteger(t) && irt_isinteger(irr->t))) | 308 | else if (!(irt_isinteger(t) && irt_isinteger(irr->t))) |
| 306 | lj_trace_err(J, LJ_TRERR_TYPEINS); | 309 | lj_trace_err(J, LJ_TRERR_TYPEINS); |
| 307 | } | 310 | } |
| @@ -355,8 +358,8 @@ int lj_opt_loop(jit_State *J) | |||
| 355 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); | 358 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); |
| 356 | if (LJ_UNLIKELY(errcode)) { | 359 | if (LJ_UNLIKELY(errcode)) { |
| 357 | lua_State *L = J->L; | 360 | lua_State *L = J->L; |
| 358 | if (errcode == LUA_ERRRUN && tvisnum(L->top-1)) { /* Trace error? */ | 361 | if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ |
| 359 | int32_t e = lj_num2int(numV(L->top-1)); | 362 | int32_t e = numberVint(L->top-1); |
| 360 | switch ((TraceError)e) { | 363 | switch ((TraceError)e) { |
| 361 | case LJ_TRERR_TYPEINS: /* Type instability. */ | 364 | case LJ_TRERR_TYPEINS: /* Type instability. */ |
| 362 | case LJ_TRERR_GFAIL: /* Guard would always fail. */ | 365 | case LJ_TRERR_GFAIL: /* Guard would always fail. */ |
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 0a2bb6cd..1727e9b5 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** NARROW: Narrowing of numbers to integers (double to int32_t). | 2 | ** NARROW: Narrowing of numbers to integers (double to int32_t). |
| 3 | ** STRIPOV: Stripping of overflow checks. | ||
| 3 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h | 4 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h |
| 4 | */ | 5 | */ |
| 5 | 6 | ||
| @@ -16,6 +17,7 @@ | |||
| 16 | #include "lj_jit.h" | 17 | #include "lj_jit.h" |
| 17 | #include "lj_iropt.h" | 18 | #include "lj_iropt.h" |
| 18 | #include "lj_trace.h" | 19 | #include "lj_trace.h" |
| 20 | #include "lj_vm.h" | ||
| 19 | 21 | ||
| 20 | /* Rationale for narrowing optimizations: | 22 | /* Rationale for narrowing optimizations: |
| 21 | ** | 23 | ** |
| @@ -57,24 +59,34 @@ | |||
| 57 | ** | 59 | ** |
| 58 | ** A better solution is to keep all numbers as FP values and only narrow | 60 | ** A better solution is to keep all numbers as FP values and only narrow |
| 59 | ** when it's beneficial to do so. LuaJIT uses predictive narrowing for | 61 | ** when it's beneficial to do so. LuaJIT uses predictive narrowing for |
| 60 | ** induction variables and demand-driven narrowing for index expressions | 62 | ** induction variables and demand-driven narrowing for index expressions, |
| 61 | ** and bit operations. Additionally it can eliminate or hoists most of the | 63 | ** integer arguments and bit operations. Additionally it can eliminate or |
| 62 | ** resulting overflow checks. Regular arithmetic computations are never | 64 | ** hoist most of the resulting overflow checks. Regular arithmetic |
| 63 | ** narrowed to integers. | 65 | ** computations are never narrowed to integers. |
| 64 | ** | 66 | ** |
| 65 | ** The integer type in the IR has convenient wrap-around semantics and | 67 | ** The integer type in the IR has convenient wrap-around semantics and |
| 66 | ** ignores overflow. Extra operations have been added for | 68 | ** ignores overflow. Extra operations have been added for |
| 67 | ** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. | 69 | ** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. |
| 68 | ** Apart from reducing overall complexity of the compiler, this also | 70 | ** Apart from reducing overall complexity of the compiler, this also |
| 69 | ** nicely solves the problem where you want to apply algebraic | 71 | ** nicely solves the problem where you want to apply algebraic |
| 70 | ** simplifications to ADD, but not to ADDOV. And the assembler can use lea | 72 | ** simplifications to ADD, but not to ADDOV. And the x86/x64 assembler can |
| 71 | ** instead of an add for integer ADD, but not for ADDOV (lea does not | 73 | ** use lea instead of an add for integer ADD, but not for ADDOV (lea does |
| 72 | ** affect the flags, but it helps to avoid register moves). | 74 | ** not affect the flags, but it helps to avoid register moves). |
| 73 | ** | 75 | ** |
| 74 | ** Note that all of the above has to be reconsidered if LuaJIT is to be | 76 | ** |
| 75 | ** ported to architectures with slow FP operations or with no hardware FPU | 77 | ** All of the above has to be reconsidered for architectures with slow FP |
| 76 | ** at all. In the latter case an integer-only port may be the best overall | 78 | ** operations or without a hardware FPU. The dual-number mode of LuaJIT |
| 77 | ** solution (if this still meets user demands). | 79 | ** addresses this issue. Arithmetic operations are performed on integers |
| 80 | ** as far as possible and overflow checks are added as needed. | ||
| 81 | ** | ||
| 82 | ** This implies that narrowing for integer arguments and bit operations | ||
| 83 | ** should also strip overflow checks, e.g. replace ADDOV with ADD. The | ||
| 84 | ** original overflow guards are weak and can be eliminated by DCE, if | ||
| 85 | ** there's no other use. | ||
| 86 | ** | ||
| 87 | ** A slight twist is that it's usually beneficial to use overflow-checked | ||
| 88 | ** integer arithmetics if all inputs are already integers. This is the only | ||
| 89 | ** change that affects the single-number mode, too. | ||
| 78 | */ | 90 | */ |
| 79 | 91 | ||
| 80 | /* Some local macros to save typing. Undef'd at the end. */ | 92 | /* Some local macros to save typing. Undef'd at the end. */ |
| @@ -94,10 +106,10 @@ | |||
| 94 | ** already takes care of eliminating simple redundant conversions like | 106 | ** already takes care of eliminating simple redundant conversions like |
| 95 | ** CONV.int.num(CONV.num.int(x)) ==> x. | 107 | ** CONV.int.num(CONV.num.int(x)) ==> x. |
| 96 | ** | 108 | ** |
| 97 | ** But the surrounding code is FP-heavy and all arithmetic operations are | 109 | ** But the surrounding code is FP-heavy and arithmetic operations are |
| 98 | ** performed on FP numbers. Consider a common example such as 'x=t[i+1]', | 110 | ** performed on FP numbers (for the single-number mode). Consider a common |
| 99 | ** with 'i' already an integer (due to induction variable narrowing). The | 111 | ** example such as 'x=t[i+1]', with 'i' already an integer (due to induction |
| 100 | ** index expression would be recorded as | 112 | ** variable narrowing). The index expression would be recorded as |
| 101 | ** CONV.int.num(ADD(CONV.num.int(i), 1)) | 113 | ** CONV.int.num(ADD(CONV.num.int(i), 1)) |
| 102 | ** which is clearly suboptimal. | 114 | ** which is clearly suboptimal. |
| 103 | ** | 115 | ** |
| @@ -113,6 +125,9 @@ | |||
| 113 | ** FP ops remain in the IR and are eliminated by DCE since all references to | 125 | ** FP ops remain in the IR and are eliminated by DCE since all references to |
| 114 | ** them are gone. | 126 | ** them are gone. |
| 115 | ** | 127 | ** |
| 128 | ** [In dual-number mode the trace recorder already emits ADDOV etc., but | ||
| 129 | ** this can be further reduced. See below.] | ||
| 130 | ** | ||
| 116 | ** Special care has to be taken to avoid narrowing across an operation | 131 | ** Special care has to be taken to avoid narrowing across an operation |
| 117 | ** which is potentially operating on non-integral operands. One obvious | 132 | ** which is potentially operating on non-integral operands. One obvious |
| 118 | ** case is when an expression contains a non-integral constant, but ends | 133 | ** case is when an expression contains a non-integral constant, but ends |
| @@ -221,6 +236,26 @@ static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode) | |||
| 221 | bp->mode = mode; | 236 | bp->mode = mode; |
| 222 | } | 237 | } |
| 223 | 238 | ||
| 239 | /* Backpropagate overflow stripping. */ | ||
| 240 | static void narrow_stripov_backprop(NarrowConv *nc, IRRef ref, int depth) | ||
| 241 | { | ||
| 242 | jit_State *J = nc->J; | ||
| 243 | IRIns *ir = IR(ref); | ||
| 244 | if (ir->o == IR_ADDOV || ir->o == IR_SUBOV || | ||
| 245 | (ir->o == IR_MULOV && (nc->mode & IRCONV_CONVMASK) == IRCONV_ANY)) { | ||
| 246 | BPropEntry *bp = narrow_bpc_get(nc->J, ref, IRCONV_TOBIT); | ||
| 247 | if (bp) { | ||
| 248 | ref = bp->val; | ||
| 249 | } else if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) { | ||
| 250 | narrow_stripov_backprop(nc, ir->op1, depth); | ||
| 251 | narrow_stripov_backprop(nc, ir->op2, depth); | ||
| 252 | *nc->sp++ = NARROWINS(IRT(ir->o - IR_ADDOV + IR_ADD, IRT_INT), ref); | ||
| 253 | return; | ||
| 254 | } | ||
| 255 | } | ||
| 256 | *nc->sp++ = NARROWINS(NARROW_REF, ref); | ||
| 257 | } | ||
| 258 | |||
| 224 | /* Backpropagate narrowing conversion. Return number of needed conversions. */ | 259 | /* Backpropagate narrowing conversion. Return number of needed conversions. */ |
| 225 | static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) | 260 | static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) |
| 226 | { | 261 | { |
| @@ -230,24 +265,26 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) | |||
| 230 | 265 | ||
| 231 | /* Check the easy cases first. */ | 266 | /* Check the easy cases first. */ |
| 232 | if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) { | 267 | if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) { |
| 233 | if (nc->t == IRT_I64) | 268 | if ((nc->mode & IRCONV_CONVMASK) <= IRCONV_ANY) |
| 234 | *nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1); /* Reduce to sign-ext. */ | 269 | narrow_stripov_backprop(nc, ir->op1, depth+1); |
| 235 | else | 270 | else |
| 236 | *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */ | 271 | *nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */ |
| 272 | if (nc->t == IRT_I64) | ||
| 273 | *nc->sp++ = NARROWINS(NARROW_SEXT, 0); /* Sign-extend integer. */ | ||
| 237 | return 0; | 274 | return 0; |
| 238 | } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ | 275 | } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ |
| 239 | lua_Number n = ir_knum(ir)->n; | 276 | lua_Number n = ir_knum(ir)->n; |
| 240 | if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { | 277 | if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { |
| 241 | /* Allows a wider range of constants. */ | 278 | /* Allows a wider range of constants. */ |
| 242 | int64_t k64 = (int64_t)n; | 279 | int64_t k64 = (int64_t)n; |
| 243 | if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */ | 280 | if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */ |
| 244 | *nc->sp++ = NARROWINS(NARROW_INT, 0); | 281 | *nc->sp++ = NARROWINS(NARROW_INT, 0); |
| 245 | *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ | 282 | *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ |
| 246 | return 0; | 283 | return 0; |
| 247 | } | 284 | } |
| 248 | } else { | 285 | } else { |
| 249 | int32_t k = lj_num2int(n); | 286 | int32_t k = lj_num2int(n); |
| 250 | if (n == cast_num(k)) { /* Only if constant is really an integer. */ | 287 | if (n == (lua_Number)k) { /* Only if constant is really an integer. */ |
| 251 | *nc->sp++ = NARROWINS(NARROW_INT, 0); | 288 | *nc->sp++ = NARROWINS(NARROW_INT, 0); |
| 252 | *nc->sp++ = (NarrowIns)k; | 289 | *nc->sp++ = (NarrowIns)k; |
| 253 | return 0; | 290 | return 0; |
| @@ -287,7 +324,8 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) | |||
| 287 | mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX; | 324 | mode = (IRT_INT<<5)|IRT_NUM|IRCONV_INDEX; |
| 288 | bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); | 325 | bp = narrow_bpc_get(nc->J, (IRRef1)ref, mode); |
| 289 | if (bp) { | 326 | if (bp) { |
| 290 | *nc->sp++ = NARROWINS(NARROW_SEXT, bp->val); | 327 | *nc->sp++ = NARROWINS(NARROW_REF, bp->val); |
| 328 | *nc->sp++ = NARROWINS(NARROW_SEXT, 0); | ||
| 291 | return 0; | 329 | return 0; |
| 292 | } | 330 | } |
| 293 | } | 331 | } |
| @@ -326,8 +364,9 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
| 326 | } else if (op == NARROW_CONV) { | 364 | } else if (op == NARROW_CONV) { |
| 327 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ | 365 | *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ |
| 328 | } else if (op == NARROW_SEXT) { | 366 | } else if (op == NARROW_SEXT) { |
| 329 | *sp++ = emitir(IRT(IR_CONV, IRT_I64), ref, | 367 | lua_assert(sp >= nc->stack+1); |
| 330 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); | 368 | sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], |
| 369 | (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); | ||
| 331 | } else if (op == NARROW_INT) { | 370 | } else if (op == NARROW_INT) { |
| 332 | lua_assert(next < last); | 371 | lua_assert(next < last); |
| 333 | *sp++ = nc->t == IRT_I64 ? | 372 | *sp++ = nc->t == IRT_I64 ? |
| @@ -340,7 +379,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) | |||
| 340 | /* Omit some overflow checks for array indexing. See comments above. */ | 379 | /* Omit some overflow checks for array indexing. See comments above. */ |
| 341 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { | 380 | if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { |
| 342 | if (next == last && irref_isk(narrow_ref(sp[0])) && | 381 | if (next == last && irref_isk(narrow_ref(sp[0])) && |
| 343 | (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000 < 0x80000000) | 382 | (uint32_t)IR(narrow_ref(sp[0]))->i + 0x40000000u < 0x80000000u) |
| 344 | guardot = 0; | 383 | guardot = 0; |
| 345 | else /* Otherwise cache a stronger check. */ | 384 | else /* Otherwise cache a stronger check. */ |
| 346 | mode += IRCONV_CHECK-IRCONV_INDEX; | 385 | mode += IRCONV_CHECK-IRCONV_INDEX; |
| @@ -377,12 +416,123 @@ TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J) | |||
| 377 | return NEXTFOLD; | 416 | return NEXTFOLD; |
| 378 | } | 417 | } |
| 379 | 418 | ||
| 419 | /* -- Narrowing of implicit conversions ----------------------------------- */ | ||
| 420 | |||
| 421 | /* Recursively strip overflow checks. */ | ||
| 422 | static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) | ||
| 423 | { | ||
| 424 | IRRef ref = tref_ref(tr); | ||
| 425 | IRIns *ir = IR(ref); | ||
| 426 | int op = ir->o; | ||
| 427 | if (op >= IR_ADDOV && op <= lastop) { | ||
| 428 | BPropEntry *bp = narrow_bpc_get(J, ref, mode); | ||
| 429 | if (bp) { | ||
| 430 | return TREF(bp->val, irt_t(IR(bp->val)->t)); | ||
| 431 | } else { | ||
| 432 | IRRef op1 = ir->op1, op2 = ir->op2; /* The IR may be reallocated. */ | ||
| 433 | op1 = narrow_stripov(J, op1, lastop, mode); | ||
| 434 | op2 = narrow_stripov(J, op2, lastop, mode); | ||
| 435 | tr = emitir(IRT(op - IR_ADDOV + IR_ADD, | ||
| 436 | ((mode & IRCONV_DSTMASK) >> IRCONV_DSH)), op1, op2); | ||
| 437 | narrow_bpc_set(J, ref, tref_ref(tr), mode); | ||
| 438 | } | ||
| 439 | } else if (LJ_64 && (mode & IRCONV_SEXT) && !irt_is64(ir->t)) { | ||
| 440 | tr = emitir(IRT(IR_CONV, IRT_INTP), tr, mode); | ||
| 441 | } | ||
| 442 | return tr; | ||
| 443 | } | ||
| 444 | |||
| 445 | /* Narrow array index. */ | ||
| 446 | TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) | ||
| 447 | { | ||
| 448 | IRIns *ir; | ||
| 449 | lua_assert(tref_isnumber(tr)); | ||
| 450 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ | ||
| 451 | return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); | ||
| 452 | /* Omit some overflow checks for array indexing. See comments above. */ | ||
| 453 | ir = IR(tref_ref(tr)); | ||
| 454 | if ((ir->o == IR_ADDOV || ir->o == IR_SUBOV) && irref_isk(ir->op2) && | ||
| 455 | (uint32_t)IR(ir->op2)->i + 0x40000000u < 0x80000000u) | ||
| 456 | return emitir(IRTI(ir->o - IR_ADDOV + IR_ADD), ir->op1, ir->op2); | ||
| 457 | return tr; | ||
| 458 | } | ||
| 459 | |||
| 460 | /* Narrow conversion to integer operand (overflow undefined). */ | ||
| 461 | TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr) | ||
| 462 | { | ||
| 463 | if (tref_isstr(tr)) | ||
| 464 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
| 465 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ | ||
| 466 | return emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); | ||
| 467 | if (!tref_isinteger(tr)) | ||
| 468 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
| 469 | /* | ||
| 470 | ** Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. | ||
| 471 | ** Use IRCONV_TOBIT for the cache entries, since the semantics are the same. | ||
| 472 | */ | ||
| 473 | return narrow_stripov(J, tr, IR_MULOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT); | ||
| 474 | } | ||
| 475 | |||
| 476 | /* Narrow conversion to bitop operand (overflow wrapped). */ | ||
| 477 | TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) | ||
| 478 | { | ||
| 479 | if (tref_isstr(tr)) | ||
| 480 | tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); | ||
| 481 | if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ | ||
| 482 | return emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J)); | ||
| 483 | if (!tref_isinteger(tr)) | ||
| 484 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
| 485 | /* | ||
| 486 | ** Wrapped overflow semantics allow stripping of ADDOV and SUBOV. | ||
| 487 | ** MULOV cannot be stripped due to precision widening. | ||
| 488 | */ | ||
| 489 | return narrow_stripov(J, tr, IR_SUBOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT); | ||
| 490 | } | ||
| 491 | |||
| 492 | #if LJ_HASFFI | ||
| 493 | /* Narrow C array index (overflow undefined). */ | ||
| 494 | TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) | ||
| 495 | { | ||
| 496 | lua_assert(tref_isnumber(tr)); | ||
| 497 | if (tref_isnum(tr)) | ||
| 498 | return emitir(IRTI(IR_CONV), tr, | ||
| 499 | (IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY); | ||
| 500 | /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ | ||
| 501 | return narrow_stripov(J, tr, IR_MULOV, | ||
| 502 | LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : | ||
| 503 | ((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT)); | ||
| 504 | } | ||
| 505 | #endif | ||
| 506 | |||
| 380 | /* -- Narrowing of arithmetic operators ----------------------------------- */ | 507 | /* -- Narrowing of arithmetic operators ----------------------------------- */ |
| 381 | 508 | ||
| 382 | /* Check whether a number fits into an int32_t (-0 is ok, too). */ | 509 | /* Check whether a number fits into an int32_t (-0 is ok, too). */ |
| 383 | static int numisint(lua_Number n) | 510 | static int numisint(lua_Number n) |
| 384 | { | 511 | { |
| 385 | return (n == cast_num(lj_num2int(n))); | 512 | return (n == (lua_Number)lj_num2int(n)); |
| 513 | } | ||
| 514 | |||
| 515 | /* Narrowing of arithmetic operations. */ | ||
| 516 | TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, | ||
| 517 | TValue *vb, TValue *vc, IROp op) | ||
| 518 | { | ||
| 519 | if (tref_isstr(rb)) { | ||
| 520 | rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0); | ||
| 521 | lj_str_tonum(strV(vb), vb); | ||
| 522 | } | ||
| 523 | if (tref_isstr(rc)) { | ||
| 524 | rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); | ||
| 525 | lj_str_tonum(strV(vc), vc); | ||
| 526 | } | ||
| 527 | /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ | ||
| 528 | if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && | ||
| 529 | tref_isinteger(rb) && tref_isinteger(rc) && | ||
| 530 | numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), | ||
| 531 | (int)op - (int)IR_ADD))) | ||
| 532 | return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); | ||
| 533 | if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); | ||
| 534 | if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); | ||
| 535 | return emitir(IRTN(op), rb, rc); | ||
| 386 | } | 536 | } |
| 387 | 537 | ||
| 388 | /* Narrowing of modulo operator. */ | 538 | /* Narrowing of modulo operator. */ |
| @@ -409,16 +559,15 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc) | |||
| 409 | /* Narrowing of power operator or math.pow. */ | 559 | /* Narrowing of power operator or math.pow. */ |
| 410 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) | 560 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) |
| 411 | { | 561 | { |
| 412 | lua_Number n; | ||
| 413 | if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) | 562 | if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) |
| 414 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 563 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
| 415 | n = numV(vc); | ||
| 416 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ | 564 | /* Narrowing must be unconditional to preserve (-x)^i semantics. */ |
| 417 | if (numisint(n)) { | 565 | if (tvisint(vc) || numisint(numV(vc))) { |
| 418 | int checkrange = 0; | 566 | int checkrange = 0; |
| 419 | /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ | 567 | /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ |
| 420 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { | 568 | if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { |
| 421 | if (!(n >= -65536.0 && n <= 65536.0)) goto split_pow; | 569 | int32_t k = numberVint(vc); |
| 570 | if (!(k >= -65536 && k <= 65536)) goto split_pow; | ||
| 422 | checkrange = 1; | 571 | checkrange = 1; |
| 423 | } | 572 | } |
| 424 | if (!tref_isinteger(rc)) { | 573 | if (!tref_isinteger(rc)) { |
| @@ -448,20 +597,28 @@ split_pow: | |||
| 448 | 597 | ||
| 449 | /* -- Predictive narrowing of induction variables ------------------------- */ | 598 | /* -- Predictive narrowing of induction variables ------------------------- */ |
| 450 | 599 | ||
| 600 | /* Narrow a single runtime value. */ | ||
| 601 | static int narrow_forl(jit_State *J, cTValue *o) | ||
| 602 | { | ||
| 603 | if (tvisint(o)) return 1; | ||
| 604 | if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); | ||
| 605 | return 0; | ||
| 606 | } | ||
| 607 | |||
| 451 | /* Narrow the FORL index type by looking at the runtime values. */ | 608 | /* Narrow the FORL index type by looking at the runtime values. */ |
| 452 | IRType lj_opt_narrow_forl(cTValue *forbase) | 609 | IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) |
| 453 | { | 610 | { |
| 454 | lua_assert(tvisnum(&forbase[FORL_IDX]) && | 611 | lua_assert(tvisnumber(&tv[FORL_IDX]) && |
| 455 | tvisnum(&forbase[FORL_STOP]) && | 612 | tvisnumber(&tv[FORL_STOP]) && |
| 456 | tvisnum(&forbase[FORL_STEP])); | 613 | tvisnumber(&tv[FORL_STEP])); |
| 457 | /* Narrow only if the runtime values of start/stop/step are all integers. */ | 614 | /* Narrow only if the runtime values of start/stop/step are all integers. */ |
| 458 | if (numisint(numV(&forbase[FORL_IDX])) && | 615 | if (narrow_forl(J, &tv[FORL_IDX]) && |
| 459 | numisint(numV(&forbase[FORL_STOP])) && | 616 | narrow_forl(J, &tv[FORL_STOP]) && |
| 460 | numisint(numV(&forbase[FORL_STEP]))) { | 617 | narrow_forl(J, &tv[FORL_STEP])) { |
| 461 | /* And if the loop index can't possibly overflow. */ | 618 | /* And if the loop index can't possibly overflow. */ |
| 462 | lua_Number step = numV(&forbase[FORL_STEP]); | 619 | lua_Number step = numberVnum(&tv[FORL_STEP]); |
| 463 | lua_Number sum = numV(&forbase[FORL_STOP]) + step; | 620 | lua_Number sum = numberVnum(&tv[FORL_STOP]) + step; |
| 464 | if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0) | 621 | if (0 <= step ? (sum <= 2147483647.0) : (sum >= -2147483648.0)) |
| 465 | return IRT_INT; | 622 | return IRT_INT; |
| 466 | } | 623 | } |
| 467 | return IRT_NUM; | 624 | return IRT_NUM; |
diff --git a/src/lj_record.c b/src/lj_record.c index 2bfd2608..613e458e 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "lj_err.h" | 13 | #include "lj_err.h" |
| 14 | #include "lj_str.h" | 14 | #include "lj_str.h" |
| 15 | #include "lj_tab.h" | 15 | #include "lj_tab.h" |
| 16 | #include "lj_meta.h" | ||
| 16 | #include "lj_frame.h" | 17 | #include "lj_frame.h" |
| 17 | #include "lj_bc.h" | 18 | #include "lj_bc.h" |
| 18 | #include "lj_ff.h" | 19 | #include "lj_ff.h" |
| @@ -102,7 +103,7 @@ static void rec_check_slots(jit_State *J) | |||
| 102 | lua_assert((J->slot[s+1] & TREF_FRAME)); | 103 | lua_assert((J->slot[s+1] & TREF_FRAME)); |
| 103 | depth++; | 104 | depth++; |
| 104 | } else { | 105 | } else { |
| 105 | if (tvisnum(tv)) | 106 | if (tvisnumber(tv)) |
| 106 | lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ | 107 | lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ |
| 107 | else | 108 | else |
| 108 | lua_assert(itype2irt(tv) == tref_type(tr)); | 109 | lua_assert(itype2irt(tv) == tref_type(tr)); |
| @@ -197,6 +198,7 @@ typedef enum { | |||
| 197 | static void canonicalize_slots(jit_State *J) | 198 | static void canonicalize_slots(jit_State *J) |
| 198 | { | 199 | { |
| 199 | BCReg s; | 200 | BCReg s; |
| 201 | if (LJ_DUALNUM) return; | ||
| 200 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { | 202 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { |
| 201 | TRef tr = J->slot[s]; | 203 | TRef tr = J->slot[s]; |
| 202 | if (tref_isinteger(tr)) { | 204 | if (tref_isinteger(tr)) { |
| @@ -254,16 +256,16 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) | |||
| 254 | } | 256 | } |
| 255 | if (op == BC_KSHORT) { | 257 | if (op == BC_KSHORT) { |
| 256 | int32_t k = (int32_t)(int16_t)bc_d(ins); | 258 | int32_t k = (int32_t)(int16_t)bc_d(ins); |
| 257 | return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, cast_num(k)); | 259 | return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, (lua_Number)k); |
| 258 | } else { | 260 | } else { |
| 259 | lua_Number n = proto_knum(J->pt, bc_d(ins)); | 261 | cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); |
| 260 | if (t == IRT_INT) { | 262 | if (t == IRT_INT) { |
| 261 | int32_t k = lj_num2int(n); | 263 | int32_t k = numberVint(tv); |
| 262 | if (n == cast_num(k)) /* -0 is ok here. */ | 264 | if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ |
| 263 | return lj_ir_kint(J, k); | 265 | return lj_ir_kint(J, k); |
| 264 | return 0; /* Type mismatch. */ | 266 | return 0; /* Type mismatch. */ |
| 265 | } else { | 267 | } else { |
| 266 | return lj_ir_knum(J, n); | 268 | return lj_ir_knum(J, numberVnum(tv)); |
| 267 | } | 269 | } |
| 268 | } | 270 | } |
| 269 | } | 271 | } |
| @@ -273,41 +275,47 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) | |||
| 273 | return 0; /* No assignment to this slot found? */ | 275 | return 0; /* No assignment to this slot found? */ |
| 274 | } | 276 | } |
| 275 | 277 | ||
| 278 | /* Load and optionally convert a FORI argument from a slot. */ | ||
| 279 | static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode) | ||
| 280 | { | ||
| 281 | int conv = (tvisint(&J->L->base[slot]) != (t==IRT_INT)) ? IRSLOAD_CONVERT : 0; | ||
| 282 | return sloadt(J, (int32_t)slot, | ||
| 283 | t + (((mode & IRSLOAD_TYPECHECK) || | ||
| 284 | (conv && t == IRT_INT && !(mode >> 16))) ? | ||
| 285 | IRT_GUARD : 0), | ||
| 286 | mode + conv); | ||
| 287 | } | ||
| 288 | |||
| 276 | /* Peek before FORI to find a const initializer. Otherwise load from slot. */ | 289 | /* Peek before FORI to find a const initializer. Otherwise load from slot. */ |
| 277 | static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, IRType t) | 290 | static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, |
| 291 | IRType t, int mode) | ||
| 278 | { | 292 | { |
| 279 | TRef tr = J->base[slot]; | 293 | TRef tr = J->base[slot]; |
| 280 | if (!tr) { | 294 | if (!tr) { |
| 281 | tr = find_kinit(J, fori, slot, t); | 295 | tr = find_kinit(J, fori, slot, t); |
| 282 | if (!tr) | 296 | if (!tr) |
| 283 | tr = sloadt(J, (int32_t)slot, | 297 | tr = fori_load(J, slot, t, mode); |
| 284 | t == IRT_INT ? (IRT_INT|IRT_GUARD) : t, | ||
| 285 | t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_READONLY|IRSLOAD_INHERIT) : | ||
| 286 | (IRSLOAD_READONLY|IRSLOAD_INHERIT)); | ||
| 287 | } | 298 | } |
| 288 | return tr; | 299 | return tr; |
| 289 | } | 300 | } |
| 290 | 301 | ||
| 291 | /* In-place coercion of FORI arguments. */ | 302 | /* Return the direction of the FOR loop iterator. |
| 292 | static lua_Number for_coerce(jit_State *J, TValue *o) | 303 | ** It's important to exactly reproduce the semantics of the interpreter. |
| 304 | */ | ||
| 305 | static int rec_for_direction(cTValue *o) | ||
| 293 | { | 306 | { |
| 294 | if (!tvisnum(o) && !(tvisstr(o) && lj_str_tonum(strV(o), o))) | 307 | return (tvisint(o) ? intV(o) : (int32_t)o->u32.hi) >= 0; |
| 295 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
| 296 | return numV(o); | ||
| 297 | } | 308 | } |
| 298 | 309 | ||
| 299 | /* Simulate the runtime behavior of the FOR loop iterator. | 310 | /* Simulate the runtime behavior of the FOR loop iterator. */ |
| 300 | ** It's important to exactly reproduce the semantics of the interpreter. | 311 | static LoopEvent rec_for_iter(IROp *op, cTValue *o, int isforl) |
| 301 | */ | ||
| 302 | static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl) | ||
| 303 | { | 312 | { |
| 304 | TValue *forbase = &J->L->base[ra]; | 313 | lua_Number stopv = numberVnum(&o[FORL_STOP]); |
| 305 | lua_Number stopv = for_coerce(J, &forbase[FORL_STOP]); | 314 | lua_Number idxv = numberVnum(&o[FORL_IDX]); |
| 306 | lua_Number idxv = for_coerce(J, &forbase[FORL_IDX]); | 315 | lua_Number stepv = numberVnum(&o[FORL_STEP]); |
| 307 | lua_Number stepv = for_coerce(J, &forbase[FORL_STEP]); | ||
| 308 | if (isforl) | 316 | if (isforl) |
| 309 | idxv += stepv; | 317 | idxv += stepv; |
| 310 | if ((int32_t)forbase[FORL_STEP].u32.hi >= 0) { | 318 | if (rec_for_direction(&o[FORL_STEP])) { |
| 311 | if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; } | 319 | if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; } |
| 312 | *op = IR_GT; return LOOPEV_LEAVE; | 320 | *op = IR_GT; return LOOPEV_LEAVE; |
| 313 | } else { | 321 | } else { |
| @@ -316,44 +324,123 @@ static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl) | |||
| 316 | } | 324 | } |
| 317 | } | 325 | } |
| 318 | 326 | ||
| 327 | /* Record checks for FOR loop overflow and step direction. */ | ||
| 328 | static void rec_for_check(jit_State *J, IRType t, int dir, TRef stop, TRef step) | ||
| 329 | { | ||
| 330 | if (!tref_isk(step)) { | ||
| 331 | /* Non-constant step: need a guard for the direction. */ | ||
| 332 | TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J); | ||
| 333 | emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero); | ||
| 334 | /* Add hoistable overflow checks for a narrowed FORL index. */ | ||
| 335 | if (t == IRT_INT) { | ||
| 336 | if (tref_isk(stop)) { | ||
| 337 | /* Constant stop: optimize check away or to a range check for step. */ | ||
| 338 | int32_t k = IR(tref_ref(stop))->i; | ||
| 339 | if (dir) { | ||
| 340 | if (k > 0) | ||
| 341 | emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k)); | ||
| 342 | } else { | ||
| 343 | if (k < 0) | ||
| 344 | emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k)); | ||
| 345 | } | ||
| 346 | } else { | ||
| 347 | /* Stop+step variable: need full overflow check. */ | ||
| 348 | TRef tr = emitir(IRTGI(IR_ADDOV), step, stop); | ||
| 349 | emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */ | ||
| 350 | } | ||
| 351 | } | ||
| 352 | } else if (t == IRT_INT && !tref_isk(stop)) { | ||
| 353 | /* Constant step: optimize overflow check to a range check for stop. */ | ||
| 354 | int32_t k = IR(tref_ref(step))->i; | ||
| 355 | k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; | ||
| 356 | emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k)); | ||
| 357 | } | ||
| 358 | } | ||
| 359 | |||
| 360 | /* Record a FORL instruction. */ | ||
| 361 | static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, | ||
| 362 | int init) | ||
| 363 | { | ||
| 364 | BCReg ra = bc_a(*fori); | ||
| 365 | cTValue *tv = &J->L->base[ra]; | ||
| 366 | TRef idx = J->base[ra+FORL_IDX]; | ||
| 367 | IRType t = idx ? tref_type(idx) : | ||
| 368 | (init || LJ_DUALNUM) ? lj_opt_narrow_forl(J, tv) : IRT_NUM; | ||
| 369 | int mode = IRSLOAD_INHERIT + | ||
| 370 | ((!LJ_DUALNUM || tvisint(tv) == (t == IRT_INT)) ? IRSLOAD_READONLY : 0); | ||
| 371 | TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); | ||
| 372 | TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); | ||
| 373 | int tc, dir = rec_for_direction(&tv[FORL_STEP]); | ||
| 374 | lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); | ||
| 375 | scev->t.irt = t; | ||
| 376 | scev->dir = dir; | ||
| 377 | scev->stop = tref_ref(stop); | ||
| 378 | scev->step = tref_ref(step); | ||
| 379 | if (init) | ||
| 380 | rec_for_check(J, t, dir, stop, step); | ||
| 381 | scev->start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT)); | ||
| 382 | tc = (LJ_DUALNUM && | ||
| 383 | !(scev->start && irref_isk(scev->stop) && irref_isk(scev->step))) ? | ||
| 384 | IRSLOAD_TYPECHECK : 0; | ||
| 385 | if (tc) { | ||
| 386 | J->base[ra+FORL_STOP] = stop; | ||
| 387 | J->base[ra+FORL_STEP] = step; | ||
| 388 | } | ||
| 389 | if (!idx) | ||
| 390 | idx = fori_load(J, ra+FORL_IDX, t, | ||
| 391 | IRSLOAD_INHERIT + tc + (J->scev.start << 16)); | ||
| 392 | if (!init) | ||
| 393 | J->base[ra+FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); | ||
| 394 | J->base[ra+FORL_EXT] = idx; | ||
| 395 | scev->idx = tref_ref(idx); | ||
| 396 | J->maxslot = ra+FORL_EXT+1; | ||
| 397 | } | ||
| 398 | |||
| 319 | /* Record FORL/JFORL or FORI/JFORI. */ | 399 | /* Record FORL/JFORL or FORI/JFORI. */ |
| 320 | static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | 400 | static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) |
| 321 | { | 401 | { |
| 322 | BCReg ra = bc_a(*fori); | 402 | BCReg ra = bc_a(*fori); |
| 323 | IROp op; | 403 | TValue *tv = &J->L->base[ra]; |
| 324 | LoopEvent ev = for_iter(J, &op, ra, isforl); | ||
| 325 | TRef *tr = &J->base[ra]; | 404 | TRef *tr = &J->base[ra]; |
| 326 | TRef idx, stop; | 405 | IROp op; |
| 406 | LoopEvent ev; | ||
| 407 | TRef stop; | ||
| 327 | IRType t; | 408 | IRType t; |
| 328 | if (isforl) { /* Handle FORL/JFORL opcodes. */ | 409 | if (isforl) { /* Handle FORL/JFORL opcodes. */ |
| 329 | TRef step; | 410 | TRef idx = tr[FORL_IDX]; |
| 330 | idx = tr[FORL_IDX]; | ||
| 331 | if (tref_ref(idx) == J->scev.idx) { | 411 | if (tref_ref(idx) == J->scev.idx) { |
| 332 | t = J->scev.t.irt; | 412 | t = J->scev.t.irt; |
| 333 | stop = J->scev.stop; | 413 | stop = J->scev.stop; |
| 334 | step = J->scev.step; | 414 | idx = emitir(IRT(IR_ADD, t), idx, J->scev.step); |
| 415 | tr[FORL_EXT] = tr[FORL_IDX] = idx; | ||
| 335 | } else { | 416 | } else { |
| 336 | if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0); | 417 | ScEvEntry scev; |
| 337 | t = tref_type(idx); | 418 | rec_for_loop(J, fori, &scev, 0); |
| 338 | stop = fori_arg(J, fori, ra+FORL_STOP, t); | 419 | t = scev.t.irt; |
| 339 | step = fori_arg(J, fori, ra+FORL_STEP, t); | 420 | stop = scev.stop; |
| 340 | } | 421 | } |
| 341 | tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); | ||
| 342 | } else { /* Handle FORI/JFORI opcodes. */ | 422 | } else { /* Handle FORI/JFORI opcodes. */ |
| 343 | BCReg i; | 423 | BCReg i; |
| 344 | t = IRT_NUM; | 424 | lj_meta_for(J->L, tv); |
| 425 | t = lj_opt_narrow_forl(J, tv); | ||
| 345 | for (i = FORL_IDX; i <= FORL_STEP; i++) { | 426 | for (i = FORL_IDX; i <= FORL_STEP; i++) { |
| 346 | lua_assert(J->base[ra+i] != 0); /* Assumes the slots are already set. */ | 427 | lua_assert(tref_isnumber_str(tr[i])); |
| 347 | tr[i] = lj_ir_tonum(J, J->base[ra+i]); | 428 | if (tref_isstr(tr[i])) |
| 429 | tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); | ||
| 430 | if (t == IRT_INT) { | ||
| 431 | if (!tref_isinteger(tr[i])) | ||
| 432 | tr[i] = emitir(IRTI(IR_CONV), tr[i], IRCONV_INT_NUM|IRCONV_CHECK); | ||
| 433 | } else { | ||
| 434 | if (!tref_isnum(tr[i])) | ||
| 435 | tr[i] = emitir(IRTN(IR_CONV), tr[i], IRCONV_NUM_INT); | ||
| 436 | } | ||
| 348 | } | 437 | } |
| 349 | idx = tr[FORL_IDX]; | 438 | tr[FORL_EXT] = tr[FORL_IDX]; |
| 350 | stop = tr[FORL_STOP]; | 439 | stop = tr[FORL_STOP]; |
| 351 | if (!tref_isk(tr[FORL_STEP])) /* Non-const step: need direction guard. */ | 440 | rec_for_check(J, t, rec_for_direction(&tv[FORL_STEP]), stop, tr[FORL_STEP]); |
| 352 | emitir(IRTG(((op-IR_LT)>>1)+IR_LT, IRT_NUM), | ||
| 353 | tr[FORL_STEP], lj_ir_knum_zero(J)); | ||
| 354 | } | 441 | } |
| 355 | 442 | ||
| 356 | tr[FORL_EXT] = idx; | 443 | ev = rec_for_iter(&op, tv, isforl); |
| 357 | if (ev == LOOPEV_LEAVE) { | 444 | if (ev == LOOPEV_LEAVE) { |
| 358 | J->maxslot = ra+FORL_EXT+1; | 445 | J->maxslot = ra+FORL_EXT+1; |
| 359 | J->pc = fori+1; | 446 | J->pc = fori+1; |
| @@ -363,7 +450,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) | |||
| 363 | } | 450 | } |
| 364 | lj_snap_add(J); | 451 | lj_snap_add(J); |
| 365 | 452 | ||
| 366 | emitir(IRTG(op, t), idx, stop); | 453 | emitir(IRTG(op, t), tr[FORL_IDX], stop); |
| 367 | 454 | ||
| 368 | if (ev == LOOPEV_LEAVE) { | 455 | if (ev == LOOPEV_LEAVE) { |
| 369 | J->maxslot = ra; | 456 | J->maxslot = ra; |
| @@ -870,7 +957,7 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) | |||
| 870 | if (ref == J->scev.idx) { | 957 | if (ref == J->scev.idx) { |
| 871 | int32_t stop; | 958 | int32_t stop; |
| 872 | lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); | 959 | lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); |
| 873 | stop = lj_num2int(numV(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP])); | 960 | stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); |
| 874 | /* Runtime value for stop of loop is within bounds? */ | 961 | /* Runtime value for stop of loop is within bounds? */ |
| 875 | if ((int64_t)stop + ofs < (int64_t)asize) { | 962 | if ((int64_t)stop + ofs < (int64_t)asize) { |
| 876 | /* Emit invariant bounds check for stop. */ | 963 | /* Emit invariant bounds check for stop. */ |
| @@ -897,15 +984,12 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) | |||
| 897 | /* Integer keys are looked up in the array part first. */ | 984 | /* Integer keys are looked up in the array part first. */ |
| 898 | key = ix->key; | 985 | key = ix->key; |
| 899 | if (tref_isnumber(key)) { | 986 | if (tref_isnumber(key)) { |
| 900 | lua_Number n = numV(&ix->keyv); | 987 | int32_t k = numberVint(&ix->keyv); |
| 901 | int32_t k = lj_num2int(n); | 988 | if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) |
| 902 | lua_assert(tvisnum(&ix->keyv)); | 989 | k = LJ_MAX_ASIZE; |
| 903 | /* Potential array key? */ | 990 | if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ |
| 904 | if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) { | 991 | TRef ikey = lj_opt_narrow_index(J, key); |
| 905 | TRef asizeref, ikey = key; | 992 | TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); |
| 906 | if (!tref_isinteger(ikey)) | ||
| 907 | ikey = emitir(IRTGI(IR_CONV), ikey, IRCONV_INT_NUM|IRCONV_INDEX); | ||
| 908 | asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); | ||
| 909 | if ((MSize)k < t->asize) { /* Currently an array key? */ | 993 | if ((MSize)k < t->asize) { /* Currently an array key? */ |
| 910 | TRef arrayref; | 994 | TRef arrayref; |
| 911 | rec_idx_abc(J, asizeref, ikey, t->asize); | 995 | rec_idx_abc(J, asizeref, ikey, t->asize); |
| @@ -1081,7 +1165,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) | |||
| 1081 | } else { | 1165 | } else { |
| 1082 | keybarrier = 0; /* Previous non-nil value kept the key alive. */ | 1166 | keybarrier = 0; /* Previous non-nil value kept the key alive. */ |
| 1083 | } | 1167 | } |
| 1084 | if (tref_isinteger(ix->val)) /* Convert int to number before storing. */ | 1168 | /* Convert int to number before storing. */ |
| 1169 | if (!LJ_DUALNUM && tref_isinteger(ix->val)) | ||
| 1085 | ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); | 1170 | ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); |
| 1086 | emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); | 1171 | emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); |
| 1087 | if (keybarrier || tref_isgcv(ix->val)) | 1172 | if (keybarrier || tref_isgcv(ix->val)) |
| @@ -1135,7 +1220,8 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) | |||
| 1135 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ | 1220 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ |
| 1136 | return res; | 1221 | return res; |
| 1137 | } else { /* Upvalue store. */ | 1222 | } else { /* Upvalue store. */ |
| 1138 | if (tref_isinteger(val)) /* Convert int to number before storing. */ | 1223 | /* Convert int to number before storing. */ |
| 1224 | if (!LJ_DUALNUM && tref_isinteger(val)) | ||
| 1139 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); | 1225 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); |
| 1140 | emitir(IRT(IR_USTORE, tref_type(val)), uref, val); | 1226 | emitir(IRT(IR_USTORE, tref_type(val)), uref, val); |
| 1141 | if (needbarrier && tref_isgcv(val)) | 1227 | if (needbarrier && tref_isgcv(val)) |
| @@ -1455,16 +1541,15 @@ void lj_record_ins(jit_State *J) | |||
| 1455 | case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */ | 1541 | case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */ |
| 1456 | case BCMvar: | 1542 | case BCMvar: |
| 1457 | copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; | 1543 | copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; |
| 1458 | case BCMnum: { lua_Number n = proto_knum(J->pt, rb); | ||
| 1459 | setnumV(rbv, n); ix.tab = rb = lj_ir_knumint(J, n); } break; | ||
| 1460 | default: break; /* Handled later. */ | 1544 | default: break; /* Handled later. */ |
| 1461 | } | 1545 | } |
| 1462 | switch (bcmode_c(op)) { | 1546 | switch (bcmode_c(op)) { |
| 1463 | case BCMvar: | 1547 | case BCMvar: |
| 1464 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; | 1548 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; |
| 1465 | case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; | 1549 | case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; |
| 1466 | case BCMnum: { lua_Number n = proto_knum(J->pt, rc); | 1550 | case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); |
| 1467 | setnumV(rcv, n); ix.key = rc = lj_ir_knumint(J, n); } break; | 1551 | copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : |
| 1552 | lj_ir_knumint(J, numV(tv)); } break; | ||
| 1468 | case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); | 1553 | case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); |
| 1469 | setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; | 1554 | setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; |
| 1470 | default: break; /* Handled later. */ | 1555 | default: break; /* Handled later. */ |
| @@ -1502,9 +1587,11 @@ void lj_record_ins(jit_State *J) | |||
| 1502 | irop = (int)op - (int)BC_ISLT + (int)IR_LT; | 1587 | irop = (int)op - (int)BC_ISLT + (int)IR_LT; |
| 1503 | if (ta == IRT_NUM) { | 1588 | if (ta == IRT_NUM) { |
| 1504 | if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ | 1589 | if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ |
| 1505 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 5; | 1590 | if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop)) |
| 1591 | irop ^= 5; | ||
| 1506 | } else if (ta == IRT_INT) { | 1592 | } else if (ta == IRT_INT) { |
| 1507 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; | 1593 | if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop)) |
| 1594 | irop ^= 1; | ||
| 1508 | } else if (ta == IRT_STR) { | 1595 | } else if (ta == IRT_STR) { |
| 1509 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; | 1596 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; |
| 1510 | ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); | 1597 | ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); |
| @@ -1599,13 +1686,11 @@ void lj_record_ins(jit_State *J) | |||
| 1599 | case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: | 1686 | case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: |
| 1600 | case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { | 1687 | case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { |
| 1601 | MMS mm = bcmode_mm(op); | 1688 | MMS mm = bcmode_mm(op); |
| 1602 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) { | 1689 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) |
| 1603 | rb = lj_ir_tonum(J, rb); | 1690 | rc = lj_opt_narrow_arith(J, rb, rc, &ix.tabv, &ix.keyv, |
| 1604 | rc = lj_ir_tonum(J, rc); | 1691 | (int)mm - (int)MM_add + (int)IR_ADD); |
| 1605 | rc = emitir(IRTN((int)mm - (int)MM_add + (int)IR_ADD), rb, rc); | 1692 | else |
| 1606 | } else { | ||
| 1607 | rc = rec_mm_arith(J, &ix, mm); | 1693 | rc = rec_mm_arith(J, &ix, mm); |
| 1608 | } | ||
| 1609 | break; | 1694 | break; |
| 1610 | } | 1695 | } |
| 1611 | 1696 | ||
| @@ -1827,59 +1912,6 @@ void lj_record_ins(jit_State *J) | |||
| 1827 | 1912 | ||
| 1828 | /* -- Recording setup ----------------------------------------------------- */ | 1913 | /* -- Recording setup ----------------------------------------------------- */ |
| 1829 | 1914 | ||
| 1830 | /* Setup recording for a FORL loop. */ | ||
| 1831 | static void rec_setup_forl(jit_State *J, const BCIns *fori) | ||
| 1832 | { | ||
| 1833 | BCReg ra = bc_a(*fori); | ||
| 1834 | cTValue *forbase = &J->L->base[ra]; | ||
| 1835 | IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase) | ||
| 1836 | : IRT_NUM; | ||
| 1837 | TRef start; | ||
| 1838 | TRef stop = fori_arg(J, fori, ra+FORL_STOP, t); | ||
| 1839 | TRef step = fori_arg(J, fori, ra+FORL_STEP, t); | ||
| 1840 | int dir = (0 <= numV(&forbase[FORL_STEP])); | ||
| 1841 | lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); | ||
| 1842 | J->scev.t.irt = t; | ||
| 1843 | J->scev.dir = dir; | ||
| 1844 | J->scev.stop = tref_ref(stop); | ||
| 1845 | J->scev.step = tref_ref(step); | ||
| 1846 | if (!tref_isk(step)) { | ||
| 1847 | /* Non-constant step: need a guard for the direction. */ | ||
| 1848 | TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J); | ||
| 1849 | emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero); | ||
| 1850 | /* Add hoistable overflow checks for a narrowed FORL index. */ | ||
| 1851 | if (t == IRT_INT) { | ||
| 1852 | if (tref_isk(stop)) { | ||
| 1853 | /* Constant stop: optimize check away or to a range check for step. */ | ||
| 1854 | int32_t k = IR(tref_ref(stop))->i; | ||
| 1855 | if (dir) { | ||
| 1856 | if (k > 0) | ||
| 1857 | emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k)); | ||
| 1858 | } else { | ||
| 1859 | if (k < 0) | ||
| 1860 | emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k)); | ||
| 1861 | } | ||
| 1862 | } else { | ||
| 1863 | /* Stop+step variable: need full overflow check. */ | ||
| 1864 | TRef tr = emitir(IRTGI(IR_ADDOV), step, stop); | ||
| 1865 | emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */ | ||
| 1866 | } | ||
| 1867 | } | ||
| 1868 | } else if (t == IRT_INT && !tref_isk(stop)) { | ||
| 1869 | /* Constant step: optimize overflow check to a range check for stop. */ | ||
| 1870 | int32_t k = IR(tref_ref(step))->i; | ||
| 1871 | k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; | ||
| 1872 | emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k)); | ||
| 1873 | } | ||
| 1874 | J->scev.start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT)); | ||
| 1875 | start = sloadt(J, (int32_t)(ra+FORL_IDX), | ||
| 1876 | (t == IRT_INT && !J->scev.start) ? (IRT_INT|IRT_GUARD) : t, | ||
| 1877 | t == IRT_INT ? (IRSLOAD_CONVERT|IRSLOAD_INHERIT) : IRSLOAD_INHERIT); | ||
| 1878 | J->base[ra+FORL_EXT] = start; | ||
| 1879 | J->scev.idx = tref_ref(start); | ||
| 1880 | J->maxslot = ra+FORL_EXT+1; | ||
| 1881 | } | ||
| 1882 | |||
| 1883 | /* Setup recording for a root trace started by a hot loop. */ | 1915 | /* Setup recording for a root trace started by a hot loop. */ |
| 1884 | static const BCIns *rec_setup_root(jit_State *J) | 1916 | static const BCIns *rec_setup_root(jit_State *J) |
| 1885 | { | 1917 | { |
| @@ -2033,7 +2065,7 @@ void lj_record_setup(jit_State *J) | |||
| 2033 | if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI && | 2065 | if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI && |
| 2034 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { | 2066 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { |
| 2035 | lj_snap_add(J); | 2067 | lj_snap_add(J); |
| 2036 | rec_setup_forl(J, J->pc-1); | 2068 | rec_for_loop(J, J->pc-1, &J->scev, 1); |
| 2037 | goto sidecheck; | 2069 | goto sidecheck; |
| 2038 | } | 2070 | } |
| 2039 | } else { | 2071 | } else { |
| @@ -2054,7 +2086,7 @@ void lj_record_setup(jit_State *J) | |||
| 2054 | */ | 2086 | */ |
| 2055 | lj_snap_add(J); | 2087 | lj_snap_add(J); |
| 2056 | if (bc_op(J->cur.startins) == BC_FORL) | 2088 | if (bc_op(J->cur.startins) == BC_FORL) |
| 2057 | rec_setup_forl(J, J->pc-1); | 2089 | rec_for_loop(J, J->pc-1, &J->scev, 1); |
| 2058 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) | 2090 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) |
| 2059 | lj_trace_err(J, LJ_TRERR_STACKOV); | 2091 | lj_trace_err(J, LJ_TRERR_STACKOV); |
| 2060 | } | 2092 | } |
diff --git a/src/lj_snap.c b/src/lj_snap.c index 59435b20..70628a0e 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
| @@ -68,7 +68,8 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |||
| 68 | if (!(ir->op2 & IRSLOAD_INHERIT)) | 68 | if (!(ir->op2 & IRSLOAD_INHERIT)) |
| 69 | continue; | 69 | continue; |
| 70 | /* No need to restore readonly slots and unmodified non-parent slots. */ | 70 | /* No need to restore readonly slots and unmodified non-parent slots. */ |
| 71 | if ((ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | 71 | if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && |
| 72 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | ||
| 72 | sn |= SNAP_NORESTORE; | 73 | sn |= SNAP_NORESTORE; |
| 73 | } | 74 | } |
| 74 | map[n++] = sn; | 75 | map[n++] = sn; |
diff --git a/src/lj_trace.c b/src/lj_trace.c index b67e8f75..69124542 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
| @@ -495,8 +495,8 @@ static int trace_abort(jit_State *J) | |||
| 495 | 495 | ||
| 496 | J->postproc = LJ_POST_NONE; | 496 | J->postproc = LJ_POST_NONE; |
| 497 | lj_mcode_abort(J); | 497 | lj_mcode_abort(J); |
| 498 | if (tvisnum(L->top-1)) | 498 | if (tvisnumber(L->top-1)) |
| 499 | e = (TraceError)lj_num2int(numV(L->top-1)); | 499 | e = (TraceError)numberVint(L->top-1); |
| 500 | if (e == LJ_TRERR_MCODELM) { | 500 | if (e == LJ_TRERR_MCODELM) { |
| 501 | J->state = LJ_TRACE_ASM; | 501 | J->state = LJ_TRACE_ASM; |
| 502 | return 1; /* Retry ASM with new MCode area. */ | 502 | return 1; /* Retry ASM with new MCode area. */ |
| @@ -703,8 +703,12 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) | |||
| 703 | setintV(L->top++, J->exitno); | 703 | setintV(L->top++, J->exitno); |
| 704 | setintV(L->top++, RID_NUM_GPR); | 704 | setintV(L->top++, RID_NUM_GPR); |
| 705 | setintV(L->top++, RID_NUM_FPR); | 705 | setintV(L->top++, RID_NUM_FPR); |
| 706 | for (i = 0; i < RID_NUM_GPR; i++) | 706 | for (i = 0; i < RID_NUM_GPR; i++) { |
| 707 | setnumV(L->top++, cast_num(ex->gpr[i])); | 707 | if (sizeof(ex->gpr[i]) == sizeof(int32_t)) |
| 708 | setintV(L->top++, (int32_t)ex->gpr[i]); | ||
| 709 | else | ||
| 710 | setnumV(L->top++, (lua_Number)ex->gpr[i]); | ||
| 711 | } | ||
| 708 | for (i = 0; i < RID_NUM_FPR; i++) { | 712 | for (i = 0; i < RID_NUM_FPR; i++) { |
| 709 | setnumV(L->top, ex->fpr[i]); | 713 | setnumV(L->top, ex->fpr[i]); |
| 710 | if (LJ_UNLIKELY(tvisnan(L->top))) | 714 | if (LJ_UNLIKELY(tvisnan(L->top))) |
