aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Mike Pall <mike> 2025-11-27 17:45:17 +0100
committer Mike Pall <mike> 2025-11-27 17:45:17 +0100
commit f80b349d5490aa289b2925d297f3f3c618977570 (patch)
tree 8d8fb0d2beb3e863592139d603ada63e5aa6ce77 /src
parent 3215838aa744d148e79a8ea0bd7c014e984302cb (diff)
download luajit-f80b349d5490aa289b2925d297f3f3c618977570.tar.gz
luajit-f80b349d5490aa289b2925d297f3f3c618977570.tar.bz2
luajit-f80b349d5490aa289b2925d297f3f3c618977570.zip
Unify Lua number to FFI integer conversions.
Phew. #1411
Diffstat (limited to 'src')
-rw-r--r-- src/lib_io.c 7
-rw-r--r-- src/lib_os.c 8
-rw-r--r-- src/lj_api.c 24
-rw-r--r-- src/lj_asm.c 21
-rw-r--r-- src/lj_asm_arm.h 5
-rw-r--r-- src/lj_asm_arm64.h 20
-rw-r--r-- src/lj_asm_mips.h 79
-rw-r--r-- src/lj_asm_ppc.h 27
-rw-r--r-- src/lj_asm_x86.h 45
-rw-r--r-- src/lj_bcwrite.c 11
-rw-r--r-- src/lj_cconv.c 12
-rw-r--r-- src/lj_cdata.c 7
-rw-r--r-- src/lj_crecord.c 36
-rw-r--r-- src/lj_def.h 2
-rw-r--r-- src/lj_ffrecord.c 4
-rw-r--r-- src/lj_ir.c 23
-rw-r--r-- src/lj_ircall.h 32
-rw-r--r-- src/lj_jit.h 27
-rw-r--r-- src/lj_lib.c 2
-rw-r--r-- src/lj_meta.c 3
-rw-r--r-- src/lj_obj.h 91
-rw-r--r-- src/lj_opt_fold.c 64
-rw-r--r-- src/lj_opt_narrow.c 32
-rw-r--r-- src/lj_opt_split.c 15
-rw-r--r-- src/lj_parse.c 23
-rw-r--r-- src/lj_record.c 21
-rw-r--r-- src/lj_strfmt.c 9
-rw-r--r-- src/lj_strscan.c 8
-rw-r--r-- src/lj_tab.c 24
-rw-r--r-- src/lj_target_x86.h 1
-rw-r--r-- src/lj_trace.c 30
-rw-r--r-- src/lj_vm.h 34
-rw-r--r-- src/lj_vmmath.c 2
-rw-r--r-- src/vm_arm.dasc 115
-rw-r--r-- src/vm_arm64.dasc 36
-rw-r--r-- src/vm_mips.dasc 119
-rw-r--r-- src/vm_mips64.dasc 142
-rw-r--r-- src/vm_ppc.dasc 146
-rw-r--r-- src/vm_x64.dasc 43
-rw-r--r-- src/vm_x86.dasc 92
40 files changed, 1029 insertions, 413 deletions
diff --git a/src/lib_io.c b/src/lib_io.c
index 5659ff51..ec7d2545 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -127,8 +127,9 @@ static int io_file_readnum(lua_State *L, FILE *fp)
127 lua_Number d; 127 lua_Number d;
128 if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { 128 if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
129 if (LJ_DUALNUM) { 129 if (LJ_DUALNUM) {
130 int32_t i = lj_num2int(d); 130 int64_t i64;
131 if (d == (lua_Number)i && !tvismzero((cTValue *)&d)) { 131 int32_t i;
132 if (lj_num2int_check(d, i64, i) && !tvismzero((cTValue *)&d)) {
132 setintV(L->top++, i); 133 setintV(L->top++, i);
133 return 1; 134 return 1;
134 } 135 }
@@ -335,7 +336,7 @@ LJLIB_CF(io_method_seek)
335 if (tvisint(o)) 336 if (tvisint(o))
336 ofs = (int64_t)intV(o); 337 ofs = (int64_t)intV(o);
337 else if (tvisnum(o)) 338 else if (tvisnum(o))
338 ofs = (int64_t)numV(o); 339 ofs = lj_num2i64(numV(o));
339 else if (!tvisnil(o)) 340 else if (!tvisnil(o))
340 lj_err_argt(L, 3, LUA_TNUMBER); 341 lj_err_argt(L, 3, LUA_TNUMBER);
341 } 342 }
diff --git a/src/lib_os.c b/src/lib_os.c
index ae3fc857..fffc923e 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -171,7 +171,8 @@ static int getfield(lua_State *L, const char *key, int d)
171LJLIB_CF(os_date) 171LJLIB_CF(os_date)
172{ 172{
173 const char *s = luaL_optstring(L, 1, "%c"); 173 const char *s = luaL_optstring(L, 1, "%c");
174 time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); 174 time_t t = lua_isnoneornil(L, 2) ? time(NULL) :
175 lj_num2int_type(luaL_checknumber(L, 2), time_t);
175 struct tm *stm; 176 struct tm *stm;
176#if LJ_TARGET_POSIX 177#if LJ_TARGET_POSIX
177 struct tm rtm; 178 struct tm rtm;
@@ -253,8 +254,9 @@ LJLIB_CF(os_time)
253 254
254LJLIB_CF(os_difftime) 255LJLIB_CF(os_difftime)
255{ 256{
256 lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)), 257 lua_pushnumber(L,
257 (time_t)(luaL_optnumber(L, 2, (lua_Number)0)))); 258 difftime(lj_num2int_type(luaL_checknumber(L, 1), time_t),
259 lj_num2int_type(luaL_optnumber(L, 2, (lua_Number)0), time_t)));
258 return 1; 260 return 1;
259} 261}
260 262
diff --git a/src/lj_api.c b/src/lj_api.c
index e9fc25b4..94d8bc7e 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -416,11 +416,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
416 return intV(&tmp); 416 return intV(&tmp);
417 n = numV(&tmp); 417 n = numV(&tmp);
418 } 418 }
419#if LJ_64 419 return lj_num2int_type(n, lua_Integer);
420 return (lua_Integer)n;
421#else
422 return lj_num2int(n);
423#endif
424} 420}
425 421
426LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) 422LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
@@ -445,11 +441,7 @@ LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
445 n = numV(&tmp); 441 n = numV(&tmp);
446 } 442 }
447 if (ok) *ok = 1; 443 if (ok) *ok = 1;
448#if LJ_64 444 return lj_num2int_type(n, lua_Integer);
449 return (lua_Integer)n;
450#else
451 return lj_num2int(n);
452#endif
453} 445}
454 446
455LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) 447LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
@@ -468,11 +460,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
468 return (lua_Integer)intV(&tmp); 460 return (lua_Integer)intV(&tmp);
469 n = numV(&tmp); 461 n = numV(&tmp);
470 } 462 }
471#if LJ_64 463 return lj_num2int_type(n, lua_Integer);
472 return (lua_Integer)n;
473#else
474 return lj_num2int(n);
475#endif
476} 464}
477 465
478LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) 466LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
@@ -493,11 +481,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
493 return (lua_Integer)intV(&tmp); 481 return (lua_Integer)intV(&tmp);
494 n = numV(&tmp); 482 n = numV(&tmp);
495 } 483 }
496#if LJ_64 484 return lj_num2int_type(n, lua_Integer);
497 return (lua_Integer)n;
498#else
499 return lj_num2int(n);
500#endif
501} 485}
502 486
503LUA_API int lua_toboolean(lua_State *L, int idx) 487LUA_API int lua_toboolean(lua_State *L, int idx)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 0e888c29..8f7ae9a3 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1329,27 +1329,32 @@ static void asm_conv64(ASMState *as, IRIns *ir)
1329 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); 1329 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1330 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); 1330 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1331 IRCallID id; 1331 IRCallID id;
1332 const CCallInfo *ci;
1333#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1334 CCallInfo cim;
1335#endif
1332 IRRef args[2]; 1336 IRRef args[2];
1333 lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, 1337 lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
1334 "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); 1338 "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
1335 args[LJ_BE] = (ir-1)->op1; 1339 args[LJ_BE] = (ir-1)->op1;
1336 args[LJ_LE] = ir->op1; 1340 args[LJ_LE] = ir->op1;
1337 if (st == IRT_NUM || st == IRT_FLOAT) { 1341 lj_assertA(st != IRT_FLOAT, "bad CONV *64.float emitted");
1338 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); 1342 if (st == IRT_NUM) {
1343 id = IRCALL_lj_vm_num2u64;
1339 ir--; 1344 ir--;
1345 ci = &lj_ir_callinfo[id];
1340 } else { 1346 } else {
1341 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); 1347 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1342 }
1343 {
1344#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP 1348#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1345 CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; 1349 cim = lj_ir_callinfo[id];
1346 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ 1350 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
1351 ci = &cim;
1347#else 1352#else
1348 const CCallInfo *ci = &lj_ir_callinfo[id]; 1353 ci = &lj_ir_callinfo[id];
1349#endif 1354#endif
1350 asm_setupresult(as, ir, ci);
1351 asm_gencall(as, ci, args);
1352 } 1355 }
1356 asm_setupresult(as, ir, ci);
1357 asm_gencall(as, ci, args);
1353} 1358}
1354#endif 1359#endif
1355 1360
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 406360d2..1ddd2b3e 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -624,10 +624,9 @@ static void asm_conv(ASMState *as, IRIns *ir)
624 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 624 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
625 Reg dest = ra_dest(as, ir, RSET_GPR); 625 Reg dest = ra_dest(as, ir, RSET_GPR);
626 ARMIns ai; 626 ARMIns ai;
627 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
627 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 628 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
628 ai = irt_isint(ir->t) ? 629 ai = st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32;
629 (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) :
630 (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32);
631 emit_dm(as, ai, (tmp & 15), (left & 15)); 630 emit_dm(as, ai, (tmp & 15), (left & 15));
632 } 631 }
633 } else 632 } else
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index fdcff1db..507fc084 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -648,14 +648,18 @@ static void asm_conv(ASMState *as, IRIns *ir)
648 } else { 648 } else {
649 Reg left = ra_alloc1(as, lref, RSET_FPR); 649 Reg left = ra_alloc1(as, lref, RSET_FPR);
650 Reg dest = ra_dest(as, ir, RSET_GPR); 650 Reg dest = ra_dest(as, ir, RSET_GPR);
651 A64Ins ai = irt_is64(ir->t) ? 651 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
652 (st == IRT_NUM ? 652 if (irt_isu64(ir->t)) {
653 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : 653 emit_dnm(as, A64I_CSELx | A64F_CC(CC_VC), dest, dest, RID_TMP);
654 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : 654 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), dest);
655 (st == IRT_NUM ? 655 emit_dn(as, st == IRT_NUM ? A64I_FCVT_U64_F64 : A64I_FCVT_U64_F32, RID_TMP, (left & 31));
656 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : 656 emit_dn(as, st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32, dest, (left & 31));
657 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); 657 } else {
658 emit_dn(as, ai, dest, (left & 31)); 658 A64Ins ai = irt_is64(ir->t) ?
659 (st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32) :
660 (st == IRT_NUM ? A64I_FCVT_S32_F64 : A64I_FCVT_S32_F32);
661 emit_dn(as, ai, dest, (left & 31));
662 }
659 } 663 }
660 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 664 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
661 Reg dest = ra_dest(as, ir, RSET_GPR); 665 Reg dest = ra_dest(as, ir, RSET_GPR);
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 8dadabe4..36ed5de4 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -635,64 +635,38 @@ static void asm_conv(ASMState *as, IRIns *ir)
635 Reg dest = ra_dest(as, ir, RSET_GPR); 635 Reg dest = ra_dest(as, ir, RSET_GPR);
636 Reg left = ra_alloc1(as, lref, RSET_FPR); 636 Reg left = ra_alloc1(as, lref, RSET_FPR);
637 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 637 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
638 if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ 638 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
639 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
640 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
641 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
642 emit_tg(as, MIPSI_MFC1, dest, tmp);
643 emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D,
644 tmp, tmp);
645 emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D,
646 tmp, left, tmp);
647 if (st == IRT_FLOAT)
648 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
649 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
650 else
651 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
652 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
653#if LJ_64 639#if LJ_64
654 } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ 640 if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
655 MCLabel l_end; 641 MCLabel l_end = emit_label(as);
656 emit_tg(as, MIPSI_DMFC1, dest, tmp); 642 emit_tg(as, MIPSI_DMFC1, dest, tmp);
657 l_end = emit_label(as); 643 /* For result == INT64_MAX add -2^64 and convert again. */
658 /* For inputs >= 2^63 add -2^64 and convert again. */
659 if (st == IRT_NUM) { 644 if (st == IRT_NUM) {
660 emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); 645 emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp);
661 emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); 646 emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp);
662 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 647 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
663 (void *)&as->J->k64[LJ_K64_M2P64], 648 (void *)&as->J->k64[LJ_K64_M2P64],
664 rset_exclude(RSET_GPR, dest)); 649 rset_exclude(RSET_GPR, dest)); /* Delay slot. */
665 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ 650 emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */
666#if !LJ_TARGET_MIPSR6 651 emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1);
667 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 652 emit_ti(as, MIPSI_LI, RID_TMP, -1);
668 emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); 653 emit_tg(as, MIPSI_DMFC1, dest, tmp);
669#else 654 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left);
670 emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
671 emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp);
672#endif
673 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
674 (void *)&as->J->k64[LJ_K64_2P63],
675 rset_exclude(RSET_GPR, dest));
676 } else { 655 } else {
677 emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); 656 emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp);
678 emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); 657 emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp);
679 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), 658 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
680 (void *)&as->J->k32[LJ_K32_M2P64], 659 (void *)&as->J->k32[LJ_K32_M2P64],
681 rset_exclude(RSET_GPR, dest)); 660 rset_exclude(RSET_GPR, dest)); /* Delay slot. */
682 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ 661 emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */
683#if !LJ_TARGET_MIPSR6 662 emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1);
684 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 663 emit_ti(as, MIPSI_LI, RID_TMP, -1);
685 emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); 664 emit_tg(as, MIPSI_DMFC1, dest, tmp);
686#else 665 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left);
687 emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
688 emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp);
689#endif
690 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
691 (void *)&as->J->k32[LJ_K32_2P63],
692 rset_exclude(RSET_GPR, dest));
693 } 666 }
667 } else
694#endif 668#endif
695 } else { 669 {
696#if LJ_32 670#if LJ_32
697 emit_tg(as, MIPSI_MFC1, dest, tmp); 671 emit_tg(as, MIPSI_MFC1, dest, tmp);
698 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, 672 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
@@ -733,13 +707,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
733 "bad type for checked CONV"); 707 "bad type for checked CONV");
734 asm_tointg(as, ir, RID_NONE); 708 asm_tointg(as, ir, RID_NONE);
735 } else { 709 } else {
736 IRCallID cid = irt_is64(ir->t) ? 710 IRCallID cid;
737 ((st == IRT_NUM) ? 711 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
738 (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : 712 lj_assertA(!(irt_is64(ir->t) && st != IRT_NUM), "bad CONV *64.float emitted");
739 (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : 713 cid = irt_is64(ir->t) ? IRCALL_lj_vm_num2u64 :
740 ((st == IRT_NUM) ? 714 (st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i);
741 (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
742 (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
743 asm_callid(as, ir, cid); 715 asm_callid(as, ir, cid);
744 } 716 }
745 } else 717 } else
@@ -780,7 +752,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
780 } 752 }
781 } 753 }
782 } else { 754 } else {
783 if (st64 && !(ir->op2 & IRCONV_NONE)) { 755 if (!irt_isu32(ir->t)) { /* Implicit sign extension. */
756 Reg left = ra_alloc1(as, lref, RSET_GPR);
757 emit_dta(as, MIPSI_SLL, dest, left, 0);
758 } else if (st64 && !(ir->op2 & IRCONV_NONE)) {
784 /* This is either a 32 bit reg/reg mov which zeroes the hiword 759 /* This is either a 32 bit reg/reg mov which zeroes the hiword
785 ** or a load of the loword from a 64 bit address. 760 ** or a load of the loword from a 64 bit address.
786 */ 761 */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index d77c45ce..9e2af414 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -512,29 +512,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
512 Reg dest = ra_dest(as, ir, RSET_GPR); 512 Reg dest = ra_dest(as, ir, RSET_GPR);
513 Reg left = ra_alloc1(as, lref, RSET_FPR); 513 Reg left = ra_alloc1(as, lref, RSET_FPR);
514 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 514 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
515 if (irt_isu32(ir->t)) { 515 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
516 /* Convert both x and x-2^31 to int and merge results. */ 516 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
517 Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); 517 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
518 emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */ 518 emit_fb(as, PPCI_FCTIWZ, tmp, left);
519 emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP);
520 emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP);
521 emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */
522 emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */
523 emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */
524 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
525 emit_tai(as, PPCI_LWZ, dest,
526 RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */
527 emit_fb(as, PPCI_FCTIWZ, tmp, left);
528 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
529 emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
530 emit_fab(as, PPCI_FSUB, tmp, left, tmp);
531 emit_lsptr(as, PPCI_LFS, (tmp & 31),
532 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
533 } else {
534 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
535 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
536 emit_fb(as, PPCI_FCTIWZ, tmp, left);
537 }
538 } 519 }
539 } else 520 } else
540#endif 521#endif
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index f3c2238a..bdbce116 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -905,29 +905,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
905 } else { 905 } else {
906 Reg dest = ra_dest(as, ir, RSET_GPR); 906 Reg dest = ra_dest(as, ir, RSET_GPR);
907 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; 907 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
908 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 908 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
909 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 909#if LJ_64
910 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 910 if (irt_isu64(ir->t)) {
911 /* For the indefinite result -2^63, add -2^64 and convert again. */
911 Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : 912 Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
912 ra_scratch(as, RSET_FPR); 913 ra_scratch(as, RSET_FPR);
913 MCLabel l_end = emit_label(as); 914 MCLabel l_end = emit_label(as);
914 if (LJ_32)
915 emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
916 emit_rr(as, op, dest|REX_64, tmp); 915 emit_rr(as, op, dest|REX_64, tmp);
917 if (st == IRT_NUM) 916 if (st == IRT_NUM)
918 emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); 917 emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64]);
919 else 918 else
920 emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); 919 emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64]);
921 emit_sjcc(as, CC_NS, l_end); 920 emit_sjcc(as, CC_NO, l_end);
922 emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ 921 emit_gmrmi(as, XG_ARITHi(XOg_CMP), dest|REX_64, 1);
923 emit_rr(as, op, dest|REX_64, tmp); 922 emit_rr(as, op, dest|REX_64, tmp);
924 ra_left(as, tmp, lref); 923 ra_left(as, tmp, lref);
925 } else { 924
926 if (LJ_64 && irt_isu32(ir->t)) 925 } else
927 emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ 926#endif
927 {
928 emit_mrm(as, op, 928 emit_mrm(as, op,
929 dest|((LJ_64 && 929 dest|((LJ_64 && irt_is64(ir->t)) ? REX_64 : 0),
930 (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
931 asm_fuseload(as, lref, RSET_FPR)); 930 asm_fuseload(as, lref, RSET_FPR));
932 } 931 }
933 } 932 }
@@ -1020,6 +1019,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
1020 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); 1019 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1021 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); 1020 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1022 Reg lo, hi; 1021 Reg lo, hi;
1022 int usehi = ra_used(ir);
1023 lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); 1023 lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV");
1024 lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); 1024 lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV");
1025 hi = ra_dest(as, ir, RSET_GPR); 1025 hi = ra_dest(as, ir, RSET_GPR);
@@ -1032,21 +1032,24 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
1032 emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); 1032 emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
1033 } 1033 }
1034 if (dt == IRT_U64) { 1034 if (dt == IRT_U64) {
1035 /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ 1035 /* For the indefinite result -2^63, add -2^64 and convert again. */
1036 MCLabel l_pop, l_end = emit_label(as); 1036 MCLabel l_pop, l_end = emit_label(as);
1037 emit_x87op(as, XI_FPOP); 1037 emit_x87op(as, XI_FPOP);
1038 l_pop = emit_label(as); 1038 l_pop = emit_label(as);
1039 emit_sjmp(as, l_end); 1039 emit_sjmp(as, l_end);
1040 emit_rmro(as, XO_MOV, hi, RID_ESP, 4); 1040 if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
1041 if ((as->flags & JIT_F_SSE3)) 1041 if ((as->flags & JIT_F_SSE3))
1042 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); 1042 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
1043 else 1043 else
1044 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); 1044 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
1045 emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); 1045 emit_rma(as, XO_FADDd, XOg_FADDd, &as->J->k32[LJ_K32_M2P64]);
1046 emit_sjcc(as, CC_NS, l_pop); 1046 emit_sjcc(as, CC_NE, l_pop);
1047 emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ 1047 emit_gmroi(as, XG_ARITHi(XOg_CMP), RID_ESP, 0, 0);
1048 } 1048 emit_sjcc(as, CC_NO, l_pop);
1049 emit_rmro(as, XO_MOV, hi, RID_ESP, 4); 1049 emit_gmrmi(as, XG_ARITHi(XOg_CMP), hi, 1);
1050 usehi = 1;
1051 }
1052 if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
1050 if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */ 1053 if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */
1051 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); 1054 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
1052 } else { /* Otherwise set FPU rounding mode to truncate before the store. */ 1055 } else { /* Otherwise set FPU rounding mode to truncate before the store. */
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index ec6f13c8..cd7ae942 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -59,9 +59,9 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
59 p = lj_strfmt_wuleb128(p, intV(o)); 59 p = lj_strfmt_wuleb128(p, intV(o));
60 } else if (tvisnum(o)) { 60 } else if (tvisnum(o)) {
61 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ 61 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
62 lua_Number num = numV(o); 62 int64_t i64;
63 int32_t k = lj_num2int(num); 63 int32_t k;
64 if (num == (lua_Number)k) { /* -0 is never a constant. */ 64 if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */
65 *p++ = BCDUMP_KTAB_INT; 65 *p++ = BCDUMP_KTAB_INT;
66 p = lj_strfmt_wuleb128(p, k); 66 p = lj_strfmt_wuleb128(p, k);
67 ctx->sb.w = p; 67 ctx->sb.w = p;
@@ -270,9 +270,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
270 /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ 270 /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */
271 if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) { 271 if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) {
272 /* Narrow number constants to integers. */ 272 /* Narrow number constants to integers. */
273 lua_Number num = numV(o); 273 int64_t i64;
274 k = lj_num2int(num); 274 if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */
275 if (num == (lua_Number)k) { /* -0 is never a constant. */
276 save_int: 275 save_int:
277 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); 276 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
278 if (k < 0) 277 if (k < 0)
diff --git a/src/lj_cconv.c b/src/lj_cconv.c
index 854b51db..2b9349cd 100644
--- a/src/lj_cconv.c
+++ b/src/lj_cconv.c
@@ -197,18 +197,16 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
197 else goto err_conv; /* NYI: long double. */ 197 else goto err_conv; /* NYI: long double. */
198 /* Then convert double to integer. */ 198 /* Then convert double to integer. */
199 /* The conversion must exactly match the semantics of JIT-compiled code! */ 199 /* The conversion must exactly match the semantics of JIT-compiled code! */
200 if (dsize < 4 || (dsize == 4 && !(dinfo & CTF_UNSIGNED))) { 200 if (dsize < 8) {
201 int32_t i = (int32_t)n; 201 int64_t i = lj_num2i64(n); /* Always convert via int64_t. */
202 if (dsize == 4) *(int32_t *)dp = i; 202 if (dsize == 4) *(int32_t *)dp = i;
203 else if (dsize == 2) *(int16_t *)dp = (int16_t)i; 203 else if (dsize == 2) *(int16_t *)dp = (int16_t)i;
204 else *(int8_t *)dp = (int8_t)i; 204 else *(int8_t *)dp = (int8_t)i;
205 } else if (dsize == 4) {
206 *(uint32_t *)dp = (uint32_t)n;
207 } else if (dsize == 8) { 205 } else if (dsize == 8) {
208 if (!(dinfo & CTF_UNSIGNED)) 206 if ((dinfo & CTF_UNSIGNED))
209 *(int64_t *)dp = (int64_t)n;
210 else
211 *(uint64_t *)dp = lj_num2u64(n); 207 *(uint64_t *)dp = lj_num2u64(n);
208 else
209 *(int64_t *)dp = lj_num2i64(n);
212 } else { 210 } else {
213 goto err_conv; /* NYI: conversion to >64 bit integers. */ 211 goto err_conv; /* NYI: conversion to >64 bit integers. */
214 } 212 }
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 3b48f76c..2dc56a80 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -133,12 +133,7 @@ collect_attrib:
133 idx = (ptrdiff_t)intV(key); 133 idx = (ptrdiff_t)intV(key);
134 goto integer_key; 134 goto integer_key;
135 } else if (tvisnum(key)) { /* Numeric key. */ 135 } else if (tvisnum(key)) { /* Numeric key. */
136#ifdef _MSC_VER 136 idx = lj_num2int_type(numV(key), ptrdiff_t);
137 /* Workaround for MSVC bug. */
138 volatile
139#endif
140 lua_Number n = numV(key);
141 idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
142 integer_key: 137 integer_key:
143 if (ctype_ispointer(ct->info)) { 138 if (ctype_ispointer(ct->info)) {
144 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ 139 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 27f2c1dd..45c559cf 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -445,7 +445,20 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
445 /* fallthrough */ 445 /* fallthrough */
446 case CCX(I, F): 446 case CCX(I, F):
447 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 447 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
448 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY); 448 conv_I_F:
449#if LJ_SOFTFP || LJ_32
450 if (st == IRT_FLOAT) { /* Uncommon. Simplify split backends. */
451 sp = emitconv(sp, IRT_NUM, IRT_FLOAT, 0);
452 st = IRT_NUM;
453 }
454#endif
455 if (dsize < 8) {
456 lj_needsplit(J);
457 sp = emitconv(sp, IRT_I64, st, IRCONV_ANY);
458 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, IRT_I64, 0);
459 } else {
460 sp = emitconv(sp, dt, st, IRCONV_ANY);
461 }
449 goto xstore; 462 goto xstore;
450 case CCX(I, P): 463 case CCX(I, P):
451 case CCX(I, A): 464 case CCX(I, A):
@@ -523,10 +536,9 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
523 goto xstore; 536 goto xstore;
524 case CCX(P, F): 537 case CCX(P, F):
525 if (st == IRT_CDATA) goto err_nyi; 538 if (st == IRT_CDATA) goto err_nyi;
526 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 539 /* The signed 64 bit conversion is cheaper. */
527 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 540 dt = (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32;
528 st, IRCONV_ANY); 541 goto conv_I_F;
529 goto xstore;
530 542
531 /* Destination is an array. */ 543 /* Destination is an array. */
532 case CCX(A, A): 544 case CCX(A, A):
@@ -1878,7 +1890,7 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1878 if (J->base[0] && tref_iscdata(J->base[1])) { 1890 if (J->base[0] && tref_iscdata(J->base[1])) {
1879 tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64), 1891 tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64),
1880 J->base[1], &rd->argv[1]); 1892 J->base[1], &rd->argv[1]);
1881 if (!tref_isinteger(tsh)) 1893 if (LJ_32 && !tref_isinteger(tsh))
1882 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); 1894 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
1883 J->base[1] = tsh; 1895 J->base[1] = tsh;
1884 } 1896 }
@@ -1886,15 +1898,17 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1886 if (id) { 1898 if (id) {
1887 TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); 1899 TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]);
1888 uint32_t op = rd->data; 1900 uint32_t op = rd->data;
1901 IRType t;
1889 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); 1902 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
1903 t = tref_isinteger(tsh) ? IRT_INT : tref_type(tsh);
1890 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 1904 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1891 !tref_isk(tsh)) 1905 !tref_isk(tsh))
1892 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); 1906 tsh = emitir(IRT(IR_BAND, t), tsh, lj_ir_kint(J, 63));
1893#ifdef LJ_TARGET_UNIFYROT 1907#ifdef LJ_TARGET_UNIFYROT
1894 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { 1908 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
1895 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; 1909 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
1896 tsh = emitir(IRTI(IR_NEG), tsh, tsh); 1910 tsh = emitir(IRT(IR_NEG, t), tsh, tsh);
1897 } 1911 }
1898#endif 1912#endif
1899 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); 1913 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
1900 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); 1914 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
diff --git a/src/lj_def.h b/src/lj_def.h
index a9e23729..f34b1a39 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -127,6 +127,7 @@ typedef uintptr_t BloomFilter;
127#define LJ_INLINE inline 127#define LJ_INLINE inline
128#define LJ_AINLINE inline __attribute__((always_inline)) 128#define LJ_AINLINE inline __attribute__((always_inline))
129#define LJ_NOINLINE __attribute__((noinline)) 129#define LJ_NOINLINE __attribute__((noinline))
130#define LJ_CONSTF __attribute__((nothrow,const))
130 131
131#if defined(__ELF__) || defined(__MACH__) || defined(__psp2__) 132#if defined(__ELF__) || defined(__MACH__) || defined(__psp2__)
132#if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) 133#if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__))
@@ -245,6 +246,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p)
245#define LJ_INLINE __inline 246#define LJ_INLINE __inline
246#define LJ_AINLINE __forceinline 247#define LJ_AINLINE __forceinline
247#define LJ_NOINLINE __declspec(noinline) 248#define LJ_NOINLINE __declspec(noinline)
249#define LJ_CONSTF __declspec(nothrow noalias)
248#if defined(_M_IX86) 250#if defined(_M_IX86)
249#define LJ_FASTCALL __fastcall 251#define LJ_FASTCALL __fastcall
250#endif 252#endif
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 527b6c06..290986f6 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -70,7 +70,7 @@ static int32_t argv2int(jit_State *J, TValue *o)
70{ 70{
71 if (!lj_strscan_numberobj(o)) 71 if (!lj_strscan_numberobj(o))
72 lj_trace_err(J, LJ_TRERR_BADTYPE); 72 lj_trace_err(J, LJ_TRERR_BADTYPE);
73 return tvisint(o) ? intV(o) : lj_num2int(numV(o)); 73 return numberVint(o);
74} 74}
75 75
76/* Get runtime value of string argument. */ 76/* Get runtime value of string argument. */
@@ -586,7 +586,7 @@ static void LJ_FASTCALL recff_math_round(jit_State *J, RecordFFData *rd)
586 /* Result is integral (or NaN/Inf), but may not fit an int32_t. */ 586 /* Result is integral (or NaN/Inf), but may not fit an int32_t. */
587 if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */ 587 if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */
588 lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data); 588 lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data);
589 if (n == (lua_Number)lj_num2int(n)) 589 if (lj_num2int_ok(n))
590 tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK); 590 tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK);
591 } 591 }
592 J->base[0] = tr; 592 J->base[0] = tr;
diff --git a/src/lj_ir.c b/src/lj_ir.c
index e7a5e8bc..e24fead4 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -248,28 +248,15 @@ TRef lj_ir_kint64(jit_State *J, uint64_t u64)
248 return lj_ir_k64(J, IR_KINT64, u64); 248 return lj_ir_k64(J, IR_KINT64, u64);
249} 249}
250 250
251/* Check whether a number is int and return it. -0 is NOT considered an int. */
252static int numistrueint(lua_Number n, int32_t *kp)
253{
254 int32_t k = lj_num2int(n);
255 if (n == (lua_Number)k) {
256 if (kp) *kp = k;
257 if (k == 0) { /* Special check for -0. */
258 TValue tv;
259 setnumV(&tv, n);
260 if (tv.u32.hi != 0)
261 return 0;
262 }
263 return 1;
264 }
265 return 0;
266}
267
268/* Intern number as int32_t constant if possible, otherwise as FP constant. */ 251/* Intern number as int32_t constant if possible, otherwise as FP constant. */
269TRef lj_ir_knumint(jit_State *J, lua_Number n) 252TRef lj_ir_knumint(jit_State *J, lua_Number n)
270{ 253{
254 int64_t i64;
271 int32_t k; 255 int32_t k;
272 if (numistrueint(n, &k)) 256 TValue tv;
257 setnumV(&tv, n);
258 /* -0 is NOT considered an int. */
259 if (lj_num2int_check(n, i64, k) && !tvismzero(&tv))
273 return lj_ir_kint(J, k); 260 return lj_ir_kint(J, k);
274 else 261 else
275 return lj_ir_knum(J, n); 262 return lj_ir_knum(J, n);
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 5196144e..60b196c6 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -233,20 +233,15 @@ typedef struct CCallInfo {
233 _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ 233 _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
234 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ 234 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
235 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ 235 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
236 _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
237 _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ 236 _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
238 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ 237 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
239 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ 238 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
240 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ 239 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
241 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
242 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ 240 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
243 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ 241 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
244 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ 242 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
245 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ 243 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
246 _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \ 244 _(FP64_FFI, lj_vm_num2u64, 1, N, U64, XA_FP) \
247 _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
248 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
249 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
250 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ 245 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
251 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ 246 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
252 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ 247 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
@@ -291,27 +286,14 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
291#define softfp_d2i __aeabi_d2iz 286#define softfp_d2i __aeabi_d2iz
292#define softfp_ui2d __aeabi_ui2d 287#define softfp_ui2d __aeabi_ui2d
293#define softfp_f2d __aeabi_f2d 288#define softfp_f2d __aeabi_f2d
294#define softfp_d2ui __aeabi_d2uiz
295#define softfp_d2f __aeabi_d2f 289#define softfp_d2f __aeabi_d2f
296#define softfp_i2f __aeabi_i2f 290#define softfp_i2f __aeabi_i2f
297#define softfp_ui2f __aeabi_ui2f 291#define softfp_ui2f __aeabi_ui2f
298#define softfp_f2i __aeabi_f2iz 292#define softfp_f2i __aeabi_f2iz
299#define softfp_f2ui __aeabi_f2uiz
300#define fp64_l2d __aeabi_l2d 293#define fp64_l2d __aeabi_l2d
301#define fp64_ul2d __aeabi_ul2d 294#define fp64_ul2d __aeabi_ul2d
302#define fp64_l2f __aeabi_l2f 295#define fp64_l2f __aeabi_l2f
303#define fp64_ul2f __aeabi_ul2f 296#define fp64_ul2f __aeabi_ul2f
304#if LJ_TARGET_IOS
305#define fp64_d2l __fixdfdi
306#define fp64_d2ul __fixunsdfdi
307#define fp64_f2l __fixsfdi
308#define fp64_f2ul __fixunssfdi
309#else
310#define fp64_d2l __aeabi_d2lz
311#define fp64_d2ul __aeabi_d2ulz
312#define fp64_f2l __aeabi_f2lz
313#define fp64_f2ul __aeabi_f2ulz
314#endif
315#elif LJ_TARGET_MIPS || LJ_TARGET_PPC 297#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
316#define softfp_add __adddf3 298#define softfp_add __adddf3
317#define softfp_sub __subdf3 299#define softfp_sub __subdf3
@@ -322,12 +304,10 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
322#define softfp_d2i __fixdfsi 304#define softfp_d2i __fixdfsi
323#define softfp_ui2d __floatunsidf 305#define softfp_ui2d __floatunsidf
324#define softfp_f2d __extendsfdf2 306#define softfp_f2d __extendsfdf2
325#define softfp_d2ui __fixunsdfsi
326#define softfp_d2f __truncdfsf2 307#define softfp_d2f __truncdfsf2
327#define softfp_i2f __floatsisf 308#define softfp_i2f __floatsisf
328#define softfp_ui2f __floatunsisf 309#define softfp_ui2f __floatunsisf
329#define softfp_f2i __fixsfsi 310#define softfp_f2i __fixsfsi
330#define softfp_f2ui __fixunssfsi
331#else 311#else
332#error "Missing soft-float definitions for target architecture" 312#error "Missing soft-float definitions for target architecture"
333#endif 313#endif
@@ -341,12 +321,10 @@ extern int32_t softfp_d2i(double a);
341#if LJ_HASFFI 321#if LJ_HASFFI
342extern double softfp_ui2d(uint32_t a); 322extern double softfp_ui2d(uint32_t a);
343extern double softfp_f2d(float a); 323extern double softfp_f2d(float a);
344extern uint32_t softfp_d2ui(double a);
345extern float softfp_d2f(double a); 324extern float softfp_d2f(double a);
346extern float softfp_i2f(int32_t a); 325extern float softfp_i2f(int32_t a);
347extern float softfp_ui2f(uint32_t a); 326extern float softfp_ui2f(uint32_t a);
348extern int32_t softfp_f2i(float a); 327extern int32_t softfp_f2i(float a);
349extern uint32_t softfp_f2ui(float a);
350#endif 328#endif
351#if LJ_TARGET_MIPS 329#if LJ_TARGET_MIPS
352extern double lj_vm_sfmin(double a, double b); 330extern double lj_vm_sfmin(double a, double b);
@@ -360,10 +338,6 @@ extern double lj_vm_sfmax(double a, double b);
360#define fp64_ul2d __floatundidf 338#define fp64_ul2d __floatundidf
361#define fp64_l2f __floatdisf 339#define fp64_l2f __floatdisf
362#define fp64_ul2f __floatundisf 340#define fp64_ul2f __floatundisf
363#define fp64_d2l __fixdfdi
364#define fp64_d2ul __fixunsdfdi
365#define fp64_f2l __fixsfdi
366#define fp64_f2ul __fixunssfdi
367#else 341#else
368#error "Missing fp64 helper definitions for this compiler" 342#error "Missing fp64 helper definitions for this compiler"
369#endif 343#endif
@@ -374,10 +348,6 @@ extern double fp64_l2d(int64_t a);
374extern double fp64_ul2d(uint64_t a); 348extern double fp64_ul2d(uint64_t a);
375extern float fp64_l2f(int64_t a); 349extern float fp64_l2f(int64_t a);
376extern float fp64_ul2f(uint64_t a); 350extern float fp64_ul2f(uint64_t a);
377extern int64_t fp64_d2l(double a);
378extern uint64_t fp64_d2ul(double a);
379extern int64_t fp64_f2l(float a);
380extern uint64_t fp64_f2ul(float a);
381#endif 351#endif
382 352
383#endif 353#endif
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 05a8e9bb..c0523457 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -350,22 +350,18 @@ enum {
350}; 350};
351 351
352enum { 352enum {
353#if LJ_TARGET_X64 || LJ_TARGET_MIPS64
354 LJ_K64_M2P64, /* -2^64 */
355#endif
353#if LJ_TARGET_X86ORX64 356#if LJ_TARGET_X86ORX64
354 LJ_K64_TOBIT, /* 2^52 + 2^51 */ 357 LJ_K64_TOBIT, /* 2^52 + 2^51 */
355 LJ_K64_2P64, /* 2^64 */ 358 LJ_K64_2P64, /* 2^64 */
356 LJ_K64_M2P64, /* -2^64 */
357#if LJ_32
358 LJ_K64_M2P64_31, /* -2^64 or -2^31 */
359#else
360 LJ_K64_M2P64_31 = LJ_K64_M2P64,
361#endif 359#endif
360#if LJ_TARGET_MIPS64
361 LJ_K64_2P63, /* 2^63 */
362#endif 362#endif
363#if LJ_TARGET_MIPS 363#if LJ_TARGET_MIPS
364 LJ_K64_2P31, /* 2^31 */ 364 LJ_K64_2P31, /* 2^31 */
365#if LJ_64
366 LJ_K64_2P63, /* 2^63 */
367 LJ_K64_M2P64, /* -2^64 */
368#endif
369#endif 365#endif
370#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 366#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
371 LJ_K64_VM_EXIT_HANDLER, 367 LJ_K64_VM_EXIT_HANDLER,
@@ -376,20 +372,19 @@ enum {
376#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) 372#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS)
377 373
378enum { 374enum {
379#if LJ_TARGET_X86ORX64 375#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
380 LJ_K32_M2P64_31, /* -2^64 or -2^31 */ 376 LJ_K32_M2P64, /* -2^64 */
377#endif
378#if LJ_TARGET_MIPS64
379 LJ_K32_2P63, /* 2^63 */
381#endif 380#endif
382#if LJ_TARGET_PPC 381#if LJ_TARGET_PPC
383 LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ 382 LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
384 LJ_K32_2P52, /* 2^52 */ 383 LJ_K32_2P52, /* 2^52 */
385#endif 384#endif
386#if LJ_TARGET_PPC || LJ_TARGET_MIPS 385#if LJ_TARGET_PPC
387 LJ_K32_2P31, /* 2^31 */ 386 LJ_K32_2P31, /* 2^31 */
388#endif 387#endif
389#if LJ_TARGET_MIPS64
390 LJ_K32_2P63, /* 2^63 */
391 LJ_K32_M2P64, /* -2^64 */
392#endif
393#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 388#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
394 LJ_K32_VM_EXIT_HANDLER, 389 LJ_K32_VM_EXIT_HANDLER,
395 LJ_K32_VM_EXIT_INTERP, 390 LJ_K32_VM_EXIT_INTERP,
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 88cb2bdd..d51351b8 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -349,7 +349,7 @@ int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
349 ** integer overflow. Overflow detection still works, since all FPUs 349 ** integer overflow. Overflow detection still works, since all FPUs
350 ** return either MININT or MAXINT, which is then out of range. 350 ** return either MININT or MAXINT, which is then out of range.
351 */ 351 */
352 int32_t i = (int32_t)numV(o); 352 int32_t i = lj_num2int(numV(o));
353 if (i >= a && i <= b) return i; 353 if (i >= a && i <= b) return i;
354#if LJ_HASFFI 354#if LJ_HASFFI
355 } else if (tviscdata(o)) { 355 } else if (tviscdata(o)) {
diff --git a/src/lj_meta.c b/src/lj_meta.c
index c9307615..3f30fafb 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -465,7 +465,8 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o)
465 if (tvisint(o+i)) { 465 if (tvisint(o+i)) {
466 k[i] = intV(o+i); nint++; 466 k[i] = intV(o+i); nint++;
467 } else { 467 } else {
468 k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); 468 int64_t i64;
469 if (lj_num2int_check(numV(o+i), i64, k[i])) nint++;
469 } 470 }
470 } 471 }
471 if (nint == 3) { /* Narrow to integers. */ 472 if (nint == 3) { /* Narrow to integers. */
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 73b186e2..58e5049c 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -981,43 +981,68 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
981 981
982/* -- Number to integer conversion ---------------------------------------- */ 982/* -- Number to integer conversion ---------------------------------------- */
983 983
984#if LJ_SOFTFP 984/*
985LJ_ASMF int32_t lj_vm_tobit(double x); 985** The C standard leaves many aspects of FP to integer conversions as
986#if LJ_TARGET_MIPS64 986** undefined behavior. Portability is a mess, hardware support varies,
987LJ_ASMF int32_t lj_vm_tointg(double x); 987** and modern C compilers are like a box of chocolates -- you never know
988#endif 988** what you're gonna get.
989#endif 989**
990** However, we need 100% matching behavior between the interpreter (asm + C),
991** optimizations (C) and the code generated by the JIT compiler (asm).
992** Mixing Lua numbers with FFI numbers creates some extra requirements.
993**
994** These conversions have been moved to assembler code, even if they seem
995** trivial, to foil unanticipated C compiler 'optimizations' with the
996** surrounding code. Only the unchecked double to int32_t conversion
997** is still in C, because it ought to be pretty safe -- we'll see.
998**
999** These macros also serve to document all places where FP to integer
1000** conversions happen.
1001*/
990 1002
991static LJ_AINLINE int32_t lj_num2bit(lua_Number n) 1003/* Unchecked double to int32_t conversion. */
992{ 1004#define lj_num2int(n) ((int32_t)(n))
993#if LJ_SOFTFP
994 return lj_vm_tobit(n);
995#else
996 TValue o;
997 o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */
998 return (int32_t)o.u32.lo;
999#endif
1000}
1001 1005
1002#define lj_num2int(n) ((int32_t)(n)) 1006/* Unchecked double to arch/os-dependent signed integer type conversion.
1007** This assumes the 32/64-bit signed conversions are NOT range-extended.
1008*/
1009#define lj_num2int_type(n, tp) ((tp)(n))
1003 1010
1004/* 1011/* Convert a double to int32_t and check for exact conversion.
1005** This must match the JIT backend behavior. In particular for archs 1012** Returns the zero-extended int32_t on success. -0 is OK, too.
1006** that don't have a common hardware instruction for this conversion. 1013** Returns 0x8000000080000000LL on failure (simplifies range checks).
1007** Note that signed FP to unsigned int conversions have an undefined
1008** result and should never be relied upon in portable FFI code.
1009** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
1010*/ 1014*/
1011static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) 1015LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x);
1012{ 1016
1013#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS 1017/* Check for exact conversion only, without storing the result. */
1014 int64_t i = (int64_t)n; 1018#define lj_num2int_ok(x) (lj_vm_num2int_check((x)) >= 0)
1015 if (i < 0) i = (int64_t)(n - 18446744073709551616.0); 1019
1016 return (uint64_t)i; 1020/* Check for exact conversion and conditionally store result.
1017#else 1021** Note: conditions that fail for 0x80000000 may check only the lower
1018 return (uint64_t)n; 1022** 32 bits. This generates good code for both 32 and 64 bit archs.
1019#endif 1023*/
1020} 1024#define lj_num2int_cond(x, i64, i, cond) \
1025 (i64 = lj_vm_num2int_check((x)), cond ? (i = (int32_t)i64, 1) : 0)
1026
1027/* This is the generic check for a full-range int32_t result. */
1028#define lj_num2int_check(x, i64, i) \
1029 lj_num2int_cond((x), i64, i, i64 >= 0)
1030
1031/* Predictable conversion from double to int64_t or uint64_t.
1032** Truncates towards zero. Out-of-range values, NaN and +-Inf return
1033** an arch-dependent result, but do not cause C undefined behavior.
1034** The uint64_t conversion accepts the union of the unsigned + signed range.
1035*/
1036LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x);
1037LJ_ASMF LJ_CONSTF int64_t lj_vm_num2u64(double x);
1038
1039#define lj_num2i64(x) (lj_vm_num2i64((x)))
1040#define lj_num2u64(x) (lj_vm_num2u64((x)))
1041
1042/* Lua BitOp conversion semantics use the 2^52 + 2^51 trick. */
1043LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x);
1044
1045#define lj_num2bit(x) lj_vm_tobit((x))
1021 1046
1022static LJ_AINLINE int32_t numberVint(cTValue *o) 1047static LJ_AINLINE int32_t numberVint(cTValue *o)
1023{ 1048{
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 6fdf4566..456c04b2 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -303,17 +303,18 @@ LJFOLDF(kfold_intarith)
303 return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o)); 303 return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
304} 304}
305 305
306/* Forward declaration. */
307static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
308 IROp op);
309
306LJFOLD(ADDOV KINT KINT) 310LJFOLD(ADDOV KINT KINT)
307LJFOLD(SUBOV KINT KINT) 311LJFOLD(SUBOV KINT KINT)
308LJFOLD(MULOV KINT KINT) 312LJFOLD(MULOV KINT KINT)
309LJFOLDF(kfold_intovarith) 313LJFOLDF(kfold_intovarith)
310{ 314{
311 lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i, 315 int64_t k = kfold_int64arith(J, (int64_t)fleft->i, (int64_t)fright->i,
312 fins->o - IR_ADDOV); 316 (IROp)((int)fins->o - (int)IR_ADDOV + (int)IR_ADD));
313 int32_t k = lj_num2int(n); 317 return checki32(k) ? INTFOLD(k) : FAILFOLD;
314 if (n != (lua_Number)k)
315 return FAILFOLD;
316 return INTFOLD(k);
317} 318}
318 319
319LJFOLD(BNOT KINT) 320LJFOLD(BNOT KINT)
@@ -368,11 +369,11 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
368 IROp op) 369 IROp op)
369{ 370{
370 UNUSED(J); 371 UNUSED(J);
371#if LJ_HASFFI
372 switch (op) { 372 switch (op) {
373 case IR_ADD: k1 += k2; break; 373 case IR_ADD: k1 += k2; break;
374 case IR_SUB: k1 -= k2; break; 374 case IR_SUB: k1 -= k2; break;
375 case IR_MUL: k1 *= k2; break; 375 case IR_MUL: k1 *= k2; break;
376#if LJ_HASFFI
376 case IR_BAND: k1 &= k2; break; 377 case IR_BAND: k1 &= k2; break;
377 case IR_BOR: k1 |= k2; break; 378 case IR_BOR: k1 |= k2; break;
378 case IR_BXOR: k1 ^= k2; break; 379 case IR_BXOR: k1 ^= k2; break;
@@ -382,11 +383,8 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
382 case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break; 383 case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
383 case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break; 384 case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
384 default: lj_assertJ(0, "bad IR op %d", op); break; 385 default: lj_assertJ(0, "bad IR op %d", op); break;
385 }
386#else
387 UNUSED(k2); UNUSED(op);
388 lj_assertJ(0, "FFI IR op without FFI");
389#endif 386#endif
387 }
390 return k1; 388 return k1;
391} 389}
392 390
@@ -883,8 +881,11 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
883LJFOLDF(kfold_conv_knum_int_num) 881LJFOLDF(kfold_conv_knum_int_num)
884{ 882{
885 lua_Number n = knumleft; 883 lua_Number n = knumleft;
886 int32_t k = lj_num2int(n); 884 if (irt_isguard(fins->t)) {
887 if (irt_isguard(fins->t) && n != (lua_Number)k) { 885 int64_t i64;
886 int32_t k;
887 if (lj_num2int_check(n, i64, k))
888 return INTFOLD(k);
888 /* We're about to create a guard which always fails, like CONV +1.5. 889 /* We're about to create a guard which always fails, like CONV +1.5.
889 ** Some pathological loops cause this during LICM, e.g.: 890 ** Some pathological loops cause this during LICM, e.g.:
890 ** local x,k,t = 0,1.5,{1,[1.5]=2} 891 ** local x,k,t = 0,1.5,{1,[1.5]=2}
@@ -892,27 +893,15 @@ LJFOLDF(kfold_conv_knum_int_num)
892 ** assert(x == 300) 893 ** assert(x == 300)
893 */ 894 */
894 return FAILFOLD; 895 return FAILFOLD;
896 } else {
897 return INTFOLD(lj_num2int(n));
895 } 898 }
896 return INTFOLD(k);
897}
898
899LJFOLD(CONV KNUM IRCONV_U32_NUM)
900LJFOLDF(kfold_conv_knum_u32_num)
901{
902#ifdef _MSC_VER
903 { /* Workaround for MSVC bug. */
904 volatile uint32_t u = (uint32_t)knumleft;
905 return INTFOLD((int32_t)u);
906 }
907#else
908 return INTFOLD((int32_t)(uint32_t)knumleft);
909#endif
910} 899}
911 900
912LJFOLD(CONV KNUM IRCONV_I64_NUM) 901LJFOLD(CONV KNUM IRCONV_I64_NUM)
913LJFOLDF(kfold_conv_knum_i64_num) 902LJFOLDF(kfold_conv_knum_i64_num)
914{ 903{
915 return INT64FOLD((uint64_t)(int64_t)knumleft); 904 return INT64FOLD((uint64_t)lj_num2i64(knumleft));
916} 905}
917 906
918LJFOLD(CONV KNUM IRCONV_U64_NUM) 907LJFOLD(CONV KNUM IRCONV_U64_NUM)
@@ -1135,7 +1124,6 @@ LJFOLDF(shortcut_conv_num_int)
1135} 1124}
1136 1125
1137LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ 1126LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
1138LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */
1139LJFOLDF(simplify_conv_int_num) 1127LJFOLDF(simplify_conv_int_num)
1140{ 1128{
1141 /* Fold even across PHI to avoid expensive num->int conversions in loop. */ 1129 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
@@ -1334,6 +1322,24 @@ LJFOLDF(narrow_convert)
1334 return lj_opt_narrow_convert(J); 1322 return lj_opt_narrow_convert(J);
1335} 1323}
1336 1324
1325LJFOLD(XSTORE any CONV)
1326LJFOLDF(xstore_conv)
1327{
1328#if LJ_64
1329 PHIBARRIER(fright);
1330 if (!irt_is64(fins->t) &&
1331 irt_type(fins->t) == (IRType)((fright->op2&IRCONV_DSTMASK)>>IRCONV_DSH) &&
1332 ((fright->op2&IRCONV_SRCMASK) == IRT_I64 ||
1333 (fright->op2&IRCONV_SRCMASK) == IRT_U64)) {
1334 fins->op2 = fright->op1;
1335 return RETRYFOLD;
1336 }
1337#else
1338 UNUSED(J);
1339#endif
1340 return NEXTFOLD;
1341}
1342
1337/* -- Integer algebraic simplifications ----------------------------------- */ 1343/* -- Integer algebraic simplifications ----------------------------------- */
1338 1344
1339LJFOLD(ADD any KINT) 1345LJFOLD(ADD any KINT)
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 01b5833d..3085c837 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -281,22 +281,20 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
281 return 0; 281 return 0;
282 } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ 282 } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
283 lua_Number n = ir_knum(ir)->n; 283 lua_Number n = ir_knum(ir)->n;
284 int64_t i64;
285 int32_t k;
284 if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { 286 if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
285 /* Allows a wider range of constants. */ 287 /* Allows a wider range of constants, if const doesn't lose precision. */
286 int64_t k64 = (int64_t)n; 288 if (lj_num2int_check(n, i64, k)) {
287 if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */
288 *nc->sp++ = NARROWINS(NARROW_INT, 0);
289 *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */
290 return 0;
291 }
292 } else {
293 int32_t k = lj_num2int(n);
294 /* Only if constant is a small integer. */
295 if (checki16(k) && n == (lua_Number)k) {
296 *nc->sp++ = NARROWINS(NARROW_INT, 0); 289 *nc->sp++ = NARROWINS(NARROW_INT, 0);
297 *nc->sp++ = (NarrowIns)k; 290 *nc->sp++ = (NarrowIns)k;
298 return 0; 291 return 0;
299 } 292 }
293 } else if (lj_num2int_cond(n, i64, k, checki16((int32_t)i64))) {
294 /* Only if constant is a small integer. */
295 *nc->sp++ = NARROWINS(NARROW_INT, 0);
296 *nc->sp++ = (NarrowIns)k;
297 return 0;
300 } 298 }
301 return 10; /* Never narrow other FP constants (this is rare). */ 299 return 10; /* Never narrow other FP constants (this is rare). */
302 } 300 }
@@ -512,12 +510,6 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
512 510
513/* -- Narrowing of arithmetic operators ----------------------------------- */ 511/* -- Narrowing of arithmetic operators ----------------------------------- */
514 512
515/* Check whether a number fits into an int32_t (-0 is ok, too). */
516static int numisint(lua_Number n)
517{
518 return (n == (lua_Number)lj_num2int(n));
519}
520
521/* Convert string to number. Error out for non-numeric string values. */ 513/* Convert string to number. Error out for non-numeric string values. */
522static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o) 514static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o)
523{ 515{
@@ -539,8 +531,8 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
539 /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ 531 /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
540 if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && 532 if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
541 tref_isinteger(rb) && tref_isinteger(rc) && 533 tref_isinteger(rb) && tref_isinteger(rc) &&
542 numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), 534 lj_num2int_ok(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
543 (int)op - (int)IR_ADD))) 535 (int)op - (int)IR_ADD)))
544 return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); 536 return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc);
545 if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); 537 if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT);
546 if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); 538 if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
@@ -591,7 +583,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
591static int narrow_forl(jit_State *J, cTValue *o) 583static int narrow_forl(jit_State *J, cTValue *o)
592{ 584{
593 if (tvisint(o)) return 1; 585 if (tvisint(o)) return 1;
594 if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); 586 if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return lj_num2int_ok(numV(o));
595 return 0; 587 return 0;
596} 588}
597 589
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 8d025911..d29d1eab 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -573,13 +573,9 @@ static void split_ir(jit_State *J)
573 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ 573 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
574 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 574 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
575#if LJ_SOFTFP 575#if LJ_SOFTFP
576 lj_assertJ(st != IRT_FLOAT, "bad CONV *64.float emitted");
576 if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ 577 if (st == IRT_NUM) { /* NUM to 64 bit int conv. */
577 hi = split_call_l(J, hisubst, oir, ir, 578 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_num2u64);
578 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
579 } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */
580 nir->o = IR_CALLN;
581 nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
582 hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
583 } 579 }
584#else 580#else
585 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ 581 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
@@ -692,8 +688,9 @@ static void split_ir(jit_State *J)
692 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; 688 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
693 } 689 }
694 } else if (st == IRT_FLOAT) { 690 } else if (st == IRT_FLOAT) {
691 lj_assertJ(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
695 nir->o = IR_CALLN; 692 nir->o = IR_CALLN;
696 nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; 693 nir->op2 = IRCALL_softfp_f2i;
697 } else 694 } else
698#endif 695#endif
699#if LJ_SOFTFP 696#if LJ_SOFTFP
@@ -705,9 +702,7 @@ static void split_ir(jit_State *J)
705 } else { 702 } else {
706 split_call_l(J, hisubst, oir, ir, 703 split_call_l(J, hisubst, oir, ir,
707#if LJ_32 && LJ_HASFFI 704#if LJ_32 && LJ_HASFFI
708 st == IRT_NUM ? 705 st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i
709 (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
710 (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
711#else 706#else
712 IRCALL_softfp_d2i 707 IRCALL_softfp_d2i
713#endif 708#endif
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 181ce4d7..832f6bf4 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -522,9 +522,9 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg)
522 ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv)); 522 ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv));
523 else 523 else
524#else 524#else
525 lua_Number n = expr_numberV(e); 525 int64_t i64;
526 int32_t k = lj_num2int(n); 526 int32_t k;
527 if (checki16(k) && n == (lua_Number)k) 527 if (lj_num2int_cond(expr_numberV(e), i64, k, checki16((int32_t)i64)))
528 ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); 528 ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k);
529 else 529 else
530#endif 530#endif
@@ -782,8 +782,9 @@ static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2)
782 setnumV(&o, n); 782 setnumV(&o, n);
783 if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */ 783 if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */
784 if (LJ_DUALNUM) { 784 if (LJ_DUALNUM) {
785 int32_t k = lj_num2int(n); 785 int64_t i64;
786 if ((lua_Number)k == n) { 786 int32_t k;
787 if (lj_num2int_check(n, i64, k)) {
787 setintV(&e1->u.nval, k); 788 setintV(&e1->u.nval, k);
788 return 1; 789 return 1;
789 } 790 }
@@ -1386,10 +1387,10 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr)
1386 if (tvisnum(&n->key)) { 1387 if (tvisnum(&n->key)) {
1387 TValue *tv = &((TValue *)kptr)[kidx]; 1388 TValue *tv = &((TValue *)kptr)[kidx];
1388 if (LJ_DUALNUM) { 1389 if (LJ_DUALNUM) {
1389 lua_Number nn = numV(&n->key); 1390 int64_t i64;
1390 int32_t k = lj_num2int(nn); 1391 int32_t k;
1391 lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); 1392 lj_assertFS(!tvismzero(&n->key), "unexpected -0 key");
1392 if ((lua_Number)k == nn) 1393 if (lj_num2int_check(numV(&n->key), i64, k))
1393 setintV(tv, k); 1394 setintV(tv, k);
1394 else 1395 else
1395 *tv = n->key; 1396 *tv = n->key;
@@ -1656,9 +1657,9 @@ static void expr_index(FuncState *fs, ExpDesc *t, ExpDesc *e)
1656 } 1657 }
1657 } 1658 }
1658#else 1659#else
1659 lua_Number n = expr_numberV(e); 1660 int64_t i64;
1660 int32_t k = lj_num2int(n); 1661 int32_t k;
1661 if (checku8(k) && n == (lua_Number)k) { 1662 if (lj_num2int_cond(expr_numberV(e), i64, k, checku8((int32_t)i64))) {
1662 t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ 1663 t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */
1663 return; 1664 return;
1664 } 1665 }
diff --git a/src/lj_record.c b/src/lj_record.c
index 6543f274..536d7171 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -351,9 +351,14 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
351 } else { 351 } else {
352 cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); 352 cTValue *tv = proto_knumtv(J->pt, bc_d(ins));
353 if (t == IRT_INT) { 353 if (t == IRT_INT) {
354 int32_t k = numberVint(tv); 354 if (tvisint(tv)) {
355 if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ 355 return lj_ir_kint(J, intV(tv));
356 return lj_ir_kint(J, k); 356 } else {
357 int64_t i64;
358 int32_t k;
359 if (lj_num2int_check(numV(tv), i64, k)) /* -0 is ok here. */
360 return lj_ir_kint(J, k);
361 }
357 return 0; /* Type mismatch. */ 362 return 0; /* Type mismatch. */
358 } else { 363 } else {
359 return lj_ir_knum(J, numberVnum(tv)); 364 return lj_ir_knum(J, numberVnum(tv));
@@ -1426,9 +1431,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
1426 /* Integer keys are looked up in the array part first. */ 1431 /* Integer keys are looked up in the array part first. */
1427 key = ix->key; 1432 key = ix->key;
1428 if (tref_isnumber(key)) { 1433 if (tref_isnumber(key)) {
1429 int32_t k = numberVint(&ix->keyv); 1434 int32_t k;
1430 if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) 1435 if (tvisint(&ix->keyv)) {
1431 k = LJ_MAX_ASIZE; 1436 k = intV(&ix->keyv);
1437 } else {
1438 int64_t i64;
1439 if (!lj_num2int_check(numV(&ix->keyv), i64, k)) k = LJ_MAX_ASIZE;
1440 }
1432 if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ 1441 if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */
1433 TRef ikey = lj_opt_narrow_index(J, key); 1442 TRef ikey = lj_opt_narrow_index(J, key);
1434 TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); 1443 TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
index bb649fc8..0936298d 100644
--- a/src/lj_strfmt.c
+++ b/src/lj_strfmt.c
@@ -351,7 +351,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
351/* Add number formatted as signed integer to buffer. */ 351/* Add number formatted as signed integer to buffer. */
352SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) 352SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
353{ 353{
354 int64_t k = (int64_t)n; 354 int64_t k = lj_num2i64(n);
355 if (checki32(k) && sf == STRFMT_INT) 355 if (checki32(k) && sf == STRFMT_INT)
356 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ 356 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
357 else 357 else
@@ -361,12 +361,7 @@ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
361/* Add number formatted as unsigned integer to buffer. */ 361/* Add number formatted as unsigned integer to buffer. */
362SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) 362SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
363{ 363{
364 int64_t k; 364 return lj_strfmt_putfxint(sb, sf, lj_num2u64(n));
365 if (n >= 9223372036854775808.0)
366 k = (int64_t)(n - 18446744073709551616.0);
367 else
368 k = (int64_t)n;
369 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
370} 365}
371 366
372/* Format stack arguments to buffer. */ 367/* Format stack arguments to buffer. */
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index 502c78e9..fbb959c5 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -523,10 +523,10 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
523 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); 523 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
524 524
525 /* Try to convert number to integer, if requested. */ 525 /* Try to convert number to integer, if requested. */
526 if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT) && !tvismzero(o)) { 526 if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) {
527 double n = o->n; 527 int64_t tmp;
528 int32_t i = lj_num2int(n); 528 if (lj_num2int_check(o->n, tmp, o->i) && !tvismzero(o))
529 if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; } 529 return STRSCAN_INT;
530 } 530 }
531 return fmt; 531 return fmt;
532 } 532 }
diff --git a/src/lj_tab.c b/src/lj_tab.c
index 62e33611..2959fadb 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -295,9 +295,9 @@ static uint32_t countint(cTValue *key, uint32_t *bins)
295{ 295{
296 lj_assertX(!tvisint(key), "bad integer key"); 296 lj_assertX(!tvisint(key), "bad integer key");
297 if (tvisnum(key)) { 297 if (tvisnum(key)) {
298 lua_Number nk = numV(key); 298 int64_t i64;
299 int32_t k = lj_num2int(nk); 299 int32_t k;
300 if ((uint32_t)k < LJ_MAX_ASIZE && nk == (lua_Number)k) { 300 if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < LJ_MAX_ASIZE)) {
301 bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++; 301 bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++;
302 return 1; 302 return 1;
303 } 303 }
@@ -409,9 +409,9 @@ cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key)
409 if (tv) 409 if (tv)
410 return tv; 410 return tv;
411 } else if (tvisnum(key)) { 411 } else if (tvisnum(key)) {
412 lua_Number nk = numV(key); 412 int64_t i64;
413 int32_t k = lj_num2int(nk); 413 int32_t k;
414 if (nk == (lua_Number)k) { 414 if (lj_num2int_check(numV(key), i64, k)) {
415 cTValue *tv = lj_tab_getint(t, k); 415 cTValue *tv = lj_tab_getint(t, k);
416 if (tv) 416 if (tv)
417 return tv; 417 return tv;
@@ -542,9 +542,9 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
542 } else if (tvisint(key)) { 542 } else if (tvisint(key)) {
543 return lj_tab_setint(L, t, intV(key)); 543 return lj_tab_setint(L, t, intV(key));
544 } else if (tvisnum(key)) { 544 } else if (tvisnum(key)) {
545 lua_Number nk = numV(key); 545 int64_t i64;
546 int32_t k = lj_num2int(nk); 546 int32_t k;
547 if (nk == (lua_Number)k) 547 if (lj_num2int_check(numV(key), i64, k))
548 return lj_tab_setint(L, t, k); 548 return lj_tab_setint(L, t, k);
549 if (tvisnan(key)) 549 if (tvisnan(key))
550 lj_err_msg(L, LJ_ERR_NANIDX); 550 lj_err_msg(L, LJ_ERR_NANIDX);
@@ -580,9 +580,9 @@ uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key)
580 setnumV(&tmp, (lua_Number)k); 580 setnumV(&tmp, (lua_Number)k);
581 key = &tmp; 581 key = &tmp;
582 } else if (tvisnum(key)) { 582 } else if (tvisnum(key)) {
583 lua_Number nk = numV(key); 583 int64_t i64;
584 int32_t k = lj_num2int(nk); 584 int32_t k;
585 if ((uint32_t)k < t->asize && nk == (lua_Number)k) 585 if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < t->asize))
586 return (uint32_t)k + 1; 586 return (uint32_t)k + 1;
587 } 587 }
588 if (!tvisnil(key)) { 588 if (!tvisnil(key)) {
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index fa32a5d4..193102ee 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -314,6 +314,7 @@ typedef enum {
314 XO_FSTPq = XO_(dd), XOg_FSTPq = 3, 314 XO_FSTPq = XO_(dd), XOg_FSTPq = 3,
315 XO_FISTPq = XO_(df), XOg_FISTPq = 7, 315 XO_FISTPq = XO_(df), XOg_FISTPq = 7,
316 XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, 316 XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1,
317 XO_FADDd = XO_(d8), XOg_FADDd = 0,
317 XO_FADDq = XO_(dc), XOg_FADDq = 0, 318 XO_FADDq = XO_(dc), XOg_FADDq = 0,
318 XO_FLDCW = XO_(d9), XOg_FLDCW = 5, 319 XO_FLDCW = XO_(d9), XOg_FLDCW = 5,
319 XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 320 XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 47d7faa5..ad329540 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -317,32 +317,34 @@ void lj_trace_initstate(global_State *g)
317 tv[1].u64 = U64x(80000000,00000000); 317 tv[1].u64 = U64x(80000000,00000000);
318 318
319 /* Initialize 32/64 bit constants. */ 319 /* Initialize 32/64 bit constants. */
320#if LJ_TARGET_X64 || LJ_TARGET_MIPS64
321 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
322#endif
320#if LJ_TARGET_X86ORX64 323#if LJ_TARGET_X86ORX64
321 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); 324 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
322#if LJ_32
323 J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
324#endif
325 J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); 325 J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
326 J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
327#endif 326#endif
327#if LJ_TARGET_MIPS64
328 J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
329#endif
330#if LJ_TARGET_MIPS
331 J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
332#endif
333
328#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 334#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
329 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); 335 J->k32[LJ_K32_M2P64] = 0xdf800000;
336#endif
337#if LJ_TARGET_MIPS64
338 J->k32[LJ_K32_2P63] = 0x5f000000;
330#endif 339#endif
331#if LJ_TARGET_PPC 340#if LJ_TARGET_PPC
332 J->k32[LJ_K32_2P52_2P31] = 0x59800004; 341 J->k32[LJ_K32_2P52_2P31] = 0x59800004;
333 J->k32[LJ_K32_2P52] = 0x59800000; 342 J->k32[LJ_K32_2P52] = 0x59800000;
334#endif 343#endif
335#if LJ_TARGET_PPC || LJ_TARGET_MIPS 344#if LJ_TARGET_PPC
336 J->k32[LJ_K32_2P31] = 0x4f000000; 345 J->k32[LJ_K32_2P31] = 0x4f000000;
337#endif 346#endif
338#if LJ_TARGET_MIPS 347
339 J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
340#if LJ_64
341 J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
342 J->k32[LJ_K32_2P63] = 0x5f000000;
343 J->k32[LJ_K32_M2P64] = 0xdf800000;
344#endif
345#endif
346#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 348#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
347 J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; 349 J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler;
348 J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; 350 J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp;
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 9cc42613..96ad2d07 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -37,13 +37,19 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]);
37#if LJ_TARGET_PPC 37#if LJ_TARGET_PPC
38void lj_vm_cachesync(void *start, void *end); 38void lj_vm_cachesync(void *start, void *end);
39#endif 39#endif
40LJ_ASMF double lj_vm_foldarith(double x, double y, int op); 40LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op);
41#if LJ_HASJIT 41#if LJ_HASJIT
42LJ_ASMF double lj_vm_foldfpm(double x, int op); 42LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op);
43#endif 43#endif
44#if !LJ_ARCH_HASFPU 44#if LJ_SOFTFP && LJ_TARGET_MIPS64
45/* Declared in lj_obj.h: LJ_ASMF int32_t lj_vm_tobit(double x); */ 45LJ_ASMF LJ_CONSTF int32_t lj_vm_tointg(double x);
46#endif 46#endif
47/* Declared in lj_obj.h:
48** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x);
49** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x);
50** LJ_ASMF LJ_CONSTF uint64_t lj_vm_num2u64(double x);
51** LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x);
52*/
47 53
48/* Dispatch targets for recording and hooks. */ 54/* Dispatch targets for recording and hooks. */
49LJ_ASMF void lj_vm_record(void); 55LJ_ASMF void lj_vm_record(void);
@@ -62,15 +68,15 @@ LJ_ASMF char lj_vm_exit_interp[];
62#define lj_vm_floor floor 68#define lj_vm_floor floor
63#define lj_vm_ceil ceil 69#define lj_vm_ceil ceil
64#else 70#else
65LJ_ASMF double lj_vm_floor(double); 71LJ_ASMF LJ_CONSTF double lj_vm_floor(double);
66LJ_ASMF double lj_vm_ceil(double); 72LJ_ASMF LJ_CONSTF double lj_vm_ceil(double);
67#if LJ_TARGET_ARM 73#if LJ_TARGET_ARM
68LJ_ASMF double lj_vm_floor_sf(double); 74LJ_ASMF LJ_CONSTF double lj_vm_floor_sf(double);
69LJ_ASMF double lj_vm_ceil_sf(double); 75LJ_ASMF LJ_CONSTF double lj_vm_ceil_sf(double);
70#endif 76#endif
71#endif 77#endif
72#ifdef LUAJIT_NO_LOG2 78#ifdef LUAJIT_NO_LOG2
73LJ_ASMF double lj_vm_log2(double); 79LJ_ASMF LJ_CONSTF double lj_vm_log2(double);
74#else 80#else
75#define lj_vm_log2 log2 81#define lj_vm_log2 log2
76#endif 82#endif
@@ -80,16 +86,16 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
80 86
81#if LJ_HASJIT 87#if LJ_HASJIT
82#if LJ_TARGET_X86ORX64 88#if LJ_TARGET_X86ORX64
83LJ_ASMF void lj_vm_floor_sse(void); 89LJ_ASMF LJ_CONSTF void lj_vm_floor_sse(void);
84LJ_ASMF void lj_vm_ceil_sse(void); 90LJ_ASMF LJ_CONSTF void lj_vm_ceil_sse(void);
85LJ_ASMF void lj_vm_trunc_sse(void); 91LJ_ASMF LJ_CONSTF void lj_vm_trunc_sse(void);
86#endif 92#endif
87#if LJ_TARGET_PPC || LJ_TARGET_ARM64 93#if LJ_TARGET_PPC || LJ_TARGET_ARM64
88#define lj_vm_trunc trunc 94#define lj_vm_trunc trunc
89#else 95#else
90LJ_ASMF double lj_vm_trunc(double); 96LJ_ASMF LJ_CONSTF double lj_vm_trunc(double);
91#if LJ_TARGET_ARM 97#if LJ_TARGET_ARM
92LJ_ASMF double lj_vm_trunc_sf(double); 98LJ_ASMF LJ_CONSTF double lj_vm_trunc_sf(double);
93#endif 99#endif
94#endif 100#endif
95#if LJ_HASFFI 101#if LJ_HASFFI
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 2c9b96cc..1495102f 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -59,7 +59,7 @@ double lj_vm_foldarith(double x, double y, int op)
59 case IR_NEG - IR_ADD: return -x; break; 59 case IR_NEG - IR_ADD: return -x; break;
60 case IR_ABS - IR_ADD: return fabs(x); break; 60 case IR_ABS - IR_ADD: return fabs(x); break;
61#if LJ_HASJIT 61#if LJ_HASJIT
62 case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; 62 case IR_LDEXP - IR_ADD: return ldexp(x, lj_num2int(y)); break;
63 case IR_MIN - IR_ADD: return x < y ? x : y; break; 63 case IR_MIN - IR_ADD: return x < y ? x : y; break;
64 case IR_MAX - IR_ADD: return x > y ? x : y; break; 64 case IR_MAX - IR_ADD: return x > y ? x : y; break;
65#endif 65#endif
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 86bef0cf..2cd7eedb 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2452,6 +2452,118 @@ static void build_subroutines(BuildCtx *ctx)
2452 | bx lr 2452 | bx lr
2453 | 2453 |
2454 |//----------------------------------------------------------------------- 2454 |//-----------------------------------------------------------------------
2455 |//-- Number conversion functions ----------------------------------------
2456 |//-----------------------------------------------------------------------
2457 |
2458 |// int64_t lj_vm_num2int_check(double x)
2459 |->vm_num2int_check:
2460 |.if FPU
2461 |.if not HFABI
2462 | vmov d0, CARG1, CARG2
2463 |.endif
2464 | vcvt.s32.f64 s4, d0
2465 | vcvt.f64.s32 d1, s4
2466 | vcmp.f64 d0, d1
2467 | vmrs
2468 | bne >1
2469 | vmov CRET1, s4
2470 | mov CRET2, #0
2471 | bx lr
2472 |
2473 |.else
2474 |
2475 | asr CARG4, CARG2, #31 // sign = 0 or -1.
2476 | lsl CARG2, CARG2, #1
2477 | orrs RB, CARG2, CARG1
2478 | bxeq lr // Return 0 for +-0.
2479 | mov RB, #1024
2480 | add RB, RB, #30
2481 | sub RB, RB, CARG2, lsr #21
2482 | cmp RB, #32
2483 | bhs >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
2484 | lsr CARG3, CARG1, #21
2485 | orr CARG2, CARG3, CARG2, lsl #10 // Left-aligned mantissa.
2486 | rsb CARG3, RB, #32
2487 | lsl CARG3, CARG2, CARG3
2488 | orr CARG2, CARG2, #0x80000000 // Merge leading 1.
2489 | orrs CARG3, CARG3, CARG1, lsl #11
2490 | lsr CARG1, CARG2, RB // lo = right-aligned absolute value.
2491 | bne >1 // Fail if fractional part != 0.
2492 | adds CRET1, CARG1, CARG4
2493 | bmi >1 // Fail if lo+sign >= 0x80000000.
2494 | eor CRET1, CRET1, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign.
2495 | mov CRET2, #0
2496 | bx lr
2497 |.endif
2498 |1:
2499 | mov CRET1, #0x80000000
2500 | mov CRET2, #0x80000000
2501 | bx lr
2502 |
2503 |// int64_t lj_vm_num2i64(double x)
2504 |->vm_num2i64:
2505 |// fallthrough, same as lj_vm_num2u64.
2506 |
2507 |// uint64_t lj_vm_num2u64(double x)
2508 |->vm_num2u64:
2509 |.if HFABI
2510 | vmov CARG1, CARG2, d0
2511 |.endif
2512 | lsl RB, CARG2, #1
2513 | lsr RB, RB, #21
2514 | sub RB, RB, #1020
2515 | sub RB, RB, #3
2516 | cmp RB, #116
2517 | bhs >3 // Exponent out of range.
2518 | asr CARG4, CARG2, #31 // sign = 0 or -1.
2519 | lsl CARG2, CARG2, #12
2520 | lsr CARG2, CARG2, #12
2521 | rsbs RB, RB, #52
2522 | orr CARG2, CARG2, #0x00100000
2523 | bmi >2 // Shift mantissa left or right?
2524 | lsr CARG1, CARG1, RB // 64 bit right shift.
2525 | lsr CARG3, CARG2, RB
2526 | rsb RB, RB, #32
2527 | orr CARG1, CARG1, CARG2, lsl RB
2528 | rsb RB, RB, #0
2529 | orr CARG1, CARG1, CARG2, lsr RB
2530 | adds CRET1, CARG1, CARG4 // m = sign?-m:m = (m+sign)^sign.
2531 | adc CRET2, CARG3, CARG4
2532 |1:
2533 | eor CRET1, CRET1, CARG4
2534 | eor CRET2, CRET2, CARG4
2535 | bx lr
2536 |2:
2537 | rsb RB, RB, #0
2538 | lsl CARG2, CARG2, RB // 64 bit left shift.
2539 | lsl CARG3, CARG1, RB
2540 | sub RB, RB, #32
2541 | orr CARG2, CARG2, CARG1, lsl RB
2542 | rsb RB, RB, #0
2543 | orr CARG2, CARG2, CARG1, lsr RB
2544 | adds CRET1, CARG3, CARG4
2545 | adc CRET2, CARG2, CARG4
2546 | b <1
2547 |3:
2548 | mov CRET1, #0
2549 | mov CRET2, #0
2550 | bx lr
2551 |
2552 |// int32_t lj_vm_tobit(double x)
2553 |.if FPU
2554 |->vm_tobit:
2555 | vldr d1, >9
2556 |.if not HFABI
2557 | vmov d0, CARG1, CARG2
2558 |.endif
2559 | vadd.f64 d0, d0, d1
2560 | vmov CARG1, s0
2561 | bx lr
2562 |9:
2563 | .long 0, 0x43380000 // (double)(2^52 + 2^51).
2564 |.endif
2565 |
2566 |//-----------------------------------------------------------------------
2455 |//-- Miscellaneous functions -------------------------------------------- 2567 |//-- Miscellaneous functions --------------------------------------------
2456 |//----------------------------------------------------------------------- 2568 |//-----------------------------------------------------------------------
2457 | 2569 |
@@ -4097,7 +4209,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4097 | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] 4209 | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
4098 | // Subsumes ins_next1 and ins_next2. 4210 | // Subsumes ins_next1 and ins_next2.
4099 | ldr INS, TRACE:CARG1->startins 4211 | ldr INS, TRACE:CARG1->startins
4100 | bfi INS, OP, #0, #8 4212 | bic INS, INS, #0xff
4213 | orr INS, INS, OP
4101 | str INS, [PC], #4 4214 | str INS, [PC], #4
4102 | b <1 4215 | b <1
4103 |.endif 4216 |.endif
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index a437b657..eb6d0c2f 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -2156,6 +2156,42 @@ static void build_subroutines(BuildCtx *ctx)
2156 | ret 2156 | ret
2157 | 2157 |
2158 |//----------------------------------------------------------------------- 2158 |//-----------------------------------------------------------------------
2159 |//-- Number conversion functions ----------------------------------------
2160 |//-----------------------------------------------------------------------
2161 |
2162 |// int64_t lj_vm_num2int_check(double x)
2163 |->vm_num2int_check:
2164 | fcvtzs CRET1w, FARG1
2165 | scvtf FARG2, CRET1w
2166 | fcmp FARG2, FARG1
2167 | bne >1
2168 | ret
2169 |1:
2170 | mov CRET1, #0x8000000080000000
2171 | ret
2172 |
2173 |// int64_t lj_vm_num2i64(double x)
2174 |->vm_num2i64:
2175 | fcvtzs CRET1, FARG1
2176 | ret
2177 |
2178 |// uint64_t lj_vm_num2u64(double x)
2179 |->vm_num2u64:
2180 | fcvtzs CRET1, FARG1
2181 | fcvtzu CARG2, FARG1
2182 | cmn CRET1, #1 // Set overflow if CRET1 == INT64_MAX.
2183 | csel CRET1, CRET1, CARG2, vc // No overflow ? i64 : u64.
2184 | ret
2185 |
2186 |// int32_t lj_vm_tobit(double x)
2187 |->vm_tobit:
2188 | movz CRET1, #0x4338, lsl #48 // 2^52 + 2^51.
2189 | fmov FARG2, CRET1
2190 | fadd FARG1, FARG1, FARG2
2191 | fmov CRET1w, s0
2192 | ret
2193 |
2194 |//-----------------------------------------------------------------------
2159 |//-- Miscellaneous functions -------------------------------------------- 2195 |//-- Miscellaneous functions --------------------------------------------
2160 |//----------------------------------------------------------------------- 2196 |//-----------------------------------------------------------------------
2161 | 2197 |
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 02e588ee..8a6b8270 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -85,6 +85,7 @@
85| 85|
86|.if FPU 86|.if FPU
87|.define FARG1, f12 87|.define FARG1, f12
88|.define FARG1HI, f13
88|.define FARG2, f14 89|.define FARG2, f14
89| 90|
90|.define FRET1, f0 91|.define FRET1, f0
@@ -2560,7 +2561,7 @@ static void build_subroutines(BuildCtx *ctx)
2560 | mtc1 r0, f4 2561 | mtc1 r0, f4
2561 | mtc1 TMP0, f5 2562 | mtc1 TMP0, f5
2562 | abs.d FRET2, FARG1 // |x| 2563 | abs.d FRET2, FARG1 // |x|
2563 | mfc1 AT, f13 2564 | mfc1 AT, FARG1HI
2564 | c.olt.d 0, FRET2, f4 2565 | c.olt.d 0, FRET2, f4
2565 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 2566 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2566 | bc1f 0, >1 // Truncate only if |x| < 2^52. 2567 | bc1f 0, >1 // Truncate only if |x| < 2^52.
@@ -2822,6 +2823,122 @@ static void build_subroutines(BuildCtx *ctx)
2822 | sfmin_max max, vm_sfcmpogt 2823 | sfmin_max max, vm_sfcmpogt
2823 | 2824 |
2824 |//----------------------------------------------------------------------- 2825 |//-----------------------------------------------------------------------
2826 |//-- Number conversion functions ----------------------------------------
2827 |//-----------------------------------------------------------------------
2828 |
2829 |// int64_t lj_vm_num2int_check(double x)
2830 |->vm_num2int_check:
2831 |.if FPU
2832 | trunc.w.d FARG2, FARG1
2833 | mfc1 SFRETLO, FARG2
2834 | cvt.d.w FARG2, FARG2
2835 | c.eq.d FARG1, FARG2
2836 | bc1f 0, >2
2837 |. nop
2838 | jr ra
2839 |. move SFRETHI, r0
2840 |
2841 |.else
2842 |
2843 | sll SFRETLO, SFARG1HI, 1
2844 | or SFRETHI, SFRETLO, SFARG1LO
2845 | beqz SFRETHI, >1 // Return 0 for +-0.
2846 |. li TMP0, 1054
2847 | srl AT, SFRETLO, 21
2848 | subu TMP0, TMP0, AT
2849 | sltiu AT, TMP0, 32
2850 | beqz AT, >2 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
2851 |. sll SFRETLO, SFARG1HI, 11
2852 | srl SFRETHI, SFARG1LO, 21
2853 | negu TMP1, TMP0
2854 | or SFRETLO, SFRETLO, SFRETHI // Left-aligned mantissa.
2855 | sllv TMP2, SFRETLO, TMP1
2856 | lui AT, 0x8000
2857 | sll SFRETHI, SFARG1LO, 11
2858 | or SFRETLO, SFRETLO, AT // Merge leading 1.
2859 | or TMP2, TMP2, SFRETHI
2860 | srlv SFRETLO, SFRETLO, TMP0 // lo = right-aligned absolute value.
2861 | bnez TMP2, >2 // Fail if fractional part != 0.
2862 |. sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1.
2863 | addu SFRETLO, SFRETLO, SFARG1HI
2864 | bltz SFRETLO, >2 // Fail if lo+sign >= 0x80000000.
2865 |. xor SFRETLO, SFRETLO, SFARG1HI // lo = sign?-lo:lo = (lo+sign)^sign.
2866 |1:
2867 | jr ra
2868 |. move SFRETHI, r0
2869 |.endif
2870 |2: // Not an integer, return 0x8000000080000000LL.
2871 | lui SFRETHI, 0x8000
2872 | jr ra
2873 |. lui SFRETLO, 0x8000
2874 |
2875 |// int64_t lj_vm_num2i64(double x)
2876 |->vm_num2i64:
2877 |// fallthrough, same as lj_vm_num2u64.
2878 |
2879 |// uint64_t lj_vm_num2u64(double x)
2880 |->vm_num2u64:
2881 |.if FPU
2882 | mfc1 SFARG1HI, FARG1HI
2883 | mfc1 SFARG1LO, FARG1
2884 |.endif
2885 | srl TMP0, SFARG1HI, 20
2886 | andi TMP0, TMP0, 0x7ff
2887 | addiu SFRETLO, TMP0, -1023
2888 | sltiu SFRETLO, SFRETLO, 116
2889 | beqz SFRETLO, >3 // Exponent out of range.
2890 |. sll SFRETHI, SFARG1HI, 12
2891 | lui AT, 0x0010
2892 | srl SFRETHI, SFRETHI, 12
2893 | addiu TMP0, TMP0, -1075
2894 | sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1.
2895 | bgez TMP0, >2 // Shift mantissa left or right?
2896 |. or SFRETHI, SFRETHI, AT // Merge leading 1 into masked mantissa.
2897 | subu TMP1, r0, TMP0
2898 | sll AT, SFRETHI, 1
2899 | nor TMP0, r0, TMP1
2900 | srlv SFRETHI, SFRETHI, TMP1 // Shift hi mantissa right for low exp.
2901 | sllv AT, AT, TMP0 // Shifted-out hi mantissa.
2902 | srlv SFRETLO, SFARG1LO, TMP1 // Shift lo mantissa right for low exp.
2903 | andi TMP1, TMP1, 0x20 // Conditional right shift by 32.
2904 | or AT, AT, SFRETLO // Merge into lo mantissa.
2905 | movn AT, SFRETHI, TMP1
2906 | movn SFRETHI, r0, TMP1
2907 |1:
2908 | addu SFRETLO, AT, SFARG1HI // m = sign?-m:m = (m+sign)^sign.
2909 | addu SFRETHI, SFRETHI, SFARG1HI
2910 | sltu TMP0, SFRETLO, AT // Carry
2911 | addu SFRETHI, SFRETHI, TMP0
2912 | xor SFRETLO, SFRETLO, SFARG1HI
2913 | jr ra
2914 |. xor SFRETHI, SFRETHI, SFARG1HI
2915 |2:
2916 | srl TMP2, SFARG1LO, 1
2917 | nor AT, r0, TMP0
2918 | sllv SFRETHI, SFRETHI, TMP0 // Shift hi mantissa left for high exp.
2919 | srlv TMP2, TMP2, AT // Shifted-out lo mantissa.
2920 | sllv AT, SFARG1LO, TMP0 // Shift lo mantissa left for high exp.
2921 | andi TMP0, TMP0, 0x20 // Conditional left shift by 32.
2922 | or SFRETHI, SFRETHI, TMP2 // Merge into hi mantissa.
2923 | movn SFRETHI, AT, TMP0
2924 | b <1
2925 |. movn AT, r0, TMP0
2926 |3:
2927 | jr ra
2928 |. li SFRETHI, 0
2929 |
2930 |// int32_t lj_vm_tobit(double x)
2931 |.if FPU
2932 |->vm_tobit:
2933 | lui AT, 0x59c0 // 2^52 + 2^51 (float).
2934 | mtc1 AT, FARG2
2935 | cvt.d.s FARG2, FARG2
2936 | add.d FARG1, FARG1, FARG2
2937 | jr ra
2938 |. mfc1 CRET1, FARG1
2939 |.endif
2940 |
2941 |//-----------------------------------------------------------------------
2825 |//-- Miscellaneous functions -------------------------------------------- 2942 |//-- Miscellaneous functions --------------------------------------------
2826 |//----------------------------------------------------------------------- 2943 |//-----------------------------------------------------------------------
2827 | 2944 |
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index 859c0aee..4dc40d8a 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -2113,7 +2113,7 @@ static void build_subroutines(BuildCtx *ctx)
2113 | dinsu CRET2, AT, 21, 21 2113 | dinsu CRET2, AT, 21, 21
2114 | slt AT, CARG1, r0 2114 | slt AT, CARG1, r0
2115 | dsrlv CRET1, CRET2, TMP0 2115 | dsrlv CRET1, CRET2, TMP0
2116 | dsubu CARG1, r0, CRET1 2116 | negu CARG1, CRET1
2117 |.if MIPSR6 2117 |.if MIPSR6
2118 | seleqz CRET1, CRET1, AT 2118 | seleqz CRET1, CRET1, AT
2119 | selnez CARG1, CARG1, AT 2119 | selnez CARG1, CARG1, AT
@@ -2121,20 +2121,12 @@ static void build_subroutines(BuildCtx *ctx)
2121 |.else 2121 |.else
2122 | movn CRET1, CARG1, AT 2122 | movn CRET1, CARG1, AT
2123 |.endif 2123 |.endif
2124 | li CARG1, 64 2124 | negu TMP0, TMP0
2125 | subu TMP0, CARG1, TMP0
2126 | dsllv CRET2, CRET2, TMP0 // Integer check. 2125 | dsllv CRET2, CRET2, TMP0 // Integer check.
2127 | sextw AT, CRET1 2126 | sextw AT, CRET1
2128 | xor AT, CRET1, AT // Range check. 2127 | xor AT, CRET1, AT // Range check.
2129 |.if MIPSR6
2130 | seleqz AT, AT, CRET2
2131 | selnez CRET2, CRET2, CRET2
2132 | jr ra 2128 | jr ra
2133 |. or CRET2, AT, CRET2 2129 |. or CRET2, AT, CRET2
2134 |.else
2135 | jr ra
2136 |. movz CRET2, AT, CRET2
2137 |.endif
2138 |1: 2130 |1:
2139 | jr ra 2131 | jr ra
2140 |. li CRET2, 1 2132 |. li CRET2, 1
@@ -2929,6 +2921,136 @@ static void build_subroutines(BuildCtx *ctx)
2929 | sfmin_max max, vm_sfcmpogt 2921 | sfmin_max max, vm_sfcmpogt
2930 | 2922 |
2931 |//----------------------------------------------------------------------- 2923 |//-----------------------------------------------------------------------
2924 |//-- Number conversion functions ----------------------------------------
2925 |//-----------------------------------------------------------------------
2926 |
2927 |// int64_t lj_vm_num2int_check(double x)
2928 |->vm_num2int_check:
2929 |.if FPU
2930 | trunc.w.d FARG2, FARG1
2931 | mfc1 CRET1, FARG2
2932 | cvt.d.w FARG2, FARG2
2933 |.if MIPSR6
2934 | cmp.eq.d FARG2, FARG1, FARG2
2935 | bc1eqz FARG2, >2
2936 |.else
2937 | c.eq.d FARG1, FARG2
2938 | bc1f 0, >2
2939 |.endif
2940 |. nop
2941 | jr ra
2942 |. zextw CRET1, CRET1
2943 |
2944 |.else
2945 |
2946 | dsll CRET2, CARG1, 1
2947 | beqz CRET2, >1
2948 |. li TMP0, 1076
2949 | dsrl AT, CRET2, 53
2950 | dsubu TMP0, TMP0, AT
2951 | sltiu AT, TMP0, 54
2952 | beqz AT, >2
2953 |. dextm CRET2, CRET2, 0, 20
2954 | dinsu CRET2, AT, 21, 21
2955 | slt AT, CARG1, r0
2956 | dsrlv CRET1, CRET2, TMP0
2957 | negu CARG1, CRET1
2958 |.if MIPSR6
2959 | seleqz CRET1, CRET1, AT
2960 | selnez CARG1, CARG1, AT
2961 | or CRET1, CRET1, CARG1
2962 |.else
2963 | movn CRET1, CARG1, AT
2964 |.endif
2965 | negu TMP0, TMP0
2966 | dsllv CRET2, CRET2, TMP0 // Integer check.
2967 | sextw AT, CRET1
2968 | xor AT, CRET1, AT // Range check.
2969 | or AT, AT, CRET2
2970 | bnez AT, >2
2971 |. nop
2972 | jr ra
2973 |. zextw CRET1, CRET1
2974 |1:
2975 | jr ra
2976 |. move CRET1, r0
2977 |.endif
2978 |2:
2979 | lui CRET1, 0x8000
2980 | dsll CRET1, CRET1, 16
2981 | ori CRET1, CRET1, 0x8000
2982 | jr ra
2983 |. dsll CRET1, CRET1, 16
2984 |
2985 |// int64_t lj_vm_num2i64(double x)
2986 |->vm_num2i64:
2987 |.if FPU
2988 | trunc.l.d FARG1, FARG1
2989 | jr ra
2990 |. dmfc1 CRET1, FARG1
2991 |.else
2992 |// fallthrough, same as lj_vm_num2u64 for soft-float.
2993 |.endif
2994 |
2995 |// uint64_t lj_vm_num2u64(double x)
2996 |->vm_num2u64:
2997 |.if FPU
2998 | trunc.l.d FARG2, FARG1
2999 | dmfc1 CRET1, FARG2
3000 | li AT, -1
3001 | dsrl AT, AT, 1
3002 | beq CRET1, AT, >1
3003 |. lui AT, 0xdf80 // -2^64 (float).
3004 | jr ra
3005 |. nop
3006 |1:
3007 | mtc1 AT, FARG2
3008 | cvt.d.s FARG2, FARG2
3009 | add.d FARG1, FARG1, FARG2
3010 | trunc.l.d FARG2, FARG1
3011 | jr ra
3012 |. dmfc1 CRET1, FARG2
3013 |
3014 |.else
3015 |
3016 | dextu CARG2, CARG1, 20, 10
3017 | addiu AT, CARG2, -1023
3018 | sltiu AT, AT, 116
3019 | beqz AT, >2 // Exponent out of range.
3020 |. addiu CARG2, CARG2, -1075
3021 | dextm CRET1, CARG1, 0, 19
3022 | dsll AT, AT, 52
3023 | dsra CARG1, CARG1, 63 // sign = 0 or -1.
3024 | bgez CARG2, >1 // Shift mantissa left or right?
3025 |. or CRET1, CRET1, AT // Merge leading 1 into masked mantissa.
3026 | subu CARG2, r0, CARG2
3027 | dsrlv CRET1, CRET1, CARG2 // Shift mantissa right for low exp.
3028 | daddu CRET1, CRET1, CARG1
3029 | jr ra
3030 |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
3031 |1:
3032 | dsllv CRET1, CRET1, CARG2 // Shift mantissa left for high exp.
3033 | daddu CRET1, CRET1, CARG1
3034 | jr ra
3035 |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
3036 |2:
3037 | jr ra
3038 |. move CRET1, r0
3039 |.endif
3040 |
3041 |// int32_t lj_vm_tobit(double x)
3042 |.if FPU
3043 |->vm_tobit:
3044 | lui AT, 0x59c0 // 2^52 + 2^51 (float).
3045 | mtc1 AT, FARG2
3046 | cvt.d.s FARG2, FARG2
3047 | add.d FARG1, FARG1, FARG2
3048 | mfc1 CRET1, FARG1
3049 | jr ra
3050 |. sextw CRET1, CRET1
3051 |.endif
3052 |
3053 |//-----------------------------------------------------------------------
2932 |//-- Miscellaneous functions -------------------------------------------- 3054 |//-- Miscellaneous functions --------------------------------------------
2933 |//----------------------------------------------------------------------- 3055 |//-----------------------------------------------------------------------
2934 | 3056 |
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 2ddeefbf..1761e39b 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -3160,6 +3160,152 @@ static void build_subroutines(BuildCtx *ctx)
3160 | blr 3160 | blr
3161 | 3161 |
3162 |//----------------------------------------------------------------------- 3162 |//-----------------------------------------------------------------------
3163 |//-- Number conversion functions ----------------------------------------
3164 |//-----------------------------------------------------------------------
3165 |
3166 |// int64_t lj_vm_num2int_check(double x)
3167 |->vm_num2int_check:
3168 |.if FPU
3169 | subi sp, sp, 16 // Scratch slot to move the double from the FPR into GPRs.
3170 | stfd FARG1, 0(sp)
3171 | lwz CARG1, 0(sp) // Word holding sign and exponent (see rlwinm/srawi below).
3172 | lwz CARG2, 4(sp) // Remaining mantissa word.
3173 |.endif
3174 | slwi TMP1, CARG1, 1 // Drop the sign bit.
3175 |.if PPE
3176 | or TMP1, TMP1, CARG2
3177 | cmpwi TMP1, 0
3178 |.else
3179 | or. TMP1, TMP1, CARG2
3180 |.endif
3181 | beq >2 // Return 0 for +-0.
3182 | rlwinm RB, CARG1, 12, 21, 31 // RB = biased exponent.
3183 | subfic RB, RB, 1054 // RB = 1054 - exponent, used as the right-shift count below.
3184 | cmplwi RB, 32
3185 | bge >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
3186 | slwi CARG3, CARG1, 11
3187 | rlwimi CARG3, CARG2, 11, 21, 31 // Left-aligned mantissa.
3188 | subfic TMP1, RB, 32
3189 | slw TMP1, CARG3, TMP1 // Mantissa bits that fall below the integer part.
3190 | slwi TMP2, CARG2, 11
3191 |.if PPE
3192 | or. TMP1, TMP1, TMP2 // NOTE(review): PPE/non-PPE forms are swapped relative to the +-0 check above; same result, confirm intended.
3193 |.else
3194 | or TMP1, TMP1, TMP2
3195 | cmpwi TMP1, 0
3196 |.endif
3197 | bne >1 // Fail if fractional part != 0.
3198 | oris CARG3, CARG3, 0x8000 // Merge leading 1.
3199 | srw CRET2, CARG3, RB // lo = right-aligned absolute value.
3200 | srawi CARG4, CARG1, 31 // sign = 0 or -1.
3201 |.if GPR64
3202 | add CRET2, CRET2, CARG4
3203 | cmpwi CRET2, 0
3204 |.else
3205 | add. CRET2, CRET2, CARG4
3206 |.endif
3207 | blt >1 // Fail if lo+sign >= 0x80000000.
3208 | xor CRET2, CRET2, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign.
3209 |2:
3210 |.if GPR64
3211 | rldicl CRET1, CRET1, 0, 32 // NOTE(review): the integer result was built in CRET2, but the source operand here is CRET1 -- confirm.
3212 |.else
3213 | li CRET1, 0 // Success: first result word is 0, CRET2 holds the int32.
3214 |.endif
3215 |.if FPU
3216 | addi sp, sp, 16 // Release the scratch slot reserved on entry.
3217 |.endif
3218 | blr
3219 |1:
3220 |.if GPR64
3221 | lus CRET1, 0x8000
3222 | rldicr CRET1, CRET1, 32, 32
3223 |.else
3224 | lus CRET1, 0x8000 // Failure: both result words are set to 0x80000000.
3225 | lus CRET2, 0x8000
3226 |.endif
3227 |.if FPU
3228 | addi sp, sp, 16 // Release the scratch slot reserved on entry.
3229 |.endif
3230 | blr
3231 |
3232 |// int64_t lj_vm_num2i64(double x)
3233 |->vm_num2i64:
3234 |// fallthrough, same as lj_vm_num2u64.
3235 |
3236 |// uint64_t lj_vm_num2u64(double x)
3237 |->vm_num2u64:
3238 |.if FPU
3239 | subi sp, sp, 16 // Scratch slot; every exit below must release it, and only when FPU is set.
3240 | stfd FARG1, 0(sp)
3241 | lwz CARG1, 0(sp) // Word holding sign and exponent.
3242 | lwz CARG2, 4(sp) // Remaining mantissa word.
3243 |.endif
3244 | rlwinm RB, CARG1, 12, 21, 31 // RB = biased exponent.
3245 | addi RB, RB, -1023 // RB = unbiased exponent.
3246 | cmplwi RB, 116
3247 | bge >3 // Exponent out of range.
3248 | srawi CARG4, CARG1, 31 // sign = 0 or -1.
3249 | clrlwi CARG1, CARG1, 12 // Keep only the mantissa bits of this word.
3250 | subfic RB, RB, 52 // RB = 52 - exponent: >= 0 means shift right, < 0 shift left.
3251 | oris CARG1, CARG1, 0x0010 // Merge leading 1.
3252 | cmpwi RB, 0
3253 | blt >2 // Shift mantissa left or right?
3254 | subfic TMP1, RB, 32 // 64 bit right shift.
3255 | srw CARG2, CARG2, RB
3256 | slw TMP2, CARG1, TMP1
3257 | addi TMP1, RB, -32
3258 | or CARG2, CARG2, TMP2
3259 | srw TMP2, CARG1, TMP1
3260 | or CARG2, CARG2, TMP2
3261 | srw CARG1, CARG1, RB
3262 |1:
3263 | addc CARG2, CARG2, CARG4 // m = sign?-m:m = (m+sign)^sign, with carry across both words.
3264 | adde CARG1, CARG1, CARG4
3265 | xor CRET2, CARG2, CARG4
3266 | xor CRET1, CARG1, CARG4
3267 |.if GPR64
3268 | rldimi CRET2, CRET1, 0, 32 // NOTE(review): confirm these operands merge CRET1 into the upper half of the 64 bit result.
3269 | mr CRET1, CRET2
3270 |.endif
|.if FPU // The scratch slot exists only in the FPU build; an unguarded addi unbalances sp on soft-float.
3271 | addi sp, sp, 16
|.endif
3272 | blr
3273 |2:
3274 | subfic TMP1, RB, 0 // 64 bit left shift.
3275 | addi RB, RB, -32
3276 | slw CARG1, CARG1, TMP1
3277 | srw TMP2, CARG2, RB
3278 | addi RB, TMP1, -32
3279 | or CARG1, CARG1, TMP2
3280 | slw TMP2, CARG2, RB
3281 | or CARG1, CARG1, TMP2
3282 | slw CARG2, CARG2, TMP1
3283 | b <1
3284 |3:
3285 | li CRET1, 0 // Out-of-range exponent: return 0.
3286 |.if not GPR64
3287 | li CRET2, 0
3288 |.endif
3289 |.if FPU
3290 | addi sp, sp, 16
3291 |.endif
3292 | blr
3293 |
3294 |// int32_t lj_vm_tobit(double x)
3295 |.if FPU
3296 |->vm_tobit:
3297 | lus TMP0, 0x59c0 // 2^52 + 2^51 (float).
3298 | subi sp, sp, 16
3299 | stw TMP0, 0(sp)
3300 | lfs FARG2, 0(sp)
3301 | fadd FARG1, FARG1, FARG2
3302 | stfd FARG1, 0(sp)
3303 | lwz CRET1, 4(sp)
3304 | addi sp, sp, 16
3305 | blr
3306 |.endif
3307 |
3308 |//-----------------------------------------------------------------------
3163 |//-- Miscellaneous functions -------------------------------------------- 3309 |//-- Miscellaneous functions --------------------------------------------
3164 |//----------------------------------------------------------------------- 3310 |//-----------------------------------------------------------------------
3165 | 3311 |
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 4cfb7b6a..970e8e43 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -2625,6 +2625,49 @@ static void build_subroutines(BuildCtx *ctx)
2625 | ret 2625 | ret
2626 | 2626 |
2627 |//----------------------------------------------------------------------- 2627 |//-----------------------------------------------------------------------
2628 |//-- Number conversion functions ----------------------------------------
2629 |//-----------------------------------------------------------------------
2630 |
2631 |// int64_t lj_vm_num2int_check(double x)
2632 |->vm_num2int_check: // Converts x to a 32 bit integer and verifies the conversion by a round-trip compare.
2633 | cvttsd2si eax, xmm0 // Truncating double -> int32 conversion (writing eax zero-extends into rax).
2634 | xorps xmm1, xmm1 // Clear xmm1 before it is partially written below.
2635 | cvtsi2sd xmm1, eax // Convert the integer result back to a double.
2636 | ucomisd xmm1, xmm0 // Compare the round-tripped value against the original x.
2637 | jp >1 // Parity set: unordered compare (NaN) -> failure path.
2638 | jne >1 // Not equal: x is not exactly representable as int32 -> failure path.
2639 | ret // Success: the converted integer is in eax.
2640 |1:
2641 | mov64 rax, U64x(80000000,80000000) // Failure: return 0x8000000080000000; presumably the sentinel tested by the caller -- TODO confirm.
2642 | ret
2643 |
2644 |// int64_t lj_vm_num2i64(double x)
2645 |->vm_num2i64: // Single-instruction truncating conversion; no range check is performed here.
2646 | cvttsd2si rax, xmm0 // Truncating double -> int64 conversion; unrepresentable inputs yield the indefinite value 0x8000000000000000.
2647 | ret
2648 |
2649 |// uint64_t lj_vm_num2u64(double x)
2650 |->vm_num2u64: // Tries a signed conversion first; on the indefinite result retries with x - 2^64.
2651 | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range.
2652 | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow.
2653 | jo >1 // Overflow flag set only if rax == 0x8000000000000000.
2654 | ret // Signed conversion succeeded: rax already holds the result bits.
2655 |1:
2656 | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double).
2657 | movd xmm1, rdx // Move the constant's bit pattern into xmm1.
2658 | addsd xmm0, xmm1 // xmm0 = x - 2^64.
2659 | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range.
2660 | // Note that -0x1p63 converts to -0x8000000000000000LL either way.
2661 | ret
2662 |
2663 |// int32_t lj_vm_tobit(double x)
2664 |->vm_tobit: // Adds a bias constant to x and returns the low 32 bits of the resulting double.
2665 | sseconst_tobit xmm1, RC // Macro defined outside this view; presumably loads the 2^52 + 2^51 bias into xmm1 using RC as scratch -- TODO confirm.
2666 | addsd xmm0, xmm1 // xmm0 = x + bias.
2667 | movd eax, xmm0 // Return the low 32 bits of the biased double.
2668 | ret
2669 |
2670 |//-----------------------------------------------------------------------
2628 |//-- Miscellaneous functions -------------------------------------------- 2671 |//-- Miscellaneous functions --------------------------------------------
2629 |//----------------------------------------------------------------------- 2672 |//-----------------------------------------------------------------------
2630 | 2673 |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 77c4069d..485ed809 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -3059,6 +3059,98 @@ static void build_subroutines(BuildCtx *ctx)
3059 | ret 3059 | ret
3060 | 3060 |
3061 |//----------------------------------------------------------------------- 3061 |//-----------------------------------------------------------------------
3062 |//-- Number conversion functions ----------------------------------------
3063 |//-----------------------------------------------------------------------
3064 |
3065 |// int64_t lj_vm_num2int_check(double x)
3066 |->vm_num2int_check: // Converts x to a 32 bit integer and verifies the conversion by a round-trip compare.
3067 |.if not X64
3068 | movsd xmm0, qword [esp+4] // 32 bit build: load x from the stack (presumably the first argument slot -- TODO confirm).
3069 |.endif
3070 | cvttsd2si eax, xmm0 // Truncating double -> int32 conversion.
3071 | xorps xmm1, xmm1 // Clear xmm1 before it is partially written below.
3072 | cvtsi2sd xmm1, eax // Convert the integer result back to a double.
3073 | ucomisd xmm1, xmm0 // Compare the round-tripped value against the original x.
3074 | jp >1 // Parity set: unordered compare (NaN) -> failure path.
3075 | jne >1 // Not equal: x is not exactly representable as int32 -> failure path.
3076 |.if not X64
3077 | xor edx, edx // Success on 32 bit: zero the high word of the edx:eax result.
3078 |.endif
3079 | ret // Success: the converted integer is in eax.
3080 |1:
3081 |.if X64
3082 | mov64 rax, U64x(80000000,80000000) // Failure: return 0x8000000080000000; presumably the sentinel tested by the caller -- TODO confirm.
3083 |.else
3084 | mov eax, 0x80000000 // Failure on 32 bit: low word of the same 0x8000000080000000 value.
3085 | mov edx, eax // High word is set to the same 0x80000000.
3086 |.endif
3087 | ret
3088 |
3089 |// int64_t lj_vm_num2i64(double x)
3090 |->vm_num2i64: // Truncating double -> int64 conversion; SSE2 on x64, x87 fisttp on 32 bit.
3091 |.if X64
3092 | cvttsd2si rax, xmm0 // Truncating conversion; unrepresentable inputs yield the indefinite value 0x8000000000000000.
3093 | ret
3094 |.else
3095 | sub esp, 12 // Reserve scratch space; the slot previously at [esp+4] is now at [esp+16].
3096 | fld qword [esp+16] // Push x onto the x87 stack.
3097 | fisttp qword [esp] // Store x truncated to int64 into the scratch slot and pop.
3098 | mov eax, dword [esp] // Low word of the result.
3099 | mov edx, dword [esp+4] // High word of the result.
3100 | add esp, 12 // Release the scratch space.
3101 | ret
3102 |.endif
3103 |
3104 |// uint64_t lj_vm_num2u64(double x)
3105 |->vm_num2u64: // Tries a signed conversion first; on the indefinite result retries with x - 2^64.
3106 |.if X64
3107 | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range.
3108 | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow.
3109 | jo >1 // Overflow flag set only if rax == 0x8000000000000000.
3110 | ret // Signed conversion succeeded: rax already holds the result bits.
3111 |1:
3112 | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double).
3113 | movd xmm1, rdx // Move the constant's bit pattern into xmm1.
3114 | addsd xmm0, xmm1 // xmm0 = x - 2^64.
3115 | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range.
3116 | // Note that -0x1p63 converts to -0x8000000000000000LL either way.
3117 | ret
3118 |.else
3119 | sub esp, 12 // Reserve scratch space; the slot previously at [esp+4] is now at [esp+16].
3120 | fld qword [esp+16] // Push x onto the x87 stack.
3121 | fld st0 // Duplicate x so one copy survives the popping store below.
3122 | fisttp qword [esp] // Store x truncated to int64 into the scratch slot and pop one copy.
3123 | mov edx, dword [esp+4] // High word of the result.
3124 | mov eax, dword [esp] // Low word of the result.
3125 | cmp edx, 1 // Overflow flag set only if edx == 0x80000000.
3126 | jo >2 // High word matches the indefinite pattern: check the low word too.
3127 |1:
3128 | fpop // Discard the remaining copy of x (fpop is defined outside this view).
3129 | add esp, 12 // Release the scratch space.
3130 | ret
3131 |2:
3132 | cmp eax, 0 // The indefinite value 0x8000000000000000 has a zero low word.
3133 | jne <1 // Low word non-zero: a genuine in-range result, return it as is.
3134 | mov dword [esp+8], 0xdf800000 // -0x1p64 (float).
3135 | fadd dword [esp+8] // st0 = x - 2^64, using the surviving copy of x.
3136 | fisttp qword [esp] // Convert again and pop the last copy from the x87 stack.
3137 | mov eax, dword [esp] // Low word of the result.
3138 | mov edx, dword [esp+4] // High word of the result.
3139 | add esp, 12 // Release the scratch space.
3140 | ret
3141 |.endif
3142 |
3143 |// int32_t lj_vm_tobit(double x)
3144 |->vm_tobit: // Adds a bias constant to x and returns the low 32 bits of the resulting double.
3145 |.if not X64
3146 | movsd xmm0, qword [esp+4] // 32 bit build: load x from the stack (presumably the first argument slot -- TODO confirm).
3147 |.endif
3148 | sseconst_tobit xmm1, RCa // Macro defined outside this view; presumably loads the 2^52 + 2^51 bias into xmm1 using RCa as scratch -- TODO confirm.
3149 | addsd xmm0, xmm1 // xmm0 = x + bias.
3150 | movd eax, xmm0 // Return the low 32 bits of the biased double.
3151 | ret
3152 |
3153 |//-----------------------------------------------------------------------
3062 |//-- Miscellaneous functions -------------------------------------------- 3154 |//-- Miscellaneous functions --------------------------------------------
3063 |//----------------------------------------------------------------------- 3155 |//-----------------------------------------------------------------------
3064 | 3156 |