aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Pall <mike>2025-11-27 17:45:17 +0100
committerMike Pall <mike>2025-11-27 17:45:17 +0100
commitf80b349d5490aa289b2925d297f3f3c618977570 (patch)
tree8d8fb0d2beb3e863592139d603ada63e5aa6ce77
parent3215838aa744d148e79a8ea0bd7c014e984302cb (diff)
downloadluajit-f80b349d5490aa289b2925d297f3f3c618977570.tar.gz
luajit-f80b349d5490aa289b2925d297f3f3c618977570.tar.bz2
luajit-f80b349d5490aa289b2925d297f3f3c618977570.zip
Unify Lua number to FFI integer conversions.
Phew. #1411
-rw-r--r--doc/ext_ffi_semantics.html56
-rw-r--r--src/lib_io.c7
-rw-r--r--src/lib_os.c8
-rw-r--r--src/lj_api.c24
-rw-r--r--src/lj_asm.c21
-rw-r--r--src/lj_asm_arm.h5
-rw-r--r--src/lj_asm_arm64.h20
-rw-r--r--src/lj_asm_mips.h79
-rw-r--r--src/lj_asm_ppc.h27
-rw-r--r--src/lj_asm_x86.h45
-rw-r--r--src/lj_bcwrite.c11
-rw-r--r--src/lj_cconv.c12
-rw-r--r--src/lj_cdata.c7
-rw-r--r--src/lj_crecord.c36
-rw-r--r--src/lj_def.h2
-rw-r--r--src/lj_ffrecord.c4
-rw-r--r--src/lj_ir.c23
-rw-r--r--src/lj_ircall.h32
-rw-r--r--src/lj_jit.h27
-rw-r--r--src/lj_lib.c2
-rw-r--r--src/lj_meta.c3
-rw-r--r--src/lj_obj.h91
-rw-r--r--src/lj_opt_fold.c64
-rw-r--r--src/lj_opt_narrow.c32
-rw-r--r--src/lj_opt_split.c15
-rw-r--r--src/lj_parse.c23
-rw-r--r--src/lj_record.c21
-rw-r--r--src/lj_strfmt.c9
-rw-r--r--src/lj_strscan.c8
-rw-r--r--src/lj_tab.c24
-rw-r--r--src/lj_target_x86.h1
-rw-r--r--src/lj_trace.c30
-rw-r--r--src/lj_vm.h34
-rw-r--r--src/lj_vmmath.c2
-rw-r--r--src/vm_arm.dasc115
-rw-r--r--src/vm_arm64.dasc36
-rw-r--r--src/vm_mips.dasc119
-rw-r--r--src/vm_mips64.dasc142
-rw-r--r--src/vm_ppc.dasc146
-rw-r--r--src/vm_x64.dasc43
-rw-r--r--src/vm_x86.dasc92
41 files changed, 1067 insertions, 431 deletions
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
index cd533e8c..cd72da21 100644
--- a/doc/ext_ffi_semantics.html
+++ b/doc/ext_ffi_semantics.html
@@ -338,42 +338,44 @@ pointer or type compatibility:
338<tr class="odd"> 338<tr class="odd">
339<td class="convin">Integer</td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>double</tt>, <tt>float</tt></td></tr> 339<td class="convin">Integer</td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>double</tt>, <tt>float</tt></td></tr>
340<tr class="even"> 340<tr class="even">
341<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup> <tt>int32_t</tt> &rarr;<sup>narrow</sup></td><td class="convout"><tt>(u)int8_t</tt>, <tt>(u)int16_t</tt></td></tr> 341<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup> <tt>int64_t</tt> &rarr;<sup>narrow</sup> <sup>*</sup></td><td class="convout"><tt>(u)int8_t</tt>, <tt>(u)int16_t</tt>, <tt>(u)int32_t</tt></td></tr>
342<tr class="odd"> 342<tr class="odd">
343<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup></td><td class="convout"><tt>(u)int32_t</tt>, <tt>(u)int64_t</tt></td></tr> 343<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup></td><td class="convout"><tt>int64_t</tt></td></tr>
344<tr class="even"> 344<tr class="even">
345<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup> uint64_t &cup; int64_t &rarr;<sup>reinterpret</sup> <sup>*</sup></td><td class="convout"><tt>uint64_t</tt></td></tr>
346<tr class="odd">
345<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>float</tt>, <tt>double</tt></td></tr> 347<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>float</tt>, <tt>double</tt></td></tr>
346<tr class="odd separate"> 348<tr class="even separate">
347<td class="convin">Number</td><td class="convop">n == 0 &rarr; 0, otherwise 1</td><td class="convout"><tt>bool</tt></td></tr> 349<td class="convin">Number</td><td class="convop">n == 0 &rarr; 0, otherwise 1</td><td class="convout"><tt>bool</tt></td></tr>
348<tr class="even"> 350<tr class="odd">
349<td class="convin"><tt>bool</tt></td><td class="convop"><tt>false</tt> &rarr; 0, <tt>true</tt> &rarr; 1</td><td class="convout">Number</td></tr> 351<td class="convin"><tt>bool</tt></td><td class="convop"><tt>false</tt> &rarr; 0, <tt>true</tt> &rarr; 1</td><td class="convout">Number</td></tr>
350<tr class="odd separate"> 352<tr class="even separate">
351<td class="convin">Complex number</td><td class="convop">convert real part</td><td class="convout">Number</td></tr> 353<td class="convin">Complex number</td><td class="convop">convert real part</td><td class="convout">Number</td></tr>
352<tr class="even">
353<td class="convin">Number</td><td class="convop">convert real part, imag = 0</td><td class="convout">Complex number</td></tr>
354<tr class="odd"> 354<tr class="odd">
355<td class="convin">Number</td><td class="convop">convert real part, imag = 0</td><td class="convout">Complex number</td></tr>
356<tr class="even">
355<td class="convin">Complex number</td><td class="convop">convert real and imag part</td><td class="convout">Complex number</td></tr> 357<td class="convin">Complex number</td><td class="convop">convert real and imag part</td><td class="convout">Complex number</td></tr>
356<tr class="even separate"> 358<tr class="odd separate">
357<td class="convin">Number</td><td class="convop">convert scalar and replicate</td><td class="convout">Vector</td></tr> 359<td class="convin">Number</td><td class="convop">convert scalar and replicate</td><td class="convout">Vector</td></tr>
358<tr class="odd"> 360<tr class="even">
359<td class="convin">Vector</td><td class="convop">copy (same size)</td><td class="convout">Vector</td></tr> 361<td class="convin">Vector</td><td class="convop">copy (same size)</td><td class="convout">Vector</td></tr>
360<tr class="even separate"> 362<tr class="odd separate">
361<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr> 363<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr>
362<tr class="odd">
363<td class="convin">Array</td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr>
364<tr class="even"> 364<tr class="even">
365<td class="convin">Array</td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr>
366<tr class="odd">
365<td class="convin">Function</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr> 367<td class="convin">Function</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr>
366<tr class="odd separate"> 368<tr class="even separate">
367<td class="convin">Number</td><td class="convop">convert via <tt>uintptr_t</tt> (cast)</td><td class="convout">Pointer</td></tr> 369<td class="convin">Number</td><td class="convop">convert via <tt>uintptr_t</tt> (cast)</td><td class="convout">Pointer</td></tr>
368<tr class="even">
369<td class="convin">Pointer</td><td class="convop">convert address (compat/cast)</td><td class="convout">Pointer</td></tr>
370<tr class="odd"> 370<tr class="odd">
371<td class="convin">Pointer</td><td class="convop">convert address (cast)</td><td class="convout">Integer</td></tr> 371<td class="convin">Pointer</td><td class="convop">convert address (compat/cast)</td><td class="convout">Pointer</td></tr>
372<tr class="even"> 372<tr class="even">
373<td class="convin">Pointer</td><td class="convop">convert address (cast)</td><td class="convout">Integer</td></tr>
374<tr class="odd">
373<td class="convin">Array</td><td class="convop">convert base address (cast)</td><td class="convout">Integer</td></tr> 375<td class="convin">Array</td><td class="convop">convert base address (cast)</td><td class="convout">Integer</td></tr>
374<tr class="odd separate"> 376<tr class="even separate">
375<td class="convin">Array</td><td class="convop">copy (compat)</td><td class="convout">Array</td></tr> 377<td class="convin">Array</td><td class="convop">copy (compat)</td><td class="convout">Array</td></tr>
376<tr class="even"> 378<tr class="odd">
377<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">copy (identical type)</td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr> 379<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">copy (identical type)</td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr>
378</table> 380</table>
379<p> 381<p>
@@ -384,6 +386,24 @@ type.
384Conversions not listed above will raise an error. E.g. it's not 386Conversions not listed above will raise an error. E.g. it's not
385possible to convert a pointer to a complex number or vice versa. 387possible to convert a pointer to a complex number or vice versa.
386</p> 388</p>
389<p>
390* Some conversions from <tt>double</tt> have a larger defined range to
391allow for mixed-signedness conversions, which are common in C code.
392E.g. initializing an <tt>int32_t</tt> field with <tt>0xffffffff</tt>
393or initializing an <tt>uint32_t</tt> or <tt>uint64_t</tt> field with
394<tt>-1</tt>. Under strict conversion rules, these assignments would
395give undefined results, since Lua numbers are doubles. The extended
396ranges make these conversions defined. Lua numbers that are even
397outside that range give an architecture-specific result.
398</p>
399<p>
400Please note that doubles do not have the precision to represent the
401whole signed or unsigned 64 bit integer range. Beware of large hex
402constants in particular: e.g. <tt>0xffffffffffffffff</tt> is a double
403rounded up to <tt>0x1p64</tt> during parsing. This will <em>not</em>
404convert to a defined 64 bit integer value. Use the 64 bit literal
405syntax instead, i.e. <tt>0xffffffffffffffffULL</tt>.
406</p>
387 407
388<h3 id="convert_vararg">Conversions for vararg C&nbsp;function arguments</h3> 408<h3 id="convert_vararg">Conversions for vararg C&nbsp;function arguments</h3>
389<p> 409<p>
diff --git a/src/lib_io.c b/src/lib_io.c
index 5659ff51..ec7d2545 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -127,8 +127,9 @@ static int io_file_readnum(lua_State *L, FILE *fp)
127 lua_Number d; 127 lua_Number d;
128 if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { 128 if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
129 if (LJ_DUALNUM) { 129 if (LJ_DUALNUM) {
130 int32_t i = lj_num2int(d); 130 int64_t i64;
131 if (d == (lua_Number)i && !tvismzero((cTValue *)&d)) { 131 int32_t i;
132 if (lj_num2int_check(d, i64, i) && !tvismzero((cTValue *)&d)) {
132 setintV(L->top++, i); 133 setintV(L->top++, i);
133 return 1; 134 return 1;
134 } 135 }
@@ -335,7 +336,7 @@ LJLIB_CF(io_method_seek)
335 if (tvisint(o)) 336 if (tvisint(o))
336 ofs = (int64_t)intV(o); 337 ofs = (int64_t)intV(o);
337 else if (tvisnum(o)) 338 else if (tvisnum(o))
338 ofs = (int64_t)numV(o); 339 ofs = lj_num2i64(numV(o));
339 else if (!tvisnil(o)) 340 else if (!tvisnil(o))
340 lj_err_argt(L, 3, LUA_TNUMBER); 341 lj_err_argt(L, 3, LUA_TNUMBER);
341 } 342 }
diff --git a/src/lib_os.c b/src/lib_os.c
index ae3fc857..fffc923e 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -171,7 +171,8 @@ static int getfield(lua_State *L, const char *key, int d)
171LJLIB_CF(os_date) 171LJLIB_CF(os_date)
172{ 172{
173 const char *s = luaL_optstring(L, 1, "%c"); 173 const char *s = luaL_optstring(L, 1, "%c");
174 time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); 174 time_t t = lua_isnoneornil(L, 2) ? time(NULL) :
175 lj_num2int_type(luaL_checknumber(L, 2), time_t);
175 struct tm *stm; 176 struct tm *stm;
176#if LJ_TARGET_POSIX 177#if LJ_TARGET_POSIX
177 struct tm rtm; 178 struct tm rtm;
@@ -253,8 +254,9 @@ LJLIB_CF(os_time)
253 254
254LJLIB_CF(os_difftime) 255LJLIB_CF(os_difftime)
255{ 256{
256 lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)), 257 lua_pushnumber(L,
257 (time_t)(luaL_optnumber(L, 2, (lua_Number)0)))); 258 difftime(lj_num2int_type(luaL_checknumber(L, 1), time_t),
259 lj_num2int_type(luaL_optnumber(L, 2, (lua_Number)0), time_t)));
258 return 1; 260 return 1;
259} 261}
260 262
diff --git a/src/lj_api.c b/src/lj_api.c
index e9fc25b4..94d8bc7e 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -416,11 +416,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
416 return intV(&tmp); 416 return intV(&tmp);
417 n = numV(&tmp); 417 n = numV(&tmp);
418 } 418 }
419#if LJ_64 419 return lj_num2int_type(n, lua_Integer);
420 return (lua_Integer)n;
421#else
422 return lj_num2int(n);
423#endif
424} 420}
425 421
426LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) 422LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
@@ -445,11 +441,7 @@ LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
445 n = numV(&tmp); 441 n = numV(&tmp);
446 } 442 }
447 if (ok) *ok = 1; 443 if (ok) *ok = 1;
448#if LJ_64 444 return lj_num2int_type(n, lua_Integer);
449 return (lua_Integer)n;
450#else
451 return lj_num2int(n);
452#endif
453} 445}
454 446
455LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) 447LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
@@ -468,11 +460,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
468 return (lua_Integer)intV(&tmp); 460 return (lua_Integer)intV(&tmp);
469 n = numV(&tmp); 461 n = numV(&tmp);
470 } 462 }
471#if LJ_64 463 return lj_num2int_type(n, lua_Integer);
472 return (lua_Integer)n;
473#else
474 return lj_num2int(n);
475#endif
476} 464}
477 465
478LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) 466LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
@@ -493,11 +481,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
493 return (lua_Integer)intV(&tmp); 481 return (lua_Integer)intV(&tmp);
494 n = numV(&tmp); 482 n = numV(&tmp);
495 } 483 }
496#if LJ_64 484 return lj_num2int_type(n, lua_Integer);
497 return (lua_Integer)n;
498#else
499 return lj_num2int(n);
500#endif
501} 485}
502 486
503LUA_API int lua_toboolean(lua_State *L, int idx) 487LUA_API int lua_toboolean(lua_State *L, int idx)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 0e888c29..8f7ae9a3 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1329,27 +1329,32 @@ static void asm_conv64(ASMState *as, IRIns *ir)
1329 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); 1329 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1330 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); 1330 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1331 IRCallID id; 1331 IRCallID id;
1332 const CCallInfo *ci;
1333#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1334 CCallInfo cim;
1335#endif
1332 IRRef args[2]; 1336 IRRef args[2];
1333 lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, 1337 lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
1334 "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); 1338 "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
1335 args[LJ_BE] = (ir-1)->op1; 1339 args[LJ_BE] = (ir-1)->op1;
1336 args[LJ_LE] = ir->op1; 1340 args[LJ_LE] = ir->op1;
1337 if (st == IRT_NUM || st == IRT_FLOAT) { 1341 lj_assertA(st != IRT_FLOAT, "bad CONV *64.float emitted");
1338 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); 1342 if (st == IRT_NUM) {
1343 id = IRCALL_lj_vm_num2u64;
1339 ir--; 1344 ir--;
1345 ci = &lj_ir_callinfo[id];
1340 } else { 1346 } else {
1341 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); 1347 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1342 }
1343 {
1344#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP 1348#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1345 CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; 1349 cim = lj_ir_callinfo[id];
1346 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ 1350 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
1351 ci = &cim;
1347#else 1352#else
1348 const CCallInfo *ci = &lj_ir_callinfo[id]; 1353 ci = &lj_ir_callinfo[id];
1349#endif 1354#endif
1350 asm_setupresult(as, ir, ci);
1351 asm_gencall(as, ci, args);
1352 } 1355 }
1356 asm_setupresult(as, ir, ci);
1357 asm_gencall(as, ci, args);
1353} 1358}
1354#endif 1359#endif
1355 1360
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 406360d2..1ddd2b3e 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -624,10 +624,9 @@ static void asm_conv(ASMState *as, IRIns *ir)
624 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 624 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
625 Reg dest = ra_dest(as, ir, RSET_GPR); 625 Reg dest = ra_dest(as, ir, RSET_GPR);
626 ARMIns ai; 626 ARMIns ai;
627 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
627 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 628 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
628 ai = irt_isint(ir->t) ? 629 ai = st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32;
629 (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) :
630 (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32);
631 emit_dm(as, ai, (tmp & 15), (left & 15)); 630 emit_dm(as, ai, (tmp & 15), (left & 15));
632 } 631 }
633 } else 632 } else
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index fdcff1db..507fc084 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -648,14 +648,18 @@ static void asm_conv(ASMState *as, IRIns *ir)
648 } else { 648 } else {
649 Reg left = ra_alloc1(as, lref, RSET_FPR); 649 Reg left = ra_alloc1(as, lref, RSET_FPR);
650 Reg dest = ra_dest(as, ir, RSET_GPR); 650 Reg dest = ra_dest(as, ir, RSET_GPR);
651 A64Ins ai = irt_is64(ir->t) ? 651 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
652 (st == IRT_NUM ? 652 if (irt_isu64(ir->t)) {
653 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : 653 emit_dnm(as, A64I_CSELx | A64F_CC(CC_VC), dest, dest, RID_TMP);
654 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : 654 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), dest);
655 (st == IRT_NUM ? 655 emit_dn(as, st == IRT_NUM ? A64I_FCVT_U64_F64 : A64I_FCVT_U64_F32, RID_TMP, (left & 31));
656 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : 656 emit_dn(as, st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32, dest, (left & 31));
657 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); 657 } else {
658 emit_dn(as, ai, dest, (left & 31)); 658 A64Ins ai = irt_is64(ir->t) ?
659 (st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32) :
660 (st == IRT_NUM ? A64I_FCVT_S32_F64 : A64I_FCVT_S32_F32);
661 emit_dn(as, ai, dest, (left & 31));
662 }
659 } 663 }
660 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 664 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
661 Reg dest = ra_dest(as, ir, RSET_GPR); 665 Reg dest = ra_dest(as, ir, RSET_GPR);
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 8dadabe4..36ed5de4 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -635,64 +635,38 @@ static void asm_conv(ASMState *as, IRIns *ir)
635 Reg dest = ra_dest(as, ir, RSET_GPR); 635 Reg dest = ra_dest(as, ir, RSET_GPR);
636 Reg left = ra_alloc1(as, lref, RSET_FPR); 636 Reg left = ra_alloc1(as, lref, RSET_FPR);
637 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 637 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
638 if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ 638 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
639 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
640 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
641 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
642 emit_tg(as, MIPSI_MFC1, dest, tmp);
643 emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D,
644 tmp, tmp);
645 emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D,
646 tmp, left, tmp);
647 if (st == IRT_FLOAT)
648 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
649 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
650 else
651 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
652 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
653#if LJ_64 639#if LJ_64
654 } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ 640 if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
655 MCLabel l_end; 641 MCLabel l_end = emit_label(as);
656 emit_tg(as, MIPSI_DMFC1, dest, tmp); 642 emit_tg(as, MIPSI_DMFC1, dest, tmp);
657 l_end = emit_label(as); 643 /* For result == INT64_MAX add -2^64 and convert again. */
658 /* For inputs >= 2^63 add -2^64 and convert again. */
659 if (st == IRT_NUM) { 644 if (st == IRT_NUM) {
660 emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); 645 emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp);
661 emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); 646 emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp);
662 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 647 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
663 (void *)&as->J->k64[LJ_K64_M2P64], 648 (void *)&as->J->k64[LJ_K64_M2P64],
664 rset_exclude(RSET_GPR, dest)); 649 rset_exclude(RSET_GPR, dest)); /* Delay slot. */
665 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ 650 emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */
666#if !LJ_TARGET_MIPSR6 651 emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1);
667 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 652 emit_ti(as, MIPSI_LI, RID_TMP, -1);
668 emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); 653 emit_tg(as, MIPSI_DMFC1, dest, tmp);
669#else 654 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left);
670 emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
671 emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp);
672#endif
673 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
674 (void *)&as->J->k64[LJ_K64_2P63],
675 rset_exclude(RSET_GPR, dest));
676 } else { 655 } else {
677 emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); 656 emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp);
678 emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); 657 emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp);
679 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), 658 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
680 (void *)&as->J->k32[LJ_K32_M2P64], 659 (void *)&as->J->k32[LJ_K32_M2P64],
681 rset_exclude(RSET_GPR, dest)); 660 rset_exclude(RSET_GPR, dest)); /* Delay slot. */
682 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ 661 emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */
683#if !LJ_TARGET_MIPSR6 662 emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1);
684 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 663 emit_ti(as, MIPSI_LI, RID_TMP, -1);
685 emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); 664 emit_tg(as, MIPSI_DMFC1, dest, tmp);
686#else 665 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left);
687 emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
688 emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp);
689#endif
690 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
691 (void *)&as->J->k32[LJ_K32_2P63],
692 rset_exclude(RSET_GPR, dest));
693 } 666 }
667 } else
694#endif 668#endif
695 } else { 669 {
696#if LJ_32 670#if LJ_32
697 emit_tg(as, MIPSI_MFC1, dest, tmp); 671 emit_tg(as, MIPSI_MFC1, dest, tmp);
698 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, 672 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
@@ -733,13 +707,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
733 "bad type for checked CONV"); 707 "bad type for checked CONV");
734 asm_tointg(as, ir, RID_NONE); 708 asm_tointg(as, ir, RID_NONE);
735 } else { 709 } else {
736 IRCallID cid = irt_is64(ir->t) ? 710 IRCallID cid;
737 ((st == IRT_NUM) ? 711 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
738 (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : 712 lj_assertA(!(irt_is64(ir->t) && st != IRT_NUM), "bad CONV *64.float emitted");
739 (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : 713 cid = irt_is64(ir->t) ? IRCALL_lj_vm_num2u64 :
740 ((st == IRT_NUM) ? 714 (st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i);
741 (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
742 (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
743 asm_callid(as, ir, cid); 715 asm_callid(as, ir, cid);
744 } 716 }
745 } else 717 } else
@@ -780,7 +752,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
780 } 752 }
781 } 753 }
782 } else { 754 } else {
783 if (st64 && !(ir->op2 & IRCONV_NONE)) { 755 if (!irt_isu32(ir->t)) { /* Implicit sign extension. */
756 Reg left = ra_alloc1(as, lref, RSET_GPR);
757 emit_dta(as, MIPSI_SLL, dest, left, 0);
758 } else if (st64 && !(ir->op2 & IRCONV_NONE)) {
784 /* This is either a 32 bit reg/reg mov which zeroes the hiword 759 /* This is either a 32 bit reg/reg mov which zeroes the hiword
785 ** or a load of the loword from a 64 bit address. 760 ** or a load of the loword from a 64 bit address.
786 */ 761 */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index d77c45ce..9e2af414 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -512,29 +512,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
512 Reg dest = ra_dest(as, ir, RSET_GPR); 512 Reg dest = ra_dest(as, ir, RSET_GPR);
513 Reg left = ra_alloc1(as, lref, RSET_FPR); 513 Reg left = ra_alloc1(as, lref, RSET_FPR);
514 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 514 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
515 if (irt_isu32(ir->t)) { 515 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
516 /* Convert both x and x-2^31 to int and merge results. */ 516 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
517 Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); 517 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
518 emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */ 518 emit_fb(as, PPCI_FCTIWZ, tmp, left);
519 emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP);
520 emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP);
521 emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */
522 emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */
523 emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */
524 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
525 emit_tai(as, PPCI_LWZ, dest,
526 RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */
527 emit_fb(as, PPCI_FCTIWZ, tmp, left);
528 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
529 emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
530 emit_fab(as, PPCI_FSUB, tmp, left, tmp);
531 emit_lsptr(as, PPCI_LFS, (tmp & 31),
532 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
533 } else {
534 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
535 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
536 emit_fb(as, PPCI_FCTIWZ, tmp, left);
537 }
538 } 519 }
539 } else 520 } else
540#endif 521#endif
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index f3c2238a..bdbce116 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -905,29 +905,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
905 } else { 905 } else {
906 Reg dest = ra_dest(as, ir, RSET_GPR); 906 Reg dest = ra_dest(as, ir, RSET_GPR);
907 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; 907 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
908 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 908 lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
909 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 909#if LJ_64
910 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 910 if (irt_isu64(ir->t)) {
911 /* For the indefinite result -2^63, add -2^64 and convert again. */
911 Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : 912 Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
912 ra_scratch(as, RSET_FPR); 913 ra_scratch(as, RSET_FPR);
913 MCLabel l_end = emit_label(as); 914 MCLabel l_end = emit_label(as);
914 if (LJ_32)
915 emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
916 emit_rr(as, op, dest|REX_64, tmp); 915 emit_rr(as, op, dest|REX_64, tmp);
917 if (st == IRT_NUM) 916 if (st == IRT_NUM)
918 emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); 917 emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64]);
919 else 918 else
920 emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); 919 emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64]);
921 emit_sjcc(as, CC_NS, l_end); 920 emit_sjcc(as, CC_NO, l_end);
922 emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ 921 emit_gmrmi(as, XG_ARITHi(XOg_CMP), dest|REX_64, 1);
923 emit_rr(as, op, dest|REX_64, tmp); 922 emit_rr(as, op, dest|REX_64, tmp);
924 ra_left(as, tmp, lref); 923 ra_left(as, tmp, lref);
925 } else { 924
926 if (LJ_64 && irt_isu32(ir->t)) 925 } else
927 emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ 926#endif
927 {
928 emit_mrm(as, op, 928 emit_mrm(as, op,
929 dest|((LJ_64 && 929 dest|((LJ_64 && irt_is64(ir->t)) ? REX_64 : 0),
930 (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
931 asm_fuseload(as, lref, RSET_FPR)); 930 asm_fuseload(as, lref, RSET_FPR));
932 } 931 }
933 } 932 }
@@ -1020,6 +1019,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
1020 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); 1019 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1021 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); 1020 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1022 Reg lo, hi; 1021 Reg lo, hi;
1022 int usehi = ra_used(ir);
1023 lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); 1023 lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV");
1024 lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); 1024 lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV");
1025 hi = ra_dest(as, ir, RSET_GPR); 1025 hi = ra_dest(as, ir, RSET_GPR);
@@ -1032,21 +1032,24 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
1032 emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); 1032 emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
1033 } 1033 }
1034 if (dt == IRT_U64) { 1034 if (dt == IRT_U64) {
1035 /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ 1035 /* For the indefinite result -2^63, add -2^64 and convert again. */
1036 MCLabel l_pop, l_end = emit_label(as); 1036 MCLabel l_pop, l_end = emit_label(as);
1037 emit_x87op(as, XI_FPOP); 1037 emit_x87op(as, XI_FPOP);
1038 l_pop = emit_label(as); 1038 l_pop = emit_label(as);
1039 emit_sjmp(as, l_end); 1039 emit_sjmp(as, l_end);
1040 emit_rmro(as, XO_MOV, hi, RID_ESP, 4); 1040 if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
1041 if ((as->flags & JIT_F_SSE3)) 1041 if ((as->flags & JIT_F_SSE3))
1042 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); 1042 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
1043 else 1043 else
1044 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); 1044 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
1045 emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); 1045 emit_rma(as, XO_FADDd, XOg_FADDd, &as->J->k32[LJ_K32_M2P64]);
1046 emit_sjcc(as, CC_NS, l_pop); 1046 emit_sjcc(as, CC_NE, l_pop);
1047 emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ 1047 emit_gmroi(as, XG_ARITHi(XOg_CMP), RID_ESP, 0, 0);
1048 } 1048 emit_sjcc(as, CC_NO, l_pop);
1049 emit_rmro(as, XO_MOV, hi, RID_ESP, 4); 1049 emit_gmrmi(as, XG_ARITHi(XOg_CMP), hi, 1);
1050 usehi = 1;
1051 }
1052 if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
1050 if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */ 1053 if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */
1051 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); 1054 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
1052 } else { /* Otherwise set FPU rounding mode to truncate before the store. */ 1055 } else { /* Otherwise set FPU rounding mode to truncate before the store. */
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index ec6f13c8..cd7ae942 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -59,9 +59,9 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
59 p = lj_strfmt_wuleb128(p, intV(o)); 59 p = lj_strfmt_wuleb128(p, intV(o));
60 } else if (tvisnum(o)) { 60 } else if (tvisnum(o)) {
61 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ 61 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
62 lua_Number num = numV(o); 62 int64_t i64;
63 int32_t k = lj_num2int(num); 63 int32_t k;
64 if (num == (lua_Number)k) { /* -0 is never a constant. */ 64 if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */
65 *p++ = BCDUMP_KTAB_INT; 65 *p++ = BCDUMP_KTAB_INT;
66 p = lj_strfmt_wuleb128(p, k); 66 p = lj_strfmt_wuleb128(p, k);
67 ctx->sb.w = p; 67 ctx->sb.w = p;
@@ -270,9 +270,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
270 /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ 270 /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */
271 if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) { 271 if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) {
272 /* Narrow number constants to integers. */ 272 /* Narrow number constants to integers. */
273 lua_Number num = numV(o); 273 int64_t i64;
274 k = lj_num2int(num); 274 if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */
275 if (num == (lua_Number)k) { /* -0 is never a constant. */
276 save_int: 275 save_int:
277 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); 276 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
278 if (k < 0) 277 if (k < 0)
diff --git a/src/lj_cconv.c b/src/lj_cconv.c
index 854b51db..2b9349cd 100644
--- a/src/lj_cconv.c
+++ b/src/lj_cconv.c
@@ -197,18 +197,16 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
197 else goto err_conv; /* NYI: long double. */ 197 else goto err_conv; /* NYI: long double. */
198 /* Then convert double to integer. */ 198 /* Then convert double to integer. */
199 /* The conversion must exactly match the semantics of JIT-compiled code! */ 199 /* The conversion must exactly match the semantics of JIT-compiled code! */
200 if (dsize < 4 || (dsize == 4 && !(dinfo & CTF_UNSIGNED))) { 200 if (dsize < 8) {
201 int32_t i = (int32_t)n; 201 int64_t i = lj_num2i64(n); /* Always convert via int64_t. */
202 if (dsize == 4) *(int32_t *)dp = i; 202 if (dsize == 4) *(int32_t *)dp = i;
203 else if (dsize == 2) *(int16_t *)dp = (int16_t)i; 203 else if (dsize == 2) *(int16_t *)dp = (int16_t)i;
204 else *(int8_t *)dp = (int8_t)i; 204 else *(int8_t *)dp = (int8_t)i;
205 } else if (dsize == 4) {
206 *(uint32_t *)dp = (uint32_t)n;
207 } else if (dsize == 8) { 205 } else if (dsize == 8) {
208 if (!(dinfo & CTF_UNSIGNED)) 206 if ((dinfo & CTF_UNSIGNED))
209 *(int64_t *)dp = (int64_t)n;
210 else
211 *(uint64_t *)dp = lj_num2u64(n); 207 *(uint64_t *)dp = lj_num2u64(n);
208 else
209 *(int64_t *)dp = lj_num2i64(n);
212 } else { 210 } else {
213 goto err_conv; /* NYI: conversion to >64 bit integers. */ 211 goto err_conv; /* NYI: conversion to >64 bit integers. */
214 } 212 }
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 3b48f76c..2dc56a80 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -133,12 +133,7 @@ collect_attrib:
133 idx = (ptrdiff_t)intV(key); 133 idx = (ptrdiff_t)intV(key);
134 goto integer_key; 134 goto integer_key;
135 } else if (tvisnum(key)) { /* Numeric key. */ 135 } else if (tvisnum(key)) { /* Numeric key. */
136#ifdef _MSC_VER 136 idx = lj_num2int_type(numV(key), ptrdiff_t);
137 /* Workaround for MSVC bug. */
138 volatile
139#endif
140 lua_Number n = numV(key);
141 idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
142 integer_key: 137 integer_key:
143 if (ctype_ispointer(ct->info)) { 138 if (ctype_ispointer(ct->info)) {
144 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ 139 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 27f2c1dd..45c559cf 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -445,7 +445,20 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
445 /* fallthrough */ 445 /* fallthrough */
446 case CCX(I, F): 446 case CCX(I, F):
447 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 447 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
448 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY); 448 conv_I_F:
449#if LJ_SOFTFP || LJ_32
450 if (st == IRT_FLOAT) { /* Uncommon. Simplify split backends. */
451 sp = emitconv(sp, IRT_NUM, IRT_FLOAT, 0);
452 st = IRT_NUM;
453 }
454#endif
455 if (dsize < 8) {
456 lj_needsplit(J);
457 sp = emitconv(sp, IRT_I64, st, IRCONV_ANY);
458 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, IRT_I64, 0);
459 } else {
460 sp = emitconv(sp, dt, st, IRCONV_ANY);
461 }
449 goto xstore; 462 goto xstore;
450 case CCX(I, P): 463 case CCX(I, P):
451 case CCX(I, A): 464 case CCX(I, A):
@@ -523,10 +536,9 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
523 goto xstore; 536 goto xstore;
524 case CCX(P, F): 537 case CCX(P, F):
525 if (st == IRT_CDATA) goto err_nyi; 538 if (st == IRT_CDATA) goto err_nyi;
526 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 539 /* The signed 64 bit conversion is cheaper. */
527 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 540 dt = (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32;
528 st, IRCONV_ANY); 541 goto conv_I_F;
529 goto xstore;
530 542
531 /* Destination is an array. */ 543 /* Destination is an array. */
532 case CCX(A, A): 544 case CCX(A, A):
@@ -1878,7 +1890,7 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1878 if (J->base[0] && tref_iscdata(J->base[1])) { 1890 if (J->base[0] && tref_iscdata(J->base[1])) {
1879 tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64), 1891 tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64),
1880 J->base[1], &rd->argv[1]); 1892 J->base[1], &rd->argv[1]);
1881 if (!tref_isinteger(tsh)) 1893 if (LJ_32 && !tref_isinteger(tsh))
1882 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); 1894 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
1883 J->base[1] = tsh; 1895 J->base[1] = tsh;
1884 } 1896 }
@@ -1886,15 +1898,17 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1886 if (id) { 1898 if (id) {
1887 TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); 1899 TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]);
1888 uint32_t op = rd->data; 1900 uint32_t op = rd->data;
1901 IRType t;
1889 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); 1902 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
1903 t = tref_isinteger(tsh) ? IRT_INT : tref_type(tsh);
1890 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 1904 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1891 !tref_isk(tsh)) 1905 !tref_isk(tsh))
1892 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); 1906 tsh = emitir(IRT(IR_BAND, t), tsh, lj_ir_kint(J, 63));
1893#ifdef LJ_TARGET_UNIFYROT 1907#ifdef LJ_TARGET_UNIFYROT
1894 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { 1908 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
1895 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; 1909 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
1896 tsh = emitir(IRTI(IR_NEG), tsh, tsh); 1910 tsh = emitir(IRT(IR_NEG, t), tsh, tsh);
1897 } 1911 }
1898#endif 1912#endif
1899 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); 1913 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
1900 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); 1914 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
diff --git a/src/lj_def.h b/src/lj_def.h
index a9e23729..f34b1a39 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -127,6 +127,7 @@ typedef uintptr_t BloomFilter;
127#define LJ_INLINE inline 127#define LJ_INLINE inline
128#define LJ_AINLINE inline __attribute__((always_inline)) 128#define LJ_AINLINE inline __attribute__((always_inline))
129#define LJ_NOINLINE __attribute__((noinline)) 129#define LJ_NOINLINE __attribute__((noinline))
130#define LJ_CONSTF __attribute__((nothrow,const))
130 131
131#if defined(__ELF__) || defined(__MACH__) || defined(__psp2__) 132#if defined(__ELF__) || defined(__MACH__) || defined(__psp2__)
132#if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) 133#if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__))
@@ -245,6 +246,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p)
245#define LJ_INLINE __inline 246#define LJ_INLINE __inline
246#define LJ_AINLINE __forceinline 247#define LJ_AINLINE __forceinline
247#define LJ_NOINLINE __declspec(noinline) 248#define LJ_NOINLINE __declspec(noinline)
249#define LJ_CONSTF __declspec(nothrow noalias)
248#if defined(_M_IX86) 250#if defined(_M_IX86)
249#define LJ_FASTCALL __fastcall 251#define LJ_FASTCALL __fastcall
250#endif 252#endif
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 527b6c06..290986f6 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -70,7 +70,7 @@ static int32_t argv2int(jit_State *J, TValue *o)
70{ 70{
71 if (!lj_strscan_numberobj(o)) 71 if (!lj_strscan_numberobj(o))
72 lj_trace_err(J, LJ_TRERR_BADTYPE); 72 lj_trace_err(J, LJ_TRERR_BADTYPE);
73 return tvisint(o) ? intV(o) : lj_num2int(numV(o)); 73 return numberVint(o);
74} 74}
75 75
76/* Get runtime value of string argument. */ 76/* Get runtime value of string argument. */
@@ -586,7 +586,7 @@ static void LJ_FASTCALL recff_math_round(jit_State *J, RecordFFData *rd)
586 /* Result is integral (or NaN/Inf), but may not fit an int32_t. */ 586 /* Result is integral (or NaN/Inf), but may not fit an int32_t. */
587 if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */ 587 if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */
588 lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data); 588 lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data);
589 if (n == (lua_Number)lj_num2int(n)) 589 if (lj_num2int_ok(n))
590 tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK); 590 tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK);
591 } 591 }
592 J->base[0] = tr; 592 J->base[0] = tr;
diff --git a/src/lj_ir.c b/src/lj_ir.c
index e7a5e8bc..e24fead4 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -248,28 +248,15 @@ TRef lj_ir_kint64(jit_State *J, uint64_t u64)
248 return lj_ir_k64(J, IR_KINT64, u64); 248 return lj_ir_k64(J, IR_KINT64, u64);
249} 249}
250 250
251/* Check whether a number is int and return it. -0 is NOT considered an int. */
252static int numistrueint(lua_Number n, int32_t *kp)
253{
254 int32_t k = lj_num2int(n);
255 if (n == (lua_Number)k) {
256 if (kp) *kp = k;
257 if (k == 0) { /* Special check for -0. */
258 TValue tv;
259 setnumV(&tv, n);
260 if (tv.u32.hi != 0)
261 return 0;
262 }
263 return 1;
264 }
265 return 0;
266}
267
268/* Intern number as int32_t constant if possible, otherwise as FP constant. */ 251/* Intern number as int32_t constant if possible, otherwise as FP constant. */
269TRef lj_ir_knumint(jit_State *J, lua_Number n) 252TRef lj_ir_knumint(jit_State *J, lua_Number n)
270{ 253{
254 int64_t i64;
271 int32_t k; 255 int32_t k;
272 if (numistrueint(n, &k)) 256 TValue tv;
257 setnumV(&tv, n);
258 /* -0 is NOT considered an int. */
259 if (lj_num2int_check(n, i64, k) && !tvismzero(&tv))
273 return lj_ir_kint(J, k); 260 return lj_ir_kint(J, k);
274 else 261 else
275 return lj_ir_knum(J, n); 262 return lj_ir_knum(J, n);
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 5196144e..60b196c6 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -233,20 +233,15 @@ typedef struct CCallInfo {
233 _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ 233 _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
234 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ 234 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
235 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ 235 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
236 _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
237 _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ 236 _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
238 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ 237 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
239 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ 238 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
240 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ 239 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
241 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
242 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ 240 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
243 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ 241 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
244 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ 242 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
245 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ 243 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
246 _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \ 244 _(FP64_FFI, lj_vm_num2u64, 1, N, U64, XA_FP) \
247 _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
248 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
249 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
250 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ 245 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
251 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ 246 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
252 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ 247 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
@@ -291,27 +286,14 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
291#define softfp_d2i __aeabi_d2iz 286#define softfp_d2i __aeabi_d2iz
292#define softfp_ui2d __aeabi_ui2d 287#define softfp_ui2d __aeabi_ui2d
293#define softfp_f2d __aeabi_f2d 288#define softfp_f2d __aeabi_f2d
294#define softfp_d2ui __aeabi_d2uiz
295#define softfp_d2f __aeabi_d2f 289#define softfp_d2f __aeabi_d2f
296#define softfp_i2f __aeabi_i2f 290#define softfp_i2f __aeabi_i2f
297#define softfp_ui2f __aeabi_ui2f 291#define softfp_ui2f __aeabi_ui2f
298#define softfp_f2i __aeabi_f2iz 292#define softfp_f2i __aeabi_f2iz
299#define softfp_f2ui __aeabi_f2uiz
300#define fp64_l2d __aeabi_l2d 293#define fp64_l2d __aeabi_l2d
301#define fp64_ul2d __aeabi_ul2d 294#define fp64_ul2d __aeabi_ul2d
302#define fp64_l2f __aeabi_l2f 295#define fp64_l2f __aeabi_l2f
303#define fp64_ul2f __aeabi_ul2f 296#define fp64_ul2f __aeabi_ul2f
304#if LJ_TARGET_IOS
305#define fp64_d2l __fixdfdi
306#define fp64_d2ul __fixunsdfdi
307#define fp64_f2l __fixsfdi
308#define fp64_f2ul __fixunssfdi
309#else
310#define fp64_d2l __aeabi_d2lz
311#define fp64_d2ul __aeabi_d2ulz
312#define fp64_f2l __aeabi_f2lz
313#define fp64_f2ul __aeabi_f2ulz
314#endif
315#elif LJ_TARGET_MIPS || LJ_TARGET_PPC 297#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
316#define softfp_add __adddf3 298#define softfp_add __adddf3
317#define softfp_sub __subdf3 299#define softfp_sub __subdf3
@@ -322,12 +304,10 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
322#define softfp_d2i __fixdfsi 304#define softfp_d2i __fixdfsi
323#define softfp_ui2d __floatunsidf 305#define softfp_ui2d __floatunsidf
324#define softfp_f2d __extendsfdf2 306#define softfp_f2d __extendsfdf2
325#define softfp_d2ui __fixunsdfsi
326#define softfp_d2f __truncdfsf2 307#define softfp_d2f __truncdfsf2
327#define softfp_i2f __floatsisf 308#define softfp_i2f __floatsisf
328#define softfp_ui2f __floatunsisf 309#define softfp_ui2f __floatunsisf
329#define softfp_f2i __fixsfsi 310#define softfp_f2i __fixsfsi
330#define softfp_f2ui __fixunssfsi
331#else 311#else
332#error "Missing soft-float definitions for target architecture" 312#error "Missing soft-float definitions for target architecture"
333#endif 313#endif
@@ -341,12 +321,10 @@ extern int32_t softfp_d2i(double a);
341#if LJ_HASFFI 321#if LJ_HASFFI
342extern double softfp_ui2d(uint32_t a); 322extern double softfp_ui2d(uint32_t a);
343extern double softfp_f2d(float a); 323extern double softfp_f2d(float a);
344extern uint32_t softfp_d2ui(double a);
345extern float softfp_d2f(double a); 324extern float softfp_d2f(double a);
346extern float softfp_i2f(int32_t a); 325extern float softfp_i2f(int32_t a);
347extern float softfp_ui2f(uint32_t a); 326extern float softfp_ui2f(uint32_t a);
348extern int32_t softfp_f2i(float a); 327extern int32_t softfp_f2i(float a);
349extern uint32_t softfp_f2ui(float a);
350#endif 328#endif
351#if LJ_TARGET_MIPS 329#if LJ_TARGET_MIPS
352extern double lj_vm_sfmin(double a, double b); 330extern double lj_vm_sfmin(double a, double b);
@@ -360,10 +338,6 @@ extern double lj_vm_sfmax(double a, double b);
360#define fp64_ul2d __floatundidf 338#define fp64_ul2d __floatundidf
361#define fp64_l2f __floatdisf 339#define fp64_l2f __floatdisf
362#define fp64_ul2f __floatundisf 340#define fp64_ul2f __floatundisf
363#define fp64_d2l __fixdfdi
364#define fp64_d2ul __fixunsdfdi
365#define fp64_f2l __fixsfdi
366#define fp64_f2ul __fixunssfdi
367#else 341#else
368#error "Missing fp64 helper definitions for this compiler" 342#error "Missing fp64 helper definitions for this compiler"
369#endif 343#endif
@@ -374,10 +348,6 @@ extern double fp64_l2d(int64_t a);
374extern double fp64_ul2d(uint64_t a); 348extern double fp64_ul2d(uint64_t a);
375extern float fp64_l2f(int64_t a); 349extern float fp64_l2f(int64_t a);
376extern float fp64_ul2f(uint64_t a); 350extern float fp64_ul2f(uint64_t a);
377extern int64_t fp64_d2l(double a);
378extern uint64_t fp64_d2ul(double a);
379extern int64_t fp64_f2l(float a);
380extern uint64_t fp64_f2ul(float a);
381#endif 351#endif
382 352
383#endif 353#endif
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 05a8e9bb..c0523457 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -350,22 +350,18 @@ enum {
350}; 350};
351 351
352enum { 352enum {
353#if LJ_TARGET_X64 || LJ_TARGET_MIPS64
354 LJ_K64_M2P64, /* -2^64 */
355#endif
353#if LJ_TARGET_X86ORX64 356#if LJ_TARGET_X86ORX64
354 LJ_K64_TOBIT, /* 2^52 + 2^51 */ 357 LJ_K64_TOBIT, /* 2^52 + 2^51 */
355 LJ_K64_2P64, /* 2^64 */ 358 LJ_K64_2P64, /* 2^64 */
356 LJ_K64_M2P64, /* -2^64 */
357#if LJ_32
358 LJ_K64_M2P64_31, /* -2^64 or -2^31 */
359#else
360 LJ_K64_M2P64_31 = LJ_K64_M2P64,
361#endif 359#endif
360#if LJ_TARGET_MIPS64
361 LJ_K64_2P63, /* 2^63 */
362#endif 362#endif
363#if LJ_TARGET_MIPS 363#if LJ_TARGET_MIPS
364 LJ_K64_2P31, /* 2^31 */ 364 LJ_K64_2P31, /* 2^31 */
365#if LJ_64
366 LJ_K64_2P63, /* 2^63 */
367 LJ_K64_M2P64, /* -2^64 */
368#endif
369#endif 365#endif
370#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 366#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
371 LJ_K64_VM_EXIT_HANDLER, 367 LJ_K64_VM_EXIT_HANDLER,
@@ -376,20 +372,19 @@ enum {
376#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) 372#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS)
377 373
378enum { 374enum {
379#if LJ_TARGET_X86ORX64 375#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
380 LJ_K32_M2P64_31, /* -2^64 or -2^31 */ 376 LJ_K32_M2P64, /* -2^64 */
377#endif
378#if LJ_TARGET_MIPS64
379 LJ_K32_2P63, /* 2^63 */
381#endif 380#endif
382#if LJ_TARGET_PPC 381#if LJ_TARGET_PPC
383 LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ 382 LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
384 LJ_K32_2P52, /* 2^52 */ 383 LJ_K32_2P52, /* 2^52 */
385#endif 384#endif
386#if LJ_TARGET_PPC || LJ_TARGET_MIPS 385#if LJ_TARGET_PPC
387 LJ_K32_2P31, /* 2^31 */ 386 LJ_K32_2P31, /* 2^31 */
388#endif 387#endif
389#if LJ_TARGET_MIPS64
390 LJ_K32_2P63, /* 2^63 */
391 LJ_K32_M2P64, /* -2^64 */
392#endif
393#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 388#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
394 LJ_K32_VM_EXIT_HANDLER, 389 LJ_K32_VM_EXIT_HANDLER,
395 LJ_K32_VM_EXIT_INTERP, 390 LJ_K32_VM_EXIT_INTERP,
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 88cb2bdd..d51351b8 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -349,7 +349,7 @@ int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
349 ** integer overflow. Overflow detection still works, since all FPUs 349 ** integer overflow. Overflow detection still works, since all FPUs
350 ** return either MININT or MAXINT, which is then out of range. 350 ** return either MININT or MAXINT, which is then out of range.
351 */ 351 */
352 int32_t i = (int32_t)numV(o); 352 int32_t i = lj_num2int(numV(o));
353 if (i >= a && i <= b) return i; 353 if (i >= a && i <= b) return i;
354#if LJ_HASFFI 354#if LJ_HASFFI
355 } else if (tviscdata(o)) { 355 } else if (tviscdata(o)) {
diff --git a/src/lj_meta.c b/src/lj_meta.c
index c9307615..3f30fafb 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -465,7 +465,8 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o)
465 if (tvisint(o+i)) { 465 if (tvisint(o+i)) {
466 k[i] = intV(o+i); nint++; 466 k[i] = intV(o+i); nint++;
467 } else { 467 } else {
468 k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); 468 int64_t i64;
469 if (lj_num2int_check(numV(o+i), i64, k[i])) nint++;
469 } 470 }
470 } 471 }
471 if (nint == 3) { /* Narrow to integers. */ 472 if (nint == 3) { /* Narrow to integers. */
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 73b186e2..58e5049c 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -981,43 +981,68 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
981 981
982/* -- Number to integer conversion ---------------------------------------- */ 982/* -- Number to integer conversion ---------------------------------------- */
983 983
984#if LJ_SOFTFP 984/*
985LJ_ASMF int32_t lj_vm_tobit(double x); 985** The C standard leaves many aspects of FP to integer conversions as
986#if LJ_TARGET_MIPS64 986** undefined behavior. Portability is a mess, hardware support varies,
987LJ_ASMF int32_t lj_vm_tointg(double x); 987** and modern C compilers are like a box of chocolates -- you never know
988#endif 988** what you're gonna get.
989#endif 989**
990** However, we need 100% matching behavior between the interpreter (asm + C),
991** optimizations (C) and the code generated by the JIT compiler (asm).
992** Mixing Lua numbers with FFI numbers creates some extra requirements.
993**
994** These conversions have been moved to assembler code, even if they seem
995** trivial, to foil unanticipated C compiler 'optimizations' with the
996** surrounding code. Only the unchecked double to int32_t conversion
997** is still in C, because it ought to be pretty safe -- we'll see.
998**
999** These macros also serve to document all places where FP to integer
1000** conversions happen.
1001*/
990 1002
991static LJ_AINLINE int32_t lj_num2bit(lua_Number n) 1003/* Unchecked double to int32_t conversion. */
992{ 1004#define lj_num2int(n) ((int32_t)(n))
993#if LJ_SOFTFP
994 return lj_vm_tobit(n);
995#else
996 TValue o;
997 o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */
998 return (int32_t)o.u32.lo;
999#endif
1000}
1001 1005
1002#define lj_num2int(n) ((int32_t)(n)) 1006/* Unchecked double to arch/os-dependent signed integer type conversion.
1007** This assumes the 32/64-bit signed conversions are NOT range-extended.
1008*/
1009#define lj_num2int_type(n, tp) ((tp)(n))
1003 1010
1004/* 1011/* Convert a double to int32_t and check for exact conversion.
1005** This must match the JIT backend behavior. In particular for archs 1012** Returns the zero-extended int32_t on success. -0 is OK, too.
1006** that don't have a common hardware instruction for this conversion. 1013** Returns 0x8000000080000000LL on failure (simplifies range checks).
1007** Note that signed FP to unsigned int conversions have an undefined
1008** result and should never be relied upon in portable FFI code.
1009** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
1010*/ 1014*/
1011static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) 1015LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x);
1012{ 1016
1013#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS 1017/* Check for exact conversion only, without storing the result. */
1014 int64_t i = (int64_t)n; 1018#define lj_num2int_ok(x) (lj_vm_num2int_check((x)) >= 0)
1015 if (i < 0) i = (int64_t)(n - 18446744073709551616.0); 1019
1016 return (uint64_t)i; 1020/* Check for exact conversion and conditionally store result.
1017#else 1021** Note: conditions that fail for 0x80000000 may check only the lower
1018 return (uint64_t)n; 1022** 32 bits. This generates good code for both 32 and 64 bit archs.
1019#endif 1023*/
1020} 1024#define lj_num2int_cond(x, i64, i, cond) \
1025 (i64 = lj_vm_num2int_check((x)), cond ? (i = (int32_t)i64, 1) : 0)
1026
1027/* This is the generic check for a full-range int32_t result. */
1028#define lj_num2int_check(x, i64, i) \
1029 lj_num2int_cond((x), i64, i, i64 >= 0)
1030
1031/* Predictable conversion from double to int64_t or uint64_t.
1032** Truncates towards zero. Out-of-range values, NaN and +-Inf return
1033** an arch-dependent result, but do not cause C undefined behavior.
1034** The uint64_t conversion accepts the union of the unsigned + signed range.
1035*/
1036LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x);
1037LJ_ASMF LJ_CONSTF int64_t lj_vm_num2u64(double x);
1038
1039#define lj_num2i64(x) (lj_vm_num2i64((x)))
1040#define lj_num2u64(x) (lj_vm_num2u64((x)))
1041
1042/* Lua BitOp conversion semantics use the 2^52 + 2^51 trick. */
1043LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x);
1044
1045#define lj_num2bit(x) lj_vm_tobit((x))
1021 1046
1022static LJ_AINLINE int32_t numberVint(cTValue *o) 1047static LJ_AINLINE int32_t numberVint(cTValue *o)
1023{ 1048{
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 6fdf4566..456c04b2 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -303,17 +303,18 @@ LJFOLDF(kfold_intarith)
303 return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o)); 303 return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
304} 304}
305 305
306/* Forward declaration. */
307static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
308 IROp op);
309
306LJFOLD(ADDOV KINT KINT) 310LJFOLD(ADDOV KINT KINT)
307LJFOLD(SUBOV KINT KINT) 311LJFOLD(SUBOV KINT KINT)
308LJFOLD(MULOV KINT KINT) 312LJFOLD(MULOV KINT KINT)
309LJFOLDF(kfold_intovarith) 313LJFOLDF(kfold_intovarith)
310{ 314{
311 lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i, 315 int64_t k = kfold_int64arith(J, (int64_t)fleft->i, (int64_t)fright->i,
312 fins->o - IR_ADDOV); 316 (IROp)((int)fins->o - (int)IR_ADDOV + (int)IR_ADD));
313 int32_t k = lj_num2int(n); 317 return checki32(k) ? INTFOLD(k) : FAILFOLD;
314 if (n != (lua_Number)k)
315 return FAILFOLD;
316 return INTFOLD(k);
317} 318}
318 319
319LJFOLD(BNOT KINT) 320LJFOLD(BNOT KINT)
@@ -368,11 +369,11 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
368 IROp op) 369 IROp op)
369{ 370{
370 UNUSED(J); 371 UNUSED(J);
371#if LJ_HASFFI
372 switch (op) { 372 switch (op) {
373 case IR_ADD: k1 += k2; break; 373 case IR_ADD: k1 += k2; break;
374 case IR_SUB: k1 -= k2; break; 374 case IR_SUB: k1 -= k2; break;
375 case IR_MUL: k1 *= k2; break; 375 case IR_MUL: k1 *= k2; break;
376#if LJ_HASFFI
376 case IR_BAND: k1 &= k2; break; 377 case IR_BAND: k1 &= k2; break;
377 case IR_BOR: k1 |= k2; break; 378 case IR_BOR: k1 |= k2; break;
378 case IR_BXOR: k1 ^= k2; break; 379 case IR_BXOR: k1 ^= k2; break;
@@ -382,11 +383,8 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
382 case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break; 383 case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
383 case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break; 384 case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
384 default: lj_assertJ(0, "bad IR op %d", op); break; 385 default: lj_assertJ(0, "bad IR op %d", op); break;
385 }
386#else
387 UNUSED(k2); UNUSED(op);
388 lj_assertJ(0, "FFI IR op without FFI");
389#endif 386#endif
387 }
390 return k1; 388 return k1;
391} 389}
392 390
@@ -883,8 +881,11 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
883LJFOLDF(kfold_conv_knum_int_num) 881LJFOLDF(kfold_conv_knum_int_num)
884{ 882{
885 lua_Number n = knumleft; 883 lua_Number n = knumleft;
886 int32_t k = lj_num2int(n); 884 if (irt_isguard(fins->t)) {
887 if (irt_isguard(fins->t) && n != (lua_Number)k) { 885 int64_t i64;
886 int32_t k;
887 if (lj_num2int_check(n, i64, k))
888 return INTFOLD(k);
888 /* We're about to create a guard which always fails, like CONV +1.5. 889 /* We're about to create a guard which always fails, like CONV +1.5.
889 ** Some pathological loops cause this during LICM, e.g.: 890 ** Some pathological loops cause this during LICM, e.g.:
890 ** local x,k,t = 0,1.5,{1,[1.5]=2} 891 ** local x,k,t = 0,1.5,{1,[1.5]=2}
@@ -892,27 +893,15 @@ LJFOLDF(kfold_conv_knum_int_num)
892 ** assert(x == 300) 893 ** assert(x == 300)
893 */ 894 */
894 return FAILFOLD; 895 return FAILFOLD;
896 } else {
897 return INTFOLD(lj_num2int(n));
895 } 898 }
896 return INTFOLD(k);
897}
898
899LJFOLD(CONV KNUM IRCONV_U32_NUM)
900LJFOLDF(kfold_conv_knum_u32_num)
901{
902#ifdef _MSC_VER
903 { /* Workaround for MSVC bug. */
904 volatile uint32_t u = (uint32_t)knumleft;
905 return INTFOLD((int32_t)u);
906 }
907#else
908 return INTFOLD((int32_t)(uint32_t)knumleft);
909#endif
910} 899}
911 900
912LJFOLD(CONV KNUM IRCONV_I64_NUM) 901LJFOLD(CONV KNUM IRCONV_I64_NUM)
913LJFOLDF(kfold_conv_knum_i64_num) 902LJFOLDF(kfold_conv_knum_i64_num)
914{ 903{
915 return INT64FOLD((uint64_t)(int64_t)knumleft); 904 return INT64FOLD((uint64_t)lj_num2i64(knumleft));
916} 905}
917 906
918LJFOLD(CONV KNUM IRCONV_U64_NUM) 907LJFOLD(CONV KNUM IRCONV_U64_NUM)
@@ -1135,7 +1124,6 @@ LJFOLDF(shortcut_conv_num_int)
1135} 1124}
1136 1125
1137LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ 1126LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
1138LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */
1139LJFOLDF(simplify_conv_int_num) 1127LJFOLDF(simplify_conv_int_num)
1140{ 1128{
1141 /* Fold even across PHI to avoid expensive num->int conversions in loop. */ 1129 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
@@ -1334,6 +1322,24 @@ LJFOLDF(narrow_convert)
1334 return lj_opt_narrow_convert(J); 1322 return lj_opt_narrow_convert(J);
1335} 1323}
1336 1324
1325LJFOLD(XSTORE any CONV)
1326LJFOLDF(xstore_conv)
1327{
1328#if LJ_64
1329 PHIBARRIER(fright);
1330 if (!irt_is64(fins->t) &&
1331 irt_type(fins->t) == (IRType)((fright->op2&IRCONV_DSTMASK)>>IRCONV_DSH) &&
1332 ((fright->op2&IRCONV_SRCMASK) == IRT_I64 ||
1333 (fright->op2&IRCONV_SRCMASK) == IRT_U64)) {
1334 fins->op2 = fright->op1;
1335 return RETRYFOLD;
1336 }
1337#else
1338 UNUSED(J);
1339#endif
1340 return NEXTFOLD;
1341}
1342
1337/* -- Integer algebraic simplifications ----------------------------------- */ 1343/* -- Integer algebraic simplifications ----------------------------------- */
1338 1344
1339LJFOLD(ADD any KINT) 1345LJFOLD(ADD any KINT)
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 01b5833d..3085c837 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -281,22 +281,20 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
281 return 0; 281 return 0;
282 } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ 282 } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
283 lua_Number n = ir_knum(ir)->n; 283 lua_Number n = ir_knum(ir)->n;
284 int64_t i64;
285 int32_t k;
284 if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { 286 if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
285 /* Allows a wider range of constants. */ 287 /* Allows a wider range of constants, if const doesn't lose precision. */
286 int64_t k64 = (int64_t)n; 288 if (lj_num2int_check(n, i64, k)) {
287 if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */
288 *nc->sp++ = NARROWINS(NARROW_INT, 0);
289 *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */
290 return 0;
291 }
292 } else {
293 int32_t k = lj_num2int(n);
294 /* Only if constant is a small integer. */
295 if (checki16(k) && n == (lua_Number)k) {
296 *nc->sp++ = NARROWINS(NARROW_INT, 0); 289 *nc->sp++ = NARROWINS(NARROW_INT, 0);
297 *nc->sp++ = (NarrowIns)k; 290 *nc->sp++ = (NarrowIns)k;
298 return 0; 291 return 0;
299 } 292 }
293 } else if (lj_num2int_cond(n, i64, k, checki16((int32_t)i64))) {
294 /* Only if constant is a small integer. */
295 *nc->sp++ = NARROWINS(NARROW_INT, 0);
296 *nc->sp++ = (NarrowIns)k;
297 return 0;
300 } 298 }
301 return 10; /* Never narrow other FP constants (this is rare). */ 299 return 10; /* Never narrow other FP constants (this is rare). */
302 } 300 }
@@ -512,12 +510,6 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
512 510
513/* -- Narrowing of arithmetic operators ----------------------------------- */ 511/* -- Narrowing of arithmetic operators ----------------------------------- */
514 512
515/* Check whether a number fits into an int32_t (-0 is ok, too). */
516static int numisint(lua_Number n)
517{
518 return (n == (lua_Number)lj_num2int(n));
519}
520
521/* Convert string to number. Error out for non-numeric string values. */ 513/* Convert string to number. Error out for non-numeric string values. */
522static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o) 514static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o)
523{ 515{
@@ -539,8 +531,8 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
539 /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ 531 /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
540 if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && 532 if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
541 tref_isinteger(rb) && tref_isinteger(rc) && 533 tref_isinteger(rb) && tref_isinteger(rc) &&
542 numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), 534 lj_num2int_ok(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
543 (int)op - (int)IR_ADD))) 535 (int)op - (int)IR_ADD)))
544 return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); 536 return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc);
545 if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); 537 if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT);
546 if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); 538 if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
@@ -591,7 +583,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
591static int narrow_forl(jit_State *J, cTValue *o) 583static int narrow_forl(jit_State *J, cTValue *o)
592{ 584{
593 if (tvisint(o)) return 1; 585 if (tvisint(o)) return 1;
594 if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); 586 if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return lj_num2int_ok(numV(o));
595 return 0; 587 return 0;
596} 588}
597 589
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 8d025911..d29d1eab 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -573,13 +573,9 @@ static void split_ir(jit_State *J)
573 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ 573 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
574 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 574 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
575#if LJ_SOFTFP 575#if LJ_SOFTFP
576 lj_assertJ(st != IRT_FLOAT, "bad CONV *64.float emitted");
576 if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ 577 if (st == IRT_NUM) { /* NUM to 64 bit int conv. */
577 hi = split_call_l(J, hisubst, oir, ir, 578 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_num2u64);
578 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
579 } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */
580 nir->o = IR_CALLN;
581 nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
582 hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
583 } 579 }
584#else 580#else
585 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ 581 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
@@ -692,8 +688,9 @@ static void split_ir(jit_State *J)
692 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; 688 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
693 } 689 }
694 } else if (st == IRT_FLOAT) { 690 } else if (st == IRT_FLOAT) {
691 lj_assertJ(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
695 nir->o = IR_CALLN; 692 nir->o = IR_CALLN;
696 nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; 693 nir->op2 = IRCALL_softfp_f2i;
697 } else 694 } else
698#endif 695#endif
699#if LJ_SOFTFP 696#if LJ_SOFTFP
@@ -705,9 +702,7 @@ static void split_ir(jit_State *J)
705 } else { 702 } else {
706 split_call_l(J, hisubst, oir, ir, 703 split_call_l(J, hisubst, oir, ir,
707#if LJ_32 && LJ_HASFFI 704#if LJ_32 && LJ_HASFFI
708 st == IRT_NUM ? 705 st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i
709 (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
710 (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
711#else 706#else
712 IRCALL_softfp_d2i 707 IRCALL_softfp_d2i
713#endif 708#endif
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 181ce4d7..832f6bf4 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -522,9 +522,9 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg)
522 ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv)); 522 ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv));
523 else 523 else
524#else 524#else
525 lua_Number n = expr_numberV(e); 525 int64_t i64;
526 int32_t k = lj_num2int(n); 526 int32_t k;
527 if (checki16(k) && n == (lua_Number)k) 527 if (lj_num2int_cond(expr_numberV(e), i64, k, checki16((int32_t)i64)))
528 ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); 528 ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k);
529 else 529 else
530#endif 530#endif
@@ -782,8 +782,9 @@ static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2)
782 setnumV(&o, n); 782 setnumV(&o, n);
783 if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */ 783 if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */
784 if (LJ_DUALNUM) { 784 if (LJ_DUALNUM) {
785 int32_t k = lj_num2int(n); 785 int64_t i64;
786 if ((lua_Number)k == n) { 786 int32_t k;
787 if (lj_num2int_check(n, i64, k)) {
787 setintV(&e1->u.nval, k); 788 setintV(&e1->u.nval, k);
788 return 1; 789 return 1;
789 } 790 }
@@ -1386,10 +1387,10 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr)
1386 if (tvisnum(&n->key)) { 1387 if (tvisnum(&n->key)) {
1387 TValue *tv = &((TValue *)kptr)[kidx]; 1388 TValue *tv = &((TValue *)kptr)[kidx];
1388 if (LJ_DUALNUM) { 1389 if (LJ_DUALNUM) {
1389 lua_Number nn = numV(&n->key); 1390 int64_t i64;
1390 int32_t k = lj_num2int(nn); 1391 int32_t k;
1391 lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); 1392 lj_assertFS(!tvismzero(&n->key), "unexpected -0 key");
1392 if ((lua_Number)k == nn) 1393 if (lj_num2int_check(numV(&n->key), i64, k))
1393 setintV(tv, k); 1394 setintV(tv, k);
1394 else 1395 else
1395 *tv = n->key; 1396 *tv = n->key;
@@ -1656,9 +1657,9 @@ static void expr_index(FuncState *fs, ExpDesc *t, ExpDesc *e)
1656 } 1657 }
1657 } 1658 }
1658#else 1659#else
1659 lua_Number n = expr_numberV(e); 1660 int64_t i64;
1660 int32_t k = lj_num2int(n); 1661 int32_t k;
1661 if (checku8(k) && n == (lua_Number)k) { 1662 if (lj_num2int_cond(expr_numberV(e), i64, k, checku8((int32_t)i64))) {
1662 t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ 1663 t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */
1663 return; 1664 return;
1664 } 1665 }
diff --git a/src/lj_record.c b/src/lj_record.c
index 6543f274..536d7171 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -351,9 +351,14 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
351 } else { 351 } else {
352 cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); 352 cTValue *tv = proto_knumtv(J->pt, bc_d(ins));
353 if (t == IRT_INT) { 353 if (t == IRT_INT) {
354 int32_t k = numberVint(tv); 354 if (tvisint(tv)) {
355 if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ 355 return lj_ir_kint(J, intV(tv));
356 return lj_ir_kint(J, k); 356 } else {
357 int64_t i64;
358 int32_t k;
359 if (lj_num2int_check(numV(tv), i64, k)) /* -0 is ok here. */
360 return lj_ir_kint(J, k);
361 }
357 return 0; /* Type mismatch. */ 362 return 0; /* Type mismatch. */
358 } else { 363 } else {
359 return lj_ir_knum(J, numberVnum(tv)); 364 return lj_ir_knum(J, numberVnum(tv));
@@ -1426,9 +1431,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
1426 /* Integer keys are looked up in the array part first. */ 1431 /* Integer keys are looked up in the array part first. */
1427 key = ix->key; 1432 key = ix->key;
1428 if (tref_isnumber(key)) { 1433 if (tref_isnumber(key)) {
1429 int32_t k = numberVint(&ix->keyv); 1434 int32_t k;
1430 if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) 1435 if (tvisint(&ix->keyv)) {
1431 k = LJ_MAX_ASIZE; 1436 k = intV(&ix->keyv);
1437 } else {
1438 int64_t i64;
1439 if (!lj_num2int_check(numV(&ix->keyv), i64, k)) k = LJ_MAX_ASIZE;
1440 }
1432 if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ 1441 if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */
1433 TRef ikey = lj_opt_narrow_index(J, key); 1442 TRef ikey = lj_opt_narrow_index(J, key);
1434 TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); 1443 TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
index bb649fc8..0936298d 100644
--- a/src/lj_strfmt.c
+++ b/src/lj_strfmt.c
@@ -351,7 +351,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
351/* Add number formatted as signed integer to buffer. */ 351/* Add number formatted as signed integer to buffer. */
352SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) 352SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
353{ 353{
354 int64_t k = (int64_t)n; 354 int64_t k = lj_num2i64(n);
355 if (checki32(k) && sf == STRFMT_INT) 355 if (checki32(k) && sf == STRFMT_INT)
356 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ 356 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
357 else 357 else
@@ -361,12 +361,7 @@ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
361/* Add number formatted as unsigned integer to buffer. */ 361/* Add number formatted as unsigned integer to buffer. */
362SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) 362SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
363{ 363{
364 int64_t k; 364 return lj_strfmt_putfxint(sb, sf, lj_num2u64(n));
365 if (n >= 9223372036854775808.0)
366 k = (int64_t)(n - 18446744073709551616.0);
367 else
368 k = (int64_t)n;
369 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
370} 365}
371 366
372/* Format stack arguments to buffer. */ 367/* Format stack arguments to buffer. */
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index 502c78e9..fbb959c5 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -523,10 +523,10 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
523 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); 523 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
524 524
525 /* Try to convert number to integer, if requested. */ 525 /* Try to convert number to integer, if requested. */
526 if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT) && !tvismzero(o)) { 526 if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) {
527 double n = o->n; 527 int64_t tmp;
528 int32_t i = lj_num2int(n); 528 if (lj_num2int_check(o->n, tmp, o->i) && !tvismzero(o))
529 if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; } 529 return STRSCAN_INT;
530 } 530 }
531 return fmt; 531 return fmt;
532 } 532 }
diff --git a/src/lj_tab.c b/src/lj_tab.c
index 62e33611..2959fadb 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -295,9 +295,9 @@ static uint32_t countint(cTValue *key, uint32_t *bins)
295{ 295{
296 lj_assertX(!tvisint(key), "bad integer key"); 296 lj_assertX(!tvisint(key), "bad integer key");
297 if (tvisnum(key)) { 297 if (tvisnum(key)) {
298 lua_Number nk = numV(key); 298 int64_t i64;
299 int32_t k = lj_num2int(nk); 299 int32_t k;
300 if ((uint32_t)k < LJ_MAX_ASIZE && nk == (lua_Number)k) { 300 if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < LJ_MAX_ASIZE)) {
301 bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++; 301 bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++;
302 return 1; 302 return 1;
303 } 303 }
@@ -409,9 +409,9 @@ cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key)
409 if (tv) 409 if (tv)
410 return tv; 410 return tv;
411 } else if (tvisnum(key)) { 411 } else if (tvisnum(key)) {
412 lua_Number nk = numV(key); 412 int64_t i64;
413 int32_t k = lj_num2int(nk); 413 int32_t k;
414 if (nk == (lua_Number)k) { 414 if (lj_num2int_check(numV(key), i64, k)) {
415 cTValue *tv = lj_tab_getint(t, k); 415 cTValue *tv = lj_tab_getint(t, k);
416 if (tv) 416 if (tv)
417 return tv; 417 return tv;
@@ -542,9 +542,9 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
542 } else if (tvisint(key)) { 542 } else if (tvisint(key)) {
543 return lj_tab_setint(L, t, intV(key)); 543 return lj_tab_setint(L, t, intV(key));
544 } else if (tvisnum(key)) { 544 } else if (tvisnum(key)) {
545 lua_Number nk = numV(key); 545 int64_t i64;
546 int32_t k = lj_num2int(nk); 546 int32_t k;
547 if (nk == (lua_Number)k) 547 if (lj_num2int_check(numV(key), i64, k))
548 return lj_tab_setint(L, t, k); 548 return lj_tab_setint(L, t, k);
549 if (tvisnan(key)) 549 if (tvisnan(key))
550 lj_err_msg(L, LJ_ERR_NANIDX); 550 lj_err_msg(L, LJ_ERR_NANIDX);
@@ -580,9 +580,9 @@ uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key)
580 setnumV(&tmp, (lua_Number)k); 580 setnumV(&tmp, (lua_Number)k);
581 key = &tmp; 581 key = &tmp;
582 } else if (tvisnum(key)) { 582 } else if (tvisnum(key)) {
583 lua_Number nk = numV(key); 583 int64_t i64;
584 int32_t k = lj_num2int(nk); 584 int32_t k;
585 if ((uint32_t)k < t->asize && nk == (lua_Number)k) 585 if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < t->asize))
586 return (uint32_t)k + 1; 586 return (uint32_t)k + 1;
587 } 587 }
588 if (!tvisnil(key)) { 588 if (!tvisnil(key)) {
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index fa32a5d4..193102ee 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -314,6 +314,7 @@ typedef enum {
314 XO_FSTPq = XO_(dd), XOg_FSTPq = 3, 314 XO_FSTPq = XO_(dd), XOg_FSTPq = 3,
315 XO_FISTPq = XO_(df), XOg_FISTPq = 7, 315 XO_FISTPq = XO_(df), XOg_FISTPq = 7,
316 XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, 316 XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1,
317 XO_FADDd = XO_(d8), XOg_FADDd = 0,
317 XO_FADDq = XO_(dc), XOg_FADDq = 0, 318 XO_FADDq = XO_(dc), XOg_FADDq = 0,
318 XO_FLDCW = XO_(d9), XOg_FLDCW = 5, 319 XO_FLDCW = XO_(d9), XOg_FLDCW = 5,
319 XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 320 XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 47d7faa5..ad329540 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -317,32 +317,34 @@ void lj_trace_initstate(global_State *g)
317 tv[1].u64 = U64x(80000000,00000000); 317 tv[1].u64 = U64x(80000000,00000000);
318 318
319 /* Initialize 32/64 bit constants. */ 319 /* Initialize 32/64 bit constants. */
320#if LJ_TARGET_X64 || LJ_TARGET_MIPS64
321 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
322#endif
320#if LJ_TARGET_X86ORX64 323#if LJ_TARGET_X86ORX64
321 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); 324 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
322#if LJ_32
323 J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
324#endif
325 J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); 325 J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
326 J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
327#endif 326#endif
327#if LJ_TARGET_MIPS64
328 J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
329#endif
330#if LJ_TARGET_MIPS
331 J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
332#endif
333
328#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 334#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
329 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); 335 J->k32[LJ_K32_M2P64] = 0xdf800000;
336#endif
337#if LJ_TARGET_MIPS64
338 J->k32[LJ_K32_2P63] = 0x5f000000;
330#endif 339#endif
331#if LJ_TARGET_PPC 340#if LJ_TARGET_PPC
332 J->k32[LJ_K32_2P52_2P31] = 0x59800004; 341 J->k32[LJ_K32_2P52_2P31] = 0x59800004;
333 J->k32[LJ_K32_2P52] = 0x59800000; 342 J->k32[LJ_K32_2P52] = 0x59800000;
334#endif 343#endif
335#if LJ_TARGET_PPC || LJ_TARGET_MIPS 344#if LJ_TARGET_PPC
336 J->k32[LJ_K32_2P31] = 0x4f000000; 345 J->k32[LJ_K32_2P31] = 0x4f000000;
337#endif 346#endif
338#if LJ_TARGET_MIPS 347
339 J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
340#if LJ_64
341 J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
342 J->k32[LJ_K32_2P63] = 0x5f000000;
343 J->k32[LJ_K32_M2P64] = 0xdf800000;
344#endif
345#endif
346#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 348#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
347 J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; 349 J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler;
348 J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; 350 J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp;
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 9cc42613..96ad2d07 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -37,13 +37,19 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]);
37#if LJ_TARGET_PPC 37#if LJ_TARGET_PPC
38void lj_vm_cachesync(void *start, void *end); 38void lj_vm_cachesync(void *start, void *end);
39#endif 39#endif
40LJ_ASMF double lj_vm_foldarith(double x, double y, int op); 40LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op);
41#if LJ_HASJIT 41#if LJ_HASJIT
42LJ_ASMF double lj_vm_foldfpm(double x, int op); 42LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op);
43#endif 43#endif
44#if !LJ_ARCH_HASFPU 44#if LJ_SOFTFP && LJ_TARGET_MIPS64
45/* Declared in lj_obj.h: LJ_ASMF int32_t lj_vm_tobit(double x); */ 45LJ_ASMF LJ_CONSTF int32_t lj_vm_tointg(double x);
46#endif 46#endif
47/* Declared in lj_obj.h:
48** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x);
49** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x);
50** LJ_ASMF LJ_CONSTF uint64_t lj_vm_num2u64(double x);
51** LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x);
52*/
47 53
48/* Dispatch targets for recording and hooks. */ 54/* Dispatch targets for recording and hooks. */
49LJ_ASMF void lj_vm_record(void); 55LJ_ASMF void lj_vm_record(void);
@@ -62,15 +68,15 @@ LJ_ASMF char lj_vm_exit_interp[];
62#define lj_vm_floor floor 68#define lj_vm_floor floor
63#define lj_vm_ceil ceil 69#define lj_vm_ceil ceil
64#else 70#else
65LJ_ASMF double lj_vm_floor(double); 71LJ_ASMF LJ_CONSTF double lj_vm_floor(double);
66LJ_ASMF double lj_vm_ceil(double); 72LJ_ASMF LJ_CONSTF double lj_vm_ceil(double);
67#if LJ_TARGET_ARM 73#if LJ_TARGET_ARM
68LJ_ASMF double lj_vm_floor_sf(double); 74LJ_ASMF LJ_CONSTF double lj_vm_floor_sf(double);
69LJ_ASMF double lj_vm_ceil_sf(double); 75LJ_ASMF LJ_CONSTF double lj_vm_ceil_sf(double);
70#endif 76#endif
71#endif 77#endif
72#ifdef LUAJIT_NO_LOG2 78#ifdef LUAJIT_NO_LOG2
73LJ_ASMF double lj_vm_log2(double); 79LJ_ASMF LJ_CONSTF double lj_vm_log2(double);
74#else 80#else
75#define lj_vm_log2 log2 81#define lj_vm_log2 log2
76#endif 82#endif
@@ -80,16 +86,16 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
80 86
81#if LJ_HASJIT 87#if LJ_HASJIT
82#if LJ_TARGET_X86ORX64 88#if LJ_TARGET_X86ORX64
83LJ_ASMF void lj_vm_floor_sse(void); 89LJ_ASMF LJ_CONSTF void lj_vm_floor_sse(void);
84LJ_ASMF void lj_vm_ceil_sse(void); 90LJ_ASMF LJ_CONSTF void lj_vm_ceil_sse(void);
85LJ_ASMF void lj_vm_trunc_sse(void); 91LJ_ASMF LJ_CONSTF void lj_vm_trunc_sse(void);
86#endif 92#endif
87#if LJ_TARGET_PPC || LJ_TARGET_ARM64 93#if LJ_TARGET_PPC || LJ_TARGET_ARM64
88#define lj_vm_trunc trunc 94#define lj_vm_trunc trunc
89#else 95#else
90LJ_ASMF double lj_vm_trunc(double); 96LJ_ASMF LJ_CONSTF double lj_vm_trunc(double);
91#if LJ_TARGET_ARM 97#if LJ_TARGET_ARM
92LJ_ASMF double lj_vm_trunc_sf(double); 98LJ_ASMF LJ_CONSTF double lj_vm_trunc_sf(double);
93#endif 99#endif
94#endif 100#endif
95#if LJ_HASFFI 101#if LJ_HASFFI
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 2c9b96cc..1495102f 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -59,7 +59,7 @@ double lj_vm_foldarith(double x, double y, int op)
59 case IR_NEG - IR_ADD: return -x; break; 59 case IR_NEG - IR_ADD: return -x; break;
60 case IR_ABS - IR_ADD: return fabs(x); break; 60 case IR_ABS - IR_ADD: return fabs(x); break;
61#if LJ_HASJIT 61#if LJ_HASJIT
62 case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; 62 case IR_LDEXP - IR_ADD: return ldexp(x, lj_num2int(y)); break;
63 case IR_MIN - IR_ADD: return x < y ? x : y; break; 63 case IR_MIN - IR_ADD: return x < y ? x : y; break;
64 case IR_MAX - IR_ADD: return x > y ? x : y; break; 64 case IR_MAX - IR_ADD: return x > y ? x : y; break;
65#endif 65#endif
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 86bef0cf..2cd7eedb 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2452,6 +2452,118 @@ static void build_subroutines(BuildCtx *ctx)
2452 | bx lr 2452 | bx lr
2453 | 2453 |
2454 |//----------------------------------------------------------------------- 2454 |//-----------------------------------------------------------------------
2455 |//-- Number conversion functions ----------------------------------------
2456 |//-----------------------------------------------------------------------
2457 |
2458 |// int64_t lj_vm_num2int_check(double x)
2459 |->vm_num2int_check:
2460 |.if FPU
2461 |.if not HFABI
2462 | vmov d0, CARG1, CARG2
2463 |.endif
2464 | vcvt.s32.f64 s4, d0
2465 | vcvt.f64.s32 d1, s4
2466 | vcmp.f64 d0, d1
2467 | vmrs
2468 | bne >1
2469 | vmov CRET1, s4
2470 | mov CRET2, #0
2471 | bx lr
2472 |
2473 |.else
2474 |
2475 | asr CARG4, CARG2, #31 // sign = 0 or -1.
2476 | lsl CARG2, CARG2, #1
2477 | orrs RB, CARG2, CARG1
2478 | bxeq lr // Return 0 for +-0.
2479 | mov RB, #1024
2480 | add RB, RB, #30
2481 | sub RB, RB, CARG2, lsr #21
2482 | cmp RB, #32
2483 | bhs >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
2484 | lsr CARG3, CARG1, #21
2485 | orr CARG2, CARG3, CARG2, lsl #10 // Left-aligned mantissa.
2486 | rsb CARG3, RB, #32
2487 | lsl CARG3, CARG2, CARG3
2488 | orr CARG2, CARG2, #0x80000000 // Merge leading 1.
2489 | orrs CARG3, CARG3, CARG1, lsl #11
2490 | lsr CARG1, CARG2, RB // lo = right-aligned absolute value.
2491 | bne >1 // Fail if fractional part != 0.
2492 | adds CRET1, CARG1, CARG4
2493 | bmi >1 // Fail if lo+sign >= 0x80000000.
2494 | eor CRET1, CRET1, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign.
2495 | mov CRET2, #0
2496 | bx lr
2497 |.endif
2498 |1:
2499 | mov CRET1, #0x80000000
2500 | mov CRET2, #0x80000000
2501 | bx lr
2502 |
2503 |// int64_t lj_vm_num2i64(double x)
2504 |->vm_num2i64:
2505 |// fallthrough, same as lj_vm_num2u64.
2506 |
2507 |// uint64_t lj_vm_num2u64(double x)
2508 |->vm_num2u64:
2509 |.if HFABI
2510 | vmov CARG1, CARG2, d0
2511 |.endif
2512 | lsl RB, CARG2, #1
2513 | lsr RB, RB, #21
2514 | sub RB, RB, #1020
2515 | sub RB, RB, #3
2516 | cmp RB, #116
2517 | bhs >3 // Exponent out of range.
2518 | asr CARG4, CARG2, #31 // sign = 0 or -1.
2519 | lsl CARG2, CARG2, #12
2520 | lsr CARG2, CARG2, #12
2521 | rsbs RB, RB, #52
2522 | orr CARG2, CARG2, #0x00100000
2523 | bmi >2 // Shift mantissa left or right?
2524 | lsr CARG1, CARG1, RB // 64 bit right shift.
2525 | lsr CARG3, CARG2, RB
2526 | rsb RB, RB, #32
2527 | orr CARG1, CARG1, CARG2, lsl RB
2528 | rsb RB, RB, #0
2529 | orr CARG1, CARG1, CARG2, lsr RB
2530 | adds CRET1, CARG1, CARG4 // m = sign?-m:m = (m+sign)^sign.
2531 | adc CRET2, CARG3, CARG4
2532 |1:
2533 | eor CRET1, CRET1, CARG4
2534 | eor CRET2, CRET2, CARG4
2535 | bx lr
2536 |2:
2537 | rsb RB, RB, #0
2538 | lsl CARG2, CARG2, RB // 64 bit left shift.
2539 | lsl CARG3, CARG1, RB
2540 | sub RB, RB, #32
2541 | orr CARG2, CARG2, CARG1, lsl RB
2542 | rsb RB, RB, #0
2543 | orr CARG2, CARG2, CARG1, lsr RB
2544 | adds CRET1, CARG3, CARG4
2545 | adc CRET2, CARG2, CARG4
2546 | b <1
2547 |3:
2548 | mov CRET1, #0
2549 | mov CRET2, #0
2550 | bx lr
2551 |
2552 |// int32_t lj_vm_tobit(double x)
2553 |.if FPU
2554 |->vm_tobit:
2555 | vldr d1, >9
2556 |.if not HFABI
2557 | vmov d0, CARG1, CARG2
2558 |.endif
2559 | vadd.f64 d0, d0, d1
2560 | vmov CARG1, s0
2561 | bx lr
2562 |9:
2563 | .long 0, 0x43380000 // (double)(2^52 + 2^51).
2564 |.endif
2565 |
2566 |//-----------------------------------------------------------------------
2455 |//-- Miscellaneous functions -------------------------------------------- 2567 |//-- Miscellaneous functions --------------------------------------------
2456 |//----------------------------------------------------------------------- 2568 |//-----------------------------------------------------------------------
2457 | 2569 |
@@ -4097,7 +4209,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4097 | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] 4209 | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
4098 | // Subsumes ins_next1 and ins_next2. 4210 | // Subsumes ins_next1 and ins_next2.
4099 | ldr INS, TRACE:CARG1->startins 4211 | ldr INS, TRACE:CARG1->startins
4100 | bfi INS, OP, #0, #8 4212 | bic INS, INS, #0xff
4213 | orr INS, INS, OP
4101 | str INS, [PC], #4 4214 | str INS, [PC], #4
4102 | b <1 4215 | b <1
4103 |.endif 4216 |.endif
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index a437b657..eb6d0c2f 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -2156,6 +2156,42 @@ static void build_subroutines(BuildCtx *ctx)
2156 | ret 2156 | ret
2157 | 2157 |
2158 |//----------------------------------------------------------------------- 2158 |//-----------------------------------------------------------------------
2159 |//-- Number conversion functions ----------------------------------------
2160 |//-----------------------------------------------------------------------
2161 |
2162 |// int64_t lj_vm_num2int_check(double x)
2163 |->vm_num2int_check:
2164 | fcvtzs CRET1w, FARG1
2165 | scvtf FARG2, CRET1w
2166 | fcmp FARG2, FARG1
2167 | bne >1
2168 | ret
2169 |1:
2170 | mov CRET1, #0x8000000080000000
2171 | ret
2172 |
2173 |// int64_t lj_vm_num2i64(double x)
2174 |->vm_num2i64:
2175 | fcvtzs CRET1, FARG1
2176 | ret
2177 |
2178 |// uint64_t lj_vm_num2u64(double x)
2179 |->vm_num2u64:
2180 | fcvtzs CRET1, FARG1
2181 | fcvtzu CARG2, FARG1
2182 | cmn CRET1, #1 // Set overflow if CRET1 == INT64_MAX.
2183 | csel CRET1, CRET1, CARG2, vc // No overflow ? i64 : u64.
2184 | ret
2185 |
2186 |// int32_t lj_vm_tobit(double x)
2187 |->vm_tobit:
2188 | movz CRET1, #0x4338, lsl #48 // 2^52 + 2^51.
2189 | fmov FARG2, CRET1
2190 | fadd FARG1, FARG1, FARG2
2191 | fmov CRET1w, s0
2192 | ret
2193 |
2194 |//-----------------------------------------------------------------------
2159 |//-- Miscellaneous functions -------------------------------------------- 2195 |//-- Miscellaneous functions --------------------------------------------
2160 |//----------------------------------------------------------------------- 2196 |//-----------------------------------------------------------------------
2161 | 2197 |
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 02e588ee..8a6b8270 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -85,6 +85,7 @@
85| 85|
86|.if FPU 86|.if FPU
87|.define FARG1, f12 87|.define FARG1, f12
88|.define FARG1HI, f13
88|.define FARG2, f14 89|.define FARG2, f14
89| 90|
90|.define FRET1, f0 91|.define FRET1, f0
@@ -2560,7 +2561,7 @@ static void build_subroutines(BuildCtx *ctx)
2560 | mtc1 r0, f4 2561 | mtc1 r0, f4
2561 | mtc1 TMP0, f5 2562 | mtc1 TMP0, f5
2562 | abs.d FRET2, FARG1 // |x| 2563 | abs.d FRET2, FARG1 // |x|
2563 | mfc1 AT, f13 2564 | mfc1 AT, FARG1HI
2564 | c.olt.d 0, FRET2, f4 2565 | c.olt.d 0, FRET2, f4
2565 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 2566 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2566 | bc1f 0, >1 // Truncate only if |x| < 2^52. 2567 | bc1f 0, >1 // Truncate only if |x| < 2^52.
@@ -2822,6 +2823,122 @@ static void build_subroutines(BuildCtx *ctx)
2822 | sfmin_max max, vm_sfcmpogt 2823 | sfmin_max max, vm_sfcmpogt
2823 | 2824 |
2824 |//----------------------------------------------------------------------- 2825 |//-----------------------------------------------------------------------
2826 |//-- Number conversion functions ----------------------------------------
2827 |//-----------------------------------------------------------------------
2828 |
2829 |// int64_t lj_vm_num2int_check(double x)
2830 |->vm_num2int_check:
2831 |.if FPU
2832 | trunc.w.d FARG2, FARG1
2833 | mfc1 SFRETLO, FARG2
2834 | cvt.d.w FARG2, FARG2
2835 | c.eq.d FARG1, FARG2
2836 | bc1f 0, >2
2837 |. nop
2838 | jr ra
2839 |. move SFRETHI, r0
2840 |
2841 |.else
2842 |
2843 | sll SFRETLO, SFARG1HI, 1
2844 | or SFRETHI, SFRETLO, SFARG1LO
2845 | beqz SFRETHI, >1 // Return 0 for +-0.
2846 |. li TMP0, 1054
2847 | srl AT, SFRETLO, 21
2848 | subu TMP0, TMP0, AT
2849 | sltiu AT, TMP0, 32
2850 | beqz AT, >2 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
2851 |. sll SFRETLO, SFARG1HI, 11
2852 | srl SFRETHI, SFARG1LO, 21
2853 | negu TMP1, TMP0
2854 | or SFRETLO, SFRETLO, SFRETHI // Left-aligned mantissa.
2855 | sllv TMP2, SFRETLO, TMP1
2856 | lui AT, 0x8000
2857 | sll SFRETHI, SFARG1LO, 11
2858 | or SFRETLO, SFRETLO, AT // Merge leading 1.
2859 | or TMP2, TMP2, SFRETHI
2860 | srlv SFRETLO, SFRETLO, TMP0 // lo = right-aligned absolute value.
2861 | bnez TMP2, >2 // Fail if fractional part != 0.
2862 |. sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1.
2863 | addu SFRETLO, SFRETLO, SFARG1HI
2864 | bltz SFRETLO, >2 // Fail if lo+sign >= 0x80000000.
2865 |. xor SFRETLO, SFRETLO, SFARG1HI // lo = sign?-lo:lo = (lo+sign)^sign.
2866 |1:
2867 | jr ra
2868 |. move SFRETHI, r0
2869 |.endif
2870 |2: // Not an integer, return 0x8000000080000000LL.
2871 | lui SFRETHI, 0x8000
2872 | jr ra
2873 |. lui SFRETLO, 0x8000
2874 |
2875 |// int64_t lj_vm_num2i64(double x)
2876 |->vm_num2i64:
2877 |// fallthrough, same as lj_vm_num2u64.
2878 |
2879 |// uint64_t lj_vm_num2u64(double x)
2880 |->vm_num2u64:
2881 |.if FPU
2882 | mfc1 SFARG1HI, FARG1HI
2883 | mfc1 SFARG1LO, FARG1
2884 |.endif
2885 | srl TMP0, SFARG1HI, 20
2886 | andi TMP0, TMP0, 0x7ff
2887 | addiu SFRETLO, TMP0, -1023
2888 | sltiu SFRETLO, SFRETLO, 116
2889 | beqz SFRETLO, >3 // Exponent out of range.
2890 |. sll SFRETHI, SFARG1HI, 12
2891 | lui AT, 0x0010
2892 | srl SFRETHI, SFRETHI, 12
2893 | addiu TMP0, TMP0, -1075
2894 | sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1.
2895 | bgez TMP0, >2 // Shift mantissa left or right?
2896 |. or SFRETHI, SFRETHI, AT // Merge leading 1 into masked mantissa.
2897 | subu TMP1, r0, TMP0
2898 | sll AT, SFRETHI, 1
2899 | nor TMP0, r0, TMP1
2900 | srlv SFRETHI, SFRETHI, TMP1 // Shift hi mantissa right for low exp.
2901 | sllv AT, AT, TMP0 // Shifted-out hi mantissa.
2902 | srlv SFRETLO, SFARG1LO, TMP1 // Shift lo mantissa right for low exp.
2903 | andi TMP1, TMP1, 0x20 // Conditional right shift by 32.
2904 | or AT, AT, SFRETLO // Merge into lo mantissa.
2905 | movn AT, SFRETHI, TMP1
2906 | movn SFRETHI, r0, TMP1
2907 |1:
2908 | addu SFRETLO, AT, SFARG1HI // m = sign?-m:m = (m+sign)^sign.
2909 | addu SFRETHI, SFRETHI, SFARG1HI
2910 | sltu TMP0, SFRETLO, AT // Carry
2911 | addu SFRETHI, SFRETHI, TMP0
2912 | xor SFRETLO, SFRETLO, SFARG1HI
2913 | jr ra
2914 |. xor SFRETHI, SFRETHI, SFARG1HI
2915 |2:
2916 | srl TMP2, SFARG1LO, 1
2917 | nor AT, r0, TMP0
2918 | sllv SFRETHI, SFRETHI, TMP0 // Shift hi mantissa left for high exp.
2919 | srlv TMP2, TMP2, AT // Shifted-out lo mantissa.
2920 | sllv AT, SFARG1LO, TMP0 // Shift lo mantissa left for high exp.
2921 | andi TMP0, TMP0, 0x20 // Conditional left shift by 32.
2922 | or SFRETHI, SFRETHI, TMP2 // Merge into hi mantissa.
2923 | movn SFRETHI, AT, TMP0
2924 | b <1
2925 |. movn AT, r0, TMP0
2926 |3:
2927 | jr ra
2928 |. li SFRETHI, 0
2929 |
2930 |// int32_t lj_vm_tobit(double x)
2931 |.if FPU
2932 |->vm_tobit:
2933 | lui AT, 0x59c0 // 2^52 + 2^51 (float).
2934 | mtc1 AT, FARG2
2935 | cvt.d.s FARG2, FARG2
2936 | add.d FARG1, FARG1, FARG2
2937 | jr ra
2938 |. mfc1 CRET1, FARG1
2939 |.endif
2940 |
2941 |//-----------------------------------------------------------------------
2825 |//-- Miscellaneous functions -------------------------------------------- 2942 |//-- Miscellaneous functions --------------------------------------------
2826 |//----------------------------------------------------------------------- 2943 |//-----------------------------------------------------------------------
2827 | 2944 |
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index 859c0aee..4dc40d8a 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -2113,7 +2113,7 @@ static void build_subroutines(BuildCtx *ctx)
2113 | dinsu CRET2, AT, 21, 21 2113 | dinsu CRET2, AT, 21, 21
2114 | slt AT, CARG1, r0 2114 | slt AT, CARG1, r0
2115 | dsrlv CRET1, CRET2, TMP0 2115 | dsrlv CRET1, CRET2, TMP0
2116 | dsubu CARG1, r0, CRET1 2116 | negu CARG1, CRET1
2117 |.if MIPSR6 2117 |.if MIPSR6
2118 | seleqz CRET1, CRET1, AT 2118 | seleqz CRET1, CRET1, AT
2119 | selnez CARG1, CARG1, AT 2119 | selnez CARG1, CARG1, AT
@@ -2121,20 +2121,12 @@ static void build_subroutines(BuildCtx *ctx)
2121 |.else 2121 |.else
2122 | movn CRET1, CARG1, AT 2122 | movn CRET1, CARG1, AT
2123 |.endif 2123 |.endif
2124 | li CARG1, 64 2124 | negu TMP0, TMP0
2125 | subu TMP0, CARG1, TMP0
2126 | dsllv CRET2, CRET2, TMP0 // Integer check. 2125 | dsllv CRET2, CRET2, TMP0 // Integer check.
2127 | sextw AT, CRET1 2126 | sextw AT, CRET1
2128 | xor AT, CRET1, AT // Range check. 2127 | xor AT, CRET1, AT // Range check.
2129 |.if MIPSR6
2130 | seleqz AT, AT, CRET2
2131 | selnez CRET2, CRET2, CRET2
2132 | jr ra 2128 | jr ra
2133 |. or CRET2, AT, CRET2 2129 |. or CRET2, AT, CRET2
2134 |.else
2135 | jr ra
2136 |. movz CRET2, AT, CRET2
2137 |.endif
2138 |1: 2130 |1:
2139 | jr ra 2131 | jr ra
2140 |. li CRET2, 1 2132 |. li CRET2, 1
@@ -2929,6 +2921,136 @@ static void build_subroutines(BuildCtx *ctx)
2929 | sfmin_max max, vm_sfcmpogt 2921 | sfmin_max max, vm_sfcmpogt
2930 | 2922 |
2931 |//----------------------------------------------------------------------- 2923 |//-----------------------------------------------------------------------
2924 |//-- Number conversion functions ----------------------------------------
2925 |//-----------------------------------------------------------------------
2926 |
2927 |// int64_t lj_vm_num2int_check(double x)
2928 |->vm_num2int_check:
2929 |.if FPU
2930 | trunc.w.d FARG2, FARG1 // Truncate x to a 32 bit integer.
2931 | mfc1 CRET1, FARG2
2932 | cvt.d.w FARG2, FARG2 // Convert back to double for the exactness check.
2933 |.if MIPSR6
2934 | cmp.eq.d FARG2, FARG1, FARG2
2935 | bc1eqz FARG2, >2 // Fail if the round trip does not compare equal.
2936 |.else
2937 | c.eq.d FARG1, FARG2
2938 | bc1f 0, >2 // Fail if the round trip does not compare equal.
2939 |.endif
2940 |. nop
2941 | jr ra
2942 |. zextw CRET1, CRET1 // Success: zero-extended 32 bit result.
2943 |
2944 |.else
2945 |
2946 | dsll CRET2, CARG1, 1 // Shift out the sign bit.
2947 | beqz CRET2, >1 // Return 0 for +-0.
2948 |. li TMP0, 1076
2949 | dsrl AT, CRET2, 53 // Biased exponent.
2950 | dsubu TMP0, TMP0, AT // Right shift count = 1076 - exponent.
2951 | sltiu AT, TMP0, 54
2952 | beqz AT, >2 // Fail if the shift count is outside 0..53.
2953 |. dextm CRET2, CRET2, 0, 20 // Delay slot: keep only the mantissa bits.
2954 | dinsu CRET2, AT, 21, 21 // Insert the leading 1 (AT is 1 here).
2955 | slt AT, CARG1, r0 // AT = 1 if x is negative.
2956 | dsrlv CRET1, CRET2, TMP0 // Magnitude of the integer part (may exceed 32 bits).
2957 | dsubu CARG1, r0, CRET1 // 64 bit negate: a 32 bit negu would drop the upper bits and defeat the range check.
2958 |.if MIPSR6
2959 | seleqz CRET1, CRET1, AT
2960 | selnez CARG1, CARG1, AT
2961 | or CRET1, CRET1, CARG1 // CRET1 = negative ? -magnitude : magnitude.
2962 |.else
2963 | movn CRET1, CARG1, AT // CRET1 = negative ? -magnitude : magnitude.
2964 |.endif
2965 | negu TMP0, TMP0 // Left shift count that exposes the shifted-out bits.
2966 | dsllv CRET2, CRET2, TMP0 // Integer check.
2967 | sextw AT, CRET1
2968 | xor AT, CRET1, AT // Range check.
2969 | or AT, AT, CRET2
2970 | bnez AT, >2 // Fail if not an integer or outside the int32 range.
2971 |. nop
2972 | jr ra
2973 |. zextw CRET1, CRET1 // Success: zero-extended 32 bit result.
2974 |1:
2975 | jr ra
2976 |. move CRET1, r0
2977 |.endif
2978 |2:
2979 | lui CRET1, 0x8000 // Failure: build 0x8000000080000000 in CRET1.
2980 | dsll CRET1, CRET1, 16
2981 | ori CRET1, CRET1, 0x8000
2982 | jr ra
2983 |. dsll CRET1, CRET1, 16
2984 |
2985 |// int64_t lj_vm_num2i64(double x)
2986 |->vm_num2i64:
2987 |.if FPU
2988 | trunc.l.d FARG1, FARG1 // Truncate x to a 64 bit integer inside the FPU.
2989 | jr ra
2990 |. dmfc1 CRET1, FARG1 // Delay slot: move the 64 bit result to the return register.
2991 |.else
2992 |// fallthrough, same as lj_vm_num2u64 for soft-float.
2993 |.endif
2994 |
2995 |// uint64_t lj_vm_num2u64(double x)
2996 |->vm_num2u64:
2997 |.if FPU
2998 | trunc.l.d FARG2, FARG1 // First try a signed 64 bit truncation.
2999 | dmfc1 CRET1, FARG2
3000 | li AT, -1
3001 | dsrl AT, AT, 1 // AT = 0x7fffffffffffffff.
3002 | beq CRET1, AT, >1 // Result equals that value: retry with x - 2^64 below. Presumably the FPU's out-of-range result; confirm.
3003 |. lui AT, 0xdf80 // -2^64 (float).
3004 | jr ra
3005 |. nop
3006 |1:
3007 | mtc1 AT, FARG2
3008 | cvt.d.s FARG2, FARG2 // Widen the -2^64 constant to double.
3009 | add.d FARG1, FARG1, FARG2 // x - 2^64.
3010 | trunc.l.d FARG2, FARG1
3011 | jr ra
3012 |. dmfc1 CRET1, FARG2
3013 |
3014 |.else
3015 |
3016 | dextu CARG2, CARG1, 20, 10 // Biased exponent.
3017 | addiu AT, CARG2, -1023
3018 | sltiu AT, AT, 116 // AT = 1 if 0 <= unbiased exponent < 116.
3019 | beqz AT, >2 // Exponent out of range.
3020 |. addiu CARG2, CARG2, -1075 // Delay slot: shift count = exponent - 1075.
3021 | dextm CRET1, CARG1, 0, 19 // Masked mantissa.
3022 | dsll AT, AT, 52 // AT is 1 here: move it to the leading 1 position.
3023 | dsra CARG1, CARG1, 63 // sign = 0 or -1.
3024 | bgez CARG2, >1 // Shift mantissa left or right?
3025 |. or CRET1, CRET1, AT // Merge leading 1 into masked mantissa.
3026 | subu CARG2, r0, CARG2 // Negate to get the right shift count.
3027 | dsrlv CRET1, CRET1, CARG2 // Shift mantissa right for low exp.
3028 | daddu CRET1, CRET1, CARG1
3029 | jr ra
3030 |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
3031 |1:
3032 | dsllv CRET1, CRET1, CARG2 // Shift mantissa left for high exp.
3033 | daddu CRET1, CRET1, CARG1
3034 | jr ra
3035 |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
3036 |2:
3037 | jr ra
3038 |. move CRET1, r0 // Out-of-range exponent: return 0.
3039 |.endif
3040 |
3041 |// int32_t lj_vm_tobit(double x)
3042 |.if FPU // Only emitted for hard-float builds.
3043 |->vm_tobit:
3044 | lui AT, 0x59c0 // 2^52 + 2^51 (float).
3045 | mtc1 AT, FARG2 // Move the single-precision bit pattern into an FPR.
3046 | cvt.d.s FARG2, FARG2 // Widen the bias constant to double.
3047 | add.d FARG1, FARG1, FARG2 // x + bias.
3048 | mfc1 CRET1, FARG1 // Take the low 32 bits of the biased sum.
3049 | jr ra
3050 |. sextw CRET1, CRET1 // Delay slot: sign-extend the 32 bit result.
3051 |.endif
3052 |
3053 |//-----------------------------------------------------------------------
2932 |//-- Miscellaneous functions -------------------------------------------- 3054 |//-- Miscellaneous functions --------------------------------------------
2933 |//----------------------------------------------------------------------- 3055 |//-----------------------------------------------------------------------
2934 | 3056 |
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 2ddeefbf..1761e39b 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -3160,6 +3160,152 @@ static void build_subroutines(BuildCtx *ctx)
3160 | blr 3160 | blr
3161 | 3161 |
3162 |//----------------------------------------------------------------------- 3162 |//-----------------------------------------------------------------------
3163 |//-- Number conversion functions ----------------------------------------
3164 |//-----------------------------------------------------------------------
3165 |
3166 |// int64_t lj_vm_num2int_check(double x)
3167 |->vm_num2int_check:
3168 |.if FPU
3169 | subi sp, sp, 16
3170 | stfd FARG1, 0(sp) // Spill x to the stack to read it back into GPRs.
3171 | lwz CARG1, 0(sp) // hi word.
3172 | lwz CARG2, 4(sp) // lo word.
3173 |.endif
3174 | slwi TMP1, CARG1, 1 // Shift out the sign bit.
3175 |.if PPE
3176 | or TMP1, TMP1, CARG2
3177 | cmpwi TMP1, 0
3178 |.else
3179 | or. TMP1, TMP1, CARG2
3180 |.endif
3181 | beq >2 // Return 0 for +-0.
3182 | rlwinm RB, CARG1, 12, 21, 31 // Biased exponent.
3183 | subfic RB, RB, 1054 // Right shift count = 1054 - exponent.
3184 | cmplwi RB, 32
3185 | bge >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
3186 | slwi CARG3, CARG1, 11
3187 | rlwimi CARG3, CARG2, 11, 21, 31 // Left-aligned mantissa.
3188 | subfic TMP1, RB, 32
3189 | slw TMP1, CARG3, TMP1 // Mantissa bits below the integer part.
3190 | slwi TMP2, CARG2, 11 // Remaining low mantissa bits.
3191 |.if PPE
3192 | or TMP1, TMP1, TMP2
3193 | cmpwi TMP1, 0
3194 |.else
3195 | or. TMP1, TMP1, TMP2
3196 |.endif
3197 | bne >1 // Fail if fractional part != 0.
3198 | oris CARG3, CARG3, 0x8000 // Merge leading 1.
3199 | srw CRET2, CARG3, RB // lo = right-aligned absolute value.
3200 | srawi CARG4, CARG1, 31 // sign = 0 or -1.
3201 |.if GPR64
3202 | add CRET2, CRET2, CARG4
3203 | cmpwi CRET2, 0
3204 |.else
3205 | add. CRET2, CRET2, CARG4
3206 |.endif
3207 | blt >1 // Fail if the value does not fit in an int32.
3208 | xor CRET2, CRET2, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign.
3209 |2:
3210 |.if GPR64
3211 | rldicl CRET1, CRET2, 0, 32 // Zero-extend the 32 bit result (in CRET2) into the 64 bit return register.
3212 |.else
3213 | li CRET1, 0
3214 |.endif
3215 |.if FPU
3216 | addi sp, sp, 16
3217 |.endif
3218 | blr
3219 |1:
3220 |.if GPR64
3221 | lus CRET1, 0x8000
3222 | rldicr CRET1, CRET1, 32, 32 // Failure: CRET1 = 0x8000000080000000.
3223 |.else
3224 | lus CRET1, 0x8000 // Failure: hi = lo = 0x80000000.
3225 | lus CRET2, 0x8000
3226 |.endif
3227 |.if FPU
3228 | addi sp, sp, 16
3229 |.endif
3230 | blr
3231 |
3232 |// int64_t lj_vm_num2i64(double x)
3233 |->vm_num2i64:
3234 |// fallthrough, same as lj_vm_num2u64.
3235 |
3236 |// uint64_t lj_vm_num2u64(double x)
3237 |->vm_num2u64:
3238 |.if FPU
3239 | subi sp, sp, 16
3240 | stfd FARG1, 0(sp) // Spill x to the stack to read it back into GPRs.
3241 | lwz CARG1, 0(sp) // hi word.
3242 | lwz CARG2, 4(sp) // lo word.
3243 |.endif
3244 | rlwinm RB, CARG1, 12, 21, 31 // Biased exponent.
3245 | addi RB, RB, -1023
3246 | cmplwi RB, 116
3247 | bge >3 // Exponent out of range.
3248 | srawi CARG4, CARG1, 31 // sign = 0 or -1.
3249 | clrlwi CARG1, CARG1, 12 // Keep the 20 hi mantissa bits.
3250 | subfic RB, RB, 52 // Right shift count = 52 - exponent.
3251 | oris CARG1, CARG1, 0x0010 // Merge leading 1.
3252 | cmpwi RB, 0
3253 | blt >2 // Shift mantissa left or right?
3254 | subfic TMP1, RB, 32 // 64 bit right shift.
3255 | srw CARG2, CARG2, RB
3256 | slw TMP2, CARG1, TMP1
3257 | addi TMP1, RB, -32
3258 | or CARG2, CARG2, TMP2
3259 | srw TMP2, CARG1, TMP1
3260 | or CARG2, CARG2, TMP2
3261 | srw CARG1, CARG1, RB
3262 |1:
3263 | addc CARG2, CARG2, CARG4 // m = sign?-m:m = (m+sign)^sign, as a hi:lo pair.
3264 | adde CARG1, CARG1, CARG4
3265 | xor CRET2, CARG2, CARG4
3266 | xor CRET1, CARG1, CARG4
3267 |.if GPR64
3268 | rldimi CRET2, CRET1, 32, 0 // Insert hi into the upper 32 bits, keep lo in the lower 32 bits.
3269 | mr CRET1, CRET2
3270 |.endif
     |.if FPU // Only undo the stack adjustment that the FPU prologue made.
3271 | addi sp, sp, 16
     |.endif
3272 | blr
3273 |2:
3274 | subfic TMP1, RB, 0 // 64 bit left shift.
3275 | addi RB, RB, -32
3276 | slw CARG1, CARG1, TMP1
3277 | srw TMP2, CARG2, RB
3278 | addi RB, TMP1, -32
3279 | or CARG1, CARG1, TMP2
3280 | slw TMP2, CARG2, RB
3281 | or CARG1, CARG1, TMP2
3282 | slw CARG2, CARG2, TMP1
3283 | b <1
3284 |3:
3285 | li CRET1, 0 // Out-of-range exponent: return 0.
3286 |.if not GPR64
3287 | li CRET2, 0
3288 |.endif
3289 |.if FPU
3290 | addi sp, sp, 16
3291 |.endif
3292 | blr
3293 |
3294 |// int32_t lj_vm_tobit(double x)
3295 |.if FPU // Only emitted for hard-float builds.
3296 |->vm_tobit:
3297 | lus TMP0, 0x59c0 // 2^52 + 2^51 (float).
3298 | subi sp, sp, 16 // Scratch space for GPR <-> FPR transfers.
3299 | stw TMP0, 0(sp)
3300 | lfs FARG2, 0(sp) // Load the bias constant as a single-precision float.
3301 | fadd FARG1, FARG1, FARG2 // x + bias.
3302 | stfd FARG1, 0(sp)
3303 | lwz CRET1, 4(sp) // Return the word at offset 4 of the stored sum.
3304 | addi sp, sp, 16
3305 | blr
3306 |.endif
3307 |
3308 |//-----------------------------------------------------------------------
3163 |//-- Miscellaneous functions -------------------------------------------- 3309 |//-- Miscellaneous functions --------------------------------------------
3164 |//----------------------------------------------------------------------- 3310 |//-----------------------------------------------------------------------
3165 | 3311 |
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 4cfb7b6a..970e8e43 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -2625,6 +2625,49 @@ static void build_subroutines(BuildCtx *ctx)
2625 | ret 2625 | ret
2626 | 2626 |
2627 |//----------------------------------------------------------------------- 2627 |//-----------------------------------------------------------------------
2628 |//-- Number conversion functions ----------------------------------------
2629 |//-----------------------------------------------------------------------
2630 |
2631 |// int64_t lj_vm_num2int_check(double x)
2632 |->vm_num2int_check:
2633 | cvttsd2si eax, xmm0 // Truncate x to a 32 bit integer.
2634 | xorps xmm1, xmm1 // Clear xmm1 before the scalar conversion writes its low part.
2635 | cvtsi2sd xmm1, eax // Convert back to double for the exactness check.
2636 | ucomisd xmm1, xmm0
2637 | jp >1 // Fail on an unordered compare.
2638 | jne >1 // Fail if the round trip changed the value.
2639 | ret
2640 |1:
2641 | mov64 rax, U64x(80000000,80000000) // Value returned when the check fails.
2642 | ret
2643 |
2644 |// int64_t lj_vm_num2i64(double x)
2645 |->vm_num2i64:
2646 | cvttsd2si rax, xmm0 // Truncating 64 bit conversion; the result is returned in rax.
2647 | ret
2648 |
2649 |// uint64_t lj_vm_num2u64(double x)
2650 |->vm_num2u64:
2651 | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range.
2652 | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow.
2653 | jo >1 // Only taken when rax is 0x8000000000000000.
2654 | ret
2655 |1:
2656 | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double).
2657 | movd xmm1, rdx
2658 | addsd xmm0, xmm1 // x - 2^64.
2659 | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range.
2660 | // Note that -0x1p63 converts to -0x8000000000000000LL either way.
2661 | ret
2662 |
2663 |// int32_t lj_vm_tobit(double x)
2664 |->vm_tobit:
2665 | sseconst_tobit xmm1, RC // Load the tobit bias constant into xmm1 (macro defined elsewhere; RC is passed as its scratch register).
2666 | addsd xmm0, xmm1 // x + bias.
2667 | movd eax, xmm0 // Return the low 32 bits of the biased sum.
2668 | ret
2669 |
2670 |//-----------------------------------------------------------------------
2628 |//-- Miscellaneous functions -------------------------------------------- 2671 |//-- Miscellaneous functions --------------------------------------------
2629 |//----------------------------------------------------------------------- 2672 |//-----------------------------------------------------------------------
2630 | 2673 |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 77c4069d..485ed809 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -3059,6 +3059,98 @@ static void build_subroutines(BuildCtx *ctx)
3059 | ret 3059 | ret
3060 | 3060 |
3061 |//----------------------------------------------------------------------- 3061 |//-----------------------------------------------------------------------
3062 |//-- Number conversion functions ----------------------------------------
3063 |//-----------------------------------------------------------------------
3064 |
3065 |// int64_t lj_vm_num2int_check(double x)
3066 |->vm_num2int_check:
3067 |.if not X64
3068 | movsd xmm0, qword [esp+4] // 32 bit build: load x from the stack.
3069 |.endif
3070 | cvttsd2si eax, xmm0 // Truncate x to a 32 bit integer.
3071 | xorps xmm1, xmm1 // Clear xmm1 before the scalar conversion writes its low part.
3072 | cvtsi2sd xmm1, eax // Convert back to double for the exactness check.
3073 | ucomisd xmm1, xmm0
3074 | jp >1 // Fail on an unordered compare.
3075 | jne >1 // Fail if the round trip changed the value.
3076 |.if not X64
3077 | xor edx, edx // 32 bit build: high half of the edx:eax result is 0.
3078 |.endif
3079 | ret
3080 |1:
3081 |.if X64
3082 | mov64 rax, U64x(80000000,80000000) // Value returned when the check fails.
3083 |.else
3084 | mov eax, 0x80000000 // Same failure value, split across edx:eax.
3085 | mov edx, eax
3086 |.endif
3087 | ret
3088 |
3089 |// int64_t lj_vm_num2i64(double x)
3090 |->vm_num2i64:
3091 |.if X64
3092 | cvttsd2si rax, xmm0 // Truncating 64 bit conversion; the result is returned in rax.
3093 | ret
3094 |.else
3095 | sub esp, 12 // Scratch space for the 64 bit integer store.
3096 | fld qword [esp+16] // Load x from the stack (offset adjusted for the 12 bytes above).
3097 | fisttp qword [esp] // Truncating store of a 64 bit integer; pops the x87 stack.
3098 | mov eax, dword [esp] // lo word.
3099 | mov edx, dword [esp+4] // hi word.
3100 | add esp, 12
3101 | ret
3102 |.endif
3103 |
3104 |// uint64_t lj_vm_num2u64(double x)
3105 |->vm_num2u64:
3106 |.if X64
3107 | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range.
3108 | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow.
3109 | jo >1 // Only taken when rax is 0x8000000000000000.
3110 | ret
3111 |1:
3112 | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double).
3113 | movd xmm1, rdx
3114 | addsd xmm0, xmm1 // x - 2^64.
3115 | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range.
3116 | // Note that -0x1p63 converts to -0x8000000000000000LL either way.
3117 | ret
3118 |.else
3119 | sub esp, 12 // Scratch: 8 bytes for the integer result, 4 for the float constant.
3120 | fld qword [esp+16] // Load x from the stack (offset adjusted for the 12 bytes above).
3121 | fld st0 // Duplicate x so one copy survives the popping store below.
3122 | fisttp qword [esp] // First attempt: truncating signed 64 bit store.
3123 | mov edx, dword [esp+4] // hi word.
3124 | mov eax, dword [esp] // lo word.
3125 | cmp edx, 1 // Sets overflow only when edx is 0x80000000.
3126 | jo >2
3127 |1:
3128 | fpop // Drop the remaining copy of x.
3129 | add esp, 12
3130 | ret
3131 |2:
3132 | cmp eax, 0
3133 | jne <1 // lo word != 0: the first result is kept as is.
3134 | mov dword [esp+8], 0xdf800000 // -0x1p64 (float).
3135 | fadd dword [esp+8] // x - 2^64.
3136 | fisttp qword [esp] // Second attempt; pops the remaining copy.
3137 | mov eax, dword [esp]
3138 | mov edx, dword [esp+4]
3139 | add esp, 12
3140 | ret
3141 |.endif
3142 |
3143 |// int32_t lj_vm_tobit(double x)
3144 |->vm_tobit:
3145 |.if not X64
3146 | movsd xmm0, qword [esp+4] // 32 bit build: load x from the stack.
3147 |.endif
3148 | sseconst_tobit xmm1, RCa // Load the tobit bias constant into xmm1 (macro defined elsewhere; RCa is passed as its scratch register).
3149 | addsd xmm0, xmm1 // x + bias.
3150 | movd eax, xmm0 // Return the low 32 bits of the biased sum.
3151 | ret
3152 |
3153 |//-----------------------------------------------------------------------
3062 |//-- Miscellaneous functions -------------------------------------------- 3154 |//-- Miscellaneous functions --------------------------------------------
3063 |//----------------------------------------------------------------------- 3155 |//-----------------------------------------------------------------------
3064 | 3156 |