| author | Mike Pall <mike> | 2025-11-27 17:45:17 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2025-11-27 17:45:17 +0100 |
| commit | f80b349d5490aa289b2925d297f3f3c618977570 (patch) | |
| tree | 8d8fb0d2beb3e863592139d603ada63e5aa6ce77 | |
| parent | 3215838aa744d148e79a8ea0bd7c014e984302cb (diff) | |
Unify Lua number to FFI integer conversions.
Phew. #1411
41 files changed, 1067 insertions, 431 deletions
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index cd533e8c..cd72da21 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html | |||
| @@ -338,42 +338,44 @@ pointer or type compatibility: | |||
| 338 | <tr class="odd"> | 338 | <tr class="odd"> |
| 339 | <td class="convin">Integer</td><td class="convop">→<sup>round</sup></td><td class="convout"><tt>double</tt>, <tt>float</tt></td></tr> | 339 | <td class="convin">Integer</td><td class="convop">→<sup>round</sup></td><td class="convout"><tt>double</tt>, <tt>float</tt></td></tr> |
| 340 | <tr class="even"> | 340 | <tr class="even"> |
| 341 | <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>trunc</sup> <tt>int32_t</tt> →<sup>narrow</sup></td><td class="convout"><tt>(u)int8_t</tt>, <tt>(u)int16_t</tt></td></tr> | 341 | <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>trunc</sup> <tt>int64_t</tt> →<sup>narrow</sup> <sup>*</sup></td><td class="convout"><tt>(u)int8_t</tt>, <tt>(u)int16_t</tt>, <tt>(u)int32_t</tt></td></tr> |
| 342 | <tr class="odd"> | 342 | <tr class="odd"> |
| 343 | <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>trunc</sup></td><td class="convout"><tt>(u)int32_t</tt>, <tt>(u)int64_t</tt></td></tr> | 343 | <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>trunc</sup></td><td class="convout"><tt>int64_t</tt></td></tr> |
| 344 | <tr class="even"> | 344 | <tr class="even"> |
| 345 | <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>trunc</sup> uint64_t ∪ int64_t →<sup>reinterpret</sup> <sup>*</sup></td><td class="convout"><tt>uint64_t</tt></td></tr> | ||
| 346 | <tr class="odd"> | ||
| 345 | <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>round</sup></td><td class="convout"><tt>float</tt>, <tt>double</tt></td></tr> | 347 | <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>round</sup></td><td class="convout"><tt>float</tt>, <tt>double</tt></td></tr> |
| 346 | <tr class="odd separate"> | 348 | <tr class="even separate"> |
| 347 | <td class="convin">Number</td><td class="convop">n == 0 → 0, otherwise 1</td><td class="convout"><tt>bool</tt></td></tr> | 349 | <td class="convin">Number</td><td class="convop">n == 0 → 0, otherwise 1</td><td class="convout"><tt>bool</tt></td></tr> |
| 348 | <tr class="even"> | 350 | <tr class="odd"> |
| 349 | <td class="convin"><tt>bool</tt></td><td class="convop"><tt>false</tt> → 0, <tt>true</tt> → 1</td><td class="convout">Number</td></tr> | 351 | <td class="convin"><tt>bool</tt></td><td class="convop"><tt>false</tt> → 0, <tt>true</tt> → 1</td><td class="convout">Number</td></tr> |
| 350 | <tr class="odd separate"> | 352 | <tr class="even separate"> |
| 351 | <td class="convin">Complex number</td><td class="convop">convert real part</td><td class="convout">Number</td></tr> | 353 | <td class="convin">Complex number</td><td class="convop">convert real part</td><td class="convout">Number</td></tr> |
| 352 | <tr class="even"> | ||
| 353 | <td class="convin">Number</td><td class="convop">convert real part, imag = 0</td><td class="convout">Complex number</td></tr> | ||
| 354 | <tr class="odd"> | 354 | <tr class="odd"> |
| 355 | <td class="convin">Number</td><td class="convop">convert real part, imag = 0</td><td class="convout">Complex number</td></tr> | ||
| 356 | <tr class="even"> | ||
| 355 | <td class="convin">Complex number</td><td class="convop">convert real and imag part</td><td class="convout">Complex number</td></tr> | 357 | <td class="convin">Complex number</td><td class="convop">convert real and imag part</td><td class="convout">Complex number</td></tr> |
| 356 | <tr class="even separate"> | 358 | <tr class="odd separate"> |
| 357 | <td class="convin">Number</td><td class="convop">convert scalar and replicate</td><td class="convout">Vector</td></tr> | 359 | <td class="convin">Number</td><td class="convop">convert scalar and replicate</td><td class="convout">Vector</td></tr> |
| 358 | <tr class="odd"> | 360 | <tr class="even"> |
| 359 | <td class="convin">Vector</td><td class="convop">copy (same size)</td><td class="convout">Vector</td></tr> | 361 | <td class="convin">Vector</td><td class="convop">copy (same size)</td><td class="convout">Vector</td></tr> |
| 360 | <tr class="even separate"> | 362 | <tr class="odd separate"> |
| 361 | <td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr> | 363 | <td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr> |
| 362 | <tr class="odd"> | ||
| 363 | <td class="convin">Array</td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr> | ||
| 364 | <tr class="even"> | 364 | <tr class="even"> |
| 365 | <td class="convin">Array</td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr> | ||
| 366 | <tr class="odd"> | ||
| 365 | <td class="convin">Function</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr> | 367 | <td class="convin">Function</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr> |
| 366 | <tr class="odd separate"> | 368 | <tr class="even separate"> |
| 367 | <td class="convin">Number</td><td class="convop">convert via <tt>uintptr_t</tt> (cast)</td><td class="convout">Pointer</td></tr> | 369 | <td class="convin">Number</td><td class="convop">convert via <tt>uintptr_t</tt> (cast)</td><td class="convout">Pointer</td></tr> |
| 368 | <tr class="even"> | ||
| 369 | <td class="convin">Pointer</td><td class="convop">convert address (compat/cast)</td><td class="convout">Pointer</td></tr> | ||
| 370 | <tr class="odd"> | 370 | <tr class="odd"> |
| 371 | <td class="convin">Pointer</td><td class="convop">convert address (cast)</td><td class="convout">Integer</td></tr> | 371 | <td class="convin">Pointer</td><td class="convop">convert address (compat/cast)</td><td class="convout">Pointer</td></tr> |
| 372 | <tr class="even"> | 372 | <tr class="even"> |
| 373 | <td class="convin">Pointer</td><td class="convop">convert address (cast)</td><td class="convout">Integer</td></tr> | ||
| 374 | <tr class="odd"> | ||
| 373 | <td class="convin">Array</td><td class="convop">convert base address (cast)</td><td class="convout">Integer</td></tr> | 375 | <td class="convin">Array</td><td class="convop">convert base address (cast)</td><td class="convout">Integer</td></tr> |
| 374 | <tr class="odd separate"> | 376 | <tr class="even separate"> |
| 375 | <td class="convin">Array</td><td class="convop">copy (compat)</td><td class="convout">Array</td></tr> | 377 | <td class="convin">Array</td><td class="convop">copy (compat)</td><td class="convout">Array</td></tr> |
| 376 | <tr class="even"> | 378 | <tr class="odd"> |
| 377 | <td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">copy (identical type)</td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr> | 379 | <td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">copy (identical type)</td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr> |
| 378 | </table> | 380 | </table> |
| 379 | <p> | 381 | <p> |
| @@ -384,6 +386,24 @@ type. | |||
| 384 | Conversions not listed above will raise an error. E.g. it's not | 386 | Conversions not listed above will raise an error. E.g. it's not |
| 385 | possible to convert a pointer to a complex number or vice versa. | 387 | possible to convert a pointer to a complex number or vice versa. |
| 386 | </p> | 388 | </p> |
| 389 | <p> | ||
| 390 | * Some conversions from <tt>double</tt> have a larger defined range to | ||
| 391 | allow for mixed-signedness conversions, which are common in C code. | ||
| 392 | E.g. initializing an <tt>int32_t</tt> field with <tt>0xffffffff</tt> | ||
| 393 | or initializing an <tt>uint32_t</tt> or <tt>uint64_t</tt> field with | ||
| 394 | <tt>-1</tt>. Under strict conversion rules, these assignments would | ||
| 395 | give undefined results, since Lua numbers are doubles. The extended | ||
| 396 | ranges make these conversions defined. Lua numbers that are even | ||
| 397 | outside that range give an architecture-specific result. | ||
| 398 | </p> | ||
| 399 | <p> | ||
| 400 | Please note that doubles do not have the precision to represent the | ||
| 401 | whole signed or unsigned 64 bit integer range. Beware of large hex | ||
| 402 | constants in particular: e.g. <tt>0xffffffffffffffff</tt> is a double | ||
| 403 | rounded up to <tt>0x1p64</tt> during parsing. This will <em>not</em> | ||
| 404 | convert to a defined 64 bit integer value. Use the 64 bit literal | ||
| 405 | syntax instead, i.e. <tt>0xffffffffffffffffULL</tt>. | ||
| 406 | </p> | ||
| 387 | 407 | ||
| 388 | <h3 id="convert_vararg">Conversions for vararg C function arguments</h3> | 408 | <h3 id="convert_vararg">Conversions for vararg C function arguments</h3> |
| 389 | <p> | 409 | <p> |
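
The two new documentation paragraphs describe semantics borrowed from C. For reference, a small C snippet (illustrative only, not part of the patch) spelling out the mixed-signedness idioms the extended ranges are meant to match, plus the precision caveat about large hex constants:

```c
#include <stdint.h>

/* The C initializations the extended conversion ranges mirror: */
int32_t  a = 0xffffffff;  /* -1 on two's-complement targets */
uint32_t b = -1;          /* 0xffffffffu: conversion is defined modulo 2^32 */
uint64_t c = -1;          /* 0xffffffffffffffffull */

/* The precision caveat: 2^64-1 has no exact double representation. */
double d = 18446744073709551615.0;  /* rounds up to 2^64, i.e. 0x1p64 */
/* Hence the advice to write 0xffffffffffffffffULL (a true 64 bit cdata
** literal) in Lua rather than the plain number 0xffffffffffffffff.
*/
```
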
diff --git a/src/lib_io.c b/src/lib_io.c index 5659ff51..ec7d2545 100644 --- a/src/lib_io.c +++ b/src/lib_io.c | |||
| @@ -127,8 +127,9 @@ static int io_file_readnum(lua_State *L, FILE *fp) | |||
| 127 | lua_Number d; | 127 | lua_Number d; |
| 128 | if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { | 128 | if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { |
| 129 | if (LJ_DUALNUM) { | 129 | if (LJ_DUALNUM) { |
| 130 | int32_t i = lj_num2int(d); | 130 | int64_t i64; |
| 131 | if (d == (lua_Number)i && !tvismzero((cTValue *)&d)) { | 131 | int32_t i; |
| 132 | if (lj_num2int_check(d, i64, i) && !tvismzero((cTValue *)&d)) { | ||
| 132 | setintV(L->top++, i); | 133 | setintV(L->top++, i); |
| 133 | return 1; | 134 | return 1; |
| 134 | } | 135 | } |
| @@ -335,7 +336,7 @@ LJLIB_CF(io_method_seek) | |||
| 335 | if (tvisint(o)) | 336 | if (tvisint(o)) |
| 336 | ofs = (int64_t)intV(o); | 337 | ofs = (int64_t)intV(o); |
| 337 | else if (tvisnum(o)) | 338 | else if (tvisnum(o)) |
| 338 | ofs = (int64_t)numV(o); | 339 | ofs = lj_num2i64(numV(o)); |
| 339 | else if (!tvisnil(o)) | 340 | else if (!tvisnil(o)) |
| 340 | lj_err_argt(L, 3, LUA_TNUMBER); | 341 | lj_err_argt(L, 3, LUA_TNUMBER); |
| 341 | } | 342 | } |
diff --git a/src/lib_os.c b/src/lib_os.c index ae3fc857..fffc923e 100644 --- a/src/lib_os.c +++ b/src/lib_os.c | |||
| @@ -171,7 +171,8 @@ static int getfield(lua_State *L, const char *key, int d) | |||
| 171 | LJLIB_CF(os_date) | 171 | LJLIB_CF(os_date) |
| 172 | { | 172 | { |
| 173 | const char *s = luaL_optstring(L, 1, "%c"); | 173 | const char *s = luaL_optstring(L, 1, "%c"); |
| 174 | time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); | 174 | time_t t = lua_isnoneornil(L, 2) ? time(NULL) : |
| 175 | lj_num2int_type(luaL_checknumber(L, 2), time_t); | ||
| 175 | struct tm *stm; | 176 | struct tm *stm; |
| 176 | #if LJ_TARGET_POSIX | 177 | #if LJ_TARGET_POSIX |
| 177 | struct tm rtm; | 178 | struct tm rtm; |
| @@ -253,8 +254,9 @@ LJLIB_CF(os_time) | |||
| 253 | 254 | ||
| 254 | LJLIB_CF(os_difftime) | 255 | LJLIB_CF(os_difftime) |
| 255 | { | 256 | { |
| 256 | lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)), | 257 | lua_pushnumber(L, |
| 257 | (time_t)(luaL_optnumber(L, 2, (lua_Number)0)))); | 258 | difftime(lj_num2int_type(luaL_checknumber(L, 1), time_t), |
| 259 | lj_num2int_type(luaL_optnumber(L, 2, (lua_Number)0), time_t))); | ||
| 258 | return 1; | 260 | return 1; |
| 259 | } | 261 | } |
| 260 | 262 | ||
diff --git a/src/lj_api.c b/src/lj_api.c index e9fc25b4..94d8bc7e 100644 --- a/src/lj_api.c +++ b/src/lj_api.c | |||
| @@ -416,11 +416,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) | |||
| 416 | return intV(&tmp); | 416 | return intV(&tmp); |
| 417 | n = numV(&tmp); | 417 | n = numV(&tmp); |
| 418 | } | 418 | } |
| 419 | #if LJ_64 | 419 | return lj_num2int_type(n, lua_Integer); |
| 420 | return (lua_Integer)n; | ||
| 421 | #else | ||
| 422 | return lj_num2int(n); | ||
| 423 | #endif | ||
| 424 | } | 420 | } |
| 425 | 421 | ||
| 426 | LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) | 422 | LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) |
| @@ -445,11 +441,7 @@ LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) | |||
| 445 | n = numV(&tmp); | 441 | n = numV(&tmp); |
| 446 | } | 442 | } |
| 447 | if (ok) *ok = 1; | 443 | if (ok) *ok = 1; |
| 448 | #if LJ_64 | 444 | return lj_num2int_type(n, lua_Integer); |
| 449 | return (lua_Integer)n; | ||
| 450 | #else | ||
| 451 | return lj_num2int(n); | ||
| 452 | #endif | ||
| 453 | } | 445 | } |
| 454 | 446 | ||
| 455 | LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) | 447 | LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) |
| @@ -468,11 +460,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) | |||
| 468 | return (lua_Integer)intV(&tmp); | 460 | return (lua_Integer)intV(&tmp); |
| 469 | n = numV(&tmp); | 461 | n = numV(&tmp); |
| 470 | } | 462 | } |
| 471 | #if LJ_64 | 463 | return lj_num2int_type(n, lua_Integer); |
| 472 | return (lua_Integer)n; | ||
| 473 | #else | ||
| 474 | return lj_num2int(n); | ||
| 475 | #endif | ||
| 476 | } | 464 | } |
| 477 | 465 | ||
| 478 | LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) | 466 | LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) |
| @@ -493,11 +481,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) | |||
| 493 | return (lua_Integer)intV(&tmp); | 481 | return (lua_Integer)intV(&tmp); |
| 494 | n = numV(&tmp); | 482 | n = numV(&tmp); |
| 495 | } | 483 | } |
| 496 | #if LJ_64 | 484 | return lj_num2int_type(n, lua_Integer); |
| 497 | return (lua_Integer)n; | ||
| 498 | #else | ||
| 499 | return lj_num2int(n); | ||
| 500 | #endif | ||
| 501 | } | 485 | } |
| 502 | 486 | ||
| 503 | LUA_API int lua_toboolean(lua_State *L, int idx) | 487 | LUA_API int lua_toboolean(lua_State *L, int idx) |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 0e888c29..8f7ae9a3 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -1329,27 +1329,32 @@ static void asm_conv64(ASMState *as, IRIns *ir) | |||
| 1329 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | 1329 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); |
| 1330 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | 1330 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); |
| 1331 | IRCallID id; | 1331 | IRCallID id; |
| 1332 | const CCallInfo *ci; | ||
| 1333 | #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP | ||
| 1334 | CCallInfo cim; | ||
| 1335 | #endif | ||
| 1332 | IRRef args[2]; | 1336 | IRRef args[2]; |
| 1333 | lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, | 1337 | lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, |
| 1334 | "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); | 1338 | "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); |
| 1335 | args[LJ_BE] = (ir-1)->op1; | 1339 | args[LJ_BE] = (ir-1)->op1; |
| 1336 | args[LJ_LE] = ir->op1; | 1340 | args[LJ_LE] = ir->op1; |
| 1337 | if (st == IRT_NUM || st == IRT_FLOAT) { | 1341 | lj_assertA(st != IRT_FLOAT, "bad CONV *64.float emitted"); |
| 1338 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | 1342 | if (st == IRT_NUM) { |
| 1343 | id = IRCALL_lj_vm_num2u64; | ||
| 1339 | ir--; | 1344 | ir--; |
| 1345 | ci = &lj_ir_callinfo[id]; | ||
| 1340 | } else { | 1346 | } else { |
| 1341 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | 1347 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); |
| 1342 | } | ||
| 1343 | { | ||
| 1344 | #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP | 1348 | #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP |
| 1345 | CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; | 1349 | cim = lj_ir_callinfo[id]; |
| 1346 | cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ | 1350 | cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ |
| 1351 | ci = &cim; | ||
| 1347 | #else | 1352 | #else |
| 1348 | const CCallInfo *ci = &lj_ir_callinfo[id]; | 1353 | ci = &lj_ir_callinfo[id]; |
| 1349 | #endif | 1354 | #endif |
| 1350 | asm_setupresult(as, ir, ci); | ||
| 1351 | asm_gencall(as, ci, args); | ||
| 1352 | } | 1355 | } |
| 1356 | asm_setupresult(as, ir, ci); | ||
| 1357 | asm_gencall(as, ci, args); | ||
| 1353 | } | 1358 | } |
| 1354 | #endif | 1359 | #endif |
| 1355 | 1360 | ||
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 406360d2..1ddd2b3e 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h | |||
| @@ -624,10 +624,9 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
| 624 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 624 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
| 625 | Reg dest = ra_dest(as, ir, RSET_GPR); | 625 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| 626 | ARMIns ai; | 626 | ARMIns ai; |
| 627 | lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); | ||
| 627 | emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); | 628 | emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); |
| 628 | ai = irt_isint(ir->t) ? | 629 | ai = st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32; |
| 629 | (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : | ||
| 630 | (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); | ||
| 631 | emit_dm(as, ai, (tmp & 15), (left & 15)); | 630 | emit_dm(as, ai, (tmp & 15), (left & 15)); |
| 632 | } | 631 | } |
| 633 | } else | 632 | } else |
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index fdcff1db..507fc084 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
| @@ -648,14 +648,18 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
| 648 | } else { | 648 | } else { |
| 649 | Reg left = ra_alloc1(as, lref, RSET_FPR); | 649 | Reg left = ra_alloc1(as, lref, RSET_FPR); |
| 650 | Reg dest = ra_dest(as, ir, RSET_GPR); | 650 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| 651 | A64Ins ai = irt_is64(ir->t) ? | 651 | lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); |
| 652 | (st == IRT_NUM ? | 652 | if (irt_isu64(ir->t)) { |
| 653 | (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : | 653 | emit_dnm(as, A64I_CSELx | A64F_CC(CC_VC), dest, dest, RID_TMP); |
| 654 | (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : | 654 | emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), dest); |
| 655 | (st == IRT_NUM ? | 655 | emit_dn(as, st == IRT_NUM ? A64I_FCVT_U64_F64 : A64I_FCVT_U64_F32, RID_TMP, (left & 31)); |
| 656 | (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : | 656 | emit_dn(as, st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32, dest, (left & 31)); |
| 657 | (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); | 657 | } else { |
| 658 | emit_dn(as, ai, dest, (left & 31)); | 658 | A64Ins ai = irt_is64(ir->t) ? |
| 659 | (st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32) : | ||
| 660 | (st == IRT_NUM ? A64I_FCVT_S32_F64 : A64I_FCVT_S32_F32); | ||
| 661 | emit_dn(as, ai, dest, (left & 31)); | ||
| 662 | } | ||
| 659 | } | 663 | } |
| 660 | } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 664 | } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ |
| 661 | Reg dest = ra_dest(as, ir, RSET_GPR); | 665 | Reg dest = ra_dest(as, ir, RSET_GPR); |
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 8dadabe4..36ed5de4 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h | |||
| @@ -635,64 +635,38 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
| 635 | Reg dest = ra_dest(as, ir, RSET_GPR); | 635 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| 636 | Reg left = ra_alloc1(as, lref, RSET_FPR); | 636 | Reg left = ra_alloc1(as, lref, RSET_FPR); |
| 637 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 637 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
| 638 | if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ | 638 | lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); |
| 639 | /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ | ||
| 640 | emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); | ||
| 641 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); | ||
| 642 | emit_tg(as, MIPSI_MFC1, dest, tmp); | ||
| 643 | emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D, | ||
| 644 | tmp, tmp); | ||
| 645 | emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D, | ||
| 646 | tmp, left, tmp); | ||
| 647 | if (st == IRT_FLOAT) | ||
| 648 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | ||
| 649 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); | ||
| 650 | else | ||
| 651 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | ||
| 652 | (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); | ||
| 653 | #if LJ_64 | 639 | #if LJ_64 |
| 654 | } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ | 640 | if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ |
| 655 | MCLabel l_end; | 641 | MCLabel l_end = emit_label(as); |
| 656 | emit_tg(as, MIPSI_DMFC1, dest, tmp); | 642 | emit_tg(as, MIPSI_DMFC1, dest, tmp); |
| 657 | l_end = emit_label(as); | 643 | /* For result == INT64_MAX add -2^64 and convert again. */ |
| 658 | /* For inputs >= 2^63 add -2^64 and convert again. */ | ||
| 659 | if (st == IRT_NUM) { | 644 | if (st == IRT_NUM) { |
| 660 | emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); | 645 | emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); |
| 661 | emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); | 646 | emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); |
| 662 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | 647 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), |
| 663 | (void *)&as->J->k64[LJ_K64_M2P64], | 648 | (void *)&as->J->k64[LJ_K64_M2P64], |
| 664 | rset_exclude(RSET_GPR, dest)); | 649 | rset_exclude(RSET_GPR, dest)); /* Delay slot. */ |
| 665 | emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ | 650 | emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */ |
| 666 | #if !LJ_TARGET_MIPSR6 | 651 | emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1); |
| 667 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); | 652 | emit_ti(as, MIPSI_LI, RID_TMP, -1); |
| 668 | emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); | 653 | emit_tg(as, MIPSI_DMFC1, dest, tmp); |
| 669 | #else | 654 | emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); |
| 670 | emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); | ||
| 671 | emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp); | ||
| 672 | #endif | ||
| 673 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | ||
| 674 | (void *)&as->J->k64[LJ_K64_2P63], | ||
| 675 | rset_exclude(RSET_GPR, dest)); | ||
| 676 | } else { | 655 | } else { |
| 677 | emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); | 656 | emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); |
| 678 | emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); | 657 | emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); |
| 679 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | 658 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), |
| 680 | (void *)&as->J->k32[LJ_K32_M2P64], | 659 | (void *)&as->J->k32[LJ_K32_M2P64], |
| 681 | rset_exclude(RSET_GPR, dest)); | 660 | rset_exclude(RSET_GPR, dest)); /* Delay slot. */ |
| 682 | emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ | 661 | emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */ |
| 683 | #if !LJ_TARGET_MIPSR6 | 662 | emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1); |
| 684 | emit_branch(as, MIPSI_BC1T, 0, 0, l_end); | 663 | emit_ti(as, MIPSI_LI, RID_TMP, -1); |
| 685 | emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); | 664 | emit_tg(as, MIPSI_DMFC1, dest, tmp); |
| 686 | #else | 665 | emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); |
| 687 | emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); | ||
| 688 | emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp); | ||
| 689 | #endif | ||
| 690 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | ||
| 691 | (void *)&as->J->k32[LJ_K32_2P63], | ||
| 692 | rset_exclude(RSET_GPR, dest)); | ||
| 693 | } | 666 | } |
| 667 | } else | ||
| 694 | #endif | 668 | #endif |
| 695 | } else { | 669 | { |
| 696 | #if LJ_32 | 670 | #if LJ_32 |
| 697 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 671 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
| 698 | emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, | 672 | emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, |
| @@ -733,13 +707,11 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
| 733 | "bad type for checked CONV"); | 707 | "bad type for checked CONV"); |
| 734 | asm_tointg(as, ir, RID_NONE); | 708 | asm_tointg(as, ir, RID_NONE); |
| 735 | } else { | 709 | } else { |
| 736 | IRCallID cid = irt_is64(ir->t) ? | 710 | IRCallID cid; |
| 737 | ((st == IRT_NUM) ? | 711 | lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); |
| 738 | (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : | 712 | lj_assertA(!(irt_is64(ir->t) && st != IRT_NUM), "bad CONV *64.float emitted"); |
| 739 | (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : | 713 | cid = irt_is64(ir->t) ? IRCALL_lj_vm_num2u64 : |
| 740 | ((st == IRT_NUM) ? | 714 | (st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i); |
| 741 | (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : | ||
| 742 | (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); | ||
| 743 | asm_callid(as, ir, cid); | 715 | asm_callid(as, ir, cid); |
| 744 | } | 716 | } |
| 745 | } else | 717 | } else |
| @@ -780,7 +752,10 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
| 780 | } | 752 | } |
| 781 | } | 753 | } |
| 782 | } else { | 754 | } else { |
| 783 | if (st64 && !(ir->op2 & IRCONV_NONE)) { | 755 | if (!irt_isu32(ir->t)) { /* Implicit sign extension. */ |
| 756 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
| 757 | emit_dta(as, MIPSI_SLL, dest, left, 0); | ||
| 758 | } else if (st64 && !(ir->op2 & IRCONV_NONE)) { | ||
| 784 | /* This is either a 32 bit reg/reg mov which zeroes the hiword | 759 | /* This is either a 32 bit reg/reg mov which zeroes the hiword |
| 785 | ** or a load of the loword from a 64 bit address. | 760 | ** or a load of the loword from a 64 bit address. |
| 786 | */ | 761 | */ |
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index d77c45ce..9e2af414 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
| @@ -512,29 +512,10 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
| 512 | Reg dest = ra_dest(as, ir, RSET_GPR); | 512 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| 513 | Reg left = ra_alloc1(as, lref, RSET_FPR); | 513 | Reg left = ra_alloc1(as, lref, RSET_FPR); |
| 514 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 514 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
| 515 | if (irt_isu32(ir->t)) { | 515 | lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); |
| 516 | /* Convert both x and x-2^31 to int and merge results. */ | 516 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
| 517 | Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); | 517 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
| 518 | emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */ | 518 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
| 519 | emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP); | ||
| 520 | emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP); | ||
| 521 | emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */ | ||
| 522 | emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */ | ||
| 523 | emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */ | ||
| 524 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | ||
| 525 | emit_tai(as, PPCI_LWZ, dest, | ||
| 526 | RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */ | ||
| 527 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | ||
| 528 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | ||
| 529 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); | ||
| 530 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); | ||
| 531 | emit_lsptr(as, PPCI_LFS, (tmp & 31), | ||
| 532 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); | ||
| 533 | } else { | ||
| 534 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | ||
| 535 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | ||
| 536 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | ||
| 537 | } | ||
| 538 | } | 519 | } |
| 539 | } else | 520 | } else |
| 540 | #endif | 521 | #endif |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index f3c2238a..bdbce116 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
| @@ -905,29 +905,28 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
| 905 | } else { | 905 | } else { |
| 906 | Reg dest = ra_dest(as, ir, RSET_GPR); | 906 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| 907 | x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; | 907 | x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; |
| 908 | if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { | 908 | lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); |
| 909 | /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ | 909 | #if LJ_64 |
| 910 | /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ | 910 | if (irt_isu64(ir->t)) { |
| 911 | /* For the indefinite result -2^63, add -2^64 and convert again. */ | ||
| 911 | Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : | 912 | Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : |
| 912 | ra_scratch(as, RSET_FPR); | 913 | ra_scratch(as, RSET_FPR); |
| 913 | MCLabel l_end = emit_label(as); | 914 | MCLabel l_end = emit_label(as); |
| 914 | if (LJ_32) | ||
| 915 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); | ||
| 916 | emit_rr(as, op, dest|REX_64, tmp); | 915 | emit_rr(as, op, dest|REX_64, tmp); |
| 917 | if (st == IRT_NUM) | 916 | if (st == IRT_NUM) |
| 918 | emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); | 917 | emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64]); |
| 919 | else | 918 | else |
| 920 | emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); | 919 | emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64]); |
| 921 | emit_sjcc(as, CC_NS, l_end); | 920 | emit_sjcc(as, CC_NO, l_end); |
| 922 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ | 921 | emit_gmrmi(as, XG_ARITHi(XOg_CMP), dest|REX_64, 1); |
| 923 | emit_rr(as, op, dest|REX_64, tmp); | 922 | emit_rr(as, op, dest|REX_64, tmp); |
| 924 | ra_left(as, tmp, lref); | 923 | ra_left(as, tmp, lref); |
| 925 | } else { | 924 | |
| 926 | if (LJ_64 && irt_isu32(ir->t)) | 925 | } else |
| 927 | emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ | 926 | #endif |
| 927 | { | ||
| 928 | emit_mrm(as, op, | 928 | emit_mrm(as, op, |
| 929 | dest|((LJ_64 && | 929 | dest|((LJ_64 && irt_is64(ir->t)) ? REX_64 : 0), |
| 930 | (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), | ||
| 931 | asm_fuseload(as, lref, RSET_FPR)); | 930 | asm_fuseload(as, lref, RSET_FPR)); |
| 932 | } | 931 | } |
| 933 | } | 932 | } |
| @@ -1020,6 +1019,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | |||
| 1020 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | 1019 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); |
| 1021 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | 1020 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); |
| 1022 | Reg lo, hi; | 1021 | Reg lo, hi; |
| 1022 | int usehi = ra_used(ir); | ||
| 1023 | lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); | 1023 | lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); |
| 1024 | lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); | 1024 | lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); |
| 1025 | hi = ra_dest(as, ir, RSET_GPR); | 1025 | hi = ra_dest(as, ir, RSET_GPR); |
| @@ -1032,21 +1032,24 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | |||
| 1032 | emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); | 1032 | emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); |
| 1033 | } | 1033 | } |
| 1034 | if (dt == IRT_U64) { | 1034 | if (dt == IRT_U64) { |
| 1035 | /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ | 1035 | /* For the indefinite result -2^63, add -2^64 and convert again. */ |
| 1036 | MCLabel l_pop, l_end = emit_label(as); | 1036 | MCLabel l_pop, l_end = emit_label(as); |
| 1037 | emit_x87op(as, XI_FPOP); | 1037 | emit_x87op(as, XI_FPOP); |
| 1038 | l_pop = emit_label(as); | 1038 | l_pop = emit_label(as); |
| 1039 | emit_sjmp(as, l_end); | 1039 | emit_sjmp(as, l_end); |
| 1040 | emit_rmro(as, XO_MOV, hi, RID_ESP, 4); | 1040 | if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4); |
| 1041 | if ((as->flags & JIT_F_SSE3)) | 1041 | if ((as->flags & JIT_F_SSE3)) |
| 1042 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); | 1042 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); |
| 1043 | else | 1043 | else |
| 1044 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); | 1044 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); |
| 1045 | emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); | 1045 | emit_rma(as, XO_FADDd, XOg_FADDd, &as->J->k32[LJ_K32_M2P64]); |
| 1046 | emit_sjcc(as, CC_NS, l_pop); | 1046 | emit_sjcc(as, CC_NE, l_pop); |
| 1047 | emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ | 1047 | emit_gmroi(as, XG_ARITHi(XOg_CMP), RID_ESP, 0, 0); |
| 1048 | } | 1048 | emit_sjcc(as, CC_NO, l_pop); |
| 1049 | emit_rmro(as, XO_MOV, hi, RID_ESP, 4); | 1049 | emit_gmrmi(as, XG_ARITHi(XOg_CMP), hi, 1); |
| 1050 | usehi = 1; | ||
| 1051 | } | ||
| 1052 | if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4); | ||
| 1050 | if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */ | 1053 | if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */ |
| 1051 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); | 1054 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); |
| 1052 | } else { /* Otherwise set FPU rounding mode to truncate before the store. */ | 1055 | } else { /* Otherwise set FPU rounding mode to truncate before the store. */ |
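
The x86-64 paths above implement the double → uint64_t rule from the documentation table: truncate as signed, then correct when the signed result was out of range. (The MIPS64 and ARM64 backends earlier use the same idea but check for saturation to INT64_MAX, since their convert instructions saturate instead of producing the x86 "indefinite" value.) A scalar C sketch of the idea follows; the casts stand in for the machine instructions (a plain C cast is undefined for out-of-range inputs), and this is not the actual lj_vm_num2u64 implementation:

```c
#include <stdint.h>

/* Sketch only: mirrors the emitted x86-64 sequence, assuming cvttsd2si
** semantics, i.e. out-of-range or NaN inputs produce the "indefinite"
** result INT64_MIN.
*/
static uint64_t num2u64_sketch(double n)
{
  int64_t i = (int64_t)n;                       /* signed truncation */
  if (i == INT64_MIN)                           /* indefinite result? */
    i = (int64_t)(n - 18446744073709551616.0);  /* add -2^64, convert again */
  return (uint64_t)i;                           /* reinterpret as unsigned */
}
```
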
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index ec6f13c8..cd7ae942 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c | |||
| @@ -59,9 +59,9 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) | |||
| 59 | p = lj_strfmt_wuleb128(p, intV(o)); | 59 | p = lj_strfmt_wuleb128(p, intV(o)); |
| 60 | } else if (tvisnum(o)) { | 60 | } else if (tvisnum(o)) { |
| 61 | if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ | 61 | if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ |
| 62 | lua_Number num = numV(o); | 62 | int64_t i64; |
| 63 | int32_t k = lj_num2int(num); | 63 | int32_t k; |
| 64 | if (num == (lua_Number)k) { /* -0 is never a constant. */ | 64 | if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */ |
| 65 | *p++ = BCDUMP_KTAB_INT; | 65 | *p++ = BCDUMP_KTAB_INT; |
| 66 | p = lj_strfmt_wuleb128(p, k); | 66 | p = lj_strfmt_wuleb128(p, k); |
| 67 | ctx->sb.w = p; | 67 | ctx->sb.w = p; |
| @@ -270,9 +270,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) | |||
| 270 | /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ | 270 | /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ |
| 271 | if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) { | 271 | if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) { |
| 272 | /* Narrow number constants to integers. */ | 272 | /* Narrow number constants to integers. */ |
| 273 | lua_Number num = numV(o); | 273 | int64_t i64; |
| 274 | k = lj_num2int(num); | 274 | if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */ |
| 275 | if (num == (lua_Number)k) { /* -0 is never a constant. */ | ||
| 276 | save_int: | 275 | save_int: |
| 277 | p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); | 276 | p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); |
| 278 | if (k < 0) | 277 | if (k < 0) |
diff --git a/src/lj_cconv.c b/src/lj_cconv.c index 854b51db..2b9349cd 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c | |||
| @@ -197,18 +197,16 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, | |||
| 197 | else goto err_conv; /* NYI: long double. */ | 197 | else goto err_conv; /* NYI: long double. */ |
| 198 | /* Then convert double to integer. */ | 198 | /* Then convert double to integer. */ |
| 199 | /* The conversion must exactly match the semantics of JIT-compiled code! */ | 199 | /* The conversion must exactly match the semantics of JIT-compiled code! */ |
| 200 | if (dsize < 4 || (dsize == 4 && !(dinfo & CTF_UNSIGNED))) { | 200 | if (dsize < 8) { |
| 201 | int32_t i = (int32_t)n; | 201 | int64_t i = lj_num2i64(n); /* Always convert via int64_t. */ |
| 202 | if (dsize == 4) *(int32_t *)dp = i; | 202 | if (dsize == 4) *(int32_t *)dp = i; |
| 203 | else if (dsize == 2) *(int16_t *)dp = (int16_t)i; | 203 | else if (dsize == 2) *(int16_t *)dp = (int16_t)i; |
| 204 | else *(int8_t *)dp = (int8_t)i; | 204 | else *(int8_t *)dp = (int8_t)i; |
| 205 | } else if (dsize == 4) { | ||
| 206 | *(uint32_t *)dp = (uint32_t)n; | ||
| 207 | } else if (dsize == 8) { | 205 | } else if (dsize == 8) { |
| 208 | if (!(dinfo & CTF_UNSIGNED)) | 206 | if ((dinfo & CTF_UNSIGNED)) |
| 209 | *(int64_t *)dp = (int64_t)n; | ||
| 210 | else | ||
| 211 | *(uint64_t *)dp = lj_num2u64(n); | 207 | *(uint64_t *)dp = lj_num2u64(n); |
| 208 | else | ||
| 209 | *(int64_t *)dp = lj_num2i64(n); | ||
| 212 | } else { | 210 | } else { |
| 213 | goto err_conv; /* NYI: conversion to >64 bit integers. */ | 211 | goto err_conv; /* NYI: conversion to >64 bit integers. */ |
| 214 | } | 212 | } |
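
The rewritten lj_cconv.c branch routes every sub-8-byte integer destination through int64_t first. A standalone C illustration (not from the patch; lj_num2i64() itself is defined in lj_obj.h and not shown in this excerpt) of why that ordering yields the defined mixed-signedness results documented above:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  double   n   = 4294967295.0;   /* the Lua number 0xffffffff */
  int64_t  i64 = (int64_t)n;     /* in range for int64_t: well defined */
  int32_t  i32 = (int32_t)i64;   /* -1 on two's-complement targets */
  uint32_t u32 = (uint32_t)i64;  /* 0xffffffff, defined modulo 2^32 */
  /* A direct (int32_t)n would be undefined behavior in ISO C because the
  ** value does not fit -- which is why the old code needed per-size and
  ** per-signedness special cases, while the unified path truncates to
  ** int64_t before narrowing.
  */
  printf("%d 0x%x\n", i32, (unsigned)u32);  /* prints: -1 0xffffffff */
  return 0;
}
```
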
diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 3b48f76c..2dc56a80 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c | |||
| @@ -133,12 +133,7 @@ collect_attrib: | |||
| 133 | idx = (ptrdiff_t)intV(key); | 133 | idx = (ptrdiff_t)intV(key); |
| 134 | goto integer_key; | 134 | goto integer_key; |
| 135 | } else if (tvisnum(key)) { /* Numeric key. */ | 135 | } else if (tvisnum(key)) { /* Numeric key. */ |
| 136 | #ifdef _MSC_VER | 136 | idx = lj_num2int_type(numV(key), ptrdiff_t); |
| 137 | /* Workaround for MSVC bug. */ | ||
| 138 | volatile | ||
| 139 | #endif | ||
| 140 | lua_Number n = numV(key); | ||
| 141 | idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n); | ||
| 142 | integer_key: | 137 | integer_key: |
| 143 | if (ctype_ispointer(ct->info)) { | 138 | if (ctype_ispointer(ct->info)) { |
| 144 | CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ | 139 | CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ |
diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 27f2c1dd..45c559cf 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c | |||
| @@ -445,7 +445,20 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, | |||
| 445 | /* fallthrough */ | 445 | /* fallthrough */ |
| 446 | case CCX(I, F): | 446 | case CCX(I, F): |
| 447 | if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; | 447 | if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; |
| 448 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY); | 448 | conv_I_F: |
| 449 | #if LJ_SOFTFP || LJ_32 | ||
| 450 | if (st == IRT_FLOAT) { /* Uncommon. Simplify split backends. */ | ||
| 451 | sp = emitconv(sp, IRT_NUM, IRT_FLOAT, 0); | ||
| 452 | st = IRT_NUM; | ||
| 453 | } | ||
| 454 | #endif | ||
| 455 | if (dsize < 8) { | ||
| 456 | lj_needsplit(J); | ||
| 457 | sp = emitconv(sp, IRT_I64, st, IRCONV_ANY); | ||
| 458 | sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, IRT_I64, 0); | ||
| 459 | } else { | ||
| 460 | sp = emitconv(sp, dt, st, IRCONV_ANY); | ||
| 461 | } | ||
| 449 | goto xstore; | 462 | goto xstore; |
| 450 | case CCX(I, P): | 463 | case CCX(I, P): |
| 451 | case CCX(I, A): | 464 | case CCX(I, A): |
| @@ -523,10 +536,9 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, | |||
| 523 | goto xstore; | 536 | goto xstore; |
| 524 | case CCX(P, F): | 537 | case CCX(P, F): |
| 525 | if (st == IRT_CDATA) goto err_nyi; | 538 | if (st == IRT_CDATA) goto err_nyi; |
| 526 | /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ | 539 | /* The signed 64 bit conversion is cheaper. */ |
| 527 | sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, | 540 | dt = (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32; |
| 528 | st, IRCONV_ANY); | 541 | goto conv_I_F; |
| 529 | goto xstore; | ||
| 530 | 542 | ||
| 531 | /* Destination is an array. */ | 543 | /* Destination is an array. */ |
| 532 | case CCX(A, A): | 544 | case CCX(A, A): |
| @@ -1878,7 +1890,7 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) | |||
| 1878 | if (J->base[0] && tref_iscdata(J->base[1])) { | 1890 | if (J->base[0] && tref_iscdata(J->base[1])) { |
| 1879 | tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64), | 1891 | tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64), |
| 1880 | J->base[1], &rd->argv[1]); | 1892 | J->base[1], &rd->argv[1]); |
| 1881 | if (!tref_isinteger(tsh)) | 1893 | if (LJ_32 && !tref_isinteger(tsh)) |
| 1882 | tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); | 1894 | tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); |
| 1883 | J->base[1] = tsh; | 1895 | J->base[1] = tsh; |
| 1884 | } | 1896 | } |
| @@ -1886,15 +1898,17 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) | |||
| 1886 | if (id) { | 1898 | if (id) { |
| 1887 | TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); | 1899 | TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); |
| 1888 | uint32_t op = rd->data; | 1900 | uint32_t op = rd->data; |
| 1901 | IRType t; | ||
| 1889 | if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); | 1902 | if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); |
| 1903 | t = tref_isinteger(tsh) ? IRT_INT : tref_type(tsh); | ||
| 1890 | if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && | 1904 | if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && |
| 1891 | !tref_isk(tsh)) | 1905 | !tref_isk(tsh)) |
| 1892 | tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); | 1906 | tsh = emitir(IRT(IR_BAND, t), tsh, lj_ir_kint(J, 63)); |
| 1893 | #ifdef LJ_TARGET_UNIFYROT | 1907 | #ifdef LJ_TARGET_UNIFYROT |
| 1894 | if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { | 1908 | if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { |
| 1895 | op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; | 1909 | op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; |
| 1896 | tsh = emitir(IRTI(IR_NEG), tsh, tsh); | 1910 | tsh = emitir(IRT(IR_NEG, t), tsh, tsh); |
| 1897 | } | 1911 | } |
| 1898 | #endif | 1912 | #endif |
| 1899 | tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); | 1913 | tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); |
| 1900 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); | 1914 | J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); |
diff --git a/src/lj_def.h b/src/lj_def.h index a9e23729..f34b1a39 100644 --- a/src/lj_def.h +++ b/src/lj_def.h | |||
| @@ -127,6 +127,7 @@ typedef uintptr_t BloomFilter; | |||
| 127 | #define LJ_INLINE inline | 127 | #define LJ_INLINE inline |
| 128 | #define LJ_AINLINE inline __attribute__((always_inline)) | 128 | #define LJ_AINLINE inline __attribute__((always_inline)) |
| 129 | #define LJ_NOINLINE __attribute__((noinline)) | 129 | #define LJ_NOINLINE __attribute__((noinline)) |
| 130 | #define LJ_CONSTF __attribute__((nothrow,const)) | ||
| 130 | 131 | ||
| 131 | #if defined(__ELF__) || defined(__MACH__) || defined(__psp2__) | 132 | #if defined(__ELF__) || defined(__MACH__) || defined(__psp2__) |
| 132 | #if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) | 133 | #if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) |
| @@ -245,6 +246,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p) | |||
| 245 | #define LJ_INLINE __inline | 246 | #define LJ_INLINE __inline |
| 246 | #define LJ_AINLINE __forceinline | 247 | #define LJ_AINLINE __forceinline |
| 247 | #define LJ_NOINLINE __declspec(noinline) | 248 | #define LJ_NOINLINE __declspec(noinline) |
| 249 | #define LJ_CONSTF __declspec(nothrow noalias) | ||
| 248 | #if defined(_M_IX86) | 250 | #if defined(_M_IX86) |
| 249 | #define LJ_FASTCALL __fastcall | 251 | #define LJ_FASTCALL __fastcall |
| 250 | #endif | 252 | #endif |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 527b6c06..290986f6 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
| @@ -70,7 +70,7 @@ static int32_t argv2int(jit_State *J, TValue *o) | |||
| 70 | { | 70 | { |
| 71 | if (!lj_strscan_numberobj(o)) | 71 | if (!lj_strscan_numberobj(o)) |
| 72 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 72 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
| 73 | return tvisint(o) ? intV(o) : lj_num2int(numV(o)); | 73 | return numberVint(o); |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | /* Get runtime value of string argument. */ | 76 | /* Get runtime value of string argument. */ |
| @@ -586,7 +586,7 @@ static void LJ_FASTCALL recff_math_round(jit_State *J, RecordFFData *rd) | |||
| 586 | /* Result is integral (or NaN/Inf), but may not fit an int32_t. */ | 586 | /* Result is integral (or NaN/Inf), but may not fit an int32_t. */ |
| 587 | if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */ | 587 | if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */ |
| 588 | lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data); | 588 | lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data); |
| 589 | if (n == (lua_Number)lj_num2int(n)) | 589 | if (lj_num2int_ok(n)) |
| 590 | tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK); | 590 | tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK); |
| 591 | } | 591 | } |
| 592 | J->base[0] = tr; | 592 | J->base[0] = tr; |
diff --git a/src/lj_ir.c b/src/lj_ir.c index e7a5e8bc..e24fead4 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c | |||
| @@ -248,28 +248,15 @@ TRef lj_ir_kint64(jit_State *J, uint64_t u64) | |||
| 248 | return lj_ir_k64(J, IR_KINT64, u64); | 248 | return lj_ir_k64(J, IR_KINT64, u64); |
| 249 | } | 249 | } |
| 250 | 250 | ||
| 251 | /* Check whether a number is int and return it. -0 is NOT considered an int. */ | ||
| 252 | static int numistrueint(lua_Number n, int32_t *kp) | ||
| 253 | { | ||
| 254 | int32_t k = lj_num2int(n); | ||
| 255 | if (n == (lua_Number)k) { | ||
| 256 | if (kp) *kp = k; | ||
| 257 | if (k == 0) { /* Special check for -0. */ | ||
| 258 | TValue tv; | ||
| 259 | setnumV(&tv, n); | ||
| 260 | if (tv.u32.hi != 0) | ||
| 261 | return 0; | ||
| 262 | } | ||
| 263 | return 1; | ||
| 264 | } | ||
| 265 | return 0; | ||
| 266 | } | ||
| 267 | |||
| 268 | /* Intern number as int32_t constant if possible, otherwise as FP constant. */ | 251 | /* Intern number as int32_t constant if possible, otherwise as FP constant. */ |
| 269 | TRef lj_ir_knumint(jit_State *J, lua_Number n) | 252 | TRef lj_ir_knumint(jit_State *J, lua_Number n) |
| 270 | { | 253 | { |
| 254 | int64_t i64; | ||
| 271 | int32_t k; | 255 | int32_t k; |
| 272 | if (numistrueint(n, &k)) | 256 | TValue tv; |
| 257 | setnumV(&tv, n); | ||
| 258 | /* -0 is NOT considered an int. */ | ||
| 259 | if (lj_num2int_check(n, i64, k) && !tvismzero(&tv)) | ||
| 273 | return lj_ir_kint(J, k); | 260 | return lj_ir_kint(J, k); |
| 274 | else | 261 | else |
| 275 | return lj_ir_knum(J, n); | 262 | return lj_ir_knum(J, n); |
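
The replacement check still refuses to intern -0 as the integer constant 0 (the tvismzero() test), because a plain numeric comparison cannot distinguish -0 from +0 even though the two are observably different. A tiny standalone illustration, assuming IEEE floating-point semantics:

```c
#include <assert.h>

int main(void)
{
  double mz = -0.0;
  assert(mz == 0.0);       /* equality cannot tell -0 from +0 */
  assert(1.0 / mz < 0.0);  /* but -0 is observable: 1/-0 is -infinity */
  return 0;
}
```
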
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 5196144e..60b196c6 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
| @@ -233,20 +233,15 @@ typedef struct CCallInfo { | |||
| 233 | _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ | 233 | _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ |
| 234 | _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ | 234 | _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ |
| 235 | _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ | 235 | _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ |
| 236 | _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ | ||
| 237 | _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ | 236 | _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ |
| 238 | _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ | 237 | _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ |
| 239 | _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ | 238 | _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ |
| 240 | _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ | 239 | _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ |
| 241 | _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ | ||
| 242 | _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ | 240 | _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ |
| 243 | _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ | 241 | _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ |
| 244 | _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ | 242 | _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ |
| 245 | _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ | 243 | _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ |
| 246 | _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \ | 244 | _(FP64_FFI, lj_vm_num2u64, 1, N, U64, XA_FP) \ |
| 247 | _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \ | ||
| 248 | _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ | ||
| 249 | _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ | ||
| 250 | _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ | 245 | _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ |
| 251 | _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ | 246 | _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ |
| 252 | _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ | 247 | _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ |
| @@ -291,27 +286,14 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; | |||
| 291 | #define softfp_d2i __aeabi_d2iz | 286 | #define softfp_d2i __aeabi_d2iz |
| 292 | #define softfp_ui2d __aeabi_ui2d | 287 | #define softfp_ui2d __aeabi_ui2d |
| 293 | #define softfp_f2d __aeabi_f2d | 288 | #define softfp_f2d __aeabi_f2d |
| 294 | #define softfp_d2ui __aeabi_d2uiz | ||
| 295 | #define softfp_d2f __aeabi_d2f | 289 | #define softfp_d2f __aeabi_d2f |
| 296 | #define softfp_i2f __aeabi_i2f | 290 | #define softfp_i2f __aeabi_i2f |
| 297 | #define softfp_ui2f __aeabi_ui2f | 291 | #define softfp_ui2f __aeabi_ui2f |
| 298 | #define softfp_f2i __aeabi_f2iz | 292 | #define softfp_f2i __aeabi_f2iz |
| 299 | #define softfp_f2ui __aeabi_f2uiz | ||
| 300 | #define fp64_l2d __aeabi_l2d | 293 | #define fp64_l2d __aeabi_l2d |
| 301 | #define fp64_ul2d __aeabi_ul2d | 294 | #define fp64_ul2d __aeabi_ul2d |
| 302 | #define fp64_l2f __aeabi_l2f | 295 | #define fp64_l2f __aeabi_l2f |
| 303 | #define fp64_ul2f __aeabi_ul2f | 296 | #define fp64_ul2f __aeabi_ul2f |
| 304 | #if LJ_TARGET_IOS | ||
| 305 | #define fp64_d2l __fixdfdi | ||
| 306 | #define fp64_d2ul __fixunsdfdi | ||
| 307 | #define fp64_f2l __fixsfdi | ||
| 308 | #define fp64_f2ul __fixunssfdi | ||
| 309 | #else | ||
| 310 | #define fp64_d2l __aeabi_d2lz | ||
| 311 | #define fp64_d2ul __aeabi_d2ulz | ||
| 312 | #define fp64_f2l __aeabi_f2lz | ||
| 313 | #define fp64_f2ul __aeabi_f2ulz | ||
| 314 | #endif | ||
| 315 | #elif LJ_TARGET_MIPS || LJ_TARGET_PPC | 297 | #elif LJ_TARGET_MIPS || LJ_TARGET_PPC |
| 316 | #define softfp_add __adddf3 | 298 | #define softfp_add __adddf3 |
| 317 | #define softfp_sub __subdf3 | 299 | #define softfp_sub __subdf3 |
| @@ -322,12 +304,10 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; | |||
| 322 | #define softfp_d2i __fixdfsi | 304 | #define softfp_d2i __fixdfsi |
| 323 | #define softfp_ui2d __floatunsidf | 305 | #define softfp_ui2d __floatunsidf |
| 324 | #define softfp_f2d __extendsfdf2 | 306 | #define softfp_f2d __extendsfdf2 |
| 325 | #define softfp_d2ui __fixunsdfsi | ||
| 326 | #define softfp_d2f __truncdfsf2 | 307 | #define softfp_d2f __truncdfsf2 |
| 327 | #define softfp_i2f __floatsisf | 308 | #define softfp_i2f __floatsisf |
| 328 | #define softfp_ui2f __floatunsisf | 309 | #define softfp_ui2f __floatunsisf |
| 329 | #define softfp_f2i __fixsfsi | 310 | #define softfp_f2i __fixsfsi |
| 330 | #define softfp_f2ui __fixunssfsi | ||
| 331 | #else | 311 | #else |
| 332 | #error "Missing soft-float definitions for target architecture" | 312 | #error "Missing soft-float definitions for target architecture" |
| 333 | #endif | 313 | #endif |
| @@ -341,12 +321,10 @@ extern int32_t softfp_d2i(double a); | |||
| 341 | #if LJ_HASFFI | 321 | #if LJ_HASFFI |
| 342 | extern double softfp_ui2d(uint32_t a); | 322 | extern double softfp_ui2d(uint32_t a); |
| 343 | extern double softfp_f2d(float a); | 323 | extern double softfp_f2d(float a); |
| 344 | extern uint32_t softfp_d2ui(double a); | ||
| 345 | extern float softfp_d2f(double a); | 324 | extern float softfp_d2f(double a); |
| 346 | extern float softfp_i2f(int32_t a); | 325 | extern float softfp_i2f(int32_t a); |
| 347 | extern float softfp_ui2f(uint32_t a); | 326 | extern float softfp_ui2f(uint32_t a); |
| 348 | extern int32_t softfp_f2i(float a); | 327 | extern int32_t softfp_f2i(float a); |
| 349 | extern uint32_t softfp_f2ui(float a); | ||
| 350 | #endif | 328 | #endif |
| 351 | #if LJ_TARGET_MIPS | 329 | #if LJ_TARGET_MIPS |
| 352 | extern double lj_vm_sfmin(double a, double b); | 330 | extern double lj_vm_sfmin(double a, double b); |
| @@ -360,10 +338,6 @@ extern double lj_vm_sfmax(double a, double b); | |||
| 360 | #define fp64_ul2d __floatundidf | 338 | #define fp64_ul2d __floatundidf |
| 361 | #define fp64_l2f __floatdisf | 339 | #define fp64_l2f __floatdisf |
| 362 | #define fp64_ul2f __floatundisf | 340 | #define fp64_ul2f __floatundisf |
| 363 | #define fp64_d2l __fixdfdi | ||
| 364 | #define fp64_d2ul __fixunsdfdi | ||
| 365 | #define fp64_f2l __fixsfdi | ||
| 366 | #define fp64_f2ul __fixunssfdi | ||
| 367 | #else | 341 | #else |
| 368 | #error "Missing fp64 helper definitions for this compiler" | 342 | #error "Missing fp64 helper definitions for this compiler" |
| 369 | #endif | 343 | #endif |
| @@ -374,10 +348,6 @@ extern double fp64_l2d(int64_t a); | |||
| 374 | extern double fp64_ul2d(uint64_t a); | 348 | extern double fp64_ul2d(uint64_t a); |
| 375 | extern float fp64_l2f(int64_t a); | 349 | extern float fp64_l2f(int64_t a); |
| 376 | extern float fp64_ul2f(uint64_t a); | 350 | extern float fp64_ul2f(uint64_t a); |
| 377 | extern int64_t fp64_d2l(double a); | ||
| 378 | extern uint64_t fp64_d2ul(double a); | ||
| 379 | extern int64_t fp64_f2l(float a); | ||
| 380 | extern uint64_t fp64_f2ul(float a); | ||
| 381 | #endif | 351 | #endif |
| 382 | 352 | ||
| 383 | #endif | 353 | #endif |
diff --git a/src/lj_jit.h b/src/lj_jit.h index 05a8e9bb..c0523457 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
| @@ -350,22 +350,18 @@ enum { | |||
| 350 | }; | 350 | }; |
| 351 | 351 | ||
| 352 | enum { | 352 | enum { |
| 353 | #if LJ_TARGET_X64 || LJ_TARGET_MIPS64 | ||
| 354 | LJ_K64_M2P64, /* -2^64 */ | ||
| 355 | #endif | ||
| 353 | #if LJ_TARGET_X86ORX64 | 356 | #if LJ_TARGET_X86ORX64 |
| 354 | LJ_K64_TOBIT, /* 2^52 + 2^51 */ | 357 | LJ_K64_TOBIT, /* 2^52 + 2^51 */ |
| 355 | LJ_K64_2P64, /* 2^64 */ | 358 | LJ_K64_2P64, /* 2^64 */ |
| 356 | LJ_K64_M2P64, /* -2^64 */ | ||
| 357 | #if LJ_32 | ||
| 358 | LJ_K64_M2P64_31, /* -2^64 or -2^31 */ | ||
| 359 | #else | ||
| 360 | LJ_K64_M2P64_31 = LJ_K64_M2P64, | ||
| 361 | #endif | 359 | #endif |
| 360 | #if LJ_TARGET_MIPS64 | ||
| 361 | LJ_K64_2P63, /* 2^63 */ | ||
| 362 | #endif | 362 | #endif |
| 363 | #if LJ_TARGET_MIPS | 363 | #if LJ_TARGET_MIPS |
| 364 | LJ_K64_2P31, /* 2^31 */ | 364 | LJ_K64_2P31, /* 2^31 */ |
| 365 | #if LJ_64 | ||
| 366 | LJ_K64_2P63, /* 2^63 */ | ||
| 367 | LJ_K64_M2P64, /* -2^64 */ | ||
| 368 | #endif | ||
| 369 | #endif | 365 | #endif |
| 370 | #if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 | 366 | #if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 |
| 371 | LJ_K64_VM_EXIT_HANDLER, | 367 | LJ_K64_VM_EXIT_HANDLER, |
| @@ -376,20 +372,19 @@ enum { | |||
| 376 | #define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) | 372 | #define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) |
| 377 | 373 | ||
| 378 | enum { | 374 | enum { |
| 379 | #if LJ_TARGET_X86ORX64 | 375 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 |
| 380 | LJ_K32_M2P64_31, /* -2^64 or -2^31 */ | 376 | LJ_K32_M2P64, /* -2^64 */ |
| 377 | #endif | ||
| 378 | #if LJ_TARGET_MIPS64 | ||
| 379 | LJ_K32_2P63, /* 2^63 */ | ||
| 381 | #endif | 380 | #endif |
| 382 | #if LJ_TARGET_PPC | 381 | #if LJ_TARGET_PPC |
| 383 | LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ | 382 | LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ |
| 384 | LJ_K32_2P52, /* 2^52 */ | 383 | LJ_K32_2P52, /* 2^52 */ |
| 385 | #endif | 384 | #endif |
| 386 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 385 | #if LJ_TARGET_PPC |
| 387 | LJ_K32_2P31, /* 2^31 */ | 386 | LJ_K32_2P31, /* 2^31 */ |
| 388 | #endif | 387 | #endif |
| 389 | #if LJ_TARGET_MIPS64 | ||
| 390 | LJ_K32_2P63, /* 2^63 */ | ||
| 391 | LJ_K32_M2P64, /* -2^64 */ | ||
| 392 | #endif | ||
| 393 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS32 | 388 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS32 |
| 394 | LJ_K32_VM_EXIT_HANDLER, | 389 | LJ_K32_VM_EXIT_HANDLER, |
| 395 | LJ_K32_VM_EXIT_INTERP, | 390 | LJ_K32_VM_EXIT_INTERP, |
diff --git a/src/lj_lib.c b/src/lj_lib.c index 88cb2bdd..d51351b8 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c | |||
| @@ -349,7 +349,7 @@ int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b) | |||
| 349 | ** integer overflow. Overflow detection still works, since all FPUs | 349 | ** integer overflow. Overflow detection still works, since all FPUs |
| 350 | ** return either MININT or MAXINT, which is then out of range. | 350 | ** return either MININT or MAXINT, which is then out of range. |
| 351 | */ | 351 | */ |
| 352 | int32_t i = (int32_t)numV(o); | 352 | int32_t i = lj_num2int(numV(o)); |
| 353 | if (i >= a && i <= b) return i; | 353 | if (i >= a && i <= b) return i; |
| 354 | #if LJ_HASFFI | 354 | #if LJ_HASFFI |
| 355 | } else if (tviscdata(o)) { | 355 | } else if (tviscdata(o)) { |
diff --git a/src/lj_meta.c b/src/lj_meta.c index c9307615..3f30fafb 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c | |||
| @@ -465,7 +465,8 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o) | |||
| 465 | if (tvisint(o+i)) { | 465 | if (tvisint(o+i)) { |
| 466 | k[i] = intV(o+i); nint++; | 466 | k[i] = intV(o+i); nint++; |
| 467 | } else { | 467 | } else { |
| 468 | k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); | 468 | int64_t i64; |
| 469 | if (lj_num2int_check(numV(o+i), i64, k[i])) nint++; | ||
| 469 | } | 470 | } |
| 470 | } | 471 | } |
| 471 | if (nint == 3) { /* Narrow to integers. */ | 472 | if (nint == 3) { /* Narrow to integers. */ |
diff --git a/src/lj_obj.h b/src/lj_obj.h index 73b186e2..58e5049c 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
| @@ -981,43 +981,68 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) | |||
| 981 | 981 | ||
| 982 | /* -- Number to integer conversion ---------------------------------------- */ | 982 | /* -- Number to integer conversion ---------------------------------------- */ |
| 983 | 983 | ||
| 984 | #if LJ_SOFTFP | 984 | /* |
| 985 | LJ_ASMF int32_t lj_vm_tobit(double x); | 985 | ** The C standard leaves many aspects of FP to integer conversions as |
| 986 | #if LJ_TARGET_MIPS64 | 986 | ** undefined behavior. Portability is a mess, hardware support varies, |
| 987 | LJ_ASMF int32_t lj_vm_tointg(double x); | 987 | ** and modern C compilers are like a box of chocolates -- you never know |
| 988 | #endif | 988 | ** what you're gonna get. |
| 989 | #endif | 989 | ** |
| 990 | ** However, we need 100% matching behavior between the interpreter (asm + C), | ||
| 991 | ** optimizations (C) and the code generated by the JIT compiler (asm). | ||
| 992 | ** Mixing Lua numbers with FFI numbers creates some extra requirements. | ||
| 993 | ** | ||
| 994 | ** These conversions have been moved to assembler code, even if they seem | ||
| 995 | ** trivial, to foil unanticipated C compiler 'optimizations' with the | ||
| 996 | ** surrounding code. Only the unchecked double to int32_t conversion | ||
| 997 | ** is still in C, because it ought to be pretty safe -- we'll see. | ||
| 998 | ** | ||
| 999 | ** These macros also serve to document all places where FP to integer | ||
| 1000 | ** conversions happen. | ||
| 1001 | */ | ||
| 990 | 1002 | ||
| 991 | static LJ_AINLINE int32_t lj_num2bit(lua_Number n) | 1003 | /* Unchecked double to int32_t conversion. */ |
| 992 | { | 1004 | #define lj_num2int(n) ((int32_t)(n)) |
| 993 | #if LJ_SOFTFP | ||
| 994 | return lj_vm_tobit(n); | ||
| 995 | #else | ||
| 996 | TValue o; | ||
| 997 | o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */ | ||
| 998 | return (int32_t)o.u32.lo; | ||
| 999 | #endif | ||
| 1000 | } | ||
| 1001 | 1005 | ||
| 1002 | #define lj_num2int(n) ((int32_t)(n)) | 1006 | /* Unchecked double to arch/os-dependent signed integer type conversion. |
| 1007 | ** This assumes the 32/64-bit signed conversions are NOT range-extended. | ||
| 1008 | */ | ||
| 1009 | #define lj_num2int_type(n, tp) ((tp)(n)) | ||
| 1003 | 1010 | ||
| 1004 | /* | 1011 | /* Convert a double to int32_t and check for exact conversion. |
| 1005 | ** This must match the JIT backend behavior. In particular for archs | 1012 | ** Returns the zero-extended int32_t on success. -0 is OK, too. |
| 1006 | ** that don't have a common hardware instruction for this conversion. | 1013 | ** Returns 0x8000000080000000LL on failure (simplifies range checks). |
| 1007 | ** Note that signed FP to unsigned int conversions have an undefined | ||
| 1008 | ** result and should never be relied upon in portable FFI code. | ||
| 1009 | ** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). | ||
| 1010 | */ | 1014 | */ |
| 1011 | static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) | 1015 | LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x); |
| 1012 | { | 1016 | |
| 1013 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS | 1017 | /* Check for exact conversion only, without storing the result. */ |
| 1014 | int64_t i = (int64_t)n; | 1018 | #define lj_num2int_ok(x) (lj_vm_num2int_check((x)) >= 0) |
| 1015 | if (i < 0) i = (int64_t)(n - 18446744073709551616.0); | 1019 | |
| 1016 | return (uint64_t)i; | 1020 | /* Check for exact conversion and conditionally store result. |
| 1017 | #else | 1021 | ** Note: conditions that fail for 0x80000000 may check only the lower |
| 1018 | return (uint64_t)n; | 1022 | ** 32 bits. This generates good code for both 32 and 64 bit archs. |
| 1019 | #endif | 1023 | */ |
| 1020 | } | 1024 | #define lj_num2int_cond(x, i64, i, cond) \ |
| 1025 | (i64 = lj_vm_num2int_check((x)), cond ? (i = (int32_t)i64, 1) : 0) | ||
| 1026 | |||
| 1027 | /* This is the generic check for a full-range int32_t result. */ | ||
| 1028 | #define lj_num2int_check(x, i64, i) \ | ||
| 1029 | lj_num2int_cond((x), i64, i, i64 >= 0) | ||
| 1030 | |||
| 1031 | /* Predictable conversion from double to int64_t or uint64_t. | ||
| 1032 | ** Truncates towards zero. Out-of-range values, NaN and +-Inf return | ||
| 1033 | ** an arch-dependent result, but do not cause C undefined behavior. | ||
| 1034 | ** The uint64_t conversion accepts the union of the unsigned + signed range. | ||
| 1035 | */ | ||
| 1036 | LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x); | ||
| 1037 | LJ_ASMF LJ_CONSTF int64_t lj_vm_num2u64(double x); | ||
| 1038 | |||
| 1039 | #define lj_num2i64(x) (lj_vm_num2i64((x))) | ||
| 1040 | #define lj_num2u64(x) (lj_vm_num2u64((x))) | ||
| 1041 | |||
| 1042 | /* Lua BitOp conversion semantics use the 2^52 + 2^51 trick. */ | ||
| 1043 | LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x); | ||
| 1044 | |||
| 1045 | #define lj_num2bit(x) lj_vm_tobit((x)) | ||
| 1021 | 1046 | ||
| 1022 | static LJ_AINLINE int32_t numberVint(cTValue *o) | 1047 | static LJ_AINLINE int32_t numberVint(cTValue *o) |
| 1023 | { | 1048 | { |
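Note: the comment block and macros added to lj_obj.h above fix the contract of the checked conversion. As a reading aid only, here is a hypothetical C model of what lj_vm_num2int_check() promises; the real routine is per-target assembler, and the range guard below stands in for the bit tests the assembler performs, so the model stays clear of the C undefined behavior the comment warns about.

```c
#include <stdint.h>

/* Hypothetical C model of the lj_vm_num2int_check() contract.
** Exact int32_t results come back zero-extended (always >= 0, -0 included);
** everything else returns the failure marker 0x8000000080000000LL (< 0).
*/
static int64_t num2int_check_model(double x)
{
  if (x >= -2147483648.0 && x < 2147483648.0) {  /* Within int32_t range. */
    int32_t i = (int32_t)x;                      /* Truncation towards zero. */
    if ((double)i == x)                          /* Exact? -0 is OK, too. */
      return (int64_t)(uint32_t)i;               /* Zero-extended result. */
  }
  return (int64_t)0x8000000080000000ULL;         /* Failure marker. */
}
```

With that contract, lj_num2int_ok() reduces to a sign test on the return value, and lj_num2int_check()/lj_num2int_cond() only store the low 32 bits when the chosen condition holds.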
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 6fdf4566..456c04b2 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
| @@ -303,17 +303,18 @@ LJFOLDF(kfold_intarith) | |||
| 303 | return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o)); | 303 | return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o)); |
| 304 | } | 304 | } |
| 305 | 305 | ||
| 306 | /* Forward declaration. */ | ||
| 307 | static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, | ||
| 308 | IROp op); | ||
| 309 | |||
| 306 | LJFOLD(ADDOV KINT KINT) | 310 | LJFOLD(ADDOV KINT KINT) |
| 307 | LJFOLD(SUBOV KINT KINT) | 311 | LJFOLD(SUBOV KINT KINT) |
| 308 | LJFOLD(MULOV KINT KINT) | 312 | LJFOLD(MULOV KINT KINT) |
| 309 | LJFOLDF(kfold_intovarith) | 313 | LJFOLDF(kfold_intovarith) |
| 310 | { | 314 | { |
| 311 | lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i, | 315 | int64_t k = kfold_int64arith(J, (int64_t)fleft->i, (int64_t)fright->i, |
| 312 | fins->o - IR_ADDOV); | 316 | (IROp)((int)fins->o - (int)IR_ADDOV + (int)IR_ADD)); |
| 313 | int32_t k = lj_num2int(n); | 317 | return checki32(k) ? INTFOLD(k) : FAILFOLD; |
| 314 | if (n != (lua_Number)k) | ||
| 315 | return FAILFOLD; | ||
| 316 | return INTFOLD(k); | ||
| 317 | } | 318 | } |
| 318 | 319 | ||
| 319 | LJFOLD(BNOT KINT) | 320 | LJFOLD(BNOT KINT) |
| @@ -368,11 +369,11 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, | |||
| 368 | IROp op) | 369 | IROp op) |
| 369 | { | 370 | { |
| 370 | UNUSED(J); | 371 | UNUSED(J); |
| 371 | #if LJ_HASFFI | ||
| 372 | switch (op) { | 372 | switch (op) { |
| 373 | case IR_ADD: k1 += k2; break; | 373 | case IR_ADD: k1 += k2; break; |
| 374 | case IR_SUB: k1 -= k2; break; | 374 | case IR_SUB: k1 -= k2; break; |
| 375 | case IR_MUL: k1 *= k2; break; | 375 | case IR_MUL: k1 *= k2; break; |
| 376 | #if LJ_HASFFI | ||
| 376 | case IR_BAND: k1 &= k2; break; | 377 | case IR_BAND: k1 &= k2; break; |
| 377 | case IR_BOR: k1 |= k2; break; | 378 | case IR_BOR: k1 |= k2; break; |
| 378 | case IR_BXOR: k1 ^= k2; break; | 379 | case IR_BXOR: k1 ^= k2; break; |
| @@ -382,11 +383,8 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, | |||
| 382 | case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break; | 383 | case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break; |
| 383 | case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break; | 384 | case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break; |
| 384 | default: lj_assertJ(0, "bad IR op %d", op); break; | 385 | default: lj_assertJ(0, "bad IR op %d", op); break; |
| 385 | } | ||
| 386 | #else | ||
| 387 | UNUSED(k2); UNUSED(op); | ||
| 388 | lj_assertJ(0, "FFI IR op without FFI"); | ||
| 389 | #endif | 386 | #endif |
| 387 | } | ||
| 390 | return k1; | 388 | return k1; |
| 391 | } | 389 | } |
| 392 | 390 | ||
| @@ -883,8 +881,11 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM) | |||
| 883 | LJFOLDF(kfold_conv_knum_int_num) | 881 | LJFOLDF(kfold_conv_knum_int_num) |
| 884 | { | 882 | { |
| 885 | lua_Number n = knumleft; | 883 | lua_Number n = knumleft; |
| 886 | int32_t k = lj_num2int(n); | 884 | if (irt_isguard(fins->t)) { |
| 887 | if (irt_isguard(fins->t) && n != (lua_Number)k) { | 885 | int64_t i64; |
| 886 | int32_t k; | ||
| 887 | if (lj_num2int_check(n, i64, k)) | ||
| 888 | return INTFOLD(k); | ||
| 888 | /* We're about to create a guard which always fails, like CONV +1.5. | 889 | /* We're about to create a guard which always fails, like CONV +1.5. |
| 889 | ** Some pathological loops cause this during LICM, e.g.: | 890 | ** Some pathological loops cause this during LICM, e.g.: |
| 890 | ** local x,k,t = 0,1.5,{1,[1.5]=2} | 891 | ** local x,k,t = 0,1.5,{1,[1.5]=2} |
| @@ -892,27 +893,15 @@ LJFOLDF(kfold_conv_knum_int_num) | |||
| 892 | ** assert(x == 300) | 893 | ** assert(x == 300) |
| 893 | */ | 894 | */ |
| 894 | return FAILFOLD; | 895 | return FAILFOLD; |
| 896 | } else { | ||
| 897 | return INTFOLD(lj_num2int(n)); | ||
| 895 | } | 898 | } |
| 896 | return INTFOLD(k); | ||
| 897 | } | ||
| 898 | |||
| 899 | LJFOLD(CONV KNUM IRCONV_U32_NUM) | ||
| 900 | LJFOLDF(kfold_conv_knum_u32_num) | ||
| 901 | { | ||
| 902 | #ifdef _MSC_VER | ||
| 903 | { /* Workaround for MSVC bug. */ | ||
| 904 | volatile uint32_t u = (uint32_t)knumleft; | ||
| 905 | return INTFOLD((int32_t)u); | ||
| 906 | } | ||
| 907 | #else | ||
| 908 | return INTFOLD((int32_t)(uint32_t)knumleft); | ||
| 909 | #endif | ||
| 910 | } | 899 | } |
| 911 | 900 | ||
| 912 | LJFOLD(CONV KNUM IRCONV_I64_NUM) | 901 | LJFOLD(CONV KNUM IRCONV_I64_NUM) |
| 913 | LJFOLDF(kfold_conv_knum_i64_num) | 902 | LJFOLDF(kfold_conv_knum_i64_num) |
| 914 | { | 903 | { |
| 915 | return INT64FOLD((uint64_t)(int64_t)knumleft); | 904 | return INT64FOLD((uint64_t)lj_num2i64(knumleft)); |
| 916 | } | 905 | } |
| 917 | 906 | ||
| 918 | LJFOLD(CONV KNUM IRCONV_U64_NUM) | 907 | LJFOLD(CONV KNUM IRCONV_U64_NUM) |
| @@ -1135,7 +1124,6 @@ LJFOLDF(shortcut_conv_num_int) | |||
| 1135 | } | 1124 | } |
| 1136 | 1125 | ||
| 1137 | LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ | 1126 | LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ |
| 1138 | LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */ | ||
| 1139 | LJFOLDF(simplify_conv_int_num) | 1127 | LJFOLDF(simplify_conv_int_num) |
| 1140 | { | 1128 | { |
| 1141 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | 1129 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ |
| @@ -1334,6 +1322,24 @@ LJFOLDF(narrow_convert) | |||
| 1334 | return lj_opt_narrow_convert(J); | 1322 | return lj_opt_narrow_convert(J); |
| 1335 | } | 1323 | } |
| 1336 | 1324 | ||
| 1325 | LJFOLD(XSTORE any CONV) | ||
| 1326 | LJFOLDF(xstore_conv) | ||
| 1327 | { | ||
| 1328 | #if LJ_64 | ||
| 1329 | PHIBARRIER(fright); | ||
| 1330 | if (!irt_is64(fins->t) && | ||
| 1331 | irt_type(fins->t) == (IRType)((fright->op2&IRCONV_DSTMASK)>>IRCONV_DSH) && | ||
| 1332 | ((fright->op2&IRCONV_SRCMASK) == IRT_I64 || | ||
| 1333 | (fright->op2&IRCONV_SRCMASK) == IRT_U64)) { | ||
| 1334 | fins->op2 = fright->op1; | ||
| 1335 | return RETRYFOLD; | ||
| 1336 | } | ||
| 1337 | #else | ||
| 1338 | UNUSED(J); | ||
| 1339 | #endif | ||
| 1340 | return NEXTFOLD; | ||
| 1341 | } | ||
| 1342 | |||
| 1337 | /* -- Integer algebraic simplifications ----------------------------------- */ | 1343 | /* -- Integer algebraic simplifications ----------------------------------- */ |
| 1338 | 1344 | ||
| 1339 | LJFOLD(ADD any KINT) | 1345 | LJFOLD(ADD any KINT) |
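For reference, the reworked ADDOV/SUBOV/MULOV constant folding above boils down to: evaluate in 64-bit integer arithmetic (which cannot overflow for two int32_t inputs) and fold only if the result passes checki32(). A minimal stand-alone sketch, with checki32() modeled inline and the op selection simplified to an index:

```c
#include <stdint.h>

/* Sketch of the new overflow-op constant folding: fold only if the 64-bit
** result fits into int32_t, i.e. the runtime overflow guard would not fire.
*/
static int fold_intov_model(int32_t a, int32_t b, int op, int32_t *out)
{
  int64_t k = op == 0 ? (int64_t)a + b :
              op == 1 ? (int64_t)a - b : (int64_t)a * b;   /* ADD/SUB/MUL */
  if (k >= INT32_MIN && k <= INT32_MAX) {                  /* checki32(k) */
    *out = (int32_t)k;
    return 1;                        /* INTFOLD(k) */
  }
  return 0;                          /* FAILFOLD: keep the op and its guard. */
}
```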
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 01b5833d..3085c837 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
| @@ -281,22 +281,20 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) | |||
| 281 | return 0; | 281 | return 0; |
| 282 | } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ | 282 | } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ |
| 283 | lua_Number n = ir_knum(ir)->n; | 283 | lua_Number n = ir_knum(ir)->n; |
| 284 | int64_t i64; | ||
| 285 | int32_t k; | ||
| 284 | if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { | 286 | if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { |
| 285 | /* Allows a wider range of constants. */ | 287 | /* Allows a wider range of constants, if const doesn't lose precision. */ |
| 286 | int64_t k64 = (int64_t)n; | 288 | if (lj_num2int_check(n, i64, k)) { |
| 287 | if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */ | ||
| 288 | *nc->sp++ = NARROWINS(NARROW_INT, 0); | ||
| 289 | *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ | ||
| 290 | return 0; | ||
| 291 | } | ||
| 292 | } else { | ||
| 293 | int32_t k = lj_num2int(n); | ||
| 294 | /* Only if constant is a small integer. */ | ||
| 295 | if (checki16(k) && n == (lua_Number)k) { | ||
| 296 | *nc->sp++ = NARROWINS(NARROW_INT, 0); | 289 | *nc->sp++ = NARROWINS(NARROW_INT, 0); |
| 297 | *nc->sp++ = (NarrowIns)k; | 290 | *nc->sp++ = (NarrowIns)k; |
| 298 | return 0; | 291 | return 0; |
| 299 | } | 292 | } |
| 293 | } else if (lj_num2int_cond(n, i64, k, checki16((int32_t)i64))) { | ||
| 294 | /* Only if constant is a small integer. */ | ||
| 295 | *nc->sp++ = NARROWINS(NARROW_INT, 0); | ||
| 296 | *nc->sp++ = (NarrowIns)k; | ||
| 297 | return 0; | ||
| 300 | } | 298 | } |
| 301 | return 10; /* Never narrow other FP constants (this is rare). */ | 299 | return 10; /* Never narrow other FP constants (this is rare). */ |
| 302 | } | 300 | } |
| @@ -512,12 +510,6 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) | |||
| 512 | 510 | ||
| 513 | /* -- Narrowing of arithmetic operators ----------------------------------- */ | 511 | /* -- Narrowing of arithmetic operators ----------------------------------- */ |
| 514 | 512 | ||
| 515 | /* Check whether a number fits into an int32_t (-0 is ok, too). */ | ||
| 516 | static int numisint(lua_Number n) | ||
| 517 | { | ||
| 518 | return (n == (lua_Number)lj_num2int(n)); | ||
| 519 | } | ||
| 520 | |||
| 521 | /* Convert string to number. Error out for non-numeric string values. */ | 513 | /* Convert string to number. Error out for non-numeric string values. */ |
| 522 | static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o) | 514 | static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o) |
| 523 | { | 515 | { |
| @@ -539,8 +531,8 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, | |||
| 539 | /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ | 531 | /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ |
| 540 | if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && | 532 | if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && |
| 541 | tref_isinteger(rb) && tref_isinteger(rc) && | 533 | tref_isinteger(rb) && tref_isinteger(rc) && |
| 542 | numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), | 534 | lj_num2int_ok(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), |
| 543 | (int)op - (int)IR_ADD))) | 535 | (int)op - (int)IR_ADD))) |
| 544 | return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); | 536 | return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); |
| 545 | if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); | 537 | if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); |
| 546 | if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); | 538 | if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); |
| @@ -591,7 +583,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) | |||
| 591 | static int narrow_forl(jit_State *J, cTValue *o) | 583 | static int narrow_forl(jit_State *J, cTValue *o) |
| 592 | { | 584 | { |
| 593 | if (tvisint(o)) return 1; | 585 | if (tvisint(o)) return 1; |
| 594 | if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); | 586 | if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return lj_num2int_ok(numV(o)); |
| 595 | return 0; | 587 | return 0; |
| 596 | } | 588 | } |
| 597 | 589 | ||
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 8d025911..d29d1eab 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c | |||
| @@ -573,13 +573,9 @@ static void split_ir(jit_State *J) | |||
| 573 | case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ | 573 | case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ |
| 574 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | 574 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
| 575 | #if LJ_SOFTFP | 575 | #if LJ_SOFTFP |
| 576 | lj_assertJ(st != IRT_FLOAT, "bad CONV *64.float emitted"); | ||
| 576 | if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ | 577 | if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ |
| 577 | hi = split_call_l(J, hisubst, oir, ir, | 578 | hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_num2u64); |
| 578 | irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); | ||
| 579 | } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ | ||
| 580 | nir->o = IR_CALLN; | ||
| 581 | nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; | ||
| 582 | hi = split_emit(J, IRTI(IR_HIOP), nref, nref); | ||
| 583 | } | 579 | } |
| 584 | #else | 580 | #else |
| 585 | if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ | 581 | if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ |
| @@ -692,8 +688,9 @@ static void split_ir(jit_State *J) | |||
| 692 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; | 688 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; |
| 693 | } | 689 | } |
| 694 | } else if (st == IRT_FLOAT) { | 690 | } else if (st == IRT_FLOAT) { |
| 691 | lj_assertJ(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); | ||
| 695 | nir->o = IR_CALLN; | 692 | nir->o = IR_CALLN; |
| 696 | nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; | 693 | nir->op2 = IRCALL_softfp_f2i; |
| 697 | } else | 694 | } else |
| 698 | #endif | 695 | #endif |
| 699 | #if LJ_SOFTFP | 696 | #if LJ_SOFTFP |
| @@ -705,9 +702,7 @@ static void split_ir(jit_State *J) | |||
| 705 | } else { | 702 | } else { |
| 706 | split_call_l(J, hisubst, oir, ir, | 703 | split_call_l(J, hisubst, oir, ir, |
| 707 | #if LJ_32 && LJ_HASFFI | 704 | #if LJ_32 && LJ_HASFFI |
| 708 | st == IRT_NUM ? | 705 | st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i |
| 709 | (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : | ||
| 710 | (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) | ||
| 711 | #else | 706 | #else |
| 712 | IRCALL_softfp_d2i | 707 | IRCALL_softfp_d2i |
| 713 | #endif | 708 | #endif |
diff --git a/src/lj_parse.c b/src/lj_parse.c index 181ce4d7..832f6bf4 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c | |||
| @@ -522,9 +522,9 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) | |||
| 522 | ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv)); | 522 | ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv)); |
| 523 | else | 523 | else |
| 524 | #else | 524 | #else |
| 525 | lua_Number n = expr_numberV(e); | 525 | int64_t i64; |
| 526 | int32_t k = lj_num2int(n); | 526 | int32_t k; |
| 527 | if (checki16(k) && n == (lua_Number)k) | 527 | if (lj_num2int_cond(expr_numberV(e), i64, k, checki16((int32_t)i64))) |
| 528 | ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); | 528 | ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); |
| 529 | else | 529 | else |
| 530 | #endif | 530 | #endif |
| @@ -782,8 +782,9 @@ static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2) | |||
| 782 | setnumV(&o, n); | 782 | setnumV(&o, n); |
| 783 | if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */ | 783 | if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */ |
| 784 | if (LJ_DUALNUM) { | 784 | if (LJ_DUALNUM) { |
| 785 | int32_t k = lj_num2int(n); | 785 | int64_t i64; |
| 786 | if ((lua_Number)k == n) { | 786 | int32_t k; |
| 787 | if (lj_num2int_check(n, i64, k)) { | ||
| 787 | setintV(&e1->u.nval, k); | 788 | setintV(&e1->u.nval, k); |
| 788 | return 1; | 789 | return 1; |
| 789 | } | 790 | } |
| @@ -1386,10 +1387,10 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) | |||
| 1386 | if (tvisnum(&n->key)) { | 1387 | if (tvisnum(&n->key)) { |
| 1387 | TValue *tv = &((TValue *)kptr)[kidx]; | 1388 | TValue *tv = &((TValue *)kptr)[kidx]; |
| 1388 | if (LJ_DUALNUM) { | 1389 | if (LJ_DUALNUM) { |
| 1389 | lua_Number nn = numV(&n->key); | 1390 | int64_t i64; |
| 1390 | int32_t k = lj_num2int(nn); | 1391 | int32_t k; |
| 1391 | lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); | 1392 | lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); |
| 1392 | if ((lua_Number)k == nn) | 1393 | if (lj_num2int_check(numV(&n->key), i64, k)) |
| 1393 | setintV(tv, k); | 1394 | setintV(tv, k); |
| 1394 | else | 1395 | else |
| 1395 | *tv = n->key; | 1396 | *tv = n->key; |
| @@ -1656,9 +1657,9 @@ static void expr_index(FuncState *fs, ExpDesc *t, ExpDesc *e) | |||
| 1656 | } | 1657 | } |
| 1657 | } | 1658 | } |
| 1658 | #else | 1659 | #else |
| 1659 | lua_Number n = expr_numberV(e); | 1660 | int64_t i64; |
| 1660 | int32_t k = lj_num2int(n); | 1661 | int32_t k; |
| 1661 | if (checku8(k) && n == (lua_Number)k) { | 1662 | if (lj_num2int_cond(expr_numberV(e), i64, k, checku8((int32_t)i64))) { |
| 1662 | t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ | 1663 | t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ |
| 1663 | return; | 1664 | return; |
| 1664 | } | 1665 | } |
diff --git a/src/lj_record.c b/src/lj_record.c index 6543f274..536d7171 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
| @@ -351,9 +351,14 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) | |||
| 351 | } else { | 351 | } else { |
| 352 | cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); | 352 | cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); |
| 353 | if (t == IRT_INT) { | 353 | if (t == IRT_INT) { |
| 354 | int32_t k = numberVint(tv); | 354 | if (tvisint(tv)) { |
| 355 | if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ | 355 | return lj_ir_kint(J, intV(tv)); |
| 356 | return lj_ir_kint(J, k); | 356 | } else { |
| 357 | int64_t i64; | ||
| 358 | int32_t k; | ||
| 359 | if (lj_num2int_check(numV(tv), i64, k)) /* -0 is ok here. */ | ||
| 360 | return lj_ir_kint(J, k); | ||
| 361 | } | ||
| 357 | return 0; /* Type mismatch. */ | 362 | return 0; /* Type mismatch. */ |
| 358 | } else { | 363 | } else { |
| 359 | return lj_ir_knum(J, numberVnum(tv)); | 364 | return lj_ir_knum(J, numberVnum(tv)); |
| @@ -1426,9 +1431,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, | |||
| 1426 | /* Integer keys are looked up in the array part first. */ | 1431 | /* Integer keys are looked up in the array part first. */ |
| 1427 | key = ix->key; | 1432 | key = ix->key; |
| 1428 | if (tref_isnumber(key)) { | 1433 | if (tref_isnumber(key)) { |
| 1429 | int32_t k = numberVint(&ix->keyv); | 1434 | int32_t k; |
| 1430 | if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) | 1435 | if (tvisint(&ix->keyv)) { |
| 1431 | k = LJ_MAX_ASIZE; | 1436 | k = intV(&ix->keyv); |
| 1437 | } else { | ||
| 1438 | int64_t i64; | ||
| 1439 | if (!lj_num2int_check(numV(&ix->keyv), i64, k)) k = LJ_MAX_ASIZE; | ||
| 1440 | } | ||
| 1432 | if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ | 1441 | if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ |
| 1433 | TRef ikey = lj_opt_narrow_index(J, key); | 1442 | TRef ikey = lj_opt_narrow_index(J, key); |
| 1434 | TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); | 1443 | TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); |
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index bb649fc8..0936298d 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c | |||
| @@ -351,7 +351,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) | |||
| 351 | /* Add number formatted as signed integer to buffer. */ | 351 | /* Add number formatted as signed integer to buffer. */ |
| 352 | SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) | 352 | SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) |
| 353 | { | 353 | { |
| 354 | int64_t k = (int64_t)n; | 354 | int64_t k = lj_num2i64(n); |
| 355 | if (checki32(k) && sf == STRFMT_INT) | 355 | if (checki32(k) && sf == STRFMT_INT) |
| 356 | return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ | 356 | return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ |
| 357 | else | 357 | else |
| @@ -361,12 +361,7 @@ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) | |||
| 361 | /* Add number formatted as unsigned integer to buffer. */ | 361 | /* Add number formatted as unsigned integer to buffer. */ |
| 362 | SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) | 362 | SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) |
| 363 | { | 363 | { |
| 364 | int64_t k; | 364 | return lj_strfmt_putfxint(sb, sf, lj_num2u64(n)); |
| 365 | if (n >= 9223372036854775808.0) | ||
| 366 | k = (int64_t)(n - 18446744073709551616.0); | ||
| 367 | else | ||
| 368 | k = (int64_t)n; | ||
| 369 | return lj_strfmt_putfxint(sb, sf, (uint64_t)k); | ||
| 370 | } | 365 | } |
| 371 | 366 | ||
| 372 | /* Format stack arguments to buffer. */ | 367 | /* Format stack arguments to buffer. */ |
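The open-coded conversion deleted from lj_strfmt_putfnum_uint() is exactly the "union of the signed + unsigned range" behavior that lj_vm_num2u64() now centralizes. Kept here as a C reference for in-range inputs only; out-of-range doubles would hit the C undefined behavior the assembler routines exist to avoid.

```c
#include <stdint.h>

/* C reference for the double -> uint64_t semantics of lj_vm_num2u64(),
** mirroring the logic removed above: values in [2^63, 2^64) are biased
** down by 2^64 so the signed conversion stays in range; negatives and the
** plain signed range go through int64_t directly.
*/
static uint64_t num2u64_reference(double n)
{
  int64_t k;
  if (n >= 9223372036854775808.0)                 /* n >= 2^63 */
    k = (int64_t)(n - 18446744073709551616.0);    /* Bias by -2^64. */
  else
    k = (int64_t)n;
  return (uint64_t)k;
}
```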
diff --git a/src/lj_strscan.c b/src/lj_strscan.c index 502c78e9..fbb959c5 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c | |||
| @@ -523,10 +523,10 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, | |||
| 523 | fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); | 523 | fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); |
| 524 | 524 | ||
| 525 | /* Try to convert number to integer, if requested. */ | 525 | /* Try to convert number to integer, if requested. */ |
| 526 | if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT) && !tvismzero(o)) { | 526 | if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) { |
| 527 | double n = o->n; | 527 | int64_t tmp; |
| 528 | int32_t i = lj_num2int(n); | 528 | if (lj_num2int_check(o->n, tmp, o->i) && !tvismzero(o)) |
| 529 | if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; } | 529 | return STRSCAN_INT; |
| 530 | } | 530 | } |
| 531 | return fmt; | 531 | return fmt; |
| 532 | } | 532 | } |
diff --git a/src/lj_tab.c b/src/lj_tab.c index 62e33611..2959fadb 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c | |||
| @@ -295,9 +295,9 @@ static uint32_t countint(cTValue *key, uint32_t *bins) | |||
| 295 | { | 295 | { |
| 296 | lj_assertX(!tvisint(key), "bad integer key"); | 296 | lj_assertX(!tvisint(key), "bad integer key"); |
| 297 | if (tvisnum(key)) { | 297 | if (tvisnum(key)) { |
| 298 | lua_Number nk = numV(key); | 298 | int64_t i64; |
| 299 | int32_t k = lj_num2int(nk); | 299 | int32_t k; |
| 300 | if ((uint32_t)k < LJ_MAX_ASIZE && nk == (lua_Number)k) { | 300 | if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < LJ_MAX_ASIZE)) { |
| 301 | bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++; | 301 | bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++; |
| 302 | return 1; | 302 | return 1; |
| 303 | } | 303 | } |
| @@ -409,9 +409,9 @@ cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key) | |||
| 409 | if (tv) | 409 | if (tv) |
| 410 | return tv; | 410 | return tv; |
| 411 | } else if (tvisnum(key)) { | 411 | } else if (tvisnum(key)) { |
| 412 | lua_Number nk = numV(key); | 412 | int64_t i64; |
| 413 | int32_t k = lj_num2int(nk); | 413 | int32_t k; |
| 414 | if (nk == (lua_Number)k) { | 414 | if (lj_num2int_check(numV(key), i64, k)) { |
| 415 | cTValue *tv = lj_tab_getint(t, k); | 415 | cTValue *tv = lj_tab_getint(t, k); |
| 416 | if (tv) | 416 | if (tv) |
| 417 | return tv; | 417 | return tv; |
| @@ -542,9 +542,9 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) | |||
| 542 | } else if (tvisint(key)) { | 542 | } else if (tvisint(key)) { |
| 543 | return lj_tab_setint(L, t, intV(key)); | 543 | return lj_tab_setint(L, t, intV(key)); |
| 544 | } else if (tvisnum(key)) { | 544 | } else if (tvisnum(key)) { |
| 545 | lua_Number nk = numV(key); | 545 | int64_t i64; |
| 546 | int32_t k = lj_num2int(nk); | 546 | int32_t k; |
| 547 | if (nk == (lua_Number)k) | 547 | if (lj_num2int_check(numV(key), i64, k)) |
| 548 | return lj_tab_setint(L, t, k); | 548 | return lj_tab_setint(L, t, k); |
| 549 | if (tvisnan(key)) | 549 | if (tvisnan(key)) |
| 550 | lj_err_msg(L, LJ_ERR_NANIDX); | 550 | lj_err_msg(L, LJ_ERR_NANIDX); |
| @@ -580,9 +580,9 @@ uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key) | |||
| 580 | setnumV(&tmp, (lua_Number)k); | 580 | setnumV(&tmp, (lua_Number)k); |
| 581 | key = &tmp; | 581 | key = &tmp; |
| 582 | } else if (tvisnum(key)) { | 582 | } else if (tvisnum(key)) { |
| 583 | lua_Number nk = numV(key); | 583 | int64_t i64; |
| 584 | int32_t k = lj_num2int(nk); | 584 | int32_t k; |
| 585 | if ((uint32_t)k < t->asize && nk == (lua_Number)k) | 585 | if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < t->asize)) |
| 586 | return (uint32_t)k + 1; | 586 | return (uint32_t)k + 1; |
| 587 | } | 587 | } |
| 588 | if (!tvisnil(key)) { | 588 | if (!tvisnil(key)) { |
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index fa32a5d4..193102ee 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
| @@ -314,6 +314,7 @@ typedef enum { | |||
| 314 | XO_FSTPq = XO_(dd), XOg_FSTPq = 3, | 314 | XO_FSTPq = XO_(dd), XOg_FSTPq = 3, |
| 315 | XO_FISTPq = XO_(df), XOg_FISTPq = 7, | 315 | XO_FISTPq = XO_(df), XOg_FISTPq = 7, |
| 316 | XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, | 316 | XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, |
| 317 | XO_FADDd = XO_(d8), XOg_FADDd = 0, | ||
| 317 | XO_FADDq = XO_(dc), XOg_FADDq = 0, | 318 | XO_FADDq = XO_(dc), XOg_FADDq = 0, |
| 318 | XO_FLDCW = XO_(d9), XOg_FLDCW = 5, | 319 | XO_FLDCW = XO_(d9), XOg_FLDCW = 5, |
| 319 | XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 | 320 | XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 |
diff --git a/src/lj_trace.c b/src/lj_trace.c index 47d7faa5..ad329540 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
| @@ -317,32 +317,34 @@ void lj_trace_initstate(global_State *g) | |||
| 317 | tv[1].u64 = U64x(80000000,00000000); | 317 | tv[1].u64 = U64x(80000000,00000000); |
| 318 | 318 | ||
| 319 | /* Initialize 32/64 bit constants. */ | 319 | /* Initialize 32/64 bit constants. */ |
| 320 | #if LJ_TARGET_X64 || LJ_TARGET_MIPS64 | ||
| 321 | J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); | ||
| 322 | #endif | ||
| 320 | #if LJ_TARGET_X86ORX64 | 323 | #if LJ_TARGET_X86ORX64 |
| 321 | J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); | 324 | J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); |
| 322 | #if LJ_32 | ||
| 323 | J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); | ||
| 324 | #endif | ||
| 325 | J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); | 325 | J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); |
| 326 | J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; | ||
| 327 | #endif | 326 | #endif |
| 327 | #if LJ_TARGET_MIPS64 | ||
| 328 | J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); | ||
| 329 | #endif | ||
| 330 | #if LJ_TARGET_MIPS | ||
| 331 | J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); | ||
| 332 | #endif | ||
| 333 | |||
| 328 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 | 334 | #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 |
| 329 | J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); | 335 | J->k32[LJ_K32_M2P64] = 0xdf800000; |
| 336 | #endif | ||
| 337 | #if LJ_TARGET_MIPS64 | ||
| 338 | J->k32[LJ_K32_2P63] = 0x5f000000; | ||
| 330 | #endif | 339 | #endif |
| 331 | #if LJ_TARGET_PPC | 340 | #if LJ_TARGET_PPC |
| 332 | J->k32[LJ_K32_2P52_2P31] = 0x59800004; | 341 | J->k32[LJ_K32_2P52_2P31] = 0x59800004; |
| 333 | J->k32[LJ_K32_2P52] = 0x59800000; | 342 | J->k32[LJ_K32_2P52] = 0x59800000; |
| 334 | #endif | 343 | #endif |
| 335 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | 344 | #if LJ_TARGET_PPC |
| 336 | J->k32[LJ_K32_2P31] = 0x4f000000; | 345 | J->k32[LJ_K32_2P31] = 0x4f000000; |
| 337 | #endif | 346 | #endif |
| 338 | #if LJ_TARGET_MIPS | 347 | |
| 339 | J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); | ||
| 340 | #if LJ_64 | ||
| 341 | J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); | ||
| 342 | J->k32[LJ_K32_2P63] = 0x5f000000; | ||
| 343 | J->k32[LJ_K32_M2P64] = 0xdf800000; | ||
| 344 | #endif | ||
| 345 | #endif | ||
| 346 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS32 | 348 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS32 |
| 347 | J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; | 349 | J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; |
| 348 | J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; | 350 | J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; |
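The k32/k64 tables above store these constants as raw IEEE-754 bit patterns. A small stand-alone check of the encodings against the commented values, assuming IEEE-754 floats/doubles with the same endianness as the integer types:

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Sanity check for the constant encodings initialized above. */
static double d_from_bits(uint64_t u) { double d; memcpy(&d, &u, 8); return d; }
static float f_from_bits(uint32_t u) { float f; memcpy(&f, &u, 4); return f; }

int main(void)
{
  assert(d_from_bits(0x4338000000000000ULL) == 6755399441055744.0);      /* 2^52 + 2^51 */
  assert(d_from_bits(0x43f0000000000000ULL) == 18446744073709551616.0);  /* 2^64 */
  assert(d_from_bits(0xc3f0000000000000ULL) == -18446744073709551616.0); /* -2^64 */
  assert(d_from_bits(0x43e0000000000000ULL) == 9223372036854775808.0);   /* 2^63 */
  assert(d_from_bits(0x41e0000000000000ULL) == 2147483648.0);            /* 2^31 */
  assert(f_from_bits(0xdf800000u) == -18446744073709551616.0f);          /* -2^64 */
  assert(f_from_bits(0x5f000000u) == 9223372036854775808.0f);            /* 2^63 */
  assert(f_from_bits(0x4f000000u) == 2147483648.0f);                     /* 2^31 */
  return 0;
}
```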
diff --git a/src/lj_vm.h b/src/lj_vm.h index 9cc42613..96ad2d07 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
| @@ -37,13 +37,19 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); | |||
| 37 | #if LJ_TARGET_PPC | 37 | #if LJ_TARGET_PPC |
| 38 | void lj_vm_cachesync(void *start, void *end); | 38 | void lj_vm_cachesync(void *start, void *end); |
| 39 | #endif | 39 | #endif |
| 40 | LJ_ASMF double lj_vm_foldarith(double x, double y, int op); | 40 | LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op); |
| 41 | #if LJ_HASJIT | 41 | #if LJ_HASJIT |
| 42 | LJ_ASMF double lj_vm_foldfpm(double x, int op); | 42 | LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op); |
| 43 | #endif | 43 | #endif |
| 44 | #if !LJ_ARCH_HASFPU | 44 | #if LJ_SOFTFP && LJ_TARGET_MIPS64 |
| 45 | /* Declared in lj_obj.h: LJ_ASMF int32_t lj_vm_tobit(double x); */ | 45 | LJ_ASMF LJ_CONSTF int32_t lj_vm_tointg(double x); |
| 46 | #endif | 46 | #endif |
| 47 | /* Declared in lj_obj.h: | ||
| 48 | ** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x); | ||
| 49 | ** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x); | ||
| 50 | ** LJ_ASMF LJ_CONSTF uint64_t lj_vm_num2u64(double x); | ||
| 51 | ** LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x); | ||
| 52 | */ | ||
| 47 | 53 | ||
| 48 | /* Dispatch targets for recording and hooks. */ | 54 | /* Dispatch targets for recording and hooks. */ |
| 49 | LJ_ASMF void lj_vm_record(void); | 55 | LJ_ASMF void lj_vm_record(void); |
| @@ -62,15 +68,15 @@ LJ_ASMF char lj_vm_exit_interp[]; | |||
| 62 | #define lj_vm_floor floor | 68 | #define lj_vm_floor floor |
| 63 | #define lj_vm_ceil ceil | 69 | #define lj_vm_ceil ceil |
| 64 | #else | 70 | #else |
| 65 | LJ_ASMF double lj_vm_floor(double); | 71 | LJ_ASMF LJ_CONSTF double lj_vm_floor(double); |
| 66 | LJ_ASMF double lj_vm_ceil(double); | 72 | LJ_ASMF LJ_CONSTF double lj_vm_ceil(double); |
| 67 | #if LJ_TARGET_ARM | 73 | #if LJ_TARGET_ARM |
| 68 | LJ_ASMF double lj_vm_floor_sf(double); | 74 | LJ_ASMF LJ_CONSTF double lj_vm_floor_sf(double); |
| 69 | LJ_ASMF double lj_vm_ceil_sf(double); | 75 | LJ_ASMF LJ_CONSTF double lj_vm_ceil_sf(double); |
| 70 | #endif | 76 | #endif |
| 71 | #endif | 77 | #endif |
| 72 | #ifdef LUAJIT_NO_LOG2 | 78 | #ifdef LUAJIT_NO_LOG2 |
| 73 | LJ_ASMF double lj_vm_log2(double); | 79 | LJ_ASMF LJ_CONSTF double lj_vm_log2(double); |
| 74 | #else | 80 | #else |
| 75 | #define lj_vm_log2 log2 | 81 | #define lj_vm_log2 log2 |
| 76 | #endif | 82 | #endif |
| @@ -80,16 +86,16 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); | |||
| 80 | 86 | ||
| 81 | #if LJ_HASJIT | 87 | #if LJ_HASJIT |
| 82 | #if LJ_TARGET_X86ORX64 | 88 | #if LJ_TARGET_X86ORX64 |
| 83 | LJ_ASMF void lj_vm_floor_sse(void); | 89 | LJ_ASMF LJ_CONSTF void lj_vm_floor_sse(void); |
| 84 | LJ_ASMF void lj_vm_ceil_sse(void); | 90 | LJ_ASMF LJ_CONSTF void lj_vm_ceil_sse(void); |
| 85 | LJ_ASMF void lj_vm_trunc_sse(void); | 91 | LJ_ASMF LJ_CONSTF void lj_vm_trunc_sse(void); |
| 86 | #endif | 92 | #endif |
| 87 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 | 93 | #if LJ_TARGET_PPC || LJ_TARGET_ARM64 |
| 88 | #define lj_vm_trunc trunc | 94 | #define lj_vm_trunc trunc |
| 89 | #else | 95 | #else |
| 90 | LJ_ASMF double lj_vm_trunc(double); | 96 | LJ_ASMF LJ_CONSTF double lj_vm_trunc(double); |
| 91 | #if LJ_TARGET_ARM | 97 | #if LJ_TARGET_ARM |
| 92 | LJ_ASMF double lj_vm_trunc_sf(double); | 98 | LJ_ASMF LJ_CONSTF double lj_vm_trunc_sf(double); |
| 93 | #endif | 99 | #endif |
| 94 | #endif | 100 | #endif |
| 95 | #if LJ_HASFFI | 101 | #if LJ_HASFFI |
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 2c9b96cc..1495102f 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c | |||
| @@ -59,7 +59,7 @@ double lj_vm_foldarith(double x, double y, int op) | |||
| 59 | case IR_NEG - IR_ADD: return -x; break; | 59 | case IR_NEG - IR_ADD: return -x; break; |
| 60 | case IR_ABS - IR_ADD: return fabs(x); break; | 60 | case IR_ABS - IR_ADD: return fabs(x); break; |
| 61 | #if LJ_HASJIT | 61 | #if LJ_HASJIT |
| 62 | case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; | 62 | case IR_LDEXP - IR_ADD: return ldexp(x, lj_num2int(y)); break; |
| 63 | case IR_MIN - IR_ADD: return x < y ? x : y; break; | 63 | case IR_MIN - IR_ADD: return x < y ? x : y; break; |
| 64 | case IR_MAX - IR_ADD: return x > y ? x : y; break; | 64 | case IR_MAX - IR_ADD: return x > y ? x : y; break; |
| 65 | #endif | 65 | #endif |
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 86bef0cf..2cd7eedb 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc | |||
| @@ -2452,6 +2452,118 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2452 | | bx lr | 2452 | | bx lr |
| 2453 | | | 2453 | | |
| 2454 | |//----------------------------------------------------------------------- | 2454 | |//----------------------------------------------------------------------- |
| 2455 | |//-- Number conversion functions ---------------------------------------- | ||
| 2456 | |//----------------------------------------------------------------------- | ||
| 2457 | | | ||
| 2458 | |// int64_t lj_vm_num2int_check(double x) | ||
| 2459 | |->vm_num2int_check: | ||
| 2460 | |.if FPU | ||
| 2461 | |.if not HFABI | ||
| 2462 | | vmov d0, CARG1, CARG2 | ||
| 2463 | |.endif | ||
| 2464 | | vcvt.s32.f64 s4, d0 | ||
| 2465 | | vcvt.f64.s32 d1, s4 | ||
| 2466 | | vcmp.f64 d0, d1 | ||
| 2467 | | vmrs | ||
| 2468 | | bne >1 | ||
| 2469 | | vmov CRET1, s4 | ||
| 2470 | | mov CRET2, #0 | ||
| 2471 | | bx lr | ||
| 2472 | | | ||
| 2473 | |.else | ||
| 2474 | | | ||
| 2475 | | asr CARG4, CARG2, #31 // sign = 0 or -1. | ||
| 2476 | | lsl CARG2, CARG2, #1 | ||
| 2477 | | orrs RB, CARG2, CARG1 | ||
| 2478 | | bxeq lr // Return 0 for +-0. | ||
| 2479 | | mov RB, #1024 | ||
| 2480 | | add RB, RB, #30 | ||
| 2481 | | sub RB, RB, CARG2, lsr #21 | ||
| 2482 | | cmp RB, #32 | ||
| 2483 | | bhs >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32. | ||
| 2484 | | lsr CARG3, CARG1, #21 | ||
| 2485 | | orr CARG2, CARG3, CARG2, lsl #10 // Left-aligned mantissa. | ||
| 2486 | | rsb CARG3, RB, #32 | ||
| 2487 | | lsl CARG3, CARG2, CARG3 | ||
| 2488 | | orr CARG2, CARG2, #0x80000000 // Merge leading 1. | ||
| 2489 | | orrs CARG3, CARG3, CARG1, lsl #11 | ||
| 2490 | | lsr CARG1, CARG2, RB // lo = right-aligned absolute value. | ||
| 2491 | | bne >1 // Fail if fractional part != 0. | ||
| 2492 | | adds CRET1, CARG1, CARG4 | ||
| 2493 | | bmi >1 // Fail if lo+sign >= 0x80000000. | ||
| 2494 | | eor CRET1, CRET1, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign. | ||
| 2495 | | mov CRET2, #0 | ||
| 2496 | | bx lr | ||
| 2497 | |.endif | ||
| 2498 | |1: | ||
| 2499 | | mov CRET1, #0x80000000 | ||
| 2500 | | mov CRET2, #0x80000000 | ||
| 2501 | | bx lr | ||
| 2502 | | | ||
| 2503 | |// int64_t lj_vm_num2i64(double x) | ||
| 2504 | |->vm_num2i64: | ||
| 2505 | |// fallthrough, same as lj_vm_num2u64. | ||
| 2506 | | | ||
| 2507 | |// uint64_t lj_vm_num2u64(double x) | ||
| 2508 | |->vm_num2u64: | ||
| 2509 | |.if HFABI | ||
| 2510 | | vmov CARG1, CARG2, d0 | ||
| 2511 | |.endif | ||
| 2512 | | lsl RB, CARG2, #1 | ||
| 2513 | | lsr RB, RB, #21 | ||
| 2514 | | sub RB, RB, #1020 | ||
| 2515 | | sub RB, RB, #3 | ||
| 2516 | | cmp RB, #116 | ||
| 2517 | | bhs >3 // Exponent out of range. | ||
| 2518 | | asr CARG4, CARG2, #31 // sign = 0 or -1. | ||
| 2519 | | lsl CARG2, CARG2, #12 | ||
| 2520 | | lsr CARG2, CARG2, #12 | ||
| 2521 | | rsbs RB, RB, #52 | ||
| 2522 | | orr CARG2, CARG2, #0x00100000 | ||
| 2523 | | bmi >2 // Shift mantissa left or right? | ||
| 2524 | | lsr CARG1, CARG1, RB // 64 bit right shift. | ||
| 2525 | | lsr CARG3, CARG2, RB | ||
| 2526 | | rsb RB, RB, #32 | ||
| 2527 | | orr CARG1, CARG1, CARG2, lsl RB | ||
| 2528 | | rsb RB, RB, #0 | ||
| 2529 | | orr CARG1, CARG1, CARG2, lsr RB | ||
| 2530 | | adds CRET1, CARG1, CARG4 // m = sign?-m:m = (m+sign)^sign. | ||
| 2531 | | adc CRET2, CARG3, CARG4 | ||
| 2532 | |1: | ||
| 2533 | | eor CRET1, CRET1, CARG4 | ||
| 2534 | | eor CRET2, CRET2, CARG4 | ||
| 2535 | | bx lr | ||
| 2536 | |2: | ||
| 2537 | | rsb RB, RB, #0 | ||
| 2538 | | lsl CARG2, CARG2, RB // 64 bit left shift. | ||
| 2539 | | lsl CARG3, CARG1, RB | ||
| 2540 | | sub RB, RB, #32 | ||
| 2541 | | orr CARG2, CARG2, CARG1, lsl RB | ||
| 2542 | | rsb RB, RB, #0 | ||
| 2543 | | orr CARG2, CARG2, CARG1, lsr RB | ||
| 2544 | | adds CRET1, CARG3, CARG4 | ||
| 2545 | | adc CRET2, CARG2, CARG4 | ||
| 2546 | | b <1 | ||
| 2547 | |3: | ||
| 2548 | | mov CRET1, #0 | ||
| 2549 | | mov CRET2, #0 | ||
| 2550 | | bx lr | ||
| 2551 | | | ||
| 2552 | |// int32_t lj_vm_tobit(double x) | ||
| 2553 | |.if FPU | ||
| 2554 | |->vm_tobit: | ||
| 2555 | | vldr d1, >9 | ||
| 2556 | |.if not HFABI | ||
| 2557 | | vmov d0, CARG1, CARG2 | ||
| 2558 | |.endif | ||
| 2559 | | vadd.f64 d0, d0, d1 | ||
| 2560 | | vmov CARG1, s0 | ||
| 2561 | | bx lr | ||
| 2562 | |9: | ||
| 2563 | | .long 0, 0x43380000 // (double)(2^52 + 2^51). | ||
| 2564 | |.endif | ||
| 2565 | | | ||
| 2566 | |//----------------------------------------------------------------------- | ||
| 2455 | |//-- Miscellaneous functions -------------------------------------------- | 2567 | |//-- Miscellaneous functions -------------------------------------------- |
| 2456 | |//----------------------------------------------------------------------- | 2568 | |//----------------------------------------------------------------------- |
| 2457 | | | 2569 | | |
| @@ -4097,7 +4209,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4097 | | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] | 4209 | | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] |
| 4098 | | // Subsumes ins_next1 and ins_next2. | 4210 | | // Subsumes ins_next1 and ins_next2. |
| 4099 | | ldr INS, TRACE:CARG1->startins | 4211 | | ldr INS, TRACE:CARG1->startins |
| 4100 | | bfi INS, OP, #0, #8 | 4212 | | bic INS, INS, #0xff |
| 4213 | | orr INS, INS, OP | ||
| 4101 | | str INS, [PC], #4 | 4214 | | str INS, [PC], #4 |
| 4102 | | b <1 | 4215 | | b <1 |
| 4103 | |.endif | 4216 | |.endif |
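The vm_tobit routines added above (and the LJ_K64_TOBIT constant) all rely on the same 2^52 + 2^51 bias trick. A C model of the effect, assuming IEEE-754 doubles stored with the same endianness as integers:

```c
#include <stdint.h>
#include <string.h>

/* C model of the 2^52 + 2^51 trick behind lj_vm_tobit(): adding the bias
** leaves the rounded input, wrapped modulo 2^32, in the low 32 bits of the
** double's mantissa, which are then read back directly.
*/
static int32_t tobit_model(double x)
{
  double biased = x + 6755399441055744.0;  /* 2^52 + 2^51 */
  uint64_t bits;
  memcpy(&bits, &biased, sizeof(bits));    /* Bit-cast the biased result. */
  return (int32_t)(uint32_t)bits;          /* Low 32 mantissa bits. */
}
```

This is the same computation the removed lj_num2bit() inline performed with a TValue union, and it matches the BitOp conversion semantics referenced in lj_obj.h.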
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index a437b657..eb6d0c2f 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
| @@ -2156,6 +2156,42 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2156 | | ret | 2156 | | ret |
| 2157 | | | 2157 | | |
| 2158 | |//----------------------------------------------------------------------- | 2158 | |//----------------------------------------------------------------------- |
| 2159 | |//-- Number conversion functions ---------------------------------------- | ||
| 2160 | |//----------------------------------------------------------------------- | ||
| 2161 | | | ||
| 2162 | |// int64_t lj_vm_num2int_check(double x) | ||
| 2163 | |->vm_num2int_check: | ||
| 2164 | | fcvtzs CRET1w, FARG1 | ||
| 2165 | | scvtf FARG2, CRET1w | ||
| 2166 | | fcmp FARG2, FARG1 | ||
| 2167 | | bne >1 | ||
| 2168 | | ret | ||
| 2169 | |1: | ||
| 2170 | | mov CRET1, #0x8000000080000000 | ||
| 2171 | | ret | ||
| 2172 | | | ||
| 2173 | |// int64_t lj_vm_num2i64(double x) | ||
| 2174 | |->vm_num2i64: | ||
| 2175 | | fcvtzs CRET1, FARG1 | ||
| 2176 | | ret | ||
| 2177 | | | ||
| 2178 | |// uint64_t lj_vm_num2u64(double x) | ||
| 2179 | |->vm_num2u64: | ||
| 2180 | | fcvtzs CRET1, FARG1 | ||
| 2181 | | fcvtzu CARG2, FARG1 | ||
| 2182 | | cmn CRET1, #1 // Set overflow if CRET1 == INT64_MAX. | ||
| 2183 | | csel CRET1, CRET1, CARG2, vc // No overflow ? i64 : u64. | ||
| 2184 | | ret | ||
| 2185 | | | ||
| 2186 | |// int32_t lj_vm_tobit(double x) | ||
| 2187 | |->vm_tobit: | ||
| 2188 | | movz CRET1, #0x4338, lsl #48 // 2^52 + 2^51. | ||
| 2189 | | fmov FARG2, CRET1 | ||
| 2190 | | fadd FARG1, FARG1, FARG2 | ||
| 2191 | | fmov CRET1w, s0 | ||
| 2192 | | ret | ||
| 2193 | | | ||
| 2194 | |//----------------------------------------------------------------------- | ||
| 2159 | |//-- Miscellaneous functions -------------------------------------------- | 2195 | |//-- Miscellaneous functions -------------------------------------------- |
| 2160 | |//----------------------------------------------------------------------- | 2196 | |//----------------------------------------------------------------------- |
| 2161 | | | 2197 | | |
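The ARM64 vm_num2u64 above leans on fcvtzs saturating to INT64_MAX for inputs at or beyond 2^63: cmn CRET1, #1 sets the overflow flag exactly in that case, and csel then substitutes the unsigned fcvtzu result. C cannot express the saturating conversion directly, so here is a sketch with an explicit range test instead; out-of-range and NaN results are arch-dependent by contract, and the sketch simply picks 0 for them.

```c
#include <stdint.h>

/* Sketch of the selection the ARM64 sequence performs: signed conversion
** for the int64_t range, unsigned conversion for [2^63, 2^64).
*/
static uint64_t num2u64_select_model(double x)
{
  if (x >= 9223372036854775808.0 && x < 18446744073709551616.0)
    return (uint64_t)x;              /* fcvtzu path. */
  if (x >= -9223372036854775808.0 && x < 9223372036854775808.0)
    return (uint64_t)(int64_t)x;     /* fcvtzs path. */
  return 0;                          /* Arch-dependent by contract. */
}
```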
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 02e588ee..8a6b8270 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc | |||
| @@ -85,6 +85,7 @@ | |||
| 85 | | | 85 | | |
| 86 | |.if FPU | 86 | |.if FPU |
| 87 | |.define FARG1, f12 | 87 | |.define FARG1, f12 |
| 88 | |.define FARG1HI, f13 | ||
| 88 | |.define FARG2, f14 | 89 | |.define FARG2, f14 |
| 89 | | | 90 | | |
| 90 | |.define FRET1, f0 | 91 | |.define FRET1, f0 |
| @@ -2560,7 +2561,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2560 | | mtc1 r0, f4 | 2561 | | mtc1 r0, f4 |
| 2561 | | mtc1 TMP0, f5 | 2562 | | mtc1 TMP0, f5 |
| 2562 | | abs.d FRET2, FARG1 // |x| | 2563 | | abs.d FRET2, FARG1 // |x| |
| 2563 | | mfc1 AT, f13 | 2564 | | mfc1 AT, FARG1HI |
| 2564 | | c.olt.d 0, FRET2, f4 | 2565 | | c.olt.d 0, FRET2, f4 |
| 2565 | | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | 2566 | | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 |
| 2566 | | bc1f 0, >1 // Truncate only if |x| < 2^52. | 2567 | | bc1f 0, >1 // Truncate only if |x| < 2^52. |
| @@ -2822,6 +2823,122 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2822 | | sfmin_max max, vm_sfcmpogt | 2823 | | sfmin_max max, vm_sfcmpogt |
| 2823 | | | 2824 | | |
| 2824 | |//----------------------------------------------------------------------- | 2825 | |//----------------------------------------------------------------------- |
| 2826 | |//-- Number conversion functions ---------------------------------------- | ||
| 2827 | |//----------------------------------------------------------------------- | ||
| 2828 | | | ||
| 2829 | |// int64_t lj_vm_num2int_check(double x) | ||
| 2830 | |->vm_num2int_check: | ||
| 2831 | |.if FPU | ||
| 2832 | | trunc.w.d FARG2, FARG1 | ||
| 2833 | | mfc1 SFRETLO, FARG2 | ||
| 2834 | | cvt.d.w FARG2, FARG2 | ||
| 2835 | | c.eq.d FARG1, FARG2 | ||
| 2836 | | bc1f 0, >2 | ||
| 2837 | |. nop | ||
| 2838 | | jr ra | ||
| 2839 | |. move SFRETHI, r0 | ||
| 2840 | | | ||
| 2841 | |.else | ||
| 2842 | | | ||
| 2843 | | sll SFRETLO, SFARG1HI, 1 | ||
| 2844 | | or SFRETHI, SFRETLO, SFARG1LO | ||
| 2845 | | beqz SFRETHI, >1 // Return 0 for +-0. | ||
| 2846 | |. li TMP0, 1054 | ||
| 2847 | | srl AT, SFRETLO, 21 | ||
| 2848 | | subu TMP0, TMP0, AT | ||
| 2849 | | sltiu AT, TMP0, 32 | ||
| 2850 | | beqz AT, >2 // Fail if |x| < 0x1p0 || |x| >= 0x1p32. | ||
| 2851 | |. sll SFRETLO, SFARG1HI, 11 | ||
| 2852 | | srl SFRETHI, SFARG1LO, 21 | ||
| 2853 | | negu TMP1, TMP0 | ||
| 2854 | | or SFRETLO, SFRETLO, SFRETHI // Left-aligned mantissa. | ||
| 2855 | | sllv TMP2, SFRETLO, TMP1 | ||
| 2856 | | lui AT, 0x8000 | ||
| 2857 | | sll SFRETHI, SFARG1LO, 11 | ||
| 2858 | | or SFRETLO, SFRETLO, AT // Merge leading 1. | ||
| 2859 | | or TMP2, TMP2, SFRETHI | ||
| 2860 | | srlv SFRETLO, SFRETLO, TMP0 // lo = right-aligned absolute value. | ||
| 2861 | | bnez TMP2, >2 // Fail if fractional part != 0. | ||
| 2862 | |. sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1. | ||
| 2863 | | addu SFRETLO, SFRETLO, SFARG1HI | ||
| 2864 | | bltz SFRETLO, >2 // Fail if lo+sign >= 0x80000000. | ||
| 2865 | |. xor SFRETLO, SFRETLO, SFARG1HI // lo = sign?-lo:lo = (lo+sign)^sign. | ||
| 2866 | |1: | ||
| 2867 | | jr ra | ||
| 2868 | |. move SFRETHI, r0 | ||
| 2869 | |.endif | ||
| 2870 | |2: // Not an integer, return 0x8000000080000000LL. | ||
| 2871 | | lui SFRETHI, 0x8000 | ||
| 2872 | | jr ra | ||
| 2873 | |. lui SFRETLO, 0x8000 | ||
| 2874 | | | ||
| 2875 | |// int64_t lj_vm_num2i64(double x) | ||
| 2876 | |->vm_num2i64: | ||
| 2877 | |// fallthrough, same as lj_vm_num2u64. | ||
| 2878 | | | ||
| 2879 | |// uint64_t lj_vm_num2u64(double x) | ||
| 2880 | |->vm_num2u64: | ||
| 2881 | |.if FPU | ||
| 2882 | | mfc1 SFARG1HI, FARG1HI | ||
| 2883 | | mfc1 SFARG1LO, FARG1 | ||
| 2884 | |.endif | ||
| 2885 | | srl TMP0, SFARG1HI, 20 | ||
| 2886 | | andi TMP0, TMP0, 0x7ff | ||
| 2887 | | addiu SFRETLO, TMP0, -1023 | ||
| 2888 | | sltiu SFRETLO, SFRETLO, 116 | ||
| 2889 | | beqz SFRETLO, >3 // Exponent out of range. | ||
| 2890 | |. sll SFRETHI, SFARG1HI, 12 | ||
| 2891 | | lui AT, 0x0010 | ||
| 2892 | | srl SFRETHI, SFRETHI, 12 | ||
| 2893 | | addiu TMP0, TMP0, -1075 | ||
| 2894 | | sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1. | ||
| 2895 | | bgez TMP0, >2 // Shift mantissa left or right? | ||
| 2896 | |. or SFRETHI, SFRETHI, AT // Merge leading 1 into masked mantissa. | ||
| 2897 | | subu TMP1, r0, TMP0 | ||
| 2898 | | sll AT, SFRETHI, 1 | ||
| 2899 | | nor TMP0, r0, TMP1 | ||
| 2900 | | srlv SFRETHI, SFRETHI, TMP1 // Shift hi mantissa right for low exp. | ||
| 2901 | | sllv AT, AT, TMP0 // Shifted-out hi mantissa. | ||
| 2902 | | srlv SFRETLO, SFARG1LO, TMP1 // Shift lo mantissa right for low exp. | ||
| 2903 | | andi TMP1, TMP1, 0x20 // Conditional right shift by 32. | ||
| 2904 | | or AT, AT, SFRETLO // Merge into lo mantissa. | ||
| 2905 | | movn AT, SFRETHI, TMP1 | ||
| 2906 | | movn SFRETHI, r0, TMP1 | ||
| 2907 | |1: | ||
| 2908 | | addu SFRETLO, AT, SFARG1HI // m = sign?-m:m = (m+sign)^sign. | ||
| 2909 | | addu SFRETHI, SFRETHI, SFARG1HI | ||
| 2910 | | sltu TMP0, SFRETLO, AT // Carry | ||
| 2911 | | addu SFRETHI, SFRETHI, TMP0 | ||
| 2912 | | xor SFRETLO, SFRETLO, SFARG1HI | ||
| 2913 | | jr ra | ||
| 2914 | |. xor SFRETHI, SFRETHI, SFARG1HI | ||
| 2915 | |2: | ||
| 2916 | | srl TMP2, SFARG1LO, 1 | ||
| 2917 | | nor AT, r0, TMP0 | ||
| 2918 | | sllv SFRETHI, SFRETHI, TMP0 // Shift hi mantissa left for high exp. | ||
| 2919 | | srlv TMP2, TMP2, AT // Shifted-out lo mantissa. | ||
| 2920 | | sllv AT, SFARG1LO, TMP0 // Shift lo mantissa left for high exp. | ||
| 2921 | | andi TMP0, TMP0, 0x20 // Conditional left shift by 32. | ||
| 2922 | | or SFRETHI, SFRETHI, TMP2 // Merge into hi mantissa. | ||
| 2923 | | movn SFRETHI, AT, TMP0 | ||
| 2924 | | b <1 | ||
| 2925 | |. movn AT, r0, TMP0 | ||
| 2926 | |3: | ||
| 2927 | | jr ra | ||
| 2928 | |. li SFRETHI, 0 | ||
| 2929 | | | ||
| 2930 | |// int32_t lj_vm_tobit(double x) | ||
| 2931 | |.if FPU | ||
| 2932 | |->vm_tobit: | ||
| 2933 | | lui AT, 0x59c0 // 2^52 + 2^51 (float). | ||
| 2934 | | mtc1 AT, FARG2 | ||
| 2935 | | cvt.d.s FARG2, FARG2 | ||
| 2936 | | add.d FARG1, FARG1, FARG2 | ||
| 2937 | | jr ra | ||
| 2938 | |. mfc1 CRET1, FARG1 | ||
| 2939 | |.endif | ||
| 2940 | | | ||
| 2941 | |//----------------------------------------------------------------------- | ||
| 2825 | |//-- Miscellaneous functions -------------------------------------------- | 2942 | |//-- Miscellaneous functions -------------------------------------------- |
| 2826 | |//----------------------------------------------------------------------- | 2943 | |//----------------------------------------------------------------------- |
| 2827 | | | 2944 | | |
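For the FPU-less builds, the vm_num2u64 path above decodes the double by hand: extract the biased exponent, bail out when it is outside the accepted window, merge the implicit leading 1 into the mantissa, shift by (exponent - 1075), and apply the sign. A rough C model of that path follows; names are illustrative, IEEE-754 layout and matching endianness are assumed, and out-of-range inputs (which are arch-dependent by contract) are simply mapped to 0.

```c
#include <stdint.h>
#include <string.h>

/* Rough C model of the soft-float vm_num2u64 path above. */
static uint64_t num2u64_softfp_model(double x)
{
  uint64_t bits, m, v;
  int e, sh;
  memcpy(&bits, &x, sizeof(bits));
  e = (int)((bits >> 52) & 0x7ff);                    /* Biased exponent. */
  if ((unsigned)(e - 1023) >= 116) return 0;          /* Exponent out of range. */
  m = (bits & 0x000fffffffffffffULL) | (1ULL << 52);  /* Mantissa with leading 1. */
  sh = e - 1075;                                      /* Shift relative to 2^52. */
  v = sh >= 0 ? m << sh : m >> -sh;                   /* Align mantissa, truncating. */
  return (bits >> 63) ? (uint64_t)0 - v : v;          /* Apply sign (two's complement). */
}
```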
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 859c0aee..4dc40d8a 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc | |||
| @@ -2113,7 +2113,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2113 | | dinsu CRET2, AT, 21, 21 | 2113 | | dinsu CRET2, AT, 21, 21 |
| 2114 | | slt AT, CARG1, r0 | 2114 | | slt AT, CARG1, r0 |
| 2115 | | dsrlv CRET1, CRET2, TMP0 | 2115 | | dsrlv CRET1, CRET2, TMP0 |
| 2116 | | dsubu CARG1, r0, CRET1 | 2116 | | negu CARG1, CRET1 |
| 2117 | |.if MIPSR6 | 2117 | |.if MIPSR6 |
| 2118 | | seleqz CRET1, CRET1, AT | 2118 | | seleqz CRET1, CRET1, AT |
| 2119 | | selnez CARG1, CARG1, AT | 2119 | | selnez CARG1, CARG1, AT |
| @@ -2121,20 +2121,12 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2121 | |.else | 2121 | |.else |
| 2122 | | movn CRET1, CARG1, AT | 2122 | | movn CRET1, CARG1, AT |
| 2123 | |.endif | 2123 | |.endif |
| 2124 | | li CARG1, 64 | 2124 | | negu TMP0, TMP0 |
| 2125 | | subu TMP0, CARG1, TMP0 | ||
| 2126 | | dsllv CRET2, CRET2, TMP0 // Integer check. | 2125 | | dsllv CRET2, CRET2, TMP0 // Integer check. |
| 2127 | | sextw AT, CRET1 | 2126 | | sextw AT, CRET1 |
| 2128 | | xor AT, CRET1, AT // Range check. | 2127 | | xor AT, CRET1, AT // Range check. |
| 2129 | |.if MIPSR6 | ||
| 2130 | | seleqz AT, AT, CRET2 | ||
| 2131 | | selnez CRET2, CRET2, CRET2 | ||
| 2132 | | jr ra | 2128 | | jr ra |
| 2133 | |. or CRET2, AT, CRET2 | 2129 | |. or CRET2, AT, CRET2 |
| 2134 | |.else | ||
| 2135 | | jr ra | ||
| 2136 | |. movz CRET2, AT, CRET2 | ||
| 2137 | |.endif | ||
| 2138 | |1: | 2130 | |1: |
| 2139 | | jr ra | 2131 | | jr ra |
| 2140 | |. li CRET2, 1 | 2132 | |. li CRET2, 1 |
| @@ -2929,6 +2921,136 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2929 | | sfmin_max max, vm_sfcmpogt | 2921 | | sfmin_max max, vm_sfcmpogt |
| 2930 | | | 2922 | | |
| 2931 | |//----------------------------------------------------------------------- | 2923 | |//----------------------------------------------------------------------- |
| 2924 | |//-- Number conversion functions ---------------------------------------- | ||
| 2925 | |//----------------------------------------------------------------------- | ||
| 2926 | | | ||
| 2927 | |// int64_t lj_vm_num2int_check(double x) | ||
| 2928 | |->vm_num2int_check: | ||
| 2929 | |.if FPU | ||
| 2930 | | trunc.w.d FARG2, FARG1 | ||
| 2931 | | mfc1 CRET1, FARG2 | ||
| 2932 | | cvt.d.w FARG2, FARG2 | ||
| 2933 | |.if MIPSR6 | ||
| 2934 | | cmp.eq.d FARG2, FARG1, FARG2 | ||
| 2935 | | bc1eqz FARG2, >2 | ||
| 2936 | |.else | ||
| 2937 | | c.eq.d FARG1, FARG2 | ||
| 2938 | | bc1f 0, >2 | ||
| 2939 | |.endif | ||
| 2940 | |. nop | ||
| 2941 | | jr ra | ||
| 2942 | |. zextw CRET1, CRET1 | ||
| 2943 | | | ||
| 2944 | |.else | ||
| 2945 | | | ||
| 2946 | | dsll CRET2, CARG1, 1 | ||
| 2947 | | beqz CRET2, >1 | ||
| 2948 | |. li TMP0, 1076 | ||
| 2949 | | dsrl AT, CRET2, 53 | ||
| 2950 | | dsubu TMP0, TMP0, AT | ||
| 2951 | | sltiu AT, TMP0, 54 | ||
| 2952 | | beqz AT, >2 | ||
| 2953 | |. dextm CRET2, CRET2, 0, 20 | ||
| 2954 | | dinsu CRET2, AT, 21, 21 | ||
| 2955 | | slt AT, CARG1, r0 | ||
| 2956 | | dsrlv CRET1, CRET2, TMP0 | ||
| 2957 | | negu CARG1, CRET1 | ||
| 2958 | |.if MIPSR6 | ||
| 2959 | | seleqz CRET1, CRET1, AT | ||
| 2960 | | selnez CARG1, CARG1, AT | ||
| 2961 | | or CRET1, CRET1, CARG1 | ||
| 2962 | |.else | ||
| 2963 | | movn CRET1, CARG1, AT | ||
| 2964 | |.endif | ||
| 2965 | | negu TMP0, TMP0 | ||
| 2966 | | dsllv CRET2, CRET2, TMP0 // Integer check. | ||
| 2967 | | sextw AT, CRET1 | ||
| 2968 | | xor AT, CRET1, AT // Range check. | ||
| 2969 | | or AT, AT, CRET2 | ||
| 2970 | | bnez AT, >2 | ||
| 2971 | |. nop | ||
| 2972 | | jr ra | ||
| 2973 | |. zextw CRET1, CRET1 | ||
| 2974 | |1: | ||
| 2975 | | jr ra | ||
| 2976 | |. move CRET1, r0 | ||
| 2977 | |.endif | ||
| 2978 | |2: | ||
| 2979 | | lui CRET1, 0x8000 | ||
| 2980 | | dsll CRET1, CRET1, 16 | ||
| 2981 | | ori CRET1, CRET1, 0x8000 | ||
| 2982 | | jr ra | ||
| 2983 | |. dsll CRET1, CRET1, 16 | ||
| 2984 | | | ||
| 2985 | |// int64_t lj_vm_num2i64(double x) | ||
| 2986 | |->vm_num2i64: | ||
| 2987 | |.if FPU | ||
| 2988 | | trunc.l.d FARG1, FARG1 | ||
| 2989 | | jr ra | ||
| 2990 | |. dmfc1 CRET1, FARG1 | ||
| 2991 | |.else | ||
| 2992 | |// fallthrough, same as lj_vm_num2u64 for soft-float. | ||
| 2993 | |.endif | ||
| 2994 | | | ||
| 2995 | |// uint64_t lj_vm_num2u64(double x) | ||
| 2996 | |->vm_num2u64: | ||
| 2997 | |.if FPU | ||
| 2998 | | trunc.l.d FARG2, FARG1 | ||
| 2999 | | dmfc1 CRET1, FARG2 | ||
| 3000 | | li AT, -1 | ||
| 3001 | | dsrl AT, AT, 1 | ||
| 3002 | | beq CRET1, AT, >1 | ||
| 3003 | |. lui AT, 0xdf80 // -2^64 (float). | ||
| 3004 | | jr ra | ||
| 3005 | |. nop | ||
| 3006 | |1: | ||
| 3007 | | mtc1 AT, FARG2 | ||
| 3008 | | cvt.d.s FARG2, FARG2 | ||
| 3009 | | add.d FARG1, FARG1, FARG2 | ||
| 3010 | | trunc.l.d FARG2, FARG1 | ||
| 3011 | | jr ra | ||
| 3012 | |. dmfc1 CRET1, FARG2 | ||
| 3013 | | | ||
| 3014 | |.else | ||
| 3015 | | | ||
| 3016 | | dextu CARG2, CARG1, 20, 10 | ||
| 3017 | | addiu AT, CARG2, -1023 | ||
| 3018 | | sltiu AT, AT, 116 | ||
| 3019 | | beqz AT, >2 // Exponent out of range. | ||
| 3020 | |. addiu CARG2, CARG2, -1075 | ||
| 3021 | | dextm CRET1, CARG1, 0, 19 | ||
| 3022 | | dsll AT, AT, 52 | ||
| 3023 | | dsra CARG1, CARG1, 63 // sign = 0 or -1. | ||
| 3024 | | bgez CARG2, >1 // Shift mantissa left or right? | ||
| 3025 | |. or CRET1, CRET1, AT // Merge leading 1 into masked mantissa. | ||
| 3026 | | subu CARG2, r0, CARG2 | ||
| 3027 | | dsrlv CRET1, CRET1, CARG2 // Shift mantissa right for low exp. | ||
| 3028 | | daddu CRET1, CRET1, CARG1 | ||
| 3029 | | jr ra | ||
| 3030 | |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign. | ||
| 3031 | |1: | ||
| 3032 | | dsllv CRET1, CRET1, CARG2 // Shift mantissa left for high exp. | ||
| 3033 | | daddu CRET1, CRET1, CARG1 | ||
| 3034 | | jr ra | ||
| 3035 | |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign. | ||
| 3036 | |2: | ||
| 3037 | | jr ra | ||
| 3038 | |. move CRET1, r0 | ||
| 3039 | |.endif | ||
| 3040 | | | ||
| 3041 | |// int32_t lj_vm_tobit(double x) | ||
| 3042 | |.if FPU | ||
| 3043 | |->vm_tobit: | ||
| 3044 | | lui AT, 0x59c0 // 2^52 + 2^51 (float). | ||
| 3045 | | mtc1 AT, FARG2 | ||
| 3046 | | cvt.d.s FARG2, FARG2 | ||
| 3047 | | add.d FARG1, FARG1, FARG2 | ||
| 3048 | | mfc1 CRET1, FARG1 | ||
| 3049 | | jr ra | ||
| 3050 | |. sextw CRET1, CRET1 | ||
| 3051 | |.endif | ||
| 3052 | | | ||
| 3053 | |//----------------------------------------------------------------------- | ||
| 2932 | |//-- Miscellaneous functions -------------------------------------------- | 3054 | |//-- Miscellaneous functions -------------------------------------------- |
| 2933 | |//----------------------------------------------------------------------- | 3055 | |//----------------------------------------------------------------------- |
| 2934 | | | 3056 | | |
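For builds without an FPU, the MIPS64 `vm_num2u64` path above (shared by `vm_num2i64` via the fallthrough) decodes the double by hand: extract the biased exponent, merge the implicit leading 1 into the mantissa, shift by exponent minus 1075, and apply the sign with the `(m + sign) ^ sign` idiom noted in the comments. A rough C model of those steps, with illustrative naming and assuming IEEE-754 doubles:

```c
#include <stdint.h>
#include <string.h>

/* Rough C model of the soft-float vm_num2u64 path (no FPU): decode the
** IEEE-754 bit pattern and truncate toward zero, wrapping modulo 2^64.
*/
static uint64_t num2u64_soft_ref(double x)
{
  uint64_t bits, m;
  int64_t sign;
  int32_t e;
  memcpy(&bits, &x, sizeof(bits));
  e = (int32_t)((bits >> 52) & 0x7ff);                 /* biased exponent */
  if ((uint32_t)(e - 1023) >= 116) return 0;           /* |x| < 1, NaN, Inf: exponent out of range */
  m = (bits & 0x000fffffffffffffULL) | (1ULL << 52);   /* merge leading 1 into masked mantissa */
  sign = (int64_t)bits >> 63;                          /* sign = 0 or -1 */
  e -= 1075;                                           /* alignment shift for the mantissa */
  m = e >= 0 ? m << e : m >> -e;                       /* shift mantissa left or right */
  return (m + (uint64_t)sign) ^ (uint64_t)sign;        /* m = sign ? -m : m */
}
```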
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 2ddeefbf..1761e39b 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc | |||
| @@ -3160,6 +3160,152 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3160 | | blr | 3160 | | blr |
| 3161 | | | 3161 | | |
| 3162 | |//----------------------------------------------------------------------- | 3162 | |//----------------------------------------------------------------------- |
| 3163 | |//-- Number conversion functions ---------------------------------------- | ||
| 3164 | |//----------------------------------------------------------------------- | ||
| 3165 | | | ||
| 3166 | |// int64_t lj_vm_num2int_check(double x) | ||
| 3167 | |->vm_num2int_check: | ||
| 3168 | |.if FPU | ||
| 3169 | | subi sp, sp, 16 | ||
| 3170 | | stfd FARG1, 0(sp) | ||
| 3171 | | lwz CARG1, 0(sp) | ||
| 3172 | | lwz CARG2, 4(sp) | ||
| 3173 | |.endif | ||
| 3174 | | slwi TMP1, CARG1, 1 | ||
| 3175 | |.if PPE | ||
| 3176 | | or TMP1, TMP1, CARG2 | ||
| 3177 | | cmpwi TMP1, 0 | ||
| 3178 | |.else | ||
| 3179 | | or. TMP1, TMP1, CARG2 | ||
| 3180 | |.endif | ||
| 3181 | | beq >2 // Return 0 for +-0. | ||
| 3182 | | rlwinm RB, CARG1, 12, 21, 31 | ||
| 3183 | | subfic RB, RB, 1054 | ||
| 3184 | | cmplwi RB, 32 | ||
| 3185 | | bge >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32. | ||
| 3186 | | slwi CARG3, CARG1, 11 | ||
| 3187 | | rlwimi CARG3, CARG2, 11, 21, 31 // Left-aligned mantissa. | ||
| 3188 | | subfic TMP1, RB, 32 | ||
| 3189 | | slw TMP1, CARG3, TMP1 | ||
| 3190 | | slwi TMP2, CARG2, 11 | ||
| 3191 | |.if PPE | ||
| 3192 | | or. TMP1, TMP1, TMP2 | ||
| 3193 | |.else | ||
| 3194 | | or TMP1, TMP1, TMP2 | ||
| 3195 | | cmpwi TMP1, 0 | ||
| 3196 | |.endif | ||
| 3197 | | bne >1 // Fail if fractional part != 0. | ||
| 3198 | | oris CARG3, CARG3, 0x8000 // Merge leading 1. | ||
| 3199 | | srw CRET2, CARG3, RB // lo = right-aligned absolute value. | ||
| 3200 | | srawi CARG4, CARG1, 31 // sign = 0 or -1. | ||
| 3201 | |.if GPR64 | ||
| 3202 | | add CRET2, CRET2, CARG4 | ||
| 3203 | | cmpwi CRET2, 0 | ||
| 3204 | |.else | ||
| 3205 | | add. CRET2, CRET2, CARG4 | ||
| 3206 | |.endif | ||
| 3207 | | blt >1 // Fail if out of int32_t range. | ||
| 3208 | | xor CRET2, CRET2, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign. | ||
| 3209 | |2: | ||
| 3210 | |.if GPR64 | ||
| 3211 | | rldicl CRET1, CRET1, 0, 32 | ||
| 3212 | |.else | ||
| 3213 | | li CRET1, 0 | ||
| 3214 | |.endif | ||
| 3215 | |.if FPU | ||
| 3216 | | addi sp, sp, 16 | ||
| 3217 | |.endif | ||
| 3218 | | blr | ||
| 3219 | |1: | ||
| 3220 | |.if GPR64 | ||
| 3221 | | lus CRET1, 0x8000 | ||
| 3222 | | rldicr CRET1, CRET1, 32, 32 | ||
| 3223 | |.else | ||
| 3224 | | lus CRET1, 0x8000 | ||
| 3225 | | lus CRET2, 0x8000 | ||
| 3226 | |.endif | ||
| 3227 | |.if FPU | ||
| 3228 | | addi sp, sp, 16 | ||
| 3229 | |.endif | ||
| 3230 | | blr | ||
| 3231 | | | ||
| 3232 | |// int64_t lj_vm_num2i64(double x) | ||
| 3233 | |->vm_num2i64: | ||
| 3234 | |// fallthrough, same as lj_vm_num2u64. | ||
| 3235 | | | ||
| 3236 | |// uint64_t lj_vm_num2u64(double x) | ||
| 3237 | |->vm_num2u64: | ||
| 3238 | |.if FPU | ||
| 3239 | | subi sp, sp, 16 | ||
| 3240 | | stfd FARG1, 0(sp) | ||
| 3241 | | lwz CARG1, 0(sp) | ||
| 3242 | | lwz CARG2, 4(sp) | ||
| 3243 | |.endif | ||
| 3244 | | rlwinm RB, CARG1, 12, 21, 31 | ||
| 3245 | | addi RB, RB, -1023 | ||
| 3246 | | cmplwi RB, 116 | ||
| 3247 | | bge >3 // Exponent out of range. | ||
| 3248 | | srawi CARG4, CARG1, 31 // sign = 0 or -1. | ||
| 3249 | | clrlwi CARG1, CARG1, 12 | ||
| 3250 | | subfic RB, RB, 52 | ||
| 3251 | | oris CARG1, CARG1, 0x0010 | ||
| 3252 | | cmpwi RB, 0 | ||
| 3253 | | blt >2 // Shift mantissa left or right? | ||
| 3254 | | subfic TMP1, RB, 32 // 64 bit right shift. | ||
| 3255 | | srw CARG2, CARG2, RB | ||
| 3256 | | slw TMP2, CARG1, TMP1 | ||
| 3257 | | addi TMP1, RB, -32 | ||
| 3258 | | or CARG2, CARG2, TMP2 | ||
| 3259 | | srw TMP2, CARG1, TMP1 | ||
| 3260 | | or CARG2, CARG2, TMP2 | ||
| 3261 | | srw CARG1, CARG1, RB | ||
| 3262 | |1: | ||
| 3263 | | addc CARG2, CARG2, CARG4 | ||
| 3264 | | adde CARG1, CARG1, CARG4 | ||
| 3265 | | xor CRET2, CARG2, CARG4 | ||
| 3266 | | xor CRET1, CARG1, CARG4 | ||
| 3267 | |.if GPR64 | ||
| 3268 | | rldimi CRET2, CRET1, 0, 32 | ||
| 3269 | | mr CRET1, CRET2 | ||
| 3270 | |.endif | ||
| 3271 | | addi sp, sp, 16 | ||
| 3272 | | blr | ||
| 3273 | |2: | ||
| 3274 | | subfic TMP1, RB, 0 // 64 bit left shift. | ||
| 3275 | | addi RB, RB, -32 | ||
| 3276 | | slw CARG1, CARG1, TMP1 | ||
| 3277 | | srw TMP2, CARG2, RB | ||
| 3278 | | addi RB, TMP1, -32 | ||
| 3279 | | or CARG1, CARG1, TMP2 | ||
| 3280 | | slw TMP2, CARG2, RB | ||
| 3281 | | or CARG1, CARG1, TMP2 | ||
| 3282 | | slw CARG2, CARG2, TMP1 | ||
| 3283 | | b <1 | ||
| 3284 | |3: | ||
| 3285 | | li CRET1, 0 | ||
| 3286 | |.if not GPR64 | ||
| 3287 | | li CRET2, 0 | ||
| 3288 | |.endif | ||
| 3289 | |.if FPU | ||
| 3290 | | addi sp, sp, 16 | ||
| 3291 | |.endif | ||
| 3292 | | blr | ||
| 3293 | | | ||
| 3294 | |// int32_t lj_vm_tobit(double x) | ||
| 3295 | |.if FPU | ||
| 3296 | |->vm_tobit: | ||
| 3297 | | lus TMP0, 0x59c0 // 2^52 + 2^51 (float). | ||
| 3298 | | subi sp, sp, 16 | ||
| 3299 | | stw TMP0, 0(sp) | ||
| 3300 | | lfs FARG2, 0(sp) | ||
| 3301 | | fadd FARG1, FARG1, FARG2 | ||
| 3302 | | stfd FARG1, 0(sp) | ||
| 3303 | | lwz CRET1, 4(sp) | ||
| 3304 | | addi sp, sp, 16 | ||
| 3305 | | blr | ||
| 3306 | |.endif | ||
| 3307 | | | ||
| 3308 | |//----------------------------------------------------------------------- | ||
| 3163 | |//-- Miscellaneous functions -------------------------------------------- | 3309 | |//-- Miscellaneous functions -------------------------------------------- |
| 3164 | |//----------------------------------------------------------------------- | 3310 | |//----------------------------------------------------------------------- |
| 3165 | | | 3311 | | |
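The PPC `vm_num2int_check` above performs the exact-int32 test with integer instructions only: special-case ±0, reject exponents giving |x| < 1 or |x| >= 2^32, reject nonzero fraction bits, then apply the sign and check the int32 range. The sentinel 0x8000000080000000 built in its failure path is the same one used by the other ports. The C reference below is a sketch with hypothetical names and equivalent-but-restated bounds, not the shipped helper:

```c
#include <stdint.h>
#include <string.h>

/* Sketch of the FPU-free exact-int32 test in vm_num2int_check: decide from
** the raw bit pattern whether x is an integer that fits int32_t.
*/
#define NUM2INT_FAIL 0x8000000080000000ULL  /* failure sentinel built by the VM helpers */

static uint64_t num2int_check_ref(double x)
{
  uint64_t bits, mant, v;
  int64_t sign, r;
  int32_t s;
  memcpy(&bits, &x, sizeof(bits));
  if ((bits << 1) == 0) return 0;                        /* +0 and -0 convert to 0 */
  s = 1075 - (int32_t)((bits >> 52) & 0x7ff);            /* number of bits below the binary point */
  if (s < 21 || s > 52) return NUM2INT_FAIL;             /* |x| < 1 or |x| >= 2^32 (also NaN/Inf) */
  mant = (bits & 0x000fffffffffffffULL) | (1ULL << 52);  /* mantissa with implicit leading 1 */
  if (mant & ((1ULL << s) - 1)) return NUM2INT_FAIL;     /* nonzero fractional part */
  v = mant >> s;                                         /* |x| as an integer, below 2^32 */
  sign = (int64_t)bits >> 63;                            /* sign = 0 or -1 */
  r = ((int64_t)v + sign) ^ sign;                        /* apply sign: r = sign ? -v : v */
  if (r != (int32_t)r) return NUM2INT_FAIL;              /* out of int32_t range */
  return (uint32_t)r;                                    /* success: zero-extended int32 result */
}
```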
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 4cfb7b6a..970e8e43 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc | |||
| @@ -2625,6 +2625,49 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2625 | | ret | 2625 | | ret |
| 2626 | | | 2626 | | |
| 2627 | |//----------------------------------------------------------------------- | 2627 | |//----------------------------------------------------------------------- |
| 2628 | |//-- Number conversion functions ---------------------------------------- | ||
| 2629 | |//----------------------------------------------------------------------- | ||
| 2630 | | | ||
| 2631 | |// int64_t lj_vm_num2int_check(double x) | ||
| 2632 | |->vm_num2int_check: | ||
| 2633 | | cvttsd2si eax, xmm0 | ||
| 2634 | | xorps xmm1, xmm1 | ||
| 2635 | | cvtsi2sd xmm1, eax | ||
| 2636 | | ucomisd xmm1, xmm0 | ||
| 2637 | | jp >1 | ||
| 2638 | | jne >1 | ||
| 2639 | | ret | ||
| 2640 | |1: | ||
| 2641 | | mov64 rax, U64x(80000000,80000000) | ||
| 2642 | | ret | ||
| 2643 | | | ||
| 2644 | |// int64_t lj_vm_num2i64(double x) | ||
| 2645 | |->vm_num2i64: | ||
| 2646 | | cvttsd2si rax, xmm0 | ||
| 2647 | | ret | ||
| 2648 | | | ||
| 2649 | |// uint64_t lj_vm_num2u64(double x) | ||
| 2650 | |->vm_num2u64: | ||
| 2651 | | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range. | ||
| 2652 | | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow. | ||
| 2653 | | jo >1 | ||
| 2654 | | ret | ||
| 2655 | |1: | ||
| 2656 | | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double). | ||
| 2657 | | movd xmm1, rdx | ||
| 2658 | | addsd xmm0, xmm1 | ||
| 2659 | | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range. | ||
| 2660 | | // Note that -0x1p63 converts to -0x8000000000000000LL either way. | ||
| 2661 | | ret | ||
| 2662 | | | ||
| 2663 | |// int32_t lj_vm_tobit(double x) | ||
| 2664 | |->vm_tobit: | ||
| 2665 | | sseconst_tobit xmm1, RC | ||
| 2666 | | addsd xmm0, xmm1 | ||
| 2667 | | movd eax, xmm0 | ||
| 2668 | | ret | ||
| 2669 | | | ||
| 2670 | |//----------------------------------------------------------------------- | ||
| 2628 | |//-- Miscellaneous functions -------------------------------------------- | 2671 | |//-- Miscellaneous functions -------------------------------------------- |
| 2629 | |//----------------------------------------------------------------------- | 2672 | |//----------------------------------------------------------------------- |
| 2630 | | | 2673 | | |
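On x64, `vm_num2u64` above converts in two sub-ranges: `cvttsd2si` covers [-2^63, 2^63), and anything that produces the "indefinite" result is retried after subtracting 2^64. A hedged C approximation follows; the explicit range test stands in for the assembly's overflow-flag check, and inputs the hardware would map to the indefinite value instead hit C's undefined cast here.

```c
#include <stdint.h>

/* Approximation of the two-range conversion in vm_num2u64 on x64.
** NaN or values outside [-2^63, 2^64 + 2^63) would hit C's undefined
** float-to-integer cast, whereas cvttsd2si yields the indefinite value.
*/
static uint64_t num2u64_ref(double x)
{
  if (x >= 9223372036854775808.0)                            /* x >= 2^63 */
    return (uint64_t)(int64_t)(x - 18446744073709551616.0);  /* convert x - 2^64, exact in this range */
  return (uint64_t)(int64_t)x;                               /* [-2^63, 2^63) handled directly */
}
```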
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 77c4069d..485ed809 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc | |||
| @@ -3059,6 +3059,98 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 3059 | | ret | 3059 | | ret |
| 3060 | | | 3060 | | |
| 3061 | |//----------------------------------------------------------------------- | 3061 | |//----------------------------------------------------------------------- |
| 3062 | |//-- Number conversion functions ---------------------------------------- | ||
| 3063 | |//----------------------------------------------------------------------- | ||
| 3064 | | | ||
| 3065 | |// int64_t lj_vm_num2int_check(double x) | ||
| 3066 | |->vm_num2int_check: | ||
| 3067 | |.if not X64 | ||
| 3068 | | movsd xmm0, qword [esp+4] | ||
| 3069 | |.endif | ||
| 3070 | | cvttsd2si eax, xmm0 | ||
| 3071 | | xorps xmm1, xmm1 | ||
| 3072 | | cvtsi2sd xmm1, eax | ||
| 3073 | | ucomisd xmm1, xmm0 | ||
| 3074 | | jp >1 | ||
| 3075 | | jne >1 | ||
| 3076 | |.if not X64 | ||
| 3077 | | xor edx, edx | ||
| 3078 | |.endif | ||
| 3079 | | ret | ||
| 3080 | |1: | ||
| 3081 | |.if X64 | ||
| 3082 | | mov64 rax, U64x(80000000,80000000) | ||
| 3083 | |.else | ||
| 3084 | | mov eax, 0x80000000 | ||
| 3085 | | mov edx, eax | ||
| 3086 | |.endif | ||
| 3087 | | ret | ||
| 3088 | | | ||
| 3089 | |// int64_t lj_vm_num2i64(double x) | ||
| 3090 | |->vm_num2i64: | ||
| 3091 | |.if X64 | ||
| 3092 | | cvttsd2si rax, xmm0 | ||
| 3093 | | ret | ||
| 3094 | |.else | ||
| 3095 | | sub esp, 12 | ||
| 3096 | | fld qword [esp+16] | ||
| 3097 | | fisttp qword [esp] | ||
| 3098 | | mov eax, dword [esp] | ||
| 3099 | | mov edx, dword [esp+4] | ||
| 3100 | | add esp, 12 | ||
| 3101 | | ret | ||
| 3102 | |.endif | ||
| 3103 | | | ||
| 3104 | |// uint64_t lj_vm_num2u64(double x) | ||
| 3105 | |->vm_num2u64: | ||
| 3106 | |.if X64 | ||
| 3107 | | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range. | ||
| 3108 | | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow. | ||
| 3109 | | jo >1 | ||
| 3110 | | ret | ||
| 3111 | |1: | ||
| 3112 | | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double). | ||
| 3113 | | movd xmm1, rdx | ||
| 3114 | | addsd xmm0, xmm1 | ||
| 3115 | | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range. | ||
| 3116 | | // Note that -0x1p63 converts to -0x8000000000000000LL either way. | ||
| 3117 | | ret | ||
| 3118 | |.else | ||
| 3119 | | sub esp, 12 | ||
| 3120 | | fld qword [esp+16] | ||
| 3121 | | fld st0 | ||
| 3122 | | fisttp qword [esp] | ||
| 3123 | | mov edx, dword [esp+4] | ||
| 3124 | | mov eax, dword [esp] | ||
| 3125 | | cmp edx, 1 | ||
| 3126 | | jo >2 | ||
| 3127 | |1: | ||
| 3128 | | fpop | ||
| 3129 | | add esp, 12 | ||
| 3130 | | ret | ||
| 3131 | |2: | ||
| 3132 | | cmp eax, 0 | ||
| 3133 | | jne <1 | ||
| 3134 | | mov dword [esp+8], 0xdf800000 // -0x1p64 (float). | ||
| 3135 | | fadd dword [esp+8] | ||
| 3136 | | fisttp qword [esp] | ||
| 3137 | | mov eax, dword [esp] | ||
| 3138 | | mov edx, dword [esp+4] | ||
| 3139 | | add esp, 12 | ||
| 3140 | | ret | ||
| 3141 | |.endif | ||
| 3142 | | | ||
| 3143 | |// int32_t lj_vm_tobit(double x) | ||
| 3144 | |->vm_tobit: | ||
| 3145 | |.if not X64 | ||
| 3146 | | movsd xmm0, qword [esp+4] | ||
| 3147 | |.endif | ||
| 3148 | | sseconst_tobit xmm1, RCa | ||
| 3149 | | addsd xmm0, xmm1 | ||
| 3150 | | movd eax, xmm0 | ||
| 3151 | | ret | ||
| 3152 | | | ||
| 3153 | |//----------------------------------------------------------------------- | ||
| 3062 | |//-- Miscellaneous functions -------------------------------------------- | 3154 | |//-- Miscellaneous functions -------------------------------------------- |
| 3063 | |//----------------------------------------------------------------------- | 3155 | |//----------------------------------------------------------------------- |
| 3064 | | | 3156 | | |
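The x86/x64 `vm_num2int_check` above uses a round-trip test instead of bit inspection: truncate to int32, widen back to double, and compare, with `ucomisd`'s parity flag catching NaN. A C sketch of the same idea, with the caveat that the C cast is undefined for out-of-range inputs where the assembly relies on `cvttsd2si` yielding 0x80000000 (which then fails the comparison); names are illustrative.

```c
#include <stdint.h>

/* Sketch of the SSE round-trip exactness test in vm_num2int_check. */
static uint64_t num2int_check_sse_ref(double x)
{
  int32_t i = (int32_t)x;            /* like cvttsd2si eax, xmm0 */
  if ((double)i != x)                /* inexact, out of range, or NaN */
    return 0x8000000080000000ULL;    /* failure sentinel returned by the VM helper */
  return (uint32_t)i;                /* success: zero-extended int32 result */
}
```

On the 32-bit x86 build the 64-bit return value travels in edx:eax, which is why the success path clears edx and the failure path writes the 0x80000000 pattern into both halves.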
