Diffstat (limited to 'src')

 -rw-r--r--  src/Makefile        |   2
 -rw-r--r--  src/Makefile.dep    |  15
 -rw-r--r--  src/lj_asm.c        | 444
 -rw-r--r--  src/lj_carith.c     |   8
 -rw-r--r--  src/lj_carith.h     |   3
 -rw-r--r--  src/lj_crecord.c    |  20
 -rw-r--r--  src/lj_ir.h         |  21
 -rw-r--r--  src/lj_iropt.h      |   6
 -rw-r--r--  src/lj_jit.h        |  12
 -rw-r--r--  src/lj_opt_fold.c   |  25
 -rw-r--r--  src/lj_opt_split.c  | 343
 -rw-r--r--  src/lj_target_x86.h |   9
 -rw-r--r--  src/lj_trace.c      |   2
 -rw-r--r--  src/ljamalg.c       |   1

14 files changed, 795 insertions(+), 116 deletions(-)
diff --git a/src/Makefile b/src/Makefile
index a2be1a18..0150b049 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -331,7 +331,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
 	lj_state.o lj_dispatch.o lj_vmevent.o lj_api.o \
 	lj_lex.o lj_parse.o \
 	lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
-	lj_opt_dce.o lj_opt_loop.o \
+	lj_opt_dce.o lj_opt_loop.o lj_opt_split.o \
 	lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
 	lj_asm.o lj_trace.o lj_gdbjit.o \
 	lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_carith.o lj_clib.o \
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 3d0c4239..1534ac27 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -128,6 +128,8 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h
+lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h
 lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \
  lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
@@ -167,10 +169,11 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_carith.c lj_carith.h \
  lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c \
  lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
- lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_mcode.c lj_mcode.h lj_snap.c \
- lj_target.h lj_target_*.h lj_record.c lj_record.h lj_ffrecord.h \
- lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
- lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \
- lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \
- lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c \
+ lj_mcode.h lj_snap.c lj_target.h lj_target_*.h lj_record.c lj_record.h \
+ lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h \
+ lj_asm.c lj_asm.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
+ lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
+ lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
+ lib_ffi.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
diff --git a/src/lj_asm.c b/src/lj_asm.c
index cc2ae597..441700d4 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -347,6 +347,20 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
   }
 }
 
+/* op rm/mrm, i */
+static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
+{
+  x86Op xo;
+  if (checki8(i)) {
+    emit_i8(as, i);
+    xo = XG_TOXOi8(xg);
+  } else {
+    emit_i32(as, i);
+    xo = XG_TOXOi(xg);
+  }
+  emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64));
+}
+
 /* -- Emit moves ---------------------------------------------------------- */
 
 /* mov [base+ofs], i */
@@ -371,7 +385,10 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 /* mov r, i / xor r, r */
 static void emit_loadi(ASMState *as, Reg r, int32_t i)
 {
-  if (i == 0) {
+  /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */
+  if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
+			    (as->curins+1 < as->T->nins &&
+			     IR(as->curins+1)->o == IR_HIOP)))) {
     emit_rr(as, XO_ARITH(XOg_XOR), r, r);
   } else {
     MCode *p = as->mcp;
@@ -422,6 +439,19 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
 /* Label for short jumps. */
 typedef MCode *MCLabel;
 
+#if LJ_32 && LJ_HASFFI
+/* jmp short target */
+static void emit_sjmp(ASMState *as, MCLabel target)
+{
+  MCode *p = as->mcp;
+  ptrdiff_t delta = target - p;
+  lua_assert(delta == (int8_t)delta);
+  p[-1] = (MCode)(int8_t)delta;
+  p[-2] = XI_JMPs;
+  as->mcp = p - 2;
+}
+#endif
+
 /* jcc short target */
 static void emit_sjcc(ASMState *as, int cc, MCLabel target)
 {
@@ -630,7 +660,7 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
   } else if (ir->o == IR_KPRI) {  /* REF_NIL stores ASMREF_L register. */
     lua_assert(irt_isnil(ir->t));
     emit_getgl(as, r, jit_L);
-#if LJ_64  /* NYI: 32 bit register pairs. */
+#if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
 #endif
@@ -681,8 +711,7 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
 #if LJ_64
 #define REX_64IR(ir, r)		((r) + (irt_is64((ir)->t) ? REX_64 : 0))
 #else
-/* NYI: 32 bit register pairs. */
-#define REX_64IR(ir, r)		check_exp(!irt_is64((ir)->t), (r))
+#define REX_64IR(ir, r)		(r)
 #endif
 
 /* Generic move between two regs. */
@@ -939,7 +968,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
       emit_loadn(as, dest, tv);
       return;
     }
-#if LJ_64  /* NYI: 32 bit register pairs. */
+#if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, dest, ir_kint64(ir)->u64);
     return;
@@ -1463,7 +1492,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 #endif
     if (r) {  /* Argument is in a register. */
       if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
-#if LJ_64  /* NYI: 32 bit register pairs. */
+#if LJ_64
 	if (ir->o == IR_KINT64)
 	  emit_loadu64(as, r, ir_kint64(ir)->u64);
 	else
@@ -1519,7 +1548,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     if (irt_isfp(ir->t)) {
-      int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
+      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
 #if LJ_64
       if ((ci->flags & CCI_CASTU64)) {
 	Reg dest = ir->r;
@@ -1632,19 +1661,24 @@ static void asm_conv(ASMState *as, IRIns *ir)
   int stfp = (st == IRT_NUM || st == IRT_FLOAT);
   IRRef lref = ir->op1;
   lua_assert(irt_type(ir->t) != st);
+  lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64)));  /* Handled by SPLIT. */
   if (irt_isfp(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     if (stfp) {  /* FP to FP conversion. */
       Reg left = asm_fuseload(as, lref, RSET_FPR);
       emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left);
       if (left == dest) return;  /* Avoid the XO_XORPS. */
-#if LJ_32
-    } else if (st >= IRT_U32) {
-      /* NYI: 64 bit integer or uint32_t to number conversion. */
-      setintV(&as->J->errinfo, ir->o);
-      lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    } else if (LJ_32 && st == IRT_U32) {  /* U32 to FP conversion on x86. */
+      /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
+      cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
+      Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+      if (irt_isfloat(ir->t))
+	emit_rr(as, XO_CVTSD2SS, dest, dest);
+      emit_rr(as, XO_SUBSD, dest, bias);  /* Subtract 2^52+2^51 bias. */
+      emit_rr(as, XO_XORPS, dest, bias);  /* Merge bias and integer. */
+      emit_loadn(as, bias, k);
+      emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
       return;
-#endif
     } else {  /* Integer to FP conversion. */
       Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ?
 		 ra_alloc1(as, lref, RSET_GPR) :
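The new U32-to-FP branch relies on an IEEE-754 bit trick: ORing a u32 into the low mantissa bits of the double constant 2^52+2^51 produces exactly 2^52+2^51+u, so a single subtraction recovers the value. A minimal C model of the emitted MOVD/XORPS/SUBSD sequence (illustrative sketch, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    /* Model of the bias trick: merge the u32 into the low mantissa bits of
    ** the double 2^52+2^51 (bit pattern 0x4338000000000000), then subtract
    ** the bias. The merge is exact because integers up to 2^32-1 fit in the
    ** low 32 mantissa bits at this exponent.
    */
    static double u32_to_num(uint32_t u)
    {
      uint64_t bits = 0x4338000000000000ULL | u;  /* 2^52+2^51 .. u32 */
      double d;
      memcpy(&d, &bits, sizeof(d));               /* Reinterpret as double. */
      return d - 6755399441055744.0;              /* Subtract 2^52+2^51. */
    }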
@@ -1663,41 +1697,47 @@ static void asm_conv(ASMState *as, IRIns *ir)
     emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
-      lua_assert(!irt_is64(ir->t));  /* No support for checked 64 bit conv. */
+      /* Checked conversions are only supported from number to int. */
+      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
-#if LJ_32
-    } else if (irt_isi64(ir->t) || irt_isu64(ir->t) || irt_isu32(ir->t)) {
-      /* NYI: number to 64 bit integer or uint32_t conversion. */
-      setintV(&as->J->errinfo, ir->o);
-      lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-#endif
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       x86Op op = st == IRT_NUM ?
		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
-      if (LJ_64 && irt_isu64(ir->t)) {
-	const void *k = lj_ir_k64_find(as->J, U64x(c3f00000,00000000));
-	MCLabel l_end = emit_label(as);
-	Reg left = IR(lref)->r;
+      if (LJ_32 && irt_isu32(ir->t)) {  /* FP to U32 conversion on x86. */
+	/* u32 = (int32_t)(number - 2^31) + 2^31 */
+	Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
+					  ra_scratch(as, RSET_FPR);
+	emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
+	emit_rr(as, op, dest, tmp);
+	if (st == IRT_NUM)
+	  emit_rma(as, XO_ADDSD, tmp,
+		   lj_ir_k64_find(as->J, U64x(c1e00000,00000000)));
+	else
+	  emit_rma(as, XO_ADDSS, tmp,
+		   lj_ir_k64_find(as->J, U64x(00000000,cf000000)));
+	ra_left(as, tmp, lref);
+      } else if (LJ_64 && irt_isu64(ir->t)) {
 	/* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
-	if (ra_hasreg(left)) {
-	  Reg tmpn = ra_scratch(as, rset_exclude(RSET_FPR, left));
-	  emit_rr(as, op, dest|REX_64, tmpn);
-	  emit_rr(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, tmpn, left);
-	  emit_rma(as, st == IRT_NUM ? XMM_MOVRM(as) : XO_MOVSS, tmpn, k);
-	} else {
-	  left = ra_allocref(as, lref, RSET_FPR);
-	  emit_rr(as, op, dest|REX_64, left);
-	  emit_rma(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, left, k);
-	}
+	Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
+					  ra_scratch(as, RSET_FPR);
+	MCLabel l_end = emit_label(as);
+	emit_rr(as, op, dest|REX_64, tmp);
+	if (st == IRT_NUM)
+	  emit_rma(as, XO_ADDSD, tmp,
+		   lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+	else
+	  emit_rma(as, XO_ADDSS, tmp,
+		   lj_ir_k64_find(as->J, U64x(00000000,df800000)));
 	emit_sjcc(as, CC_NS, l_end);
 	emit_rr(as, XO_TEST, dest|REX_64, dest);  /* Check if dest < 2^63. */
-	emit_rr(as, op, dest|REX_64, left);
+	emit_rr(as, op, dest|REX_64, tmp);
+	ra_left(as, tmp, lref);
       } else {
 	Reg left = asm_fuseload(as, lref, RSET_FPR);
 	if (LJ_64 && irt_isu32(ir->t))
-	  emit_rr(as, XO_MOV, dest, dest);  /* Zero upper 32 bits. */
+	  emit_rr(as, XO_MOV, dest, dest);  /* Zero hiword. */
 	emit_mrm(as, op,
 		 dest|((LJ_64 &&
			(irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
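Both unsigned FP-to-integer paths above shift the input into signed range so the signed SSE conversion instructions can be used. As plain C, shown for the double case (sketches under the same truncation semantics; note the emitted machine code tests the sign flag of the first conversion result instead of doing an FP compare):

    #include <stdint.h>

    /* x86 FP to U32: u32 = (int32_t)(n - 2^31) + 2^31. For n < 2^31 the
    ** intermediate is negative and the unsigned add of 2^31 wraps it back.
    */
    static uint32_t num_to_u32(double n)
    {
      return (uint32_t)(int32_t)(n - 2147483648.0) + 2147483648u;
    }

    /* x64 FP to U64: inputs in [2^63,2^64-1] are out of signed range, so
    ** add -2^64 first; the cast back to uint64_t restores the 2^64.
    */
    static uint64_t num_to_u64(double n)
    {
      if (n >= 9223372036854775808.0)  /* n >= 2^63 */
	return (uint64_t)(int64_t)(n - 18446744073709551616.0);
      return (uint64_t)(int64_t)n;
    }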
@@ -1728,12 +1768,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, op, dest, left);
     }
   } else {  /* 32/64 bit integer conversions. */
-    if (irt_is64(ir->t)) {
-#if LJ_32
-      /* NYI: conversion to 64 bit integers. */
-      setintV(&as->J->errinfo, ir->o);
-      lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-#else
+    if (LJ_32) {  /* Only need to handle 32/32 bit no-op (cast) on x86. */
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      ra_left(as, dest, lref);  /* Do nothing, but may need to move regs. */
+    } else if (irt_is64(ir->t)) {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       if (st64 || !(ir->op2 & IRCONV_SEXT)) {
 	/* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
@@ -1742,21 +1780,14 @@ static void asm_conv(ASMState *as, IRIns *ir)
 	Reg left = asm_fuseload(as, lref, RSET_GPR);
 	emit_mrm(as, XO_MOVSXd, dest|REX_64, left);
       }
-#endif
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       if (st64) {
-#if LJ_32
-	/* NYI: conversion from 64 bit integers. */
-	setintV(&as->J->errinfo, ir->o);
-	lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-#else
 	Reg left = asm_fuseload(as, lref, RSET_GPR);
-	/* This is either a 32 bit reg/reg mov which zeroes the hi-32 bits
-	** or a load of the lower 32 bits from a 64 bit address.
+	/* This is either a 32 bit reg/reg mov which zeroes the hiword
+	** or a load of the loword from a 64 bit address.
 	*/
 	emit_mrm(as, XO_MOV, dest, left);
-#endif
       } else {  /* 32/32 bit no-op (cast). */
 	ra_left(as, dest, lref);  /* Do nothing, but may need to move regs. */
       }
@@ -1764,6 +1795,93 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
+#if LJ_32 && LJ_HASFFI
+/* No SSE conversions to/from 64 bit on x86, so resort to ugly x87 code. */
+
+/* 64 bit integer to FP conversion in 32 bit mode. */
+static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
+{
+  Reg hi = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg lo = ra_alloc1(as, (ir-1)->op1, rset_exclude(RSET_GPR, hi));
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+	      dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
+	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
+  if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
+    MCLabel l_end = emit_label(as);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+	     lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_end);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if u64 >= 2^63. */
+  } else {
+    lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64);
+  }
+  emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
+  /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
+  emit_rmro(as, XO_MOVto, hi, RID_ESP, 4);
+  emit_rmro(as, XO_MOVto, lo, RID_ESP, 0);
+}
+
+/* FP to 64 bit integer conversion in 32 bit mode. */
+static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  Reg lo, hi;
+  lua_assert(st == IRT_NUM || st == IRT_FLOAT);
+  lua_assert(dt == IRT_I64 || dt == IRT_U64);
+  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
+  hi = ra_dest(as, ir, RSET_GPR);
+  lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
+  if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
+  /* NYI: Avoid wide-to-narrow store-to-load forwarding stall. */
+  if (!(as->flags & JIT_F_SSE3)) {  /* Set FPU rounding mode to default. */
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 4);
+    emit_rmro(as, XO_MOVto, lo, RID_ESP, 4);
+    emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
+  }
+  if (dt == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
+    MCLabel l_pop, l_end = emit_label(as);
+    emit_x87op(as, XI_FPOP);
+    l_pop = emit_label(as);
+    emit_sjmp(as, l_end);
+    emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+    if ((as->flags & JIT_F_SSE3))
+      emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+    else
+      emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+	     lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_pop);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if out-of-range (2^63). */
+  }
+  emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+  if ((as->flags & JIT_F_SSE3)) {  /* Truncation is easy with SSE3. */
+    emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+  } else {  /* Otherwise set FPU rounding mode to truncate before the store. */
+    emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 0);
+    emit_rmro(as, XO_MOVtow, lo, RID_ESP, 0);
+    emit_rmro(as, XO_ARITHw(XOg_OR), lo, RID_ESP, 0);
+    emit_loadi(as, lo, 0xc00);
+    emit_rmro(as, XO_FNSTCW, XOg_FNSTCW, RID_ESP, 0);
+  }
+  if (dt == IRT_U64)
+    emit_x87op(as, XI_FDUP);
+  emit_mrm(as, st == IRT_NUM ? XO_FLDq : XO_FLDd,
+	   st == IRT_NUM ? XOg_FLDq : XOg_FLDd,
+	   asm_fuseload(as, ir->op1, RSET_EMPTY));
+}
+#endif
+
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   /* Force a spill slot for the destination register (if any). */
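The x87 route exists because SSE2 on 32 bit x86 has no 64 bit integer conversion instructions. The unsigned compensation mirrors the SSE cases above: FILDq always interprets the stored 64 bits as signed, so u64 inputs with the top bit set come out off by exactly -2^64. A C model of the integer-to-double direction (illustrative only, not part of the patch):

    #include <stdint.h>

    /* Model of FILDq plus the conditional FADDq 2^64 compensation that the
    ** emitted code selects by testing the hiword sign.
    */
    static double u64_to_num(uint64_t u)
    {
      double d = (double)(int64_t)u;       /* FILDq semantics. */
      if ((int64_t)u < 0)                  /* u >= 2^63? */
	d += 18446744073709551616.0;       /* Add 2^64 to compensate. */
      return d;
    }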
@@ -2644,6 +2762,18 @@ static void asm_powi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
+#if LJ_64 && LJ_HASFFI
+static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+#endif
+
 /* Find out whether swapping operands might be beneficial. */
 static int swapops(ASMState *as, IRIns *ir)
 {
@@ -2877,12 +3007,30 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
-#define VCC_U	0x100		/* Unordered. */
-#define VCC_P	0x200		/* Needs extra CC_P branch. */
-#define VCC_S	0x400		/* Swap avoids CC_P branch. */
+#define VCC_U	0x1000		/* Unordered. */
+#define VCC_P	0x2000		/* Needs extra CC_P branch. */
+#define VCC_S	0x4000		/* Swap avoids CC_P branch. */
 #define VCC_PS	(VCC_P|VCC_S)
 
-static void asm_comp_(ASMState *as, IRIns *ir, int cc)
+/* Map of comparisons to flags. ORDER IR. */
+#define COMPFLAGS(ci, cin, cu, cf)	((ci)+((cu)<<4)+((cin)<<8)+(cf))
+static const uint16_t asm_compmap[IR_ABC+1] = {
+  /*                 signed non-eq unsigned flags */
+  /* LT  */ COMPFLAGS(CC_GE, CC_G,  CC_AE, VCC_PS),
+  /* GE  */ COMPFLAGS(CC_L,  CC_L,  CC_B,  0),
+  /* LE  */ COMPFLAGS(CC_G,  CC_G,  CC_A,  VCC_PS),
+  /* GT  */ COMPFLAGS(CC_LE, CC_L,  CC_BE, 0),
+  /* ULT */ COMPFLAGS(CC_AE, CC_A,  CC_AE, VCC_U),
+  /* UGE */ COMPFLAGS(CC_B,  CC_B,  CC_B,  VCC_U|VCC_PS),
+  /* ULE */ COMPFLAGS(CC_A,  CC_A,  CC_A,  VCC_U),
+  /* UGT */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS),
+  /* EQ  */ COMPFLAGS(CC_NE, CC_NE, CC_NE, VCC_P),
+  /* NE  */ COMPFLAGS(CC_E,  CC_E,  CC_E,  VCC_U|VCC_P),
+  /* ABC */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS)  /* Same as UGT. */
+};
+
+/* FP and integer comparisons. */
+static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
 {
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
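The COMPFLAGS packing puts the inverted condition (used for guard exits) in the low nibble, the unsigned variant in bits 4-7, and the "non-eq" condition used for hiword compares in bits 8-11; that is also why the VCC_* flags had to move from 0x100..0x400 up to 0x1000..0x4000. A decoding sketch (illustrative; the local names are hypothetical, not from the patch):

    /* How an asm_compmap entry is picked apart again. */
    uint32_t cc = asm_compmap[ir->o];
    int cc_inv      = cc & 15;          /* Inverted condition for guards. */
    int cc_unsigned = (cc >> 4) & 15;   /* Unsigned variant (FP, loword). */
    int cc_noneq    = (cc >> 8) & 15;   /* Hiword condition, equality stripped. */
    uint32_t vcc    = cc & 0xf000u;     /* VCC_U/VCC_P/VCC_S flags. */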
@@ -3008,15 +3156,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
 	  if (irl+1 == ir)  /* Referencing previous ins? */
 	    as->testmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
 	} else {
-	  x86Op xo;
-	  if (checki8(imm)) {
-	    emit_i8(as, imm);
-	    xo = XO_ARITHi8;
-	  } else {
-	    emit_i32(as, imm);
-	    xo = XO_ARITHi;
-	  }
-	  emit_mrm(as, xo, r64 + XOg_CMP, left);
+	  emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm);
 	}
       }
     } else {
@@ -3028,8 +3168,133 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
   }
 }
 
-#define asm_comp(as, ir, ci, cf, cu) \
-  asm_comp_(as, ir, (ci)+((cf)<<4)+(cu))
+#if LJ_32 && LJ_HASFFI
+/* 64 bit integer comparisons in 32 bit mode. */
+static void asm_comp_int64(ASMState *as, IRIns *ir)
+{
+  uint32_t cc = asm_compmap[(ir-1)->o];
+  RegSet allow = RSET_GPR;
+  Reg lefthi = RID_NONE, leftlo = RID_NONE;
+  Reg righthi = RID_NONE, rightlo = RID_NONE;
+  MCLabel l_around;
+  x86ModRM mrm;
+
+  as->curins--;  /* Skip loword ins. Avoids failing in noconflict(), too. */
+
+  /* Allocate/fuse hiword operands. */
+  if (irref_isk(ir->op2)) {
+    lefthi = asm_fuseload(as, ir->op1, allow);
+  } else {
+    lefthi = ra_alloc1(as, ir->op1, allow);
+    righthi = asm_fuseload(as, ir->op2, allow);
+    if (righthi == RID_MRM) {
+      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
+      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
+    } else {
+      rset_clear(allow, righthi);
+    }
+  }
+  mrm = as->mrm;  /* Save state for hiword instruction. */
+
+  /* Allocate/fuse loword operands. */
+  if (irref_isk((ir-1)->op2)) {
+    leftlo = asm_fuseload(as, (ir-1)->op1, allow);
+  } else {
+    leftlo = ra_alloc1(as, (ir-1)->op1, allow);
+    rightlo = asm_fuseload(as, (ir-1)->op2, allow);
+    if (rightlo == RID_MRM) {
+      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
+      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
+    } else {
+      rset_clear(allow, rightlo);
+    }
+  }
+
+  /* All register allocations must be performed _before_ this point. */
+  l_around = emit_label(as);
+  as->invmcp = as->testmcp = NULL;  /* Cannot use these optimizations. */
+
+  /* Loword comparison and branch. */
+  asm_guardcc(as, cc >> 4);  /* Always use unsigned compare for loword. */
+  if (ra_noreg(rightlo)) {
+    int32_t imm = IR((ir-1)->op2)->i;
+    if (imm == 0 && ((cc >> 4) & 0xa) != 0x2 && leftlo != RID_MRM)
+      emit_rr(as, XO_TEST, leftlo, leftlo);
+    else
+      emit_gmrmi(as, XG_ARITHi(XOg_CMP), leftlo, imm);
+  } else {
+    emit_mrm(as, XO_CMP, leftlo, rightlo);
+  }
+
+  /* Hiword comparison and branches. */
+  if ((cc & 15) != CC_NE)
+    emit_sjcc(as, CC_NE, l_around);  /* Hiword unequal: skip loword compare. */
+  if ((cc & 15) != CC_E)
+    asm_guardcc(as, cc >> 8);  /* Hiword compare without equality check. */
+  as->mrm = mrm;  /* Restore state. */
+  if (ra_noreg(righthi)) {
+    int32_t imm = IR(ir->op2)->i;
+    if (imm == 0 && (cc & 0xa) != 0x2 && lefthi != RID_MRM)
+      emit_rr(as, XO_TEST, lefthi, lefthi);
+    else
+      emit_gmrmi(as, XG_ARITHi(XOg_CMP), lefthi, imm);
+  } else {
+    emit_mrm(as, XO_CMP, lefthi, righthi);
+  }
+}
+#endif
+
+/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+#if LJ_32 && LJ_HASFFI
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
+    if (usehi || uselo) {
+      if (irt_isfp(ir->t))
+	asm_conv_fp_int64(as, ir);
+      else
+	asm_conv_int64_fp(as, ir);
+    }
+    as->curins--;  /* Always skip the CONV. */
+    return;
+  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
+    asm_comp_int64(as, ir);
+    return;
+  }
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_ADD:
+    asm_intarith(as, ir, uselo ? XOg_ADC : XOg_ADD);
+    break;
+  case IR_SUB:
+    asm_intarith(as, ir, uselo ? XOg_SBB : XOg_SUB);
+    break;
+  case IR_NEG: {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    emit_rr(as, XO_GROUP3, XOg_NEG, dest);
+    if (uselo) {
+      emit_i8(as, 0);
+      emit_rr(as, XO_ARITHi8, XOg_ADC, dest);
+    }
+    ra_left(as, dest, ir->op1);
+    break;
+  }
+  case IR_CALLN:
+    ra_destreg(as, ir, RID_RETHI);
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RET));  /* Mark call as used. */
+    break;
+  default: lua_assert(0); break;
+  }
+#else
+  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused on x64 or without FFI. */
+#endif
+}
 
 /* -- Stack handling ------------------------------------------------------ */
 
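Two C models of what the new split ops compute (sketches, not part of the patch). A 64 bit ordered compare reduces to a signed hiword compare plus an unsigned loword compare that only matters when the hiwords are equal; a 64 bit add becomes ADD/ADC, which is also why emit_loadi() above must avoid XOR for hiword constants (XOR would clobber the carry flag between the two halves):

    #include <stdint.h>

    /* Split model of a signed 64 bit '<': the hiword decides unless equal. */
    static int lt64(int32_t ahi, uint32_t alo, int32_t bhi, uint32_t blo)
    {
      if (ahi != bhi) return ahi < bhi;  /* Signed hiword compare. */
      return alo < blo;                  /* Unsigned loword compare. */
    }

    /* Split model of a 64 bit add: the loword ADD produces the carry
    ** consumed by the hiword ADC.
    */
    static void add64(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi,
		      uint32_t *rlo, uint32_t *rhi)
    {
      uint32_t lo = alo + blo;
      *rlo = lo;
      *rhi = ahi + bhi + (lo < alo);     /* Carry out of the loword ADD. */
    }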
@@ -3682,21 +3947,16 @@ static void asm_ir(ASMState *as, IRIns *ir)
   switch ((IROp)ir->o) {
   /* Miscellaneous ops. */
   case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: break;
+  case IR_NOP: lua_assert(!ra_used(ir)); break;
   case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
 
   /* Guarded assertions. */
-  case IR_LT:  asm_comp(as, ir, CC_GE, CC_AE, VCC_PS); break;
-  case IR_GE:  asm_comp(as, ir, CC_L, CC_B, 0); break;
-  case IR_LE:  asm_comp(as, ir, CC_G, CC_A, VCC_PS); break;
-  case IR_GT:  asm_comp(as, ir, CC_LE, CC_BE, 0); break;
-  case IR_ULT: asm_comp(as, ir, CC_AE, CC_AE, VCC_U); break;
-  case IR_UGE: asm_comp(as, ir, CC_B, CC_B, VCC_U|VCC_PS); break;
-  case IR_ULE: asm_comp(as, ir, CC_A, CC_A, VCC_U); break;
-  case IR_ABC:
-  case IR_UGT: asm_comp(as, ir, CC_BE, CC_BE, VCC_U|VCC_PS); break;
-  case IR_EQ:  asm_comp(as, ir, CC_NE, CC_NE, VCC_P); break;
-  case IR_NE:  asm_comp(as, ir, CC_E, CC_E, VCC_U|VCC_P); break;
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_EQ: case IR_NE: case IR_ABC:
+    asm_comp(as, ir, asm_compmap[ir->o]);
+    break;
 
   case IR_RETF: asm_retf(as, ir); break;
 
@@ -3744,7 +4004,15 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
     asm_fpmath(as, ir);
     break;
-  case IR_POWI: asm_powi(as, ir); break;
+  case IR_POWI:
+#if LJ_64 && LJ_HASFFI
+    if (!irt_isnum(ir->t))
+      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					     IRCALL_lj_carith_powu64);
+    else
+#endif
+      asm_powi(as, ir);
+    break;
 
   /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
   case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
@@ -3801,6 +4069,7 @@ static void asm_trace(ASMState *as)
 {
   for (as->curins--; as->curins > as->stopins; as->curins--) {
     IRIns *ir = IR(as->curins);
+    lua_assert(!(LJ_32 && irt_isint64(ir->t)));  /* Handled by SPLIT. */
     if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
       continue;  /* Dead-code elimination can be soooo easy. */
     if (irt_isguard(ir->t))
@@ -3864,11 +4133,10 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
     case IR_CALLN: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
 #if LJ_64
-      /* NYI: add stack slots for x64 calls with many args. */
       lua_assert(CCI_NARGS(ci) <= (LJ_ABI_WIN ? 4 : 6));
       ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
 #else
-      /* NYI: not fastcall-aware, but doesn't matter (yet). */
+      lua_assert(!(ci->flags & CCI_FASTCALL) || CCI_NARGS(ci) <= 2);
       if (CCI_NARGS(ci) > (uint32_t)as->evenspill)  /* Leave room for args. */
 	as->evenspill = (int32_t)CCI_NARGS(ci);
       ir->prev = REGSP_HINT(RID_RET);
@@ -3878,6 +4146,12 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
 		  (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
       continue;
       }
+#if LJ_32 && LJ_HASFFI
+    case IR_HIOP:
+      if ((ir-1)->o == IR_CALLN)
+	ir->prev = REGSP_HINT(RID_RETHI);
+      break;
+#endif
     /* C calls evict all scratch regs and return results in RID_RET. */
     case IR_SNEW: case IR_NEWREF:
 #if !LJ_64
@@ -3894,6 +4168,14 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
       as->modset = RSET_SCRATCH;
       break;
     case IR_POWI:
+#if LJ_64 && LJ_HASFFI
+      if (!irt_isnum(ir->t)) {
+	ir->prev = REGSP_HINT(RID_RET);
+	if (inloop)
+	  as->modset |= (RSET_SCRATCH & RSET_GPR);
+	continue;
+      }
+#endif
       ir->prev = REGSP_HINT(RID_XMM0);
       if (inloop)
 	as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
diff --git a/src/lj_carith.c b/src/lj_carith.c
index 46f07be7..134a61fb 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -230,6 +230,14 @@ int lj_carith_op(lua_State *L, MMS mm)
 
 /* -- 64 bit integer arithmetic helpers ----------------------------------- */
 
+#if LJ_32
+/* Signed/unsigned 64 bit multiply. */
+int64_t lj_carith_mul64(int64_t a, int64_t b)
+{
+  return a * b;
+}
+#endif
+
 /* Unsigned 64 bit x^k. */
 uint64_t lj_carith_powu64(uint64_t x, uint64_t k)
 {
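A single helper covers both signed and unsigned multiply because the low 64 bits of a two's-complement product are identical either way. On a 32 bit target the C compiler expands it to roughly three 32x32 multiplies; a model of that expansion (illustrative sketch, not part of the patch):

    #include <stdint.h>

    /* 64 bit multiply from 32 bit halves: the cross products only affect
    ** the hiword, so the same code serves int64_t and uint64_t.
    */
    static uint64_t mul64_model(uint32_t alo, uint32_t ahi,
				uint32_t blo, uint32_t bhi)
    {
      uint64_t lo = (uint64_t)alo * blo;
      uint32_t hi = (uint32_t)(lo >> 32) + alo*bhi + ahi*blo;
      return ((uint64_t)hi << 32) | (uint32_t)lo;
    }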
diff --git a/src/lj_carith.h b/src/lj_carith.h
index 6870172b..14073603 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -12,6 +12,9 @@
 
 LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
 
+#if LJ_32
+LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
+#endif
 LJ_FUNC uint64_t lj_carith_powu64(uint64_t x, uint64_t k);
 LJ_FUNC int64_t lj_carith_powi64(int64_t x, int64_t k);
 
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 61210907..5eafa3a7 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -189,6 +189,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
     sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0);
 #endif
   xstore:
+    if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J);
     emitir(IRT(IR_XSTORE, dt), dp, sp);
     break;
   case CCX(I, C):
@@ -311,6 +312,7 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp)
       TRef ptr = emitir(IRT(IR_ADD, IRT_PTR), dp,
			 lj_ir_kintp(J, sizeof(GCcdata)));
       emitir(IRT(IR_XSTORE, t), ptr, tr);
+      lj_needsplit(J);
       return dp;
     } else if ((sinfo & CTF_BOOL)) {
       /* Assume not equal to zero. Fixup and emit pending guard later. */
@@ -406,7 +408,10 @@ static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
     if (ctype_isenum(s->info)) s = ctype_child(cts, s);
     if (ctype_isnum(s->info)) {  /* Load number value. */
       IRType t = crec_ct2irt(s);
-      if (t != IRT_CDATA) sp = emitir(IRT(IR_XLOAD, t), sp, 0);
+      if (t != IRT_CDATA) {
+	sp = emitir(IRT(IR_XLOAD, t), sp, 0);
+	if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J);
+      }
     }
     goto doconv;
   }
@@ -499,8 +504,10 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
     if (ctype_isinteger(ctk->info) && (t = crec_ct2irt(ctk)) != IRT_CDATA) {
       idx = emitir(IRT(IR_ADD, IRT_PTR), idx, lj_ir_kintp(J, sizeof(GCcdata)));
       idx = emitir(IRT(IR_XLOAD, t), idx, 0);
-      if (!LJ_64 && (t == IRT_I64 || t == IRT_U64))
+      if (!LJ_64 && (t == IRT_I64 || t == IRT_U64)) {
 	idx = emitconv(idx, IRT_INT, t, 0);
+	lj_needsplit(J);
+      }
       goto integer_key;
     }
   } else if (tref_isstr(idx)) {
@@ -664,6 +671,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
   CTypeID id;
   TRef tr, dp, ptr;
   MSize i;
+  lj_needsplit(J);
   if (((s[0]->info & CTF_UNSIGNED) && s[0]->size == 8) ||
       ((s[1]->info & CTF_UNSIGNED) && s[1]->size == 8)) {
     dt = IRT_U64; id = CTID_UINT64;
@@ -691,9 +699,6 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
     lj_ir_set(J, IRTG(op, dt), sp[0], sp[1]);
     J->postproc = LJ_POST_FIXGUARD;
     return TREF_TRUE;
-  } else if (mm == MM_pow) {
-    tr = lj_ir_call(J, dt == IRT_I64 ? IRCALL_lj_carith_powi64 :
-				       IRCALL_lj_carith_powu64, sp[0], sp[1]);
   } else {
     if (mm == MM_div || mm == MM_mod)
       return 0;  /* NYI: integer div, mod. */
@@ -754,10 +759,11 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
     tr = emitconv(tr, IRT_INTP, IRT_INT,
		  ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
 #else
-    if (!tref_typerange(sp[1], IRT_I8, IRT_U32))
+    if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
       tr = emitconv(tr, IRT_INTP, t,
		    (t == IRT_NUM || t == IRT_FLOAT) ?
		    IRCONV_TRUNC|IRCONV_ANY : 0);
+    }
 #endif
     tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
     tr = emitir(IRT(IR_ADD, IRT_PTR), sp[0], tr);
@@ -790,6 +796,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
       if (ctype_isnum(ct->info)) {
 	IRType t = crec_ct2irt(ct);
 	if (t == IRT_CDATA) goto err_type;
+	if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J);
 	tr = emitir(IRT(IR_XLOAD, t), tr, 0);
       } else if (!(ctype_isptr(ct->info) || ctype_isrefarray(ct->info))) {
 	goto err_type;
@@ -842,6 +849,7 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
     IRType t = crec_ct2irt(s);
     if (t != IRT_CDATA) {
       TRef tr = emitir(IRT(IR_XLOAD, t), sp, 0);  /* Load number value. */
+      if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J);
      if (t == IRT_FLOAT || t == IRT_U32 || t == IRT_I64 || t == IRT_U64)
 	tr = emitconv(tr, IRT_NUM, t, 0);
       J->base[0] = tr;
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 1cb3566e..286eb219 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -33,6 +33,7 @@
   /* Miscellaneous ops. */ \
   _(NOP,	N , ___, ___) \
   _(BASE,	N , lit, lit) \
+  _(HIOP,	S , ref, ref) \
   _(LOOP,	S , ___, ___) \
   _(PHI,	S , ref, ref) \
   _(RENAME,	S , ref, lit) \
@@ -212,8 +213,9 @@ IRFLDEF(FLENUM)
 /* CONV mode, stored in op2. */
 #define IRCONV_SRCMASK	0x001f	/* Source IRType. */
 #define IRCONV_DSTMASK	0x03e0	/* Dest. IRType (also in ir->t). */
-#define IRCONV_NUM_INT	((IRT_NUM<<5)|IRT_INT)
-#define IRCONV_INT_NUM	((IRT_INT<<5)|IRT_NUM)
+#define IRCONV_DSH	5
+#define IRCONV_NUM_INT	((IRT_NUM<<IRCONV_DSH)|IRT_INT)
+#define IRCONV_INT_NUM	((IRT_INT<<IRCONV_DSH)|IRT_NUM)
 #define IRCONV_TRUNC	0x0400	/* Truncate number to integer. */
 #define IRCONV_SEXT	0x0800	/* Sign-extend integer to integer. */
 #define IRCONV_MODEMASK	0x0fff
@@ -251,13 +253,21 @@ typedef struct CCallInfo {
 #define CCI_CASTU64	0x0200	/* Cast u64 result to number. */
 #define CCI_NOFPRCLOBBER 0x0400	/* Does not clobber any FPRs. */
 #define CCI_FASTCALL	0x0800	/* Fastcall convention. */
-#define CCI_STACK64	0x1000	/* Needs 64 bits per argument. */
 
 /* Function definitions for CALL* instructions. */
 #if LJ_HASFFI
+#if LJ_32
+#define ARG2_64		4	/* Treat as 4 32 bit arguments. */
+#define IRCALLDEF_FFI32(_) \
+  _(lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER)
+#else
+#define ARG2_64		2
+#define IRCALLDEF_FFI32(_)
+#endif
 #define IRCALLDEF_FFI(_) \
-  _(lj_carith_powi64, 2, N, I64, CCI_STACK64|CCI_NOFPRCLOBBER) \
-  _(lj_carith_powu64, 2, N, U64, CCI_STACK64|CCI_NOFPRCLOBBER)
+  IRCALLDEF_FFI32(_) \
+  _(lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \
+  _(lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER)
 #else
 #define IRCALLDEF_FFI(_)
 #endif
@@ -402,6 +412,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 #define irt_isinteger(t)	(irt_typerange((t), IRT_I8, IRT_INT))
 #define irt_isgcv(t)		(irt_typerange((t), IRT_STR, IRT_UDATA))
 #define irt_isaddr(t)		(irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
+#define irt_isint64(t)		(irt_typerange((t), IRT_I64, IRT_U64))
 
 #if LJ_64
 #define IRT_IS64 \
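With IRCONV_DSH factored out, building and decoding a CONV op2 field reads as follows (sketch; see the asm_conv_int64_fp() hunk above for the in-tree decode):

    /* Pack: destination IRType in bits 5-9, source IRType in bits 0-4. */
    uint16_t op2 = (IRT_U64 << IRCONV_DSH) | IRT_NUM | IRCONV_TRUNC;
    /* Decode: */
    IRType st = (IRType)(op2 & IRCONV_SRCMASK);
    IRType dt = (IRType)((op2 & IRCONV_DSTMASK) >> IRCONV_DSH);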
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 43c414c1..db99c118 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -141,6 +141,12 @@ LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase);
 /* Optimization passes. */
 LJ_FUNC void lj_opt_dce(jit_State *J);
 LJ_FUNC int lj_opt_loop(jit_State *J);
+#if LJ_HASFFI && LJ_32
+LJ_FUNC void lj_opt_split(jit_State *J);
+#else
+#define lj_opt_split(J)	UNUSED(J)
+#endif
+
 #endif
 
 #endif
diff --git a/src/lj_jit.h b/src/lj_jit.h
index a8be1a97..38970fc7 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -240,6 +240,15 @@ enum {
 #define LJ_KSIMD(J, n) \
   ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
 
+/* Set/reset flag to activate the SPLIT pass for the current trace. */
+#if LJ_32 && LJ_HASFFI
+#define lj_needsplit(J)		(J->needsplit = 1)
+#define lj_resetsplit(J)	(J->needsplit = 0)
+#else
+#define lj_needsplit(J)		UNUSED(J)
+#define lj_resetsplit(J)	UNUSED(J)
+#endif
+
 /* Fold state is used to fold instructions on-the-fly. */
 typedef struct FoldState {
   IRIns ins;		/* Currently emitted instruction. */
@@ -293,6 +302,9 @@ typedef struct jit_State {
   MSize sizesnapmap;	/* Size of temp. snapshot map buffer. */
 
   PostProc postproc;	/* Required post-processing after execution. */
+#if LJ_32 && LJ_HASFFI
+  int needsplit;	/* Need SPLIT pass. */
+#endif
 
   GCRef *trace;		/* Array of traces. */
   TraceNo freetrace;	/* Start of scan for next free trace. */
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 2d08e187..03caf80d 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
| @@ -538,6 +538,13 @@ LJFOLDF(kfold_conv_knum_int_num) | |||
| 538 | } | 538 | } |
| 539 | } | 539 | } |
| 540 | 540 | ||
| 541 | LJFOLD(CONV KNUM IRCONV_U32_NUM) | ||
| 542 | LJFOLDF(kfold_conv_knum_u32_num) | ||
| 543 | { | ||
| 544 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
| 545 | return INTFOLD((int32_t)(uint32_t)knumleft); | ||
| 546 | } | ||
| 547 | |||
| 541 | LJFOLD(CONV KNUM IRCONV_I64_NUM) | 548 | LJFOLD(CONV KNUM IRCONV_I64_NUM) |
| 542 | LJFOLDF(kfold_conv_knum_i64_num) | 549 | LJFOLDF(kfold_conv_knum_i64_num) |
| 543 | { | 550 | { |
| @@ -805,6 +812,7 @@ LJFOLDF(simplify_conv_u32_num) | |||
| 805 | } | 812 | } |
| 806 | 813 | ||
| 807 | LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32*/ | 814 | LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32*/ |
| 815 | LJFOLD(CONV CONV IRCONV_U64_NUM) /* _INT or _U32*/ | ||
| 808 | LJFOLDF(simplify_conv_i64_num) | 816 | LJFOLDF(simplify_conv_i64_num) |
| 809 | { | 817 | { |
| 810 | PHIBARRIER(fleft); | 818 | PHIBARRIER(fleft); |
| @@ -826,23 +834,6 @@ LJFOLDF(simplify_conv_i64_num) | |||
| 826 | return NEXTFOLD; | 834 | return NEXTFOLD; |
| 827 | } | 835 | } |
| 828 | 836 | ||
| 829 | LJFOLD(CONV CONV IRCONV_U64_NUM) /* _U32*/ | ||
| 830 | LJFOLDF(simplify_conv_u64_num) | ||
| 831 | { | ||
| 832 | PHIBARRIER(fleft); | ||
| 833 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | ||
| 834 | #if LJ_TARGET_X64 | ||
| 835 | return fleft->op1; | ||
| 836 | #else | ||
| 837 | /* Reduce to a zero-extension. */ | ||
| 838 | fins->op1 = fleft->op1; | ||
| 839 | fins->op2 = (IRT_U64<<5)|IRT_U32; | ||
| 840 | return RETRYFOLD; | ||
| 841 | #endif | ||
| 842 | } | ||
| 843 | return NEXTFOLD; | ||
| 844 | } | ||
| 845 | |||
| 846 | /* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ | 837 | /* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ |
| 847 | LJFOLD(TOBIT CONV KNUM) | 838 | LJFOLD(TOBIT CONV KNUM) |
| 848 | LJFOLDF(simplify_tobit_conv) | 839 | LJFOLDF(simplify_tobit_conv) |
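The new kfold_conv_knum_u32_num rule above folds a truncating num-to-u32 conversion of a constant at fold time, storing the u32 result bit-for-bit in a signed int IR constant. A standalone C sketch of exactly that semantics (illustrative only; the constant chosen here is an assumption for demonstration):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
      double knum = 4294967040.0;                /* Fits in u32, not in i32. */
      int32_t folded = (int32_t)(uint32_t)knum;  /* Same cast as the fold rule. */
      printf("%d\n", folded);                    /* -256, bit pattern 0xffffff00. */
      return 0;
    }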
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c new file mode 100644 index 00000000..3cb30514 --- /dev/null +++ b/src/lj_opt_split.c | |||
| @@ -0,0 +1,343 @@ | |||
| 1 | /* | ||
| 2 | ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. | ||
| 3 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h | ||
| 4 | */ | ||
| 5 | |||
| 6 | #define lj_opt_split_c | ||
| 7 | #define LUA_CORE | ||
| 8 | |||
| 9 | #include "lj_obj.h" | ||
| 10 | |||
| 11 | #if LJ_HASJIT && LJ_HASFFI && LJ_32 | ||
| 12 | |||
| 13 | #include "lj_err.h" | ||
| 14 | #include "lj_str.h" | ||
| 15 | #include "lj_ir.h" | ||
| 16 | #include "lj_jit.h" | ||
| 17 | #include "lj_iropt.h" | ||
| 18 | #include "lj_vm.h" | ||
| 19 | |||
| 20 | /* SPLIT pass: | ||
| 21 | ** | ||
| 22 | ** This pass splits up 64 bit IR instructions into multiple 32 bit IR | ||
| 23 | ** instructions. It's only active for 32 bit CPUs which lack native 64 bit | ||
| 24 | ** operations. The FFI is currently the only emitter for 64 bit | ||
| 25 | ** instructions, so this pass is disabled if the FFI is disabled. | ||
| 26 | ** | ||
| 27 | ** Splitting the IR in a separate pass keeps each 32 bit IR assembler | ||
| 28 | ** backend simple. Only a small amount of extra functionality needs to be | ||
| 29 | ** implemented. This is much easier than adding support for allocating | ||
| 30 | ** register pairs to each backend (believe me, I tried). A few simple but | ||
| 31 | ** important optimizations can be performed by the SPLIT pass, which would | ||
| 32 | ** be tedious to do in the backend. | ||
| 33 | ** | ||
| 34 | ** The basic idea is to replace each 64 bit IR instruction with its 32 bit | ||
| 35 | ** equivalent plus an extra HIOP instruction. The split IR is not passed | ||
| 36 | ** through FOLD or any other optimizations, so each HIOP is guaranteed to | ||
| 37 | ** immediately follow its counterpart. The actual functionality of HIOP is | ||
| 38 | ** inferred from the previous instruction. | ||
| 39 | ** | ||
| 40 | ** The operands of HIOP hold the hiword input references. The output of HIOP | ||
| 41 | ** is the hiword output reference, which is also used to hold the hiword | ||
| 42 | ** register or spill slot information. The register allocator treats this | ||
| 43 | ** instruction independently of any other instruction, which improves code | ||
| 44 | ** quality compared to using fixed register pairs. | ||
| 45 | ** | ||
| 46 | ** It's easier to split up some instructions into two regular 32 bit | ||
| 47 | ** instructions. E.g. XLOAD is split up into two XLOADs with two different | ||
| 48 | ** addresses. Obviously 64 bit constants need to be split up into two 32 bit | ||
| 49 | ** constants, too. Some hiword instructions can be entirely omitted, e.g. | ||
| 50 | ** when zero-extending a 32 bit value to 64 bits. | ||
| 51 | ** | ||
| 52 | ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with | ||
| 53 | ** two int64_t fields: | ||
| 54 | ** | ||
| 55 | ** 0100 p32 ADD base +8 | ||
| 56 | ** 0101 i64 XLOAD 0100 | ||
| 57 | ** 0102 i64 ADD 0101 +1 | ||
| 58 | ** 0103 p32 ADD base +16 | ||
| 59 | ** 0104 i64 XSTORE 0103 0102 | ||
| 60 | ** | ||
| 61 | ** mov rax, [esi+0x8] | ||
| 62 | ** add rax, +0x01 | ||
| 63 | ** mov [esi+0x10], rax | ||
| 64 | ** | ||
| 65 | ** Here's the transformed IR and the x86 machine code after the SPLIT pass: | ||
| 66 | ** | ||
| 67 | ** 0100 p32 ADD base +8 | ||
| 68 | ** 0101 int XLOAD 0100 | ||
| 69 | ** 0102 p32 ADD base +12 | ||
| 70 | ** 0103 int XLOAD 0102 | ||
| 71 | ** 0104 int ADD 0101 +1 | ||
| 72 | ** 0105 int HIOP 0103 +0 | ||
| 73 | ** 0106 p32 ADD base +16 | ||
| 74 | ** 0107 int XSTORE 0106 0104 | ||
| 75 | ** 0108 p32 ADD base +20 | ||
| 76 | ** 0109 int XSTORE 0108 0105 | ||
| 77 | ** | ||
| 78 | ** mov eax, [esi+0x8] | ||
| 79 | ** mov ecx, [esi+0xc] | ||
| 80 | ** add eax, +0x01 | ||
| 81 | ** adc ecx, +0x00 | ||
| 82 | ** mov [esi+0x10], eax | ||
| 83 | ** mov [esi+0x14], ecx | ||
| 84 | ** | ||
| 85 | ** You may notice the reassociated hiword address computation, which is | ||
| 86 | ** later fused into the mov operands by the assembler. | ||
| 87 | */ | ||
| 88 | |||
| 89 | /* Some local macros to save typing. Undef'd at the end. */ | ||
| 90 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
| 91 | |||
| 92 | /* Directly emit the transformed IR without updating chains etc. */ | ||
| 93 | static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2) | ||
| 94 | { | ||
| 95 | IRRef nref = lj_ir_nextins(J); | ||
| 96 | IRIns *ir = IR(nref); | ||
| 97 | ir->ot = ot; | ||
| 98 | ir->op1 = op1; | ||
| 99 | ir->op2 = op2; | ||
| 100 | return nref; | ||
| 101 | } | ||
| 102 | |||
| 103 | /* Emit a CALLN with two split 64 bit arguments. */ | ||
| 104 | static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir, | ||
| 105 | IRIns *ir, IRCallID id) | ||
| 106 | { | ||
| 107 | IRRef tmp, op1 = ir->op1, op2 = ir->op2; | ||
| 108 | J->cur.nins--; | ||
| 109 | #if LJ_LE | ||
| 110 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); | ||
| 111 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); | ||
| 112 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); | ||
| 113 | #else | ||
| 114 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); | ||
| 115 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); | ||
| 116 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); | ||
| 117 | #endif | ||
| 118 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); | ||
| 119 | return split_emit(J, IRTI(IR_HIOP), tmp, tmp); | ||
| 120 | } | ||
| 121 | |||
| 122 | /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ | ||
| 123 | static IRRef split_ptr(jit_State *J, IRRef ref) | ||
| 124 | { | ||
| 125 | IRIns *ir = IR(ref); | ||
| 126 | int32_t ofs = 4; | ||
| 127 | if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Reassociate address. */ | ||
| 128 | ofs += IR(ir->op2)->i; | ||
| 129 | ref = ir->op1; | ||
| 130 | if (ofs == 0) return ref; | ||
| 131 | } | ||
| 132 | return split_emit(J, IRTI(IR_ADD), ref, lj_ir_kint(J, ofs)); | ||
| 133 | } | ||
| 134 | |||
| 135 | /* Transform the old IR to the new IR. */ | ||
| 136 | static void split_ir(jit_State *J) | ||
| 137 | { | ||
| 138 | IRRef nins = J->cur.nins, nk = J->cur.nk; | ||
| 139 | MSize irlen = nins - nk; | ||
| 140 | MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); | ||
| 141 | IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); | ||
| 142 | IRRef1 *hisubst; | ||
| 143 | IRRef ref; | ||
| 144 | |||
| 145 | /* Copy old IR to buffer. */ | ||
| 146 | memcpy(oir, IR(nk), irlen*sizeof(IRIns)); | ||
| 147 | /* Bias hiword substitution table and old IR. Loword kept in field prev. */ | ||
| 148 | hisubst = (IRRef1 *)&oir[irlen] - nk; | ||
| 149 | oir -= nk; | ||
| 150 | |||
| 151 | /* Remove all IR instructions, but retain IR constants. */ | ||
| 152 | J->cur.nins = REF_FIRST; | ||
| 153 | |||
| 154 | /* Process constants and fixed references. */ | ||
| 155 | for (ref = nk; ref <= REF_BASE; ref++) { | ||
| 156 | IRIns *ir = &oir[ref]; | ||
| 157 | if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */ | ||
| 158 | TValue tv = *ir_k64(ir); | ||
| 159 | ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); | ||
| 160 | hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); | ||
| 161 | } else { | ||
| 162 | ir->prev = (IRRef1)ref; /* Identity substitution for loword. */ | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | /* Process old IR instructions. */ | ||
| 167 | for (ref = REF_FIRST; ref < nins; ref++) { | ||
| 168 | IRIns *ir = &oir[ref]; | ||
| 169 | IRRef nref = lj_ir_nextins(J); | ||
| 170 | IRIns *nir = IR(nref); | ||
| 171 | |||
| 172 | /* Copy-substitute old instruction to new instruction. */ | ||
| 173 | nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; | ||
| 174 | nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; | ||
| 175 | ir->prev = nref; /* Loword substitution. */ | ||
| 176 | nir->o = ir->o; | ||
| 177 | nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); | ||
| 178 | |||
| 179 | /* Split 64 bit instructions. */ | ||
| 180 | if (irt_isint64(ir->t)) { | ||
| 181 | IRRef hi = hisubst[ir->op1]; | ||
| 182 | nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ | ||
| 183 | switch (ir->o) { | ||
| 184 | case IR_ADD: | ||
| 185 | case IR_SUB: | ||
| 186 | /* Use plain op for hiword if loword cannot produce a carry/borrow. */ | ||
| 187 | if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { | ||
| 188 | ir->prev = nir->op1; /* Pass through loword. */ | ||
| 189 | nir->op1 = hi; nir->op2 = hisubst[ir->op2]; | ||
| 190 | hi = nref; | ||
| 191 | break; | ||
| 192 | } | ||
| 193 | /* fallthrough */ | ||
| 194 | case IR_NEG: | ||
| 195 | hi = split_emit(J, IRTI(IR_HIOP), hi, hisubst[ir->op2]); | ||
| 196 | break; | ||
| 197 | case IR_MUL: | ||
| 198 | hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); | ||
| 199 | break; | ||
| 200 | case IR_POWI: | ||
| 201 | hi = split_call64(J, hisubst, oir, ir, | ||
| 202 | irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
| 203 | IRCALL_lj_carith_powu64); | ||
| 204 | break; | ||
| 205 | case IR_XLOAD: | ||
| 206 | hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2); | ||
| 207 | #if LJ_BE | ||
| 208 | ir->prev = hi; hi = nref; | ||
| 209 | #endif | ||
| 210 | break; | ||
| 211 | case IR_XSTORE: | ||
| 212 | #if LJ_LE | ||
| 213 | hi = hisubst[ir->op2]; | ||
| 214 | #else | ||
| 215 | hi = nir->op2; nir->op2 = hisubst[ir->op2]; | ||
| 216 | #endif | ||
| 217 | split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hi); | ||
| 218 | continue; | ||
| 219 | case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ | ||
| 220 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | ||
| 221 | if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ | ||
| 222 | hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); | ||
| 223 | } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ | ||
| 224 | /* Drop cast, since assembler doesn't care. */ | ||
| 225 | hisubst[ref] = hi; | ||
| 226 | goto fwdlo; | ||
| 227 | } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ | ||
| 228 | IRRef k31 = lj_ir_kint(J, 31); | ||
| 229 | nir = IR(nref); /* May have been reallocated. */ | ||
| 230 | ir->prev = nir->op1; /* Pass through loword. */ | ||
| 231 | nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ | ||
| 232 | nir->op2 = k31; | ||
| 233 | hi = nref; | ||
| 234 | } else { /* Zero-extend to 64 bit. */ | ||
| 235 | hisubst[ref] = lj_ir_kint(J, 0); | ||
| 236 | goto fwdlo; | ||
| 237 | } | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | case IR_PHI: { | ||
| 241 | IRRef hi2; | ||
| 242 | if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || | ||
| 243 | nir->op1 == nir->op2) | ||
| 244 | J->cur.nins--; /* Drop useless PHIs. */ | ||
| 245 | hi2 = hisubst[ir->op2]; | ||
| 246 | if (!((irref_isk(hi) && irref_isk(hi2)) || hi == hi2)) | ||
| 247 | split_emit(J, IRTI(IR_PHI), hi, hi2); | ||
| 248 | continue; | ||
| 249 | } | ||
| 250 | default: | ||
| 251 | lua_assert(ir->o <= IR_NE); | ||
| 252 | split_emit(J, IRTGI(IR_HIOP), hi, hisubst[ir->op2]); /* Comparisons. */ | ||
| 253 | continue; | ||
| 254 | } | ||
| 255 | hisubst[ref] = hi; /* Store hiword substitution. */ | ||
| 256 | } else if (ir->o == IR_CONV) { /* See above, too. */ | ||
| 257 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | ||
| 258 | if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ | ||
| 259 | if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ | ||
| 260 | ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), | ||
| 261 | hisubst[ir->op1], nref); | ||
| 262 | } else { /* Truncate to lower 32 bits. */ | ||
| 263 | fwdlo: | ||
| 264 | ir->prev = nir->op1; /* Forward loword. */ | ||
| 265 | /* Replace with NOP to avoid messing up the snapshot logic. */ | ||
| 266 | nir->ot = IRT(IR_NOP, IRT_NIL); | ||
| 267 | nir->op1 = nir->op2 = 0; | ||
| 268 | } | ||
| 269 | } | ||
| 270 | } else if (ir->o == IR_LOOP) { | ||
| 271 | J->loopref = nref; /* Needed by assembler. */ | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | /* Add PHI marks. */ | ||
| 276 | for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { | ||
| 277 | IRIns *ir = IR(ref); | ||
| 278 | if (ir->o != IR_PHI) break; | ||
| 279 | if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); | ||
| 280 | if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); | ||
| 281 | } | ||
| 282 | |||
| 283 | /* Substitute snapshot maps. */ | ||
| 284 | oir[nins].prev = J->cur.nins; /* Substitution for last snapshot. */ | ||
| 285 | { | ||
| 286 | SnapNo i, nsnap = J->cur.nsnap; | ||
| 287 | for (i = 0; i < nsnap; i++) { | ||
| 288 | SnapShot *snap = &J->cur.snap[i]; | ||
| 289 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; | ||
| 290 | MSize n, nent = snap->nent; | ||
| 291 | snap->ref = oir[snap->ref].prev; | ||
| 292 | for (n = 0; n < nent; n++) { | ||
| 293 | SnapEntry sn = map[n]; | ||
| 294 | map[n] = ((sn & 0xffff0000) | oir[snap_ref(sn)].prev); | ||
| 295 | } | ||
| 296 | } | ||
| 297 | } | ||
| 298 | } | ||
| 299 | |||
| 300 | /* Protected callback for split pass. */ | ||
| 301 | static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud) | ||
| 302 | { | ||
| 303 | jit_State *J = (jit_State *)ud; | ||
| 304 | split_ir(J); | ||
| 305 | UNUSED(L); UNUSED(dummy); | ||
| 306 | return NULL; | ||
| 307 | } | ||
| 308 | |||
| 309 | #ifdef LUA_USE_ASSERT | ||
| 310 | /* Slow, but sure way to check whether a SPLIT pass is needed. */ | ||
| 311 | static int split_needsplit(jit_State *J) | ||
| 312 | { | ||
| 313 | IRIns *ir, *irend; | ||
| 314 | IRRef ref; | ||
| 315 | for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) | ||
| 316 | if (irt_isint64(ir->t)) | ||
| 317 | return 1; | ||
| 318 | for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) | ||
| 319 | if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 || | ||
| 320 | (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64) | ||
| 321 | return 1; | ||
| 322 | return 0; /* Nope. */ | ||
| 323 | } | ||
| 324 | #endif | ||
| 325 | |||
| 326 | /* SPLIT pass. */ | ||
| 327 | void lj_opt_split(jit_State *J) | ||
| 328 | { | ||
| 329 | lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ | ||
| 330 | if (J->needsplit) { | ||
| 331 | int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); | ||
| 332 | if (errcode) { | ||
| 333 | /* Completely reset the trace to avoid inconsistent dump on abort. */ | ||
| 334 | J->cur.nins = J->cur.nk = REF_BASE; | ||
| 335 | J->cur.nsnap = 0; | ||
| 336 | lj_err_throw(J->L, errcode); /* Propagate errors. */ | ||
| 337 | } | ||
| 338 | } | ||
| 339 | } | ||
| 340 | |||
| 341 | #undef IR | ||
| 342 | |||
| 343 | #endif | ||
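The ADD/HIOP pair in the example IR at the top of this file deserves a concrete reading: the loword ADD may produce a carry, which the HIOP consumes, so on x86 the pair assembles to add followed by adc. A reference model in plain C (illustrative code, not part of the patch):

    #include <stdint.h>

    /* Reference semantics for a split 64 bit add:
    ** lo = ADD(alo, blo); hi = HIOP(ahi, bhi) plus the carry out of lo.
    */
    static uint64_t add64_split(uint32_t alo, uint32_t ahi,
                                uint32_t blo, uint32_t bhi)
    {
      uint32_t lo = alo + blo;               /* 0104 int ADD  -> add */
      uint32_t hi = ahi + bhi + (lo < alo);  /* 0105 int HIOP -> adc */
      return ((uint64_t)hi << 32) | lo;
    }

The same decomposition explains the SUB case (sbb) and the special case in split_ir: when the loword of op2 is the constant 0, no carry can occur, so the loword passes through and the hiword uses a plain op instead of a HIOP.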
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 94ab3c32..37c68f4b 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
| @@ -193,6 +193,7 @@ typedef enum { | |||
| 193 | XI_FLD1 = 0xe8d9, | 193 | XI_FLD1 = 0xe8d9, |
| 194 | XI_FLDLG2 = 0xecd9, | 194 | XI_FLDLG2 = 0xecd9, |
| 195 | XI_FLDLN2 = 0xedd9, | 195 | XI_FLDLN2 = 0xedd9, |
| 196 | XI_FDUP = 0xc0d9, /* Really fld st0. */ | ||
| 196 | XI_FPOP = 0xd8dd, /* Really fstp st0. */ | 197 | XI_FPOP = 0xd8dd, /* Really fstp st0. */ |
| 197 | XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ | 198 | XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ |
| 198 | XI_FRNDINT = 0xfcd9, | 199 | XI_FRNDINT = 0xfcd9, |
| @@ -263,10 +264,17 @@ typedef enum { | |||
| 263 | XO_MOVD = XO_660f(6e), | 264 | XO_MOVD = XO_660f(6e), |
| 264 | XO_MOVDto = XO_660f(7e), | 265 | XO_MOVDto = XO_660f(7e), |
| 265 | 266 | ||
| 267 | XO_FLDd = XO_(d9), XOg_FLDd = 0, | ||
| 266 | XO_FLDq = XO_(dd), XOg_FLDq = 0, | 268 | XO_FLDq = XO_(dd), XOg_FLDq = 0, |
| 267 | XO_FILDd = XO_(db), XOg_FILDd = 0, | 269 | XO_FILDd = XO_(db), XOg_FILDd = 0, |
| 270 | XO_FILDq = XO_(df), XOg_FILDq = 5, | ||
| 271 | XO_FSTPd = XO_(d9), XOg_FSTPd = 3, | ||
| 268 | XO_FSTPq = XO_(dd), XOg_FSTPq = 3, | 272 | XO_FSTPq = XO_(dd), XOg_FSTPq = 3, |
| 269 | XO_FISTPq = XO_(df), XOg_FISTPq = 7, | 273 | XO_FISTPq = XO_(df), XOg_FISTPq = 7, |
| 274 | XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, | ||
| 275 | XO_FADDq = XO_(dc), XOg_FADDq = 0, | ||
| 276 | XO_FLDCW = XO_(d9), XOg_FLDCW = 5, | ||
| 277 | XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 | ||
| 270 | } x86Op; | 278 | } x86Op; |
| 271 | 279 | ||
| 272 | /* x86 opcode groups. */ | 280 | /* x86 opcode groups. */ |
| @@ -278,6 +286,7 @@ typedef uint32_t x86Group; | |||
| 278 | #define XG_TOXOi8(xg) ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000))) | 286 | #define XG_TOXOi8(xg) ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000))) |
| 279 | 287 | ||
| 280 | #define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27))) | 288 | #define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27))) |
| 289 | #define XO_ARITHw(a) ((x86Op)(0x036600fd + ((a)<<27))) | ||
| 281 | 290 | ||
| 282 | typedef enum { | 291 | typedef enum { |
| 283 | XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP, | 292 | XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP, |
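The two-byte XI_ constants added here follow the convention visible in the surrounding definitions: the first instruction byte sits in the low byte of the 16 bit value, so XI_FDUP = 0xc0d9 is emitted as the byte sequence d9 c0, which is fld st(0) (hence "Really fld st0"). A tiny standalone check of that reading; the storage convention is inferred from context, not stated in the patch:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
      uint16_t xi_fdup = 0xc0d9;  /* Mirrors XI_FDUP above. */
      printf("%02x %02x\n", (unsigned)(xi_fdup & 0xff),
             (unsigned)(xi_fdup >> 8));  /* Prints: d9 c0 = fld st(0). */
      return 0;
    }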
diff --git a/src/lj_trace.c b/src/lj_trace.c index da20f991..b67e8f75 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
| @@ -394,6 +394,7 @@ static void trace_start(jit_State *J) | |||
| 394 | J->bcskip = 0; | 394 | J->bcskip = 0; |
| 395 | J->guardemit.irt = 0; | 395 | J->guardemit.irt = 0; |
| 396 | J->postproc = LJ_POST_NONE; | 396 | J->postproc = LJ_POST_NONE; |
| 397 | lj_resetsplit(J); | ||
| 397 | setgcref(J->cur.startpt, obj2gco(J->pt)); | 398 | setgcref(J->cur.startpt, obj2gco(J->pt)); |
| 398 | 399 | ||
| 399 | L = J->L; | 400 | L = J->L; |
| @@ -592,6 +593,7 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) | |||
| 592 | } | 593 | } |
| 593 | J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ | 594 | J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ |
| 594 | } | 595 | } |
| 596 | lj_opt_split(J); | ||
| 595 | J->state = LJ_TRACE_ASM; | 597 | J->state = LJ_TRACE_ASM; |
| 596 | break; | 598 | break; |
| 597 | 599 | ||
diff --git a/src/ljamalg.c b/src/ljamalg.c index 4d5f7600..5d90c002 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c | |||
| @@ -58,6 +58,7 @@ | |||
| 58 | #include "lj_opt_narrow.c" | 58 | #include "lj_opt_narrow.c" |
| 59 | #include "lj_opt_dce.c" | 59 | #include "lj_opt_dce.c" |
| 60 | #include "lj_opt_loop.c" | 60 | #include "lj_opt_loop.c" |
| 61 | #include "lj_opt_split.c" | ||
| 61 | #include "lj_mcode.c" | 62 | #include "lj_mcode.c" |
| 62 | #include "lj_snap.c" | 63 | #include "lj_snap.c" |
| 63 | #include "lj_record.c" | 64 | #include "lj_record.c" |
