 src/Makefile        |   2
 src/Makefile.dep    |  15
 src/lj_asm.c        | 444
 src/lj_carith.c     |   8
 src/lj_carith.h     |   3
 src/lj_crecord.c    |  20
 src/lj_ir.h         |  21
 src/lj_iropt.h      |   6
 src/lj_jit.h        |  12
 src/lj_opt_fold.c   |  25
 src/lj_opt_split.c  | 343
 src/lj_target_x86.h |   9
 src/lj_trace.c      |   2
 src/ljamalg.c       |   1
 14 files changed, 795 insertions(+), 116 deletions(-)
diff --git a/src/Makefile b/src/Makefile
index a2be1a18..0150b049 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -331,7 +331,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_api.o \
 	  lj_lex.o lj_parse.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
-	  lj_opt_dce.o lj_opt_loop.o \
+	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o \
 	  lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
 	  lj_asm.o lj_trace.o lj_gdbjit.o \
 	  lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_carith.o lj_clib.o \
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 3d0c4239..1534ac27 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -128,6 +128,8 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_traceerr.h
+lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h
 lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \
  lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
@@ -167,10 +169,11 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_carith.c lj_carith.h \
  lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c \
  lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
- lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_mcode.c lj_mcode.h lj_snap.c \
- lj_target.h lj_target_*.h lj_record.c lj_record.h lj_ffrecord.h \
- lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
- lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \
- lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \
- lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c \
+ lj_mcode.h lj_snap.c lj_target.h lj_target_*.h lj_record.c lj_record.h \
+ lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h \
+ lj_asm.c lj_asm.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
+ lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
+ lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
+ lib_ffi.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
diff --git a/src/lj_asm.c b/src/lj_asm.c
index cc2ae597..441700d4 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -347,6 +347,20 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
   }
 }
 
+/* op rm/mrm, i */
+static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
+{
+  x86Op xo;
+  if (checki8(i)) {
+    emit_i8(as, i);
+    xo = XG_TOXOi8(xg);
+  } else {
+    emit_i32(as, i);
+    xo = XG_TOXOi(xg);
+  }
+  emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64));
+}
+
 /* -- Emit moves ---------------------------------------------------------- */
 
 /* mov [base+ofs], i */
@@ -371,7 +385,10 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 /* mov r, i / xor r, r */
 static void emit_loadi(ASMState *as, Reg r, int32_t i)
 {
-  if (i == 0) {
+  /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */
+  if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
+			    (as->curins+1 < as->T->nins &&
+			     IR(as->curins+1)->o == IR_HIOP)))) {
     emit_rr(as, XO_ARITH(XOg_XOR), r, r);
   } else {
     MCode *p = as->mcp;
@@ -422,6 +439,19 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
 /* Label for short jumps. */
 typedef MCode *MCLabel;
 
+#if LJ_32 && LJ_HASFFI
+/* jmp short target */
+static void emit_sjmp(ASMState *as, MCLabel target)
+{
+  MCode *p = as->mcp;
+  ptrdiff_t delta = target - p;
+  lua_assert(delta == (int8_t)delta);
+  p[-1] = (MCode)(int8_t)delta;
+  p[-2] = XI_JMPs;
+  as->mcp = p - 2;
+}
+#endif
+
 /* jcc short target */
 static void emit_sjcc(ASMState *as, int cc, MCLabel target)
 {
@@ -630,7 +660,7 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
   } else if (ir->o == IR_KPRI) {  /* REF_NIL stores ASMREF_L register. */
     lua_assert(irt_isnil(ir->t));
     emit_getgl(as, r, jit_L);
-#if LJ_64  /* NYI: 32 bit register pairs. */
+#if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
 #endif
@@ -681,8 +711,7 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
 #if LJ_64
 #define REX_64IR(ir, r)		((r) + (irt_is64((ir)->t) ? REX_64 : 0))
 #else
-/* NYI: 32 bit register pairs. */
-#define REX_64IR(ir, r)		check_exp(!irt_is64((ir)->t), (r))
+#define REX_64IR(ir, r)		(r)
 #endif
 
 /* Generic move between two regs. */
@@ -939,7 +968,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
       emit_loadn(as, dest, tv);
       return;
     }
-#if LJ_64  /* NYI: 32 bit register pairs. */
+#if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, dest, ir_kint64(ir)->u64);
     return;
@@ -1463,7 +1492,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 #endif
     if (r) {  /* Argument is in a register. */
       if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
-#if LJ_64  /* NYI: 32 bit register pairs. */
+#if LJ_64
	if (ir->o == IR_KINT64)
	  emit_loadu64(as, r, ir_kint64(ir)->u64);
	else
@@ -1519,7 +1548,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     if (irt_isfp(ir->t)) {
-      int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
+      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
 #if LJ_64
       if ((ci->flags & CCI_CASTU64)) {
	Reg dest = ir->r;
@@ -1632,19 +1661,24 @@ static void asm_conv(ASMState *as, IRIns *ir)
   int stfp = (st == IRT_NUM || st == IRT_FLOAT);
   IRRef lref = ir->op1;
   lua_assert(irt_type(ir->t) != st);
+  lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64)));  /* Handled by SPLIT. */
   if (irt_isfp(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     if (stfp) {  /* FP to FP conversion. */
       Reg left = asm_fuseload(as, lref, RSET_FPR);
       emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left);
       if (left == dest) return;  /* Avoid the XO_XORPS. */
-#if LJ_32
-    } else if (st >= IRT_U32) {
-      /* NYI: 64 bit integer or uint32_t to number conversion. */
-      setintV(&as->J->errinfo, ir->o);
-      lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    } else if (LJ_32 && st == IRT_U32) {  /* U32 to FP conversion on x86. */
+      /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
+      cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
+      Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+      if (irt_isfloat(ir->t))
+	emit_rr(as, XO_CVTSD2SS, dest, dest);
+      emit_rr(as, XO_SUBSD, dest, bias);  /* Subtract 2^52+2^51 bias. */
+      emit_rr(as, XO_XORPS, dest, bias);  /* Merge bias and integer. */
+      emit_loadn(as, bias, k);
+      emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
       return;
-#endif
     } else {  /* Integer to FP conversion. */
       Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ?
		  ra_alloc1(as, lref, RSET_GPR) :
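The U32-to-FP path above avoids a 64 bit CVT by merging the integer into the mantissa of the constant 2^52+2^51 and subtracting that bias. A standalone C sketch of the same trick (illustration only, not part of the patch):

  #include <stdint.h>
  #include <string.h>

  /* number = (2^52+2^51 .. u32) - (2^52+2^51), as in the XORPS/SUBSD pair. */
  static double u32_to_double(uint32_t u)
  {
    uint64_t bits = 0x4338000000000000ULL | (uint64_t)u;  /* Merge bias and u. */
    double d;
    memcpy(&d, &bits, sizeof(d));   /* d == 2^52 + 2^51 + u, exactly. */
    return d - 6755399441055744.0;  /* Subtract the 2^52 + 2^51 bias. */
  }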
@@ -1663,41 +1697,47 @@ static void asm_conv(ASMState *as, IRIns *ir)
     emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
-      lua_assert(!irt_is64(ir->t));  /* No support for checked 64 bit conv. */
+      /* Checked conversions are only supported from number to int. */
+      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
-#if LJ_32
-    } else if (irt_isi64(ir->t) || irt_isu64(ir->t) || irt_isu32(ir->t)) {
-      /* NYI: number to 64 bit integer or uint32_t conversion. */
-      setintV(&as->J->errinfo, ir->o);
-      lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-#endif
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       x86Op op = st == IRT_NUM ?
		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
-      if (LJ_64 && irt_isu64(ir->t)) {
-	const void *k = lj_ir_k64_find(as->J, U64x(c3f00000,00000000));
-	MCLabel l_end = emit_label(as);
-	Reg left = IR(lref)->r;
+      if (LJ_32 && irt_isu32(ir->t)) {  /* FP to U32 conversion on x86. */
+	/* u32 = (int32_t)(number - 2^31) + 2^31 */
+	Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
					  ra_scratch(as, RSET_FPR);
+	emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
+	emit_rr(as, op, dest, tmp);
+	if (st == IRT_NUM)
+	  emit_rma(as, XO_ADDSD, tmp,
+		   lj_ir_k64_find(as->J, U64x(c1e00000,00000000)));
+	else
+	  emit_rma(as, XO_ADDSS, tmp,
+		   lj_ir_k64_find(as->J, U64x(00000000,cf000000)));
+	ra_left(as, tmp, lref);
+      } else if (LJ_64 && irt_isu64(ir->t)) {
	/* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
-	if (ra_hasreg(left)) {
-	  Reg tmpn = ra_scratch(as, rset_exclude(RSET_FPR, left));
-	  emit_rr(as, op, dest|REX_64, tmpn);
-	  emit_rr(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, tmpn, left);
-	  emit_rma(as, st == IRT_NUM ? XMM_MOVRM(as) : XO_MOVSS, tmpn, k);
-	} else {
-	  left = ra_allocref(as, lref, RSET_FPR);
-	  emit_rr(as, op, dest|REX_64, left);
-	  emit_rma(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, left, k);
-	}
+	Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
+					  ra_scratch(as, RSET_FPR);
+	MCLabel l_end = emit_label(as);
+	emit_rr(as, op, dest|REX_64, tmp);
+	if (st == IRT_NUM)
+	  emit_rma(as, XO_ADDSD, tmp,
+		   lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+	else
+	  emit_rma(as, XO_ADDSS, tmp,
+		   lj_ir_k64_find(as->J, U64x(00000000,df800000)));
	emit_sjcc(as, CC_NS, l_end);
	emit_rr(as, XO_TEST, dest|REX_64, dest);  /* Check if dest < 2^63. */
-	emit_rr(as, op, dest|REX_64, left);
+	emit_rr(as, op, dest|REX_64, tmp);
+	ra_left(as, tmp, lref);
       } else {
	Reg left = asm_fuseload(as, lref, RSET_FPR);
	if (LJ_64 && irt_isu32(ir->t))
-	  emit_rr(as, XO_MOV, dest, dest);  /* Zero upper 32 bits. */
+	  emit_rr(as, XO_MOV, dest, dest);  /* Zero hiword. */
	emit_mrm(as, op,
		 dest|((LJ_64 &&
			(irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
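The FP-to-U32 path uses only the signed conversion instruction: it subtracts 2^31 before converting and adds it back as an integer afterwards. A standalone C sketch (illustration only, not part of the patch):

  #include <stdint.h>

  /* u32 = (int32_t)(number - 2^31) + 2^31, as in the ADDSD/CVT/ADD sequence.
  ** Exact for integral d in [0, 2^32).
  */
  static uint32_t double_to_u32(double d)
  {
    int32_t i = (int32_t)(d - 2147483648.0);  /* Biased convert fits in i32. */
    return (uint32_t)i + 2147483648u;         /* The final ADD 0x80000000. */
  }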
@@ -1728,12 +1768,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_mrm(as, op, dest, left);
     }
   } else {  /* 32/64 bit integer conversions. */
-    if (irt_is64(ir->t)) {
-#if LJ_32
-      /* NYI: conversion to 64 bit integers. */
-      setintV(&as->J->errinfo, ir->o);
-      lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-#else
+    if (LJ_32) {  /* Only need to handle 32/32 bit no-op (cast) on x86. */
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      ra_left(as, dest, lref);  /* Do nothing, but may need to move regs. */
+    } else if (irt_is64(ir->t)) {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       if (st64 || !(ir->op2 & IRCONV_SEXT)) {
	/* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
@@ -1742,21 +1780,14 @@ static void asm_conv(ASMState *as, IRIns *ir)
	Reg left = asm_fuseload(as, lref, RSET_GPR);
	emit_mrm(as, XO_MOVSXd, dest|REX_64, left);
       }
-#endif
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
       if (st64) {
-#if LJ_32
-	/* NYI: conversion from 64 bit integers. */
-	setintV(&as->J->errinfo, ir->o);
-	lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
-#else
	Reg left = asm_fuseload(as, lref, RSET_GPR);
-	/* This is either a 32 bit reg/reg mov which zeroes the hi-32 bits
-	** or a load of the lower 32 bits from a 64 bit address.
+	/* This is either a 32 bit reg/reg mov which zeroes the hiword
+	** or a load of the loword from a 64 bit address.
	*/
	emit_mrm(as, XO_MOV, dest, left);
-#endif
       } else {  /* 32/32 bit no-op (cast). */
	ra_left(as, dest, lref);  /* Do nothing, but may need to move regs. */
       }
@@ -1764,6 +1795,93 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
+#if LJ_32 && LJ_HASFFI
+/* No SSE conversions to/from 64 bit on x86, so resort to ugly x87 code. */
+
+/* 64 bit integer to FP conversion in 32 bit mode. */
+static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
+{
+  Reg hi = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg lo = ra_alloc1(as, (ir-1)->op1, rset_exclude(RSET_GPR, hi));
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+	      dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
+	    irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
+  if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
+    MCLabel l_end = emit_label(as);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+	     lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_end);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if u64 >= 2^63. */
+  } else {
+    lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64);
+  }
+  emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
+  /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
+  emit_rmro(as, XO_MOVto, hi, RID_ESP, 4);
+  emit_rmro(as, XO_MOVto, lo, RID_ESP, 0);
+}
+
+/* FP to 64 bit integer conversion in 32 bit mode. */
+static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  Reg lo, hi;
+  lua_assert(st == IRT_NUM || st == IRT_FLOAT);
+  lua_assert(dt == IRT_I64 || dt == IRT_U64);
+  lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
+  hi = ra_dest(as, ir, RSET_GPR);
+  lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
+  if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
+  /* NYI: Avoid wide-to-narrow store-to-load forwarding stall. */
+  if (!(as->flags & JIT_F_SSE3)) {  /* Set FPU rounding mode to default. */
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 4);
+    emit_rmro(as, XO_MOVto, lo, RID_ESP, 4);
+    emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
+  }
+  if (dt == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
+    MCLabel l_pop, l_end = emit_label(as);
+    emit_x87op(as, XI_FPOP);
+    l_pop = emit_label(as);
+    emit_sjmp(as, l_end);
+    emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+    if ((as->flags & JIT_F_SSE3))
+      emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+    else
+      emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+	     lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_pop);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if out-of-range (2^63). */
+  }
+  emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+  if ((as->flags & JIT_F_SSE3)) {  /* Truncation is easy with SSE3. */
+    emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+  } else {  /* Otherwise set FPU rounding mode to truncate before the store. */
+    emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 0);
+    emit_rmro(as, XO_MOVtow, lo, RID_ESP, 0);
+    emit_rmro(as, XO_ARITHw(XOg_OR), lo, RID_ESP, 0);
+    emit_loadi(as, lo, 0xc00);
+    emit_rmro(as, XO_FNSTCW, XOg_FNSTCW, RID_ESP, 0);
+  }
+  if (dt == IRT_U64)
+    emit_x87op(as, XI_FDUP);
+  emit_mrm(as, st == IRT_NUM ? XO_FLDq : XO_FLDd,
+	   st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
+	   asm_fuseload(as, ir->op1, RSET_EMPTY));
+}
+#endif
+
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   /* Force a spill slot for the destination register (if any). */
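Both x87 helpers lean on the same compensation: FILD and FIST only know signed 64 bit integers, so u64 values with the top bit set get fixed up with plus or minus 2^64. A standalone C sketch of the load direction (illustration only, not part of the patch):

  #include <stdint.h>

  /* What the FILDq path above computes for IRT_U64 sources. */
  static double u64_to_double(uint64_t u)
  {
    double d = (double)(int64_t)u;    /* FILDq interprets the bits as signed. */
    if ((int64_t)u < 0)               /* TEST hi,hi; the JNS skips the fixup. */
      d += 18446744073709551616.0;    /* FADDq with the 2^64 constant. */
    return d;
  }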
@@ -2644,6 +2762,18 @@ static void asm_powi(ASMState *as, IRIns *ir)
   ra_left(as, RID_EAX, ir->op2);
 }
 
+#if LJ_64 && LJ_HASFFI
+static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+#endif
+
 /* Find out whether swapping operands might be beneficial. */
 static int swapops(ASMState *as, IRIns *ir)
 {
@@ -2877,12 +3007,30 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Virtual flags for unordered FP comparisons. */
-#define VCC_U	0x100		/* Unordered. */
-#define VCC_P	0x200		/* Needs extra CC_P branch. */
-#define VCC_S	0x400		/* Swap avoids CC_P branch. */
+#define VCC_U	0x1000		/* Unordered. */
+#define VCC_P	0x2000		/* Needs extra CC_P branch. */
+#define VCC_S	0x4000		/* Swap avoids CC_P branch. */
 #define VCC_PS	(VCC_P|VCC_S)
 
-static void asm_comp_(ASMState *as, IRIns *ir, int cc)
+/* Map of comparisons to flags. ORDER IR. */
+#define COMPFLAGS(ci, cin, cu, cf)	((ci)+((cu)<<4)+((cin)<<8)+(cf))
+static const uint16_t asm_compmap[IR_ABC+1] = {
+  /*                 signed non-eq unsigned flags */
+  /* LT  */ COMPFLAGS(CC_GE, CC_G,  CC_AE, VCC_PS),
+  /* GE  */ COMPFLAGS(CC_L,  CC_L,  CC_B,  0),
+  /* LE  */ COMPFLAGS(CC_G,  CC_G,  CC_A,  VCC_PS),
+  /* GT  */ COMPFLAGS(CC_LE, CC_L,  CC_BE, 0),
+  /* ULT */ COMPFLAGS(CC_AE, CC_A,  CC_AE, VCC_U),
+  /* UGE */ COMPFLAGS(CC_B,  CC_B,  CC_B,  VCC_U|VCC_PS),
+  /* ULE */ COMPFLAGS(CC_A,  CC_A,  CC_A,  VCC_U),
+  /* UGT */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS),
+  /* EQ  */ COMPFLAGS(CC_NE, CC_NE, CC_NE, VCC_P),
+  /* NE  */ COMPFLAGS(CC_E,  CC_E,  CC_E,  VCC_U|VCC_P),
+  /* ABC */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS)  /* Same as UGT. */
+};
+
+/* FP and integer comparisons. */
+static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
 {
   if (irt_isnum(ir->t)) {
     IRRef lref = ir->op1;
@@ -3008,15 +3156,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
	if (irl+1 == ir)  /* Referencing previous ins? */
	  as->testmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
       } else {
-	x86Op xo;
-	if (checki8(imm)) {
-	  emit_i8(as, imm);
-	  xo = XO_ARITHi8;
-	} else {
-	  emit_i32(as, imm);
-	  xo = XO_ARITHi;
-	}
-	emit_mrm(as, xo, r64 + XOg_CMP, left);
+	emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm);
       }
     }
   } else {
@@ -3028,8 +3168,133 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
   }
 }
 
-#define asm_comp(as, ir, ci, cf, cu) \
-  asm_comp_(as, ir, (ci)+((cf)<<4)+(cu))
+#if LJ_32 && LJ_HASFFI
+/* 64 bit integer comparisons in 32 bit mode. */
+static void asm_comp_int64(ASMState *as, IRIns *ir)
+{
+  uint32_t cc = asm_compmap[(ir-1)->o];
+  RegSet allow = RSET_GPR;
+  Reg lefthi = RID_NONE, leftlo = RID_NONE;
+  Reg righthi = RID_NONE, rightlo = RID_NONE;
+  MCLabel l_around;
+  x86ModRM mrm;
+
+  as->curins--;  /* Skip loword ins. Avoids failing in noconflict(), too. */
+
+  /* Allocate/fuse hiword operands. */
+  if (irref_isk(ir->op2)) {
+    lefthi = asm_fuseload(as, ir->op1, allow);
+  } else {
+    lefthi = ra_alloc1(as, ir->op1, allow);
+    righthi = asm_fuseload(as, ir->op2, allow);
+    if (righthi == RID_MRM) {
+      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
+      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
+    } else {
+      rset_clear(allow, righthi);
+    }
+  }
+  mrm = as->mrm;  /* Save state for hiword instruction. */
+
+  /* Allocate/fuse loword operands. */
+  if (irref_isk((ir-1)->op2)) {
+    leftlo = asm_fuseload(as, (ir-1)->op1, allow);
+  } else {
+    leftlo = ra_alloc1(as, (ir-1)->op1, allow);
+    rightlo = asm_fuseload(as, (ir-1)->op2, allow);
+    if (rightlo == RID_MRM) {
+      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
+      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
+    } else {
+      rset_clear(allow, rightlo);
+    }
+  }
+
+  /* All register allocations must be performed _before_ this point. */
+  l_around = emit_label(as);
+  as->invmcp = as->testmcp = NULL;  /* Cannot use these optimizations. */
+
+  /* Loword comparison and branch. */
+  asm_guardcc(as, cc >> 4);  /* Always use unsigned compare for loword. */
+  if (ra_noreg(rightlo)) {
+    int32_t imm = IR((ir-1)->op2)->i;
+    if (imm == 0 && ((cc >> 4) & 0xa) != 0x2 && leftlo != RID_MRM)
+      emit_rr(as, XO_TEST, leftlo, leftlo);
+    else
+      emit_gmrmi(as, XG_ARITHi(XOg_CMP), leftlo, imm);
+  } else {
+    emit_mrm(as, XO_CMP, leftlo, rightlo);
+  }
+
+  /* Hiword comparison and branches. */
+  if ((cc & 15) != CC_NE)
+    emit_sjcc(as, CC_NE, l_around);  /* Hiword unequal: skip loword compare. */
+  if ((cc & 15) != CC_E)
+    asm_guardcc(as, cc >> 8);  /* Hiword compare without equality check. */
+  as->mrm = mrm;  /* Restore state. */
+  if (ra_noreg(righthi)) {
+    int32_t imm = IR(ir->op2)->i;
+    if (imm == 0 && (cc & 0xa) != 0x2 && lefthi != RID_MRM)
+      emit_rr(as, XO_TEST, lefthi, lefthi);
+    else
+      emit_gmrmi(as, XG_ARITHi(XOg_CMP), lefthi, imm);
+  } else {
+    emit_mrm(as, XO_CMP, lefthi, righthi);
+  }
+}
+#endif
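The control flow above reduces a 64 bit compare to at most two 32 bit compares: the hiwords decide unless they are equal, and only then is the loword compared, always unsigned. A standalone C sketch (illustration only, not part of the patch):

  #include <stdint.h>

  /* Signed 64 bit less-than on a hi/lo register pair. */
  static int lt64(int32_t ahi, uint32_t alo, int32_t bhi, uint32_t blo)
  {
    if (ahi != bhi) return ahi < bhi;  /* Hiword compare, signed. */
    return alo < blo;                  /* Loword compare, always unsigned. */
  }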
+
+/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+#if LJ_32 && LJ_HASFFI
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
+    if (usehi || uselo) {
+      if (irt_isfp(ir->t))
+	asm_conv_fp_int64(as, ir);
+      else
+	asm_conv_int64_fp(as, ir);
+    }
+    as->curins--;  /* Always skip the CONV. */
+    return;
+  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
+    asm_comp_int64(as, ir);
+    return;
+  }
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_ADD:
+    asm_intarith(as, ir, uselo ? XOg_ADC : XOg_ADD);
+    break;
+  case IR_SUB:
+    asm_intarith(as, ir, uselo ? XOg_SBB : XOg_SUB);
+    break;
+  case IR_NEG: {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    emit_rr(as, XO_GROUP3, XOg_NEG, dest);
+    if (uselo) {
+      emit_i8(as, 0);
+      emit_rr(as, XO_ARITHi8, XOg_ADC, dest);
+    }
+    ra_left(as, dest, ir->op1);
+    break;
+  }
+  case IR_CALLN:
+    ra_destreg(as, ir, RID_RETHI);
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RET));  /* Mark call as used. */
+    break;
+  default: lua_assert(0); break;
+  }
+#else
+  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused on x64 or without FFI. */
+#endif
+}
 
 /* -- Stack handling ------------------------------------------------------ */
 
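For IR_ADD and IR_SUB the pairing relies on the carry flag: the loword ADD or SUB is emitted by the regular arithmetic path, and the HIOP becomes an ADC or SBB that consumes its carry or borrow. A standalone C sketch (illustration only, not part of the patch):

  #include <stdint.h>

  /* 64 bit add on 32 bit halves, mirroring the ADD/ADC split. */
  static void add64(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi,
                    uint32_t *rlo, uint32_t *rhi)
  {
    uint32_t lo = alo + blo;     /* ADD: sets the carry flag on overflow. */
    uint32_t carry = lo < alo;   /* In C the carry must be recomputed. */
    *rlo = lo;
    *rhi = ahi + bhi + carry;    /* ADC: hiword add plus carry. */
  }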
@@ -3682,21 +3947,16 @@ static void asm_ir(ASMState *as, IRIns *ir)
   switch ((IROp)ir->o) {
   /* Miscellaneous ops. */
   case IR_LOOP: asm_loop(as); break;
-  case IR_NOP: break;
+  case IR_NOP: lua_assert(!ra_used(ir)); break;
   case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
 
   /* Guarded assertions. */
-  case IR_LT: asm_comp(as, ir, CC_GE, CC_AE, VCC_PS); break;
-  case IR_GE: asm_comp(as, ir, CC_L, CC_B, 0); break;
-  case IR_LE: asm_comp(as, ir, CC_G, CC_A, VCC_PS); break;
-  case IR_GT: asm_comp(as, ir, CC_LE, CC_BE, 0); break;
-  case IR_ULT: asm_comp(as, ir, CC_AE, CC_AE, VCC_U); break;
-  case IR_UGE: asm_comp(as, ir, CC_B, CC_B, VCC_U|VCC_PS); break;
-  case IR_ULE: asm_comp(as, ir, CC_A, CC_A, VCC_U); break;
-  case IR_ABC:
-  case IR_UGT: asm_comp(as, ir, CC_BE, CC_BE, VCC_U|VCC_PS); break;
-  case IR_EQ: asm_comp(as, ir, CC_NE, CC_NE, VCC_P); break;
-  case IR_NE: asm_comp(as, ir, CC_E, CC_E, VCC_U|VCC_P); break;
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_EQ: case IR_NE: case IR_ABC:
+    asm_comp(as, ir, asm_compmap[ir->o]);
+    break;
 
   case IR_RETF: asm_retf(as, ir); break;
 
@@ -3744,7 +4004,15 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
     asm_fpmath(as, ir);
     break;
-  case IR_POWI: asm_powi(as, ir); break;
+  case IR_POWI:
+#if LJ_64 && LJ_HASFFI
+    if (!irt_isnum(ir->t))
+      asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					     IRCALL_lj_carith_powu64);
+    else
+#endif
+      asm_powi(as, ir);
+    break;
 
   /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
   case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
@@ -3801,6 +4069,7 @@ static void asm_trace(ASMState *as)
 {
   for (as->curins--; as->curins > as->stopins; as->curins--) {
     IRIns *ir = IR(as->curins);
+    lua_assert(!(LJ_32 && irt_isint64(ir->t)));  /* Handled by SPLIT. */
     if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
       continue;  /* Dead-code elimination can be soooo easy. */
     if (irt_isguard(ir->t))
@@ -3864,11 +4133,10 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
     case IR_CALLN: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
 #if LJ_64
-      /* NYI: add stack slots for x64 calls with many args. */
       lua_assert(CCI_NARGS(ci) <= (LJ_ABI_WIN ? 4 : 6));
       ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
 #else
-      /* NYI: not fastcall-aware, but doesn't matter (yet). */
+      lua_assert(!(ci->flags & CCI_FASTCALL) || CCI_NARGS(ci) <= 2);
       if (CCI_NARGS(ci) > (uint32_t)as->evenspill)  /* Leave room for args. */
	as->evenspill = (int32_t)CCI_NARGS(ci);
       ir->prev = REGSP_HINT(RID_RET);
@@ -3878,6 +4146,12 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
	(RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
       continue;
       }
+#if LJ_32 && LJ_HASFFI
+    case IR_HIOP:
+      if ((ir-1)->o == IR_CALLN)
+	ir->prev = REGSP_HINT(RID_RETHI);
+      break;
+#endif
     /* C calls evict all scratch regs and return results in RID_RET. */
     case IR_SNEW: case IR_NEWREF:
 #if !LJ_64
@@ -3894,6 +4168,14 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
	as->modset = RSET_SCRATCH;
       break;
     case IR_POWI:
+#if LJ_64 && LJ_HASFFI
+      if (!irt_isnum(ir->t)) {
+	ir->prev = REGSP_HINT(RID_RET);
+	if (inloop)
+	  as->modset |= (RSET_SCRATCH & RSET_GPR);
+	continue;
+      }
+#endif
       ir->prev = REGSP_HINT(RID_XMM0);
       if (inloop)
	as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
diff --git a/src/lj_carith.c b/src/lj_carith.c
index 46f07be7..134a61fb 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -230,6 +230,14 @@ int lj_carith_op(lua_State *L, MMS mm)
 
 /* -- 64 bit integer arithmetic helpers ----------------------------------- */
 
+#if LJ_32
+/* Signed/unsigned 64 bit multiply. */
+int64_t lj_carith_mul64(int64_t a, int64_t b)
+{
+  return a * b;
+}
+#endif
+
 /* Unsigned 64 bit x^k. */
 uint64_t lj_carith_powu64(uint64_t x, uint64_t k)
 {
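The helper is deliberately a plain C function: a full 64x64 bit multiply takes three 32 bit multiplies on x86, so the SPLIT pass emits a call instead of inlining the sequence. A sketch of the decomposition the compiler effectively generates for it (illustration only, not part of the patch):

  #include <stdint.h>

  /* (ahi:alo) * (bhi:blo) mod 2^64 via three 32 bit multiplies. */
  static uint64_t mul64_parts(uint32_t alo, uint32_t ahi,
                              uint32_t blo, uint32_t bhi)
  {
    uint64_t lo = (uint64_t)alo * blo;  /* Full 32x32 -> 64 product. */
    uint32_t hi = (uint32_t)(lo >> 32) + alo*bhi + ahi*blo;  /* Truncated. */
    return ((uint64_t)hi << 32) | (uint32_t)lo;
  }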
diff --git a/src/lj_carith.h b/src/lj_carith.h
index 6870172b..14073603 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -12,6 +12,9 @@
 
 LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
 
+#if LJ_32
+LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
+#endif
 LJ_FUNC uint64_t lj_carith_powu64(uint64_t x, uint64_t k);
 LJ_FUNC int64_t lj_carith_powi64(int64_t x, int64_t k);
 
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 61210907..5eafa3a7 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -189,6 +189,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
     sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, 0);
 #endif
   xstore:
+    if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J);
     emitir(IRT(IR_XSTORE, dt), dp, sp);
     break;
   case CCX(I, C):
@@ -311,6 +312,7 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp)
       TRef ptr = emitir(IRT(IR_ADD, IRT_PTR), dp,
			lj_ir_kintp(J, sizeof(GCcdata)));
       emitir(IRT(IR_XSTORE, t), ptr, tr);
+      lj_needsplit(J);
       return dp;
     } else if ((sinfo & CTF_BOOL)) {
       /* Assume not equal to zero. Fixup and emit pending guard later. */
@@ -406,7 +408,10 @@ static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
     if (ctype_isenum(s->info)) s = ctype_child(cts, s);
     if (ctype_isnum(s->info)) {  /* Load number value. */
       IRType t = crec_ct2irt(s);
-      if (t != IRT_CDATA) sp = emitir(IRT(IR_XLOAD, t), sp, 0);
+      if (t != IRT_CDATA) {
+	sp = emitir(IRT(IR_XLOAD, t), sp, 0);
+	if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J);
+      }
     }
     goto doconv;
   }
@@ -499,8 +504,10 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
     if (ctype_isinteger(ctk->info) && (t = crec_ct2irt(ctk)) != IRT_CDATA) {
       idx = emitir(IRT(IR_ADD, IRT_PTR), idx, lj_ir_kintp(J, sizeof(GCcdata)));
       idx = emitir(IRT(IR_XLOAD, t), idx, 0);
-      if (!LJ_64 && (t == IRT_I64 || t == IRT_U64))
+      if (!LJ_64 && (t == IRT_I64 || t == IRT_U64)) {
	idx = emitconv(idx, IRT_INT, t, 0);
+	lj_needsplit(J);
+      }
       goto integer_key;
     }
   } else if (tref_isstr(idx)) {
@@ -664,6 +671,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
   CTypeID id;
   TRef tr, dp, ptr;
   MSize i;
+  lj_needsplit(J);
   if (((s[0]->info & CTF_UNSIGNED) && s[0]->size == 8) ||
       ((s[1]->info & CTF_UNSIGNED) && s[1]->size == 8)) {
     dt = IRT_U64; id = CTID_UINT64;
@@ -691,9 +699,6 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
     lj_ir_set(J, IRTG(op, dt), sp[0], sp[1]);
     J->postproc = LJ_POST_FIXGUARD;
     return TREF_TRUE;
-  } else if (mm == MM_pow) {
-    tr = lj_ir_call(J, dt == IRT_I64 ? IRCALL_lj_carith_powi64 :
-				       IRCALL_lj_carith_powu64, sp[0], sp[1]);
   } else {
     if (mm == MM_div || mm == MM_mod)
       return 0;  /* NYI: integer div, mod. */
@@ -754,10 +759,11 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
     tr = emitconv(tr, IRT_INTP, IRT_INT,
		  ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
 #else
-    if (!tref_typerange(sp[1], IRT_I8, IRT_U32))
+    if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
       tr = emitconv(tr, IRT_INTP, t,
		    (t == IRT_NUM || t == IRT_FLOAT) ?
		    IRCONV_TRUNC|IRCONV_ANY : 0);
+    }
 #endif
     tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
     tr = emitir(IRT(IR_ADD, IRT_PTR), sp[0], tr);
@@ -790,6 +796,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
       if (ctype_isnum(ct->info)) {
	IRType t = crec_ct2irt(ct);
	if (t == IRT_CDATA) goto err_type;
+	if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J);
	tr = emitir(IRT(IR_XLOAD, t), tr, 0);
       } else if (!(ctype_isptr(ct->info) || ctype_isrefarray(ct->info))) {
	goto err_type;
@@ -842,6 +849,7 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
     IRType t = crec_ct2irt(s);
     if (t != IRT_CDATA) {
       TRef tr = emitir(IRT(IR_XLOAD, t), sp, 0);  /* Load number value. */
+      if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J);
       if (t == IRT_FLOAT || t == IRT_U32 || t == IRT_I64 || t == IRT_U64)
	tr = emitconv(tr, IRT_NUM, t, 0);
       J->base[0] = tr;
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 1cb3566e..286eb219 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -33,6 +33,7 @@
   /* Miscellaneous ops. */ \
   _(NOP,	N , ___, ___) \
   _(BASE,	N , lit, lit) \
+  _(HIOP,	S , ref, ref) \
   _(LOOP,	S , ___, ___) \
   _(PHI,	S , ref, ref) \
   _(RENAME,	S , ref, lit) \
@@ -212,8 +213,9 @@ IRFLDEF(FLENUM)
 /* CONV mode, stored in op2. */
 #define IRCONV_SRCMASK	0x001f	/* Source IRType. */
 #define IRCONV_DSTMASK	0x03e0	/* Dest. IRType (also in ir->t). */
-#define IRCONV_NUM_INT	((IRT_NUM<<5)|IRT_INT)
-#define IRCONV_INT_NUM	((IRT_INT<<5)|IRT_NUM)
+#define IRCONV_DSH	5
+#define IRCONV_NUM_INT	((IRT_NUM<<IRCONV_DSH)|IRT_INT)
+#define IRCONV_INT_NUM	((IRT_INT<<IRCONV_DSH)|IRT_NUM)
 #define IRCONV_TRUNC	0x0400	/* Truncate number to integer. */
 #define IRCONV_SEXT	0x0800	/* Sign-extend integer to integer. */
 #define IRCONV_MODEMASK	0x0fff
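With the shift factored out as IRCONV_DSH, both halves of a CONV's op2 can be decoded symmetrically; the new asm_conv_int64_fp above uses exactly this. Hypothetical helper macros for illustration (not part of the patch):

  /* Decode source and destination IRType from a CONV instruction's op2. */
  #define CONV_SRC(op2)  ((op2) & IRCONV_SRCMASK)
  #define CONV_DST(op2)  (((op2) & IRCONV_DSTMASK) >> IRCONV_DSH)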
@@ -251,13 +253,21 @@ typedef struct CCallInfo {
 #define CCI_CASTU64	0x0200	/* Cast u64 result to number. */
 #define CCI_NOFPRCLOBBER 0x0400	/* Does not clobber any FPRs. */
 #define CCI_FASTCALL	0x0800	/* Fastcall convention. */
-#define CCI_STACK64	0x1000	/* Needs 64 bits per argument. */
 
 /* Function definitions for CALL* instructions. */
 #if LJ_HASFFI
+#if LJ_32
+#define ARG2_64		4	/* Treat as 4 32 bit arguments. */
+#define IRCALLDEF_FFI32(_) \
+  _(lj_carith_mul64,	ARG2_64, N, I64, CCI_NOFPRCLOBBER)
+#else
+#define ARG2_64		2
+#define IRCALLDEF_FFI32(_)
+#endif
 #define IRCALLDEF_FFI(_) \
-  _(lj_carith_powi64,	2,	 N, I64, CCI_STACK64|CCI_NOFPRCLOBBER) \
-  _(lj_carith_powu64,	2,	 N, U64, CCI_STACK64|CCI_NOFPRCLOBBER)
+  IRCALLDEF_FFI32(_) \
+  _(lj_carith_powi64,	ARG2_64, N, I64, CCI_NOFPRCLOBBER) \
+  _(lj_carith_powu64,	ARG2_64, N, U64, CCI_NOFPRCLOBBER)
 #else
 #define IRCALLDEF_FFI(_)
 #endif
@@ -402,6 +412,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 #define irt_isinteger(t)	(irt_typerange((t), IRT_I8, IRT_INT))
 #define irt_isgcv(t)		(irt_typerange((t), IRT_STR, IRT_UDATA))
 #define irt_isaddr(t)		(irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
+#define irt_isint64(t)		(irt_typerange((t), IRT_I64, IRT_U64))
 
 #if LJ_64
 #define IRT_IS64 \
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 43c414c1..db99c118 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -141,6 +141,12 @@ LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase);
 /* Optimization passes. */
 LJ_FUNC void lj_opt_dce(jit_State *J);
 LJ_FUNC int lj_opt_loop(jit_State *J);
+#if LJ_HASFFI && LJ_32
+LJ_FUNC void lj_opt_split(jit_State *J);
+#else
+#define lj_opt_split(J)		UNUSED(J)
+#endif
+
 #endif
 
 #endif
diff --git a/src/lj_jit.h b/src/lj_jit.h
index a8be1a97..38970fc7 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -240,6 +240,15 @@ enum {
 #define LJ_KSIMD(J, n) \
   ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
 
+/* Set/reset flag to activate the SPLIT pass for the current trace. */
+#if LJ_32 && LJ_HASFFI
+#define lj_needsplit(J)		(J->needsplit = 1)
+#define lj_resetsplit(J)	(J->needsplit = 0)
+#else
+#define lj_needsplit(J)		UNUSED(J)
+#define lj_resetsplit(J)	UNUSED(J)
+#endif
+
 /* Fold state is used to fold instructions on-the-fly. */
 typedef struct FoldState {
   IRIns ins;		/* Currently emitted instruction. */
@@ -293,6 +302,9 @@ typedef struct jit_State {
   MSize sizesnapmap;	/* Size of temp. snapshot map buffer. */
 
   PostProc postproc;	/* Required post-processing after execution. */
+#if LJ_32 && LJ_HASFFI
+  int needsplit;	/* Need SPLIT pass. */
+#endif
 
   GCRef *trace;		/* Array of traces. */
   TraceNo freetrace;	/* Start of scan for next free trace. */
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 2d08e187..03caf80d 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -538,6 +538,13 @@ LJFOLDF(kfold_conv_knum_int_num)
   }
 }
 
+LJFOLD(CONV KNUM IRCONV_U32_NUM)
+LJFOLDF(kfold_conv_knum_u32_num)
+{
+  lua_assert((fins->op2 & IRCONV_TRUNC));
+  return INTFOLD((int32_t)(uint32_t)knumleft);
+}
+
 LJFOLD(CONV KNUM IRCONV_I64_NUM)
 LJFOLDF(kfold_conv_knum_i64_num)
 {
@@ -805,6 +812,7 @@ LJFOLDF(simplify_conv_u32_num)
 }
 
 LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
+LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
 LJFOLDF(simplify_conv_i64_num)
 {
   PHIBARRIER(fleft);
@@ -826,23 +834,6 @@ LJFOLDF(simplify_conv_i64_num)
   return NEXTFOLD;
 }
 
-LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _U32 */
-LJFOLDF(simplify_conv_u64_num)
-{
-  PHIBARRIER(fleft);
-  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
-#if LJ_TARGET_X64
-    return fleft->op1;
-#else
-    /* Reduce to a zero-extension. */
-    fins->op1 = fleft->op1;
-    fins->op2 = (IRT_U64<<5)|IRT_U32;
-    return RETRYFOLD;
-#endif
-  }
-  return NEXTFOLD;
-}
-
 /* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
 LJFOLD(TOBIT CONV KNUM)
 LJFOLDF(simplify_tobit_conv)
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c new file mode 100644 index 00000000..3cb30514 --- /dev/null +++ b/src/lj_opt_split.c | |||
@@ -0,0 +1,343 @@ | |||
1 | /* | ||
2 | ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. | ||
3 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_opt_split_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT && LJ_HASFFI && LJ_32 | ||
12 | |||
13 | #include "lj_err.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_ir.h" | ||
16 | #include "lj_jit.h" | ||
17 | #include "lj_iropt.h" | ||
18 | #include "lj_vm.h" | ||
19 | |||
20 | /* SPLIT pass: | ||
21 | ** | ||
22 | ** This pass splits up 64 bit IR instructions into multiple 32 bit IR | ||
23 | ** instructions. It's only active for 32 bit CPUs which lack native 64 bit | ||
24 | ** operations. The FFI is currently the only emitter for 64 bit | ||
25 | ** instructions, so this pass is disabled if the FFI is disabled. | ||
26 | ** | ||
27 | ** Splitting the IR in a separate pass keeps each 32 bit IR assembler | ||
28 | ** backend simple. Only a small amount of extra functionality needs to be | ||
29 | ** implemented. This is much easier than adding support for allocating | ||
30 | ** register pairs to each backend (believe me, I tried). A few simple, but | ||
31 | ** important optimizations can be performed by the SPLIT pass, which would | ||
32 | ** be tedious to do in the backend. | ||
33 | ** | ||
34 | ** The basic idea is to replace each 64 bit IR instruction with its 32 bit | ||
35 | ** equivalent plus an extra HIOP instruction. The split IR is not passed | ||
36 | ** through FOLD or any other optimizations, so each HIOP is guaranteed to | ||
37 | ** immediately follow its counterpart. The actual functionality of HIOP is | ||
38 | ** inferred from the previous instruction. | ||
39 | ** | ||
40 | ** The operands of HIOP hold the hiword input references. The output of HIOP | ||
41 | ** is the hiword output reference, which is also used to hold the hiword | ||
42 | ** register or spill slot information. The register allocator treats this | ||
43 | ** instruction independent of any other instruction, which improves code | ||
44 | ** quality compared to using fixed register pairs. | ||
45 | ** | ||
46 | ** It's easier to split up some instructions into two regular 32 bit | ||
47 | ** instructions. E.g. XLOAD is split up into two XLOADs with two different | ||
48 | ** addresses. Obviously 64 bit constants need to be split up into two 32 bit | ||
49 | ** constants, too. Some hiword instructions can be entirely omitted, e.g. | ||
50 | ** when zero-extending a 32 bit value to 64 bits. | ||
51 | ** | ||
52 | ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with | ||
53 | ** two int64_t fields: | ||
54 | ** | ||
55 | ** 0100 p32 ADD base +8 | ||
56 | ** 0101 i64 XLOAD 0100 | ||
57 | ** 0102 i64 ADD 0101 +1 | ||
58 | ** 0103 p32 ADD base +16 | ||
59 | ** 0104 i64 XSTORE 0103 0102 | ||
60 | ** | ||
61 | ** mov rax, [esi+0x8] | ||
62 | ** add rax, +0x01 | ||
63 | ** mov [esi+0x10], rax | ||
64 | ** | ||
65 | ** Here's the transformed IR and the x86 machine code after the SPLIT pass: | ||
66 | ** | ||
67 | ** 0100 p32 ADD base +8 | ||
68 | ** 0101 int XLOAD 0100 | ||
69 | ** 0102 p32 ADD base +12 | ||
70 | ** 0103 int XLOAD 0102 | ||
71 | ** 0104 int ADD 0101 +1 | ||
72 | ** 0105 int HIOP 0103 +0 | ||
73 | ** 0106 p32 ADD base +16 | ||
74 | ** 0107 int XSTORE 0106 0104 | ||
75 | ** 0108 p32 ADD base +20 | ||
76 | ** 0109 int XSTORE 0108 0105 | ||
77 | ** | ||
78 | ** mov eax, [esi+0x8] | ||
79 | ** mov ecx, [esi+0xc] | ||
80 | ** add eax, +0x01 | ||
81 | ** adc ecx, +0x00 | ||
82 | ** mov [esi+0x10], eax | ||
83 | ** mov [esi+0x14], ecx | ||
84 | ** | ||
85 | ** You may notice the reassociated hiword address computation, which is | ||
86 | ** later fused into the mov operands by the assembler. | ||
87 | */ | ||
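The add/adc pair in the listing above is the machine-level form of an ADD plus its HIOP: the hiword operation consumes the loword's carry. A self-contained sketch of the combined arithmetic (illustrative only, not part of the patch):

#include <stdint.h>

/* What 'ADD lo' plus 'HIOP hi' compute together: a 64 bit add built
** from two 32 bit adds, with the loword's carry fed into the hiword.
** This is exactly what the add/adc instruction pair does. */
static void split_add64(uint32_t alo, uint32_t ahi,
                        uint32_t blo, uint32_t bhi,
                        uint32_t *rlo, uint32_t *rhi)
{
  uint32_t lo = alo + blo;
  *rlo = lo;
  *rhi = ahi + bhi + (lo < alo);  /* (lo < alo) is the carry bit */
}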
88 | |||
89 | /* Some local macros to save typing. Undef'd at the end. */ | ||
90 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
91 | |||
92 | /* Directly emit the transformed IR without updating chains etc. */ | ||
93 | static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2) | ||
94 | { | ||
95 | IRRef nref = lj_ir_nextins(J); | ||
96 | IRIns *ir = IR(nref); | ||
97 | ir->ot = ot; | ||
98 | ir->op1 = op1; | ||
99 | ir->op2 = op2; | ||
100 | return nref; | ||
101 | } | ||
102 | |||
103 | /* Emit a CALLN with two split 64 bit arguments. */ | ||
104 | static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir, | ||
105 | IRIns *ir, IRCallID id) | ||
106 | { | ||
107 | IRRef tmp, op1 = ir->op1, op2 = ir->op2; | ||
108 | J->cur.nins--; | ||
109 | #if LJ_LE | ||
110 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); | ||
111 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); | ||
112 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); | ||
113 | #else | ||
114 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); | ||
115 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); | ||
116 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); | ||
117 | #endif | ||
118 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); | ||
119 | return split_emit(J, IRTI(IR_HIOP), tmp, tmp); | ||
120 | } | ||
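split_call64() passes the two 64 bit operands as four 32 bit CARG words, ordered by endianness so the C callee sees them as two plain uint64_t arguments. What a helper like lj_carith_mul64 must compute can be sketched with explicit 32 bit halves (a hypothetical stand-in, not the code from lj_carith.c):

#include <stdint.h>

/* Illustrative 64 x 64 -> 64 bit multiply built from 32 bit pieces. */
static uint64_t mul64_sketch(uint64_t a, uint64_t b)
{
  uint32_t alo = (uint32_t)a, ahi = (uint32_t)(a >> 32);
  uint32_t blo = (uint32_t)b, bhi = (uint32_t)(b >> 32);
  uint64_t lo = (uint64_t)alo * blo;          /* full 32x32 -> 64 product */
  uint32_t hi = ahi*blo + alo*bhi + (uint32_t)(lo >> 32);  /* mod 2^32 */
  return ((uint64_t)hi << 32) | (uint32_t)lo;
}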
121 | |||
122 | /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ | ||
123 | static IRRef split_ptr(jit_State *J, IRRef ref) | ||
124 | { | ||
125 | IRIns *ir = IR(ref); | ||
126 | int32_t ofs = 4; | ||
127 | if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Reassociate address. */ | ||
128 | ofs += IR(ir->op2)->i; | ||
129 | ref = ir->op1; | ||
130 | if (ofs == 0) return ref; | ||
131 | } | ||
132 | return split_emit(J, IRTI(IR_ADD), ref, lj_ir_kint(J, ofs)); | ||
133 | } | ||
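This reassociation is what produces the +12 and +20 addresses in the example from the header comment: rather than adding 4 to an already-offset address, split_ptr() merges the constant offsets into a single ADD on the original base. In the same IR notation as above:

  without reassociation:          with reassociation:
  0100 p32 ADD   base  +8         0100 p32 ADD   base  +8
  0102 p32 ADD   0100  +4         0102 p32 ADD   base  +12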
134 | |||
135 | /* Transform the old IR to the new IR. */ | ||
136 | static void split_ir(jit_State *J) | ||
137 | { | ||
138 | IRRef nins = J->cur.nins, nk = J->cur.nk; | ||
139 | MSize irlen = nins - nk; | ||
140 | MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); | ||
141 | IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); | ||
142 | IRRef1 *hisubst; | ||
143 | IRRef ref; | ||
144 | |||
145 | /* Copy old IR to buffer. */ | ||
146 | memcpy(oir, IR(nk), irlen*sizeof(IRIns)); | ||
147 | /* Bias hiword substitution table and old IR. Loword kept in field prev. */ | ||
148 | hisubst = (IRRef1 *)&oir[irlen] - nk; | ||
149 | oir -= nk; | ||
150 | |||
151 | /* Remove all IR instructions, but retain IR constants. */ | ||
152 | J->cur.nins = REF_FIRST; | ||
153 | |||
154 | /* Process constants and fixed references. */ | ||
155 | for (ref = nk; ref <= REF_BASE; ref++) { | ||
156 | IRIns *ir = &oir[ref]; | ||
157 | if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */ | ||
158 | TValue tv = *ir_k64(ir); | ||
159 | ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); | ||
160 | hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); | ||
161 | } else { | ||
162 | ir->prev = (IRRef1)ref; /* Identity substitution for loword. */ | ||
163 | } | ||
164 | } | ||
165 | |||
166 | /* Process old IR instructions. */ | ||
167 | for (ref = REF_FIRST; ref < nins; ref++) { | ||
168 | IRIns *ir = &oir[ref]; | ||
169 | IRRef nref = lj_ir_nextins(J); | ||
170 | IRIns *nir = IR(nref); | ||
171 | |||
172 | /* Copy-substitute old instruction to new instruction. */ | ||
173 | nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; | ||
174 | nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; | ||
175 | ir->prev = nref; /* Loword substitution. */ | ||
176 | nir->o = ir->o; | ||
177 | nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); | ||
178 | |||
179 | /* Split 64 bit instructions. */ | ||
180 | if (irt_isint64(ir->t)) { | ||
181 | IRRef hi = hisubst[ir->op1]; | ||
182 | nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ | ||
183 | switch (ir->o) { | ||
184 | case IR_ADD: | ||
185 | case IR_SUB: | ||
186 | /* Use plain op for hiword if loword cannot produce a carry/borrow. */ | ||
187 | if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { | ||
188 | ir->prev = nir->op1; /* Pass through loword. */ | ||
189 | nir->op1 = hi; nir->op2 = hisubst[ir->op2]; | ||
190 | hi = nref; | ||
191 | break; | ||
192 | } | ||
193 | /* fallthrough */ | ||
194 | case IR_NEG: | ||
195 | hi = split_emit(J, IRTI(IR_HIOP), hi, hisubst[ir->op2]); | ||
196 | break; | ||
197 | case IR_MUL: | ||
198 | hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); | ||
199 | break; | ||
200 | case IR_POWI: | ||
201 | hi = split_call64(J, hisubst, oir, ir, | ||
202 | irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
203 | IRCALL_lj_carith_powu64); | ||
204 | break; | ||
205 | case IR_XLOAD: | ||
206 | hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2); | ||
207 | #if LJ_BE | ||
208 | ir->prev = hi; hi = nref; | ||
209 | #endif | ||
210 | break; | ||
211 | case IR_XSTORE: | ||
212 | #if LJ_LE | ||
213 | hi = hisubst[ir->op2]; | ||
214 | #else | ||
215 | hi = nir->op2; nir->op2 = hisubst[ir->op2]; | ||
216 | #endif | ||
217 | split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hi); | ||
218 | continue; | ||
219 | case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ | ||
220 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | ||
221 | if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ | ||
222 | hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); | ||
223 | } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ | ||
224 | /* Drop the cast, since the assembler doesn't care. */ | ||
225 | hisubst[ref] = hi; | ||
226 | goto fwdlo; | ||
227 | } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ | ||
228 | IRRef k31 = lj_ir_kint(J, 31); | ||
229 | nir = IR(nref); /* May have been reallocated. */ | ||
230 | ir->prev = nir->op1; /* Pass through loword. */ | ||
231 | nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ | ||
232 | nir->op2 = k31; | ||
233 | hi = nref; | ||
234 | } else { /* Zero-extend to 64 bit. */ | ||
235 | hisubst[ref] = lj_ir_kint(J, 0); | ||
236 | goto fwdlo; | ||
237 | } | ||
238 | break; | ||
239 | } | ||
240 | case IR_PHI: { | ||
241 | IRRef hi2; | ||
242 | if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || | ||
243 | nir->op1 == nir->op2) | ||
244 | J->cur.nins--; /* Drop useless PHIs. */ | ||
245 | hi2 = hisubst[ir->op2]; | ||
246 | if (!((irref_isk(hi) && irref_isk(hi2)) || hi == hi2)) | ||
247 | split_emit(J, IRTI(IR_PHI), hi, hi2); | ||
248 | continue; | ||
249 | } | ||
250 | default: | ||
251 | lua_assert(ir->o <= IR_NE); | ||
252 | split_emit(J, IRTGI(IR_HIOP), hi, hisubst[ir->op2]); /* Comparisons. */ | ||
253 | continue; | ||
254 | } | ||
255 | hisubst[ref] = hi; /* Store hiword substitution. */ | ||
256 | } else if (ir->o == IR_CONV) { /* See above, too. */ | ||
257 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | ||
258 | if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ | ||
259 | if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ | ||
260 | ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), | ||
261 | hisubst[ir->op1], nref); | ||
262 | } else { /* Truncate to lower 32 bits. */ | ||
263 | fwdlo: | ||
264 | ir->prev = nir->op1; /* Forward loword. */ | ||
265 | /* Replace with NOP to avoid messing up the snapshot logic. */ | ||
266 | nir->ot = IRT(IR_NOP, IRT_NIL); | ||
267 | nir->op1 = nir->op2 = 0; | ||
268 | } | ||
269 | } | ||
270 | } else if (ir->o == IR_LOOP) { | ||
271 | J->loopref = nref; /* Needed by assembler. */ | ||
272 | } | ||
273 | } | ||
274 | |||
275 | /* Add PHI marks. */ | ||
276 | for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { | ||
277 | IRIns *ir = IR(ref); | ||
278 | if (ir->o != IR_PHI) break; | ||
279 | if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); | ||
280 | if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); | ||
281 | } | ||
282 | |||
283 | /* Substitute snapshot maps. */ | ||
284 | oir[nins].prev = J->cur.nins; /* Substitution for last snapshot. */ | ||
285 | { | ||
286 | SnapNo i, nsnap = J->cur.nsnap; | ||
287 | for (i = 0; i < nsnap; i++) { | ||
288 | SnapShot *snap = &J->cur.snap[i]; | ||
289 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; | ||
290 | MSize n, nent = snap->nent; | ||
291 | snap->ref = oir[snap->ref].prev; | ||
292 | for (n = 0; n < nent; n++) { | ||
293 | SnapEntry sn = map[n]; | ||
294 | map[n] = ((sn & 0xffff0000) | oir[snap_ref(sn)].prev); | ||
295 | } | ||
296 | } | ||
297 | } | ||
298 | } | ||
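The two widening cases in the CONV handling above come down to cheap hiword computations: sign-extension derives the hiword from an arithmetic right shift of the loword by 31, and zero-extension needs only the constant 0. A sketch, assuming the arithmetic-shift behavior of the targets LuaJIT supports (names are hypothetical):

#include <stdint.h>

/* Hiword produced when widening a 32 bit value to 64 bits. */
static int32_t hiword_sext(int32_t lo)
{
  return lo >> 31;  /* all ones if lo is negative, else 0; this is the
                    ** 'hi = bsar(lo, 31)' rewrite emitted above */
}

static int32_t hiword_zext(void)
{
  return 0;  /* zero-extension: the hiword is just the constant 0 */
}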
299 | |||
300 | /* Protected callback for split pass. */ | ||
301 | static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud) | ||
302 | { | ||
303 | jit_State *J = (jit_State *)ud; | ||
304 | split_ir(J); | ||
305 | UNUSED(L); UNUSED(dummy); | ||
306 | return NULL; | ||
307 | } | ||
308 | |||
309 | #ifdef LUA_USE_ASSERT | ||
310 | /* Slow, but sure way to check whether a SPLIT pass is needed. */ | ||
311 | static int split_needsplit(jit_State *J) | ||
312 | { | ||
313 | IRIns *ir, *irend; | ||
314 | IRRef ref; | ||
315 | for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) | ||
316 | if (irt_isint64(ir->t)) | ||
317 | return 1; | ||
318 | for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) | ||
319 | if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 || | ||
320 | (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64) | ||
321 | return 1; | ||
322 | return 0; /* Nope. */ | ||
323 | } | ||
324 | #endif | ||
325 | |||
326 | /* SPLIT pass. */ | ||
327 | void lj_opt_split(jit_State *J) | ||
328 | { | ||
329 | lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ | ||
330 | if (J->needsplit) { | ||
331 | int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); | ||
332 | if (errcode) { | ||
333 | /* Completely reset the trace to avoid inconsistent dump on abort. */ | ||
334 | J->cur.nins = J->cur.nk = REF_BASE; | ||
335 | J->cur.nsnap = 0; | ||
336 | lj_err_throw(J->L, errcode); /* Propagate errors. */ | ||
337 | } | ||
338 | } | ||
339 | } | ||
340 | |||
341 | #undef IR | ||
342 | |||
343 | #endif | ||
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 94ab3c32..37c68f4b 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
@@ -193,6 +193,7 @@ typedef enum { | |||
193 | XI_FLD1 = 0xe8d9, | 193 | XI_FLD1 = 0xe8d9, |
194 | XI_FLDLG2 = 0xecd9, | 194 | XI_FLDLG2 = 0xecd9, |
195 | XI_FLDLN2 = 0xedd9, | 195 | XI_FLDLN2 = 0xedd9, |
196 | XI_FDUP = 0xc0d9, /* Really fld st0. */ | ||
196 | XI_FPOP = 0xd8dd, /* Really fstp st0. */ | 197 | XI_FPOP = 0xd8dd, /* Really fstp st0. */ |
197 | XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ | 198 | XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ |
198 | XI_FRNDINT = 0xfcd9, | 199 | XI_FRNDINT = 0xfcd9, |
@@ -263,10 +264,17 @@ typedef enum { | |||
263 | XO_MOVD = XO_660f(6e), | 264 | XO_MOVD = XO_660f(6e), |
264 | XO_MOVDto = XO_660f(7e), | 265 | XO_MOVDto = XO_660f(7e), |
265 | 266 | ||
267 | XO_FLDd = XO_(d9), XOg_FLDd = 0, | ||
266 | XO_FLDq = XO_(dd), XOg_FLDq = 0, | 268 | XO_FLDq = XO_(dd), XOg_FLDq = 0, |
267 | XO_FILDd = XO_(db), XOg_FILDd = 0, | 269 | XO_FILDd = XO_(db), XOg_FILDd = 0, |
270 | XO_FILDq = XO_(df), XOg_FILDq = 5, | ||
271 | XO_FSTPd = XO_(d9), XOg_FSTPd = 3, | ||
268 | XO_FSTPq = XO_(dd), XOg_FSTPq = 3, | 272 | XO_FSTPq = XO_(dd), XOg_FSTPq = 3, |
269 | XO_FISTPq = XO_(df), XOg_FISTPq = 7, | 273 | XO_FISTPq = XO_(df), XOg_FISTPq = 7, |
274 | XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, | ||
275 | XO_FADDq = XO_(dc), XOg_FADDq = 0, | ||
276 | XO_FLDCW = XO_(d9), XOg_FLDCW = 5, | ||
277 | XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 | ||
270 | } x86Op; | 278 | } x86Op; |
271 | 279 | ||
272 | /* x86 opcode groups. */ | 280 | /* x86 opcode groups. */ |
@@ -278,6 +286,7 @@ typedef uint32_t x86Group; | |||
278 | #define XG_TOXOi8(xg) ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000))) | 286 | #define XG_TOXOi8(xg) ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000))) |
279 | 287 | ||
280 | #define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27))) | 288 | #define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27))) |
289 | #define XO_ARITHw(a) ((x86Op)(0x036600fd + ((a)<<27))) | ||
281 | 290 | ||
282 | typedef enum { | 291 | typedef enum { |
283 | XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP, | 292 | XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP, |
diff --git a/src/lj_trace.c b/src/lj_trace.c index da20f991..b67e8f75 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
@@ -394,6 +394,7 @@ static void trace_start(jit_State *J) | |||
394 | J->bcskip = 0; | 394 | J->bcskip = 0; |
395 | J->guardemit.irt = 0; | 395 | J->guardemit.irt = 0; |
396 | J->postproc = LJ_POST_NONE; | 396 | J->postproc = LJ_POST_NONE; |
397 | lj_resetsplit(J); | ||
397 | setgcref(J->cur.startpt, obj2gco(J->pt)); | 398 | setgcref(J->cur.startpt, obj2gco(J->pt)); |
398 | 399 | ||
399 | L = J->L; | 400 | L = J->L; |
@@ -592,6 +593,7 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) | |||
592 | } | 593 | } |
593 | J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ | 594 | J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ |
594 | } | 595 | } |
596 | lj_opt_split(J); | ||
595 | J->state = LJ_TRACE_ASM; | 597 | J->state = LJ_TRACE_ASM; |
596 | break; | 598 | break; |
597 | 599 | ||
diff --git a/src/ljamalg.c b/src/ljamalg.c index 4d5f7600..5d90c002 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include "lj_opt_narrow.c" | 58 | #include "lj_opt_narrow.c" |
59 | #include "lj_opt_dce.c" | 59 | #include "lj_opt_dce.c" |
60 | #include "lj_opt_loop.c" | 60 | #include "lj_opt_loop.c" |
61 | #include "lj_opt_split.c" | ||
61 | #include "lj_mcode.c" | 62 | #include "lj_mcode.c" |
62 | #include "lj_snap.c" | 63 | #include "lj_snap.c" |
63 | #include "lj_record.c" | 64 | #include "lj_record.c" |