 lib/dump.lua      |   3
 src/lj_asm.c      |  46
 src/lj_ir.h       |  13
 src/lj_opt_fold.c | 126
 4 files changed, 2 insertions(+), 186 deletions(-)
diff --git a/lib/dump.lua b/lib/dump.lua
index 39dc241d..5c127ae9 100644
--- a/lib/dump.lua
+++ b/lib/dump.lua
@@ -222,7 +222,6 @@ span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b04
 local colorize, irtype
 
 -- Lookup tables to convert some literals into names.
-local tointname = { [0] = "check", "index", "", "Z", "S", "T", }
 local litname = {
   ["SLOAD "] = setmetatable({}, { __index = function(t, mode)
     local s = ""
@@ -246,8 +245,6 @@ local litname = {
     t[mode] = s
     return s
   end}),
-  ["TOINT "] = tointname,
-  ["TOI64 "] = tointname,
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
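
Note: the deleted tointname table supplied the op2 suffix shown by -jdump for TOINT and TOI64 instructions. Its entries line up one-for-one with the IRTOINT_* modes removed from src/lj_ir.h below; spelled out in C for reference (the array itself is only an illustration, the names and meanings come from the defines in this patch):

    /* Illustration: the op2 -> dump-suffix mapping encoded by tointname. */
    static const char *const tointname[6] = {
      "check",  /* 0: IRTOINT_CHECK    -- number checked for integerness */
      "index",  /* 1: IRTOINT_INDEX    -- checked + special backprop rules */
      "",       /* 2: IRTOINT_ANY      -- any FP number is ok */
      "Z",      /* 3: IRTOINT_ZEXT64   -- convert uint32_t to int64_t */
      "S",      /* 4: IRTOINT_SEXT64   -- convert int32_t to int64_t */
      "T",      /* 5: IRTOINT_TRUNCI64 -- truncate number to int64_t */
    };

IRTOINT_TOBIT (6) had no entry because it was cache-only and never reached the dumper.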
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9009a7d5..b9ada175 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1594,15 +1594,6 @@ static void asm_retf(ASMState *as, IRIns *ir)
 
 /* -- Type conversions ---------------------------------------------------- */
 
-static void asm_tonum(ASMState *as, IRIns *ir)
-{
-  Reg dest = ra_dest(as, ir, RSET_FPR);
-  Reg left = asm_fuseload(as, ir->op1, RSET_GPR);
-  emit_mrm(as, XO_CVTSI2SD, dest, left);
-  if (!(as->flags & JIT_F_SPLIT_XMM))
-    emit_rr(as, XO_XORPS, dest, dest);  /* Avoid partial register stall. */
-}
-
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
 {
   Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
@@ -1617,13 +1608,6 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   /* Can't fuse since left is needed twice. */
 }
 
-static void asm_toint(ASMState *as, IRIns *ir)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
-  emit_mrm(as, XO_CVTSD2SI, dest, left);
-}
-
 static void asm_tobit(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1636,24 +1620,6 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   ra_left(as, tmp, ir->op1);
 }
 
-static void asm_toi64(ASMState *as, IRIns *ir)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  IRRef lref = ir->op1;
-  lua_assert(LJ_64);  /* NYI: 32 bit register pairs. */
-  if (ir->op2 == IRTOINT_TRUNCI64) {
-    Reg left = asm_fuseload(as, lref, RSET_FPR);
-    emit_mrm(as, XO_CVTTSD2SI, dest|REX_64, left);
-  } else if (ir->op2 == IRTOINT_ZEXT64) {
-    /* Nothing to do. This assumes 32 bit regs are already zero-extended. */
-    ra_left(as, dest, lref);  /* But may need to move regs. */
-  } else {
-    Reg left = asm_fuseload(as, lref, RSET_GPR);
-    emit_mrm(as, XO_MOVSXd, dest|REX_64, left);
-    lua_assert(ir->op2 == IRTOINT_SEXT64);
-  }
-}
-
 static void asm_conv(ASMState *as, IRIns *ir)
 {
   IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
@@ -2499,7 +2465,7 @@ static void asm_x87load(ASMState *as, IRRef ref)
       emit_x87op(as, XI_FLD1);
     else
       emit_rma(as, XO_FLDq, XOg_FLDq, tv);
-  } else if (ir->o == IR_TONUM && !ra_used(ir) &&
+  } else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT && !ra_used(ir) &&
             !irref_isk(ir->op1) && mayfuse(as, ir->op1)) {
    IRIns *iri = IR(ir->op1);
    emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri));
@@ -3753,15 +3719,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_OBAR: asm_obar(as, ir); break;
 
   /* Type conversions. */
-  case IR_TONUM: asm_tonum(as, ir); break;
-  case IR_TOINT:
-    if (irt_isguard(ir->t))
-      asm_tointg(as, ir, ra_alloc1(as, ir->op1, RSET_FPR));
-    else
-      asm_toint(as, ir);
-    break;
   case IR_TOBIT: asm_tobit(as, ir); break;
-  case IR_TOI64: asm_toi64(as, ir); break;
   case IR_CONV: asm_conv(as, ir); break;
   case IR_TOSTR: asm_tostr(as, ir); break;
   case IR_STRTO: asm_strto(as, ir); break;
@@ -3905,7 +3863,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
       }
       break;
     /* Do not propagate hints across type conversions. */
-    case IR_CONV: case IR_TONUM: case IR_TOINT: case IR_TOBIT:
+    case IR_CONV: case IR_TOBIT:
       break;
     default:
       /* Propagate hints across likely 'op reg, imm' or 'op reg'. */
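
Note: with the dedicated asm_tonum/asm_toint/asm_toi64 handlers gone, every numeric conversion in this backend flows through asm_conv, which decodes everything from the single CONV op2 literal (its first line above shows the source-type extraction). A minimal sketch of that decoding, assuming the mask layout from src/lj_ir.h below; the shift for the check bits is an assumption chosen for illustration, not copied from the header:

    #include <stdint.h>

    #define IRCONV_SRCMASK 0x001f  /* Source IRType (bits 0-4). */
    #define IRCONV_DSTMASK 0x03e0  /* Destination IRType (bits 5-9). */
    #define IRCONV_DSH     5       /* Derived from IRCONV_DSTMASK. */
    #define IRCONV_CSH     12      /* Assumed position of the check bits. */

    /* Sketch: split a CONV op2 into source type, dest type and check mode. */
    static void conv_decode(uint32_t op2, uint32_t *st, uint32_t *dt,
                            uint32_t *mode)
    {
      *st = op2 & IRCONV_SRCMASK;                  /* e.g. IRT_INT */
      *dt = (op2 & IRCONV_DSTMASK) >> IRCONV_DSH;  /* e.g. IRT_NUM */
      *mode = op2 >> IRCONV_CSH;                   /* 2 = INDEX, 3 = CHECK */
    }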
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 5733f6af..4cf412cf 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -119,10 +119,7 @@
   \
   /* Type conversions. */ \
   _(CONV,   N , ref, lit) \
-  _(TONUM,  N , ref, ___) \
-  _(TOINT,  N , ref, lit) \
   _(TOBIT,  N , ref, ref) \
-  _(TOI64,  N , ref, lit) \
   _(TOSTR,  N , ref, ___) \
   _(STRTO,  N , ref, ___) \
   \
@@ -210,15 +207,6 @@ IRFLDEF(FLENUM)
 #define IRXLOAD_READONLY   1   /* Load from read-only data. */
 #define IRXLOAD_UNALIGNED  2   /* Unaligned load. */
 
-/* TOINT/TOI64 mode, stored in op2. Ordered by strength of the checks. */
-#define IRTOINT_CHECK      0   /* Number checked for integerness. */
-#define IRTOINT_INDEX      1   /* Checked + special backprop rules. */
-#define IRTOINT_ANY        2   /* Any FP number is ok. */
-#define IRTOINT_ZEXT64     3   /* Convert uint32_t to int64_t. */
-#define IRTOINT_SEXT64     4   /* Convert int32_t to int64_t. */
-#define IRTOINT_TRUNCI64   5   /* Truncate number to int64_t. */
-#define IRTOINT_TOBIT      6   /* Cache only: TOBIT conversion. */
-
 /* CONV mode, stored in op2. */
 #define IRCONV_SRCMASK     0x001f  /* Source IRType. */
 #define IRCONV_DSTMASK     0x03e0  /* Dest. IRType (also in ir->t). */
@@ -235,7 +223,6 @@ IRFLDEF(FLENUM)
 #define IRCONV_INDEX  (2<<IRCONV_CSH)  /* Check + special backprop rules. */
 #define IRCONV_CHECK  (3<<IRCONV_CSH)  /* Number checked for integerness. */
 
-
 /* C call info for CALL* instructions. */
 typedef struct CCallInfo {
   ASMFunction func;      /* Function pointer. */
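
Note: each removed opcode/mode pair has a direct CONV respelling. One plausible correspondence, inferred from the surviving IRCONV_* defines rather than stated by the patch (the i64/u32 spellings in particular are an assumption):

    /* Inferred mapping, orientation only:
    **   TONUM x                     ==> CONV num.int x   (IRCONV_NUM_INT)
    **   TOINT x (IRTOINT_ANY)       ==> CONV int.num x   (no check bits)
    **   TOINT x (IRTOINT_CHECK)     ==> CONV int.num x   + IRCONV_CHECK
    **   TOINT x (IRTOINT_INDEX)     ==> CONV int.num x   + IRCONV_INDEX
    **   TOI64 x (IRTOINT_SEXT64)    ==> CONV i64.int x   (sign-extend)
    **   TOI64 x (IRTOINT_ZEXT64)    ==> CONV i64.u32 x   (zero-extend)
    **   TOI64 x (IRTOINT_TRUNCI64)  ==> CONV i64.num x   (truncate)
    */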
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index adf88f4d..e05d6b7b 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -441,12 +441,6 @@ LJFOLDF(kfold_strcmp)
 
 /* -- Constant folding of conversions ------------------------------------- */
 
-LJFOLD(TONUM KINT)
-LJFOLDF(kfold_tonum)
-{
-  return lj_ir_knum(J, cast_num(fleft->i));
-}
-
 LJFOLD(TOBIT KNUM KNUM)
 LJFOLDF(kfold_tobit)
 {
@@ -455,40 +449,6 @@ LJFOLDF(kfold_tobit)
   return INTFOLD((int32_t)tv.u32.lo);
 }
 
-LJFOLD(TOINT KNUM any)
-LJFOLDF(kfold_toint)
-{
-  lua_Number n = knumleft;
-  int32_t k = lj_num2int(n);
-  if (irt_isguard(fins->t) && n != cast_num(k)) {
-    /* We're about to create a guard which always fails, like TOINT +1.5.
-    ** Some pathological loops cause this during LICM, e.g.:
-    **   local x,k,t = 0,1.5,{1,[1.5]=2}
-    **   for i=1,200 do x = x+t[k]; k = k == 1 and 1.5 or 1 end
-    **   assert(x == 300)
-    */
-    return FAILFOLD;
-  }
-  return INTFOLD(k);
-}
-
-LJFOLD(TOI64 KINT any)
-LJFOLDF(kfold_toi64_kint)
-{
-  lua_assert(fins->op2 == IRTOINT_ZEXT64 || fins->op2 == IRTOINT_SEXT64);
-  if (fins->op2 == IRTOINT_ZEXT64)
-    return INT64FOLD((uint64_t)(uint32_t)fleft->i);
-  else
-    return INT64FOLD((uint64_t)(int32_t)fleft->i);
-}
-
-LJFOLD(TOI64 KNUM any)
-LJFOLDF(kfold_toi64_knum)
-{
-  lua_assert(fins->op2 == IRTOINT_TRUNCI64);
-  return INT64FOLD((uint64_t)(int64_t)knumleft);
-}
-
 LJFOLD(CONV KINT IRCONV_NUM_INT)
 LJFOLDF(kfold_conv_kint_num)
 {
@@ -613,9 +573,6 @@ LJFOLDF(shortcut_round)
   return NEXTFOLD;
 }
 
-LJFOLD(FPMATH TONUM IRFPM_FLOOR)
-LJFOLD(FPMATH TONUM IRFPM_CEIL)
-LJFOLD(FPMATH TONUM IRFPM_TRUNC)
 LJFOLD(ABS ABS KNUM)
 LJFOLDF(shortcut_left)
 {
@@ -640,32 +597,6 @@ LJFOLDF(shortcut_leftleft)
   return fleft->op1;  /* f(g(x)) ==> x */
 }
 
-LJFOLD(TONUM TOINT)
-LJFOLDF(shortcut_leftleft_toint)
-{
-  PHIBARRIER(fleft);
-  if (irt_isguard(fleft->t))  /* Only safe with a guarded TOINT. */
-    return fleft->op1;  /* f(g(x)) ==> x */
-  return NEXTFOLD;
-}
-
-LJFOLD(TOINT TONUM any)
-LJFOLD(TOBIT TONUM KNUM)  /* The inverse must NOT be shortcut! */
-LJFOLDF(shortcut_leftleft_across_phi)
-{
-  /* Fold even across PHI to avoid expensive int->num->int conversions. */
-  return fleft->op1;  /* f(g(x)) ==> x */
-}
-
-LJFOLD(TOI64 TONUM any)
-LJFOLDF(shortcut_leftleft_toint64)
-{
-  /* Fold even across PHI to avoid expensive int->num->int64 conversions. */
-  fins->op1 = fleft->op1;  /* (int64_t)(double)(int)x ==> (int64_t)x */
-  fins->op2 = IRTOINT_SEXT64;
-  return RETRYFOLD;
-}
-
 /* -- FP algebraic simplifications ---------------------------------------- */
 
 /* FP arithmetic is tricky -- there's not much to simplify.
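
Note: the deleted shortcut_leftleft_across_phi rule folds TOBIT(TONUM(x)) back to x, and its comment warns that the inverse must NOT be shortcut. The asymmetry is easy to check in standalone C, where the casts stand in for TONUM and a TOBIT-style wrap to 32 bits:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
      /* TOBIT(TONUM(x)) == x: int32 -> double is exact, so the round
      ** trip is lossless and the fold is safe. */
      int32_t x = -123;
      assert((int32_t)(double)x == x);

      /* TONUM(TOBIT(d)) != d in general: the wrap to 32 bits loses
      ** information, so the inverse fold would be wrong. */
      double d = 4294967296.0 + 5.0;              /* 2^32 + 5 */
      int32_t b = (int32_t)(uint32_t)(int64_t)d;  /* wraps to 5 */
      assert((double)b != d);
      return 0;
    }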
@@ -969,63 +900,6 @@ LJFOLDF(narrow_convert)
   return lj_opt_narrow_convert(J);
 }
 
-/* Relaxed CSE rule for TOINT allows commoning with stronger checks, too. */
-LJFOLD(TOINT any any)
-LJFOLDF(cse_toint)
-{
-  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
-    IRRef ref, op1 = fins->op1;
-    uint8_t guard = irt_isguard(fins->t);
-    for (ref = J->chain[IR_TOINT]; ref > op1; ref = IR(ref)->prev)
-      if (IR(ref)->op1 == op1 && irt_isguard(IR(ref)->t) >= guard)
-        return ref;
-  }
-  return EMITFOLD;  /* No fallthrough to regular CSE. */
-}
-
-/* -- Strength reduction of widening -------------------------------------- */
-
-LJFOLD(TOI64 any 3)  /* IRTOINT_ZEXT64 */
-LJFOLDF(simplify_zext64)
-{
-#if LJ_TARGET_X64
-  /* Eliminate widening. All 32 bit ops implicitly zero-extend the result. */
-  PHIBARRIER(fleft);
-  return LEFTFOLD;
-#else
-  UNUSED(J);
-  return NEXTFOLD;
-#endif
-}
-
-LJFOLD(TOI64 any 4)  /* IRTOINT_SEXT64 */
-LJFOLDF(simplify_sext64)
-{
-  IRRef ref = fins->op1;
-  int64_t ofs = 0;
-  PHIBARRIER(fleft);
-  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
-    ofs = (int64_t)IR(fleft->op2)->i;
-    ref = fleft->op1;
-  }
-  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
-  if (ref == J->scev.idx) {
-    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
-    lua_assert(irt_isint(J->scev.t));
-    if (lo && IR(lo)->i + ofs >= 0) {
-#if LJ_TARGET_X64
-      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
-      return LEFTFOLD;
-#else
-      /* Reduce to a (cheaper) zero-extension. */
-      fins->op2 = IRTOINT_ZEXT64;
-      return RETRYFOLD;
-#endif
-    }
-  }
-  return NEXTFOLD;
-}
-
 /* -- Integer algebraic simplifications ----------------------------------- */
 
 LJFOLD(ADD any KINT)
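
Note: the deleted simplify_sext64 rule rests on one fact: if scalar-evolution analysis proves a 32 bit loop index (plus constant offset) never goes negative, sign-extension and zero-extension agree, so the cheaper zero-extension can be substituted; on x64 even that vanishes, because every 32 bit operation implicitly zero-extends its result. The same reasoning as standalone C, an illustration rather than LuaJIT code:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
      /* For non-negative i, IRTOINT_SEXT64 and IRTOINT_ZEXT64 coincide. */
      for (int32_t i = 0; i < 1000; i++) {
        uint64_t sext = (uint64_t)(int64_t)i;   /* sign-extension */
        uint64_t zext = (uint64_t)(uint32_t)i;  /* zero-extension */
        assert(sext == zext);
      }
      return 0;
    }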