diff options
| -rw-r--r-- | lib/dump.lua | 3 | ||||
| -rw-r--r-- | src/lj_asm.c | 46 | ||||
| -rw-r--r-- | src/lj_ir.h | 13 | ||||
| -rw-r--r-- | src/lj_opt_fold.c | 126 |
4 files changed, 2 insertions, 186 deletions
diff --git a/lib/dump.lua b/lib/dump.lua index 39dc241d..5c127ae9 100644 --- a/lib/dump.lua +++ b/lib/dump.lua | |||
| @@ -222,7 +222,6 @@ span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b04 | |||
| 222 | local colorize, irtype | 222 | local colorize, irtype |
| 223 | 223 | ||
| 224 | -- Lookup tables to convert some literals into names. | 224 | -- Lookup tables to convert some literals into names. |
| 225 | local tointname = { [0] = "check", "index", "", "Z", "S", "T", } | ||
| 226 | local litname = { | 225 | local litname = { |
| 227 | ["SLOAD "] = setmetatable({}, { __index = function(t, mode) | 226 | ["SLOAD "] = setmetatable({}, { __index = function(t, mode) |
| 228 | local s = "" | 227 | local s = "" |
| @@ -246,8 +245,6 @@ local litname = { | |||
| 246 | t[mode] = s | 245 | t[mode] = s |
| 247 | return s | 246 | return s |
| 248 | end}), | 247 | end}), |
| 249 | ["TOINT "] = tointname, | ||
| 250 | ["TOI64 "] = tointname, | ||
| 251 | ["FLOAD "] = vmdef.irfield, | 248 | ["FLOAD "] = vmdef.irfield, |
| 252 | ["FREF "] = vmdef.irfield, | 249 | ["FREF "] = vmdef.irfield, |
| 253 | ["FPMATH"] = vmdef.irfpm, | 250 | ["FPMATH"] = vmdef.irfpm, |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 9009a7d5..b9ada175 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -1594,15 +1594,6 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
| 1594 | 1594 | ||
| 1595 | /* -- Type conversions ---------------------------------------------------- */ | 1595 | /* -- Type conversions ---------------------------------------------------- */ |
| 1596 | 1596 | ||
| 1597 | static void asm_tonum(ASMState *as, IRIns *ir) | ||
| 1598 | { | ||
| 1599 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
| 1600 | Reg left = asm_fuseload(as, ir->op1, RSET_GPR); | ||
| 1601 | emit_mrm(as, XO_CVTSI2SD, dest, left); | ||
| 1602 | if (!(as->flags & JIT_F_SPLIT_XMM)) | ||
| 1603 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ | ||
| 1604 | } | ||
| 1605 | |||
| 1606 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | 1597 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) |
| 1607 | { | 1598 | { |
| 1608 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 1599 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
| @@ -1617,13 +1608,6 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |||
| 1617 | /* Can't fuse since left is needed twice. */ | 1608 | /* Can't fuse since left is needed twice. */ |
| 1618 | } | 1609 | } |
| 1619 | 1610 | ||
| 1620 | static void asm_toint(ASMState *as, IRIns *ir) | ||
| 1621 | { | ||
| 1622 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
| 1623 | Reg left = asm_fuseload(as, ir->op1, RSET_FPR); | ||
| 1624 | emit_mrm(as, XO_CVTSD2SI, dest, left); | ||
| 1625 | } | ||
| 1626 | |||
| 1627 | static void asm_tobit(ASMState *as, IRIns *ir) | 1611 | static void asm_tobit(ASMState *as, IRIns *ir) |
| 1628 | { | 1612 | { |
| 1629 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1613 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| @@ -1636,24 +1620,6 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
| 1636 | ra_left(as, tmp, ir->op1); | 1620 | ra_left(as, tmp, ir->op1); |
| 1637 | } | 1621 | } |
| 1638 | 1622 | ||
| 1639 | static void asm_toi64(ASMState *as, IRIns *ir) | ||
| 1640 | { | ||
| 1641 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
| 1642 | IRRef lref = ir->op1; | ||
| 1643 | lua_assert(LJ_64); /* NYI: 32 bit register pairs. */ | ||
| 1644 | if (ir->op2 == IRTOINT_TRUNCI64) { | ||
| 1645 | Reg left = asm_fuseload(as, lref, RSET_FPR); | ||
| 1646 | emit_mrm(as, XO_CVTTSD2SI, dest|REX_64, left); | ||
| 1647 | } else if (ir->op2 == IRTOINT_ZEXT64) { | ||
| 1648 | /* Nothing to do. This assumes 32 bit regs are already zero-extended. */ | ||
| 1649 | ra_left(as, dest, lref); /* But may need to move regs. */ | ||
| 1650 | } else { | ||
| 1651 | Reg left = asm_fuseload(as, lref, RSET_GPR); | ||
| 1652 | emit_mrm(as, XO_MOVSXd, dest|REX_64, left); | ||
| 1653 | lua_assert(ir->op2 == IRTOINT_SEXT64); | ||
| 1654 | } | ||
| 1655 | } | ||
| 1656 | |||
| 1657 | static void asm_conv(ASMState *as, IRIns *ir) | 1623 | static void asm_conv(ASMState *as, IRIns *ir) |
| 1658 | { | 1624 | { |
| 1659 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | 1625 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
| @@ -2499,7 +2465,7 @@ static void asm_x87load(ASMState *as, IRRef ref) | |||
| 2499 | emit_x87op(as, XI_FLD1); | 2465 | emit_x87op(as, XI_FLD1); |
| 2500 | else | 2466 | else |
| 2501 | emit_rma(as, XO_FLDq, XOg_FLDq, tv); | 2467 | emit_rma(as, XO_FLDq, XOg_FLDq, tv); |
| 2502 | } else if (ir->o == IR_TONUM && !ra_used(ir) && | 2468 | } else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT && !ra_used(ir) && |
| 2503 | !irref_isk(ir->op1) && mayfuse(as, ir->op1)) { | 2469 | !irref_isk(ir->op1) && mayfuse(as, ir->op1)) { |
| 2504 | IRIns *iri = IR(ir->op1); | 2470 | IRIns *iri = IR(ir->op1); |
| 2505 | emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri)); | 2471 | emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri)); |
| @@ -3753,15 +3719,7 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
| 3753 | case IR_OBAR: asm_obar(as, ir); break; | 3719 | case IR_OBAR: asm_obar(as, ir); break; |
| 3754 | 3720 | ||
| 3755 | /* Type conversions. */ | 3721 | /* Type conversions. */ |
| 3756 | case IR_TONUM: asm_tonum(as, ir); break; | ||
| 3757 | case IR_TOINT: | ||
| 3758 | if (irt_isguard(ir->t)) | ||
| 3759 | asm_tointg(as, ir, ra_alloc1(as, ir->op1, RSET_FPR)); | ||
| 3760 | else | ||
| 3761 | asm_toint(as, ir); break; | ||
| 3762 | break; | ||
| 3763 | case IR_TOBIT: asm_tobit(as, ir); break; | 3722 | case IR_TOBIT: asm_tobit(as, ir); break; |
| 3764 | case IR_TOI64: asm_toi64(as, ir); break; | ||
| 3765 | case IR_CONV: asm_conv(as, ir); break; | 3723 | case IR_CONV: asm_conv(as, ir); break; |
| 3766 | case IR_TOSTR: asm_tostr(as, ir); break; | 3724 | case IR_TOSTR: asm_tostr(as, ir); break; |
| 3767 | case IR_STRTO: asm_strto(as, ir); break; | 3725 | case IR_STRTO: asm_strto(as, ir); break; |
| @@ -3905,7 +3863,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T) | |||
| 3905 | } | 3863 | } |
| 3906 | break; | 3864 | break; |
| 3907 | /* Do not propagate hints across type conversions. */ | 3865 | /* Do not propagate hints across type conversions. */ |
| 3908 | case IR_CONV: case IR_TONUM: case IR_TOINT: case IR_TOBIT: | 3866 | case IR_CONV: case IR_TOBIT: |
| 3909 | break; | 3867 | break; |
| 3910 | default: | 3868 | default: |
| 3911 | /* Propagate hints across likely 'op reg, imm' or 'op reg'. */ | 3869 | /* Propagate hints across likely 'op reg, imm' or 'op reg'. */ |
diff --git a/src/lj_ir.h b/src/lj_ir.h index 5733f6af..4cf412cf 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
| @@ -119,10 +119,7 @@ | |||
| 119 | \ | 119 | \ |
| 120 | /* Type conversions. */ \ | 120 | /* Type conversions. */ \ |
| 121 | _(CONV, N , ref, lit) \ | 121 | _(CONV, N , ref, lit) \ |
| 122 | _(TONUM, N , ref, ___) \ | ||
| 123 | _(TOINT, N , ref, lit) \ | ||
| 124 | _(TOBIT, N , ref, ref) \ | 122 | _(TOBIT, N , ref, ref) \ |
| 125 | _(TOI64, N , ref, lit) \ | ||
| 126 | _(TOSTR, N , ref, ___) \ | 123 | _(TOSTR, N , ref, ___) \ |
| 127 | _(STRTO, N , ref, ___) \ | 124 | _(STRTO, N , ref, ___) \ |
| 128 | \ | 125 | \ |
| @@ -210,15 +207,6 @@ IRFLDEF(FLENUM) | |||
| 210 | #define IRXLOAD_READONLY 1 /* Load from read-only data. */ | 207 | #define IRXLOAD_READONLY 1 /* Load from read-only data. */ |
| 211 | #define IRXLOAD_UNALIGNED 2 /* Unaligned load. */ | 208 | #define IRXLOAD_UNALIGNED 2 /* Unaligned load. */ |
| 212 | 209 | ||
| 213 | /* TOINT/TOI64 mode, stored in op2. Ordered by strength of the checks. */ | ||
| 214 | #define IRTOINT_CHECK 0 /* Number checked for integerness. */ | ||
| 215 | #define IRTOINT_INDEX 1 /* Checked + special backprop rules. */ | ||
| 216 | #define IRTOINT_ANY 2 /* Any FP number is ok. */ | ||
| 217 | #define IRTOINT_ZEXT64 3 /* Convert uint32_t to int64_t. */ | ||
| 218 | #define IRTOINT_SEXT64 4 /* Convert int32_t to int64_t. */ | ||
| 219 | #define IRTOINT_TRUNCI64 5 /* Truncate number to int64_t. */ | ||
| 220 | #define IRTOINT_TOBIT 6 /* Cache only: TOBIT conversion. */ | ||
| 221 | |||
| 222 | /* CONV mode, stored in op2. */ | 210 | /* CONV mode, stored in op2. */ |
| 223 | #define IRCONV_SRCMASK 0x001f /* Source IRType. */ | 211 | #define IRCONV_SRCMASK 0x001f /* Source IRType. */ |
| 224 | #define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */ | 212 | #define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */ |
| @@ -235,7 +223,6 @@ IRFLDEF(FLENUM) | |||
| 235 | #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ | 223 | #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ |
| 236 | #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ | 224 | #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ |
| 237 | 225 | ||
| 238 | |||
| 239 | /* C call info for CALL* instructions. */ | 226 | /* C call info for CALL* instructions. */ |
| 240 | typedef struct CCallInfo { | 227 | typedef struct CCallInfo { |
| 241 | ASMFunction func; /* Function pointer. */ | 228 | ASMFunction func; /* Function pointer. */ |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index adf88f4d..e05d6b7b 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
| @@ -441,12 +441,6 @@ LJFOLDF(kfold_strcmp) | |||
| 441 | 441 | ||
| 442 | /* -- Constant folding of conversions ------------------------------------- */ | 442 | /* -- Constant folding of conversions ------------------------------------- */ |
| 443 | 443 | ||
| 444 | LJFOLD(TONUM KINT) | ||
| 445 | LJFOLDF(kfold_tonum) | ||
| 446 | { | ||
| 447 | return lj_ir_knum(J, cast_num(fleft->i)); | ||
| 448 | } | ||
| 449 | |||
| 450 | LJFOLD(TOBIT KNUM KNUM) | 444 | LJFOLD(TOBIT KNUM KNUM) |
| 451 | LJFOLDF(kfold_tobit) | 445 | LJFOLDF(kfold_tobit) |
| 452 | { | 446 | { |
| @@ -455,40 +449,6 @@ LJFOLDF(kfold_tobit) | |||
| 455 | return INTFOLD((int32_t)tv.u32.lo); | 449 | return INTFOLD((int32_t)tv.u32.lo); |
| 456 | } | 450 | } |
| 457 | 451 | ||
| 458 | LJFOLD(TOINT KNUM any) | ||
| 459 | LJFOLDF(kfold_toint) | ||
| 460 | { | ||
| 461 | lua_Number n = knumleft; | ||
| 462 | int32_t k = lj_num2int(n); | ||
| 463 | if (irt_isguard(fins->t) && n != cast_num(k)) { | ||
| 464 | /* We're about to create a guard which always fails, like TOINT +1.5. | ||
| 465 | ** Some pathological loops cause this during LICM, e.g.: | ||
| 466 | ** local x,k,t = 0,1.5,{1,[1.5]=2} | ||
| 467 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end | ||
| 468 | ** assert(x == 300) | ||
| 469 | */ | ||
| 470 | return FAILFOLD; | ||
| 471 | } | ||
| 472 | return INTFOLD(k); | ||
| 473 | } | ||
| 474 | |||
| 475 | LJFOLD(TOI64 KINT any) | ||
| 476 | LJFOLDF(kfold_toi64_kint) | ||
| 477 | { | ||
| 478 | lua_assert(fins->op2 == IRTOINT_ZEXT64 || fins->op2 == IRTOINT_SEXT64); | ||
| 479 | if (fins->op2 == IRTOINT_ZEXT64) | ||
| 480 | return INT64FOLD((uint64_t)(uint32_t)fleft->i); | ||
| 481 | else | ||
| 482 | return INT64FOLD((uint64_t)(int32_t)fleft->i); | ||
| 483 | } | ||
| 484 | |||
| 485 | LJFOLD(TOI64 KNUM any) | ||
| 486 | LJFOLDF(kfold_toi64_knum) | ||
| 487 | { | ||
| 488 | lua_assert(fins->op2 == IRTOINT_TRUNCI64); | ||
| 489 | return INT64FOLD((uint64_t)(int64_t)knumleft); | ||
| 490 | } | ||
| 491 | |||
| 492 | LJFOLD(CONV KINT IRCONV_NUM_INT) | 452 | LJFOLD(CONV KINT IRCONV_NUM_INT) |
| 493 | LJFOLDF(kfold_conv_kint_num) | 453 | LJFOLDF(kfold_conv_kint_num) |
| 494 | { | 454 | { |
| @@ -613,9 +573,6 @@ LJFOLDF(shortcut_round) | |||
| 613 | return NEXTFOLD; | 573 | return NEXTFOLD; |
| 614 | } | 574 | } |
| 615 | 575 | ||
| 616 | LJFOLD(FPMATH TONUM IRFPM_FLOOR) | ||
| 617 | LJFOLD(FPMATH TONUM IRFPM_CEIL) | ||
| 618 | LJFOLD(FPMATH TONUM IRFPM_TRUNC) | ||
| 619 | LJFOLD(ABS ABS KNUM) | 576 | LJFOLD(ABS ABS KNUM) |
| 620 | LJFOLDF(shortcut_left) | 577 | LJFOLDF(shortcut_left) |
| 621 | { | 578 | { |
| @@ -640,32 +597,6 @@ LJFOLDF(shortcut_leftleft) | |||
| 640 | return fleft->op1; /* f(g(x)) ==> x */ | 597 | return fleft->op1; /* f(g(x)) ==> x */ |
| 641 | } | 598 | } |
| 642 | 599 | ||
| 643 | LJFOLD(TONUM TOINT) | ||
| 644 | LJFOLDF(shortcut_leftleft_toint) | ||
| 645 | { | ||
| 646 | PHIBARRIER(fleft); | ||
| 647 | if (irt_isguard(fleft->t)) /* Only safe with a guarded TOINT. */ | ||
| 648 | return fleft->op1; /* f(g(x)) ==> x */ | ||
| 649 | return NEXTFOLD; | ||
| 650 | } | ||
| 651 | |||
| 652 | LJFOLD(TOINT TONUM any) | ||
| 653 | LJFOLD(TOBIT TONUM KNUM) /* The inverse must NOT be shortcut! */ | ||
| 654 | LJFOLDF(shortcut_leftleft_across_phi) | ||
| 655 | { | ||
| 656 | /* Fold even across PHI to avoid expensive int->num->int conversions. */ | ||
| 657 | return fleft->op1; /* f(g(x)) ==> x */ | ||
| 658 | } | ||
| 659 | |||
| 660 | LJFOLD(TOI64 TONUM any) | ||
| 661 | LJFOLDF(shortcut_leftleft_toint64) | ||
| 662 | { | ||
| 663 | /* Fold even across PHI to avoid expensive int->num->int64 conversions. */ | ||
| 664 | fins->op1 = fleft->op1; /* (int64_t)(double)(int)x ==> (int64_t)x */ | ||
| 665 | fins->op2 = IRTOINT_SEXT64; | ||
| 666 | return RETRYFOLD; | ||
| 667 | } | ||
| 668 | |||
| 669 | /* -- FP algebraic simplifications ---------------------------------------- */ | 600 | /* -- FP algebraic simplifications ---------------------------------------- */ |
| 670 | 601 | ||
| 671 | /* FP arithmetic is tricky -- there's not much to simplify. | 602 | /* FP arithmetic is tricky -- there's not much to simplify. |
| @@ -969,63 +900,6 @@ LJFOLDF(narrow_convert) | |||
| 969 | return lj_opt_narrow_convert(J); | 900 | return lj_opt_narrow_convert(J); |
| 970 | } | 901 | } |
| 971 | 902 | ||
| 972 | /* Relaxed CSE rule for TOINT allows commoning with stronger checks, too. */ | ||
| 973 | LJFOLD(TOINT any any) | ||
| 974 | LJFOLDF(cse_toint) | ||
| 975 | { | ||
| 976 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
| 977 | IRRef ref, op1 = fins->op1; | ||
| 978 | uint8_t guard = irt_isguard(fins->t); | ||
| 979 | for (ref = J->chain[IR_TOINT]; ref > op1; ref = IR(ref)->prev) | ||
| 980 | if (IR(ref)->op1 == op1 && irt_isguard(IR(ref)->t) >= guard) | ||
| 981 | return ref; | ||
| 982 | } | ||
| 983 | return EMITFOLD; /* No fallthrough to regular CSE. */ | ||
| 984 | } | ||
| 985 | |||
| 986 | /* -- Strength reduction of widening -------------------------------------- */ | ||
| 987 | |||
| 988 | LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ | ||
| 989 | LJFOLDF(simplify_zext64) | ||
| 990 | { | ||
| 991 | #if LJ_TARGET_X64 | ||
| 992 | /* Eliminate widening. All 32 bit ops implicitly zero-extend the result. */ | ||
| 993 | PHIBARRIER(fleft); | ||
| 994 | return LEFTFOLD; | ||
| 995 | #else | ||
| 996 | UNUSED(J); | ||
| 997 | return NEXTFOLD; | ||
| 998 | #endif | ||
| 999 | } | ||
| 1000 | |||
| 1001 | LJFOLD(TOI64 any 4) /* IRTOINT_SEXT64 */ | ||
| 1002 | LJFOLDF(simplify_sext64) | ||
| 1003 | { | ||
| 1004 | IRRef ref = fins->op1; | ||
| 1005 | int64_t ofs = 0; | ||
| 1006 | PHIBARRIER(fleft); | ||
| 1007 | if (fleft->o == IR_ADD && irref_isk(fleft->op2)) { | ||
| 1008 | ofs = (int64_t)IR(fleft->op2)->i; | ||
| 1009 | ref = fleft->op1; | ||
| 1010 | } | ||
| 1011 | /* Use scalar evolution analysis results to strength-reduce sign-extension. */ | ||
| 1012 | if (ref == J->scev.idx) { | ||
| 1013 | IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; | ||
| 1014 | lua_assert(irt_isint(J->scev.t)); | ||
| 1015 | if (lo && IR(lo)->i + ofs >= 0) { | ||
| 1016 | #if LJ_TARGET_X64 | ||
| 1017 | /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ | ||
| 1018 | return LEFTFOLD; | ||
| 1019 | #else | ||
| 1020 | /* Reduce to a (cheaper) zero-extension. */ | ||
| 1021 | fins->op2 = IRTOINT_ZEXT64; | ||
| 1022 | return RETRYFOLD; | ||
| 1023 | #endif | ||
| 1024 | } | ||
| 1025 | } | ||
| 1026 | return NEXTFOLD; | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | /* -- Integer algebraic simplifications ----------------------------------- */ | 903 | /* -- Integer algebraic simplifications ----------------------------------- */ |
| 1030 | 904 | ||
| 1031 | LJFOLD(ADD any KINT) | 905 | LJFOLD(ADD any KINT) |
