diff options
| author | Mike Pall <mike> | 2010-12-31 01:09:30 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2010-12-31 03:47:30 +0100 |
| commit | 1f269610925829f55ed3e88e4af2b6575598adbc (patch) | |
| tree | 69bce04f2686f7e05d77ff4445c78a4e9bc27a31 | |
| parent | 65b194a2f89eb315029724af56354bb527021192 (diff) | |
| download | luajit-1f269610925829f55ed3e88e4af2b6575598adbc.tar.gz luajit-1f269610925829f55ed3e88e4af2b6575598adbc.tar.bz2 luajit-1f269610925829f55ed3e88e4af2b6575598adbc.zip | |
Refactoring of conversion ops, part 3: add FOLD rules for IR_CONV.
| -rw-r--r-- | src/buildvm_fold.c | 8 | ||||
| -rw-r--r-- | src/lj_opt_fold.c | 253 |
2 files changed, 226 insertions, 35 deletions
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c
index 468e5300..5f463575 100644
--- a/src/buildvm_fold.c
+++ b/src/buildvm_fold.c
| @@ -124,7 +124,7 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany) | |||
| 124 | if (*p == '\0') | 124 | if (*p == '\0') |
| 125 | return i; | 125 | return i; |
| 126 | } else if (allowany && !strcmp("any", p)) { | 126 | } else if (allowany && !strcmp("any", p)) { |
| 127 | return 0xff; | 127 | return allowany; |
| 128 | } else { | 128 | } else { |
| 129 | for (i = 0; ir_names[i]; i++) | 129 | for (i = 0; ir_names[i]; i++) |
| 130 | if (!strcmp(ir_names[i], p)) | 130 | if (!strcmp(ir_names[i], p)) |
| @@ -140,9 +140,9 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany) | |||
| 140 | static void foldrule(char *p) | 140 | static void foldrule(char *p) |
| 141 | { | 141 | { |
| 142 | uint32_t op = nexttoken(&p, 0, 0); | 142 | uint32_t op = nexttoken(&p, 0, 0); |
| 143 | uint32_t left = nexttoken(&p, 0, 1); | 143 | uint32_t left = nexttoken(&p, 0, 0x7f); |
| 144 | uint32_t right = nexttoken(&p, 1, 1); | 144 | uint32_t right = nexttoken(&p, 1, 0x3ff); |
| 145 | uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right; | 145 | uint32_t key = (funcidx << 24) | (op << 17) | (left << 10) | right; |
| 146 | uint32_t i; | 146 | uint32_t i; |
| 147 | if (nkeys >= BUILD_MAX_FOLD) { | 147 | if (nkeys >= BUILD_MAX_FOLD) { |
| 148 | fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n"); | 148 | fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n"); |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 4790d245..92b9dfb4 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
| @@ -489,6 +489,73 @@ LJFOLDF(kfold_toi64_knum) | |||
| 489 | return INT64FOLD((uint64_t)(int64_t)knumleft); | 489 | return INT64FOLD((uint64_t)(int64_t)knumleft); |
| 490 | } | 490 | } |
| 491 | 491 | ||
| 492 | LJFOLD(CONV KINT IRCONV_NUM_INT) | ||
| 493 | LJFOLDF(kfold_conv_kint_num) | ||
| 494 | { | ||
| 495 | return lj_ir_knum(J, cast_num(fleft->i)); | ||
| 496 | } | ||
| 497 | |||
| 498 | LJFOLD(CONV KINT IRCONV_I64_INT) | ||
| 499 | LJFOLD(CONV KINT IRCONV_U64_INT) | ||
| 500 | LJFOLDF(kfold_conv_kint_i64) | ||
| 501 | { | ||
| 502 | return INT64FOLD((uint64_t)(int64_t)fleft->i); | ||
| 503 | } | ||
| 504 | |||
| 505 | LJFOLD(CONV KINT64 IRCONV_NUM_I64) | ||
| 506 | LJFOLDF(kfold_conv_kint64_num_i64) | ||
| 507 | { | ||
| 508 | return lj_ir_knum(J, cast_num((int64_t)ir_kint64(fleft)->u64)); | ||
| 509 | } | ||
| 510 | |||
| 511 | LJFOLD(CONV KINT64 IRCONV_NUM_U64) | ||
| 512 | LJFOLDF(kfold_conv_kint64_num_u64) | ||
| 513 | { | ||
| 514 | return lj_ir_knum(J, cast_num(ir_kint64(fleft)->u64)); | ||
| 515 | } | ||
| 516 | |||
| 517 | LJFOLD(CONV KINT64 IRCONV_INT_I64) | ||
| 518 | LJFOLD(CONV KINT64 IRCONV_U32_I64) | ||
| 519 | LJFOLDF(kfold_conv_kint64_int_i64) | ||
| 520 | { | ||
| 521 | return INTFOLD((int32_t)ir_kint64(fleft)->u64); | ||
| 522 | } | ||
| 523 | |||
| 524 | LJFOLD(CONV KNUM IRCONV_INT_NUM) | ||
| 525 | LJFOLDF(kfold_conv_knum_int_num) | ||
| 526 | { | ||
| 527 | lua_Number n = knumleft; | ||
| 528 | if (!(fins->op2 & IRCONV_TRUNC)) { | ||
| 529 | int32_t k = lj_num2int(n); | ||
| 530 | if (irt_isguard(fins->t) && n != cast_num(k)) { | ||
| 531 | /* We're about to create a guard which always fails, like CONV +1.5. | ||
| 532 | ** Some pathological loops cause this during LICM, e.g.: | ||
| 533 | ** local x,k,t = 0,1.5,{1,[1.5]=2} | ||
| 534 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end | ||
| 535 | ** assert(x == 300) | ||
| 536 | */ | ||
| 537 | return FAILFOLD; | ||
| 538 | } | ||
| 539 | return INTFOLD(k); | ||
| 540 | } else { | ||
| 541 | return INTFOLD((int32_t)n); | ||
| 542 | } | ||
| 543 | } | ||
| 544 | |||
| 545 | LJFOLD(CONV KNUM IRCONV_I64_NUM) | ||
| 546 | LJFOLDF(kfold_conv_knum_i64_num) | ||
| 547 | { | ||
| 548 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
| 549 | return INT64FOLD((uint64_t)(int64_t)knumleft); | ||
| 550 | } | ||
| 551 | |||
| 552 | LJFOLD(CONV KNUM IRCONV_U64_NUM) | ||
| 553 | LJFOLDF(kfold_conv_knum_u64_num) | ||
| 554 | { | ||
| 555 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
| 556 | return INT64FOLD(lj_num2u64(knumleft)); | ||
| 557 | } | ||
| 558 | |||
| 492 | LJFOLD(TOSTR KNUM) | 559 | LJFOLD(TOSTR KNUM) |
| 493 | LJFOLDF(kfold_tostr_knum) | 560 | LJFOLDF(kfold_tostr_knum) |
| 494 | { | 561 | { |
| @@ -740,8 +807,152 @@ LJFOLDF(simplify_powi_kx) | |||
| 740 | return NEXTFOLD; | 807 | return NEXTFOLD; |
| 741 | } | 808 | } |
| 742 | 809 | ||
| 743 | /* -- FP conversion narrowing --------------------------------------------- */ | 810 | /* -- Simplify conversions ------------------------------------------------ */ |
| 744 | 811 | ||
| 812 | LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */ | ||
| 813 | LJFOLDF(shortcut_conv_num_int) | ||
| 814 | { | ||
| 815 | PHIBARRIER(fleft); | ||
| 816 | /* Only safe with a guarded conversion to int. */ | ||
| 817 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t)) | ||
| 818 | return fleft->op1; /* f(g(x)) ==> x */ | ||
| 819 | return NEXTFOLD; | ||
| 820 | } | ||
| 821 | |||
| 822 | LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ | ||
| 823 | LJFOLDF(simplify_conv_int_num) | ||
| 824 | { | ||
| 825 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | ||
| 826 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) | ||
| 827 | return fleft->op1; | ||
| 828 | return NEXTFOLD; | ||
| 829 | } | ||
| 830 | |||
| 831 | LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/ | ||
| 832 | LJFOLDF(simplify_conv_u32_num) | ||
| 833 | { | ||
| 834 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | ||
| 835 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) | ||
| 836 | return fleft->op1; | ||
| 837 | return NEXTFOLD; | ||
| 838 | } | ||
| 839 | |||
| 840 | LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32*/ | ||
| 841 | LJFOLDF(simplify_conv_i64_num) | ||
| 842 | { | ||
| 843 | PHIBARRIER(fleft); | ||
| 844 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { | ||
| 845 | /* Reduce to a sign-extension. */ | ||
| 846 | fins->op1 = fleft->op1; | ||
| 847 | fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT); | ||
| 848 | return RETRYFOLD; | ||
| 849 | } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | ||
| 850 | #if LJ_TARGET_X64 | ||
| 851 | return fleft->op1; | ||
| 852 | #else | ||
| 853 | /* Reduce to a zero-extension. */ | ||
| 854 | fins->op1 = fleft->op1; | ||
| 855 | fins->op2 = (IRT_I64<<5)|IRT_U32; | ||
| 856 | return RETRYFOLD; | ||
| 857 | #endif | ||
| 858 | } | ||
| 859 | return NEXTFOLD; | ||
| 860 | } | ||
| 861 | |||
| 862 | LJFOLD(CONV CONV IRCONV_U64_NUM) /* _U32*/ | ||
| 863 | LJFOLDF(simplify_conv_u64_num) | ||
| 864 | { | ||
| 865 | PHIBARRIER(fleft); | ||
| 866 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | ||
| 867 | #if LJ_TARGET_X64 | ||
| 868 | return fleft->op1; | ||
| 869 | #else | ||
| 870 | /* Reduce to a zero-extension. */ | ||
| 871 | fins->op1 = fleft->op1; | ||
| 872 | fins->op2 = (IRT_U64<<5)|IRT_U32; | ||
| 873 | return RETRYFOLD; | ||
| 874 | #endif | ||
| 875 | } | ||
| 876 | return NEXTFOLD; | ||
| 877 | } | ||
| 878 | |||
| 879 | /* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ | ||
| 880 | LJFOLD(TOBIT CONV KNUM) | ||
| 881 | LJFOLDF(simplify_tobit_conv) | ||
| 882 | { | ||
| 883 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT || | ||
| 884 | (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | ||
| 885 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | ||
| 886 | lua_assert(irt_isnum(fleft->t)); | ||
| 887 | return fleft->op1; | ||
| 888 | } | ||
| 889 | return NEXTFOLD; | ||
| 890 | } | ||
| 891 | |||
| 892 | /* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ | ||
| 893 | LJFOLD(FPMATH CONV IRFPM_FLOOR) | ||
| 894 | LJFOLD(FPMATH CONV IRFPM_CEIL) | ||
| 895 | LJFOLD(FPMATH CONV IRFPM_TRUNC) | ||
| 896 | LJFOLDF(simplify_floor_conv) | ||
| 897 | { | ||
| 898 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT || | ||
| 899 | (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) | ||
| 900 | return LEFTFOLD; | ||
| 901 | return NEXTFOLD; | ||
| 902 | } | ||
| 903 | |||
| 904 | /* Strength reduction of widening. */ | ||
| 905 | LJFOLD(CONV any IRCONV_I64_INT) | ||
| 906 | LJFOLDF(simplify_conv_sext) | ||
| 907 | { | ||
| 908 | IRRef ref = fins->op1; | ||
| 909 | int64_t ofs = 0; | ||
| 910 | if (!(fins->op2 & IRCONV_SEXT)) | ||
| 911 | return NEXTFOLD; | ||
| 912 | PHIBARRIER(fleft); | ||
| 913 | if (fleft->o == IR_ADD && irref_isk(fleft->op2)) { | ||
| 914 | ofs = (int64_t)IR(fleft->op2)->i; | ||
| 915 | ref = fleft->op1; | ||
| 916 | } | ||
| 917 | /* Use scalar evolution analysis results to strength-reduce sign-extension. */ | ||
| 918 | if (ref == J->scev.idx) { | ||
| 919 | IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; | ||
| 920 | lua_assert(irt_isint(J->scev.t)); | ||
| 921 | if (lo && IR(lo)->i + ofs >= 0) { | ||
| 922 | #if LJ_TARGET_X64 | ||
| 923 | /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ | ||
| 924 | return LEFTFOLD; | ||
| 925 | #else | ||
| 926 | /* Reduce to a (cheaper) zero-extension. */ | ||
| 927 | fins->op2 &= ~IRCONV_SEXT; | ||
| 928 | return RETRYFOLD; | ||
| 929 | #endif | ||
| 930 | } | ||
| 931 | } | ||
| 932 | return NEXTFOLD; | ||
| 933 | } | ||
| 934 | |||
| 935 | /* Special CSE rule for CONV. */ | ||
| 936 | LJFOLD(CONV any any) | ||
| 937 | LJFOLDF(cse_conv) | ||
| 938 | { | ||
| 939 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
| 940 | IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK); | ||
| 941 | uint8_t guard = irt_isguard(fins->t); | ||
| 942 | IRRef ref = J->chain[IR_CONV]; | ||
| 943 | while (ref > op1) { | ||
| 944 | IRIns *ir = IR(ref); | ||
| 945 | /* Commoning with stronger checks is ok. */ | ||
| 946 | if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 && | ||
| 947 | irt_isguard(ir->t) >= guard) | ||
| 948 | return ref; | ||
| 949 | ref = ir->prev; | ||
| 950 | } | ||
| 951 | } | ||
| 952 | return EMITFOLD; /* No fallthrough to regular CSE. */ | ||
| 953 | } | ||
| 954 | |||
| 955 | /* FP conversion narrowing. */ | ||
| 745 | LJFOLD(TOINT ADD any) | 956 | LJFOLD(TOINT ADD any) |
| 746 | LJFOLD(TOINT SUB any) | 957 | LJFOLD(TOINT SUB any) |
| 747 | LJFOLD(TOBIT ADD KNUM) | 958 | LJFOLD(TOBIT ADD KNUM) |
| @@ -771,26 +982,6 @@ LJFOLDF(cse_toint) | |||
| 771 | return EMITFOLD; /* No fallthrough to regular CSE. */ | 982 | return EMITFOLD; /* No fallthrough to regular CSE. */ |
| 772 | } | 983 | } |
| 773 | 984 | ||
| 774 | /* Special CSE rule for CONV. */ | ||
| 775 | LJFOLD(CONV any any) | ||
| 776 | LJFOLDF(cse_conv) | ||
| 777 | { | ||
| 778 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
| 779 | IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK); | ||
| 780 | uint8_t guard = irt_isguard(fins->t); | ||
| 781 | IRRef ref = J->chain[IR_CONV]; | ||
| 782 | while (ref > op1) { | ||
| 783 | IRIns *ir = IR(ref); | ||
| 784 | /* Commoning with stronger checks is ok. */ | ||
| 785 | if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 && | ||
| 786 | irt_isguard(ir->t) >= guard) | ||
| 787 | return ref; | ||
| 788 | ref = ir->prev; | ||
| 789 | } | ||
| 790 | } | ||
| 791 | return EMITFOLD; /* No fallthrough to regular CSE. */ | ||
| 792 | } | ||
| 793 | |||
| 794 | /* -- Strength reduction of widening -------------------------------------- */ | 985 | /* -- Strength reduction of widening -------------------------------------- */ |
| 795 | 986 | ||
| 796 | LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ | 987 | LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ |
| @@ -1723,12 +1914,12 @@ LJFOLDX(lj_ir_emit) | |||
| 1723 | 1914 | ||
| 1724 | /* Every entry in the generated hash table is a 32 bit pattern: | 1915 | /* Every entry in the generated hash table is a 32 bit pattern: |
| 1725 | ** | 1916 | ** |
| 1726 | ** xxxxxxxx iiiiiiii llllllll rrrrrrrr | 1917 | ** xxxxxxxx iiiiiii lllllll rrrrrrrrrr |
| 1727 | ** | 1918 | ** |
| 1728 | ** xxxxxxxx = 8 bit index into fold function table | 1919 | ** xxxxxxxx = 8 bit index into fold function table |
| 1729 | ** iiiiiiii = 8 bit folded instruction opcode | 1920 | ** iiiiiii = 7 bit folded instruction opcode |
| 1730 | ** llllllll = 8 bit left instruction opcode | 1921 | ** lllllll = 7 bit left instruction opcode |
| 1731 | ** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field | 1922 | ** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field |
| 1732 | */ | 1923 | */ |
| 1733 | 1924 | ||
| 1734 | #include "lj_folddef.h" | 1925 | #include "lj_folddef.h" |
| @@ -1762,9 +1953,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) | |||
| 1762 | /* Fold engine start/retry point. */ | 1953 | /* Fold engine start/retry point. */ |
| 1763 | retry: | 1954 | retry: |
| 1764 | /* Construct key from opcode and operand opcodes (unless literal/none). */ | 1955 | /* Construct key from opcode and operand opcodes (unless literal/none). */ |
| 1765 | key = ((uint32_t)fins->o << 16); | 1956 | key = ((uint32_t)fins->o << 17); |
| 1766 | if (fins->op1 >= J->cur.nk) { | 1957 | if (fins->op1 >= J->cur.nk) { |
| 1767 | key += (uint32_t)IR(fins->op1)->o << 8; | 1958 | key += (uint32_t)IR(fins->op1)->o << 10; |
| 1768 | *fleft = *IR(fins->op1); | 1959 | *fleft = *IR(fins->op1); |
| 1769 | } | 1960 | } |
| 1770 | if (fins->op2 >= J->cur.nk) { | 1961 | if (fins->op2 >= J->cur.nk) { |
| @@ -1777,7 +1968,7 @@ retry: | |||
| 1777 | /* Check for a match in order from most specific to least specific. */ | 1968 | /* Check for a match in order from most specific to least specific. */ |
| 1778 | any = 0; | 1969 | any = 0; |
| 1779 | for (;;) { | 1970 | for (;;) { |
| 1780 | uint32_t k = key | any; | 1971 | uint32_t k = key | (any & 0x1ffff); |
| 1781 | uint32_t h = fold_hashkey(k); | 1972 | uint32_t h = fold_hashkey(k); |
| 1782 | uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */ | 1973 | uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */ |
| 1783 | if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) { | 1974 | if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) { |
| @@ -1785,9 +1976,9 @@ retry: | |||
| 1785 | if (ref != NEXTFOLD) | 1976 | if (ref != NEXTFOLD) |
| 1786 | break; | 1977 | break; |
| 1787 | } | 1978 | } |
| 1788 | if (any == 0xffff) /* Exhausted folding. Pass on to CSE. */ | 1979 | if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */ |
| 1789 | return lj_opt_cse(J); | 1980 | return lj_opt_cse(J); |
| 1790 | any = (any | (any >> 8)) ^ 0xff00; | 1981 | any = (any | (any >> 10)) ^ 0xffc00; |
| 1791 | } | 1982 | } |
| 1792 | 1983 | ||
| 1793 | /* Return value processing, ordered by frequency. */ | 1984 | /* Return value processing, ordered by frequency. */ |
