diff options
author | Mike Pall <mike> | 2010-12-31 01:09:30 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2010-12-31 03:47:30 +0100 |
commit | 1f269610925829f55ed3e88e4af2b6575598adbc (patch) | |
tree | 69bce04f2686f7e05d77ff4445c78a4e9bc27a31 | |
parent | 65b194a2f89eb315029724af56354bb527021192 (diff) | |
download | luajit-1f269610925829f55ed3e88e4af2b6575598adbc.tar.gz luajit-1f269610925829f55ed3e88e4af2b6575598adbc.tar.bz2 luajit-1f269610925829f55ed3e88e4af2b6575598adbc.zip |
Refactoring of conversion ops, part 3: add FOLD rules for IR_CONV.
-rw-r--r-- | src/buildvm_fold.c | 8 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 253 |
2 files changed, 226 insertions, 35 deletions
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c index 468e5300..5f463575 100644 --- a/src/buildvm_fold.c +++ b/src/buildvm_fold.c | |||
@@ -124,7 +124,7 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany) | |||
124 | if (*p == '\0') | 124 | if (*p == '\0') |
125 | return i; | 125 | return i; |
126 | } else if (allowany && !strcmp("any", p)) { | 126 | } else if (allowany && !strcmp("any", p)) { |
127 | return 0xff; | 127 | return allowany; |
128 | } else { | 128 | } else { |
129 | for (i = 0; ir_names[i]; i++) | 129 | for (i = 0; ir_names[i]; i++) |
130 | if (!strcmp(ir_names[i], p)) | 130 | if (!strcmp(ir_names[i], p)) |
@@ -140,9 +140,9 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany) | |||
140 | static void foldrule(char *p) | 140 | static void foldrule(char *p) |
141 | { | 141 | { |
142 | uint32_t op = nexttoken(&p, 0, 0); | 142 | uint32_t op = nexttoken(&p, 0, 0); |
143 | uint32_t left = nexttoken(&p, 0, 1); | 143 | uint32_t left = nexttoken(&p, 0, 0x7f); |
144 | uint32_t right = nexttoken(&p, 1, 1); | 144 | uint32_t right = nexttoken(&p, 1, 0x3ff); |
145 | uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right; | 145 | uint32_t key = (funcidx << 24) | (op << 17) | (left << 10) | right; |
146 | uint32_t i; | 146 | uint32_t i; |
147 | if (nkeys >= BUILD_MAX_FOLD) { | 147 | if (nkeys >= BUILD_MAX_FOLD) { |
148 | fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n"); | 148 | fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n"); |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 4790d245..92b9dfb4 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -489,6 +489,73 @@ LJFOLDF(kfold_toi64_knum) | |||
489 | return INT64FOLD((uint64_t)(int64_t)knumleft); | 489 | return INT64FOLD((uint64_t)(int64_t)knumleft); |
490 | } | 490 | } |
491 | 491 | ||
492 | LJFOLD(CONV KINT IRCONV_NUM_INT) | ||
493 | LJFOLDF(kfold_conv_kint_num) | ||
494 | { | ||
495 | return lj_ir_knum(J, cast_num(fleft->i)); | ||
496 | } | ||
497 | |||
498 | LJFOLD(CONV KINT IRCONV_I64_INT) | ||
499 | LJFOLD(CONV KINT IRCONV_U64_INT) | ||
500 | LJFOLDF(kfold_conv_kint_i64) | ||
501 | { | ||
502 | return INT64FOLD((uint64_t)(int64_t)fleft->i); | ||
503 | } | ||
504 | |||
505 | LJFOLD(CONV KINT64 IRCONV_NUM_I64) | ||
506 | LJFOLDF(kfold_conv_kint64_num_i64) | ||
507 | { | ||
508 | return lj_ir_knum(J, cast_num((int64_t)ir_kint64(fleft)->u64)); | ||
509 | } | ||
510 | |||
511 | LJFOLD(CONV KINT64 IRCONV_NUM_U64) | ||
512 | LJFOLDF(kfold_conv_kint64_num_u64) | ||
513 | { | ||
514 | return lj_ir_knum(J, cast_num(ir_kint64(fleft)->u64)); | ||
515 | } | ||
516 | |||
517 | LJFOLD(CONV KINT64 IRCONV_INT_I64) | ||
518 | LJFOLD(CONV KINT64 IRCONV_U32_I64) | ||
519 | LJFOLDF(kfold_conv_kint64_int_i64) | ||
520 | { | ||
521 | return INTFOLD((int32_t)ir_kint64(fleft)->u64); | ||
522 | } | ||
523 | |||
524 | LJFOLD(CONV KNUM IRCONV_INT_NUM) | ||
525 | LJFOLDF(kfold_conv_knum_int_num) | ||
526 | { | ||
527 | lua_Number n = knumleft; | ||
528 | if (!(fins->op2 & IRCONV_TRUNC)) { | ||
529 | int32_t k = lj_num2int(n); | ||
530 | if (irt_isguard(fins->t) && n != cast_num(k)) { | ||
531 | /* We're about to create a guard which always fails, like CONV +1.5. | ||
532 | ** Some pathological loops cause this during LICM, e.g.: | ||
533 | ** local x,k,t = 0,1.5,{1,[1.5]=2} | ||
534 | ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end | ||
535 | ** assert(x == 300) | ||
536 | */ | ||
537 | return FAILFOLD; | ||
538 | } | ||
539 | return INTFOLD(k); | ||
540 | } else { | ||
541 | return INTFOLD((int32_t)n); | ||
542 | } | ||
543 | } | ||
544 | |||
545 | LJFOLD(CONV KNUM IRCONV_I64_NUM) | ||
546 | LJFOLDF(kfold_conv_knum_i64_num) | ||
547 | { | ||
548 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
549 | return INT64FOLD((uint64_t)(int64_t)knumleft); | ||
550 | } | ||
551 | |||
552 | LJFOLD(CONV KNUM IRCONV_U64_NUM) | ||
553 | LJFOLDF(kfold_conv_knum_u64_num) | ||
554 | { | ||
555 | lua_assert((fins->op2 & IRCONV_TRUNC)); | ||
556 | return INT64FOLD(lj_num2u64(knumleft)); | ||
557 | } | ||
558 | |||
492 | LJFOLD(TOSTR KNUM) | 559 | LJFOLD(TOSTR KNUM) |
493 | LJFOLDF(kfold_tostr_knum) | 560 | LJFOLDF(kfold_tostr_knum) |
494 | { | 561 | { |
@@ -740,8 +807,152 @@ LJFOLDF(simplify_powi_kx) | |||
740 | return NEXTFOLD; | 807 | return NEXTFOLD; |
741 | } | 808 | } |
742 | 809 | ||
743 | /* -- FP conversion narrowing --------------------------------------------- */ | 810 | /* -- Simplify conversions ------------------------------------------------ */ |
744 | 811 | ||
812 | LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */ | ||
813 | LJFOLDF(shortcut_conv_num_int) | ||
814 | { | ||
815 | PHIBARRIER(fleft); | ||
816 | /* Only safe with a guarded conversion to int. */ | ||
817 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t)) | ||
818 | return fleft->op1; /* f(g(x)) ==> x */ | ||
819 | return NEXTFOLD; | ||
820 | } | ||
821 | |||
822 | LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ | ||
823 | LJFOLDF(simplify_conv_int_num) | ||
824 | { | ||
825 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | ||
826 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) | ||
827 | return fleft->op1; | ||
828 | return NEXTFOLD; | ||
829 | } | ||
830 | |||
831 | LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/ | ||
832 | LJFOLDF(simplify_conv_u32_num) | ||
833 | { | ||
834 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | ||
835 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) | ||
836 | return fleft->op1; | ||
837 | return NEXTFOLD; | ||
838 | } | ||
839 | |||
840 | LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32*/ | ||
841 | LJFOLDF(simplify_conv_i64_num) | ||
842 | { | ||
843 | PHIBARRIER(fleft); | ||
844 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { | ||
845 | /* Reduce to a sign-extension. */ | ||
846 | fins->op1 = fleft->op1; | ||
847 | fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT); | ||
848 | return RETRYFOLD; | ||
849 | } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | ||
850 | #if LJ_TARGET_X64 | ||
851 | return fleft->op1; | ||
852 | #else | ||
853 | /* Reduce to a zero-extension. */ | ||
854 | fins->op1 = fleft->op1; | ||
855 | fins->op2 = (IRT_I64<<5)|IRT_U32; | ||
856 | return RETRYFOLD; | ||
857 | #endif | ||
858 | } | ||
859 | return NEXTFOLD; | ||
860 | } | ||
861 | |||
862 | LJFOLD(CONV CONV IRCONV_U64_NUM) /* _U32*/ | ||
863 | LJFOLDF(simplify_conv_u64_num) | ||
864 | { | ||
865 | PHIBARRIER(fleft); | ||
866 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | ||
867 | #if LJ_TARGET_X64 | ||
868 | return fleft->op1; | ||
869 | #else | ||
870 | /* Reduce to a zero-extension. */ | ||
871 | fins->op1 = fleft->op1; | ||
872 | fins->op2 = (IRT_U64<<5)|IRT_U32; | ||
873 | return RETRYFOLD; | ||
874 | #endif | ||
875 | } | ||
876 | return NEXTFOLD; | ||
877 | } | ||
878 | |||
879 | /* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ | ||
880 | LJFOLD(TOBIT CONV KNUM) | ||
881 | LJFOLDF(simplify_tobit_conv) | ||
882 | { | ||
883 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT || | ||
884 | (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { | ||
885 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | ||
886 | lua_assert(irt_isnum(fleft->t)); | ||
887 | return fleft->op1; | ||
888 | } | ||
889 | return NEXTFOLD; | ||
890 | } | ||
891 | |||
892 | /* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ | ||
893 | LJFOLD(FPMATH CONV IRFPM_FLOOR) | ||
894 | LJFOLD(FPMATH CONV IRFPM_CEIL) | ||
895 | LJFOLD(FPMATH CONV IRFPM_TRUNC) | ||
896 | LJFOLDF(simplify_floor_conv) | ||
897 | { | ||
898 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT || | ||
899 | (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) | ||
900 | return LEFTFOLD; | ||
901 | return NEXTFOLD; | ||
902 | } | ||
903 | |||
904 | /* Strength reduction of widening. */ | ||
905 | LJFOLD(CONV any IRCONV_I64_INT) | ||
906 | LJFOLDF(simplify_conv_sext) | ||
907 | { | ||
908 | IRRef ref = fins->op1; | ||
909 | int64_t ofs = 0; | ||
910 | if (!(fins->op2 & IRCONV_SEXT)) | ||
911 | return NEXTFOLD; | ||
912 | PHIBARRIER(fleft); | ||
913 | if (fleft->o == IR_ADD && irref_isk(fleft->op2)) { | ||
914 | ofs = (int64_t)IR(fleft->op2)->i; | ||
915 | ref = fleft->op1; | ||
916 | } | ||
917 | /* Use scalar evolution analysis results to strength-reduce sign-extension. */ | ||
918 | if (ref == J->scev.idx) { | ||
919 | IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; | ||
920 | lua_assert(irt_isint(J->scev.t)); | ||
921 | if (lo && IR(lo)->i + ofs >= 0) { | ||
922 | #if LJ_TARGET_X64 | ||
923 | /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ | ||
924 | return LEFTFOLD; | ||
925 | #else | ||
926 | /* Reduce to a (cheaper) zero-extension. */ | ||
927 | fins->op2 &= ~IRCONV_SEXT; | ||
928 | return RETRYFOLD; | ||
929 | #endif | ||
930 | } | ||
931 | } | ||
932 | return NEXTFOLD; | ||
933 | } | ||
934 | |||
935 | /* Special CSE rule for CONV. */ | ||
936 | LJFOLD(CONV any any) | ||
937 | LJFOLDF(cse_conv) | ||
938 | { | ||
939 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
940 | IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK); | ||
941 | uint8_t guard = irt_isguard(fins->t); | ||
942 | IRRef ref = J->chain[IR_CONV]; | ||
943 | while (ref > op1) { | ||
944 | IRIns *ir = IR(ref); | ||
945 | /* Commoning with stronger checks is ok. */ | ||
946 | if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 && | ||
947 | irt_isguard(ir->t) >= guard) | ||
948 | return ref; | ||
949 | ref = ir->prev; | ||
950 | } | ||
951 | } | ||
952 | return EMITFOLD; /* No fallthrough to regular CSE. */ | ||
953 | } | ||
954 | |||
955 | /* FP conversion narrowing. */ | ||
745 | LJFOLD(TOINT ADD any) | 956 | LJFOLD(TOINT ADD any) |
746 | LJFOLD(TOINT SUB any) | 957 | LJFOLD(TOINT SUB any) |
747 | LJFOLD(TOBIT ADD KNUM) | 958 | LJFOLD(TOBIT ADD KNUM) |
@@ -771,26 +982,6 @@ LJFOLDF(cse_toint) | |||
771 | return EMITFOLD; /* No fallthrough to regular CSE. */ | 982 | return EMITFOLD; /* No fallthrough to regular CSE. */ |
772 | } | 983 | } |
773 | 984 | ||
774 | /* Special CSE rule for CONV. */ | ||
775 | LJFOLD(CONV any any) | ||
776 | LJFOLDF(cse_conv) | ||
777 | { | ||
778 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
779 | IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK); | ||
780 | uint8_t guard = irt_isguard(fins->t); | ||
781 | IRRef ref = J->chain[IR_CONV]; | ||
782 | while (ref > op1) { | ||
783 | IRIns *ir = IR(ref); | ||
784 | /* Commoning with stronger checks is ok. */ | ||
785 | if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 && | ||
786 | irt_isguard(ir->t) >= guard) | ||
787 | return ref; | ||
788 | ref = ir->prev; | ||
789 | } | ||
790 | } | ||
791 | return EMITFOLD; /* No fallthrough to regular CSE. */ | ||
792 | } | ||
793 | |||
794 | /* -- Strength reduction of widening -------------------------------------- */ | 985 | /* -- Strength reduction of widening -------------------------------------- */ |
795 | 986 | ||
796 | LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ | 987 | LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ |
@@ -1723,12 +1914,12 @@ LJFOLDX(lj_ir_emit) | |||
1723 | 1914 | ||
1724 | /* Every entry in the generated hash table is a 32 bit pattern: | 1915 | /* Every entry in the generated hash table is a 32 bit pattern: |
1725 | ** | 1916 | ** |
1726 | ** xxxxxxxx iiiiiiii llllllll rrrrrrrr | 1917 | ** xxxxxxxx iiiiiii lllllll rrrrrrrrrr |
1727 | ** | 1918 | ** |
1728 | ** xxxxxxxx = 8 bit index into fold function table | 1919 | ** xxxxxxxx = 8 bit index into fold function table |
1729 | ** iiiiiiii = 8 bit folded instruction opcode | 1920 | ** iiiiiii = 7 bit folded instruction opcode |
1730 | ** llllllll = 8 bit left instruction opcode | 1921 | ** lllllll = 7 bit left instruction opcode |
1731 | ** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field | 1922 | ** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field |
1732 | */ | 1923 | */ |
1733 | 1924 | ||
1734 | #include "lj_folddef.h" | 1925 | #include "lj_folddef.h" |
@@ -1762,9 +1953,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) | |||
1762 | /* Fold engine start/retry point. */ | 1953 | /* Fold engine start/retry point. */ |
1763 | retry: | 1954 | retry: |
1764 | /* Construct key from opcode and operand opcodes (unless literal/none). */ | 1955 | /* Construct key from opcode and operand opcodes (unless literal/none). */ |
1765 | key = ((uint32_t)fins->o << 16); | 1956 | key = ((uint32_t)fins->o << 17); |
1766 | if (fins->op1 >= J->cur.nk) { | 1957 | if (fins->op1 >= J->cur.nk) { |
1767 | key += (uint32_t)IR(fins->op1)->o << 8; | 1958 | key += (uint32_t)IR(fins->op1)->o << 10; |
1768 | *fleft = *IR(fins->op1); | 1959 | *fleft = *IR(fins->op1); |
1769 | } | 1960 | } |
1770 | if (fins->op2 >= J->cur.nk) { | 1961 | if (fins->op2 >= J->cur.nk) { |
@@ -1777,7 +1968,7 @@ retry: | |||
1777 | /* Check for a match in order from most specific to least specific. */ | 1968 | /* Check for a match in order from most specific to least specific. */ |
1778 | any = 0; | 1969 | any = 0; |
1779 | for (;;) { | 1970 | for (;;) { |
1780 | uint32_t k = key | any; | 1971 | uint32_t k = key | (any & 0x1ffff); |
1781 | uint32_t h = fold_hashkey(k); | 1972 | uint32_t h = fold_hashkey(k); |
1782 | uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */ | 1973 | uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */ |
1783 | if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) { | 1974 | if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) { |
@@ -1785,9 +1976,9 @@ retry: | |||
1785 | if (ref != NEXTFOLD) | 1976 | if (ref != NEXTFOLD) |
1786 | break; | 1977 | break; |
1787 | } | 1978 | } |
1788 | if (any == 0xffff) /* Exhausted folding. Pass on to CSE. */ | 1979 | if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */ |
1789 | return lj_opt_cse(J); | 1980 | return lj_opt_cse(J); |
1790 | any = (any | (any >> 8)) ^ 0xff00; | 1981 | any = (any | (any >> 10)) ^ 0xffc00; |
1791 | } | 1982 | } |
1792 | 1983 | ||
1793 | /* Return value processing, ordered by frequency. */ | 1984 | /* Return value processing, ordered by frequency. */ |