aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Pall <mike>2010-12-31 01:09:30 +0100
committerMike Pall <mike>2010-12-31 03:47:30 +0100
commit1f269610925829f55ed3e88e4af2b6575598adbc (patch)
tree69bce04f2686f7e05d77ff4445c78a4e9bc27a31
parent65b194a2f89eb315029724af56354bb527021192 (diff)
downloadluajit-1f269610925829f55ed3e88e4af2b6575598adbc.tar.gz
luajit-1f269610925829f55ed3e88e4af2b6575598adbc.tar.bz2
luajit-1f269610925829f55ed3e88e4af2b6575598adbc.zip
Refactoring of conversion ops, part 3: add FOLD rules for IR_CONV.
-rw-r--r--src/buildvm_fold.c8
-rw-r--r--src/lj_opt_fold.c253
2 files changed, 226 insertions, 35 deletions
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c
index 468e5300..5f463575 100644
--- a/src/buildvm_fold.c
+++ b/src/buildvm_fold.c
@@ -124,7 +124,7 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
124 if (*p == '\0') 124 if (*p == '\0')
125 return i; 125 return i;
126 } else if (allowany && !strcmp("any", p)) { 126 } else if (allowany && !strcmp("any", p)) {
127 return 0xff; 127 return allowany;
128 } else { 128 } else {
129 for (i = 0; ir_names[i]; i++) 129 for (i = 0; ir_names[i]; i++)
130 if (!strcmp(ir_names[i], p)) 130 if (!strcmp(ir_names[i], p))
@@ -140,9 +140,9 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
140static void foldrule(char *p) 140static void foldrule(char *p)
141{ 141{
142 uint32_t op = nexttoken(&p, 0, 0); 142 uint32_t op = nexttoken(&p, 0, 0);
143 uint32_t left = nexttoken(&p, 0, 1); 143 uint32_t left = nexttoken(&p, 0, 0x7f);
144 uint32_t right = nexttoken(&p, 1, 1); 144 uint32_t right = nexttoken(&p, 1, 0x3ff);
145 uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right; 145 uint32_t key = (funcidx << 24) | (op << 17) | (left << 10) | right;
146 uint32_t i; 146 uint32_t i;
147 if (nkeys >= BUILD_MAX_FOLD) { 147 if (nkeys >= BUILD_MAX_FOLD) {
148 fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n"); 148 fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n");
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 4790d245..92b9dfb4 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -489,6 +489,73 @@ LJFOLDF(kfold_toi64_knum)
489 return INT64FOLD((uint64_t)(int64_t)knumleft); 489 return INT64FOLD((uint64_t)(int64_t)knumleft);
490} 490}
491 491
492LJFOLD(CONV KINT IRCONV_NUM_INT)
493LJFOLDF(kfold_conv_kint_num)
494{
495 return lj_ir_knum(J, cast_num(fleft->i));
496}
497
498LJFOLD(CONV KINT IRCONV_I64_INT)
499LJFOLD(CONV KINT IRCONV_U64_INT)
500LJFOLDF(kfold_conv_kint_i64)
501{
502 return INT64FOLD((uint64_t)(int64_t)fleft->i);
503}
504
505LJFOLD(CONV KINT64 IRCONV_NUM_I64)
506LJFOLDF(kfold_conv_kint64_num_i64)
507{
508 return lj_ir_knum(J, cast_num((int64_t)ir_kint64(fleft)->u64));
509}
510
511LJFOLD(CONV KINT64 IRCONV_NUM_U64)
512LJFOLDF(kfold_conv_kint64_num_u64)
513{
514 return lj_ir_knum(J, cast_num(ir_kint64(fleft)->u64));
515}
516
517LJFOLD(CONV KINT64 IRCONV_INT_I64)
518LJFOLD(CONV KINT64 IRCONV_U32_I64)
519LJFOLDF(kfold_conv_kint64_int_i64)
520{
521 return INTFOLD((int32_t)ir_kint64(fleft)->u64);
522}
523
524LJFOLD(CONV KNUM IRCONV_INT_NUM)
525LJFOLDF(kfold_conv_knum_int_num)
526{
527 lua_Number n = knumleft;
528 if (!(fins->op2 & IRCONV_TRUNC)) {
529 int32_t k = lj_num2int(n);
530 if (irt_isguard(fins->t) && n != cast_num(k)) {
531 /* We're about to create a guard which always fails, like CONV +1.5.
532 ** Some pathological loops cause this during LICM, e.g.:
533 ** local x,k,t = 0,1.5,{1,[1.5]=2}
534 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
535 ** assert(x == 300)
536 */
537 return FAILFOLD;
538 }
539 return INTFOLD(k);
540 } else {
541 return INTFOLD((int32_t)n);
542 }
543}
544
545LJFOLD(CONV KNUM IRCONV_I64_NUM)
546LJFOLDF(kfold_conv_knum_i64_num)
547{
548 lua_assert((fins->op2 & IRCONV_TRUNC));
549 return INT64FOLD((uint64_t)(int64_t)knumleft);
550}
551
552LJFOLD(CONV KNUM IRCONV_U64_NUM)
553LJFOLDF(kfold_conv_knum_u64_num)
554{
555 lua_assert((fins->op2 & IRCONV_TRUNC));
556 return INT64FOLD(lj_num2u64(knumleft));
557}
558
492LJFOLD(TOSTR KNUM) 559LJFOLD(TOSTR KNUM)
493LJFOLDF(kfold_tostr_knum) 560LJFOLDF(kfold_tostr_knum)
494{ 561{
@@ -740,8 +807,152 @@ LJFOLDF(simplify_powi_kx)
740 return NEXTFOLD; 807 return NEXTFOLD;
741} 808}
742 809
743/* -- FP conversion narrowing --------------------------------------------- */ 810/* -- Simplify conversions ------------------------------------------------ */
744 811
812LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */
813LJFOLDF(shortcut_conv_num_int)
814{
815 PHIBARRIER(fleft);
816 /* Only safe with a guarded conversion to int. */
817 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
818 return fleft->op1; /* f(g(x)) ==> x */
819 return NEXTFOLD;
820}
821
822LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
823LJFOLDF(simplify_conv_int_num)
824{
825 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
826 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
827 return fleft->op1;
828 return NEXTFOLD;
829}
830
831LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/
832LJFOLDF(simplify_conv_u32_num)
833{
834 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
835 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
836 return fleft->op1;
837 return NEXTFOLD;
838}
839
840LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32*/
841LJFOLDF(simplify_conv_i64_num)
842{
843 PHIBARRIER(fleft);
844 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
845 /* Reduce to a sign-extension. */
846 fins->op1 = fleft->op1;
847 fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
848 return RETRYFOLD;
849 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
850#if LJ_TARGET_X64
851 return fleft->op1;
852#else
853 /* Reduce to a zero-extension. */
854 fins->op1 = fleft->op1;
855 fins->op2 = (IRT_I64<<5)|IRT_U32;
856 return RETRYFOLD;
857#endif
858 }
859 return NEXTFOLD;
860}
861
862LJFOLD(CONV CONV IRCONV_U64_NUM) /* _U32*/
863LJFOLDF(simplify_conv_u64_num)
864{
865 PHIBARRIER(fleft);
866 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
867#if LJ_TARGET_X64
868 return fleft->op1;
869#else
870 /* Reduce to a zero-extension. */
871 fins->op1 = fleft->op1;
872 fins->op2 = (IRT_U64<<5)|IRT_U32;
873 return RETRYFOLD;
874#endif
875 }
876 return NEXTFOLD;
877}
878
879/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
880LJFOLD(TOBIT CONV KNUM)
881LJFOLDF(simplify_tobit_conv)
882{
883 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
884 (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
885 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
886 lua_assert(irt_isnum(fleft->t));
887 return fleft->op1;
888 }
889 return NEXTFOLD;
890}
891
892/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
893LJFOLD(FPMATH CONV IRFPM_FLOOR)
894LJFOLD(FPMATH CONV IRFPM_CEIL)
895LJFOLD(FPMATH CONV IRFPM_TRUNC)
896LJFOLDF(simplify_floor_conv)
897{
898 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
899 (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
900 return LEFTFOLD;
901 return NEXTFOLD;
902}
903
904/* Strength reduction of widening. */
905LJFOLD(CONV any IRCONV_I64_INT)
906LJFOLDF(simplify_conv_sext)
907{
908 IRRef ref = fins->op1;
909 int64_t ofs = 0;
910 if (!(fins->op2 & IRCONV_SEXT))
911 return NEXTFOLD;
912 PHIBARRIER(fleft);
913 if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
914 ofs = (int64_t)IR(fleft->op2)->i;
915 ref = fleft->op1;
916 }
917 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
918 if (ref == J->scev.idx) {
919 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
920 lua_assert(irt_isint(J->scev.t));
921 if (lo && IR(lo)->i + ofs >= 0) {
922#if LJ_TARGET_X64
923 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
924 return LEFTFOLD;
925#else
926 /* Reduce to a (cheaper) zero-extension. */
927 fins->op2 &= ~IRCONV_SEXT;
928 return RETRYFOLD;
929#endif
930 }
931 }
932 return NEXTFOLD;
933}
934
935/* Special CSE rule for CONV. */
936LJFOLD(CONV any any)
937LJFOLDF(cse_conv)
938{
939 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
940 IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
941 uint8_t guard = irt_isguard(fins->t);
942 IRRef ref = J->chain[IR_CONV];
943 while (ref > op1) {
944 IRIns *ir = IR(ref);
945 /* Commoning with stronger checks is ok. */
946 if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
947 irt_isguard(ir->t) >= guard)
948 return ref;
949 ref = ir->prev;
950 }
951 }
952 return EMITFOLD; /* No fallthrough to regular CSE. */
953}
954
955/* FP conversion narrowing. */
745LJFOLD(TOINT ADD any) 956LJFOLD(TOINT ADD any)
746LJFOLD(TOINT SUB any) 957LJFOLD(TOINT SUB any)
747LJFOLD(TOBIT ADD KNUM) 958LJFOLD(TOBIT ADD KNUM)
@@ -771,26 +982,6 @@ LJFOLDF(cse_toint)
771 return EMITFOLD; /* No fallthrough to regular CSE. */ 982 return EMITFOLD; /* No fallthrough to regular CSE. */
772} 983}
773 984
774/* Special CSE rule for CONV. */
775LJFOLD(CONV any any)
776LJFOLDF(cse_conv)
777{
778 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
779 IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
780 uint8_t guard = irt_isguard(fins->t);
781 IRRef ref = J->chain[IR_CONV];
782 while (ref > op1) {
783 IRIns *ir = IR(ref);
784 /* Commoning with stronger checks is ok. */
785 if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
786 irt_isguard(ir->t) >= guard)
787 return ref;
788 ref = ir->prev;
789 }
790 }
791 return EMITFOLD; /* No fallthrough to regular CSE. */
792}
793
794/* -- Strength reduction of widening -------------------------------------- */ 985/* -- Strength reduction of widening -------------------------------------- */
795 986
796LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */ 987LJFOLD(TOI64 any 3) /* IRTOINT_ZEXT64 */
@@ -1723,12 +1914,12 @@ LJFOLDX(lj_ir_emit)
1723 1914
1724/* Every entry in the generated hash table is a 32 bit pattern: 1915/* Every entry in the generated hash table is a 32 bit pattern:
1725** 1916**
1726** xxxxxxxx iiiiiiii llllllll rrrrrrrr 1917** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
1727** 1918**
1728** xxxxxxxx = 8 bit index into fold function table 1919** xxxxxxxx = 8 bit index into fold function table
1729** iiiiiiii = 8 bit folded instruction opcode 1920** iiiiiii = 7 bit folded instruction opcode
1730** llllllll = 8 bit left instruction opcode 1921** lllllll = 7 bit left instruction opcode
1731** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field 1922** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
1732*/ 1923*/
1733 1924
1734#include "lj_folddef.h" 1925#include "lj_folddef.h"
@@ -1762,9 +1953,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
1762 /* Fold engine start/retry point. */ 1953 /* Fold engine start/retry point. */
1763retry: 1954retry:
1764 /* Construct key from opcode and operand opcodes (unless literal/none). */ 1955 /* Construct key from opcode and operand opcodes (unless literal/none). */
1765 key = ((uint32_t)fins->o << 16); 1956 key = ((uint32_t)fins->o << 17);
1766 if (fins->op1 >= J->cur.nk) { 1957 if (fins->op1 >= J->cur.nk) {
1767 key += (uint32_t)IR(fins->op1)->o << 8; 1958 key += (uint32_t)IR(fins->op1)->o << 10;
1768 *fleft = *IR(fins->op1); 1959 *fleft = *IR(fins->op1);
1769 } 1960 }
1770 if (fins->op2 >= J->cur.nk) { 1961 if (fins->op2 >= J->cur.nk) {
@@ -1777,7 +1968,7 @@ retry:
1777 /* Check for a match in order from most specific to least specific. */ 1968 /* Check for a match in order from most specific to least specific. */
1778 any = 0; 1969 any = 0;
1779 for (;;) { 1970 for (;;) {
1780 uint32_t k = key | any; 1971 uint32_t k = key | (any & 0x1ffff);
1781 uint32_t h = fold_hashkey(k); 1972 uint32_t h = fold_hashkey(k);
1782 uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */ 1973 uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
1783 if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) { 1974 if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
@@ -1785,9 +1976,9 @@ retry:
1785 if (ref != NEXTFOLD) 1976 if (ref != NEXTFOLD)
1786 break; 1977 break;
1787 } 1978 }
1788 if (any == 0xffff) /* Exhausted folding. Pass on to CSE. */ 1979 if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */
1789 return lj_opt_cse(J); 1980 return lj_opt_cse(J);
1790 any = (any | (any >> 8)) ^ 0xff00; 1981 any = (any | (any >> 10)) ^ 0xffc00;
1791 } 1982 }
1792 1983
1793 /* Return value processing, ordered by frequency. */ 1984 /* Return value processing, ordered by frequency. */