aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2013-04-21 01:01:33 +0200
committer Mike Pall <mike> 2013-04-21 01:01:33 +0200
commit 5f1781a1277508c2b7bec527f722da98d8556e26 (patch)
tree e1bbc8b5b4af7c7b374a0139225a585aa0009fdf
parent 7b629b7bcf6bca3bd7733db601722c551098557e (diff)
download luajit-5f1781a1277508c2b7bec527f722da98d8556e26.tar.gz
luajit-5f1781a1277508c2b7bec527f722da98d8556e26.tar.bz2
luajit-5f1781a1277508c2b7bec527f722da98d8556e26.zip
Compile string concatenations (BC_CAT).
-rw-r--r-- src/Makefile.dep 20
-rw-r--r-- src/jit/dump.lua 1
-rw-r--r-- src/lj_asm.c 65
-rw-r--r-- src/lj_asm_arm.h 5
-rw-r--r-- src/lj_asm_mips.h 5
-rw-r--r-- src/lj_asm_ppc.h 5
-rw-r--r-- src/lj_asm_x86.h 5
-rw-r--r-- src/lj_buf.c 29
-rw-r--r-- src/lj_buf.h 6
-rw-r--r-- src/lj_ir.c 1
-rw-r--r-- src/lj_ir.h 9
-rw-r--r-- src/lj_ircall.h 10
-rw-r--r-- src/lj_opt_fold.c 95
-rw-r--r-- src/lj_record.c 35
-rw-r--r-- src/vm_arm.dasc 1
-rw-r--r-- src/vm_mips.dasc 1
-rw-r--r-- src/vm_ppc.dasc 1
-rw-r--r-- src/vm_x86.dasc 1
18 files changed, 279 insertions, 16 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 82834811..c501db44 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -120,9 +120,9 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
120 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \ 120 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
121 lj_str.h lj_jit.h lj_ir.h lj_dispatch.h 121 lj_str.h lj_jit.h lj_ir.h lj_dispatch.h
122lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 122lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
123 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 123 lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
124 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ 124 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
125 lj_vm.h lj_strscan.h lj_lib.h 125 lj_carith.h lj_vm.h lj_strscan.h lj_lib.h
126lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 126lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
127 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \ 127 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
128 lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h 128 lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
@@ -143,9 +143,9 @@ lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
143lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 143lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
144 lj_ir.h lj_jit.h lj_iropt.h 144 lj_ir.h lj_jit.h lj_iropt.h
145lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 145lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
146 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 146 lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h \
147 lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ 147 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_carith.h \
148 lj_strscan.h lj_folddef.h 148 lj_vm.h lj_strscan.h lj_folddef.h
149lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 149lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
150 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \ 150 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
151 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \ 151 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
@@ -165,10 +165,10 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
165 lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h \ 165 lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h \
166 lj_vmevent.h 166 lj_vmevent.h
167lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 167lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
168 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 168 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_meta.h \
169 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ 169 lj_frame.h lj_bc.h lj_ctype.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h \
170 lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ 170 lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
171 lj_ffrecord.h lj_snap.h lj_vm.h 171 lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
172lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 172lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
173 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ 173 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
174 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ 174 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 7f930f51..c025a239 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -278,6 +278,7 @@ local litname = {
278 ["FLOAD "] = vmdef.irfield, 278 ["FLOAD "] = vmdef.irfield,
279 ["FREF "] = vmdef.irfield, 279 ["FREF "] = vmdef.irfield,
280 ["FPMATH"] = vmdef.irfpm, 280 ["FPMATH"] = vmdef.irfpm,
281 ["BUFHDR"] = { [0] = "RESET", "APPEND" },
281} 282}
282 283
283local function ctlsub(c) 284local function ctlsub(c)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 358ace6e..1304c180 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1071,6 +1071,70 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
1071 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ 1071 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
1072} 1072}
1073 1073
1074/* -- Buffer handling ----------------------------------------------------- */
1075
/* Assemble IR_BUFHDR: op1 is the buffer reference, op2 the IRBUFHDR_* mode.
** Nothing is emitted when the result of the instruction is unused.
*/
1076static void asm_bufhdr(ASMState *as, IRIns *ir)
1077{
1078 if (ra_used(ir)) {
1079 Reg sb = ra_dest(as, ir, RSET_GPR);
/* Only when the APPEND bit is clear (reset mode): a load from SBuf.b and a
** store to SBuf.p go through a scratch register distinct from sb.
** NOTE(review): presumably machine code is emitted in reverse order, so the
** load runs before the store and the net effect is sb->p = sb->b -- confirm.
*/
1080 if (!(ir->op2 & IRBUFHDR_APPEND)) {
1081 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1082 /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
1083 emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
1084 emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
1085 }
/* Hand op1 (the buffer reference) to the register allocator together with
** sb: x86/x64 uses ra_left, all other targets use ra_leftov.
*/
1086#if LJ_TARGET_X86ORX64
1087 ra_left(as, sb, ir->op1);
1088#else
1089 ra_leftov(as, sb, ir->op1);
1090#endif
1091 }
1092}
1093
1094#if !LJ_TARGET_X86ORX64
1095static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1096#endif
1097
/* Assemble IR_BUFPUT as a call to one of the lj_buf_put* helpers.
** op1 is the buffer reference, op2 the value to append. The helper is picked
** from the IR type of op2: string, number or (asserted) integer.
** Nothing is emitted when the result of the instruction is unused.
*/
1098static void asm_bufput(ASMState *as, IRIns *ir)
1099{
1100 const CCallInfo *ci;
1101 IRRef args[2];
1102 IRIns *ir2;
1103 if (!ra_used(ir)) return;
1104 args[0] = ir->op1; /* SBuf * */
1105 args[1] = ir->op2; /* int, double, GCstr * */
1106 ir2 = IR(ir->op2);
1107 if (irt_isstr(ir2->t)) {
1108 ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
/* Number case. With LJ_SOFTFP the test looks at the type of the instruction
** following op2 (IRT_SOFTFP) instead of the type of op2 itself.
*/
1109 } else if (LJ_SOFTFP ? irt_type((ir2+1)->t)==IRT_SOFTFP : irt_isnum(ir2->t)) {
1110 ci = &lj_ir_callinfo[IRCALL_lj_buf_putnum];
/* lj_buf_putnum takes a cTValue *, so the second argument is replaced by
** ASMREF_TMP1, which is filled with an address at the end of this function.
*/
1111 args[1] = ASMREF_TMP1;
1112 } else {
1113 lua_assert(irt_isinteger(ir2->t));
1114 ci = &lj_ir_callinfo[IRCALL_lj_buf_putint];
1115 }
1116 asm_setupresult(as, ir, ci); /* SBuf * */
1117 asm_gencall(as, ci, args);
/* Number case only: set up the temporary register with a pointer to the
** value. x86/x64 emits an LEA relative to RID_ESP using the spill slot
** obtained for op2; other targets delegate to asm_tvptr.
*/
1118 if (args[1] == ASMREF_TMP1) {
1119#if LJ_TARGET_X86ORX64
1120 emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
1121 RID_ESP, ra_spill(as, IR(ir->op2)));
1122#else
1123 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
1124#endif
1125 }
1126}
1127
/* Assemble IR_BUFSTR as a call to lj_buf_tostr.
** The single call argument is op2 of the instruction; the result is a GCstr *.
*/
1128static void asm_bufstr(ASMState *as, IRIns *ir)
1129{
1130 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1131 IRRef args[1];
1132 args[0] = ir->op2; /* SBuf *sb */
/* NOTE(review): presumably counted as a GC step because lj_buf_tostr creates
** a string object -- confirm against the other users of as->gcsteps.
*/
1133 as->gcsteps++;
1134 asm_setupresult(as, ir, ci); /* GCstr * */
1135 asm_gencall(as, ci, args);
1136}
1137
1074/* -- PHI and loop handling ----------------------------------------------- */ 1138/* -- PHI and loop handling ----------------------------------------------- */
1075 1139
1076/* Break a PHI cycle by renaming to a free register (evict if needed). */ 1140/* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1724,6 +1788,7 @@ static void asm_setup_regsp(ASMState *as)
1724 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 1788 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1725 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 1789 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
1726 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 1790 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
1791 case IR_BUFPUT: case IR_BUFSTR:
1727 ir->prev = REGSP_HINT(RID_RET); 1792 ir->prev = REGSP_HINT(RID_RET);
1728 if (inloop) 1793 if (inloop)
1729 as->modset = RSET_SCRATCH; 1794 as->modset = RSET_SCRATCH;
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 196f797e..57c2dd81 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -2264,6 +2264,11 @@ static void asm_ir(ASMState *as, IRIns *ir)
2264 case IR_TDUP: asm_tdup(as, ir); break; 2264 case IR_TDUP: asm_tdup(as, ir); break;
2265 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; 2265 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2266 2266
2267 /* Buffer operations. */
2268 case IR_BUFHDR: asm_bufhdr(as, ir); break;
2269 case IR_BUFPUT: asm_bufput(as, ir); break;
2270 case IR_BUFSTR: asm_bufstr(as, ir); break;
2271
2267 /* Write barriers. */ 2272 /* Write barriers. */
2268 case IR_TBAR: asm_tbar(as, ir); break; 2273 case IR_TBAR: asm_tbar(as, ir); break;
2269 case IR_OBAR: asm_obar(as, ir); break; 2274 case IR_OBAR: asm_obar(as, ir); break;
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index a1d31d49..55fe10b8 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -1867,6 +1867,11 @@ static void asm_ir(ASMState *as, IRIns *ir)
1867 case IR_TDUP: asm_tdup(as, ir); break; 1867 case IR_TDUP: asm_tdup(as, ir); break;
1868 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; 1868 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1869 1869
1870 /* Buffer operations. */
1871 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1872 case IR_BUFPUT: asm_bufput(as, ir); break;
1873 case IR_BUFSTR: asm_bufstr(as, ir); break;
1874
1870 /* Write barriers. */ 1875 /* Write barriers. */
1871 case IR_TBAR: asm_tbar(as, ir); break; 1876 case IR_TBAR: asm_tbar(as, ir); break;
1872 case IR_OBAR: asm_obar(as, ir); break; 1877 case IR_OBAR: asm_obar(as, ir); break;
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 34bd721f..d0feb43a 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -2065,6 +2065,11 @@ static void asm_ir(ASMState *as, IRIns *ir)
2065 case IR_TDUP: asm_tdup(as, ir); break; 2065 case IR_TDUP: asm_tdup(as, ir); break;
2066 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; 2066 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2067 2067
2068 /* Buffer operations. */
2069 case IR_BUFHDR: asm_bufhdr(as, ir); break;
2070 case IR_BUFPUT: asm_bufput(as, ir); break;
2071 case IR_BUFSTR: asm_bufstr(as, ir); break;
2072
2068 /* Write barriers. */ 2073 /* Write barriers. */
2069 case IR_TBAR: asm_tbar(as, ir); break; 2074 case IR_TBAR: asm_tbar(as, ir); break;
2070 case IR_OBAR: asm_obar(as, ir); break; 2075 case IR_OBAR: asm_obar(as, ir); break;
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index d90963ef..1e32b6c9 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -2707,6 +2707,11 @@ static void asm_ir(ASMState *as, IRIns *ir)
2707 case IR_TDUP: asm_tdup(as, ir); break; 2707 case IR_TDUP: asm_tdup(as, ir); break;
2708 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; 2708 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2709 2709
2710 /* Buffer operations. */
2711 case IR_BUFHDR: asm_bufhdr(as, ir); break;
2712 case IR_BUFPUT: asm_bufput(as, ir); break;
2713 case IR_BUFSTR: asm_bufstr(as, ir); break;
2714
2710 /* Write barriers. */ 2715 /* Write barriers. */
2711 case IR_TBAR: asm_tbar(as, ir); break; 2716 case IR_TBAR: asm_tbar(as, ir); break;
2712 case IR_OBAR: asm_obar(as, ir); break; 2717 case IR_OBAR: asm_obar(as, ir); break;
diff --git a/src/lj_buf.c b/src/lj_buf.c
index c08d23c9..ef48b580 100644
--- a/src/lj_buf.c
+++ b/src/lj_buf.c
@@ -12,6 +12,7 @@
12#include "lj_gc.h" 12#include "lj_gc.h"
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_buf.h" 14#include "lj_buf.h"
15#include "lj_str.h"
15 16
16LJ_NOINLINE void LJ_FASTCALL lj_buf_grow(SBuf *sb, char *en) 17LJ_NOINLINE void LJ_FASTCALL lj_buf_grow(SBuf *sb, char *en)
17{ 18{
@@ -64,6 +65,34 @@ void lj_buf_putmem(SBuf *sb, const void *q, MSize len)
64 setsbufP(sb, p); 65 setsbufP(sb, p);
65} 66}
66 67
68#if LJ_HASJIT
/* Append the contents of string s to buffer sb. Returns sb itself.
** NOTE(review): lj_buf_more presumably makes room for len more bytes and
** returns the current write position -- confirm in lj_buf.h.
*/
69SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
70{
71 MSize len = s->len;
72 char *p = lj_buf_more(sb, len);
73 p = lj_buf_wmem(p, strdata(s), len);
/* Store the pointer returned by lj_buf_wmem as the new buffer position. */
74 setsbufP(sb, p);
75 return sb;
76}
77
/* Append integer k to buffer sb via lj_str_bufint. Returns sb itself.
** LJ_STR_INTBUF bytes are requested from lj_buf_more before the conversion;
** the pointer returned by lj_str_bufint becomes the new buffer position.
*/
78SBuf * LJ_FASTCALL lj_buf_putint(SBuf *sb, int32_t k)
79{
80 setsbufP(sb, lj_str_bufint(lj_buf_more(sb, LJ_STR_INTBUF), k));
81 return sb;
82}
83
/* Append the number held in TValue o to buffer sb via lj_str_bufnum.
** Returns sb itself. LJ_STR_NUMBUF bytes are requested from lj_buf_more
** before the conversion; the pointer returned by lj_str_bufnum becomes the
** new buffer position.
*/
84SBuf * LJ_FASTCALL lj_buf_putnum(SBuf *sb, cTValue *o)
85{
86 setsbufP(sb, lj_str_bufnum(lj_buf_more(sb, LJ_STR_NUMBUF), o));
87 return sb;
88}
89
/* Create a string from the buffer: lj_str_new is called with the lua_State
** from sbufL(sb), the start from sbufB(sb) and the length from sbuflen(sb).
*/
90GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
91{
92 return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
93}
94#endif
95
67uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp) 96uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
68{ 97{
69 const uint8_t *p = (const uint8_t *)*pp; 98 const uint8_t *p = (const uint8_t *)*pp;
diff --git a/src/lj_buf.h b/src/lj_buf.h
index 289eb01d..e028a434 100644
--- a/src/lj_buf.h
+++ b/src/lj_buf.h
@@ -26,6 +26,12 @@ LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
26 26
27LJ_FUNC char *lj_buf_wmem(char *p, const void *q, MSize len); 27LJ_FUNC char *lj_buf_wmem(char *p, const void *q, MSize len);
28LJ_FUNC void lj_buf_putmem(SBuf *sb, const void *q, MSize len); 28LJ_FUNC void lj_buf_putmem(SBuf *sb, const void *q, MSize len);
29#if LJ_HASJIT
30LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
31LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putint(SBuf *sb, int32_t k);
32LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putnum(SBuf *sb, cTValue *o);
33LJ_FUNC GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
34#endif
29LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp); 35LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
30LJ_FUNC char * LJ_FASTCALL lj_buf_wuleb128(char *p, uint32_t v); 36LJ_FUNC char * LJ_FASTCALL lj_buf_wuleb128(char *p, uint32_t v);
31 37
diff --git a/src/lj_ir.c b/src/lj_ir.c
index e1a59105..f1e1959f 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -15,6 +15,7 @@
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_buf.h"
18#include "lj_str.h" 19#include "lj_str.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
20#include "lj_ir.h" 21#include "lj_ir.h"
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 9d2521c9..0cbd8b55 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -120,6 +120,11 @@
120 _(CNEW, AW, ref, ref) \ 120 _(CNEW, AW, ref, ref) \
121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
122 \ 122 \
123 /* Buffer operations. */ \
124 _(BUFHDR, S , ref, lit) \
125 _(BUFPUT, S , ref, ref) \
126 _(BUFSTR, A , ref, ref) \
127 \
123 /* Barriers. */ \ 128 /* Barriers. */ \
124 _(TBAR, S , ref, ___) \ 129 _(TBAR, S , ref, ___) \
125 _(OBAR, S , ref, ref) \ 130 _(OBAR, S , ref, ref) \
@@ -221,6 +226,10 @@ IRFLDEF(FLENUM)
221#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ 226#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */
222#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ 227#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */
223 228
229/* BUFHDR mode, stored in op2. */
230#define IRBUFHDR_RESET 0 /* Reset buffer. */
231#define IRBUFHDR_APPEND 1 /* Append to buffer. */
232
224/* CONV mode, stored in op2. */ 233/* CONV mode, stored in op2. */
225#define IRCONV_SRCMASK 0x001f /* Source IRType. */ 234#define IRCONV_SRCMASK 0x001f /* Source IRType. */
226#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */ 235#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 2c160bdf..3b1cc928 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -105,6 +105,10 @@ typedef struct CCallInfo {
105 _(ANY, lj_strscan_num, 2, FN, INT, 0) \ 105 _(ANY, lj_strscan_num, 2, FN, INT, 0) \
106 _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ 106 _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \
107 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ 107 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \
108 _(ANY, lj_buf_putstr, 2, FS, P32, 0) \
109 _(ANY, lj_buf_putint, 2, FS, P32, 0) \
110 _(ANY, lj_buf_putnum, 2, FS, P32, 0) \
111 _(ANY, lj_buf_tostr, 1, FL, STR, 0) \
108 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ 112 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
109 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ 113 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
110 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ 114 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \
@@ -114,9 +118,9 @@ typedef struct CCallInfo {
114 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ 118 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \
115 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ 119 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
116 _(ANY, lj_vm_modi, 2, FN, INT, 0) \ 120 _(ANY, lj_vm_modi, 2, FN, INT, 0) \
117 _(ANY, sinh, ARG1_FP, N, NUM, 0) \ 121 _(ANY, sinh, ARG1_FP, N, NUM, 0) \
118 _(ANY, cosh, ARG1_FP, N, NUM, 0) \ 122 _(ANY, cosh, ARG1_FP, N, NUM, 0) \
119 _(ANY, tanh, ARG1_FP, N, NUM, 0) \ 123 _(ANY, tanh, ARG1_FP, N, NUM, 0) \
120 _(ANY, fputc, 2, S, INT, 0) \ 124 _(ANY, fputc, 2, S, INT, 0) \
121 _(ANY, fwrite, 4, S, INT, 0) \ 125 _(ANY, fwrite, 4, S, INT, 0) \
122 _(ANY, fflush, 1, S, INT, 0) \ 126 _(ANY, fflush, 1, S, INT, 0) \
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 75db47df..f35593f3 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,6 +14,7 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
@@ -155,13 +156,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 156
156/* Barrier to prevent folding across a GC step. 157/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 158** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 159** And the GC is only driven forward if there's at least one allocation.
159*/ 160*/
160#define gcstep_barrier(J, ref) \ 161#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 162 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 163 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 164 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 165 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
166 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR]))
165 167
166/* -- Constant folding for FP numbers ------------------------------------- */ 168/* -- Constant folding for FP numbers ------------------------------------- */
167 169
@@ -515,6 +517,94 @@ LJFOLDF(kfold_strcmp)
515 return NEXTFOLD; 517 return NEXTFOLD;
516} 518}
517 519
520/* -- Constant folding and forwarding for buffers ------------------------- */
521
522/* Note: buffer ops are not CSEd until the BUFSTR. It's ok to modify them. */
523
524/* BUFHDR is treated like a store, see below. */
525
/* Fold rule for a BUFPUT whose left operand is a BUFHDR and whose right
** operand is a BUFSTR. When all checks below pass, the put is dropped: the
** BUFHDR is switched to append mode and its reference is returned instead.
*/
526LJFOLD(BUFPUT BUFHDR BUFSTR)
527LJFOLDF(bufput_append)
528{
529 /* New buffer, no other buffer op inbetween and same buffer? */
/* Checks, in order: forwarding optimization enabled, header not yet in
** append mode, fleft->prev equal to op1 of the BUFSTR, and op1 of the header
** equal to op1 of the instruction referenced by the BUFSTR's op1.
*/
530 if ((J->flags & JIT_F_OPT_FWD) &&
531 !(fleft->op2 & IRBUFHDR_APPEND) &&
532 fleft->prev == fright->op1 &&
533 fleft->op1 == IR(fright->op1)->op1) {
534 IRRef ref = fins->op1;
535 IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */
536 return ref;
537 }
538 return EMITFOLD; /* This is a store and always emitted. */
539}
540
/* Fold rule for BUFPUT with a constant (IR_KGC) right operand.
** - An empty string is dropped (LEFTFOLD).
** - If the left operand is itself a BUFPUT of an IR_KGC constant, the two
**   strings are concatenated into one constant, the earlier BUFPUT is
**   rewritten to use it and its reference is returned.
** Everything else is emitted unchanged.
*/
541LJFOLD(BUFPUT any any)
542LJFOLDF(bufput_kgc)
543{
544 if (fright->o == IR_KGC) {
545 GCstr *s2 = ir_kstr(fright);
546 MSize len2 = s2->len;
547 if (len2 == 0) { /* Empty string? */
548 return LEFTFOLD;
549 } else {
550 PHIBARRIER(fleft);
551 if (fleft->o == IR_BUFPUT && IR(fleft->op2)->o == IR_KGC) {
552 /* Join two constant string puts in a row. */
553 GCstr *s1 = ir_kstr(IR(fleft->op2));
554 MSize len1 = s1->len;
/* Build s1..s2 in the temporary buffer obtained from lj_buf_tmp, then turn
** it into a string constant via lj_str_new and lj_ir_kstr.
*/
555 char *buf = lj_buf_tmp(J->L, len1 + len2);
556 IRRef kref;
557 memcpy(buf, strdata(s1), len1);
558 memcpy(buf+len1, strdata(s2), len2);
559 kref = lj_ir_kstr(J, lj_str_new(J->L, buf, len1 + len2));
560 /* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
/* Hence the previous BUFPUT is looked up again through IR(fins->op1). */
561 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
562 return fins->op1;
563 }
564 }
565 }
566 return EMITFOLD; /* This is a store and always emitted. */
567}
568
/* Fold rule for BUFSTR. The right operand is asserted to be a BUFHDR or a
** BUFPUT. Handles three cases:
** 1. No put at all: empty-string constant, or CSE after rewriting op2.
** 2. Exactly one put onto a non-append header: the put operand itself
**    (strings) or a TOSTR of it (integers/numbers).
** 3. Otherwise: search earlier BUFSTR instructions for an identical chain.
*/
569LJFOLD(BUFSTR any any)
570LJFOLDF(bufstr_kfold_cse)
571{
572 lua_assert(fright->o == IR_BUFHDR || fright->o == IR_BUFPUT);
573 if (fright->o == IR_BUFHDR) { /* No put operations? */
574 if (!(fright->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */
575 return lj_ir_kstr(J, &J2G(J)->strempty);
576 fins->op2 = fright->prev; /* Relies on checks in bufput_append. */
577 return CSEFOLD;
578 } else {
579 /* Shortcut for a single put operation. */
580 IRIns *irb = IR(fright->op1);
581 if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND)) {
582 IRRef ref = fright->op2;
583 if (irt_isstr(IR(ref)->t))
584 return ref;
585 lua_assert(irt_isinteger(IR(ref)->t) || irt_isnum(IR(ref)->t));
586 return emitir(IRT(IR_TOSTR, IRT_STR), ref, 0);
587 }
588 }
589 /* Try to CSE the whole chain. */
/* Only attempted with CSE enabled and a left operand without the APPEND bit.
** Each candidate from J->chain[IR_BUFSTR] is compared against the current
** chain link by link (same opcode and same op2), following op1, until the
** current chain reaches its BUFHDR.
*/
590 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE) && !(fleft->op2 & IRBUFHDR_APPEND)) {
591 IRRef ref = J->chain[IR_BUFSTR];
592 while (ref) {
593 IRIns *irs = IR(ref), *ira = fright, *irb = IR(irs->op2);
594 while (ira->o == irb->o && ira->op2 == irb->op2) {
595 if (ira->o == IR_BUFHDR) {
596 lj_ir_rollback(J, fins->op1); /* Eliminate the current chain. */
597 return ref; /* CSE succeeded. */
598 }
599 ira = IR(ira->op1);
600 irb = IR(irb->op1);
601 }
/* Mismatch: continue with the next older BUFSTR in the chain. */
602 ref = irs->prev;
603 }
604 }
605 return EMITFOLD; /* No CSE possible. */
606}
607
518/* -- Constant folding of pointer arithmetic ------------------------------ */ 608/* -- Constant folding of pointer arithmetic ------------------------------ */
519 609
520LJFOLD(ADD KGC KINT) 610LJFOLD(ADD KGC KINT)
@@ -2128,6 +2218,7 @@ LJFOLD(TNEW any any)
2128LJFOLD(TDUP any) 2218LJFOLD(TDUP any)
2129LJFOLD(CNEW any any) 2219LJFOLD(CNEW any any)
2130LJFOLD(XSNEW any any) 2220LJFOLD(XSNEW any any)
2221LJFOLD(BUFHDR any any)
2131LJFOLDX(lj_ir_emit) 2222LJFOLDX(lj_ir_emit)
2132 2223
2133/* ------------------------------------------------------------------------ */ 2224/* ------------------------------------------------------------------------ */
diff --git a/src/lj_record.c b/src/lj_record.c
index 003910a9..bbabd3ce 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -11,6 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_buf.h"
14#include "lj_str.h" 15#include "lj_str.h"
15#include "lj_tab.h" 16#include "lj_tab.h"
16#include "lj_meta.h" 17#include "lj_meta.h"
@@ -1599,6 +1600,33 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)
1599 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); 1600 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
1600} 1601}
1601 1602
1603/* -- Concatenation ------------------------------------------------------- */
1604
/* Record a concatenation of the slots baseslot..topslot (BC_CAT).
** Returns the TRef of the emitted BUFSTR when every slot in the range is a
** number or string. Otherwise BC_CAT is stored in J->errinfo and
** lj_trace_err_info is called with LJ_TRERR_NYIBC.
*/
1605static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
1606{
1607 TRef *top = &J->base[topslot], tr = *top;
1608 lua_assert(baseslot < topslot);
/* The two topmost operands must both be numbers or strings. */
1609 if (tref_isnumber_str(tr) && tref_isnumber_str(*(top-1))) {
1610 TRef hdr, *trp, *xbase, *base = &J->base[baseslot];
1611 /* First convert number consts to string consts to simplify FOLD rules. */
/* Scans downwards from top and stops at the first slot that is neither a
** number nor a string, or below base.
*/
1612 for (trp = top; trp >= base && tref_isnumber_str(*trp); trp--)
1613 if (tref_isk(*trp) && tref_isnumber(*trp))
1614 *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp, 0);
/* xbase is the lowest slot of the number/string run found by the scan. */
1615 xbase = ++trp;
/* Emit BUFHDR (reset mode) on the global tmpbuf, one BUFPUT per slot from
** xbase up to top, and a final BUFSTR taking the header and the last put.
*/
1616 tr = hdr = emitir(IRT(IR_BUFHDR, IRT_P32),
1617 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
1618 do {
1619 tr = emitir(IRT(IR_BUFPUT, IRT_P32), tr, *trp++);
1620 } while (trp <= top);
1621 tr = emitir(IRT(IR_BUFSTR, IRT_STR), hdr, tr);
1622 J->maxslot = (BCReg)(xbase - J->base);
/* Success only if the run covered the whole range down to baseslot;
** otherwise fall through to the error path below.
*/
1623 if (xbase == base) return tr;
1624 }
1625 setintV(&J->errinfo, BC_CAT);
1626 lj_trace_err_info(J, LJ_TRERR_NYIBC); /* __concat metamethod. */
1627 return 0;
1628}
1629
1602/* -- Record bytecode ops ------------------------------------------------- */ 1630/* -- Record bytecode ops ------------------------------------------------- */
1603 1631
1604/* Prepare for comparison. */ 1632/* Prepare for comparison. */
@@ -1901,6 +1929,12 @@ void lj_record_ins(jit_State *J)
1901 rc = rec_mm_arith(J, &ix, MM_pow); 1929 rc = rec_mm_arith(J, &ix, MM_pow);
1902 break; 1930 break;
1903 1931
1932 /* -- Miscellaneous ops ------------------------------------------------- */
1933
1934 case BC_CAT:
1935 rc = rec_cat(J, rb, rc);
1936 break;
1937
1904 /* -- Constant and move ops --------------------------------------------- */ 1938 /* -- Constant and move ops --------------------------------------------- */
1905 1939
1906 case BC_MOV: 1940 case BC_MOV:
@@ -2082,7 +2116,6 @@ void lj_record_ins(jit_State *J)
2082 /* fallthrough */ 2116 /* fallthrough */
2083 case BC_ITERN: 2117 case BC_ITERN:
2084 case BC_ISNEXT: 2118 case BC_ISNEXT:
2085 case BC_CAT:
2086 case BC_UCLO: 2119 case BC_UCLO:
2087 case BC_FNEW: 2120 case BC_FNEW:
2088 case BC_TSETM: 2121 case BC_TSETM:
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 1d4b60f4..4579b263 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -4344,6 +4344,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4344 | ldr RA, TRACE:RC->mcode 4344 | ldr RA, TRACE:RC->mcode
4345 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 4345 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
4346 | str L, [DISPATCH, #DISPATCH_GL(jit_L)] 4346 | str L, [DISPATCH, #DISPATCH_GL(jit_L)]
4347 | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
4347 | bx RA 4348 | bx RA
4348 |.endif 4349 |.endif
4349 break; 4350 break;
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 53000411..c4f01e81 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -4051,6 +4051,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4051 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4051 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
4052 | sw L, DISPATCH_GL(jit_L)(DISPATCH) 4052 | sw L, DISPATCH_GL(jit_L)(DISPATCH)
4053 | lw TMP2, TRACE:TMP2->mcode 4053 | lw TMP2, TRACE:TMP2->mcode
4054 | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4054 | jr TMP2 4055 | jr TMP2
4055 |. addiu JGL, DISPATCH, GG_DISP2G+32768 4056 |. addiu JGL, DISPATCH, GG_DISP2G+32768
4056 |.endif 4057 |.endif
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 514bd231..c85f1f10 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -4936,6 +4936,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4936 | mtctr TMP2 4936 | mtctr TMP2
4937 | stw L, DISPATCH_GL(jit_L)(DISPATCH) 4937 | stw L, DISPATCH_GL(jit_L)(DISPATCH)
4938 | addi JGL, DISPATCH, GG_DISP2G+32768 4938 | addi JGL, DISPATCH, GG_DISP2G+32768
4939 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4939 | bctr 4940 | bctr
4940 |.endif 4941 |.endif
4941 break; 4942 break;
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 3fd897ec..8ed55fd2 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -5465,6 +5465,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5465 | mov L:RB, SAVE_L 5465 | mov L:RB, SAVE_L
5466 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5466 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5467 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5467 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
5468 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5468 | // Save additional callee-save registers only used in compiled code. 5469 | // Save additional callee-save registers only used in compiled code.
5469 |.if X64WIN 5470 |.if X64WIN
5470 | mov TMPQ, r12 5471 | mov TMPQ, r12