-rw-r--r--  lib/dump.lua       |  5
-rw-r--r--  src/lj_asm.c       | 47
-rw-r--r--  src/lj_ir.h        |  9
-rw-r--r--  src/lj_opt_fold.c  |  9
-rw-r--r--  src/lj_record.c    | 80
5 files changed, 102 insertions(+), 48 deletions(-)
diff --git a/lib/dump.lua b/lib/dump.lua
index 82a6d964..a00862d0 100644
--- a/lib/dump.lua
+++ b/lib/dump.lua
@@ -210,7 +210,10 @@ local colorize, irtype
 -- Lookup table to convert some literals into names.
 local litname = {
   ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI",
-		 "T", "IT", "RT", "RIT", "PT", "PIT", "PRT", "PRIT", },
+		 "T", "IT", "RT", "RIT", "PT", "PIT", "PRT", "PRIT",
+		 "F", "IF", "RF", "RIF", "PF", "PIF", "PRF", "PRIF",
+		 "TF", "ITF", "RTF", "RITF", "PTF", "PITF", "PRTF", "PRITF",
+		 },
   ["XLOAD "] = { [0] = "", "R", "U", "RU", },
   ["TOINT "] = { [0] = "check", "index", "", },
   ["FLOAD "] = vmdef.irfield,
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 7773abe1..d26d0b4b 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1110,23 +1110,34 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict)
   return 1;  /* Ok, no conflict. */
 }
 
+/* Fuse array base into memory operand. */
+static IRRef asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *irb = IR(ref);
+  as->mrm.ofs = 0;
+  if (irb->o == IR_FLOAD) {
+    IRIns *ira = IR(irb->op1);
+    lua_assert(irb->op2 == IRFL_TAB_ARRAY);
+    /* We can avoid the FLOAD of t->array for colocated arrays. */
+    if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
+        noconflict(as, irb->op1, IR_NEWREF)) {
+      as->mrm.ofs = (int32_t)sizeof(GCtab);  /* Ofs to colocated array. */
+      return irb->op1;  /* Table obj. */
+    }
+  } else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
+    /* Fuse base offset (vararg load). */
+    as->mrm.ofs = IR(irb->op2)->i;
+    return irb->op1;
+  }
+  return ref;  /* Otherwise use the given array base. */
+}
+
 /* Fuse array reference into memory operand. */
 static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
 {
-  IRIns *irb = IR(ir->op1);
-  IRIns *ira, *irx;
+  IRIns *irx;
   lua_assert(ir->o == IR_AREF);
-  lua_assert(irb->o == IR_FLOAD && irb->op2 == IRFL_TAB_ARRAY);
-  ira = IR(irb->op1);
-  if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
-      noconflict(as, irb->op1, IR_NEWREF)) {
-    /* We can avoid the FLOAD of t->array for colocated arrays. */
-    as->mrm.base = (uint8_t)ra_alloc1(as, irb->op1, allow);  /* Table obj. */
-    as->mrm.ofs = (int32_t)sizeof(GCtab);  /* Ofs to colocated array. */
-  } else {
-    as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);  /* Array base. */
-    as->mrm.ofs = 0;
-  }
+  as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow);
   irx = IR(ir->op2);
   if (irref_isk(ir->op2)) {
     as->mrm.ofs += 8*irx->i;
@@ -1277,10 +1288,10 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   } else if (mayfuse(as, ref)) {
     RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
     if (ir->o == IR_SLOAD) {
-      if (!irt_isint(ir->t) && !(ir->op2 & IRSLOAD_PARENT) &&
-	  noconflict(as, ref, IR_RETF)) {
+      if ((!irt_isint(ir->t) || (ir->op2 & IRSLOAD_FRAME)) &&
+	  !(ir->op2 & IRSLOAD_PARENT) && noconflict(as, ref, IR_RETF)) {
 	as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
-	as->mrm.ofs = 8*((int32_t)ir->op1-1);
+	as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
 	as->mrm.idx = RID_NONE;
 	return RID_MRM;
       }
@@ -2031,7 +2042,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
 
 static void asm_sload(ASMState *as, IRIns *ir)
 {
-  int32_t ofs = 8*((int32_t)ir->op1-1);
+  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
   IRType1 t = ir->t;
   Reg base;
   lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
@@ -2056,7 +2067,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg dest = ra_dest(as, ir, allow);
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-    if (irt_isint(t))
+    if (irt_isint(t) && !(ir->op2 & IRSLOAD_FRAME))
       emit_rmro(as, XO_CVTSD2SI, dest, base, ofs);
     else if (irt_isnum(t))
       emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 22127806..cc57560d 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -189,10 +189,11 @@ IRFLDEF(FLENUM)
 } IRFieldID;
 
 /* SLOAD mode bits, stored in op2. */
-#define IRSLOAD_INHERIT		1	/* Inherited by exits/side traces. */
-#define IRSLOAD_READONLY	2	/* Read-only, omit slot store. */
-#define IRSLOAD_PARENT		4	/* Coalesce with parent trace. */
-#define IRSLOAD_TYPECHECK	8	/* Needs type check. */
+#define IRSLOAD_INHERIT		0x01	/* Inherited by exits/side traces. */
+#define IRSLOAD_READONLY	0x02	/* Read-only, omit slot store. */
+#define IRSLOAD_PARENT		0x04	/* Coalesce with parent trace. */
+#define IRSLOAD_TYPECHECK	0x08	/* Needs type check. */
+#define IRSLOAD_FRAME		0x10	/* Load hiword of frame. */
 
 /* XLOAD mode, stored in op2. */
 #define IRXLOAD_READONLY	1	/* Load from read-only data. */
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 312c9cf0..90520d8c 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -1318,8 +1318,13 @@ LJFOLDX(lj_opt_fwd_fload)
 LJFOLD(SLOAD any any)
 LJFOLDF(fwd_sload)
 {
-  lua_assert(J->slot[fins->op1] != 0);
-  return J->slot[fins->op1];
+  if ((fins->op2 & IRSLOAD_FRAME)) {
+    TRef tr = lj_opt_cse(J);
+    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
+  } else {
+    lua_assert(J->slot[fins->op1] != 0);
+    return J->slot[fins->op1];
+  }
 }
 
 LJFOLD(XLOAD KPTR any)
diff --git a/src/lj_record.c b/src/lj_record.c
index 15d72440..739279ad 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -650,29 +650,6 @@ static void rec_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
   lua_assert(J->baseslot >= 1);
 }
 
-/* -- Vararg handling ----------------------------------------------------- */
-
-/* Record vararg instruction. */
-static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
-{
-  ptrdiff_t nvararg = frame_delta(J->L->base-1) - J->pt->numparams - 1;
-  lua_assert(frame_isvarg(J->L->base-1));
-  if (J->framedepth == 0) {  /* NYI: unknown number of varargs. */
-    setintV(&J->errinfo, BC_VARG);
-    lj_trace_err_info(J, LJ_TRERR_NYIBC);
-  } else {  /* Simple case: known fixed number of varargs defined on-trace. */
-    ptrdiff_t i;
-    if (nresults == -1) {
-      nresults = nvararg;
-      J->maxslot = dst + nvararg;
-    } else if (dst + nresults > J->maxslot) {
-      J->maxslot = dst + nresults;
-    }
-    for (i = 0; i < nresults; i++)
-      J->base[dst+i] = i < nvararg ? J->base[i - nvararg - 1] : TREF_NIL;
-  }
-}
-
 /* -- Metamethod handling ------------------------------------------------- */
 
 /* Prepare to record call to metamethod. */
@@ -1928,6 +1905,63 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
   rec_stop(J, lnk);  /* Link to the function. */
 }
 
+/* -- Vararg handling ----------------------------------------------------- */
+
+/* Record vararg instruction. */
+static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
+{
+  int32_t numparams = J->pt->numparams;
+  ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1;
+  lua_assert(frame_isvarg(J->L->base-1));
+  if (J->framedepth > 0) {  /* Simple case: varargs defined on-trace. */
+    ptrdiff_t i;
+    if (nvararg < 0) nvararg = 0;
+    if (nresults == -1) {
+      nresults = nvararg;
+      J->maxslot = dst + (BCReg)nvararg;
+    } else if (dst + nresults > J->maxslot) {
+      J->maxslot = dst + (BCReg)nresults;
+    }
+    for (i = 0; i < nresults; i++) {
+      J->base[dst+i] = i < nvararg ? J->base[i - nvararg - 1] : TREF_NIL;
+      lua_assert(J->base[dst+i] != 0);
+    }
+  } else {  /* Unknown number of varargs passed to trace. */
+    TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME);
+    int32_t frofs = 8*(1+numparams)+FRAME_VARG;
+    if (nresults >= 0) {  /* Known fixed number of results. */
+      ptrdiff_t i;
+      if (nvararg > 0) {
+	TRef vbase;
+	if (nvararg >= nresults)
+	  emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults));
+	else
+	  emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1)));
+	vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
+	vbase = emitir(IRT(IR_ADD, IRT_PTR), vbase, lj_ir_kint(J, frofs-8));
+	for (i = 0; i < nvararg; i++) {
+	  IRType t = itype2irt(&J->L->base[i-1-nvararg]);
+	  TRef aref = emitir(IRT(IR_AREF, IRT_PTR),
+			     vbase, lj_ir_kint(J, (int32_t)i));
+	  TRef tr = emitir(IRTG(IR_ALOAD, t), aref, 0);
+	  if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
+	  J->base[dst+i] = tr;
+	}
+      } else {
+	emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs));
+	nvararg = 0;
+      }
+      for (i = nvararg; i < nresults; i++)
+	J->base[dst+i] = TREF_NIL;
+      if (dst + (BCReg)nresults > J->maxslot)
+	J->maxslot = dst + (BCReg)nresults;
+    } else {
+      setintV(&J->errinfo, BC_VARG);
+      lj_trace_err_info(J, LJ_TRERR_NYIBC);
+    }
+  }
+}
+
 /* -- Record allocations -------------------------------------------------- */
 
 static TRef rec_tnew(jit_State *J, uint32_t ah)