path: root/src/lj_asm.c
Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--  src/lj_asm.c  667
1 file changed, 557 insertions(+), 110 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index a5e0c01e..dba5c178 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -179,6 +179,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
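The emit_spload()/emit_spstore() pair gives the target-independent parts of the assembler one uniform way to move an IR value between a register and its stack slot; each backend supplies the underlying emit_loadofs()/emit_storeofs(). A minimal usage sketch (hypothetical helper, assuming the ra_hasspill()/sps_scale() conventions used elsewhere in this file):

  /* Hypothetical sketch: reload a spilled IR value into register r. */
  static void example_reload(ASMState *as, IRIns *ir, Reg r)
  {
    if (ra_hasspill(ir->s))  /* Value currently lives in a stack slot. */
      emit_spload(as, ir, r, sps_scale(ir->s));  /* Load from [RID_SP+ofs]. */
  }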
@@ -328,7 +334,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   RA_DBGX((as, "remat $i $r", ir, r));
 #if !LJ_SOFTFP
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@@ -336,10 +342,16 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -613,10 +625,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
@@ -629,11 +651,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    ra_addrename(as, down, ref, as->snapno);
   }
 }
 
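Worth noting about the helper both call sites now share: lj_ir_emit() may grow and reallocate the IR array, which is why the old inline versions had to refresh as->ir afterwards. ra_addrename() sidesteps the stale-pointer hazard by indexing as->J->cur.ir only after emitting. A condensed sketch of the hazard (illustrative only, not code from the patch):

  IRIns *p = &as->J->cur.ir[ren];  /* Pointer into the IR array... */
  lj_ir_emit(as->J);               /* ...which this call may realloc. */
  p->r = down;                     /* Dangling store: use-after-free. */
  as->J->cur.ir[ren].r = down;     /* Safe pattern: re-index after emit. */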
@@ -683,18 +701,22 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-	cTValue *tv = ir_knum(ir);
	 /* FP remat needs a load except for +0. Still better than eviction. */
-	if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-	  emit_loadn(as, dest, tv);
+	if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+	  emit_loadk64(as, dest, ir);
	   return;
	 }
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-	emit_loadu64(as, dest, ir_kint64(ir)->u64);
+	emit_loadk64(as, dest, ir);
+	return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+	emit_loadk64(as, dest, ir);
	 return;
 #endif
-      } else {
+#endif
+      } else if (ir->o != IR_KPRI) {
	 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
		    ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
	 emit_loadi(as, dest, ir->i);
@@ -935,7 +957,7 @@ static void asm_snap_prep(ASMState *as)
   } else {
     /* Process any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
	 ir->op2 = REF_BIAS-1;  /* Kill rename. */
     }
@@ -944,44 +966,6 @@ static void asm_snap_prep(ASMState *as)
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-			    const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
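The two functions deleted above are not gone: they reappear verbatim in the new Calls section further down, with one change, the argument count is now read via CCI_XNARGS(), which (on 32-bit and soft-float targets) accounts for 64-bit arguments split across two slots, the "split args" the assertion refers to. An illustration of the CARG chain the collector walks (IR shapes assumed for illustration):

  /* A call f(a, b, c) arrives as a left-leaning CARG chain:
  **   t1 = CARG a  b
  **   t2 = CARG t1 c
  **        CALLN t2 #f
  ** asm_collectargs() follows op1 links right-to-left and fills
  ** args[] = { a, b, c }.
  */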
@@ -1066,6 +1050,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+	  (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+	irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+	ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+	ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -1;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = strdata(s)[0];
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+	args[1] = ASMREF_TMP1;  /* TValue * */
+	ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+	lua_assert(irt_isinteger(IR(irs->op1)->t));
+	args[1] = irs->op1;  /* int */
+	if (irs->op2 == IRTOSTR_INT)
+	  ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+	else
+	  ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -1)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+			    const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      asm_fppow(as, ir, irpp->op1, irp->op2);
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
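The BUFPUT path above is where the string-buffer fusions happen: a single-character string constant is routed to lj_buf_putchar() with the char as an immediate, a fused TOSTR passes its number/integer operand straight to the lj_strfmt_put*() helpers, and a fused SNEW degenerates to lj_buf_putmem(), so the intermediate GCstr is never allocated. Roughly, for a Lua expression like "x"..n with integer n (IR shapes assumed for illustration):

  /* Assumed IR for "x"..n (sketch):
  **   sb  = BUFHDR base, RESET
  **   sb1 = BUFPUT sb, "x"        --> lj_buf_putchar(sb, 'x')
  **   sb2 = BUFPUT sb1, TOSTR n   --> lj_strfmt_putint(sb1, n), fused;
  **                                   no GCstr is created for n
  **   res = BUFSTR sb2            --> lj_buf_tostr(sb2)
  */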
@@ -1251,12 +1488,7 @@ static void asm_phi_fixup(ASMState *as)
	 irt_clearmark(ir->t);
	 /* Left PHI gained a spill slot before the loop? */
	 if (ra_hasspill(ir->s)) {
-	  IRRef ren;
-	  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-	  ren = tref_ref(lj_ir_emit(as->J));
-	  as->ir = as->T->ir;  /* The IR may have been reallocated. */
-	  IR(ren)->r = (uint8_t)r;
-	  IR(ren)->s = SPS_NONE;
+	  ra_addrename(as, r, lref, as->loopsnapno);
	 }
       }
       rset_clear(work, r);
@@ -1339,6 +1571,136 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    lua_assert(0);  /* Unused for LJ_SOFTFP. */
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
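Hoisting asm_ir() here centralizes a dispatch switch that each per-target lj_asm_*.h previously carried; a backend now only has to define the asm_* emitters the switch names. A hedged sketch of the shape one such emitter takes (simplified, using only allocator calls that appear elsewhere in this file):

  /* Sketch of a per-target emitter the dispatcher expects (simplified). */
  static void asm_neg(ASMState *as, IRIns *ir)
  {
    Reg dest = ra_dest(as, ir, RSET_GPR);  /* Pick destination register. */
    /* ...emit the target-specific negate of dest here; code is emitted
    ** backwards, so this lands between the two allocator calls... */
    ra_left(as, dest, ir->op1);            /* Tie the operand to dest. */
  }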
@@ -1537,7 +1899,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
@@ -1557,19 +1919,23 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
	 pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
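The LJ_FR2 adjustments reflect the two-slot frame layout of GC64 builds: each call frame takes an extra slot, so the base slot found in the snapshot and the count of multiple results both shrink by LJ_FR2 (0 or 1). A worked example under those assumptions:

  /* With LJ_FR2 = 1, snap->nslots = 8, baseslot = 2:
  **   mres = 8 - 2 - 1 = 5 slots of multiple results.
  ** For a BC_CALLM at the link PC, the callee slot, the extra frame slot
  ** and the fixed arguments come off as well:
  **   mres -= 1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc);
  */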
@@ -1581,6 +1947,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
@@ -1606,16 +1977,27 @@ static void asm_setup_regsp(ASMState *as)
   ra_setup(as);
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
   ir = IR(nins-1);
   if (ir->o == IR_RENAME) {
+    /* Remove any renames left over from ASM restart due to LJ_TRERR_MCODELM. */
     do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
+    T->nins = nins;
   }
   as->snaprename = nins;
   as->snapref = nins;
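The new constant-clearing loop relies on 64-bit IR constants occupying two adjacent slots, with the payload stored in the second one; that second slot is what ir_knum()/ir_kint64() dereference, and the loop's extra ir++ skips it. On 32-bit builds the payload's absolute address is cached in ir->i so backends can address it directly; under LJ_GC64 the field instead stays zero until a RIP-relative use fills it in. Layout sketch (inferred from the accessors used in this file):

  /* Two-slot 64-bit constant (sketch):
  **   ir[0] : IRIns header, ir->o == IR_KNUM / IR_KINT64 / ...
  **   ir[1] : 64-bit payload, e.g. ir_knum(ir) == &ir[1]
  ** Non-GC64: ir->i = (int32_t)(intptr_t)&ir[1] for direct addressing.
  */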
@@ -1677,7 +2059,7 @@ static void asm_setup_regsp(ASMState *as)
	 as->modset |= RSET_SCRATCH;
       continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1722,10 +2104,20 @@ static void asm_setup_regsp(ASMState *as)
       /* fallthrough */
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
	 as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+	if (ir->op2 != REF_NIL && as->evenspill < 4)
+	  as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+#else
+    case IR_CNEW:
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
	 as->modset = RSET_SCRATCH;
@@ -1734,21 +2126,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
	 as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+	as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-	ir->prev = REGSP_HINT(RID_XMM0);
	 if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+	  as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+	break;
 #else
	 ir->prev = REGSP_HINT(RID_FPRET);
-	if (inloop)
-	  as->modset |= RSET_SCRATCH;
-#endif
	 continue;
+#endif
       }
       /* fallthrough for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1761,31 +2158,33 @@ static void asm_setup_regsp(ASMState *as)
       break;
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-	ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-	if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+	if (!(as->flags & JIT_F_SSE4_1)) {
+	  ir->prev = REGSP_HINT(RID_XMM0);
+	  if (inloop)
+	    as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+	  continue;
+	}
+	break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+	if (as->evenspill < 4)  /* Leave room to call pow(). */
	   as->evenspill = 4;
-#endif
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-	continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-	ir->prev = REGSP_HINT(RID_XMM0);
-	if (inloop)
-	  as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-	continue;
       }
+#endif
+      if (inloop)
+	as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-	as->modset |= RSET_SCRATCH;
       continue;
 #endif
 #if LJ_TARGET_X86ORX64
     /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
-    case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+    case IR_BSHL: case IR_BSHR: case IR_BSAR:
+      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
+	break;
+    case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
	 IR(ir->op2)->r = REGSP_HINT(RID_ECX);
	 if (inloop)
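The BMI2 special case works because SHLX/SHRX/SARX take their shift count in an arbitrary GPR, so the RID_ECX hint (and its eviction cost in loops) is only needed for the legacy count-in-CL encodings. Rotates still need the hint: RORX only accepts an immediate count, so variable-count BROL/BROR fall through to the hinted path. Summary of the assumed instruction selection:

  /* Assumed x86/x64 selection for variable shift counts:
  **   no BMI2 : shl/shr/sar r, cl        -- count pinned to ECX (hint)
  **   BMI2    : shlx/shrx/sarx r, r, rc  -- count in any GPR, no hint
  **   rotates : rol/ror r, cl            -- rorx is immediate-only, so
  **                                         BROL/BROR keep the hint
  */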
@@ -1832,13 +2231,14 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   MCode *origtop;
 
   /* Ensure an initialized instruction beyond the last one for HIOP checks. */
-  J->cur.nins = lj_ir_nextins(J);
-  J->cur.ir[J->cur.nins].o = IR_NOP;
+  /* This also allows one RENAME to be added without reallocating curfinal. */
+  as->orignins = lj_ir_nextins(J);
+  J->cur.ir[as->orignins].o = IR_NOP;
 
   /* Setup initial state. Copy some fields to reduce indirections. */
   as->J = J;
   as->T = T;
-  as->ir = T->ir;
+  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
   as->flags = J->flags;
   as->loopref = J->loopref;
   as->realign = NULL;
@@ -1851,12 +2251,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
 
-  do {
+  /*
+  ** This is a loop, because the MCode may have to be (re-)assembled
+  ** multiple times:
+  **
+  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
+  **    backend wants the MCode to be aligned differently.
+  **
+  **    This is currently only the case on x86/x64, where small loops get
+  **    an aligned loop body plus a short branch. Not much effort is wasted,
+  **    because the abort happens very quickly and only once.
+  **
+  ** 2. The IR is immovable, since the MCode embeds pointers to various
+  **    constants inside the IR. But RENAMEs may need to be added to the IR
+  **    during assembly, which might grow and reallocate the IR. We check
+  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
+  **    copy (in J->curfinal.ir) and try again.
+  **
+  ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
+  ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
+  ** always have one spare slot in the IR (see above), which means we
+  ** have to redo the assembly for only ~2% of all traces.
+  **
+  ** Very, very rarely, this needs to be done repeatedly, since the
+  ** location of constants inside the IR (actually, reachability from
+  ** a global pointer) may affect register allocation and thus the
+  ** number of RENAMEs.
+  */
+  for (;;) {
     as->mcp = as->mctop;
 #ifdef LUA_USE_ASSERT
     as->mcp_prev = as->mcp;
 #endif
-    as->curins = T->nins;
+    as->ir = J->curfinal->ir;  /* Use the copied IR. */
+    as->curins = J->cur.nins = as->orignins;
+
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
 
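The statistics in the comment explain the sizing strategy: the single spare IR slot reserved via as->orignins absorbs the one RENAME that ~3% of traces need, so only the ~2% of traces gaining two or more RENAMEs pay for a reassembly. Condensed control flow of the loop (illustrative paraphrase, not the literal code):

  /* for (;;) {
  **   assemble backwards over the IR copy in J->curfinal->ir;
  **   if (realign requested && IR did not grow) continue;  -- realigned retry
  **   emit trace head;
  **   if (IR did not grow) { copy new RENAMEs into curfinal; break; }
  **   free curfinal; reallocate it from J->cur (now bigger); retry;
  ** }
  */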
@@ -1884,22 +2313,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       checkmclim(as);
       asm_ir(as, ir);
     }
-  } while (as->realign);  /* Retry in case the MCode needs to be realigned. */
 
-  /* Emit head of trace. */
-  RA_DBG_REF();
-  checkmclim(as);
-  if (as->gcsteps > 0) {
-    as->curins = as->T->snap[0].ref;
-    asm_snap_prep(as);  /* The GC check is a guard. */
-    asm_gc_check(as);
+    if (as->realign && J->curfinal->nins >= T->nins)
+      continue;  /* Retry in case only the MCode needs to be realigned. */
+
+    /* Emit head of trace. */
+    RA_DBG_REF();
+    checkmclim(as);
+    if (as->gcsteps > 0) {
+      as->curins = as->T->snap[0].ref;
+      asm_snap_prep(as);  /* The GC check is a guard. */
+      asm_gc_check(as);
+      as->curins = as->stopins;
+    }
+    ra_evictk(as);
+    if (as->parent)
+      asm_head_side(as);
+    else
+      asm_head_root(as);
+    asm_phi_fixup(as);
+
+    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
+      lua_assert(J->curfinal->nk == T->nk);
+      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+	     (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
+      T->nins = J->curfinal->nins;
+      break;  /* Done. */
+    }
+
+    /* Otherwise try again with a bigger IR. */
+    lj_trace_free(J2G(J), J->curfinal);
+    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
+    J->curfinal = lj_trace_alloc(J->L, T);
+    as->realign = NULL;
   }
-  ra_evictk(as);
-  if (as->parent)
-    asm_head_side(as);
-  else
-    asm_head_root(as);
-  asm_phi_fixup(as);
 
   RA_DBGX((as, "===== START ===="));
   RA_DBG_FLUSH();