Diffstat (limited to 'src/lj_asm.c')
 src/lj_asm.c | 720 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 602 insertions(+), 118 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 02714d4e..c2cf5a95 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -91,7 +91,7 @@ typedef struct ASMState {
   MCode *realign;  /* Realign loop if not NULL. */
 
 #ifdef RID_NUM_KREF
-  int32_t krefk[RID_NUM_KREF];
+  intptr_t krefk[RID_NUM_KREF];
 #endif
   IRRef1 phireg[RID_MAX];  /* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
@@ -144,7 +144,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #define ra_krefreg(ref)    ((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefk(as, ref)  (as->krefk[(ref)])
 
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
 {
   IRRef ref = (IRRef)(r - RID_MIN_KREF);
   as->krefk[ref] = k;
@@ -171,6 +171,8 @@ IRFLDEF(FLOFS)
171#include "lj_emit_x86.h" 171#include "lj_emit_x86.h"
172#elif LJ_TARGET_ARM 172#elif LJ_TARGET_ARM
173#include "lj_emit_arm.h" 173#include "lj_emit_arm.h"
174#elif LJ_TARGET_ARM64
175#include "lj_emit_arm64.h"
174#elif LJ_TARGET_PPC 176#elif LJ_TARGET_PPC
175#include "lj_emit_ppc.h" 177#include "lj_emit_ppc.h"
176#elif LJ_TARGET_MIPS 178#elif LJ_TARGET_MIPS
@@ -179,6 +181,12 @@ IRFLDEF(FLOFS)
179#error "Missing instruction emitter for target CPU" 181#error "Missing instruction emitter for target CPU"
180#endif 182#endif
181 183
184/* Generic load/store of register from/to stack slot. */
185#define emit_spload(as, ir, r, ofs) \
186 emit_loadofs(as, ir, (r), RID_SP, (ofs))
187#define emit_spstore(as, ir, r, ofs) \
188 emit_storeofs(as, ir, (r), RID_SP, (ofs))
189
182/* -- Register allocator debugging ---------------------------------------- */ 190/* -- Register allocator debugging ---------------------------------------- */
183 191
184/* #define LUAJIT_DEBUG_RA */ 192/* #define LUAJIT_DEBUG_RA */
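
The emit_spload/emit_spstore wrappers above only pin the base register to
RID_SP; each target backend still supplies its own emit_loadofs/emit_storeofs.
A minimal standalone sketch of the same macro pattern, assuming a toy printf
emitter with simplified signatures (the real emitters also take ASMState and
IRIns arguments):

    #include <stdio.h>

    typedef int Reg;
    enum { RID_SP = 31 };  /* Stand-in register id for the stack pointer. */

    /* Toy emitters; a real backend appends machine code instead. */
    static void emit_loadofs(Reg r, Reg base, int ofs)
    { printf("load  r%d <- [r%d+%d]\n", r, base, ofs); }
    static void emit_storeofs(Reg r, Reg base, int ofs)
    { printf("store r%d -> [r%d+%d]\n", r, base, ofs); }

    /* Same shape as the patch: fix the base register to the stack pointer. */
    #define emit_spload(r, ofs)   emit_loadofs((r), RID_SP, (ofs))
    #define emit_spstore(r, ofs)  emit_storeofs((r), RID_SP, (ofs))

    int main(void)
    {
      emit_spstore(3, 16);  /* Spill r3 to the stack slot at sp+16. */
      emit_spload(3, 16);   /* Reload it from the same slot. */
      return 0;
    }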
@@ -316,7 +324,11 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     lua_assert(!rset_test(as->freeset, r));
     ra_free(as, r);
     ra_modified(as, r);
+#if LJ_64
+    emit_loadu64(as, r, ra_krefk(as, ref));
+#else
     emit_loadi(as, r, ra_krefk(as, ref));
+#endif
     return r;
   }
   ir = IR(ref);
@@ -328,7 +340,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   RA_DBGX((as, "remat $i $r", ir, r));
 #if !LJ_SOFTFP
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@@ -336,10 +348,16 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -512,7 +530,7 @@ static void ra_evictk(ASMState *as)
 
 #ifdef RID_NUM_KREF
 /* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 {
   /* First try to find a register which already holds the same constant. */
   RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -521,9 +539,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
     IRRef ref;
     r = rset_pickbot(work);
     ref = regcost_ref(as->cost[r]);
+#if LJ_64
+    if (ref < ASMREF_L) {
+      if (ra_iskref(ref)) {
+        if (k == ra_krefk(as, ref))
+          return r;
+      } else {
+        IRIns *ir = IR(ref);
+        if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+            (ir->o == IR_KINT && k == ir->i) ||
+            (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+            ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+             k == (intptr_t)ir_kptr(ir))
+#else
+            (ir->o != IR_KINT64 && k == ir->i)
+#endif
+           )
+          return r;
+      }
+    }
+#else
     if (ref < ASMREF_L &&
         k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
       return r;
+#endif
     rset_clear(work, r);
   }
   pick = as->freeset & allow;
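
Since krefk now holds an intptr_t, a register can cache any 64-bit constant,
and reuse has to compare by the constant's kind: a KINT matches its
sign-extended 32-bit value, while KINT64/KGC/KPTR match the full 64-bit value.
A standalone sketch of that rule, assuming a 64-bit build and stand-in types
rather than the real IRIns layout:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in constant record; the real IR packs this differently. */
    typedef enum { K_INT, K_INT64, K_PTR } KKind;
    typedef struct { KKind kind; int32_t i; uint64_t u64; void *p; } KConst;

    /* Does a register holding kc already contain the value k? */
    static int kmatch(const KConst *kc, intptr_t k)
    {
      switch (kc->kind) {
      case K_INT:   return k == kc->i;             /* Sign-extended compare. */
      case K_INT64: return k == (int64_t)kc->u64;  /* Full-width compare. */
      case K_PTR:   return k == (intptr_t)kc->p;   /* Pointer compare. */
      default:      return 0;
      }
    }

    int main(void)
    {
      KConst c = { K_INT, -1, 0, 0 };
      /* On a 64-bit build, -1 matches sign-extended, 0xffffffff does not. */
      printf("%d %d\n", kmatch(&c, -1), kmatch(&c, (intptr_t)0xffffffffLL));
      return 0;
    }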
@@ -543,7 +583,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
 }
 
 /* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
 {
   Reg kr = ra_allock(as, k, RID2RSET(r));
   if (kr != r) {
@@ -613,10 +653,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
@@ -629,11 +679,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    ra_addrename(as, down, ref, as->snapno);
   }
 }
 
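
ra_addrename centralizes the RENAME bookkeeping and writes through
as->J->cur.ir rather than the as->ir alias, which matters once assembly runs
on a copied IR (see the lj_asm_trace changes below). Conceptually, a RENAME
records that a value lived in a different register before a given snapshot.
A hypothetical standalone model of how an exit might consult such a record
(all names invented for illustration):

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical record: before snapshot `snapno`, the value of
    ** instruction `ref` lived in register `down`, not in the register
    ** the final allocation assigns. */
    typedef struct { uint16_t ref; uint16_t snapno; uint8_t down; } Rename;

    static uint8_t snap_reg(const Rename *rn, int nrn, uint16_t ref,
                            uint16_t snapno, uint8_t final_reg)
    {
      int i;
      for (i = 0; i < nrn; i++)  /* Old snapshots see the old register. */
        if (rn[i].ref == ref && snapno < rn[i].snapno)
          return rn[i].down;
      return final_reg;
    }

    int main(void)
    {
      Rename rn[1] = { { 42, 3, 7 } };  /* Ref 42 was in r7 before snap 3. */
      printf("snap 1: r%d\n", snap_reg(rn, 1, 42, 1, 11));  /* -> r7 */
      printf("snap 4: r%d\n", snap_reg(rn, 1, 42, 4, 11));  /* -> r11 */
      return 0;
    }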
@@ -683,18 +729,22 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-        cTValue *tv = ir_knum(ir);
         /* FP remat needs a load except for +0. Still better than eviction. */
-        if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-          emit_loadn(as, dest, tv);
+        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+          emit_loadk64(as, dest, ir);
           return;
         }
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-        emit_loadu64(as, dest, ir_kint64(ir)->u64);
+        emit_loadk64(as, dest, ir);
+        return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+        emit_loadk64(as, dest, ir);
         return;
 #endif
-      } else {
+#endif
+      } else if (ir->o != IR_KPRI) {
         lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
                    ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
         emit_loadi(as, dest, ir->i);
@@ -935,7 +985,7 @@ static void asm_snap_prep(ASMState *as)
   } else {
     /* Process any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
         ir->op2 = REF_BIAS-1;  /* Kill rename. */
     }
@@ -944,44 +994,6 @@ static void asm_snap_prep(ASMState *as)
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-                            const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -1066,6 +1078,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+          (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+        irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+        ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -1;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = strdata(s)[0];
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+        args[1] = ASMREF_TMP1;  /* TValue * */
+        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+        lua_assert(irt_isinteger(IR(irs->op1)->t));
+        args[1] = irs->op1;  /* int */
+        if (irs->op2 == IRTOSTR_INT)
+          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+        else
+          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -1)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+                            const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      asm_fppow(as, ir, irpp->op1, irp->op2);
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
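
asm_bufput above demotes the generic lj_buf_putstr call to cheaper helpers
whenever the operand shape is known at assembly time: a single-char string
constant becomes lj_buf_putchar, a fused TOSTR becomes one of the
lj_strfmt_put* calls, and a fused SNEW becomes lj_buf_putmem. A standalone
sketch of just the selection idea, with stand-in types instead of IR
instructions:

    #include <stdio.h>
    #include <string.h>

    /* Stand-in operand description; the real code inspects IR instructions. */
    typedef struct { int is_kstr, is_snew; const char *s; } PutArg;

    /* Mirror the shape of asm_bufput's call-target selection. */
    static const char *pick_helper(const PutArg *a)
    {
      if (a->is_kstr && strlen(a->s) == 1)
        return "lj_buf_putchar";  /* Single-char constant: pass the char. */
      if (a->is_snew)
        return "lj_buf_putmem";   /* Fused SNEW: pass pointer and length. */
      return "lj_buf_putstr";     /* Generic case: pass the GCstr. */
    }

    int main(void)
    {
      PutArg one = { 1, 0, "," }, buf = { 0, 1, 0 }, any = { 0, 0, 0 };
      printf("%s %s %s\n", pick_helper(&one), pick_helper(&buf),
             pick_helper(&any));
      return 0;
    }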
@@ -1251,12 +1516,7 @@ static void asm_phi_fixup(ASMState *as)
         irt_clearmark(ir->t);
         /* Left PHI gained a spill slot before the loop? */
         if (ra_hasspill(ir->s)) {
-          IRRef ren;
-          lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-          ren = tref_ref(lj_ir_emit(as->J));
-          as->ir = as->T->ir;  /* The IR may have been reallocated. */
-          IR(ren)->r = (uint8_t)r;
-          IR(ren)->s = SPS_NONE;
+          ra_addrename(as, r, lref, as->loopsnapno);
         }
       }
       rset_clear(work, r);
@@ -1331,6 +1591,8 @@ static void asm_loop(ASMState *as)
1331#include "lj_asm_x86.h" 1591#include "lj_asm_x86.h"
1332#elif LJ_TARGET_ARM 1592#elif LJ_TARGET_ARM
1333#include "lj_asm_arm.h" 1593#include "lj_asm_arm.h"
1594#elif LJ_TARGET_ARM64
1595#include "lj_asm_arm64.h"
1334#elif LJ_TARGET_PPC 1596#elif LJ_TARGET_PPC
1335#include "lj_asm_ppc.h" 1597#include "lj_asm_ppc.h"
1336#elif LJ_TARGET_MIPS 1598#elif LJ_TARGET_MIPS
@@ -1339,6 +1601,136 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    lua_assert(0);  /* Unused for LJ_SOFTFP. */
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
@@ -1537,7 +1929,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
@@ -1557,19 +1949,23 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
         pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
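
Under LJ_FR2 (the GC64 frame layout) a call frame takes two stack slots
instead of one, so every slot-derived count shifts by one: the base slot
returned by asm_baseslot, the number of multiple results, and the
CALLM/CALLMT adjustment. A small worked sketch with hypothetical slot
numbers:

    #include <stdio.h>

    /* Slot accounting sketch: with a two-slot frame (fr2 == 1), the extra
    ** slot belongs to the frame link and must not be counted as a result. */
    static int multres(int nslots, int baseslot, int fr2)
    {
      return nslots - baseslot - fr2;
    }

    int main(void)
    {
      /* Hypothetical snapshot: 6 live slots, trace base at slot 2. */
      printf("FR1: %d results\n", multres(6, 2, 0));  /* -> 4 */
      printf("FR2: %d results\n", multres(6, 2, 1));  /* -> 3 */
      return 0;
    }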
@@ -1581,6 +1977,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
@@ -1606,17 +2007,22 @@ static void asm_setup_regsp(ASMState *as)
   ra_setup(as);
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
-  ir = IR(nins-1);
-  if (ir->o == IR_RENAME) {
-    do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
-  }
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
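
The loop above pairs each 64-bit constant with the following IR slot that
holds its payload; on non-GC64 builds the payload's address is cached in
ir->i, which only fits in an int32_t because the JIT keeps its allocations in
the low 2GB of address space. A standalone model of the two-slot layout (not
the real IRIns definition):

    #include <stdint.h>
    #include <stdio.h>

    /* Model: a 64-bit constant is stored in the slot after its header. */
    typedef union { struct { int32_t i; } hdr; uint64_t k64; } Slot;

    int main(void)
    {
      Slot ir[2];
      ir[1].k64 = 0x123456789abcdef0ULL;        /* Constant payload. */
      ir[0].hdr.i = (int32_t)(intptr_t)&ir[1];  /* Truncated address. */
      /* Only valid when the IR arena lives in the low 2GB, which is what
      ** the non-GC64 JIT guarantees for its allocations. */
      printf("payload at %p, i=0x%x\n", (void *)&ir[1], (unsigned)ir[0].hdr.i);
      return 0;
    }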
@@ -1677,7 +2083,7 @@ static void asm_setup_regsp(ASMState *as)
       as->modset |= RSET_SCRATCH;
       continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1722,10 +2128,20 @@ static void asm_setup_regsp(ASMState *as)
       /* fallthrough */
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
         as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+        if (ir->op2 != REF_NIL && as->evenspill < 4)
+          as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+#else
+    case IR_CNEW:
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
         as->modset = RSET_SCRATCH;
@@ -1734,21 +2150,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
         as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+        as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-        ir->prev = REGSP_HINT(RID_XMM0);
         if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+          as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+        break;
 #else
         ir->prev = REGSP_HINT(RID_FPRET);
-        if (inloop)
-          as->modset |= RSET_SCRATCH;
-#endif
         continue;
+#endif
       }
       /* fallthrough for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1761,31 +2182,33 @@ static void asm_setup_regsp(ASMState *as)
       break;
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-        ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-        if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+        if (!(as->flags & JIT_F_SSE4_1)) {
+          ir->prev = REGSP_HINT(RID_XMM0);
+          if (inloop)
+            as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+          continue;
+        }
+        break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+        if (as->evenspill < 4)  /* Leave room to call pow(). */
           as->evenspill = 4;
-#endif
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-        continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-        ir->prev = REGSP_HINT(RID_XMM0);
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-        continue;
       }
+#endif
+      if (inloop)
+        as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-        as->modset |= RSET_SCRATCH;
       continue;
 #endif
 #if LJ_TARGET_X86ORX64
     /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
-    case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+    case IR_BSHL: case IR_BSHR: case IR_BSAR:
+      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
+        break;
+    case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
         IR(ir->op2)->r = REGSP_HINT(RID_ECX);
         if (inloop)
@@ -1831,14 +2254,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   ASMState *as = &as_;
   MCode *origtop;
 
+  /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
+  {
+    IRRef nins = T->nins;
+    IRIns *ir = &T->ir[nins-1];
+    if (ir->o == IR_NOP || ir->o == IR_RENAME) {
+      do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
+      T->nins = nins;
+    }
+  }
+
   /* Ensure an initialized instruction beyond the last one for HIOP checks. */
-  J->cur.nins = lj_ir_nextins(J);
-  J->cur.ir[J->cur.nins].o = IR_NOP;
+  /* This also allows one RENAME to be added without reallocating curfinal. */
+  as->orignins = lj_ir_nextins(J);
+  J->cur.ir[as->orignins].o = IR_NOP;
 
   /* Setup initial state. Copy some fields to reduce indirections. */
   as->J = J;
   as->T = T;
-  as->ir = T->ir;
+  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
   as->flags = J->flags;
   as->loopref = J->loopref;
   as->realign = NULL;
@@ -1851,12 +2285,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
 
-  do {
+  /*
+  ** This is a loop, because the MCode may have to be (re-)assembled
+  ** multiple times:
+  **
+  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
+  **    backend wants the MCode to be aligned differently.
+  **
+  **    This is currently only the case on x86/x64, where small loops get
+  **    an aligned loop body plus a short branch. Not much effort is wasted,
+  **    because the abort happens very quickly and only once.
+  **
+  ** 2. The IR is immovable, since the MCode embeds pointers to various
+  **    constants inside the IR. But RENAMEs may need to be added to the IR
+  **    during assembly, which might grow and reallocate the IR. We check
+  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
+  **    copy (in J->curfinal.ir) and try again.
+  **
+  **    95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
+  **    2 RENAMEs and only 0.5% have more than that. That's why we opt to
+  **    always have one spare slot in the IR (see above), which means we
+  **    have to redo the assembly for only ~2% of all traces.
+  **
+  **    Very, very rarely, this needs to be done repeatedly, since the
+  **    location of constants inside the IR (actually, reachability from
+  **    a global pointer) may affect register allocation and thus the
+  **    number of RENAMEs.
+  */
+  for (;;) {
     as->mcp = as->mctop;
 #ifdef LUA_USE_ASSERT
     as->mcp_prev = as->mcp;
 #endif
-    as->curins = T->nins;
+    as->ir = J->curfinal->ir;  /* Use the copied IR. */
+    as->curins = J->cur.nins = as->orignins;
+
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
 
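
The comment block describes assembling into a copy whose IR pointers stay
stable, with one spare slot so a single added RENAME never forces a retry.
A toy standalone sketch of the grow-and-retry control flow, with invented
sizes standing in for the trace state:

    #include <stdio.h>

    /* Toy model: assembling may append renames to the working IR. */
    static int assemble(int nins, int renames_added)
    {
      return nins + renames_added;
    }

    int main(void)
    {
      int work = 100;        /* Working IR size before assembly. */
      int cap = work + 1;    /* Copy capacity: one spare slot for a RENAME. */
      int pending = 3;       /* Pretend this pass adds three RENAMEs. */
      for (;;) {
        int grown = assemble(work, pending);
        if (grown <= cap) {  /* Fits: copy the renames over and stop. */
          printf("done: %d ins within capacity %d\n", grown, cap);
          break;
        }
        printf("IR grew to %d > %d: reallocate copy and retry\n", grown, cap);
        cap = grown + 1;     /* Resize the copy, keep the spare slot. */
        pending = 0;         /* Toy assumption: the second pass is stable. */
      }
      return 0;
    }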
@@ -1884,22 +2347,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       checkmclim(as);
       asm_ir(as, ir);
     }
-  } while (as->realign);  /* Retry in case the MCode needs to be realigned. */
 
-  /* Emit head of trace. */
-  RA_DBG_REF();
-  checkmclim(as);
-  if (as->gcsteps > 0) {
-    as->curins = as->T->snap[0].ref;
-    asm_snap_prep(as);  /* The GC check is a guard. */
-    asm_gc_check(as);
+    if (as->realign && J->curfinal->nins >= T->nins)
+      continue;  /* Retry in case only the MCode needs to be realigned. */
+
+    /* Emit head of trace. */
+    RA_DBG_REF();
+    checkmclim(as);
+    if (as->gcsteps > 0) {
+      as->curins = as->T->snap[0].ref;
+      asm_snap_prep(as);  /* The GC check is a guard. */
+      asm_gc_check(as);
+      as->curins = as->stopins;
+    }
+    ra_evictk(as);
+    if (as->parent)
+      asm_head_side(as);
+    else
+      asm_head_root(as);
+    asm_phi_fixup(as);
+
+    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
+      lua_assert(J->curfinal->nk == T->nk);
+      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+             (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
+      T->nins = J->curfinal->nins;
+      break;  /* Done. */
+    }
+
+    /* Otherwise try again with a bigger IR. */
+    lj_trace_free(J2G(J), J->curfinal);
+    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
+    J->curfinal = lj_trace_alloc(J->L, T);
+    as->realign = NULL;
   }
-  ra_evictk(as);
-  if (as->parent)
-    asm_head_side(as);
-  else
-    asm_head_root(as);
-  asm_phi_fixup(as);
 
   RA_DBGX((as, "===== START ===="));
   RA_DBG_FLUSH();
@@ -1912,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   if (!as->loopref)
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+#if LJ_TARGET_MCODE_FIXUP
+  asm_mcode_fixup(T->mcode, T->szmcode);
+#endif
   lj_mcode_sync(T->mcode, origtop);
 }
 