Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--  src/lj_asm.c  733
1 file changed, 613 insertions(+), 120 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9b17421e..68d28fb0 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -90,7 +90,7 @@ typedef struct ASMState {
   MCode *realign;  /* Realign loop if not NULL. */
 
 #ifdef RID_NUM_KREF
-  int32_t krefk[RID_NUM_KREF];
+  intptr_t krefk[RID_NUM_KREF];
 #endif
   IRRef1 phireg[RID_MAX];  /* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
@@ -143,7 +143,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #define ra_krefreg(ref)  ((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefk(as, ref)  (as->krefk[(ref)])
 
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
 {
   IRRef ref = (IRRef)(r - RID_MIN_KREF);
   as->krefk[ref] = k;
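
The int32_t-to-intptr_t widening above is the crux of this hunk: on 64-bit targets (and especially with LJ_GC64) the KREF slots must hold full pointer-width constants. A standalone sketch of the truncation a 32-bit slot would cause — the address value is illustrative, not LuaJIT data:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uintptr_t addr = (uintptr_t)0x123456789aULL;  /* Needs >32 bits on 64-bit. */
  int32_t narrow = (int32_t)addr;               /* Truncates: constant lost. */
  intptr_t wide = (intptr_t)addr;               /* Round-trips exactly. */
  printf("narrow=%lx wide=%lx\n", (long)(uint32_t)narrow, (long)wide);
  return 0;
}
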
@@ -170,6 +170,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_emit_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_emit_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -178,6 +180,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
@@ -315,7 +323,11 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     lua_assert(!rset_test(as->freeset, r));
     ra_free(as, r);
     ra_modified(as, r);
+#if LJ_64
+    emit_loadu64(as, r, ra_krefk(as, ref));
+#else
     emit_loadi(as, r, ra_krefk(as, ref));
+#endif
     return r;
   }
   ir = IR(ref);
@@ -325,9 +337,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   ra_modified(as, r);
   ir->r = RID_INIT;  /* Do not keep any hint. */
   RA_DBGX((as, "remat $i $r", ir, r));
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@@ -335,10 +347,16 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -511,7 +529,7 @@ static void ra_evictk(ASMState *as)
 
 #ifdef RID_NUM_KREF
 /* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 {
   /* First try to find a register which already holds the same constant. */
   RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -520,9 +538,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
     IRRef ref;
     r = rset_pickbot(work);
     ref = regcost_ref(as->cost[r]);
+#if LJ_64
+    if (ref < ASMREF_L) {
+      if (ra_iskref(ref)) {
+        if (k == ra_krefk(as, ref))
+          return r;
+      } else {
+        IRIns *ir = IR(ref);
+        if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+            (ir->o == IR_KINT && k == ir->i) ||
+            (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+            ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+             k == (intptr_t)ir_kptr(ir))
+#else
+            (ir->o != IR_KINT64 && k == ir->i)
+#endif
+           )
+          return r;
+      }
+    }
+#else
     if (ref < ASMREF_L &&
         k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
       return r;
+#endif
     rset_clear(work, r);
   }
   pick = as->freeset & allow;
@@ -542,7 +582,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
 }
 
 /* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
 {
   Reg kr = ra_allock(as, k, RID2RSET(r));
   if (kr != r) {
@@ -612,10 +652,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
@@ -628,11 +678,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    ra_addrename(as, down, ref, as->snapno);
   }
 }
 
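Factoring the rename emission into ra_addrename() also changes how the fresh instruction is addressed: the writes go through as->J->cur.ir instead of a cached as->ir base, because lj_ir_emit() may reallocate the IR array. A generic sketch of that reallocation hazard (the Buf type is illustrative, not LuaJIT's API):

#include <stdlib.h>

typedef struct Buf { int *items; size_t n, cap; } Buf;

/* Appending may realloc and move the array: any pointer or cached base
** taken before the call can be left dangling afterwards. */
static size_t buf_emit(Buf *b, int v)
{
  if (b->n == b->cap) {  /* Growth may move the whole array. */
    b->cap = b->cap ? 2*b->cap : 8;
    b->items = realloc(b->items, b->cap * sizeof(int));
  }
  b->items[b->n] = v;
  return b->n++;  /* Return an index, not a pointer, for this reason. */
}
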
@@ -682,18 +728,22 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-        cTValue *tv = ir_knum(ir);
         /* FP remat needs a load except for +0. Still better than eviction. */
-        if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-          emit_loadn(as, dest, tv);
+        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+          emit_loadk64(as, dest, ir);
           return;
         }
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-        emit_loadu64(as, dest, ir_kint64(ir)->u64);
+        emit_loadk64(as, dest, ir);
+        return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+        emit_loadk64(as, dest, ir);
         return;
 #endif
-      } else {
+#endif
+      } else if (ir->o != IR_KPRI) {
         lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
                    ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
         emit_loadi(as, dest, ir->i);
@@ -934,7 +984,7 @@ static void asm_snap_prep(ASMState *as)
   } else {
     /* Process any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
         ir->op2 = REF_BIAS-1;  /* Kill rename. */
     }
@@ -943,44 +993,6 @@ static void asm_snap_prep(ASMState *as)
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-                            const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -1004,7 +1016,11 @@ static uint32_t ir_khash(IRIns *ir)
   } else {
     lua_assert(irt_isgcv(ir->t));
     lo = u32ptr(ir_kgc(ir));
+#if LJ_GC64
+    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
+#else
     hi = lo + HASH_BIAS;
+#endif
   }
   return hashrot(lo, hi);
 }
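
With 64-bit GC pointers, the low 32 bits alone would collide for objects that differ only above the 4GB boundary, so the GC64 path folds the high word plus the value's type tag into hi before mixing. A sketch of the idea with a stand-in mixer (LuaJIT's actual hashrot() uses different rotate constants):

#include <stdint.h>

static uint32_t rol(uint32_t x, int n) { return (x << n) | (x >> (32-n)); }

/* Stand-in mixer, same shape as a xor/rotate hash finisher. */
static uint32_t mix(uint32_t lo, uint32_t hi)
{
  lo ^= hi; hi = rol(hi, 14);
  lo -= hi; hi = rol(hi, 5);
  hi ^= lo; hi -= rol(lo, 13);
  return hi;
}

static uint32_t khash64(uint64_t ptr, uint32_t itype)
{
  uint32_t lo = (uint32_t)ptr;
  uint32_t hi = (uint32_t)(ptr >> 32) | (itype << 15);
  return mix(lo, hi);
}
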
@@ -1065,6 +1081,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+          (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+        irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+        ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -129;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = (int8_t)strdata(s)[0];  /* Signed! */
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+        args[1] = ASMREF_TMP1;  /* TValue * */
+        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+        lua_assert(irt_isinteger(IR(irs->op1)->t));
+        args[1] = irs->op1;  /* int */
+        if (irs->op2 == IRTOSTR_INT)
+          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+        else
+          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -129)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+                            const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP32
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      asm_fppow(as, ir, irpp->op1, irp->op2);
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
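
The new asm_bufput above rewrites a generic "append string" call into progressively cheaper runtime calls when the operand shape allows it. The same strength reduction in plain C, with hypothetical put_* helpers standing in for the lj_buf/lj_strfmt calls:

#include <stdio.h>
#include <string.h>

static void put_string(const char *s, size_t len) { fwrite(s, 1, len, stdout); }
static void put_char(int c) { fputc(c, stdout); }

/* A known 1-char string constant degenerates to a putchar-style call;
** note the signed char, mirroring the (int8_t) cast in the diff. */
static void put_const(const char *s)
{
  size_t len = strlen(s);
  if (len == 1)
    put_char((signed char)s[0]);
  else
    put_string(s, len);
}

int main(void) { put_const(","); put_const("hello"); return 0; }
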
@@ -1250,12 +1519,7 @@ static void asm_phi_fixup(ASMState *as)
       irt_clearmark(ir->t);
       /* Left PHI gained a spill slot before the loop? */
       if (ra_hasspill(ir->s)) {
-        IRRef ren;
-        lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-        ren = tref_ref(lj_ir_emit(as->J));
-        as->ir = as->T->ir;  /* The IR may have been reallocated. */
-        IR(ren)->r = (uint8_t)r;
-        IR(ren)->s = SPS_NONE;
+        ra_addrename(as, r, lref, as->loopsnapno);
       }
     }
     rset_clear(work, r);
@@ -1330,6 +1594,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_asm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_asm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_asm_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -1338,6 +1604,136 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP32
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    lua_assert(0);  /* Unused for LJ_SOFTFP32. */
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
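
The IR_EQ/IR_NE case above contains the one peephole in this dispatcher: a compare whose operand is the immediately preceding HREF is folded back into asm_href(), consuming two IR instructions in one step of the backward walk. A minimal sketch of that adjacency test (types and opcode names are illustrative, not LuaJIT's):

#include <stdint.h>

enum { OP_HREF = 1, OP_EQ, OP_NE };
typedef struct Ins { uint8_t o; uint16_t op1; } Ins;

/* Fuse only if the compare directly consumes the instruction right
** before it, so the combined lookup-and-compare covers both. */
static int fuse_href_eq(const Ins *ir, uint16_t cur)
{
  return (ir[cur].o == OP_EQ || ir[cur].o == OP_NE) &&
         ir[cur-1].o == OP_HREF && ir[cur].op1 == (uint16_t)(cur-1);
}
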
@@ -1536,7 +1932,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
@@ -1556,19 +1952,23 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
         pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
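
The two LJ_FR2 adjustments above account for the extra frame-link slot of the two-slot frame layout. A worked sketch of the MULTRES arithmetic (values are illustrative, not from a real trace):

#include <stdio.h>

/* Number of results handed to the interpreter on trace exit. */
static int multres(int nslots, int baseslot, int fr2)
{
  return nslots - baseslot - fr2;
}

int main(void)
{
  /* Same snapshot, one- vs. two-slot frame layout. */
  printf("FR1: %d results\n", multres(6, 2, 0));  /* -> 4 */
  printf("FR2: %d results\n", multres(6, 2, 1));  /* -> 3 */
  return 0;
}
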
@@ -1580,6 +1980,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
@@ -1605,17 +2010,23 @@ static void asm_setup_regsp(ASMState *as)
   ra_setup(as);
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
-  ir = IR(nins-1);
-  if (ir->o == IR_RENAME) {
-    do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
-  }
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
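
The non-GC64 branch above stores the address of a 64-bit constant's payload (which occupies the adjacent IR slot) into the 32-bit i field, so backends can address the constant directly; this only works if the IR array lives in the low 2GB. A sketch of that layout assumption, with an illustrative slot type mirroring the two-slot constant encoding:

#include <stdint.h>

typedef union KSlot {
  struct { int32_t i; uint8_t o; } ins;  /* Instruction view. */
  uint64_t u64;                          /* Constant payload view. */
} KSlot;

/* ir[0] is the instruction, ir[1] holds the 64-bit payload. Stashing
** &ir[1] in a 32-bit field assumes the array sits below 2GB (which the
** non-GC64 builds arrange for their GC memory). */
static void set_kaddr(KSlot *ir)
{
  ir[0].ins.i = (int32_t)(intptr_t)&ir[1];
}
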
@@ -1676,7 +2087,7 @@ static void asm_setup_regsp(ASMState *as)
         as->modset |= RSET_SCRATCH;
         continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1701,8 +2112,8 @@ static void asm_setup_regsp(ASMState *as)
         ir->prev = REGSP_HINT(RID_FPRET);
         continue;
       }
-      /* fallthrough */
 #endif
+      /* fallthrough */
     case IR_CALLN: case IR_CALLXS:
 #if LJ_SOFTFP
     case IR_MIN: case IR_MAX:
@@ -1721,11 +2132,23 @@ static void asm_setup_regsp(ASMState *as)
 #endif
       /* fallthrough */
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
         as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+        if (ir->op2 != REF_NIL && as->evenspill < 4)
+          as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+      /* fallthrough */
+#else
+      /* fallthrough */
+    case IR_CNEW:
+#endif
       /* fallthrough */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
         as->modset = RSET_SCRATCH;
@@ -1734,21 +2157,27 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
         as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+        as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
+      /* fallthrough */
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-        ir->prev = REGSP_HINT(RID_XMM0);
         if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+          as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+        break;
 #else
         ir->prev = REGSP_HINT(RID_FPRET);
-        if (inloop)
-          as->modset |= RSET_SCRATCH;
-#endif
         continue;
+#endif
       }
       /* fallthrough */  /* for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1761,31 +2190,34 @@ static void asm_setup_regsp(ASMState *as)
       break;
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-        ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-        if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+        if (!(as->flags & JIT_F_SSE4_1)) {
+          ir->prev = REGSP_HINT(RID_XMM0);
+          if (inloop)
+            as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+          continue;
+        }
+        break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+        if (as->evenspill < 4)  /* Leave room to call pow(). */
           as->evenspill = 4;
-#endif
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-        continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-        ir->prev = REGSP_HINT(RID_XMM0);
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-        continue;
       }
+#endif
+      if (inloop)
+        as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-        as->modset |= RSET_SCRATCH;
       continue;
 #endif
 #if LJ_TARGET_X86ORX64
     /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
-    case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+    case IR_BSHL: case IR_BSHR: case IR_BSAR:
+      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
+        break;
+      /* fallthrough */
+    case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
         IR(ir->op2)->r = REGSP_HINT(RID_ECX);
         if (inloop)
@@ -1831,14 +2263,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   ASMState *as = &as_;
   MCode *origtop;
 
+  /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
+  {
+    IRRef nins = T->nins;
+    IRIns *ir = &T->ir[nins-1];
+    if (ir->o == IR_NOP || ir->o == IR_RENAME) {
+      do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
+      T->nins = nins;
+    }
+  }
+
   /* Ensure an initialized instruction beyond the last one for HIOP checks. */
-  J->cur.nins = lj_ir_nextins(J);
-  J->cur.ir[J->cur.nins].o = IR_NOP;
+  /* This also allows one RENAME to be added without reallocating curfinal. */
+  as->orignins = lj_ir_nextins(J);
+  J->cur.ir[as->orignins].o = IR_NOP;
 
   /* Setup initial state. Copy some fields to reduce indirections. */
   as->J = J;
   as->T = T;
-  as->ir = T->ir;
+  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
   as->flags = J->flags;
   as->loopref = J->loopref;
   as->realign = NULL;
@@ -1851,12 +2294,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
 
-  do {
+  /*
+  ** This is a loop, because the MCode may have to be (re-)assembled
+  ** multiple times:
+  **
+  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
+  **    backend wants the MCode to be aligned differently.
+  **
+  **    This is currently only the case on x86/x64, where small loops get
+  **    an aligned loop body plus a short branch. Not much effort is wasted,
+  **    because the abort happens very quickly and only once.
+  **
+  ** 2. The IR is immovable, since the MCode embeds pointers to various
+  **    constants inside the IR. But RENAMEs may need to be added to the IR
+  **    during assembly, which might grow and reallocate the IR. We check
+  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
+  **    copy (in J->curfinal.ir) and try again.
+  **
+  **    95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
+  **    2 RENAMEs and only 0.5% have more than that. That's why we opt to
+  **    always have one spare slot in the IR (see above), which means we
+  **    have to redo the assembly for only ~2% of all traces.
+  **
+  **    Very, very rarely, this needs to be done repeatedly, since the
+  **    location of constants inside the IR (actually, reachability from
+  **    a global pointer) may affect register allocation and thus the
+  **    number of RENAMEs.
+  */
+  for (;;) {
     as->mcp = as->mctop;
 #ifdef LUA_USE_ASSERT
     as->mcp_prev = as->mcp;
 #endif
-    as->curins = T->nins;
+    as->ir = J->curfinal->ir;  /* Use the copied IR. */
+    as->curins = J->cur.nins = as->orignins;
+
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
 
@@ -1884,22 +2356,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       checkmclim(as);
       asm_ir(as, ir);
     }
-  } while (as->realign);  /* Retry in case the MCode needs to be realigned. */
 
-  /* Emit head of trace. */
-  RA_DBG_REF();
-  checkmclim(as);
-  if (as->gcsteps > 0) {
-    as->curins = as->T->snap[0].ref;
-    asm_snap_prep(as);  /* The GC check is a guard. */
-    asm_gc_check(as);
+    if (as->realign && J->curfinal->nins >= T->nins)
+      continue;  /* Retry in case only the MCode needs to be realigned. */
+
+    /* Emit head of trace. */
+    RA_DBG_REF();
+    checkmclim(as);
+    if (as->gcsteps > 0) {
+      as->curins = as->T->snap[0].ref;
+      asm_snap_prep(as);  /* The GC check is a guard. */
+      asm_gc_check(as);
+      as->curins = as->stopins;
+    }
+    ra_evictk(as);
+    if (as->parent)
+      asm_head_side(as);
+    else
+      asm_head_root(as);
+    asm_phi_fixup(as);
+
+    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
+      lua_assert(J->curfinal->nk == T->nk);
+      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+             (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
+      T->nins = J->curfinal->nins;
+      break;  /* Done. */
+    }
+
+    /* Otherwise try again with a bigger IR. */
+    lj_trace_free(J2G(J), J->curfinal);
+    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
+    J->curfinal = lj_trace_alloc(J->L, T);
+    as->realign = NULL;
   }
-  ra_evictk(as);
-  if (as->parent)
-    asm_head_side(as);
-  else
-    asm_head_root(as);
-  asm_phi_fixup(as);
 
   RA_DBGX((as, "===== START ===="));
   RA_DBG_FLUSH();
@@ -1912,6 +2402,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   if (!as->loopref)
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+#if LJ_TARGET_MCODE_FIXUP
+  asm_mcode_fixup(T->mcode, T->szmcode);
+#endif
   lj_mcode_sync(T->mcode, origtop);
 }
 
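
The reworked driver in lj_asm_trace() boils down to a copy-assemble-compare-retry pattern around an IR array that the pass itself may grow. A stripped-down sketch of that control flow, with Buf and run_pass standing in for GCtrace and the assembly loop (illustrative, not LuaJIT's API):

#include <stdlib.h>
#include <string.h>

typedef struct Buf { int *ins; size_t n; } Buf;

/* Stand-in for assembly: may append entries, growing live->n
** (the analogue of RENAMEs being added to J->cur.ir). */
static void run_pass(Buf *live) { (void)live; }

static void assemble_retry(Buf *live)
{
  size_t orig = live->n;
  for (;;) {
    Buf copy;
    copy.n = live->n + 1;  /* One spare slot covers ~98% of traces. */
    copy.ins = malloc(copy.n * sizeof(int));
    memcpy(copy.ins, live->ins, live->n * sizeof(int));
    live->n = orig;        /* Restart assembly from the original size. */
    run_pass(live);        /* May append entries, growing live->n. */
    if (copy.n >= live->n) {  /* Copy big enough: merge appended tail. */
      memcpy(copy.ins + orig, live->ins + orig,
             (live->n - orig) * sizeof(int));
      free(copy.ins);      /* The real code keeps the copy as the result. */
      return;
    }
    free(copy.ins);        /* Too small: loop and allocate a larger copy. */
  }
}
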