aboutsummaryrefslogtreecommitdiff
path: root/src/lj_asm.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--src/lj_asm.c425
1 files changed, 384 insertions, 41 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 316e81d6..a80d6adf 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -179,6 +179,12 @@ IRFLDEF(FLOFS)
179#error "Missing instruction emitter for target CPU" 179#error "Missing instruction emitter for target CPU"
180#endif 180#endif
181 181
/* Generic load/store of register from/to stack slot. */
/* Thin wrappers around the target-specific ofs-based emitters, with the
** base register fixed to the stack pointer.
*/
#define emit_spload(as, ir, r, ofs) \
  emit_loadofs(as, ir, (r), RID_SP, (ofs))
#define emit_spstore(as, ir, r, ofs) \
  emit_storeofs(as, ir, (r), RID_SP, (ofs))
187
182/* -- Register allocator debugging ---------------------------------------- */ 188/* -- Register allocator debugging ---------------------------------------- */
183 189
184/* #define LUAJIT_DEBUG_RA */ 190/* #define LUAJIT_DEBUG_RA */
@@ -943,44 +949,6 @@ static void asm_snap_prep(ASMState *as)
943 949
944/* -- Miscellaneous helpers ----------------------------------------------- */ 950/* -- Miscellaneous helpers ----------------------------------------------- */
945 951
946/* Collect arguments from CALL* and CARG instructions. */
947static void asm_collectargs(ASMState *as, IRIns *ir,
948 const CCallInfo *ci, IRRef *args)
949{
950 uint32_t n = CCI_NARGS(ci);
951 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
952 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
953 while (n-- > 1) {
954 ir = IR(ir->op1);
955 lua_assert(ir->o == IR_CARG);
956 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
957 }
958 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
959 lua_assert(IR(ir->op1)->o != IR_CARG);
960}
961
962/* Reconstruct CCallInfo flags for CALLX*. */
963static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
964{
965 uint32_t nargs = 0;
966 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
967 IRIns *ira = IR(ir->op1);
968 nargs++;
969 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
970 }
971#if LJ_HASFFI
972 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
973 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
974 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
975 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
976#if LJ_TARGET_X86
977 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
978#endif
979 }
980#endif
981 return (nargs | (ir->t.irt << CCI_OTSHIFT));
982}
983
984/* Calculate stack adjustment. */ 952/* Calculate stack adjustment. */
985static int32_t asm_stack_adjust(ASMState *as) 953static int32_t asm_stack_adjust(ASMState *as)
986{ 954{
@@ -1065,6 +1033,253 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
1065 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ 1033 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
1066} 1034}
1067 1035
1036/* -- Buffer operations --------------------------------------------------- */
1037
1038static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1039
1040static void asm_bufhdr(ASMState *as, IRIns *ir)
1041{
1042 Reg sb = ra_dest(as, ir, RSET_GPR);
1043 if ((ir->op2 & IRBUFHDR_APPEND)) {
1044 /* Rematerialize const buffer pointer instead of likely spill. */
1045 IRIns *irp = IR(ir->op1);
1046 if (!(ra_hasreg(irp->r) || irp == ir-1 ||
1047 (irp == ir-2 && !ra_used(ir-1)))) {
1048 while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
1049 irp = IR(irp->op1);
1050 if (irref_isk(irp->op1)) {
1051 ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
1052 ir = irp;
1053 }
1054 }
1055 } else {
1056 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1057 /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
1058 emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
1059 emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
1060 }
1061#if LJ_TARGET_X86ORX64
1062 ra_left(as, sb, ir->op1);
1063#else
1064 ra_leftov(as, sb, ir->op1);
1065#endif
1066}
1067
1068static void asm_bufput(ASMState *as, IRIns *ir)
1069{
1070 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1071 IRRef args[3];
1072 IRIns *irs;
1073 int kchar = -1;
1074 args[0] = ir->op1; /* SBuf * */
1075 args[1] = ir->op2; /* GCstr * */
1076 irs = IR(ir->op2);
1077 lua_assert(irt_isstr(irs->t));
1078 if (irs->o == IR_KGC) {
1079 GCstr *s = ir_kstr(irs);
1080 if (s->len == 1) { /* Optimize put of single-char string constant. */
1081 kchar = strdata(s)[0];
1082 args[1] = ASMREF_TMP1; /* int, truncated to char */
1083 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1084 }
1085 } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
1086 if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */
1087 if (irs->op2 == IRTOSTR_NUM) {
1088 args[1] = ASMREF_TMP1; /* TValue * */
1089 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
1090 } else {
1091 lua_assert(irt_isinteger(IR(irs->op1)->t));
1092 args[1] = irs->op1; /* int */
1093 if (irs->op2 == IRTOSTR_INT)
1094 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1095 else
1096 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1097 }
1098 } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */
1099 args[1] = irs->op1; /* const void * */
1100 args[2] = irs->op2; /* MSize */
1101 ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
1102 }
1103 }
1104 asm_setupresult(as, ir, ci); /* SBuf * */
1105 asm_gencall(as, ci, args);
1106 if (args[1] == ASMREF_TMP1) {
1107 Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
1108 if (kchar == -1)
1109 asm_tvptr(as, tmp, irs->op1);
1110 else
1111 ra_allockreg(as, kchar, tmp);
1112 }
1113}
1114
1115static void asm_bufstr(ASMState *as, IRIns *ir)
1116{
1117 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1118 IRRef args[1];
1119 args[0] = ir->op1; /* SBuf *sb */
1120 as->gcsteps++;
1121 asm_setupresult(as, ir, ci); /* GCstr * */
1122 asm_gencall(as, ci, args);
1123}
1124
1125/* -- Type conversions ---------------------------------------------------- */
1126
1127static void asm_tostr(ASMState *as, IRIns *ir)
1128{
1129 const CCallInfo *ci;
1130 IRRef args[2];
1131 args[0] = ASMREF_L;
1132 as->gcsteps++;
1133 if (ir->op2 == IRTOSTR_NUM) {
1134 args[1] = ASMREF_TMP1; /* cTValue * */
1135 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
1136 } else {
1137 args[1] = ir->op1; /* int32_t k */
1138 if (ir->op2 == IRTOSTR_INT)
1139 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1140 else
1141 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
1142 }
1143 asm_setupresult(as, ir, ci); /* GCstr * */
1144 asm_gencall(as, ci, args);
1145 if (ir->op2 == IRTOSTR_NUM)
1146 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
1147}
1148
#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
/* Assemble a 64 bit integer <-> FP conversion (CONV + HIOP pair) as a call. */
static void asm_conv64(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
  IRCallID id;
  IRRef args[2];
  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
  /* Pass the two 32 bit halves in endian order. */
  args[LJ_BE] = (ir-1)->op1;
  args[LJ_LE] = ir->op1;
  if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int. */
    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
    ir--;  /* Result is in the CONV instruction, not the HIOP. */
  } else {  /* 64 bit int to FP. */
    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
  }
  {
#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
#else
    const CCallInfo *ci = &lj_ir_callinfo[id];
#endif
    asm_setupresult(as, ir, ci);
    asm_gencall(as, ci, args);
  }
}
#endif
1177
1178/* -- Memory references --------------------------------------------------- */
1179
1180static void asm_newref(ASMState *as, IRIns *ir)
1181{
1182 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1183 IRRef args[3];
1184 if (ir->r == RID_SINK)
1185 return;
1186 args[0] = ASMREF_L; /* lua_State *L */
1187 args[1] = ir->op1; /* GCtab *t */
1188 args[2] = ASMREF_TMP1; /* cTValue *key */
1189 asm_setupresult(as, ir, ci); /* TValue * */
1190 asm_gencall(as, ci, args);
1191 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
1192}
1193
1194/* -- Calls --------------------------------------------------------------- */
1195
1196/* Collect arguments from CALL* and CARG instructions. */
1197static void asm_collectargs(ASMState *as, IRIns *ir,
1198 const CCallInfo *ci, IRRef *args)
1199{
1200 uint32_t n = CCI_XNARGS(ci);
1201 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
1202 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1203 while (n-- > 1) {
1204 ir = IR(ir->op1);
1205 lua_assert(ir->o == IR_CARG);
1206 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1207 }
1208 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1209 lua_assert(IR(ir->op1)->o != IR_CARG);
1210}
1211
1212/* Reconstruct CCallInfo flags for CALLX*. */
1213static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1214{
1215 uint32_t nargs = 0;
1216 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
1217 IRIns *ira = IR(ir->op1);
1218 nargs++;
1219 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1220 }
1221#if LJ_HASFFI
1222 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
1223 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1224 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1225 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1226#if LJ_TARGET_X86
1227 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1228#endif
1229 }
1230#endif
1231 return (nargs | (ir->t.irt << CCI_OTSHIFT));
1232}
1233
1234static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
1235{
1236 const CCallInfo *ci = &lj_ir_callinfo[id];
1237 IRRef args[2];
1238 args[0] = ir->op1;
1239 args[1] = ir->op2;
1240 asm_setupresult(as, ir, ci);
1241 asm_gencall(as, ci, args);
1242}
1243
1244static void asm_call(ASMState *as, IRIns *ir)
1245{
1246 IRRef args[CCI_NARGS_MAX];
1247 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1248 asm_collectargs(as, ir, ci, args);
1249 asm_setupresult(as, ir, ci);
1250 asm_gencall(as, ci, args);
1251}
1252
1253#if !LJ_SOFTFP
1254static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref);
1255
1256#if !LJ_TARGET_X86ORX64
1257static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1258{
1259 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1260 IRRef args[2];
1261 args[0] = lref;
1262 args[1] = rref;
1263 asm_setupresult(as, ir, ci);
1264 asm_gencall(as, ci, args);
1265}
1266#endif
1267
1268static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
1269{
1270 IRIns *irp = IR(ir->op1);
1271 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1272 IRIns *irpp = IR(irp->op1);
1273 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1274 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1275 asm_fppow(as, ir, irpp->op1, irp->op2);
1276 return 1;
1277 }
1278 }
1279 return 0;
1280}
1281#endif
1282
1068/* -- PHI and loop handling ----------------------------------------------- */ 1283/* -- PHI and loop handling ----------------------------------------------- */
1069 1284
1070/* Break a PHI cycle by renaming to a free register (evict if needed). */ 1285/* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1336,6 +1551,124 @@ static void asm_loop(ASMState *as)
1336#error "Missing assembler for target CPU" 1551#error "Missing assembler for target CPU"
1337#endif 1552#endif
1338 1553
1554/* -- Instruction dispatch ------------------------------------------------ */
1555
1556/* Assemble a single instruction. */
1557static void asm_ir(ASMState *as, IRIns *ir)
1558{
1559 switch ((IROp)ir->o) {
1560 /* Miscellaneous ops. */
1561 case IR_LOOP: asm_loop(as); break;
1562 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1563 case IR_USE:
1564 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1565 case IR_PHI: asm_phi(as, ir); break;
1566 case IR_HIOP: asm_hiop(as, ir); break;
1567 case IR_GCSTEP: asm_gcstep(as, ir); break;
1568
1569 /* Guarded assertions. */
1570 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1571 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1572 case IR_ABC:
1573 asm_comp(as, ir);
1574 break;
1575 case IR_EQ: case IR_NE:
1576 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1577 as->curins--;
1578 asm_href(as, ir-1, (IROp)ir->o);
1579 } else {
1580 asm_equal(as, ir);
1581 }
1582 break;
1583
1584 case IR_RETF: asm_retf(as, ir); break;
1585
1586 /* Bit ops. */
1587 case IR_BNOT: asm_bnot(as, ir); break;
1588 case IR_BSWAP: asm_bswap(as, ir); break;
1589 case IR_BAND: asm_band(as, ir); break;
1590 case IR_BOR: asm_bor(as, ir); break;
1591 case IR_BXOR: asm_bxor(as, ir); break;
1592 case IR_BSHL: asm_bshl(as, ir); break;
1593 case IR_BSHR: asm_bshr(as, ir); break;
1594 case IR_BSAR: asm_bsar(as, ir); break;
1595 case IR_BROL: asm_brol(as, ir); break;
1596 case IR_BROR: asm_bror(as, ir); break;
1597
1598 /* Arithmetic ops. */
1599 case IR_ADD: asm_add(as, ir); break;
1600 case IR_SUB: asm_sub(as, ir); break;
1601 case IR_MUL: asm_mul(as, ir); break;
1602 case IR_DIV: asm_div(as, ir); break;
1603 case IR_MOD: asm_mod(as, ir); break;
1604 case IR_POW: asm_pow(as, ir); break;
1605 case IR_NEG: asm_neg(as, ir); break;
1606 case IR_ABS: asm_abs(as, ir); break;
1607 case IR_ATAN2: asm_atan2(as, ir); break;
1608 case IR_LDEXP: asm_ldexp(as, ir); break;
1609 case IR_MIN: asm_min(as, ir); break;
1610 case IR_MAX: asm_max(as, ir); break;
1611 case IR_FPMATH: asm_fpmath(as, ir); break;
1612
1613 /* Overflow-checking arithmetic ops. */
1614 case IR_ADDOV: asm_addov(as, ir); break;
1615 case IR_SUBOV: asm_subov(as, ir); break;
1616 case IR_MULOV: asm_mulov(as, ir); break;
1617
1618 /* Memory references. */
1619 case IR_AREF: asm_aref(as, ir); break;
1620 case IR_HREF: asm_href(as, ir, 0); break;
1621 case IR_HREFK: asm_hrefk(as, ir); break;
1622 case IR_NEWREF: asm_newref(as, ir); break;
1623 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1624 case IR_FREF: asm_fref(as, ir); break;
1625 case IR_STRREF: asm_strref(as, ir); break;
1626
1627 /* Loads and stores. */
1628 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1629 asm_ahuvload(as, ir);
1630 break;
1631 case IR_FLOAD: asm_fload(as, ir); break;
1632 case IR_XLOAD: asm_xload(as, ir); break;
1633 case IR_SLOAD: asm_sload(as, ir); break;
1634
1635 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1636 case IR_FSTORE: asm_fstore(as, ir); break;
1637 case IR_XSTORE: asm_xstore(as, ir); break;
1638
1639 /* Allocations. */
1640 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1641 case IR_TNEW: asm_tnew(as, ir); break;
1642 case IR_TDUP: asm_tdup(as, ir); break;
1643 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1644
1645 /* Buffer operations. */
1646 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1647 case IR_BUFPUT: asm_bufput(as, ir); break;
1648 case IR_BUFSTR: asm_bufstr(as, ir); break;
1649
1650 /* Write barriers. */
1651 case IR_TBAR: asm_tbar(as, ir); break;
1652 case IR_OBAR: asm_obar(as, ir); break;
1653
1654 /* Type conversions. */
1655 case IR_TOBIT: asm_tobit(as, ir); break;
1656 case IR_CONV: asm_conv(as, ir); break;
1657 case IR_TOSTR: asm_tostr(as, ir); break;
1658 case IR_STRTO: asm_strto(as, ir); break;
1659
1660 /* Calls. */
1661 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1662 case IR_CALLXS: asm_callx(as, ir); break;
1663 case IR_CARG: break;
1664
1665 default:
1666 setintV(&as->J->errinfo, ir->o);
1667 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1668 break;
1669 }
1670}
1671
1339/* -- Head of trace ------------------------------------------------------- */ 1672/* -- Head of trace ------------------------------------------------------- */
1340 1673
1341/* Head of a root trace. */ 1674/* Head of a root trace. */
@@ -1714,10 +2047,20 @@ static void asm_setup_regsp(ASMState *as)
1714 /* fallthrough */ 2047 /* fallthrough */
1715#endif 2048#endif
1716 /* C calls evict all scratch regs and return results in RID_RET. */ 2049 /* C calls evict all scratch regs and return results in RID_RET. */
1717 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 2050 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
1718 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2051 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1719 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2052 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
1720 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 2053#if LJ_TARGET_X86 && LJ_HASFFI
2054 if (0) {
2055 case IR_CNEW:
2056 if (ir->op2 != REF_NIL && as->evenspill < 4)
2057 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2058 }
2059#else
2060 case IR_CNEW:
2061#endif
2062 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2063 case IR_BUFSTR:
1721 ir->prev = REGSP_HINT(RID_RET); 2064 ir->prev = REGSP_HINT(RID_RET);
1722 if (inloop) 2065 if (inloop)
1723 as->modset = RSET_SCRATCH; 2066 as->modset = RSET_SCRATCH;
@@ -1753,7 +2096,7 @@ static void asm_setup_regsp(ASMState *as)
1753 break; 2096 break;
1754 case IR_FPMATH: 2097 case IR_FPMATH:
1755#if LJ_TARGET_X86ORX64 2098#if LJ_TARGET_X86ORX64
1756 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 2099 if (ir->op2 == IRFPM_EXP2) { /* May be joined to pow. */
1757 ir->prev = REGSP_HINT(RID_XMM0); 2100 ir->prev = REGSP_HINT(RID_XMM0);
1758#if !LJ_64 2101#if !LJ_64
1759 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ 2102 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */