diff options
Diffstat (limited to '')
-rw-r--r-- | src/lj_asm.c | 504 |
1 files changed, 436 insertions, 68 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index a5e0c01e..94d7bfc4 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -179,6 +179,12 @@ IRFLDEF(FLOFS) | |||
179 | #error "Missing instruction emitter for target CPU" | 179 | #error "Missing instruction emitter for target CPU" |
180 | #endif | 180 | #endif |
181 | 181 | ||
182 | /* Generic load/store of register r from/to the stack slot at offset ofs. Thin wrappers around emit_loadofs/emit_storeofs that pass RID_SP as the base register. */ | ||
183 | #define emit_spload(as, ir, r, ofs) \ | ||
184 | emit_loadofs(as, ir, (r), RID_SP, (ofs)) | ||
185 | #define emit_spstore(as, ir, r, ofs) \ | ||
186 | emit_storeofs(as, ir, (r), RID_SP, (ofs)) | ||
187 | |||
182 | /* -- Register allocator debugging ---------------------------------------- */ | 188 | /* -- Register allocator debugging ---------------------------------------- */ |
183 | 189 | ||
184 | /* #define LUAJIT_DEBUG_RA */ | 190 | /* #define LUAJIT_DEBUG_RA */ |
@@ -336,7 +342,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) | |||
336 | emit_getgl(as, r, jit_base); | 342 | emit_getgl(as, r, jit_base); |
337 | } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { | 343 | } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { |
338 | lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ | 344 | lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ |
339 | emit_getgl(as, r, jit_L); | 345 | emit_getgl(as, r, cur_L); |
340 | #if LJ_64 | 346 | #if LJ_64 |
341 | } else if (ir->o == IR_KINT64) { | 347 | } else if (ir->o == IR_KINT64) { |
342 | emit_loadu64(as, r, ir_kint64(ir)->u64); | 348 | emit_loadu64(as, r, ir_kint64(ir)->u64); |
@@ -694,7 +700,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) | |||
694 | emit_loadu64(as, dest, ir_kint64(ir)->u64); | 700 | emit_loadu64(as, dest, ir_kint64(ir)->u64); |
695 | return; | 701 | return; |
696 | #endif | 702 | #endif |
697 | } else { | 703 | } else if (ir->o != IR_KPRI) { |
698 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | 704 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || |
699 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); | 705 | ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); |
700 | emit_loadi(as, dest, ir->i); | 706 | emit_loadi(as, dest, ir->i); |
@@ -944,44 +950,6 @@ static void asm_snap_prep(ASMState *as) | |||
944 | 950 | ||
945 | /* -- Miscellaneous helpers ----------------------------------------------- */ | 951 | /* -- Miscellaneous helpers ----------------------------------------------- */ |
946 | 952 | ||
947 | /* Collect arguments from CALL* and CARG instructions. */ | ||
/* Fills args[] with the IR refs of the call arguments; REF_NIL operands are stored as 0. */
/* The argument count comes from CCI_NARGS(ci). */
948 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
949 | const CCallInfo *ci, IRRef *args) | ||
950 | { | ||
951 | uint32_t n = CCI_NARGS(ci); | ||
952 | lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ | ||
/* Calls flagged CCI_L get ASMREF_L as their first argument; the remaining slots follow it. */
953 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | ||
/* Follow the op1 chain of CARG instructions, filling args[] from the highest index down. */
954 | while (n-- > 1) { | ||
955 | ir = IR(ir->op1); | ||
956 | lua_assert(ir->o == IR_CARG); | ||
957 | args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; | ||
958 | } | ||
/* The op1 at the end of the chain is the first argument and must not itself be a CARG. */
959 | args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; | ||
960 | lua_assert(IR(ir->op1)->o != IR_CARG); | ||
961 | } | ||
962 | |||
963 | /* Reconstruct CCallInfo flags for CALLX*. */ | ||
/* Returns the argument count OR'ed with optional FFI flags and with ir->t.irt shifted by CCI_OTSHIFT. */
964 | static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) | ||
965 | { | ||
966 | uint32_t nargs = 0; | ||
967 | if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ | ||
968 | IRIns *ira = IR(ir->op1); | ||
969 | nargs++; | ||
/* One more argument for every CARG found along the op1 chain. */
970 | while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } | ||
971 | } | ||
972 | #if LJ_HASFFI | ||
/* A CARG in op2 carries a constant ctype id in its own op2; that ctype supplies the vararg flag and, on x86, the calling convention. */
973 | if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ | ||
974 | CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; | ||
975 | CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); | ||
976 | nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); | ||
977 | #if LJ_TARGET_X86 | ||
978 | nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); | ||
979 | #endif | ||
980 | } | ||
981 | #endif | ||
982 | return (nargs | (ir->t.irt << CCI_OTSHIFT)); | ||
983 | } | ||
984 | |||
985 | /* Calculate stack adjustment. */ | 953 | /* Calculate stack adjustment. */ |
986 | static int32_t asm_stack_adjust(ASMState *as) | 954 | static int32_t asm_stack_adjust(ASMState *as) |
987 | { | 955 | { |
@@ -1066,6 +1034,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir) | |||
1066 | as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ | 1034 | as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ |
1067 | } | 1035 | } |
1068 | 1036 | ||
1037 | /* -- Buffer operations --------------------------------------------------- */ | ||
1038 | |||
1039 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref); | ||
1040 | |||
/* Assemble IR_BUFHDR: bind the buffer pointer operand (ir->op1) to the destination GPR. */
/* Without IRBUFHDR_APPEND, also emit a load of SBuf.b and a store to SBuf.p through a scratch register. */
/* With IRBUFHDR_APPEND, a constant buffer pointer may be taken from the originating non-append BUFHDR. */
1041 | static void asm_bufhdr(ASMState *as, IRIns *ir) | ||
1042 | { | ||
1043 | Reg sb = ra_dest(as, ir, RSET_GPR); | ||
1044 | if ((ir->op2 & IRBUFHDR_APPEND)) { | ||
1045 | /* Rematerialize const buffer pointer instead of likely spill. */ | ||
1046 | IRIns *irp = IR(ir->op1); | ||
/* Only attempt this when the previous buffer op has no register and is neither ir-1 nor ir-2 with ir-1 unused. */
1047 | if (!(ra_hasreg(irp->r) || irp == ir-1 || | ||
1048 | (irp == ir-2 && !ra_used(ir-1)))) { | ||
/* Walk the op1 chain back to the BUFHDR that does not have IRBUFHDR_APPEND set. */
1049 | while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND))) | ||
1050 | irp = IR(irp->op1); | ||
/* If that header's operand is a constant, switch ir to it so the ra_left/ra_leftov below uses the constant ref. */
1051 | if (irref_isk(irp->op1)) { | ||
1052 | ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR)); | ||
1053 | ir = irp; | ||
1054 | } | ||
1055 | } | ||
1056 | } else { | ||
/* NOTE(review): the store is emitted before the load; presumably machine code is generated in reverse order, so the load runs first -- confirm. */
1057 | Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | ||
1058 | /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */ | ||
1059 | emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); | ||
1060 | emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); | ||
1061 | } | ||
1062 | #if LJ_TARGET_X86ORX64 | ||
1063 | ra_left(as, sb, ir->op1); | ||
1064 | #else | ||
1065 | ra_leftov(as, sb, ir->op1); | ||
1066 | #endif | ||
1067 | } | ||
1068 | |||
/* Assemble IR_BUFPUT: emit a call that appends the string operand (ir->op2) to the buffer (ir->op1). */
/* The default callee is lj_buf_putstr. Depending on how the string operand is produced, a more specific callee is chosen: */
/* lj_buf_putchar, lj_strfmt_putnum, lj_strfmt_putint or lj_buf_putmem. */
1069 | static void asm_bufput(ASMState *as, IRIns *ir) | ||
1070 | { | ||
1071 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; | ||
1072 | IRRef args[3]; | ||
1073 | IRIns *irs; | ||
/* kchar stays -1 unless the operand is a one-character constant string. */
1074 | int kchar = -1; | ||
1075 | args[0] = ir->op1; /* SBuf * */ | ||
1076 | args[1] = ir->op2; /* GCstr * */ | ||
1077 | irs = IR(ir->op2); | ||
1078 | lua_assert(irt_isstr(irs->t)); | ||
1079 | if (irs->o == IR_KGC) { | ||
1080 | GCstr *s = ir_kstr(irs); | ||
1081 | if (s->len == 1) { /* Optimize put of single-char string constant. */ | ||
1082 | kchar = strdata(s)[0]; | ||
1083 | args[1] = ASMREF_TMP1; /* int, truncated to char */ | ||
1084 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; | ||
1085 | } | ||
/* Fusion is only tried when mayfuse() allows it and the string operand has no register assigned. */
1086 | } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) { | ||
1087 | if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */ | ||
1088 | if (irs->op2 == IRTOSTR_NUM) { | ||
1089 | args[1] = ASMREF_TMP1; /* TValue * */ | ||
1090 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum]; | ||
1091 | } else { | ||
1092 | lua_assert(irt_isinteger(IR(irs->op1)->t)); | ||
1093 | args[1] = irs->op1; /* int */ | ||
1094 | if (irs->op2 == IRTOSTR_INT) | ||
1095 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint]; | ||
1096 | else | ||
1097 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; | ||
1098 | } | ||
1099 | } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */ | ||
1100 | args[1] = irs->op1; /* const void * */ | ||
1101 | args[2] = irs->op2; /* MSize */ | ||
1102 | ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem]; | ||
1103 | } | ||
1104 | } | ||
1105 | asm_setupresult(as, ir, ci); /* SBuf * */ | ||
1106 | asm_gencall(as, ci, args); | ||
/* If the second argument goes through TMP1, fill it after the call setup: */
/* a TValue pointer for the fused number operand, or the constant character. */
1107 | if (args[1] == ASMREF_TMP1) { | ||
1108 | Reg tmp = ra_releasetmp(as, ASMREF_TMP1); | ||
1109 | if (kchar == -1) | ||
1110 | asm_tvptr(as, tmp, irs->op1); | ||
1111 | else | ||
1112 | ra_allockreg(as, kchar, tmp); | ||
1113 | } | ||
1114 | } | ||
1115 | |||
/* Assemble IR_BUFSTR: emit a call to lj_buf_tostr with the buffer (ir->op1) as its only argument. */
1116 | static void asm_bufstr(ASMState *as, IRIns *ir) | ||
1117 | { | ||
1118 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; | ||
1119 | IRRef args[1]; | ||
1120 | args[0] = ir->op1; /* SBuf *sb */ | ||
/* NOTE(review): gcsteps is bumped presumably because the call allocates a string -- confirm. */
1121 | as->gcsteps++; | ||
1122 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
1123 | asm_gencall(as, ci, args); | ||
1124 | } | ||
1125 | |||
1126 | /* -- Type conversions ---------------------------------------------------- */ | ||
1127 | |||
/* Assemble IR_TOSTR: emit a call that converts ir->op1 to a string. */
/* ir->op2 selects the callee: lj_strfmt_num for IRTOSTR_NUM, lj_strfmt_int for IRTOSTR_INT, otherwise lj_strfmt_char. */
1128 | static void asm_tostr(ASMState *as, IRIns *ir) | ||
1129 | { | ||
1130 | const CCallInfo *ci; | ||
1131 | IRRef args[2]; | ||
/* ASMREF_L is always passed as the first argument. */
1132 | args[0] = ASMREF_L; | ||
1133 | as->gcsteps++; | ||
1134 | if (ir->op2 == IRTOSTR_NUM) { | ||
1135 | args[1] = ASMREF_TMP1; /* cTValue * */ | ||
1136 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num]; | ||
1137 | } else { | ||
1138 | args[1] = ir->op1; /* int32_t k */ | ||
1139 | if (ir->op2 == IRTOSTR_INT) | ||
1140 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int]; | ||
1141 | else | ||
1142 | ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char]; | ||
1143 | } | ||
1144 | asm_setupresult(as, ir, ci); /* GCstr * */ | ||
1145 | asm_gencall(as, ci, args); | ||
/* For numbers, TMP1 is set up via asm_tvptr to point at the TValue for ir->op1. */
1146 | if (ir->op2 == IRTOSTR_NUM) | ||
1147 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); | ||
1148 | } | ||
1149 | |||
1150 | #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 | ||
/* Assemble a 64 bit conversion represented by a CONV at ir-1 and a HIOP at ir, using an fp64_* helper call. */
/* Source and destination types are decoded from the op2 field of the CONV instruction. */
1151 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
1152 | { | ||
1153 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
1154 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
1155 | IRCallID id; | ||
1156 | IRRef args[2]; | ||
1157 | lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP); | ||
/* Argument order depends on endianness: the CONV operand goes to index LJ_BE, the HIOP operand to index LJ_LE. */
1158 | args[LJ_BE] = (ir-1)->op1; | ||
1159 | args[LJ_LE] = ir->op1; | ||
/* Pick the helper by offsetting from a base call id. */
/* NOTE(review): this relies on the ordering of the IRCALL_fp64_* ids and of the IRT_* types -- confirm. */
1160 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
1161 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
/* For an FP source, the result is set up on the CONV instruction (ir-1) instead of the HIOP. */
1162 | ir--; | ||
1163 | } else { | ||
1164 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
1165 | } | ||
1166 | { | ||
1167 | #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP | ||
/* Work on a local copy of the call info so CCI_VARARG can be added without touching lj_ir_callinfo[]. */
1168 | CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; | ||
1169 | cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ | ||
1170 | #else | ||
1171 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
1172 | #endif | ||
1173 | asm_setupresult(as, ir, ci); | ||
1174 | asm_gencall(as, ci, args); | ||
1175 | } | ||
1176 | } | ||
1177 | #endif | ||
1178 | |||
1179 | /* -- Memory references --------------------------------------------------- */ | ||
1180 | |||
/* Assemble IR_NEWREF: emit a call to lj_tab_newkey with ASMREF_L, the table (ir->op1) and a pointer to the key (ir->op2). */
1181 | static void asm_newref(ASMState *as, IRIns *ir) | ||
1182 | { | ||
1183 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | ||
1184 | IRRef args[3]; | ||
/* Nothing is emitted for an instruction marked RID_SINK. */
1185 | if (ir->r == RID_SINK) | ||
1186 | return; | ||
1187 | args[0] = ASMREF_L; /* lua_State *L */ | ||
1188 | args[1] = ir->op1; /* GCtab *t */ | ||
1189 | args[2] = ASMREF_TMP1; /* cTValue *key */ | ||
1190 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
1191 | asm_gencall(as, ci, args); | ||
/* TMP1 is set up via asm_tvptr to point at the key TValue. */
1192 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
1193 | } | ||
1194 | |||
/* Assemble IR_LREF: bind ASMREF_L to a destination GPR. */
/* x86/x64 uses ra_left, all other targets use ra_leftov. */
1195 | static void asm_lref(ASMState *as, IRIns *ir) | ||
1196 | { | ||
1197 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1198 | #if LJ_TARGET_X86ORX64 | ||
1199 | ra_left(as, r, ASMREF_L); | ||
1200 | #else | ||
1201 | ra_leftov(as, r, ASMREF_L); | ||
1202 | #endif | ||
1203 | } | ||
1204 | |||
1205 | /* -- Calls --------------------------------------------------------------- */ | ||
1206 | |||
1207 | /* Collect arguments from CALL* and CARG instructions. */ | ||
/* Fills args[] with the IR refs of the call arguments; REF_NIL operands are stored as 0. */
/* The argument count comes from CCI_XNARGS(ci). */
1208 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
1209 | const CCallInfo *ci, IRRef *args) | ||
1210 | { | ||
1211 | uint32_t n = CCI_XNARGS(ci); | ||
1212 | lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ | ||
/* Calls flagged CCI_L get ASMREF_L as their first argument; the remaining slots follow it. */
1213 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | ||
/* Follow the op1 chain of CARG instructions, filling args[] from the highest index down. */
1214 | while (n-- > 1) { | ||
1215 | ir = IR(ir->op1); | ||
1216 | lua_assert(ir->o == IR_CARG); | ||
1217 | args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; | ||
1218 | } | ||
/* The op1 at the end of the chain is the first argument and must not itself be a CARG. */
1219 | args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; | ||
1220 | lua_assert(IR(ir->op1)->o != IR_CARG); | ||
1221 | } | ||
1222 | |||
1223 | /* Reconstruct CCallInfo flags for CALLX*. */ | ||
/* Returns the argument count OR'ed with optional FFI flags and with ir->t.irt shifted by CCI_OTSHIFT. */
1224 | static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) | ||
1225 | { | ||
1226 | uint32_t nargs = 0; | ||
1227 | if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ | ||
1228 | IRIns *ira = IR(ir->op1); | ||
1229 | nargs++; | ||
/* One more argument for every CARG found along the op1 chain. */
1230 | while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } | ||
1231 | } | ||
1232 | #if LJ_HASFFI | ||
/* A CARG in op2 carries a constant ctype id in its own op2; that ctype supplies the vararg flag and, on x86, the calling convention. */
1233 | if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ | ||
1234 | CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; | ||
1235 | CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); | ||
1236 | nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); | ||
1237 | #if LJ_TARGET_X86 | ||
1238 | nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); | ||
1239 | #endif | ||
1240 | } | ||
1241 | #endif | ||
1242 | return (nargs | (ir->t.irt << CCI_OTSHIFT)); | ||
1243 | } | ||
1244 | |||
/* Assemble a call to the lj_ir_callinfo[] entry selected by id, passing ir->op1 and ir->op2 as the two arguments. */
1245 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | ||
1246 | { | ||
1247 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
1248 | IRRef args[2]; | ||
1249 | args[0] = ir->op1; | ||
1250 | args[1] = ir->op2; | ||
1251 | asm_setupresult(as, ir, ci); | ||
1252 | asm_gencall(as, ci, args); | ||
1253 | } | ||
1254 | |||
/* Assemble a call instruction whose callee is lj_ir_callinfo[ir->op2]; arguments are gathered by asm_collectargs. */
/* NOTE(review): args holds CCI_NARGS_MAX entries, while asm_collectargs asserts n <= CCI_NARGS_MAX*2 -- confirm split args cannot reach this path. */
1255 | static void asm_call(ASMState *as, IRIns *ir) | ||
1256 | { | ||
1257 | IRRef args[CCI_NARGS_MAX]; | ||
1258 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
1259 | asm_collectargs(as, ir, ci, args); | ||
1260 | asm_setupresult(as, ir, ci); | ||
1261 | asm_gencall(as, ci, args); | ||
1262 | } | ||
1263 | |||
1264 | #if !LJ_SOFTFP | ||
/* Emit a call to the IRCALL_pow entry with lref and rref as its two arguments; the result is set up for ir. */
1265 | static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) | ||
1266 | { | ||
1267 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | ||
1268 | IRRef args[2]; | ||
1269 | args[0] = lref; | ||
1270 | args[1] = rref; | ||
1271 | asm_setupresult(as, ir, ci); | ||
1272 | asm_gencall(as, ci, args); | ||
1273 | } | ||
1274 | |||
/* Try to join a three-instruction sequence ending at ir into a single pow() call. */
/* Returns 1 if the call was emitted, 0 if the pattern did not match and nothing was emitted. */
/* NOTE(review): ir's own opcode is not checked here; presumably the caller only passes an exp2 FPMATH -- confirm. */
1275 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | ||
1276 | { | ||
1277 | IRIns *irp = IR(ir->op1); | ||
/* ir's operand must be the directly preceding MUL and have no other use. */
1278 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | ||
1279 | IRIns *irpp = IR(irp->op1); | ||
/* The MUL's left operand must be an otherwise unused FPMATH LOG2 at ir-2. */
1280 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | ||
1281 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | ||
/* Call pow with the LOG2 operand as the base and the MUL's right operand as the exponent. */
1282 | asm_fppow(as, ir, irpp->op1, irp->op2); | ||
1283 | return 1; | ||
1284 | } | ||
1285 | } | ||
1286 | return 0; | ||
1287 | } | ||
1288 | #endif | ||
1289 | |||
1069 | /* -- PHI and loop handling ----------------------------------------------- */ | 1290 | /* -- PHI and loop handling ----------------------------------------------- */ |
1070 | 1291 | ||
1071 | /* Break a PHI cycle by renaming to a free register (evict if needed). */ | 1292 | /* Break a PHI cycle by renaming to a free register (evict if needed). */ |
@@ -1339,6 +1560,136 @@ static void asm_loop(ASMState *as) | |||
1339 | #error "Missing assembler for target CPU" | 1560 | #error "Missing assembler for target CPU" |
1340 | #endif | 1561 | #endif |
1341 | 1562 | ||
1563 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
1564 | |||
1565 | /* Assemble a single instruction: dispatch on the IR opcode (ir->o) to the matching asm_* emitter. Opcodes without a case raise the LJ_TRERR_NYIIR trace error. */ | ||
1566 | static void asm_ir(ASMState *as, IRIns *ir) | ||
1567 | { | ||
1568 | switch ((IROp)ir->o) { | ||
1569 | /* Miscellaneous ops. */ | ||
1570 | case IR_LOOP: asm_loop(as); break; | ||
1571 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
/* IR_USE only allocates a register for its operand, from the FPR or GPR set depending on the type. */
1572 | case IR_USE: | ||
1573 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
1574 | case IR_PHI: asm_phi(as, ir); break; | ||
1575 | case IR_HIOP: asm_hiop(as, ir); break; | ||
1576 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
1577 | case IR_PROF: asm_prof(as, ir); break; | ||
1578 | |||
1579 | /* Guarded assertions. */ | ||
1580 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
1581 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
1582 | case IR_ABC: | ||
1583 | asm_comp(as, ir); | ||
1584 | break; | ||
1585 | case IR_EQ: case IR_NE: | ||
/* If the comparison's left operand is the directly preceding HREF, assemble both together via asm_href and step curins past the HREF. */
1586 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
1587 | as->curins--; | ||
1588 | asm_href(as, ir-1, (IROp)ir->o); | ||
1589 | } else { | ||
1590 | asm_equal(as, ir); | ||
1591 | } | ||
1592 | break; | ||
1593 | |||
1594 | case IR_RETF: asm_retf(as, ir); break; | ||
1595 | |||
1596 | /* Bit ops. */ | ||
1597 | case IR_BNOT: asm_bnot(as, ir); break; | ||
1598 | case IR_BSWAP: asm_bswap(as, ir); break; | ||
1599 | case IR_BAND: asm_band(as, ir); break; | ||
1600 | case IR_BOR: asm_bor(as, ir); break; | ||
1601 | case IR_BXOR: asm_bxor(as, ir); break; | ||
1602 | case IR_BSHL: asm_bshl(as, ir); break; | ||
1603 | case IR_BSHR: asm_bshr(as, ir); break; | ||
1604 | case IR_BSAR: asm_bsar(as, ir); break; | ||
1605 | case IR_BROL: asm_brol(as, ir); break; | ||
1606 | case IR_BROR: asm_bror(as, ir); break; | ||
1607 | |||
1608 | /* Arithmetic ops. */ | ||
1609 | case IR_ADD: asm_add(as, ir); break; | ||
1610 | case IR_SUB: asm_sub(as, ir); break; | ||
1611 | case IR_MUL: asm_mul(as, ir); break; | ||
1612 | case IR_MOD: asm_mod(as, ir); break; | ||
1613 | case IR_NEG: asm_neg(as, ir); break; | ||
1614 | #if LJ_SOFTFP | ||
1615 | case IR_DIV: case IR_POW: case IR_ABS: | ||
1616 | case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: | ||
1617 | lua_assert(0); /* Unused for LJ_SOFTFP. */ | ||
1618 | break; | ||
1619 | #else | ||
1620 | case IR_DIV: asm_div(as, ir); break; | ||
1621 | case IR_POW: asm_pow(as, ir); break; | ||
1622 | case IR_ABS: asm_abs(as, ir); break; | ||
1623 | case IR_ATAN2: asm_atan2(as, ir); break; | ||
1624 | case IR_LDEXP: asm_ldexp(as, ir); break; | ||
1625 | case IR_FPMATH: asm_fpmath(as, ir); break; | ||
1626 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
1627 | #endif | ||
1628 | case IR_MIN: asm_min(as, ir); break; | ||
1629 | case IR_MAX: asm_max(as, ir); break; | ||
1630 | |||
1631 | /* Overflow-checking arithmetic ops. */ | ||
1632 | case IR_ADDOV: asm_addov(as, ir); break; | ||
1633 | case IR_SUBOV: asm_subov(as, ir); break; | ||
1634 | case IR_MULOV: asm_mulov(as, ir); break; | ||
1635 | |||
1636 | /* Memory references. */ | ||
1637 | case IR_AREF: asm_aref(as, ir); break; | ||
1638 | case IR_HREF: asm_href(as, ir, 0); break; | ||
1639 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
1640 | case IR_NEWREF: asm_newref(as, ir); break; | ||
1641 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
1642 | case IR_FREF: asm_fref(as, ir); break; | ||
1643 | case IR_STRREF: asm_strref(as, ir); break; | ||
1644 | case IR_LREF: asm_lref(as, ir); break; | ||
1645 | |||
1646 | /* Loads and stores. */ | ||
1647 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
1648 | asm_ahuvload(as, ir); | ||
1649 | break; | ||
1650 | case IR_FLOAD: asm_fload(as, ir); break; | ||
1651 | case IR_XLOAD: asm_xload(as, ir); break; | ||
1652 | case IR_SLOAD: asm_sload(as, ir); break; | ||
1653 | |||
1654 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
1655 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
1656 | case IR_XSTORE: asm_xstore(as, ir); break; | ||
1657 | |||
1658 | /* Allocations. */ | ||
1659 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
1660 | case IR_TNEW: asm_tnew(as, ir); break; | ||
1661 | case IR_TDUP: asm_tdup(as, ir); break; | ||
1662 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
1663 | |||
1664 | /* Buffer operations. */ | ||
1665 | case IR_BUFHDR: asm_bufhdr(as, ir); break; | ||
1666 | case IR_BUFPUT: asm_bufput(as, ir); break; | ||
1667 | case IR_BUFSTR: asm_bufstr(as, ir); break; | ||
1668 | |||
1669 | /* Write barriers. */ | ||
1670 | case IR_TBAR: asm_tbar(as, ir); break; | ||
1671 | case IR_OBAR: asm_obar(as, ir); break; | ||
1672 | |||
1673 | /* Type conversions. */ | ||
1674 | case IR_CONV: asm_conv(as, ir); break; | ||
1675 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
1676 | case IR_STRTO: asm_strto(as, ir); break; | ||
1677 | |||
1678 | /* Calls. */ | ||
/* IR_CALLA bumps gcsteps and is then assembled like the other CALL* variants. */
1679 | case IR_CALLA: | ||
1680 | as->gcsteps++; | ||
1681 | /* fallthrough */ | ||
1682 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
1683 | case IR_CALLXS: asm_callx(as, ir); break; | ||
/* IR_CARG emits nothing by itself. */
1684 | case IR_CARG: break; | ||
1685 | |||
/* Any other opcode: store the opcode number in J->errinfo and report LJ_TRERR_NYIIR. */
1686 | default: | ||
1687 | setintV(&as->J->errinfo, ir->o); | ||
1688 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
1689 | break; | ||
1690 | } | ||
1691 | } | ||
1692 | |||
1342 | /* -- Head of trace ------------------------------------------------------- */ | 1693 | /* -- Head of trace ------------------------------------------------------- */ |
1343 | 1694 | ||
1344 | /* Head of a root trace. */ | 1695 | /* Head of a root trace. */ |
@@ -1569,7 +1920,7 @@ static void asm_tail_link(ASMState *as) | |||
1569 | mres = (int32_t)(snap->nslots - baseslot); | 1920 | mres = (int32_t)(snap->nslots - baseslot); |
1570 | switch (bc_op(*pc)) { | 1921 | switch (bc_op(*pc)) { |
1571 | case BC_CALLM: case BC_CALLMT: | 1922 | case BC_CALLM: case BC_CALLMT: |
1572 | mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break; | 1923 | mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; |
1573 | case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; | 1924 | case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; |
1574 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; | 1925 | case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; |
1575 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; | 1926 | default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; |
@@ -1677,7 +2028,7 @@ static void asm_setup_regsp(ASMState *as) | |||
1677 | as->modset |= RSET_SCRATCH; | 2028 | as->modset |= RSET_SCRATCH; |
1678 | continue; | 2029 | continue; |
1679 | } | 2030 | } |
1680 | case IR_CALLN: case IR_CALLL: case IR_CALLS: { | 2031 | case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: { |
1681 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | 2032 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; |
1682 | ir->prev = asm_setup_call_slots(as, ir, ci); | 2033 | ir->prev = asm_setup_call_slots(as, ir, ci); |
1683 | if (inloop) | 2034 | if (inloop) |
@@ -1722,10 +2073,20 @@ static void asm_setup_regsp(ASMState *as) | |||
1722 | /* fallthrough */ | 2073 | /* fallthrough */ |
1723 | #endif | 2074 | #endif |
1724 | /* C calls evict all scratch regs and return results in RID_RET. */ | 2075 | /* C calls evict all scratch regs and return results in RID_RET. */ |
1725 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: | 2076 | case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: |
1726 | if (REGARG_NUMGPR < 3 && as->evenspill < 3) | 2077 | if (REGARG_NUMGPR < 3 && as->evenspill < 3) |
1727 | as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ | 2078 | as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ |
1728 | case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: | 2079 | #if LJ_TARGET_X86 && LJ_HASFFI |
2080 | if (0) { | ||
2081 | case IR_CNEW: | ||
2082 | if (ir->op2 != REF_NIL && as->evenspill < 4) | ||
2083 | as->evenspill = 4; /* lj_cdata_newv needs 4 args. */ | ||
2084 | } | ||
2085 | #else | ||
2086 | case IR_CNEW: | ||
2087 | #endif | ||
2088 | case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: | ||
2089 | case IR_BUFSTR: | ||
1729 | ir->prev = REGSP_HINT(RID_RET); | 2090 | ir->prev = REGSP_HINT(RID_RET); |
1730 | if (inloop) | 2091 | if (inloop) |
1731 | as->modset = RSET_SCRATCH; | 2092 | as->modset = RSET_SCRATCH; |
@@ -1734,21 +2095,26 @@ static void asm_setup_regsp(ASMState *as) | |||
1734 | if (inloop) | 2095 | if (inloop) |
1735 | as->modset = RSET_SCRATCH; | 2096 | as->modset = RSET_SCRATCH; |
1736 | break; | 2097 | break; |
1737 | #if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP | 2098 | #if !LJ_SOFTFP |
1738 | case IR_ATAN2: case IR_LDEXP: | 2099 | case IR_ATAN2: |
2100 | #if LJ_TARGET_X86 | ||
2101 | if (as->evenspill < 4) /* Leave room to call atan2(). */ | ||
2102 | as->evenspill = 4; | ||
2103 | #endif | ||
2104 | #if !LJ_TARGET_X86ORX64 | ||
2105 | case IR_LDEXP: | ||
2106 | #endif | ||
1739 | #endif | 2107 | #endif |
1740 | case IR_POW: | 2108 | case IR_POW: |
1741 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { | 2109 | if (!LJ_SOFTFP && irt_isnum(ir->t)) { |
1742 | #if LJ_TARGET_X86ORX64 | ||
1743 | ir->prev = REGSP_HINT(RID_XMM0); | ||
1744 | if (inloop) | 2110 | if (inloop) |
1745 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); | 2111 | as->modset |= RSET_SCRATCH; |
2112 | #if LJ_TARGET_X86 | ||
2113 | break; | ||
1746 | #else | 2114 | #else |
1747 | ir->prev = REGSP_HINT(RID_FPRET); | 2115 | ir->prev = REGSP_HINT(RID_FPRET); |
1748 | if (inloop) | ||
1749 | as->modset |= RSET_SCRATCH; | ||
1750 | #endif | ||
1751 | continue; | 2116 | continue; |
2117 | #endif | ||
1752 | } | 2118 | } |
1753 | /* fallthrough for integer POW */ | 2119 | /* fallthrough for integer POW */ |
1754 | case IR_DIV: case IR_MOD: | 2120 | case IR_DIV: case IR_MOD: |
@@ -1761,31 +2127,33 @@ static void asm_setup_regsp(ASMState *as) | |||
1761 | break; | 2127 | break; |
1762 | case IR_FPMATH: | 2128 | case IR_FPMATH: |
1763 | #if LJ_TARGET_X86ORX64 | 2129 | #if LJ_TARGET_X86ORX64 |
1764 | if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ | 2130 | if (ir->op2 <= IRFPM_TRUNC) { |
1765 | ir->prev = REGSP_HINT(RID_XMM0); | 2131 | if (!(as->flags & JIT_F_SSE4_1)) { |
1766 | #if !LJ_64 | 2132 | ir->prev = REGSP_HINT(RID_XMM0); |
1767 | if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ | 2133 | if (inloop) |
2134 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | ||
2135 | continue; | ||
2136 | } | ||
2137 | break; | ||
2138 | } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) { | ||
2139 | if (as->evenspill < 4) /* Leave room to call pow(). */ | ||
1768 | as->evenspill = 4; | 2140 | as->evenspill = 4; |
1769 | #endif | ||
1770 | if (inloop) | ||
1771 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); | ||
1772 | continue; | ||
1773 | } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { | ||
1774 | ir->prev = REGSP_HINT(RID_XMM0); | ||
1775 | if (inloop) | ||
1776 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | ||
1777 | continue; | ||
1778 | } | 2141 | } |
2142 | #endif | ||
2143 | if (inloop) | ||
2144 | as->modset |= RSET_SCRATCH; | ||
2145 | #if LJ_TARGET_X86 | ||
1779 | break; | 2146 | break; |
1780 | #else | 2147 | #else |
1781 | ir->prev = REGSP_HINT(RID_FPRET); | 2148 | ir->prev = REGSP_HINT(RID_FPRET); |
1782 | if (inloop) | ||
1783 | as->modset |= RSET_SCRATCH; | ||
1784 | continue; | 2149 | continue; |
1785 | #endif | 2150 | #endif |
1786 | #if LJ_TARGET_X86ORX64 | 2151 | #if LJ_TARGET_X86ORX64 |
1787 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ | 2152 | /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ |
1788 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | 2153 | case IR_BSHL: case IR_BSHR: case IR_BSAR: |
2154 | if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ | ||
2155 | break; | ||
2156 | case IR_BROL: case IR_BROR: | ||
1789 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { | 2157 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { |
1790 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); | 2158 | IR(ir->op2)->r = REGSP_HINT(RID_ECX); |
1791 | if (inloop) | 2159 | if (inloop) |