Diffstat (limited to 'src/lj_asm.c')
 src/lj_asm.c | 598 ++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 347 insertions(+), 251 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index a4d0c606..f26a40a5 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -13,6 +13,7 @@
 #include "lj_gc.h"
 #include "lj_str.h"
 #include "lj_tab.h"
+#include "lj_frame.h"
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_iropt.h"
@@ -81,6 +82,10 @@ typedef struct ASMState {
 
 #define IR(ref)			(&as->ir[(ref)])
 
+#define ASMREF_TMP1		REF_TRUE	/* Temp. register. */
+#define ASMREF_TMP2		REF_FALSE	/* Temp. register. */
+#define ASMREF_L		REF_NIL		/* Stores register for L. */
+
 /* Check for variant to invariant references. */
 #define iscrossref(as, ref)	((ref) < as->sectref)
 
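Note on the new handles: REF_TRUE, REF_FALSE and REF_NIL are constant references that never need a register of their own, so aliasing them as ASMREF_TMP1/ASMREF_TMP2/ASMREF_L lets the register allocator track two call-setup temporaries plus the lua_State pointer in their ir->r fields without growing the IR. Every other constant reference sorts below REF_TRUE, so a single compare classifies call arguments. A minimal sketch of that convention (my reading of the asm_gencall code added further down, not a quote from it):

```c
if (args[n] < ASMREF_TMP1) {
  /* Ordinary constant ref: materialize the 32 bit payload directly. */
  emit_movmroi(as, RID_ESP, ofs, IR(args[n])->i);
} else {
  /* ASMREF_TMP1/TMP2/L or a variant ref: goes through the allocator. */
  Reg r = ra_alloc1(as, args[n], allow & RSET_GPR);
  emit_movtomro(as, r, RID_ESP, ofs);
}
```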
@@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
   { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
     if (rex != 0x40) *--(p) = rex; }
 #define FORCE_REX		0x200
+#define REX_64			(FORCE_REX|0x080000)
 #else
 #define REXRB(p, rr, rb)	((void)0)
 #define FORCE_REX		0
+#define REX_64			0
 #endif
 
 #define emit_i8(as, i)		(*--as->mcp = (MCode)(i))
@@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
 {
   uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
   if (rex != 0x40) {
+    rex |= (rr >> 16);
     if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
     *--p = (MCode)rex;
   }
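The REX_64 flag rides in bits the register number can't occupy, and the new `rex |= (rr >> 16)` line recovers it inside emit_op. A sketch of the flag flow on x64 (illustrative, not code from the patch):

```c
Reg rr = dest | REX_64;   /* REX_64 == FORCE_REX|0x080000 */
uint32_t rex = 0x40;      /* + REX.R/X/B bits derived from rr/rx/rb */
rex |= (rr >> 16);        /* 0x080000 >> 16 == 0x08 == REX.W */
/* FORCE_REX (0x200) guarantees rex != 0x40, so the prefix byte is
** always emitted; the final (MCode) cast drops the helper bits. */
```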
@@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target)
 
 #define emit_call(as, f)	emit_call_(as, (MCode *)(void *)(f))
 
-/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
-#define emit_setargr(as, narg, r) \
-  emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4);
-#define emit_setargi(as, narg, imm) \
-  emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
-#define emit_setargp(as, narg, ptr) \
-  emit_setargi(as, (narg), ptr2addr((ptr)))
-
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
@@ -578,10 +578,6 @@ static void ra_setup(ASMState *as)
   memset(as->phireg, 0, sizeof(as->phireg));
   memset(as->cost, 0, sizeof(as->cost));
   as->cost[RID_ESP] = REGCOST(~0u, 0u);
-
-  /* Start slots for spill slot allocation. */
-  as->evenspill = (SPS_FIRST+1)&~1;
-  as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
 }
 
 /* Rematerialize constants. */
@@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
   } else if (ir->o == IR_BASE) {
     ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
     emit_getgl(as, r, jit_base);
+  } else if (ir->o == IR_KPRI) {  /* REF_NIL stores ASMREF_L register. */
+    lua_assert(irt_isnil(ir->t));
+    emit_getgl(as, r, jit_L);
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
 	       ir->o == IR_KPTR || ir->o == IR_KNULL);
@@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
   return sps_scale(slot);
 }
 
+/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
+static Reg ra_releasetmp(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  Reg r = ir->r;
+  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+  ra_free(as, r);
+  ra_modified(as, r);
+  ir->r = RID_INIT;
+  return r;
+}
+
 /* Restore a register (marked as free). Rematerialize or force a spill. */
 static Reg ra_restore(ASMState *as, IRRef ref)
 {
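ra_releasetmp is the second half of a pattern used throughout this change: asm_gencall allocates a register for ASMREF_TMP1/TMP2 while it sets up the arguments, and the caller then releases the handle and emits the instruction that defines the temporary. Because machine code is generated backwards, that defining instruction lands before the call in the final instruction stream. The shape, taken from the reworked asm_strto below:

```c
args[0] = ir->op1;
args[1] = ASMREF_TMP1;                  /* Ask for a temp register. */
asm_gencall(as, ci, args);              /* Allocates it during arg setup. */
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
          RID_ESP, sps_scale(ir->s));   /* Executes before the call. */
```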
@@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc)
 
 /* Arch-specific field offsets. */
 static const uint8_t field_ofs[IRFL__MAX+1] = {
-#define FLOFS(name, type, field)	(uint8_t)offsetof(type, field),
+#define FLOFS(name, ofs)	(uint8_t)(ofs),
 IRFLDEF(FLOFS)
 #undef FLOFS
   0
@@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
 {
   IRIns *irr;
   lua_assert(ir->o == IR_STRREF);
-  as->mrm.idx = as->mrm.base = RID_NONE;
+  as->mrm.base = as->mrm.idx = RID_NONE;
   as->mrm.scale = XM_SCALE1;
   as->mrm.ofs = sizeof(GCstr);
   if (irref_isk(ir->op1)) {
@@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
   }
 }
 
+static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow)
+{
+  if (ir->o == IR_KPTR) {
+    as->mrm.ofs = ir->i;
+    as->mrm.base = as->mrm.idx = RID_NONE;
+  } else {
+    lua_assert(ir->o == IR_STRREF);
+    asm_fusestrref(as, ir, allow);
+  }
+}
+
 /* Fuse load into memory operand. */
 static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
 {
@@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
     return RID_MRM;
   }
   if (ir->o == IR_KNUM) {
+    RegSet avail = as->freeset & ~as->modset & RSET_FPR;
     lua_assert(allow != RSET_EMPTY);
-    if (!(as->freeset & ~as->modset & RSET_FPR)) {
+    if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
       as->mrm.ofs = ptr2addr(ir_knum(ir));
       as->mrm.base = as->mrm.idx = RID_NONE;
       return RID_MRM;
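The rewritten KNUM test fuses the constant load not only when no FPR is free and unmodified, but also when exactly one is, keeping the last spare register available. A hypothetical helper restating the bit trick:

```c
static int knum_should_fuse(RegSet avail)
{
  /* avail & (avail-1) clears the lowest set bit:
  **   no regs free  -> 0       -> fuse (old behavior)
  **   one reg free  -> 0       -> fuse (new: keep it spare)
  **   two or more   -> nonzero -> load the constant into a register
  */
  return (avail & (avail-1)) == 0;
}
```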
@@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
     }
   } else if (ir->o == IR_FLOAD) {
-    /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */
-    if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) {
+    /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
+    if ((irt_isint(ir->t) || irt_isaddr(ir->t)) &&
+	noconflict(as, ref, IR_FSTORE)) {
       asm_fusefref(as, ir, xallow);
       return RID_MRM;
     }
@@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
     }
   } else if (ir->o == IR_XLOAD) {
-    /* Generic fusion is only ok for IRT_INT operand (but see asm_comp).
+    /* Generic fusion is only ok for 32 bit operand (but see asm_comp).
     ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
     */
-    if (irt_isint(ir->t)) {
-      asm_fusestrref(as, IR(ir->op1), xallow);
+    if (irt_isint(ir->t) || irt_isaddr(ir->t)) {
+      asm_fusexref(as, IR(ir->op1), xallow);
       return RID_MRM;
     }
   }
@@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   return ra_allocref(as, ref, allow);
 }
 
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  RegSet allow = RSET_ALL;
+  uint32_t n, nargs = CCI_NARGS(ci);
+  int32_t ofs = 0;
+  lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL)));  /* Avoid stack adj. */
+  emit_call(as, ci->func);
+  for (n = 0; n < nargs; n++) {  /* Setup args. */
+#if LJ_64
+#error "NYI: 64 bit mode call argument setup"
+#endif
+    IRIns *ir = IR(args[n]);
+    if (irt_isnum(ir->t)) {
+      if ((ofs & 4) && irref_isk(args[n])) {
+	/* Split stores for unaligned FP consts. */
+	emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
+	emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
+      } else {
+	Reg r;
+	if ((allow & RSET_FPR) == RSET_EMPTY)
+	  lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+	r = ra_alloc1(as, args[n], allow & RSET_FPR);
+	allow &= ~RID2RSET(r);
+	emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
+      }
+      ofs += 8;
+    } else {
+      if ((ci->flags & CCI_FASTCALL) && n < 2) {
+	Reg r = n == 0 ? RID_ECX : RID_EDX;
+	if (args[n] < ASMREF_TMP1) {
+	  emit_loadi(as, r, ir->i);
+	} else {
+	  lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
+	  allow &= ~RID2RSET(r);
+	  if (ra_hasreg(ir->r))
+	    emit_movrr(as, r, ir->r);
+	  else
+	    ra_allocref(as, args[n], RID2RSET(r));
+	}
+      } else {
+	if (args[n] < ASMREF_TMP1) {
+	  emit_movmroi(as, RID_ESP, ofs, ir->i);
+	} else {
+	  Reg r;
+	  if ((allow & RSET_GPR) == RSET_EMPTY)
+	    lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+	  r = ra_alloc1(as, args[n], allow & RSET_GPR);
+	  allow &= ~RID2RSET(r);
+	  emit_movtomro(as, r, RID_ESP, ofs);
+	}
+	ofs += 4;
+      }
+    }
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  if ((ci->flags & CCI_NOFPRCLOBBER))
+    drop &= ~RSET_FPR;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);  /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    if (irt_isnum(ir->t)) {
+      int32_t ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
+#if LJ_64
+      if ((ci->flags & CCI_CASTU64)) {
+	Reg dest = ir->r;
+	if (ra_hasreg(dest)) {
+	  ra_free(as, dest);
+	  ra_modified(as, dest);
+	  emit_rr(as, XO_MOVD, dest|REX_64, RID_RET);  /* Really MOVQ. */
+	} else {
+	  emit_movrmro(as, RID_RET, RID_ESP, ofs);
+	}
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+#else
+      /* Number result is in x87 st0 for x86 calling convention. */
+      Reg dest = ir->r;
+      if (ra_hasreg(dest)) {
+	ra_free(as, dest);
+	ra_modified(as, dest);
+	emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+      }
+      if ((ci->flags & CCI_CASTU64)) {
+	emit_movtomro(as, RID_RET, RID_ESP, ofs);
+	emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
+      } else {
+	emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+      }
+#endif
+    } else {
+      lua_assert(!irt_ispri(ir->t));
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+}
+
+/* Collect arguments from CALL* and ARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+			    const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_NARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX);
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2;
+  }
+  args[0] = ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
 /* -- Type conversions ---------------------------------------------------- */
 
 static void asm_tonum(ASMState *as, IRIns *ir)
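How the pieces above fit together: asm_call looks up the CCallInfo for ir->op2, asm_collectargs unrolls the argument chain, and asm_gencall emits the call first and the argument stores afterwards (the assembler works backwards, so the stores execute before the call). The argument chain is a left-leaning list of IR_CARG nodes; an illustrative shape for a call with CCI_L plus two explicit arguments (invented ref numbers):

```c
/* 0001  CARG  str  len         -- op1 = first arg, op2 = second arg
** 0002  CALLL 0001 lj_str_new  -- op2 indexes lj_ir_callinfo[]
**
** asm_collectargs then fills, walking CARG links right to left:
**   args[0] = ASMREF_L;  args[1] = str;  args[2] = len;
*/
```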
@@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir)
 
 static void asm_strto(ASMState *as, IRIns *ir)
 {
-  Reg str;
-  int32_t ofs;
-  RegSet drop = RSET_SCRATCH;
   /* Force a spill slot for the destination register (if any). */
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
+  IRRef args[2];
+  RegSet drop = RSET_SCRATCH;
   if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
     rset_set(drop, ir->r);  /* WIN64 doesn't spill all FPRs. */
   ra_evictset(as, drop);
   asm_guardcc(as, CC_E);
   emit_rr(as, XO_TEST, RID_RET, RID_RET);
-  /* int lj_str_numconv(const char *s, TValue *n) */
-  emit_call(as, lj_str_numconv);
-  ofs = sps_scale(ir->s);  /* Use spill slot or slots SPS_TEMP1/2. */
-  if (ofs == 0) {
-    emit_setargr(as, 2, RID_ESP);
-  } else {
-    emit_setargr(as, 2, RID_RET);
-    emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs);
-  }
-  emit_setargr(as, 1, RID_RET);
-  str = ra_alloc1(as, ir->op1, RSET_GPR);
-  emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr));
+  args[0] = ir->op1;
+  args[1] = ASMREF_TMP1;
+  asm_gencall(as, ci, args);
+  /* Store the result to the spill slot or slots SPS_TEMP1/2. */
+  emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+	    RID_ESP, sps_scale(ir->s));
 }
 
 static void asm_tostr(ASMState *as, IRIns *ir)
 {
   IRIns *irl = IR(ir->op1);
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+  IRRef args[2];
+  args[0] = ASMREF_L;
   as->gcsteps++;
   if (irt_isnum(irl->t)) {
-    /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */
-    emit_call(as, lj_str_fromnum);
-    emit_setargr(as, 1, RID_RET);
-    emit_getgl(as, RID_RET, jit_L);
-    emit_setargr(as, 2, RID_RET);
-    emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl));
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
+    args[1] = ASMREF_TMP1;
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+    emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+	      RID_ESP, ra_spill(as, irl));
   } else {
-    /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */
-    emit_call(as, lj_str_fromint);
-    emit_setargr(as, 1, RID_RET);
-    emit_getgl(as, RID_RET, jit_L);
-    emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR));
+    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
+    args[1] = ir->op1;
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
   }
 }
 
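All of these sites index a new lj_ir_callinfo table via IRCALL_* constants. The entry type is presumably of this shape (an assumption — the real definition lives outside this diff — but it is consistent with every use of ci->func, CCI_NARGS(ci) and ci->flags above):

```c
typedef void (*ASMFunction)(void);  /* Generic C function pointer (assumed). */
typedef struct CCallInfo {
  ASMFunction func;  /* Target for emit_call(as, ci->func). */
  uint32_t flags;    /* Arg count (CCI_NARGS) plus CCI_L, CCI_FASTCALL,
                     ** CCI_CASTU64, CCI_NOFPRCLOBBER, ... */
} CCallInfo;
```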
@@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir)
     lua_assert(!irt_isnil(ir->t));
     return irt_type(ir->t)-IRT_FALSE;
   } else {
-    lua_assert(irt_isaddr(ir->t));
+    lua_assert(irt_isgcv(ir->t));
     lo = u32ptr(ir_kgc(ir));
     hi = lo - 0x04c11db7;
   }
@@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 
 static void asm_newref(ASMState *as, IRIns *ir)
 {
-  IRRef keyref = ir->op2;
-  IRIns *irkey = IR(keyref);
-  RegSet allow = RSET_GPR;
-  Reg tab, tmp;
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  tab = ra_alloc1(as, ir->op1, allow);
-  tmp = ra_scratch(as, rset_clear(allow, tab));
-  /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */
-  emit_call(as, lj_tab_newkey);
-  emit_setargr(as, 1, tmp);
-  emit_setargr(as, 2, tab);
-  emit_getgl(as, tmp, jit_L);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  IRIns *irkey;
+  Reg tmp;
+  args[0] = ASMREF_L;
+  args[1] = ir->op1;
+  args[2] = ASMREF_TMP1;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  irkey = IR(ir->op2);
   if (irt_isnum(irkey->t)) {
     /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(keyref)) {
-      emit_setargp(as, 3, ir_knum(irkey));
-    } else {
-      emit_setargr(as, 3, tmp);
+    if (irref_isk(ir->op2))
+      emit_loada(as, tmp, ir_knum(irkey));
+    else
       emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
-    }
   } else {
     /* Otherwise use g->tmptv to hold the TValue. */
-    lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t));
-    emit_setargr(as, 3, tmp);
-    if (!irref_isk(keyref)) {
-      Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp));
+    if (!irref_isk(ir->op2)) {
+      Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
       emit_movtomro(as, src, tmp, 0);
     } else if (!irt_ispri(irkey->t)) {
       emit_movmroi(as, tmp, 0, irkey->i);
@@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir)
 
 /* -- Loads and stores ---------------------------------------------------- */
 
-static void asm_fload(ASMState *as, IRIns *ir)
+static void asm_fxload(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   x86Op xo;
-  asm_fusefref(as, ir, RSET_GPR);
+  if (ir->o == IR_FLOAD)
+    asm_fusefref(as, ir, RSET_GPR);
+  else
+    asm_fusexref(as, IR(ir->op1), RSET_GPR);
+  /* ir->op2 is ignored -- unaligned loads are ok on x86. */
   switch (irt_type(ir->t)) {
   case IRT_I8: xo = XO_MOVSXb; break;
   case IRT_U8: xo = XO_MOVZXb; break;
@@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
   }
 }
 
-static void asm_xload(ASMState *as, IRIns *ir)
-{
-  Reg dest = ra_dest(as, ir, RSET_GPR);
-  x86Op xo;
-  asm_fusestrref(as, IR(ir->op1), RSET_GPR);  /* For now only support STRREF. */
-  /* ir->op2 is ignored -- unaligned loads are ok on x86. */
-  switch (irt_type(ir->t)) {
-  case IRT_I8: xo = XO_MOVSXb; break;
-  case IRT_U8: xo = XO_MOVZXb; break;
-  case IRT_I16: xo = XO_MOVSXw; break;
-  case IRT_U16: xo = XO_MOVZXw; break;
-  default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break;
-  }
-  emit_mrm(as, xo, dest, RID_MRM);
-}
-
-/* -- String ops ---------------------------------------------------------- */
+/* -- Allocations --------------------------------------------------------- */
 
 static void asm_snew(ASMState *as, IRIns *ir)
 {
-  RegSet allow = RSET_GPR;
-  Reg left, right;
-  IRIns *irl;
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  irl = IR(ir->op1);
-  left = irl->r;
-  right = IR(ir->op2)->r;
-  if (ra_noreg(left)) {
-    lua_assert(irl->o == IR_STRREF);
-    /* Get register only for non-const STRREF. */
-    if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) {
-      if (ra_hasreg(right)) rset_clear(allow, right);
-      left = ra_allocref(as, ir->op1, allow);
-    }
-  }
-  if (ra_noreg(right) && !irref_isk(ir->op2)) {
-    if (ra_hasreg(left)) rset_clear(allow, left);
-    right = ra_allocref(as, ir->op2, allow);
-  }
-  /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */
-  emit_call(as, lj_str_new);
-  emit_setargr(as, 1, RID_RET);
-  emit_getgl(as, RID_RET, jit_L);
-  if (ra_noreg(left))  /* Use immediate for const STRREF. */
-    emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i +
-		 (int32_t)sizeof(GCstr));
-  else
-    emit_setargr(as, 2, left);
-  if (ra_noreg(right))
-    emit_setargi(as, 3, IR(ir->op2)->i);
-  else
-    emit_setargr(as, 3, right);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
+  IRRef args[3];
+  args[0] = ASMREF_L;
+  args[1] = ir->op1;
+  args[2] = ir->op2;
   as->gcsteps++;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
 }
 
-/* -- Table ops ----------------------------------------------------------- */
-
 static void asm_tnew(ASMState *as, IRIns *ir)
 {
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */
-  emit_call(as, lj_tab_new);
-  emit_setargr(as, 1, RID_RET);
-  emit_setargi(as, 2, ir->op1);
-  emit_setargi(as, 3, ir->op2);
-  emit_getgl(as, RID_RET, jit_L);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  args[1] = ASMREF_TMP1;
   as->gcsteps++;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+  emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24));
 }
 
 static void asm_tdup(ASMState *as, IRIns *ir)
 {
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */
-  emit_call(as, lj_tab_dup);
-  emit_setargr(as, 1, RID_RET);
-  emit_setargp(as, 2, ir_kgc(IR(ir->op1)));
-  emit_getgl(as, RID_RET, jit_L);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  args[1] = ir->op1;
   as->gcsteps++;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
 }
 
-static void asm_tlen(ASMState *as, IRIns *ir)
-{
-  ra_destreg(as, ir, RID_RET);
-  ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
-  emit_call(as, lj_tab_len);  /* MSize lj_tab_len(GCtab *t) */
-  emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR));
-}
+/* -- Write barriers ------------------------------------------------------ */
 
 static void asm_tbar(ASMState *as, IRIns *ir)
 {
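asm_tnew now funnels both operands into a single argument register: `ir->op1 | (ir->op2 << 24)`. The lj_tab_new1 helper it calls is not part of this diff; assuming the array size fits in 24 bits here, it presumably decodes the pair like this:

```c
/* Hypothetical decoder matching the packing above; the underlying
** signature is quoted from the removed comment in asm_tnew:
** GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */
GCtab *lj_tab_new1(lua_State *L, uint32_t ahsize)
{
  return lj_tab_new(L, (int32_t)(ahsize & 0x00ffffffu), ahsize >> 24);
}
```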
@@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir)
 
 static void asm_obar(ASMState *as, IRIns *ir)
 {
-  RegSet allow = RSET_GPR;
-  Reg obj, val;
-  GCobj *valp;
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
   MCLabel l_end;
-  int32_t ofs;
-  ra_evictset(as, RSET_SCRATCH);
-  if (irref_isk(ir->op2)) {
-    valp = ir_kgc(IR(ir->op2));
-    val = RID_NONE;
-  } else {
-    valp = NULL;
-    val = ra_alloc1(as, ir->op2, allow);
-    rset_clear(allow, val);
-  }
-  obj = ra_alloc1(as, ir->op1, allow);
-  l_end = emit_label(as);
+  Reg obj;
   /* No need for other object barriers (yet). */
   lua_assert(IR(ir->op1)->o == IR_UREFC);
-  ofs = -(int32_t)offsetof(GCupval, tv);
-  /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */
-  emit_call(as, lj_gc_barrieruv);
-  if (ofs == 0) {
-    emit_setargr(as, 2, obj);
-  } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) {
-    emit_setargr(as, 2, obj);
-    emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs);
-  } else {
-    emit_setargr(as, 2, RID_RET);
-    emit_rmro(as, XO_LEA, RID_RET, obj, ofs);
-  }
-  emit_setargp(as, 1, J2G(as->J));
-  if (valp)
-    emit_setargp(as, 3, valp);
-  else
-    emit_setargr(as, 3, val);
+  l_end = emit_label(as);
+  args[0] = ASMREF_TMP1;
+  args[1] = ir->op1;
+  asm_gencall(as, ci, args);
+  emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
+  obj = IR(ir->op1)->r;
   emit_sjcc(as, CC_Z, l_end);
   emit_i8(as, LJ_GC_WHITES);
-  if (valp)
-    emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked);
-  else
+  if (irref_isk(ir->op2)) {
+    GCobj *vp = ir_kgc(IR(ir->op2));
+    emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
+  } else {
+    Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
     emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
+  }
   emit_sjcc(as, CC_Z, l_end);
   emit_i8(as, LJ_GC_BLACK);
   emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
-	    ofs + (int32_t)offsetof(GChead, marked));
+	    (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
 }
 
 /* -- FP/int arithmetic and logic operations ------------------------------ */
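Runtime shape of the rewritten barrier: the emit_* calls above are generated bottom-up, so they execute in reverse order. Read forward, the fast path tests the upvalue and the stored value before falling into the call (a sketch; the two-argument lj_gc_barrieruv signature, with the global_State passed via ASMREF_TMP1, is implied by the args[] setup above):

```c
if ((uv->marked & LJ_GC_BLACK) &&        /* Closed upvalue is black... */
    (val->gch.marked & LJ_GC_WHITES)) {  /* ...and new value is white? */
  lj_gc_barrieruv(g, tv);                /* Slow path: run the barrier. */
}
/* l_end: */
```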
@@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       }
     }
     emit_mrm(as, XO_UCOMISD, left, right);
-  } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) {
+  } else {
     IRRef lref = ir->op1, rref = ir->op2;
     IROp leftop = (IROp)(IR(lref)->o);
-    lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
+    lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E));
     /* Swap constants (only for ABC) and fusable loads to the right. */
     if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
       if ((cc & 0xc) == 0xc) cc ^= 3;  /* L <-> G, LE <-> GE */
@@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       } else {
	 Reg left;
	 if (opisfusableload((IROp)irl->o) &&
-	     ((irt_isi8(irl->t) && checki8(imm)) ||
-	      (irt_isu8(irl->t) && checku8(imm)))) {
-	   /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8
-	   ** loads are handled here. The IRT_I16/IRT_U16 loads should never be
-	   ** fused, since cmp word [mem], imm16 has a length-changing prefix.
+	     ((irt_isu8(irl->t) && checku8(imm)) ||
+	      ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
+	      (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
+	   /* Only the IRT_INT case is fused by asm_fuseload.
+	   ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
+	   ** are handled here.
+	   ** Note that cmp word [mem], imm16 should not be generated,
+	   ** since it has a length-changing prefix. Compares of a word
+	   ** against a sign-extended imm8 are ok, however.
	   */
	   IRType1 origt = irl->t;  /* Temporarily flip types. */
	   irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
@@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
	   if (left == RID_MRM) {  /* Fusion succeeded? */
	     asm_guardcc(as, cc);
	     emit_i8(as, imm);
-	     emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM);
+	     emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
+		      XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM);
	     return;
	   }  /* Otherwise handle register case as usual. */
	 } else {
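The reason for the extra immediate checks: `cmp word [mem], imm16` encodes a length-changing 66 prefix ahead of an imm16, which stalls the decoders on many x86 cores, while the imm8 form (XO_ARITHiw8) sign-extends its operand. A 16 bit compare is therefore only fused when the immediate survives that sign extension:

```c
/* The IRT_U16 guard from the patch, with concrete values: */
int ok = checku16(imm) && checki8((int16_t)imm);
/* imm == 0xfff0: (int16_t)imm == -16, checki8(-16) holds, and
** "cmp word [mem], -16" still compares against 0xfff0 as intended.
** imm == 0x0123: checki8 fails -> fall back to the register path. */
```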
@@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       asm_guardcc(as, cc);
       emit_mrm(as, XO_CMP, left, right);
     }
-  } else {  /* Handle ordered string compares. */
-    RegSet allow = RSET_GPR;
-    /* This assumes lj_str_cmp never uses any SSE registers. */
-    ra_evictset(as, (RSET_SCRATCH & RSET_GPR));
-    asm_guardcc(as, cc);
-    emit_rr(as, XO_TEST, RID_RET, RID_RET);
-    emit_call(as, lj_str_cmp);  /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */
-    if (irref_isk(ir->op1)) {
-      emit_setargi(as, 1, IR(ir->op1)->i);
-    } else {
-      Reg left = ra_alloc1(as, ir->op1, allow);
-      rset_clear(allow, left);
-      emit_setargr(as, 1, left);
-    }
-    if (irref_isk(ir->op2)) {
-      emit_setargi(as, 2, IR(ir->op2)->i);
-    } else {
-      Reg right = ra_alloc1(as, ir->op2, allow);
-      emit_setargr(as, 2, right);
-    }
   }
 }
 
@@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
 /* -- GC handling --------------------------------------------------------- */
 
 /* Sync all live GC values to Lua stack slots. */
-static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
+static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
 {
+  /* Some care must be taken when allocating registers here, since this is
+  ** not part of the fast path. All scratch registers are evicted in the
+  ** fast path, so it's easiest to force allocation from scratch registers
+  ** only. This avoids register allocation state unification.
+  */
+  RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
   IRRef2 *map = &as->T->snapmap[snap->mapofs];
   BCReg s, nslots = snap->nslots;
   for (s = 0; s < nslots; s++) {
@@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
 /* Check GC threshold and do one or more GC steps. */
 static void asm_gc_check(ASMState *as, SnapShot *snap)
 {
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
   MCLabel l_end;
-  const BCIns *pc;
-  Reg tmp, base;
+  Reg base, lstate, tmp;
   RegSet drop = RSET_SCRATCH;
-  /* Must evict BASE because the stack may be reallocated by the GC. */
-  if (ra_hasreg(IR(REF_BASE)->r))
-    drop |= RID2RSET(IR(REF_BASE)->r);
+  if (ra_hasreg(IR(REF_BASE)->r))  /* Stack may be reallocated by the GC. */
+    drop |= RID2RSET(IR(REF_BASE)->r);  /* Need to evict BASE, too. */
   ra_evictset(as, drop);
-  base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET));
   l_end = emit_label(as);
-  /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */
-  emit_call(as, lj_gc_step_jit);
-  emit_movtomro(as, base, RID_RET, offsetof(lua_State, base));
-  emit_setargr(as, 1, RID_RET);
-  emit_setargi(as, 3, (int32_t)as->gcsteps);
-  emit_getgl(as, RID_RET, jit_L);
-  pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots];
-  emit_setargp(as, 2, pc);
-  asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base));
-  if (as->curins == as->loopref)  /* BASE gets restored by LOOP anyway. */
-    ra_restore(as, REF_BASE);  /* Better do it inside the slow path. */
+  args[0] = ASMREF_L;
+  args[1] = ASMREF_TMP1;
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  emit_loadi(as, tmp, (int32_t)as->gcsteps);
+  /* We don't know spadj yet, so get the C frame from L->cframe. */
+  emit_movmroi(as, tmp, CFRAME_OFS_PC,
+	       (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]);
+  emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
+  lstate = IR(ASMREF_L)->r;
+  emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
+  /* It's ok if lstate is already in a non-scratch reg. But all allocations
+  ** in the non-fast path must use a scratch reg. See comment above.
+  */
+  base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
+  emit_movtomro(as, base, lstate, offsetof(lua_State, base));
+  asm_gc_sync(as, snap, base);
+  /* BASE/L get restored anyway, better do it inside the slow path. */
+  if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
+  if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r))
+    ra_restore(as, ASMREF_L);
   /* Jump around GC step if GC total < GC threshold. */
   tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR);
   emit_sjcc(as, CC_B, l_end);
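What the new slow path computes, in C terms: since spadj isn't known yet at this point, the C frame is recovered from L->cframe and masked. The CFRAME_OFS_PC and CFRAME_RAWMASK constants are the reason lj_frame.h is now included at the top of this file. A sketch with invented local names (snapshot_pc, steps):

```c
intptr_t cf = (intptr_t)L->cframe & CFRAME_RAWMASK;
*(int32_t *)(cf + CFRAME_OFS_PC) = snapshot_pc;  /* PC for this exit. */
L->base = base;               /* The GC may reallocate the stack. */
lj_gc_step_jit(L, steps);     /* Presumed new (L, steps) signature;
                              ** the PC now travels via the C frame. */
```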
@@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as)
 {
   int32_t spadj;
   emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
-  spadj = sps_adjust(as);
+  spadj = sps_adjust(as->evenspill);
   as->T->spadjust = (uint16_t)spadj;
   emit_addptr(as, RID_ESP, -spadj);
 }
@@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
-  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
-  ra_free(as, r);
-  if (r != RID_BASE) {
-    ra_scratch(as, RID2RSET(RID_BASE));
-    emit_rr(as, XO_MOV, r, RID_BASE);
+  lua_assert(!ra_hasspill(ir->s));
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (r != RID_BASE) {
+      ra_scratch(as, RID2RSET(RID_BASE));
+      emit_rr(as, XO_MOV, r, RID_BASE);
+    }
   }
 }
 
@@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as)
   }
 
   /* Calculate stack frame adjustment. */
-  spadj = sps_adjust(as);
+  spadj = sps_adjust(as->evenspill);
   spdelta = spadj - (int32_t)as->parent->spadjust;
   if (spdelta < 0) {  /* Don't shrink the stack frame. */
     spadj = (int32_t)as->parent->spadjust;
@@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as)
	 GCfunc *fn = ir_kfunc(IR(ir->op2));
	 if (isluafunc(fn)) {
	   BCReg fs = s + funcproto(fn)->framesize;
-	   newbase = s;
-	   if (secondbase == ~(BCReg)0) secondbase = s;
	   if (fs > topslot) topslot = fs;
+	   if (s != 0) {
+	     newbase = s;
+	     if (secondbase == ~(BCReg)0) secondbase = s;
+	   }
	 }
       }
     }
@@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir)
 
   /* Loads and stores. */
   case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
-  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
   case IR_SLOAD: asm_sload(as, ir); break;
-  case IR_XLOAD: asm_xload(as, ir); break;
 
   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
   case IR_FSTORE: asm_fstore(as, ir); break;
 
-  /* String ops. */
+  /* Allocations. */
   case IR_SNEW: asm_snew(as, ir); break;
-
-  /* Table ops. */
   case IR_TNEW: asm_tnew(as, ir); break;
   case IR_TDUP: asm_tdup(as, ir); break;
-  case IR_TLEN: asm_tlen(as, ir); break;
+
+  /* Write barriers. */
   case IR_TBAR: asm_tbar(as, ir); break;
   case IR_OBAR: asm_obar(as, ir); break;
 
@@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_TOSTR: asm_tostr(as, ir); break;
   case IR_STRTO: asm_strto(as, ir); break;
 
+  /* Calls. */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CARG: break;
+
   default:
     setintV(&as->J->errinfo, ir->o);
     lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
@@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
   IRRef i, nins;
   int inloop;
 
+  ra_setup(as);
+
   /* Clear reg/sp for constants. */
   for (i = T->nk; i < REF_BIAS; i++)
     IR(i)->prev = REGSP_INIT;
@@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
   as->curins = nins;
 
   inloop = 0;
+  as->evenspill = SPS_FIRST;
   for (i = REF_FIRST; i < nins; i++) {
     IRIns *ir = IR(i);
     switch (ir->o) {
@@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
       if (i == as->stopins+1 && ir->op1 == ir->op2)
	 as->stopins++;
       break;
+    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+      const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+      /* NYI: not fastcall-aware, but doesn't matter (yet). */
+      if (CCI_NARGS(ci) > (uint32_t)as->evenspill)  /* Leave room for args. */
+	as->evenspill = (int32_t)CCI_NARGS(ci);
+#if LJ_64
+      ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
+#else
+      ir->prev = REGSP_HINT(RID_RET);
+#endif
+      if (inloop)
+	as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
+		      (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
+      continue;
+      }
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR:
+    case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
     case IR_NEWREF:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
@@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
       if (inloop)
	 as->modset = RSET_SCRATCH;
       break;
-    /* Ordered string compares evict all integer scratch registers. */
-    case IR_LT: case IR_GE: case IR_LE: case IR_GT:
-      if (irt_isstr(ir->t) && inloop)
-	as->modset |= (RSET_SCRATCH & RSET_GPR);
-      break;
     /* Non-constant shift counts need to be in RID_ECX. */
     case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
@@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
     }
     ir->prev = REGSP_INIT;
   }
+  if ((as->evenspill & 1))
+    as->oddspill = as->evenspill++;
+  else
+    as->oddspill = 0;
 }
 
 /* -- Assembler core ------------------------------------------------------ */
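Spill-slot parity, moved here from ra_setup: evenspill first grows to cover the widest call's outgoing arguments during the scan above, then the trailing odd slot (if any) is parked for later 4 byte spills so that 8 byte FP spill slots stay aligned. A worked example with invented numbers:

```c
as->evenspill = 5;                   /* e.g. widest call takes 5 slots */
if ((as->evenspill & 1))
  as->oddspill = as->evenspill++;    /* oddspill = 5, evenspill = 6 */
else
  as->oddspill = 0;                  /* no odd slot to reuse */
```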
@@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T)
   as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
 
   /* Setup register allocation. */
-  ra_setup(as);
   asm_setup_regsp(as, T);
 
   if (!as->loopref) {