summaryrefslogtreecommitdiff
path: root/src/lj_asm.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--src/lj_asm.c65
1 files changed, 44 insertions, 21 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 6bb2b8c6..4c31a3e9 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -384,15 +384,23 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
384 emit_loadi(as, (r), ptr2addr((addr))) 384 emit_loadi(as, (r), ptr2addr((addr)))
385 385
386#if LJ_64 386#if LJ_64
387/* mov r, imm64 */ 387/* mov r, imm64 or shorter 32 bit extended load. */
388static void emit_loadu64(ASMState *as, Reg r, uint64_t i) 388static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
389{ 389{
390 MCode *p = as->mcp; 390 if (checku32(u64)) { /* 32 bit load clears upper 32 bits. */
391 *(uint64_t *)(p-8) = i; 391 emit_loadi(as, r, (int32_t)u64);
392 p[-9] = (MCode)(XI_MOVri+(r&7)); 392 } else if (checki32((int64_t)u64)) { /* Sign-extended 32 bit load. */
393 p[-10] = 0x48 + ((r>>3)&1); 393 MCode *p = as->mcp;
394 p -= 10; 394 *(int32_t *)(p-4) = (int32_t)u64;
395 as->mcp = p; 395 as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
396 } else { /* Full-size 64 bit load. */
397 MCode *p = as->mcp;
398 *(uint64_t *)(p-8) = u64;
399 p[-9] = (MCode)(XI_MOVri+(r&7));
400 p[-10] = 0x48 + ((r>>3)&1);
401 p -= 10;
402 as->mcp = p;
403 }
396} 404}
397#endif 405#endif
398 406
@@ -618,6 +626,10 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
618 } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */ 626 } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */
619 lua_assert(irt_isnil(ir->t)); 627 lua_assert(irt_isnil(ir->t));
620 emit_getgl(as, r, jit_L); 628 emit_getgl(as, r, jit_L);
629#if LJ_64 /* NYI: 32 bit register pairs. */
630 } else if (ir->o == IR_KINT64) {
631 emit_loadu64(as, r, ir_kint64(ir)->u64);
632#endif
621 } else { 633 } else {
622 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 634 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
623 ir->o == IR_KPTR || ir->o == IR_KNULL); 635 ir->o == IR_KPTR || ir->o == IR_KNULL);
@@ -909,6 +921,11 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
909 emit_loadn(as, dest, tv); 921 emit_loadn(as, dest, tv);
910 return; 922 return;
911 } 923 }
924#if LJ_64 /* NYI: 32 bit register pairs. */
925 } else if (ir->o == IR_KINT64) {
926 emit_loadu64(as, dest, ir_kint64(ir)->u64);
927 return;
928#endif
912 } else { 929 } else {
913 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 930 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
914 ir->o == IR_KPTR || ir->o == IR_KNULL); 931 ir->o == IR_KPTR || ir->o == IR_KNULL);
@@ -1343,7 +1360,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
1343 lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */ 1360 lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
1344 emit_call(as, ci->func); 1361 emit_call(as, ci->func);
1345 for (n = 0; n < nargs; n++) { /* Setup args. */ 1362 for (n = 0; n < nargs; n++) { /* Setup args. */
1346 IRIns *ir = IR(args[n]); 1363 IRRef ref = args[n];
1364 IRIns *ir = IR(ref);
1347 Reg r; 1365 Reg r;
1348#if LJ_64 && LJ_ABI_WIN 1366#if LJ_64 && LJ_ABI_WIN
1349 /* Windows/x64 argument registers are strictly positional. */ 1367 /* Windows/x64 argument registers are strictly positional. */
@@ -1364,38 +1382,42 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
1364 } 1382 }
1365#endif 1383#endif
1366 if (r) { /* Argument is in a register. */ 1384 if (r) { /* Argument is in a register. */
1367 if (r < RID_MAX_GPR && args[n] < ASMREF_TMP1) { 1385 if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
1368 emit_loadi(as, r, ir->i); 1386#if LJ_64 /* NYI: 32 bit register pairs. */
1387 if (ir->o == IR_KINT64)
1388 emit_loadu64(as, r, ir_kint64(ir)->u64);
1389 else
1390#endif
1391 emit_loadi(as, r, ir->i);
1369 } else { 1392 } else {
1370 lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ 1393 lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
1371 if (ra_hasreg(ir->r)) { 1394 if (ra_hasreg(ir->r)) {
1372 ra_noweak(as, ir->r); 1395 ra_noweak(as, ir->r);
1373 ra_movrr(as, ir, r, ir->r); 1396 ra_movrr(as, ir, r, ir->r);
1374 } else { 1397 } else {
1375 ra_allocref(as, args[n], RID2RSET(r)); 1398 ra_allocref(as, ref, RID2RSET(r));
1376 } 1399 }
1377 } 1400 }
1378 } else if (irt_isnum(ir->t)) { /* FP argument is on stack. */ 1401 } else if (irt_isnum(ir->t)) { /* FP argument is on stack. */
1379 if (!LJ_64 && (ofs & 4) && irref_isk(args[n])) { 1402 if (LJ_32 && (ofs & 4) && irref_isk(ref)) {
1380 /* Split stores for unaligned FP consts. */ 1403 /* Split stores for unaligned FP consts. */
1381 emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); 1404 emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
1382 emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi); 1405 emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
1383 } else { 1406 } else {
1384 if ((allow & RSET_FPR) == RSET_EMPTY) 1407 if ((allow & RSET_FPR) == RSET_EMPTY)
1385 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 1408 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1386 r = ra_alloc1(as, args[n], allow & RSET_FPR); 1409 r = ra_alloc1(as, ref, allow & RSET_FPR);
1387 allow &= ~RID2RSET(r); 1410 allow &= ~RID2RSET(r);
1388 emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs); 1411 emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
1389 } 1412 }
1390 ofs += 8; 1413 ofs += 8;
1391 } else { /* Non-FP argument is on stack. */ 1414 } else { /* Non-FP argument is on stack. */
1392 /* NYI: no widening for 64 bit parameters on x64. */ 1415 if (LJ_32 && ref < ASMREF_TMP1) {
1393 if (args[n] < ASMREF_TMP1) {
1394 emit_movmroi(as, RID_ESP, ofs, ir->i); 1416 emit_movmroi(as, RID_ESP, ofs, ir->i);
1395 } else { 1417 } else {
1396 if ((allow & RSET_GPR) == RSET_EMPTY) 1418 if ((allow & RSET_GPR) == RSET_EMPTY)
1397 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 1419 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1398 r = ra_alloc1(as, args[n], allow & RSET_GPR); 1420 r = ra_alloc1(as, ref, allow & RSET_GPR);
1399 allow &= ~RID2RSET(r); 1421 allow &= ~RID2RSET(r);
1400 emit_movtomro(as, REX_64IR(ir, r), RID_ESP, ofs); 1422 emit_movtomro(as, REX_64IR(ir, r), RID_ESP, ofs);
1401 } 1423 }
@@ -1936,8 +1958,9 @@ static void asm_fstore(ASMState *as, IRIns *ir)
1936 /* The IRT_I16/IRT_U16 stores should never be simplified for constant 1958 /* The IRT_I16/IRT_U16 stores should never be simplified for constant
1937 ** values since mov word [mem], imm16 has a length-changing prefix. 1959 ** values since mov word [mem], imm16 has a length-changing prefix.
1938 */ 1960 */
1939 lua_assert(!(irref_isk(ir->op2) && irt_is64(ir->t))); /* NYI: KINT64. */ 1961 if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t) ||
1940 if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t)) { 1962 (LJ_64 && irt_is64(ir->t) &&
1963 !checki32((int64_t)ir_k64(IR(ir->op2))->u64))) {
1941 RegSet allow8 = (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR; 1964 RegSet allow8 = (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR;
1942 src = ra_alloc1(as, ir->op2, allow8); 1965 src = ra_alloc1(as, ir->op2, allow8);
1943 rset_clear(allow, src); 1966 rset_clear(allow, src);
@@ -2496,7 +2519,7 @@ static void asm_add(ASMState *as, IRIns *ir)
2496 if (irt_isnum(ir->t)) 2519 if (irt_isnum(ir->t))
2497 asm_fparith(as, ir, XO_ADDSD); 2520 asm_fparith(as, ir, XO_ADDSD);
2498 else if ((as->flags & JIT_F_LEA_AGU) || as->testmcp == as->mcp || 2521 else if ((as->flags & JIT_F_LEA_AGU) || as->testmcp == as->mcp ||
2499 !asm_lea(as, ir)) 2522 irt_is64(ir->t) || !asm_lea(as, ir))
2500 asm_intarith(as, ir, XOg_ADD); 2523 asm_intarith(as, ir, XOg_ADD);
2501} 2524}
2502 2525
@@ -2615,7 +2638,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2615 else if ((cc & 0xa) == 0x2) cc ^= 5; /* A <-> B, AE <-> BE */ 2638 else if ((cc & 0xa) == 0x2) cc ^= 5; /* A <-> B, AE <-> BE */
2616 lref = ir->op2; rref = ir->op1; 2639 lref = ir->op2; rref = ir->op1;
2617 } 2640 }
2618 if (irref_isk(rref)) { 2641 if (irref_isk(rref) && IR(rref)->o != IR_KINT64) {
2619 IRIns *irl = IR(lref); 2642 IRIns *irl = IR(lref);
2620 int32_t imm = IR(rref)->i; 2643 int32_t imm = IR(rref)->i;
2621 /* Check whether we can use test ins. Not for unsigned, since CF=0. */ 2644 /* Check whether we can use test ins. Not for unsigned, since CF=0. */