author     Mike Pall <mike>  2010-12-06 00:07:36 +0100
committer  Mike Pall <mike>  2010-12-06 00:07:36 +0100
commit     588fa0481fff746b5a866dd8b90ddd00618da71d
tree       f14e18525a0736ff72a887f0b2a7d8c913ad5609 /src
parent     c5f4f607c9d73b11ce151f677a06d3f0089be219
Add support for 64 bit integer arithmetic to x64 backend.
Diffstat (limited to 'src')
-rw-r--r--  src/lj_asm.c         84
-rw-r--r--  src/lj_ir.h           9
-rw-r--r--  src/lj_target_x86.h   2
3 files changed, 59 insertions(+), 36 deletions(-)
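
The centerpiece of the patch is the reworked REX_64IR() macro in the first lj_asm.c hunk below: on x64 it folds a REX.W marker into the register (or opcode-group) operand whenever the IR type is 64 bit, so the existing emitters pick the 64 bit form of an instruction without new call sites. A minimal sketch of that pattern, using a placeholder REX_64 value rather than LuaJIT's real operand encoding:

/* Sketch only, not LuaJIT source: fold a width flag into the register
** operand and let the emitter test it when encoding the instruction.
*/
#include <stdio.h>

#define REX_64 0x80u                      /* hypothetical flag bit */
#define REX_64IR(is64, r) ((r) + ((is64) ? REX_64 : 0))

static void emit_arith(unsigned reg)
{
  if (reg & REX_64)                       /* flag set: emit 64 bit variant */
    printf("REX.W prefix + op, reg %u\n", reg & ~REX_64);
  else                                    /* flag clear: 32 bit variant */
    printf("op, reg %u\n", reg);
}

int main(void)
{
  emit_arith(REX_64IR(0, 3));             /* 32 bit arithmetic on reg 3 */
  emit_arith(REX_64IR(1, 3));             /* 64 bit arithmetic on reg 3 */
  return 0;
}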
diff --git a/src/lj_asm.c b/src/lj_asm.c
index d10cf643..df94933a 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -674,8 +674,12 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
 }
 
 /* Use 64 bit operations to handle 64 bit IR types. */
-#define REX_64IR(ir, r) \
-  ((r) | ((LJ_64 && irt_is64((ir)->t)) ? REX_64 : 0))
+#if LJ_64
+#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
+#else
+/* NYI: 32 bit register pairs. */
+#define REX_64IR(ir, r) check_exp(!irt_is64((ir)->t), (r))
+#endif
 
 /* Generic move between two regs. */
 static void ra_movrr(ASMState *as, IRIns *ir, Reg r1, Reg r2)
@@ -1122,6 +1126,22 @@ IRFLDEF(FLOFS)
 /* Limit linear search to this distance. Avoids O(n^2) behavior. */
 #define CONFLICT_SEARCH_LIM 31
 
+/* Check if a reference is a signed 32 bit constant. */
+static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
+{
+  if (irref_isk(ref)) {
+    IRIns *ir = IR(ref);
+    if (ir->o != IR_KINT64) {
+      *k = ir->i;
+      return 1;
+    } else if (checki32((int64_t)ir_kint64(ir)->u64)) {
+      *k = (int32_t)ir_kint64(ir)->u64;
+      return 1;
+    }
+  }
+  return 0;
+}
+
 /* Check if there's no conflicting instruction between curins and ref. */
 static int noconflict(ASMState *as, IRRef ref, IROp conflict)
 {
@@ -1962,12 +1982,11 @@ static void asm_fstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
   Reg src = RID_NONE;
+  int32_t k = 0;
   /* The IRT_I16/IRT_U16 stores should never be simplified for constant
   ** values since mov word [mem], imm16 has a length-changing prefix.
   */
-  if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t) ||
-      (LJ_64 && irt_is64(ir->t) &&
-       !checki32((int64_t)ir_k64(IR(ir->op2))->u64))) {
+  if (!asm_isk32(as, ir->op2, &k) || irt_isi16(ir->t) || irt_isu16(ir->t)) {
     RegSet allow8 = (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR;
     src = ra_alloc1(as, ir->op2, allow8);
     rset_clear(allow, src);
@@ -1992,12 +2011,13 @@ static void asm_fstore(ASMState *as, IRIns *ir)
     emit_mrm(as, xo, src, RID_MRM);
   } else {
     if (irt_isi8(ir->t) || irt_isu8(ir->t)) {
-      emit_i8(as, IR(ir->op2)->i);
+      emit_i8(as, k);
       emit_mrm(as, XO_MOVmib, 0, RID_MRM);
     } else {
-      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
-      emit_i32(as, IR(ir->op2)->i);
-      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+      lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
+                 irt_isaddr(ir->t));
+      emit_i32(as, k);
+      emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM);
     }
   }
 }
@@ -2420,6 +2440,7 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   IRRef rref = ir->op2;
   RegSet allow = RSET_GPR;
   Reg dest, right;
+  int32_t k = 0;
   if (as->testmcp == as->mcp) { /* Drop test r,r instruction. */
     as->testmcp = NULL;
     as->mcp += (LJ_64 && *as->mcp != XI_TEST) ? 3 : 2;
@@ -2432,7 +2453,7 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   dest = ra_dest(as, ir, allow);
   if (lref == rref) {
     right = dest;
-  } else if (ra_noreg(right) && !irref_isk(rref)) {
+  } else if (ra_noreg(right) && !asm_isk32(as, rref, &k)) {
     if (swapops(as, ir)) {
       IRRef tmp = lref; lref = rref; rref = tmp;
     }
@@ -2442,9 +2463,9 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   if (irt_isguard(ir->t)) /* For IR_ADDOV etc. */
     asm_guardcc(as, CC_O);
   if (ra_hasreg(right))
-    emit_mrm(as, XO_ARITH(xa), dest, right);
+    emit_mrm(as, XO_ARITH(xa), REX_64IR(ir, dest), right);
   else
-    emit_gri(as, XG_ARITHi(xa), dest, IR(ir->op2)->i);
+    emit_gri(as, XG_ARITHi(xa), REX_64IR(ir, dest), k);
   ra_left(as, dest, lref);
 }
 
@@ -2533,19 +2554,15 @@ static void asm_add(ASMState *as, IRIns *ir)
 static void asm_bitnot(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  emit_rr(as, XO_GROUP3, XOg_NOT, dest);
+  emit_rr(as, XO_GROUP3, REX_64IR(ir, XOg_NOT), dest);
   ra_left(as, dest, ir->op1);
 }
 
 static void asm_bitswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  MCode *p = as->mcp;
-  p[-1] = (MCode)(XI_BSWAP+(dest&7));
-  p[-2] = 0x0f;
-  p -= 2;
-  REXRB(p, 0, dest);
-  as->mcp = p;
+  as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
+                    REX_64IR(ir, dest), 0, 0, as->mcp, 1);
   ra_left(as, dest, ir->op1);
 }
 
@@ -2560,8 +2577,8 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
     shift = irr->i & 31; /* Handle shifts of 0..31 bits. */
     switch (shift) {
     case 0: return;
-    case 1: emit_rr(as, XO_SHIFT1, (Reg)xs, dest); break;
-    default: emit_shifti(as, xs, dest, shift); break;
+    case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
+    default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
     }
   } else { /* Variable shifts implicitly use register cl (i.e. ecx). */
     RegSet allow = rset_exclude(RSET_GPR, RID_ECX);
@@ -2573,7 +2590,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
       ra_scratch(as, RID2RSET(RID_ECX));
     }
     dest = ra_dest(as, ir, allow);
-    emit_rr(as, XO_SHIFTcl, (Reg)xs, dest);
+    emit_rr(as, XO_SHIFTcl, REX_64IR(ir, xs), dest);
     if (right != RID_ECX) {
       ra_noweak(as, right);
       emit_rr(as, XO_MOV, RID_ECX, right);
@@ -2638,6 +2655,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
   } else {
     IRRef lref = ir->op1, rref = ir->op2;
     IROp leftop = (IROp)(IR(lref)->o);
+    Reg r64 = REX_64IR(ir, 0);
+    int32_t imm = 0;
     lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
     /* Swap constants (only for ABC) and fusable loads to the right. */
     if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
@@ -2645,26 +2664,25 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       else if ((cc & 0xa) == 0x2) cc ^= 5; /* A <-> B, AE <-> BE */
       lref = ir->op2; rref = ir->op1;
     }
-    if (irref_isk(rref) && IR(rref)->o != IR_KINT64) {
+    if (asm_isk32(as, rref, &imm)) {
       IRIns *irl = IR(lref);
-      int32_t imm = IR(rref)->i;
       /* Check wether we can use test ins. Not for unsigned, since CF=0. */
       int usetest = (imm == 0 && (cc & 0xa) != 0x2);
       if (usetest && irl->o == IR_BAND && irl+1 == ir && !ra_used(irl)) {
        /* Combine comp(BAND(ref, r/imm), 0) into test mrm, r/imm. */
        Reg right, left = RID_NONE;
        RegSet allow = RSET_GPR;
-       if (!irref_isk(irl->op2)) {
+       if (!asm_isk32(as, irl->op2, &imm)) {
          left = ra_alloc1(as, irl->op2, allow);
          rset_clear(allow, left);
        }
        right = asm_fuseload(as, irl->op1, allow);
        asm_guardcc(as, cc);
-       if (irref_isk(irl->op2)) {
-         emit_i32(as, IR(irl->op2)->i);
-         emit_mrm(as, XO_GROUP3, XOg_TEST, right);
+       if (ra_noreg(left)) {
+         emit_i32(as, imm);
+         emit_mrm(as, XO_GROUP3, r64 + XOg_TEST, right);
        } else {
-         emit_mrm(as, XO_TEST, left, right);
+         emit_mrm(as, XO_TEST, r64 + left, right);
        }
      } else {
        Reg left;
@@ -2687,7 +2705,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
            asm_guardcc(as, cc);
            emit_i8(as, imm);
            emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
-                        XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM);
+                        XO_ARITHib : XO_ARITHiw8, r64 + XOg_CMP, RID_MRM);
            return;
          } /* Otherwise handle register case as usual. */
        } else {
@@ -2696,7 +2714,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
        asm_guardcc(as, cc);
        if (usetest && left != RID_MRM) {
          /* Use test r,r instead of cmp r,0. */
-         emit_rr(as, XO_TEST, REX_64IR(ir, left), left);
+         emit_rr(as, XO_TEST, r64 + left, left);
          if (irl+1 == ir) /* Referencing previous ins? */
            as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */
        } else {
@@ -2708,14 +2726,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
            emit_i32(as, imm);
            xo = XO_ARITHi;
          }
-         emit_mrm(as, xo, XOg_CMP, left);
+         emit_mrm(as, xo, r64 + XOg_CMP, left);
        }
      }
    } else {
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
      asm_guardcc(as, cc);
-     emit_mrm(as, XO_CMP, REX_64IR(ir, left), right);
+     emit_mrm(as, XO_CMP, r64 + left, right);
    }
  }
}
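
The asm_isk32() helper added above replaces the scattered irref_isk()/checki32() tests in the store, arithmetic and compare emitters with a single question: can this constant be encoded as a sign-extended 32 bit immediate, or must it be materialized in a register first? A minimal sketch of that range check, detached from the IR machinery (function name and I/O are illustrative only):

/* Sketch only, not LuaJIT source: decide whether a 64 bit constant
** fits an x86 imm32 operand.
*/
#include <stdio.h>
#include <stdint.h>

static int fits_imm32(int64_t k, int32_t *out)
{
  if (k >= INT32_MIN && k <= INT32_MAX) {  /* same idea as checki32() */
    *out = (int32_t)k;
    return 1;   /* can emit op r/m64, imm32 */
  }
  return 0;     /* constant needs a register */
}

int main(void)
{
  int32_t imm;
  printf("%d\n", fits_imm32(42, &imm));                /* 1 */
  printf("%d\n", fits_imm32((int64_t)1 << 40, &imm));  /* 0 */
  return 0;
}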
diff --git a/src/lj_ir.h b/src/lj_ir.h
index b8ea0fa9..232ff939 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -380,9 +380,14 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 #define irt_isgcv(t) (irt_typerange((t), IRT_STR, IRT_UDATA))
 #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
 
+#if LJ_64
+#define IRT_IS64 \
+  ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
+#else
 #define IRT_IS64 \
-  ((1u<<IRT_NUM) | (1u<<IRT_I64) | (1u<<IRT_U64) | (1u<<IRT_P64) | \
-   (LJ_64 ? (1u<<IRT_LIGHTUD) : 0))
+  ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64))
+#endif
+
 #define irt_is64(t) ((IRT_IS64 >> irt_type(t)) & 1)
 
 static LJ_AINLINE IRType itype2irt(const TValue *tv)
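
The lj_ir.h change above splits IRT_IS64 into per-target masks (IRT_P64 and IRT_LIGHTUD count as 64 bit only on 64 bit targets), while irt_is64() keeps its single shift-and-mask test. A minimal sketch of the same bit-test idiom, with placeholder type ids rather than the real IRT_* numbering:

/* Sketch only, not LuaJIT source: one bit per type id in a mask of
** 64 bit types, tested with a shift and a mask.
*/
#include <stdio.h>

enum { T_INT, T_NUM, T_I64, T_U64, T_NTYPES };  /* hypothetical type ids */

#define IS64_MASK ((1u<<T_NUM)|(1u<<T_I64)|(1u<<T_U64))
#define is64(t)   ((IS64_MASK >> (t)) & 1)

int main(void)
{
  int t;
  for (t = 0; t < T_NTYPES; t++)
    printf("type %d: is64 = %u\n", t, is64(t));
  return 0;
}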
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 03c52770..0fcee6dd 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -187,7 +187,6 @@ typedef enum {
   XI_PUSHi8 = 0x6a,
   XI_TEST = 0x85,
   XI_MOVmi = 0xc7,
-  XI_BSWAP = 0xc8, /* Really 0fc8+r. */
 
   /* Note: little-endian byte-order! */
   XI_FLDZ = 0xeed9,
@@ -230,6 +229,7 @@ typedef enum {
   XO_MOVZXw = XO_0f(b7),
   XO_MOVSXb = XO_0f(be),
   XO_MOVSXw = XO_0f(bf),
+  XO_BSWAP = XO_0f(c8),
 
   XO_MOVSD = XO_f20f(10),
   XO_MOVSDto = XO_f20f(11),