author		Mike Pall <mike>	2010-12-06 00:07:36 +0100
committer	Mike Pall <mike>	2010-12-06 00:07:36 +0100
commit		588fa0481fff746b5a866dd8b90ddd00618da71d (patch)
tree		f14e18525a0736ff72a887f0b2a7d8c913ad5609 /src
parent		c5f4f607c9d73b11ce151f677a06d3f0089be219 (diff)
Add support for 64 bit integer arithmetic to x64 backend.
Diffstat (limited to 'src')
-rw-r--r--	src/lj_asm.c		84
-rw-r--r--	src/lj_ir.h		 9
-rw-r--r--	src/lj_target_x86.h	 2
3 files changed, 59 insertions, 36 deletions
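The core of the patch is the reworked REX_64IR macro below: on x64 it folds a REX.W request into the register operand whenever the IR type is 64 bit, and the emitter call sites (arithmetic, shifts, not, bswap, compares, stores) route their operands through it. As a rough standalone illustration of that mechanism — REX_W_FLAG, emit_add_rr and the flag-bit placement are hypothetical stand-ins, not LuaJIT's actual REX_64/emit_op machinery:

#include <stdint.h>
#include <stdio.h>

/* Illustrative flag bit playing the role of LuaJIT's REX_64: it rides
** along in the register operand and tells the emitter to prepend the
** 0x48 REX.W prefix, promoting a 32 bit op to its 64 bit form. */
#define REX_W_FLAG 0x200u

/* Encode ADD reg, reg; returns the number of bytes written. */
static int emit_add_rr(uint8_t *p, unsigned dst, unsigned src)
{
  int n = 0;
  if ((dst | src) & REX_W_FLAG)
    p[n++] = 0x48;  /* REX.W: 64 bit operand size */
  p[n++] = 0x01;    /* opcode: ADD r/m, r */
  p[n++] = (uint8_t)(0xc0 | ((src & 7) << 3) | (dst & 7));  /* ModRM */
  return n;
}

int main(void)
{
  uint8_t buf[4];
  int i, n;
  n = emit_add_rr(buf, 0, 1);  /* add eax, ecx */
  for (i = 0; i < n; i++) printf("%02x ", buf[i]);
  printf("\n");
  n = emit_add_rr(buf, 0 | REX_W_FLAG, 1 | REX_W_FLAG);  /* add rax, rcx */
  for (i = 0; i < n; i++) printf("%02x ", buf[i]);
  printf("\n");
  return 0;
}

This prints "01 c8" without the flag and "48 01 c8" with it — the single prefix byte is all that separates the 32 bit and 64 bit forms, which is why one macro at each call site suffices.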
diff --git a/src/lj_asm.c b/src/lj_asm.c
index d10cf643..df94933a 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -674,8 +674,12 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
 }
 
 /* Use 64 bit operations to handle 64 bit IR types. */
-#define REX_64IR(ir, r) \
-  ((r) | ((LJ_64 && irt_is64((ir)->t)) ? REX_64 : 0))
+#if LJ_64
+#define REX_64IR(ir, r)		((r) + (irt_is64((ir)->t) ? REX_64 : 0))
+#else
+/* NYI: 32 bit register pairs. */
+#define REX_64IR(ir, r)		check_exp(!irt_is64((ir)->t), (r))
+#endif
 
 /* Generic move between two regs. */
 static void ra_movrr(ASMState *as, IRIns *ir, Reg r1, Reg r2)
@@ -1122,6 +1126,22 @@ IRFLDEF(FLOFS)
 /* Limit linear search to this distance. Avoids O(n^2) behavior. */
 #define CONFLICT_SEARCH_LIM	31
 
+/* Check if a reference is a signed 32 bit constant. */
+static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
+{
+  if (irref_isk(ref)) {
+    IRIns *ir = IR(ref);
+    if (ir->o != IR_KINT64) {
+      *k = ir->i;
+      return 1;
+    } else if (checki32((int64_t)ir_kint64(ir)->u64)) {
+      *k = (int32_t)ir_kint64(ir)->u64;
+      return 1;
+    }
+  }
+  return 0;
+}
+
 /* Check if there's no conflicting instruction between curins and ref. */
 static int noconflict(ASMState *as, IRRef ref, IROp conflict)
 {
@@ -1962,12 +1982,11 @@ static void asm_fstore(ASMState *as, IRIns *ir)
 {
   RegSet allow = RSET_GPR;
   Reg src = RID_NONE;
+  int32_t k = 0;
   /* The IRT_I16/IRT_U16 stores should never be simplified for constant
   ** values since mov word [mem], imm16 has a length-changing prefix.
   */
-  if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t) ||
-      (LJ_64 && irt_is64(ir->t) &&
-       !checki32((int64_t)ir_k64(IR(ir->op2))->u64))) {
+  if (!asm_isk32(as, ir->op2, &k) || irt_isi16(ir->t) || irt_isu16(ir->t)) {
     RegSet allow8 = (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR;
     src = ra_alloc1(as, ir->op2, allow8);
     rset_clear(allow, src);
@@ -1992,12 +2011,13 @@ static void asm_fstore(ASMState *as, IRIns *ir)
     emit_mrm(as, xo, src, RID_MRM);
   } else {
     if (irt_isi8(ir->t) || irt_isu8(ir->t)) {
-      emit_i8(as, IR(ir->op2)->i);
+      emit_i8(as, k);
       emit_mrm(as, XO_MOVmib, 0, RID_MRM);
     } else {
-      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
-      emit_i32(as, IR(ir->op2)->i);
-      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+      lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
+		 irt_isaddr(ir->t));
+      emit_i32(as, k);
+      emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM);
     }
   }
 }
@@ -2420,6 +2440,7 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   IRRef rref = ir->op2;
   RegSet allow = RSET_GPR;
   Reg dest, right;
+  int32_t k = 0;
   if (as->testmcp == as->mcp) {  /* Drop test r,r instruction. */
     as->testmcp = NULL;
     as->mcp += (LJ_64 && *as->mcp != XI_TEST) ? 3 : 2;
@@ -2432,7 +2453,7 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   dest = ra_dest(as, ir, allow);
   if (lref == rref) {
     right = dest;
-  } else if (ra_noreg(right) && !irref_isk(rref)) {
+  } else if (ra_noreg(right) && !asm_isk32(as, rref, &k)) {
     if (swapops(as, ir)) {
       IRRef tmp = lref; lref = rref; rref = tmp;
     }
@@ -2442,9 +2463,9 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
   if (irt_isguard(ir->t))  /* For IR_ADDOV etc. */
     asm_guardcc(as, CC_O);
   if (ra_hasreg(right))
-    emit_mrm(as, XO_ARITH(xa), dest, right);
+    emit_mrm(as, XO_ARITH(xa), REX_64IR(ir, dest), right);
   else
-    emit_gri(as, XG_ARITHi(xa), dest, IR(ir->op2)->i);
+    emit_gri(as, XG_ARITHi(xa), REX_64IR(ir, dest), k);
   ra_left(as, dest, lref);
 }
 
@@ -2533,19 +2554,15 @@ static void asm_add(ASMState *as, IRIns *ir)
 static void asm_bitnot(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  emit_rr(as, XO_GROUP3, XOg_NOT, dest);
+  emit_rr(as, XO_GROUP3, REX_64IR(ir, XOg_NOT), dest);
   ra_left(as, dest, ir->op1);
 }
 
 static void asm_bitswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  MCode *p = as->mcp;
-  p[-1] = (MCode)(XI_BSWAP+(dest&7));
-  p[-2] = 0x0f;
-  p -= 2;
-  REXRB(p, 0, dest);
-  as->mcp = p;
+  as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
+		    REX_64IR(ir, dest), 0, 0, as->mcp, 1);
   ra_left(as, dest, ir->op1);
 }
 
@@ -2560,8 +2577,8 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
     shift = irr->i & 31;  /* Handle shifts of 0..31 bits. */
     switch (shift) {
     case 0: return;
-    case 1: emit_rr(as, XO_SHIFT1, (Reg)xs, dest); break;
-    default: emit_shifti(as, xs, dest, shift); break;
+    case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
+    default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
     }
   } else {  /* Variable shifts implicitly use register cl (i.e. ecx). */
     RegSet allow = rset_exclude(RSET_GPR, RID_ECX);
@@ -2573,7 +2590,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
       ra_scratch(as, RID2RSET(RID_ECX));
     }
     dest = ra_dest(as, ir, allow);
-    emit_rr(as, XO_SHIFTcl, (Reg)xs, dest);
+    emit_rr(as, XO_SHIFTcl, REX_64IR(ir, xs), dest);
     if (right != RID_ECX) {
       ra_noweak(as, right);
       emit_rr(as, XO_MOV, RID_ECX, right);
@@ -2638,6 +2655,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
   } else {
     IRRef lref = ir->op1, rref = ir->op2;
     IROp leftop = (IROp)(IR(lref)->o);
+    Reg r64 = REX_64IR(ir, 0);
+    int32_t imm = 0;
     lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
     /* Swap constants (only for ABC) and fusable loads to the right. */
     if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
@@ -2645,26 +2664,25 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       else if ((cc & 0xa) == 0x2) cc ^= 5;  /* A <-> B, AE <-> BE */
       lref = ir->op2; rref = ir->op1;
     }
-    if (irref_isk(rref) && IR(rref)->o != IR_KINT64) {
+    if (asm_isk32(as, rref, &imm)) {
       IRIns *irl = IR(lref);
-      int32_t imm = IR(rref)->i;
       /* Check whether we can use test ins. Not for unsigned, since CF=0. */
       int usetest = (imm == 0 && (cc & 0xa) != 0x2);
       if (usetest && irl->o == IR_BAND && irl+1 == ir && !ra_used(irl)) {
 	/* Combine comp(BAND(ref, r/imm), 0) into test mrm, r/imm. */
 	Reg right, left = RID_NONE;
 	RegSet allow = RSET_GPR;
-	if (!irref_isk(irl->op2)) {
+	if (!asm_isk32(as, irl->op2, &imm)) {
 	  left = ra_alloc1(as, irl->op2, allow);
 	  rset_clear(allow, left);
 	}
 	right = asm_fuseload(as, irl->op1, allow);
 	asm_guardcc(as, cc);
-	if (irref_isk(irl->op2)) {
-	  emit_i32(as, IR(irl->op2)->i);
-	  emit_mrm(as, XO_GROUP3, XOg_TEST, right);
+	if (ra_noreg(left)) {
+	  emit_i32(as, imm);
+	  emit_mrm(as, XO_GROUP3, r64 + XOg_TEST, right);
 	} else {
-	  emit_mrm(as, XO_TEST, left, right);
+	  emit_mrm(as, XO_TEST, r64 + left, right);
 	}
       } else {
 	Reg left;
@@ -2687,7 +2705,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
 	  asm_guardcc(as, cc);
 	  emit_i8(as, imm);
 	  emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
-		       XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM);
+		       XO_ARITHib : XO_ARITHiw8, r64 + XOg_CMP, RID_MRM);
 	  return;
 	}  /* Otherwise handle register case as usual. */
       } else {
@@ -2696,7 +2714,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       asm_guardcc(as, cc);
       if (usetest && left != RID_MRM) {
 	/* Use test r,r instead of cmp r,0. */
-	emit_rr(as, XO_TEST, REX_64IR(ir, left), left);
+	emit_rr(as, XO_TEST, r64 + left, left);
 	if (irl+1 == ir)  /* Referencing previous ins? */
 	  as->testmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
       } else {
@@ -2708,14 +2726,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
 	  emit_i32(as, imm);
 	  xo = XO_ARITHi;
 	}
-	emit_mrm(as, xo, XOg_CMP, left);
+	emit_mrm(as, xo, r64 + XOg_CMP, left);
       }
     }
   } else {
     Reg left = ra_alloc1(as, lref, RSET_GPR);
     Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
     asm_guardcc(as, cc);
-    emit_mrm(as, XO_CMP, REX_64IR(ir, left), right);
+    emit_mrm(as, XO_CMP, r64 + left, right);
   }
 }
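A note on the new asm_isk32 helper used throughout the hunks above: x86-64 instructions encode at most a 32 bit immediate, which the CPU sign-extends to 64 bits, so a KINT64 constant may only be emitted inline when sign-extension reproduces it exactly. A self-contained sketch of that predicate — checki32_sketch is an illustrative stand-in for LuaJIT's checki32 macro:

#include <stdint.h>
#include <stdio.h>

/* A 64 bit value can serve as an x86-64 imm32 only if sign-extending
** its low 32 bits reproduces the original value. */
static int checki32_sketch(int64_t k)
{
  return k == (int64_t)(int32_t)k;
}

int main(void)
{
  printf("%d\n", checki32_sketch(-1));            /* 1: 0xffffffffffffffff round-trips */
  printf("%d\n", checki32_sketch(0x7fffffff));    /* 1: INT32_MAX fits */
  printf("%d\n", checki32_sketch(0x80000000LL));  /* 0: would sign-extend to negative */
  return 0;
}

Constants that fail the check fall through to the register path, which is why the irref_isk tests above could be replaced wholesale.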
diff --git a/src/lj_ir.h b/src/lj_ir.h
index b8ea0fa9..232ff939 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -380,9 +380,14 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
 #define irt_isgcv(t)	(irt_typerange((t), IRT_STR, IRT_UDATA))
 #define irt_isaddr(t)	(irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
 
+#if LJ_64
+#define IRT_IS64 \
+  ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
+#else
 #define IRT_IS64 \
-  ((1u<<IRT_NUM) | (1u<<IRT_I64) | (1u<<IRT_U64) | (1u<<IRT_P64) | \
-   (LJ_64 ? (1u<<IRT_LIGHTUD) : 0))
+  ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64))
+#endif
+
 #define irt_is64(t)	((IRT_IS64 >> irt_type(t)) & 1)
 
 static LJ_AINLINE IRType itype2irt(const TValue *tv)
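The lj_ir.h change keeps irt_is64 a constant-time bit-set membership test: each 64 bit type tag owns one bit of the IRT_IS64 mask, and the lookup is a shift plus a mask. The same pattern reduced to a standalone sketch, with made-up type tags in place of the real IRT_* enum values:

#include <stdio.h>

/* Hypothetical type tags standing in for LuaJIT's IRT_* enum. */
enum { T_INT, T_NUM, T_I64, T_U64, T_NTYPES };

/* One bit per 64 bit type; the membership test is a shift and a mask,
** exactly like irt_is64(t) in lj_ir.h. */
#define IS64_MASK ((1u << T_NUM) | (1u << T_I64) | (1u << T_U64))
#define is64(t)   ((IS64_MASK >> (t)) & 1u)

int main(void)
{
  int t;
  for (t = 0; t < T_NTYPES; t++)
    printf("type %d: is64 = %u\n", t, is64(t));
  return 0;
}

Splitting the mask across #if LJ_64 also lets the 32 bit build classify lightuserdata and pointers as 32 bit without evaluating LJ_64 inside the macro on every use.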
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 03c52770..0fcee6dd 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -187,7 +187,6 @@ typedef enum {
   XI_PUSHi8 =	0x6a,
   XI_TEST =	0x85,
   XI_MOVmi =	0xc7,
-  XI_BSWAP =	0xc8,		/* Really 0fc8+r. */
 
   /* Note: little-endian byte-order! */
   XI_FLDZ =	0xeed9,
@@ -230,6 +229,7 @@ typedef enum {
   XO_MOVZXw =	XO_0f(b7),
   XO_MOVSXb =	XO_0f(be),
   XO_MOVSXw =	XO_0f(bf),
+  XO_BSWAP =	XO_0f(c8),
 
   XO_MOVSD =	XO_f20f(10),
   XO_MOVSDto =	XO_f20f(11),
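The lj_target_x86.h change retires the hand-patched XI_BSWAP byte in favor of a proper XO_0f() opcode, so asm_bitswap can go through the generic emitter, which already knows how to prepend REX bits. For reference, bswap encodes as 0f c8+r with the register number folded into the final opcode byte. A hedged sketch of just that encoding — emit_bswap is illustrative, not the LuaJIT emitter, and it ignores the REX.B bit needed for r8-r15:

#include <stdint.h>
#include <stdio.h>

/* bswap is 0f c8+r: register number added into the second opcode byte,
** with an optional REX.W prefix selecting the 64 bit form. */
static int emit_bswap(uint8_t *p, unsigned reg, int is64)
{
  int n = 0;
  if (is64)
    p[n++] = 0x48;                       /* REX.W */
  p[n++] = 0x0f;
  p[n++] = (uint8_t)(0xc8 + (reg & 7));  /* c8+r */
  return n;
}

int main(void)
{
  uint8_t buf[3];
  int i, n = emit_bswap(buf, 1, 1);  /* bswap rcx -> 48 0f c9 */
  for (i = 0; i < n; i++) printf("%02x ", buf[i]);
  printf("\n");
  return 0;
}

With the opcode expressed as XO_0f(c8), the "+r" addition and the REX handling both fall out of the shared emit_op path instead of being patched into the code buffer by hand.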