diff options
| author | Mike Pall <mike> | 2011-01-05 21:45:09 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2011-01-05 21:45:09 +0100 |
| commit | bc37edc91114be67893a4ec0583782588bdee035 (patch) | |
| tree | 3a0dc0004975f68a3d2daca6df8c40eb08e7b83a | |
| parent | 3b468ceedbfd117820a5d6f96a7545d302dc008b (diff) | |
| download | luajit-bc37edc91114be67893a4ec0583782588bdee035.tar.gz luajit-bc37edc91114be67893a4ec0583782588bdee035.tar.bz2 luajit-bc37edc91114be67893a4ec0583782588bdee035.zip | |
Fix handling of floats in x86/x64 backend.
| -rw-r--r-- | src/lj_asm.c | 30 | ||||
| -rw-r--r-- | src/lj_ir.h | 1 | ||||
| -rw-r--r-- | src/lj_target_x86.h | 1 |
3 files changed, 19 insertions, 13 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index f8bd388c..627f5896 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
| @@ -713,7 +713,8 @@ static Reg ra_restore(ASMState *as, IRRef ref) | |||
| 713 | if (r < RID_MAX_GPR) | 713 | if (r < RID_MAX_GPR) |
| 714 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); | 714 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); |
| 715 | else | 715 | else |
| 716 | emit_rmro(as, XMM_MOVRM(as), r, RID_ESP, ofs); | 716 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, |
| 717 | r, RID_ESP, ofs); | ||
| 717 | } | 718 | } |
| 718 | return r; | 719 | return r; |
| 719 | } | 720 | } |
| @@ -726,7 +727,8 @@ static void ra_save(ASMState *as, IRIns *ir, Reg r) | |||
| 726 | if (r < RID_MAX_GPR) | 727 | if (r < RID_MAX_GPR) |
| 727 | emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, sps_scale(ir->s)); | 728 | emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, sps_scale(ir->s)); |
| 728 | else | 729 | else |
| 729 | emit_rmro(as, XO_MOVSDto, r, RID_ESP, sps_scale(ir->s)); | 730 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, |
| 731 | r, RID_ESP, sps_scale(ir->s)); | ||
| 730 | } | 732 | } |
| 731 | 733 | ||
| 732 | #define MINCOST(r) \ | 734 | #define MINCOST(r) \ |
| @@ -1476,7 +1478,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
| 1476 | ra_allocref(as, ref, RID2RSET(r)); | 1478 | ra_allocref(as, ref, RID2RSET(r)); |
| 1477 | } | 1479 | } |
| 1478 | } | 1480 | } |
| 1479 | } else if (irt_isnum(ir->t)) { /* FP argument is on stack. */ | 1481 | } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ |
| 1482 | lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */ | ||
| 1480 | if (LJ_32 && (ofs & 4) && irref_isk(ref)) { | 1483 | if (LJ_32 && (ofs & 4) && irref_isk(ref)) { |
| 1481 | /* Split stores for unaligned FP consts. */ | 1484 | /* Split stores for unaligned FP consts. */ |
| 1482 | emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); | 1485 | emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); |
| @@ -1486,7 +1489,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
| 1486 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | 1489 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); |
| 1487 | r = ra_alloc1(as, ref, allow & RSET_FPR); | 1490 | r = ra_alloc1(as, ref, allow & RSET_FPR); |
| 1488 | allow &= ~RID2RSET(r); | 1491 | allow &= ~RID2RSET(r); |
| 1489 | emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs); | 1492 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, |
| 1493 | r, RID_ESP, ofs); | ||
| 1490 | } | 1494 | } |
| 1491 | ofs += 8; | 1495 | ofs += 8; |
| 1492 | } else { /* Non-FP argument is on stack. */ | 1496 | } else { /* Non-FP argument is on stack. */ |
| @@ -1514,7 +1518,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
| 1514 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | 1518 | rset_clear(drop, ir->r); /* Dest reg handled below. */ |
| 1515 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 1519 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
| 1516 | if (ra_used(ir)) { | 1520 | if (ra_used(ir)) { |
| 1517 | if (irt_isnum(ir->t)) { | 1521 | if (irt_isfp(ir->t)) { |
| 1518 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ | 1522 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ |
| 1519 | #if LJ_64 | 1523 | #if LJ_64 |
| 1520 | if ((ci->flags & CCI_CASTU64)) { | 1524 | if ((ci->flags & CCI_CASTU64)) { |
| @@ -1535,7 +1539,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
| 1535 | if (ra_hasreg(dest)) { | 1539 | if (ra_hasreg(dest)) { |
| 1536 | ra_free(as, dest); | 1540 | ra_free(as, dest); |
| 1537 | ra_modified(as, dest); | 1541 | ra_modified(as, dest); |
| 1538 | emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); | 1542 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, |
| 1543 | dest, RID_ESP, ofs); | ||
| 1539 | } | 1544 | } |
| 1540 | if ((ci->flags & CCI_CASTU64)) { | 1545 | if ((ci->flags & CCI_CASTU64)) { |
| 1541 | emit_movtomro(as, RID_RET, RID_ESP, ofs); | 1546 | emit_movtomro(as, RID_RET, RID_ESP, ofs); |
| @@ -1627,7 +1632,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
| 1627 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); | 1632 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
| 1628 | IRRef lref = ir->op1; | 1633 | IRRef lref = ir->op1; |
| 1629 | lua_assert(irt_type(ir->t) != st); | 1634 | lua_assert(irt_type(ir->t) != st); |
| 1630 | if (irt_isnum(ir->t) || irt_isfloat(ir->t)) { | 1635 | if (irt_isfp(ir->t)) { |
| 1631 | Reg dest = ra_dest(as, ir, RSET_FPR); | 1636 | Reg dest = ra_dest(as, ir, RSET_FPR); |
| 1632 | if (stfp) { /* FP to FP conversion. */ | 1637 | if (stfp) { /* FP to FP conversion. */ |
| 1633 | Reg left = asm_fuseload(as, lref, RSET_FPR); | 1638 | Reg left = asm_fuseload(as, lref, RSET_FPR); |
| @@ -1679,12 +1684,12 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
| 1679 | if (ra_hasreg(left)) { | 1684 | if (ra_hasreg(left)) { |
| 1680 | Reg tmpn = ra_scratch(as, rset_exclude(RSET_FPR, left)); | 1685 | Reg tmpn = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
| 1681 | emit_rr(as, op, dest|REX_64, tmpn); | 1686 | emit_rr(as, op, dest|REX_64, tmpn); |
| 1682 | emit_rr(as, XO_ADDSD, tmpn, left); | 1687 | emit_rr(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, tmpn, left); |
| 1683 | emit_rma(as, XMM_MOVRM(as), tmpn, k); | 1688 | emit_rma(as, st == IRT_NUM ? XMM_MOVRM(as) : XO_MOVSS, tmpn, k); |
| 1684 | } else { | 1689 | } else { |
| 1685 | left = ra_allocref(as, lref, RSET_FPR); | 1690 | left = ra_allocref(as, lref, RSET_FPR); |
| 1686 | emit_rr(as, op, dest|REX_64, left); | 1691 | emit_rr(as, op, dest|REX_64, left); |
| 1687 | emit_rma(as, XO_ADDSD, left, k); | 1692 | emit_rma(as, st == IRT_NUM ? XO_ADDSD : XO_ADDSS, left, k); |
| 1688 | } | 1693 | } |
| 1689 | emit_sjcc(as, CC_NS, l_end); | 1694 | emit_sjcc(as, CC_NS, l_end); |
| 1690 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest < 2^63. */ | 1695 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest < 2^63. */ |
| @@ -2162,10 +2167,9 @@ static void asm_fxstore(ASMState *as, IRIns *ir) | |||
| 2162 | /* The IRT_I16/IRT_U16 stores should never be simplified for constant | 2167 | /* The IRT_I16/IRT_U16 stores should never be simplified for constant |
| 2163 | ** values since mov word [mem], imm16 has a length-changing prefix. | 2168 | ** values since mov word [mem], imm16 has a length-changing prefix. |
| 2164 | */ | 2169 | */ |
| 2165 | if (irt_isi16(ir->t) || irt_isu16(ir->t) || | 2170 | if (irt_isi16(ir->t) || irt_isu16(ir->t) || irt_isfp(ir->t) || |
| 2166 | irt_isnum(ir->t) || irt_isfloat(ir->t) || | ||
| 2167 | !asm_isk32(as, ir->op2, &k)) { | 2171 | !asm_isk32(as, ir->op2, &k)) { |
| 2168 | RegSet allow8 = (irt_isnum(ir->t) || irt_isfloat(ir->t)) ? RSET_FPR : | 2172 | RegSet allow8 = irt_isfp(ir->t) ? RSET_FPR : |
| 2169 | (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR; | 2173 | (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR; |
| 2170 | src = osrc = ra_alloc1(as, ir->op2, allow8); | 2174 | src = osrc = ra_alloc1(as, ir->op2, allow8); |
| 2171 | if (!LJ_64 && !rset_test(allow8, src)) { /* Already in wrong register. */ | 2175 | if (!LJ_64 && !rset_test(allow8, src)) { /* Already in wrong register. */ |
diff --git a/src/lj_ir.h b/src/lj_ir.h
index a8e890c5..7a0e016a 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
| @@ -394,6 +394,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1; | |||
| 394 | #define irt_isi64(t) (irt_type(t) == IRT_I64) | 394 | #define irt_isi64(t) (irt_type(t) == IRT_I64) |
| 395 | #define irt_isu64(t) (irt_type(t) == IRT_U64) | 395 | #define irt_isu64(t) (irt_type(t) == IRT_U64) |
| 396 | 396 | ||
| 397 | #define irt_isfp(t) (irt_isnum(t) || irt_isfloat(t)) | ||
| 397 | #define irt_isinteger(t) (irt_typerange((t), IRT_I8, IRT_INT)) | 398 | #define irt_isinteger(t) (irt_typerange((t), IRT_I8, IRT_INT)) |
| 398 | #define irt_isgcv(t) (irt_typerange((t), IRT_STR, IRT_UDATA)) | 399 | #define irt_isgcv(t) (irt_typerange((t), IRT_STR, IRT_UDATA)) |
| 399 | #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) | 400 | #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) |
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 5909c905..7aab599a 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
| @@ -259,6 +259,7 @@ typedef enum { | |||
| 259 | XO_CVTTSS2SI= XO_f30f(2c), | 259 | XO_CVTTSS2SI= XO_f30f(2c), |
| 260 | XO_CVTSS2SD = XO_f30f(5a), | 260 | XO_CVTSS2SD = XO_f30f(5a), |
| 261 | XO_CVTSD2SS = XO_f20f(5a), | 261 | XO_CVTSD2SS = XO_f20f(5a), |
| 262 | XO_ADDSS = XO_f30f(58), | ||
| 262 | XO_MOVD = XO_660f(6e), | 263 | XO_MOVD = XO_660f(6e), |
| 263 | XO_MOVDto = XO_660f(7e), | 264 | XO_MOVDto = XO_660f(7e), |
| 264 | 265 | ||
