From e66b5b6eeefa0cadfd80a859c71d41c2f9e076b8 Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Sun, 2 Jan 2011 20:43:58 +0100
Subject: Improve uint64_t <-> FP conversions in x64 backend.

---
 src/lj_asm.c   | 30 +++++++++++++++++-------------
 src/lj_ir.c    |  6 +++---
 src/lj_iropt.h |  1 +
 3 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/src/lj_asm.c b/src/lj_asm.c
index 14d6d849..18944da4 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1641,18 +1641,15 @@ static void asm_conv(ASMState *as, IRIns *ir)
       return;
 #endif
     } else {  /* Integer to FP conversion. */
-      Reg tmp = (LJ_64 && st == IRT_U64) ? ra_scratch(as, RSET_GPR) : RID_NONE;
       Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ?
 		 ra_alloc1(as, lref, RSET_GPR) :
 		 asm_fuseload(as, lref, RSET_GPR);
       if (LJ_64 && st == IRT_U64) {
-	Reg tmpn = ra_scratch(as, rset_exclude(RSET_FPR, dest));
 	MCLabel l_end = emit_label(as);
-	emit_rr(as, XO_ADDSD, dest, tmpn);
-	emit_rr(as, XO_MOVD, tmpn|REX_64, tmp);
-	emit_loadu64(as, tmp, U64x(43f00000,00000000));
+	const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000));
+	emit_rma(as, XO_ADDSD, dest, k);  /* Add 2^64 to compensate. */
 	emit_sjcc(as, CC_NS, l_end);
-	emit_rr(as, XO_TEST, left|REX_64, left);
+	emit_rr(as, XO_TEST, left|REX_64, left);  /* Check if u64 >= 2^63. */
       }
       emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
 	       dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
@@ -1675,15 +1672,22 @@ static void asm_conv(ASMState *as, IRIns *ir)
 		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
 		 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
       if (LJ_64 && irt_isu64(ir->t)) {
-	Reg left = ra_alloc1(as, lref, RSET_FPR);
-	Reg tmpn = ra_scratch(as, rset_exclude(RSET_FPR, left));
+	const void *k = lj_ir_k64_find(as->J, U64x(c3f00000,00000000));
 	MCLabel l_end = emit_label(as);
-	emit_rr(as, op, dest|REX_64, tmpn);
-	emit_rr(as, XO_ADDSD, tmpn, left);
-	emit_rr(as, XO_MOVD, tmpn|REX_64, dest);
-	emit_loadu64(as, dest, U64x(c3f00000,00000000));
+	Reg left = IR(lref)->r;
+	/* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
+	if (ra_hasreg(left)) {
+	  Reg tmpn = ra_scratch(as, rset_exclude(RSET_FPR, left));
+	  emit_rr(as, op, dest|REX_64, tmpn);
+	  emit_rr(as, XO_ADDSD, tmpn, left);
+	  emit_rma(as, XMM_MOVRM(as), tmpn, k);
+	} else {
+	  left = ra_allocref(as, lref, RSET_FPR);
+	  emit_rr(as, op, dest|REX_64, left);
+	  emit_rma(as, XO_ADDSD, left, k);
+	}
 	emit_sjcc(as, CC_NS, l_end);
-	emit_rr(as, XO_TEST, dest|REX_64, dest);
+	emit_rr(as, XO_TEST, dest|REX_64, dest);  /* Check if dest < 2^63. */
 	emit_rr(as, op, dest|REX_64, left);
       } else {
 	Reg left = asm_fuseload(as, lref, RSET_FPR);
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 3217bc1e..89be71aa 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -195,7 +195,7 @@ void lj_ir_k64_freeall(jit_State *J)
 }
 
 /* Find 64 bit constant in chained array or add it. */
-static cTValue *ir_k64_find(jit_State *J, uint64_t u64)
+cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
 {
   K64Array *k, *kp = NULL;
   TValue *ntv;
@@ -249,13 +249,13 @@ found:
 /* Intern FP constant, given by its 64 bit pattern. */
 TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
 {
-  return lj_ir_k64(J, IR_KNUM, ir_k64_find(J, u64));
+  return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64));
 }
 
 /* Intern 64 bit integer constant. */
 TRef lj_ir_kint64(jit_State *J, uint64_t u64)
 {
-  return lj_ir_k64(J, IR_KINT64, ir_k64_find(J, u64));
+  return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64));
 }
 
 /* Check whether a number is int and return it. -0 is NOT considered an int. */
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 4739f846..12943d25 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -41,6 +41,7 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
 LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
 LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
 LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
+LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
 LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
 LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
 LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64);
-- 
cgit v1.2.3-55-g6feb