about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/lib_jit.c 2
-rw-r--r-- src/lj_asm_x86.h 23
-rw-r--r-- src/lj_emit_x86.h 10
-rw-r--r-- src/lj_jit.h 5
4 files changed, 14 insertions, 26 deletions
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 1b69caa5..125b48ce 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -549,8 +549,6 @@ static uint32_t jit_cpudetect(lua_State *L)
549 flags |= JIT_F_LEA_AGU; 549 flags |= JIT_F_LEA_AGU;
550 } else if (vendor[2] == 0x444d4163) { /* AMD. */ 550 } else if (vendor[2] == 0x444d4163) { /* AMD. */
551 uint32_t fam = (features[0] & 0x0ff00f00); 551 uint32_t fam = (features[0] & 0x0ff00f00);
552 if (fam == 0x00000f00) /* K8. */
553 flags |= JIT_F_SPLIT_XMM;
554 if (fam >= 0x00000f00) /* K8, K10. */ 552 if (fam >= 0x00000f00) /* K8, K10. */
555 flags |= JIT_F_PREFER_IMUL; 553 flags |= JIT_F_PREFER_IMUL;
556 } 554 }
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index e9c53a09..5621b616 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -551,7 +551,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
551 if (ra_hasreg(dest)) { 551 if (ra_hasreg(dest)) {
552 ra_free(as, dest); 552 ra_free(as, dest);
553 ra_modified(as, dest); 553 ra_modified(as, dest);
554 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 554 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
555 dest, RID_ESP, ofs); 555 dest, RID_ESP, ofs);
556 } 556 }
557 if ((ci->flags & CCI_CASTU64)) { 557 if ((ci->flags & CCI_CASTU64)) {
@@ -662,8 +662,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
662 asm_guardcc(as, CC_NE); 662 asm_guardcc(as, CC_NE);
663 emit_rr(as, XO_UCOMISD, left, tmp); 663 emit_rr(as, XO_UCOMISD, left, tmp);
664 emit_rr(as, XO_CVTSI2SD, tmp, dest); 664 emit_rr(as, XO_CVTSI2SD, tmp, dest);
665 if (!(as->flags & JIT_F_SPLIT_XMM)) 665 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
666 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
667 emit_rr(as, XO_CVTTSD2SI, dest, left); 666 emit_rr(as, XO_CVTTSD2SI, dest, left);
668 /* Can't fuse since left is needed twice. */ 667 /* Can't fuse since left is needed twice. */
669} 668}
@@ -719,8 +718,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
719 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, 718 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
720 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); 719 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
721 } 720 }
722 if (!(as->flags & JIT_F_SPLIT_XMM)) 721 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
723 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
724 } else if (stfp) { /* FP to integer conversion. */ 722 } else if (stfp) { /* FP to integer conversion. */
725 if (irt_isguard(ir->t)) { 723 if (irt_isguard(ir->t)) {
726 /* Checked conversions are only supported from number to int. */ 724 /* Checked conversions are only supported from number to int. */
@@ -824,8 +822,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
824 if (ra_hasreg(dest)) { 822 if (ra_hasreg(dest)) {
825 ra_free(as, dest); 823 ra_free(as, dest);
826 ra_modified(as, dest); 824 ra_modified(as, dest);
827 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 825 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
828 dest, RID_ESP, ofs);
829 } 826 }
830 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, 827 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
831 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 828 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
@@ -1262,7 +1259,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1262 case IRT_U8: xo = XO_MOVZXb; break; 1259 case IRT_U8: xo = XO_MOVZXb; break;
1263 case IRT_I16: xo = XO_MOVSXw; break; 1260 case IRT_I16: xo = XO_MOVSXw; break;
1264 case IRT_U16: xo = XO_MOVZXw; break; 1261 case IRT_U16: xo = XO_MOVZXw; break;
1265 case IRT_NUM: xo = XMM_MOVRM(as); break; 1262 case IRT_NUM: xo = XO_MOVSD; break;
1266 case IRT_FLOAT: xo = XO_MOVSS; break; 1263 case IRT_FLOAT: xo = XO_MOVSS; break;
1267 default: 1264 default:
1268 if (LJ_64 && irt_is64(ir->t)) 1265 if (LJ_64 && irt_is64(ir->t))
@@ -1376,7 +1373,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1376 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 1373 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1377 Reg dest = ra_dest(as, ir, allow); 1374 Reg dest = ra_dest(as, ir, allow);
1378 asm_fuseahuref(as, ir->op1, RSET_GPR); 1375 asm_fuseahuref(as, ir->op1, RSET_GPR);
1379 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); 1376 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1380 } else { 1377 } else {
1381 asm_fuseahuref(as, ir->op1, RSET_GPR); 1378 asm_fuseahuref(as, ir->op1, RSET_GPR);
1382 } 1379 }
@@ -1442,7 +1439,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1442 Reg left = ra_scratch(as, RSET_FPR); 1439 Reg left = ra_scratch(as, RSET_FPR);
1443 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 1440 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1444 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1441 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1445 emit_rmro(as, XMM_MOVRM(as), left, base, ofs); 1442 emit_rmro(as, XO_MOVSD, left, base, ofs);
1446 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1443 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1447#if LJ_64 1444#if LJ_64
1448 } else if (irt_islightud(t)) { 1445 } else if (irt_islightud(t)) {
@@ -1461,10 +1458,8 @@ static void asm_sload(ASMState *as, IRIns *ir)
1461 if ((ir->op2 & IRSLOAD_CONVERT)) { 1458 if ((ir->op2 & IRSLOAD_CONVERT)) {
1462 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ 1459 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1463 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); 1460 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
1464 } else if (irt_isnum(t)) {
1465 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
1466 } else { 1461 } else {
1467 emit_rmro(as, XO_MOV, dest, base, ofs); 1462 emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1468 } 1463 }
1469 } else { 1464 } else {
1470 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 1465 if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1696,7 +1691,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1696 if (ra_hasreg(dest)) { 1691 if (ra_hasreg(dest)) {
1697 ra_free(as, dest); 1692 ra_free(as, dest);
1698 ra_modified(as, dest); 1693 ra_modified(as, dest);
1699 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); 1694 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1700 } 1695 }
1701 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); 1696 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1702 switch (fpm) { /* st0 = lj_vm_*(st0) */ 1697 switch (fpm) { /* st0 = lj_vm_*(st0) */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index bd184a30..2454c899 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 241
242/* -- Emit loads/stores --------------------------------------------------- */ 242/* -- Emit loads/stores --------------------------------------------------- */
243 243
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 244/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 245static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 246{
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
314 if (tvispzero(tv)) /* Use xor only for +0. */ 310 if (tvispzero(tv)) /* Use xor only for +0. */
315 emit_rr(as, XO_XORPS, r, r); 311 emit_rr(as, XO_XORPS, r, r);
316 else 312 else
317 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 313 emit_rma(as, XO_MOVSD, r, &tv->n);
318} 314}
319 315
320/* -- Emit control-flow instructions -------------------------------------- */ 316/* -- Emit control-flow instructions -------------------------------------- */
@@ -427,7 +423,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
427 if (dst < RID_MAX_GPR) 423 if (dst < RID_MAX_GPR)
428 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 424 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
429 else 425 else
430 emit_rr(as, XMM_MOVRR(as), dst, src); 426 emit_rr(as, XO_MOVAPS, dst, src);
431} 427}
432 428
433/* Generic load of register from stack slot. */ 429/* Generic load of register from stack slot. */
@@ -436,7 +432,7 @@ static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
436 if (r < RID_MAX_GPR) 432 if (r < RID_MAX_GPR)
437 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 433 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
438 else 434 else
439 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 435 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, RID_ESP, ofs);
440} 436}
441 437
442/* Generic store of register to stack slot. */ 438/* Generic store of register to stack slot. */
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 8b42dd4e..2683b462 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -18,12 +18,11 @@
18#define JIT_F_SSE3 0x00000020 18#define JIT_F_SSE3 0x00000020
19#define JIT_F_SSE4_1 0x00000040 19#define JIT_F_SSE4_1 0x00000040
20#define JIT_F_PREFER_IMUL 0x00000080 20#define JIT_F_PREFER_IMUL 0x00000080
21#define JIT_F_SPLIT_XMM 0x00000100 21#define JIT_F_LEA_AGU 0x00000100
22#define JIT_F_LEA_AGU 0x00000200
23 22
24/* Names for the CPU-specific flags. Must match the order above. */ 23/* Names for the CPU-specific flags. Must match the order above. */
25#define JIT_F_CPU_FIRST JIT_F_SSE2 24#define JIT_F_CPU_FIRST JIT_F_SSE2
26#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\2K8\4ATOM" 25#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM"
27#elif LJ_TARGET_ARM 26#elif LJ_TARGET_ARM
28#define JIT_F_ARMV6_ 0x00000010 27#define JIT_F_ARMV6_ 0x00000010
29#define JIT_F_ARMV6T2_ 0x00000020 28#define JIT_F_ARMV6T2_ 0x00000020