diff options
author | Mike Pall <mike> | 2013-02-21 17:04:00 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2013-02-21 17:04:00 +0100 |
commit | c3219b7d177f6722b9de808cfd3d3dbfc6808e6f (patch) | |
tree | 75adc549065aa4193dd34dc6a4662137f6bdd791 | |
parent | 57768cd5882eb8d39c673d9dd8598946ef7c1843 (diff) | |
download | luajit-c3219b7d177f6722b9de808cfd3d3dbfc6808e6f.tar.gz luajit-c3219b7d177f6722b9de808cfd3d3dbfc6808e6f.tar.bz2 luajit-c3219b7d177f6722b9de808cfd3d3dbfc6808e6f.zip |
x86/x64: Drop xmm register/memory move tuning for K8.
-rw-r--r-- | src/lib_jit.c | 2 | ||||
-rw-r--r-- | src/lj_asm_x86.h | 23 | ||||
-rw-r--r-- | src/lj_emit_x86.h | 10 | ||||
-rw-r--r-- | src/lj_jit.h | 5 |
4 files changed, 14 insertions, 26 deletions
diff --git a/src/lib_jit.c b/src/lib_jit.c index 1b69caa5..125b48ce 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c | |||
@@ -549,8 +549,6 @@ static uint32_t jit_cpudetect(lua_State *L) | |||
549 | flags |= JIT_F_LEA_AGU; | 549 | flags |= JIT_F_LEA_AGU; |
550 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ | 550 | } else if (vendor[2] == 0x444d4163) { /* AMD. */ |
551 | uint32_t fam = (features[0] & 0x0ff00f00); | 551 | uint32_t fam = (features[0] & 0x0ff00f00); |
552 | if (fam == 0x00000f00) /* K8. */ | ||
553 | flags |= JIT_F_SPLIT_XMM; | ||
554 | if (fam >= 0x00000f00) /* K8, K10. */ | 552 | if (fam >= 0x00000f00) /* K8, K10. */ |
555 | flags |= JIT_F_PREFER_IMUL; | 553 | flags |= JIT_F_PREFER_IMUL; |
556 | } | 554 | } |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index e9c53a09..5621b616 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -551,7 +551,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
551 | if (ra_hasreg(dest)) { | 551 | if (ra_hasreg(dest)) { |
552 | ra_free(as, dest); | 552 | ra_free(as, dest); |
553 | ra_modified(as, dest); | 553 | ra_modified(as, dest); |
554 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, | 554 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, |
555 | dest, RID_ESP, ofs); | 555 | dest, RID_ESP, ofs); |
556 | } | 556 | } |
557 | if ((ci->flags & CCI_CASTU64)) { | 557 | if ((ci->flags & CCI_CASTU64)) { |
@@ -662,8 +662,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |||
662 | asm_guardcc(as, CC_NE); | 662 | asm_guardcc(as, CC_NE); |
663 | emit_rr(as, XO_UCOMISD, left, tmp); | 663 | emit_rr(as, XO_UCOMISD, left, tmp); |
664 | emit_rr(as, XO_CVTSI2SD, tmp, dest); | 664 | emit_rr(as, XO_CVTSI2SD, tmp, dest); |
665 | if (!(as->flags & JIT_F_SPLIT_XMM)) | 665 | emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ |
666 | emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ | ||
667 | emit_rr(as, XO_CVTTSD2SI, dest, left); | 666 | emit_rr(as, XO_CVTTSD2SI, dest, left); |
668 | /* Can't fuse since left is needed twice. */ | 667 | /* Can't fuse since left is needed twice. */ |
669 | } | 668 | } |
@@ -719,8 +718,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
719 | emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, | 718 | emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, |
720 | dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); | 719 | dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); |
721 | } | 720 | } |
722 | if (!(as->flags & JIT_F_SPLIT_XMM)) | 721 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ |
723 | emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ | ||
724 | } else if (stfp) { /* FP to integer conversion. */ | 722 | } else if (stfp) { /* FP to integer conversion. */ |
725 | if (irt_isguard(ir->t)) { | 723 | if (irt_isguard(ir->t)) { |
726 | /* Checked conversions are only supported from number to int. */ | 724 | /* Checked conversions are only supported from number to int. */ |
@@ -824,8 +822,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) | |||
824 | if (ra_hasreg(dest)) { | 822 | if (ra_hasreg(dest)) { |
825 | ra_free(as, dest); | 823 | ra_free(as, dest); |
826 | ra_modified(as, dest); | 824 | ra_modified(as, dest); |
827 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, | 825 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs); |
828 | dest, RID_ESP, ofs); | ||
829 | } | 826 | } |
830 | emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, | 827 | emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, |
831 | irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); | 828 | irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); |
@@ -1262,7 +1259,7 @@ static void asm_fxload(ASMState *as, IRIns *ir) | |||
1262 | case IRT_U8: xo = XO_MOVZXb; break; | 1259 | case IRT_U8: xo = XO_MOVZXb; break; |
1263 | case IRT_I16: xo = XO_MOVSXw; break; | 1260 | case IRT_I16: xo = XO_MOVSXw; break; |
1264 | case IRT_U16: xo = XO_MOVZXw; break; | 1261 | case IRT_U16: xo = XO_MOVZXw; break; |
1265 | case IRT_NUM: xo = XMM_MOVRM(as); break; | 1262 | case IRT_NUM: xo = XO_MOVSD; break; |
1266 | case IRT_FLOAT: xo = XO_MOVSS; break; | 1263 | case IRT_FLOAT: xo = XO_MOVSS; break; |
1267 | default: | 1264 | default: |
1268 | if (LJ_64 && irt_is64(ir->t)) | 1265 | if (LJ_64 && irt_is64(ir->t)) |
@@ -1376,7 +1373,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
1376 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; | 1373 | RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; |
1377 | Reg dest = ra_dest(as, ir, allow); | 1374 | Reg dest = ra_dest(as, ir, allow); |
1378 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1375 | asm_fuseahuref(as, ir->op1, RSET_GPR); |
1379 | emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); | 1376 | emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM); |
1380 | } else { | 1377 | } else { |
1381 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1378 | asm_fuseahuref(as, ir->op1, RSET_GPR); |
1382 | } | 1379 | } |
@@ -1442,7 +1439,7 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1442 | Reg left = ra_scratch(as, RSET_FPR); | 1439 | Reg left = ra_scratch(as, RSET_FPR); |
1443 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ | 1440 | asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ |
1444 | base = ra_alloc1(as, REF_BASE, RSET_GPR); | 1441 | base = ra_alloc1(as, REF_BASE, RSET_GPR); |
1445 | emit_rmro(as, XMM_MOVRM(as), left, base, ofs); | 1442 | emit_rmro(as, XO_MOVSD, left, base, ofs); |
1446 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ | 1443 | t.irt = IRT_NUM; /* Continue with a regular number type check. */ |
1447 | #if LJ_64 | 1444 | #if LJ_64 |
1448 | } else if (irt_islightud(t)) { | 1445 | } else if (irt_islightud(t)) { |
@@ -1461,10 +1458,8 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1461 | if ((ir->op2 & IRSLOAD_CONVERT)) { | 1458 | if ((ir->op2 & IRSLOAD_CONVERT)) { |
1462 | t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ | 1459 | t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ |
1463 | emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); | 1460 | emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); |
1464 | } else if (irt_isnum(t)) { | ||
1465 | emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); | ||
1466 | } else { | 1461 | } else { |
1467 | emit_rmro(as, XO_MOV, dest, base, ofs); | 1462 | emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs); |
1468 | } | 1463 | } |
1469 | } else { | 1464 | } else { |
1470 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) | 1465 | if (!(ir->op2 & IRSLOAD_TYPECHECK)) |
@@ -1696,7 +1691,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir) | |||
1696 | if (ra_hasreg(dest)) { | 1691 | if (ra_hasreg(dest)) { |
1697 | ra_free(as, dest); | 1692 | ra_free(as, dest); |
1698 | ra_modified(as, dest); | 1693 | ra_modified(as, dest); |
1699 | emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); | 1694 | emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs); |
1700 | } | 1695 | } |
1701 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | 1696 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); |
1702 | switch (fpm) { /* st0 = lj_vm_*(st0) */ | 1697 | switch (fpm) { /* st0 = lj_vm_*(st0) */ |
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index bd184a30..2454c899 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h | |||
@@ -241,10 +241,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i) | |||
241 | 241 | ||
242 | /* -- Emit loads/stores --------------------------------------------------- */ | 242 | /* -- Emit loads/stores --------------------------------------------------- */ |
243 | 243 | ||
244 | /* Instruction selection for XMM moves. */ | ||
245 | #define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS) | ||
246 | #define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD) | ||
247 | |||
248 | /* mov [base+ofs], i */ | 244 | /* mov [base+ofs], i */ |
249 | static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) | 245 | static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) |
250 | { | 246 | { |
@@ -314,7 +310,7 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv) | |||
314 | if (tvispzero(tv)) /* Use xor only for +0. */ | 310 | if (tvispzero(tv)) /* Use xor only for +0. */ |
315 | emit_rr(as, XO_XORPS, r, r); | 311 | emit_rr(as, XO_XORPS, r, r); |
316 | else | 312 | else |
317 | emit_rma(as, XMM_MOVRM(as), r, &tv->n); | 313 | emit_rma(as, XO_MOVSD, r, &tv->n); |
318 | } | 314 | } |
319 | 315 | ||
320 | /* -- Emit control-flow instructions -------------------------------------- */ | 316 | /* -- Emit control-flow instructions -------------------------------------- */ |
@@ -427,7 +423,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
427 | if (dst < RID_MAX_GPR) | 423 | if (dst < RID_MAX_GPR) |
428 | emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); | 424 | emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); |
429 | else | 425 | else |
430 | emit_rr(as, XMM_MOVRR(as), dst, src); | 426 | emit_rr(as, XO_MOVAPS, dst, src); |
431 | } | 427 | } |
432 | 428 | ||
433 | /* Generic load of register from stack slot. */ | 429 | /* Generic load of register from stack slot. */ |
@@ -436,7 +432,7 @@ static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | |||
436 | if (r < RID_MAX_GPR) | 432 | if (r < RID_MAX_GPR) |
437 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); | 433 | emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); |
438 | else | 434 | else |
439 | emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); | 435 | emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, RID_ESP, ofs); |
440 | } | 436 | } |
441 | 437 | ||
442 | /* Generic store of register to stack slot. */ | 438 | /* Generic store of register to stack slot. */ |
diff --git a/src/lj_jit.h b/src/lj_jit.h index 8b42dd4e..2683b462 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -18,12 +18,11 @@ | |||
18 | #define JIT_F_SSE3 0x00000020 | 18 | #define JIT_F_SSE3 0x00000020 |
19 | #define JIT_F_SSE4_1 0x00000040 | 19 | #define JIT_F_SSE4_1 0x00000040 |
20 | #define JIT_F_PREFER_IMUL 0x00000080 | 20 | #define JIT_F_PREFER_IMUL 0x00000080 |
21 | #define JIT_F_SPLIT_XMM 0x00000100 | 21 | #define JIT_F_LEA_AGU 0x00000100 |
22 | #define JIT_F_LEA_AGU 0x00000200 | ||
23 | 22 | ||
24 | /* Names for the CPU-specific flags. Must match the order above. */ | 23 | /* Names for the CPU-specific flags. Must match the order above. */ |
25 | #define JIT_F_CPU_FIRST JIT_F_SSE2 | 24 | #define JIT_F_CPU_FIRST JIT_F_SSE2 |
26 | #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\2K8\4ATOM" | 25 | #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM" |
27 | #elif LJ_TARGET_ARM | 26 | #elif LJ_TARGET_ARM |
28 | #define JIT_F_ARMV6_ 0x00000010 | 27 | #define JIT_F_ARMV6_ 0x00000010 |
29 | #define JIT_F_ARMV6T2_ 0x00000020 | 28 | #define JIT_F_ARMV6T2_ 0x00000020 |