about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2009-12-22 05:40:49 +0100
committer: Mike Pall <mike> 2009-12-22 05:40:49 +0100
commit: 44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1 (patch)
tree: e4c6f041db3ad0ec3bb09d5480e7f64e4d413c76
parent: 298e3f5d54bcc676eff0be85688e4538bffce131 (diff)
download: luajit-44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1.tar.gz
luajit-44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1.tar.bz2
luajit-44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1.zip
Use SSE variants for IRFPM_FLOOR/CEIL/TRUNC unless SSE4.1 available.
-rw-r--r--  src/lj_asm.c  18
-rw-r--r--  src/lj_vm.h  3
2 files changed, 21 insertions, 0 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index f26a40a5..c2cc4342 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2020,6 +2020,16 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
2020 as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */ 2020 as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */
2021 } 2021 }
2022 *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */ 2022 *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */
2023 } else if (fpm <= IRFPM_TRUNC) {
2024 /* The modified regs must match with the *.dasc implementation. */
2025 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
2026 if (ra_hasreg(ir->r))
2027 rset_clear(drop, ir->r); /* Dest reg handled below. */
2028 ra_evictset(as, drop);
2029 ra_destreg(as, ir, RID_XMM0);
2030 emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse :
2031 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
2032 ra_left(as, RID_XMM0, ir->op1);
2023 } else { 2033 } else {
2024 int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ 2034 int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
2025 Reg dest = ir->r; 2035 Reg dest = ir->r;
@@ -3275,6 +3285,14 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
3275 if (inloop) 3285 if (inloop)
3276 as->modset = RSET_SCRATCH; 3286 as->modset = RSET_SCRATCH;
3277 break; 3287 break;
3288 case IR_FPMATH:
3289 if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
3290 ir->prev = REGSP_HINT(RID_XMM0);
3291 if (inloop)
3292 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
3293 continue;
3294 }
3295 break;
3278 /* Non-constant shift counts need to be in RID_ECX. */ 3296 /* Non-constant shift counts need to be in RID_ECX. */
3279 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: 3297 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
3280 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) 3298 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
diff --git a/src/lj_vm.h b/src/lj_vm.h
index e4adc8db..07adc36d 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -37,6 +37,9 @@ LJ_ASMF void lj_vm_exit_interp(void);
37LJ_ASMF void lj_vm_floor(void); 37LJ_ASMF void lj_vm_floor(void);
38LJ_ASMF void lj_vm_ceil(void); 38LJ_ASMF void lj_vm_ceil(void);
39LJ_ASMF void lj_vm_trunc(void); 39LJ_ASMF void lj_vm_trunc(void);
40LJ_ASMF void lj_vm_floor_sse(void);
41LJ_ASMF void lj_vm_ceil_sse(void);
42LJ_ASMF void lj_vm_trunc_sse(void);
40LJ_ASMF void lj_vm_exp(void); 43LJ_ASMF void lj_vm_exp(void);
41LJ_ASMF void lj_vm_exp2(void); 44LJ_ASMF void lj_vm_exp2(void);
42LJ_ASMF void lj_vm_pow(void); 45LJ_ASMF void lj_vm_pow(void);