diff options
author | Mike Pall <mike> | 2009-12-22 05:40:49 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2009-12-22 05:40:49 +0100 |
commit | 44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1 (patch) | |
tree | e4c6f041db3ad0ec3bb09d5480e7f64e4d413c76 | |
parent | 298e3f5d54bcc676eff0be85688e4538bffce131 (diff) | |
download | luajit-44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1.tar.gz luajit-44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1.tar.bz2 luajit-44a9d7b00c957d0aa8aabf0d139b1a4cb530abc1.zip |
Use SSE variants for IRFPM_FLOOR/CEIL/TRUNC unless SSE4.1 is available.
-rw-r--r-- | src/lj_asm.c | 18 | ||||
-rw-r--r-- | src/lj_vm.h | 3 |
2 files changed, 21 insertions, 0 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index f26a40a5..c2cc4342 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -2020,6 +2020,16 @@ static void asm_fpmath(ASMState *as, IRIns *ir) | |||
2020 | as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */ | 2020 | as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */ |
2021 | } | 2021 | } |
2022 | *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */ | 2022 | *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */ |
2023 | } else if (fpm <= IRFPM_TRUNC) { | ||
2024 | /* The modified regs must match with the *.dasc implementation. */ | ||
2025 | RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | ||
2026 | if (ra_hasreg(ir->r)) | ||
2027 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
2028 | ra_evictset(as, drop); | ||
2029 | ra_destreg(as, ir, RID_XMM0); | ||
2030 | emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse : | ||
2031 | fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); | ||
2032 | ra_left(as, RID_XMM0, ir->op1); | ||
2023 | } else { | 2033 | } else { |
2024 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ | 2034 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ |
2025 | Reg dest = ir->r; | 2035 | Reg dest = ir->r; |
@@ -3275,6 +3285,14 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
3275 | if (inloop) | 3285 | if (inloop) |
3276 | as->modset = RSET_SCRATCH; | 3286 | as->modset = RSET_SCRATCH; |
3277 | break; | 3287 | break; |
3288 | case IR_FPMATH: | ||
3289 | if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { | ||
3290 | ir->prev = REGSP_HINT(RID_XMM0); | ||
3291 | if (inloop) | ||
3292 | as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); | ||
3293 | continue; | ||
3294 | } | ||
3295 | break; | ||
3278 | /* Non-constant shift counts need to be in RID_ECX. */ | 3296 | /* Non-constant shift counts need to be in RID_ECX. */ |
3279 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | 3297 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: |
3280 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) | 3298 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) |
diff --git a/src/lj_vm.h b/src/lj_vm.h index e4adc8db..07adc36d 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
@@ -37,6 +37,9 @@ LJ_ASMF void lj_vm_exit_interp(void); | |||
37 | LJ_ASMF void lj_vm_floor(void); | 37 | LJ_ASMF void lj_vm_floor(void); |
38 | LJ_ASMF void lj_vm_ceil(void); | 38 | LJ_ASMF void lj_vm_ceil(void); |
39 | LJ_ASMF void lj_vm_trunc(void); | 39 | LJ_ASMF void lj_vm_trunc(void); |
40 | LJ_ASMF void lj_vm_floor_sse(void); | ||
41 | LJ_ASMF void lj_vm_ceil_sse(void); | ||
42 | LJ_ASMF void lj_vm_trunc_sse(void); | ||
40 | LJ_ASMF void lj_vm_exp(void); | 43 | LJ_ASMF void lj_vm_exp(void); |
41 | LJ_ASMF void lj_vm_exp2(void); | 44 | LJ_ASMF void lj_vm_exp2(void); |
42 | LJ_ASMF void lj_vm_pow(void); | 45 | LJ_ASMF void lj_vm_pow(void); |