about summary refs log tree commit diff
diff options
context:
space:
mode:
author Mike Pall <mike> 2016-03-28 23:05:20 +0200
committer Mike Pall <mike> 2016-03-28 23:05:20 +0200
commit 892887e5841fc91d8f954e780310a66404cbaadc (patch)
tree 44aa22b73bb795a6d21f76c22db0ad84882a0d2b
parent 6801e7165c3a5031db3cfe0e52f50cebb918695f (diff)
download luajit-892887e5841fc91d8f954e780310a66404cbaadc.tar.gz
luajit-892887e5841fc91d8f954e780310a66404cbaadc.tar.bz2
luajit-892887e5841fc91d8f954e780310a66404cbaadc.zip
x86: Generate BMI2 shifts and rotates, if available.
Contributed by Peter Cawley.
Diffstat (limited to '')
-rw-r--r-- src/jit/dis_x86.lua 3
-rw-r--r-- src/lj_asm.c 5
-rw-r--r-- src/lj_asm_x86.h 28
-rw-r--r-- src/lj_emit_x86.h 11
-rw-r--r-- src/lj_target_x86.h 11
5 files changed, 51 insertions, 7 deletions
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index f8a21ff3..d564988e 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -244,6 +244,7 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
244[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", 244[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
245--Fx 245--Fx
246[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", 246[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
247[0xf7] = "|sarxVrmv|shlxVrmv|shrxVrmv",
247}, 248},
248 249
249["3a"] = { -- [66] 0f 3a xx 250["3a"] = { -- [66] 0f 3a xx
@@ -273,6 +274,8 @@ nil,nil,nil,nil,
273[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", 274[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
274[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", 275[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
275[0xdf] = "||aeskeygenassistXrmu", 276[0xdf] = "||aeskeygenassistXrmu",
277--Fx
278[0xf0] = "|||rorxVrmu",
276}, 279},
277} 280}
278 281
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 93f6bcd6..94d7bfc4 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2150,7 +2150,10 @@ static void asm_setup_regsp(ASMState *as)
2150#endif 2150#endif
2151#if LJ_TARGET_X86ORX64 2151#if LJ_TARGET_X86ORX64
2152 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ 2152 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
2153 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: 2153 case IR_BSHL: case IR_BSHR: case IR_BSAR:
2154 if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
2155 break;
2156 case IR_BROL: case IR_BROR:
2154 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { 2157 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
2155 IR(ir->op2)->r = REGSP_HINT(RID_ECX); 2158 IR(ir->op2)->r = REGSP_HINT(RID_ECX);
2156 if (inloop) 2159 if (inloop)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 512e0534..718cb12e 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1956,7 +1956,7 @@ static void asm_bswap(ASMState *as, IRIns *ir)
1956#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR) 1956#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
1957#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR) 1957#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
1958 1958
1959static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) 1959static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
1960{ 1960{
1961 IRRef rref = ir->op2; 1961 IRRef rref = ir->op2;
1962 IRIns *irr = IR(rref); 1962 IRIns *irr = IR(rref);
@@ -1965,11 +1965,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1965 int shift; 1965 int shift;
1966 dest = ra_dest(as, ir, RSET_GPR); 1966 dest = ra_dest(as, ir, RSET_GPR);
1967 shift = irr->i & (irt_is64(ir->t) ? 63 : 31); 1967 shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
1968 if (!xv && shift && (as->flags & JIT_F_BMI2)) {
1969 Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
1970 if (left != dest) { /* BMI2 rotate right by constant. */
1971 emit_i8(as, xs == XOg_ROL ? -shift : shift);
1972 emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
1973 return;
1974 }
1975 }
1968 switch (shift) { 1976 switch (shift) {
1969 case 0: break; 1977 case 0: break;
1970 case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; 1978 case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
1971 default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; 1979 default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
1972 } 1980 }
1981 } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */
1982 Reg left, right;
1983 dest = ra_dest(as, ir, RSET_GPR);
1984 right = ra_alloc1(as, rref, RSET_GPR);
1985 left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
1986 irt_is64(ir->t));
1987 emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
1988 return;
1973 } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ 1989 } else { /* Variable shifts implicitly use register cl (i.e. ecx). */
1974 Reg right; 1990 Reg right;
1975 dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); 1991 dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
@@ -1995,11 +2011,11 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1995 */ 2011 */
1996} 2012}
1997 2013
1998#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL) 2014#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
1999#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR) 2015#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
2000#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR) 2016#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
2001#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL) 2017#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
2002#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR) 2018#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
2003 2019
2004/* -- Comparisons --------------------------------------------------------- */ 2020/* -- Comparisons --------------------------------------------------------- */
2005 2021
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index caf30859..cbaf4e85 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -13,10 +13,12 @@
13 if (rex != 0x40) *--(p) = rex; } 13 if (rex != 0x40) *--(p) = rex; }
14#define FORCE_REX 0x200 14#define FORCE_REX 0x200
15#define REX_64 (FORCE_REX|0x080000) 15#define REX_64 (FORCE_REX|0x080000)
16#define VEX_64 0x800000
16#else 17#else
17#define REXRB(p, rr, rb) ((void)0) 18#define REXRB(p, rr, rb) ((void)0)
18#define FORCE_REX 0 19#define FORCE_REX 0
19#define REX_64 0 20#define REX_64 0
21#define VEX_64 0
20#endif 22#endif
21 23
22#define emit_i8(as, i) (*--as->mcp = (MCode)(i)) 24#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -31,6 +33,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
31 MCode *p, int delta) 33 MCode *p, int delta)
32{ 34{
33 int n = (int8_t)xo; 35 int n = (int8_t)xo;
36 if (n == -60) { /* VEX-encoded instruction */
37#if LJ_64
38 xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
39#endif
40 *(uint32_t *)(p+delta-5) = (uint32_t)xo;
41 return p+delta-5;
42 }
34#if defined(__GNUC__) 43#if defined(__GNUC__)
35 if (__builtin_constant_p(xo) && n == -2) 44 if (__builtin_constant_p(xo) && n == -2)
36 p[delta-2] = (MCode)(xo >> 24); 45 p[delta-2] = (MCode)(xo >> 24);
@@ -412,8 +421,10 @@ static void emit_call_(ASMState *as, MCode *target)
412/* Use 64 bit operations to handle 64 bit IR types. */ 421/* Use 64 bit operations to handle 64 bit IR types. */
413#if LJ_64 422#if LJ_64
414#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) 423#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
424#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
415#else 425#else
416#define REX_64IR(ir, r) (r) 426#define REX_64IR(ir, r) (r)
427#define VEX_64IR(ir, r) (r)
417#endif 428#endif
418 429
419/* Generic move between two regs. */ 430/* Generic move between two regs. */
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 289f83e1..e29f4748 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -189,6 +189,11 @@ typedef struct {
189#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) 189#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
190#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) 190#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
191 191
192#define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24)))
193#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24)))
194#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24)))
195#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24)))
196
192/* This list of x86 opcodes is not intended to be complete. Opcodes are only 197/* This list of x86 opcodes is not intended to be complete. Opcodes are only
193** included when needed. Take a look at DynASM or jit.dis_x86 to see the 198** included when needed. Take a look at DynASM or jit.dis_x86 to see the
194** whole mess. 199** whole mess.
@@ -231,6 +236,12 @@ typedef enum {
231 XI_FSCALE = 0xfdd9, 236 XI_FSCALE = 0xfdd9,
232 XI_FYL2X = 0xf1d9, 237 XI_FYL2X = 0xf1d9,
233 238
239 /* VEX-encoded instructions. XV_* prefix. */
240 XV_RORX = XV_f20f3a(f0),
241 XV_SARX = XV_f30f38(f7),
242 XV_SHLX = XV_660f38(f7),
243 XV_SHRX = XV_f20f38(f7),
244
234 /* Variable-length opcodes. XO_* prefix. */ 245 /* Variable-length opcodes. XO_* prefix. */
235 XO_MOV = XO_(8b), 246 XO_MOV = XO_(8b),
236 XO_MOVto = XO_(89), 247 XO_MOVto = XO_(89),