about | summary | refs | log | tree | commit | diff
path: root/src/lj_emit_x86.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_emit_x86.h')
-rw-r--r-- src/lj_emit_x86.h 200
1 files changed, 152 insertions, 48 deletions
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index d8b4b8a0..3d3beda3 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -13,10 +13,17 @@
13 if (rex != 0x40) *--(p) = rex; } 13 if (rex != 0x40) *--(p) = rex; }
14#define FORCE_REX 0x200 14#define FORCE_REX 0x200
15#define REX_64 (FORCE_REX|0x080000) 15#define REX_64 (FORCE_REX|0x080000)
16#define VEX_64 0x800000
16#else 17#else
17#define REXRB(p, rr, rb) ((void)0) 18#define REXRB(p, rr, rb) ((void)0)
18#define FORCE_REX 0 19#define FORCE_REX 0
19#define REX_64 0 20#define REX_64 0
21#define VEX_64 0
22#endif
23#if LJ_GC64
24#define REX_GC64 REX_64
25#else
26#define REX_GC64 0
20#endif 27#endif
21 28
22#define emit_i8(as, i) (*--as->mcp = (MCode)(i)) 29#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -31,7 +38,14 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
31 MCode *p, int delta) 38 MCode *p, int delta)
32{ 39{
33 int n = (int8_t)xo; 40 int n = (int8_t)xo;
34#if defined(__GNUC__) 41 if (n == -60) { /* VEX-encoded instruction */
42#if LJ_64
43 xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
44#endif
45 *(uint32_t *)(p+delta-5) = (uint32_t)xo;
46 return p+delta-5;
47 }
48#if defined(__GNUC__) || defined(__clang__)
35 if (__builtin_constant_p(xo) && n == -2) 49 if (__builtin_constant_p(xo) && n == -2)
36 p[delta-2] = (MCode)(xo >> 24); 50 p[delta-2] = (MCode)(xo >> 24);
37 else if (__builtin_constant_p(xo) && n == -3) 51 else if (__builtin_constant_p(xo) && n == -3)
@@ -78,33 +92,24 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
78/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */ 92/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
79static int32_t ptr2addr(const void *p) 93static int32_t ptr2addr(const void *p)
80{ 94{
81 lua_assert((uintptr_t)p < (uintptr_t)0x80000000); 95 lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range");
82 return i32ptr(p); 96 return i32ptr(p);
83} 97}
84#else 98#else
85#define ptr2addr(p) (i32ptr((p))) 99#define ptr2addr(p) (i32ptr((p)))
86#endif 100#endif
87 101
88/* op r, [addr] */
89static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
90{
91 MCode *p = as->mcp;
92 *(int32_t *)(p-4) = ptr2addr(addr);
93#if LJ_64
94 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
95 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
96#else
97 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
98#endif
99}
100
101/* op r, [base+ofs] */ 102/* op r, [base+ofs] */
102static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) 103static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
103{ 104{
104 MCode *p = as->mcp; 105 MCode *p = as->mcp;
105 x86Mode mode; 106 x86Mode mode;
106 if (ra_hasreg(rb)) { 107 if (ra_hasreg(rb)) {
107 if (ofs == 0 && (rb&7) != RID_EBP) { 108 if (LJ_GC64 && rb == RID_RIP) {
109 mode = XM_OFS0;
110 p -= 4;
111 *(int32_t *)p = ofs;
112 } else if (ofs == 0 && (rb&7) != RID_EBP) {
108 mode = XM_OFS0; 113 mode = XM_OFS0;
109 } else if (checki8(ofs)) { 114 } else if (checki8(ofs)) {
110 *--p = (MCode)ofs; 115 *--p = (MCode)ofs;
@@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
202 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); 207 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
203 rb = RID_ESP; 208 rb = RID_ESP;
204#endif 209#endif
210 } else if (LJ_GC64 && rb == RID_RIP) {
211 lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index");
212 mode = XM_OFS0;
213 p -= 4;
214 *(int32_t *)p = as->mrm.ofs;
205 } else { 215 } else {
206 if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { 216 if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
207 mode = XM_OFS0; 217 mode = XM_OFS0;
@@ -241,10 +251,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 251
242/* -- Emit loads/stores --------------------------------------------------- */ 252/* -- Emit loads/stores --------------------------------------------------- */
243 253
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 254/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 255static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 256{
@@ -259,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
259/* Get/set global_State fields. */ 265/* Get/set global_State fields. */
260#define emit_opgl(as, xo, r, field) \ 266#define emit_opgl(as, xo, r, field) \
261 emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) 267 emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
262#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) 268#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
263#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) 269#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
264 270
265#define emit_setvmstate(as, i) \ 271#define emit_setvmstate(as, i) \
266 (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) 272 (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
@@ -285,9 +291,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
285 } 291 }
286} 292}
287 293
294#if LJ_GC64
295#define dispofs(as, k) \
296 ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
297#define mcpofs(as, k) \
298 ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
299#define mctopofs(as, k) \
300 ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
301/* mov r, addr */
302#define emit_loada(as, r, addr) \
303 emit_loadu64(as, (r), (uintptr_t)(addr))
304#else
288/* mov r, addr */ 305/* mov r, addr */
289#define emit_loada(as, r, addr) \ 306#define emit_loada(as, r, addr) \
290 emit_loadi(as, (r), ptr2addr((addr))) 307 emit_loadi(as, (r), ptr2addr((addr)))
308#endif
291 309
292#if LJ_64 310#if LJ_64
293/* mov r, imm64 or shorter 32 bit extended load. */ 311/* mov r, imm64 or shorter 32 bit extended load. */
@@ -299,6 +317,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
299 MCode *p = as->mcp; 317 MCode *p = as->mcp;
300 *(int32_t *)(p-4) = (int32_t)u64; 318 *(int32_t *)(p-4) = (int32_t)u64;
301 as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); 319 as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
320#if LJ_GC64
321 } else if (checki32(dispofs(as, u64))) {
322 emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
323 } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
324 /* Since as->realign assumes the code size doesn't change, check
325 ** RIP-relative addressing reachability for both as->mcp and as->mctop.
326 */
327 emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
328#endif
302 } else { /* Full-size 64 bit load. */ 329 } else { /* Full-size 64 bit load. */
303 MCode *p = as->mcp; 330 MCode *p = as->mcp;
304 *(uint64_t *)(p-8) = u64; 331 *(uint64_t *)(p-8) = u64;
@@ -310,13 +337,90 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
310} 337}
311#endif 338#endif
312 339
313/* movsd r, [&tv->n] / xorps r, r */ 340/* op r, [addr] */
314static void emit_loadn(ASMState *as, Reg r, cTValue *tv) 341static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
315{ 342{
316 if (tvispzero(tv)) /* Use xor only for +0. */ 343#if LJ_GC64
317 emit_rr(as, XO_XORPS, r, r); 344 if (checki32(dispofs(as, addr))) {
318 else 345 emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
319 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 346 } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
347 emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
348 } else if (!checki32((intptr_t)addr)) {
349 Reg ra = (rr & 15);
350 if (xo != XO_MOV) {
351 /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
352 uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
353 uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
354 ra = RID_DISPATCH;
355 if (checku32(dispaddr)) {
356 emit_loadi(as, ra, (int32_t)dispaddr);
357 } else { /* Full-size 64 bit load. */
358 MCode *p = as->mcp;
359 *(uint64_t *)(p-8) = dispaddr;
360 p[-9] = (MCode)(XI_MOVri+(ra&7));
361 p[-10] = 0x48 + ((ra>>3)&1);
362 p -= 10;
363 as->mcp = p;
364 }
365 if (xo == XO_GROUP3b) emit_i8(as, i8);
366 }
367 emit_rmro(as, xo, rr, ra, 0);
368 emit_loadu64(as, ra, (uintptr_t)addr);
369 } else
370#endif
371 {
372 MCode *p = as->mcp;
373 *(int32_t *)(p-4) = ptr2addr(addr);
374#if LJ_64
375 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
376 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
377#else
378 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
379#endif
380 }
381}
382
383/* Load 64 bit IR constant into register. */
384static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
385{
386 Reg r64;
387 x86Op xo;
388 const uint64_t *k = &ir_k64(ir)->u64;
389 if (rset_test(RSET_FPR, r)) {
390 r64 = r;
391 xo = XO_MOVSD;
392 } else {
393 r64 = r | REX_64;
394 xo = XO_MOV;
395 }
396 if (*k == 0) {
397 emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
398#if LJ_GC64
399 } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
400 (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
401 emit_rma(as, xo, r64, k);
402 } else {
403 if (ir->i) {
404 lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
405 "bad interned 64 bit constant");
406 } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
407 emit_loadu64(as, r, *k);
408 return;
409 } else {
410 /* If all else fails, add the FP constant at the MCode area bottom. */
411 while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
412 *(uint64_t *)as->mcbot = *k;
413 ir->i = (int32_t)(as->mctop - as->mcbot);
414 as->mcbot += 8;
415 as->mclim = as->mcbot + MCLIM_REDZONE;
416 lj_mcode_commitbot(as->J, as->mcbot);
417 }
418 emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
419#else
420 } else {
421 emit_rma(as, xo, r64, k);
422#endif
423 }
320} 424}
321 425
322/* -- Emit control-flow instructions -------------------------------------- */ 426/* -- Emit control-flow instructions -------------------------------------- */
@@ -330,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target)
330{ 434{
331 MCode *p = as->mcp; 435 MCode *p = as->mcp;
332 ptrdiff_t delta = target - p; 436 ptrdiff_t delta = target - p;
333 lua_assert(delta == (int8_t)delta); 437 lj_assertA(delta == (int8_t)delta, "short jump target out of range");
334 p[-1] = (MCode)(int8_t)delta; 438 p[-1] = (MCode)(int8_t)delta;
335 p[-2] = XI_JMPs; 439 p[-2] = XI_JMPs;
336 as->mcp = p - 2; 440 as->mcp = p - 2;
@@ -342,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target)
342{ 446{
343 MCode *p = as->mcp; 447 MCode *p = as->mcp;
344 ptrdiff_t delta = target - p; 448 ptrdiff_t delta = target - p;
345 lua_assert(delta == (int8_t)delta); 449 lj_assertA(delta == (int8_t)delta, "short jump target out of range");
346 p[-1] = (MCode)(int8_t)delta; 450 p[-1] = (MCode)(int8_t)delta;
347 p[-2] = (MCode)(XI_JCCs+(cc&15)); 451 p[-2] = (MCode)(XI_JCCs+(cc&15));
348 as->mcp = p - 2; 452 as->mcp = p - 2;
@@ -368,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source)
368#define emit_label(as) ((as)->mcp) 472#define emit_label(as) ((as)->mcp)
369 473
370/* Compute relative 32 bit offset for jump and call instructions. */ 474/* Compute relative 32 bit offset for jump and call instructions. */
371static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) 475static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
372{ 476{
373 ptrdiff_t delta = target - p; 477 ptrdiff_t delta = target - p;
374 lua_assert(delta == (int32_t)delta); 478 UNUSED(J);
479 lj_assertJ(delta == (int32_t)delta, "jump target out of range");
375 return (int32_t)delta; 480 return (int32_t)delta;
376} 481}
377 482
@@ -379,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target)
379static void emit_jcc(ASMState *as, int cc, MCode *target) 484static void emit_jcc(ASMState *as, int cc, MCode *target)
380{ 485{
381 MCode *p = as->mcp; 486 MCode *p = as->mcp;
382 *(int32_t *)(p-4) = jmprel(p, target); 487 *(int32_t *)(p-4) = jmprel(as->J, p, target);
383 p[-5] = (MCode)(XI_JCCn+(cc&15)); 488 p[-5] = (MCode)(XI_JCCn+(cc&15));
384 p[-6] = 0x0f; 489 p[-6] = 0x0f;
385 as->mcp = p - 6; 490 as->mcp = p - 6;
@@ -389,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target)
389static void emit_jmp(ASMState *as, MCode *target) 494static void emit_jmp(ASMState *as, MCode *target)
390{ 495{
391 MCode *p = as->mcp; 496 MCode *p = as->mcp;
392 *(int32_t *)(p-4) = jmprel(p, target); 497 *(int32_t *)(p-4) = jmprel(as->J, p, target);
393 p[-5] = XI_JMP; 498 p[-5] = XI_JMP;
394 as->mcp = p - 5; 499 as->mcp = p - 5;
395} 500}
@@ -406,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target)
406 return; 511 return;
407 } 512 }
408#endif 513#endif
409 *(int32_t *)(p-4) = jmprel(p, target); 514 *(int32_t *)(p-4) = jmprel(as->J, p, target);
410 p[-5] = XI_CALL; 515 p[-5] = XI_CALL;
411 as->mcp = p - 5; 516 as->mcp = p - 5;
412} 517}
@@ -418,8 +523,10 @@ static void emit_call_(ASMState *as, MCode *target)
418/* Use 64 bit operations to handle 64 bit IR types. */ 523/* Use 64 bit operations to handle 64 bit IR types. */
419#if LJ_64 524#if LJ_64
420#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) 525#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
526#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
421#else 527#else
422#define REX_64IR(ir, r) (r) 528#define REX_64IR(ir, r) (r)
529#define VEX_64IR(ir, r) (r)
423#endif 530#endif
424 531
425/* Generic move between two regs. */ 532/* Generic move between two regs. */
@@ -429,35 +536,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
429 if (dst < RID_MAX_GPR) 536 if (dst < RID_MAX_GPR)
430 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 537 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
431 else 538 else
432 emit_rr(as, XMM_MOVRR(as), dst, src); 539 emit_rr(as, XO_MOVAPS, dst, src);
433} 540}
434 541
435/* Generic load of register from stack slot. */ 542/* Generic load of register with base and (small) offset address. */
436static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 543static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
437{ 544{
438 if (r < RID_MAX_GPR) 545 if (r < RID_MAX_GPR)
439 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 546 emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
440 else 547 else
441 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 548 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
442} 549}
443 550
444/* Generic store of register to stack slot. */ 551/* Generic store of register with base and (small) offset address. */
445static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 552static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
446{ 553{
447 if (r < RID_MAX_GPR) 554 if (r < RID_MAX_GPR)
448 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); 555 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
449 else 556 else
450 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); 557 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
451} 558}
452 559
453/* Add offset to pointer. */ 560/* Add offset to pointer. */
454static void emit_addptr(ASMState *as, Reg r, int32_t ofs) 561static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
455{ 562{
456 if (ofs) { 563 if (ofs) {
457 if ((as->flags & JIT_F_LEA_AGU)) 564 emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
458 emit_rmro(as, XO_LEA, r, r, ofs);
459 else
460 emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
461 } 565 }
462} 566}
463 567