Diffstat (limited to 'src/lj_emit_x86.h')
 -rw-r--r--  src/lj_emit_x86.h | 179
 1 file changed, 142 insertions(+), 37 deletions(-)
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 7bbc695b..bc4391a0 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -13,10 +13,17 @@
       if (rex != 0x40) *--(p) = rex; }
 #define FORCE_REX	0x200
 #define REX_64		(FORCE_REX|0x080000)
+#define VEX_64		0x800000
 #else
 #define REXRB(p, rr, rb)	((void)0)
 #define FORCE_REX	0
 #define REX_64		0
+#define VEX_64		0
+#endif
+#if LJ_GC64
+#define REX_GC64	REX_64
+#else
+#define REX_GC64	0
 #endif
 
 #define emit_i8(as, i)		(*--as->mcp = (MCode)(i))
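Note: the new REX_GC64 define gates the REX.W prefix on pointer width. With
LJ_GC64 enabled, GC object references are full 64 bit values, so moves of
them must be 64 bit operations; on 32 bit GC builds the same call sites
collapse to plain 32 bit ops. A minimal sketch of the intended call-site
pattern (emit_movgcref is a hypothetical helper; emit_rr and XO_MOV are from
this file):

    /* Copy a GC reference between two GPRs. Under LJ_GC64 this emits
    ** "mov dst64, src64" (REX.W via REX_GC64), else a 32 bit mov.
    */
    static void emit_movgcref(ASMState *as, Reg dst, Reg src)
    {
      emit_rr(as, XO_MOV, dst|REX_GC64, src);
    }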
@@ -31,6 +38,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
 				 MCode *p, int delta)
 {
   int n = (int8_t)xo;
+  if (n == -60) {  /* VEX-encoded instruction */
+#if LJ_64
+    xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
+#endif
+    *(uint32_t *)(p+delta-5) = (uint32_t)xo;
+    return p+delta-5;
+  }
 #if defined(__GNUC__)
   if (__builtin_constant_p(xo) && n == -2)
     p[delta-2] = (MCode)(xo >> 24);
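Note: 0xc4 is the three-byte VEX escape, so n == -60 ((int8_t)0xc4) marks an
x86Op holding a pre-assembled little-endian VEX template, whose prefix and
opcode bytes are stored in front of the already-emitted ModRM byte in one
32 bit write. VEX stores R/X/B bit-inverted in bits 7..5 of its second byte,
i.e. bits 15..13 of the template, which is what the XOR patches for register
numbers >= 8. A standalone check of that bit math (vex_fixup is a
hypothetical mirror of the XOR above):

    #include <assert.h>
    #include <stdint.h>

    /* Flip the inverted R/X/B fields (template bits 15..13) when the
    ** corresponding register number is >= 8.
    */
    static uint32_t vex_fixup(uint32_t xo, unsigned rr, unsigned rx, unsigned rb)
    {
      return xo ^ ((((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13);
    }

    int main(void)
    {
      uint32_t tmpl = 0xc4u | (7u<<13);  /* escape byte; inverted R/X/B all set */
      /* Register 8 needs the extension: only the inverted R bit (bit 15) flips. */
      assert(vex_fixup(tmpl, 8, 0, 0) == (0xc4u | (3u<<13)));
      /* Registers 0..7 leave the template untouched. */
      assert(vex_fixup(tmpl, 7, 7, 7) == tmpl);
      return 0;
    }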
@@ -85,26 +99,17 @@ static int32_t ptr2addr(const void *p)
 #define ptr2addr(p)	(i32ptr((p)))
 #endif
 
-/* op r, [addr] */
-static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
-{
-  MCode *p = as->mcp;
-  *(int32_t *)(p-4) = ptr2addr(addr);
-#if LJ_64
-  p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
-  as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
-#else
-  as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
-#endif
-}
-
 /* op r, [base+ofs] */
 static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
 {
   MCode *p = as->mcp;
   x86Mode mode;
   if (ra_hasreg(rb)) {
-    if (ofs == 0 && (rb&7) != RID_EBP) {
+    if (LJ_GC64 && rb == RID_RIP) {
+      mode = XM_OFS0;
+      p -= 4;
+      *(int32_t *)p = ofs;
+    } else if (ofs == 0 && (rb&7) != RID_EBP) {
       mode = XM_OFS0;
     } else if (checki8(ofs)) {
       *--p = (MCode)ofs;
@@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
       *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
       rb = RID_ESP;
 #endif
+    } else if (LJ_GC64 && rb == RID_RIP) {
+      lua_assert(as->mrm.idx == RID_NONE);
+      mode = XM_OFS0;
+      p -= 4;
+      *(int32_t *)p = as->mrm.ofs;
     } else {
       if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
 	mode = XM_OFS0;
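Note: both RID_RIP special cases reuse the x64 encoding quirk that ModRM
mod=00 with r/m=101 (the EBP column) means disp32(RIP) instead of [EBP],
which is why the pseudo-register only needs XM_OFS0 plus a stored 32 bit
offset, and why its low three bits must alias RID_EBP once masked with &7.
Because LuaJIT emits machine code backwards, as->mcp before emission is the
end address of the instruction being built, exactly the reference point of
RIP-relative displacements. A usage sketch (hypothetical helper, LJ_GC64
assumed; mcpofs is introduced further down in this patch):

    /* Emit "mov r64, [rip+disp32]" for a target within +/-2GB of the code. */
    static void emit_loadrip64(ASMState *as, Reg r, const void *addr)
    {
      emit_rmro(as, XO_MOV, r|REX_64, RID_RIP, (int32_t)mcpofs(as, addr));
    }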
@@ -241,10 +251,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
 
 /* -- Emit loads/stores --------------------------------------------------- */
 
-/* Instruction selection for XMM moves. */
-#define XMM_MOVRR(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
-#define XMM_MOVRM(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
-
 /* mov [base+ofs], i */
 static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 {
@@ -259,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 /* Get/set global_State fields. */
 #define emit_opgl(as, xo, r, field) \
   emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
-#define emit_getgl(as, r, field)	emit_opgl(as, XO_MOV, (r), field)
-#define emit_setgl(as, r, field)	emit_opgl(as, XO_MOVto, (r), field)
+#define emit_getgl(as, r, field)	emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
+#define emit_setgl(as, r, field)	emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
 
 #define emit_setvmstate(as, i) \
   (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
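Note: global_State fields that hold GC references widen to 64 bit under
LJ_GC64, so the two accessors now splice REX_GC64 into the register operand;
scalar fields like vmstate keep going through emit_opgl with 32 bit ops.
Call sites are unchanged, e.g. (field chosen for illustration):

    /* Expands to emit_rma(as, XO_MOV, (r)|REX_GC64, &J2G(as->J)->cur_L):
    ** a 64 bit load under LJ_GC64, a 32 bit load otherwise.
    */
    emit_getgl(as, r, cur_L);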
@@ -285,9 +291,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
   }
 }
 
+#if LJ_GC64
+#define dispofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
+#define mcpofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
+#define mctopofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
+/* mov r, addr */
+#define emit_loada(as, r, addr) \
+  emit_loadu64(as, (r), (uintptr_t)(addr))
+#else
 /* mov r, addr */
 #define emit_loada(as, r, addr) \
   emit_loadi(as, (r), ptr2addr((addr)))
+#endif
 
 #if LJ_64
 /* mov r, imm64 or shorter 32 bit extended load. */
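Note: the three macros measure the signed distance from a constant to the
three anchors a GC64 trace can address cheaply: the dispatch table (whose
address lives in the reserved DISPATCH register), the current machine-code
pointer, and the top of the machine-code area. Whenever one of them fits in
32 bits, the 10-byte movabs can be avoided. A hypothetical cost model of the
ladder used by emit_loadu64 below (LJ_GC64 assumed; byte counts ignore the
extra REX.B needed for r8..r15):

    static int loadu64_cost(ASMState *as, uint64_t k)
    {
      if (checku32(k)) return 5;               /* mov r32, imm32 (zero-extends) */
      if (checki32((int64_t)k)) return 7;      /* mov r/m64, simm32 */
      if (checki32(dispofs(as, k))) return 7;  /* lea r64, [DISPATCH+disp32] */
      if (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))
        return 7;                              /* lea r64, [rip+disp32] */
      return 10;                               /* movabs r64, imm64 */
    }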
@@ -299,6 +317,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
     MCode *p = as->mcp;
     *(int32_t *)(p-4) = (int32_t)u64;
     as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
+#if LJ_GC64
+  } else if (checki32(dispofs(as, u64))) {
+    emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
+  } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
+    /* Since as->realign assumes the code size doesn't change, check
+    ** RIP-relative addressing reachability for both as->mcp and as->mctop.
+    */
+    emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
+#endif
   } else {  /* Full-size 64 bit load. */
     MCode *p = as->mcp;
     *(uint64_t *)(p-8) = u64;
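Note: the double check is needed because as->realign assumes the generated
code can be moved without re-encoding it, so a RIP-relative displacement
must stay within signed 32 bit range from both the current position and the
top of the machine-code area. Measuring from as->mcp is correct because of
backwards emission: before the LEA is written, as->mcp is the address just
past it, which is what RIP addressing is defined against. The invariant as a
predicate (hypothetical, LJ_GC64 assumed):

    /* True iff target is RIP-reachable from every possible placement. */
    static int rip_reachable(ASMState *as, const void *target)
    {
      return checki32(mcpofs(as, target)) && checki32(mctopofs(as, target));
    }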
@@ -310,13 +337,89 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
 }
 #endif
 
-/* movsd r, [&tv->n] / xorps r, r */
-static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
-{
-  if (tvispzero(tv))  /* Use xor only for +0. */
-    emit_rr(as, XO_XORPS, r, r);
-  else
-    emit_rma(as, XMM_MOVRM(as), r, &tv->n);
+/* op r, [addr] */
+static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
+{
+#if LJ_GC64
+  if (checki32(dispofs(as, addr))) {
+    emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
+  } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
+    emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
+  } else if (!checki32((intptr_t)addr)) {
+    Reg ra = (rr & 15);
+    if (xo != XO_MOV) {
+      /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
+      uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
+      uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
+      ra = RID_DISPATCH;
+      if (checku32(dispaddr)) {
+	emit_loadi(as, ra, (int32_t)dispaddr);
+      } else {  /* Full-size 64 bit load. */
+	MCode *p = as->mcp;
+	*(uint64_t *)(p-8) = dispaddr;
+	p[-9] = (MCode)(XI_MOVri+(ra&7));
+	p[-10] = 0x48 + ((ra>>3)&1);
+	p -= 10;
+	as->mcp = p;
+      }
+      if (xo == XO_GROUP3b) emit_i8(as, i8);
+    }
+    emit_rmro(as, xo, rr, ra, 0);
+    emit_loadu64(as, ra, (uintptr_t)addr);
+  } else
+#endif
+  {
+    MCode *p = as->mcp;
+    *(int32_t *)(p-4) = ptr2addr(addr);
+#if LJ_64
+    p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
+    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
+#else
+    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
+#endif
+  }
+}
+
+/* Load 64 bit IR constant into register. */
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+  Reg r64;
+  x86Op xo;
+  const uint64_t *k = &ir_k64(ir)->u64;
+  if (rset_test(RSET_FPR, r)) {
+    r64 = r;
+    xo = XO_MOVSD;
+  } else {
+    r64 = r | REX_64;
+    xo = XO_MOV;
+  }
+  if (*k == 0) {
+    emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
+#if LJ_GC64
+  } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
+	     (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
+    emit_rma(as, xo, r64, k);
+  } else {
+    if (ir->i) {
+      lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
+    } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
+      emit_loadu64(as, r, *k);
+      return;
+    } else {
+      /* If all else fails, add the FP constant at the MCode area bottom. */
+      while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
+      *(uint64_t *)as->mcbot = *k;
+      ir->i = (int32_t)(as->mctop - as->mcbot);
+      as->mcbot += 8;
+      as->mclim = as->mcbot + MCLIM_REDZONE;
+      lj_mcode_commitbot(as->J, as->mcbot);
+    }
+    emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
+#else
+  } else {
+    emit_rma(as, xo, r64, k);
+#endif
+  }
 }
 
 /* -- Emit control-flow instructions -------------------------------------- */
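Note: two details of the GC64 paths deserve spelling out. In emit_rma, when
a full 64 bit address must go through a register, a plain XO_MOV can reuse
its own destination (rr & 15) as the scratch register, since the mov
overwrites it anyway; any other opcode cannot allocate a register this late,
so DISPATCH is borrowed and then restored. Because code is emitted
backwards, the restore sequence emitted first is what executes last, and the
XO_GROUP3b immediate byte already sitting at *as->mcp is popped and
re-emitted so it stays adjacent to its instruction. In emit_loadk64,
out-of-range FP constants are interned at the bottom of the machine-code
area and found again via their distance from as->mctop, cached in ir->i. A
standalone model of that pool step (pool_add_u64 and its buffer arguments
are hypothetical):

    #include <stdint.h>

    /* Pad to 8 byte alignment with int3 filler (0xcc), append the constant,
    ** and return its distance from the area top, so a later RIP-relative
    ** load can recompute the address as mctop - dist.
    */
    static int32_t pool_add_u64(uint8_t **mcbot, uint8_t *mctop, uint64_t k)
    {
      while ((uintptr_t)*mcbot & 7) *(*mcbot)++ = 0xcc;  /* XI_INT3 */
      *(uint64_t *)(void *)*mcbot = k;
      *mcbot += 8;
      return (int32_t)(mctop - (*mcbot - 8));
    }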
@@ -418,8 +521,10 @@ static void emit_call_(ASMState *as, MCode *target)
 /* Use 64 bit operations to handle 64 bit IR types. */
 #if LJ_64
 #define REX_64IR(ir, r)		((r) + (irt_is64((ir)->t) ? REX_64 : 0))
+#define VEX_64IR(ir, r)		((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
 #else
 #define REX_64IR(ir, r)		(r)
+#define VEX_64IR(ir, r)		(r)
 #endif
 
 /* Generic move between two regs. */
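Note: VEX_64 (0x800000) is bit 23 of the 32 bit VEX template, i.e. the W bit
of the third VEX byte, so VEX_64IR widens a VEX-encoded op by IR type just
as REX_64IR does for legacy-encoded ops, except the flag is ORed into the
opcode template rather than expressed as a prefix. An illustrative call-site
pattern (xv stands for some VEX x86Op template defined elsewhere):

    /* 64 bit IR type: sets VEX.W in the template patched by emit_op;
    ** 32 bit IR type: leaves the template as-is.
    */
    emit_mrm(as, VEX_64IR(ir, xv), dest, left);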
@@ -429,25 +534,25 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
   if (dst < RID_MAX_GPR)
     emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
   else
-    emit_rr(as, XMM_MOVRR(as), dst, src);
+    emit_rr(as, XO_MOVAPS, dst, src);
 }
 
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
+    emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
   else
-    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
 }
 
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 {
   if (r < RID_MAX_GPR)
-    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs);
+    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
   else
-    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
 }
 
 /* Add offset to pointer. */
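Note: emit_spload/emit_spstore became emit_loadofs/emit_storeofs by lifting
the hard-coded RID_ESP base into a parameter, which the GC64 backend needs
for loads and stores off other bases. The dropped XMM_MOVRR/XMM_MOVRM
selection leaves plain MOVAPS/MOVSD, presumably because the JIT_F_SPLIT_XMM
workaround is no longer consulted here. The old stack-slot behavior is just
the base == RID_ESP special case (hypothetical compatibility macros):

    #define emit_spload(as, ir, r, ofs)   emit_loadofs(as, ir, r, RID_ESP, ofs)
    #define emit_spstore(as, ir, r, ofs)  emit_storeofs(as, ir, r, RID_ESP, ofs)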
@@ -455,9 +560,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
 {
   if (ofs) {
     if ((as->flags & JIT_F_LEA_AGU))
-      emit_rmro(as, XO_LEA, r, r, ofs);
+      emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
     else
-      emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
+      emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
   }
 }
 
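Note: pointer arithmetic has to be full-width under LJ_GC64, hence REX_GC64
on both forms. The JIT_F_LEA_AGU variant prefers lea because it leaves
EFLAGS untouched and can run on the address-generation unit of some
microarchitectures; add is the default path. An illustrative call (assuming
ofs fits the usual immediate forms):

    /* lea r64, [r64+8]  (JIT_F_LEA_AGU set; flags preserved)
    ** add r64, 8        (otherwise; writes flags)
    */
    emit_addptr(as, r, sizeof(TValue));  /* e.g. advance one TValue */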