author    Mike Pall <mike>  2016-05-23 06:01:54 +0200
committer Mike Pall <mike>  2016-05-23 06:01:54 +0200
commit    2868715d80b6ac497a7f08393ec325b60d71df8d (patch)
tree      2064588fe32607f19f56ed0d23d4fb225b82e068
parent    6c8258d74b7d4ae7f288897518f23c809b9395f2 (diff)
x64/LJ_GC64: Add missing backend support and enable JIT compilation.
Contributed by Peter Cawley.
 src/lj_arch.h       |   2
 src/lj_asm.c        |  11
 src/lj_asm_x86.h    | 430
 src/lj_emit_x86.h   | 121
 src/lj_ffrecord.c   |   5
 src/lj_ir.h         |   8
 src/lj_record.c     |   6
 src/lj_snap.c       |   9
 src/lj_target_x86.h |  12
 src/vm_x64.dasc     |   3

10 files changed, 517 insertions, 90 deletions
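
Background for the hunks below (a sketch, not part of the commit): under LJ_GC64 a TValue is one 64-bit word with a 17-bit type tag in bits 47..63 and the payload (e.g. a 47-bit GC pointer) below it. The recurring magic numbers in this patch — (itype << 47), (itype << 15) | 0x7fff for the high 32-bit word of a primitive, and the ror/sar/shr counts 47 and 17 — all fall out of that layout. The helper names here are invented for illustration; the itype convention follows lj_obj.h:

#include <stdint.h>

/* Illustrative GC64 tagging helpers; names are invented for this sketch. */
static uint64_t tag_value(uint32_t itype, uint64_t payload47)
{
  return ((uint64_t)itype << 47) | payload47;  /* tag in bits 47..63 */
}

/* High 32-bit word of a primitive TValue: tag plus all-one payload bits.
** This is the (itype << 15) | 0x7fff pattern used in the stores below.
*/
static uint32_t tag_hiword(uint32_t itype)
{
  return (itype << 15) | 0x7fff;
}

/* Type check + untag, mirroring the ror-47/cmp/shr sequence the backend
** emits: rotate the 17-bit tag into the low bits, compare the low 16 of
** them against the expected itype, then shift the tag off again.
*/
static int check_untag(uint64_t v, uint32_t itype, uint64_t *payload)
{
  uint64_t r = (v >> 47) | (v << 17);            /* ror v, 47 */
  if ((uint16_t)r != (uint16_t)itype) return 0;  /* cmp r16, itype */
  *payload = r >> 17;                            /* strip the tag */
  return 1;
}
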
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 72622a21..3c3c98b1 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -453,7 +453,7 @@
 #endif
 
 /* Disable or enable the JIT compiler. */
-#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
 #define LJ_HASJIT 0
 #else
 #define LJ_HASJIT 1
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 5dd7ca3a..dba5c178 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -346,6 +346,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -1920,8 +1926,12 @@ static void asm_tail_link(ASMState *as)
     if (bc_isret(bc_op(*retpc)))
       pc = retpc;
   }
+#if LJ_GC64
+  emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
   ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
   ra_allockreg(as, i32ptr(pc), RID_LPC);
+#endif
   mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
   switch (bc_op(*pc)) {
   case BC_CALLM: case BC_CALLMT:
@@ -2314,6 +2324,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
     as->curins = as->T->snap[0].ref;
     asm_snap_prep(as);  /* The GC check is a guard. */
     asm_gc_check(as);
+    as->curins = as->stopins;
   }
   ra_evictk(as);
   if (as->parent)
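
The common thread in the x64 changes that follow: with 64-bit GC pointers, the old trick of embedding absolute 32-bit addresses in instructions no longer works, so the backend addresses global state relative to the dispatch table, kept in a register the GC64 port reserves for this purpose (RID_DISPATCH). A sketch of the idea, reusing the dispofs() helper this patch adds to lj_emit_x86.h (the function name is invented):

/* Sketch: addressing a global_State field under LJ_GC64. If the field
** is within +-2GB of the dispatch table, a single [RID_DISPATCH+ofs32]
** operand reaches it; only otherwise is a full 64-bit address needed.
*/
static void load_global_field_sketch(ASMState *as, Reg r, void *field)
{
  intptr_t ofs = dispofs(as, field);  /* field - J2GG(as->J)->dispatch */
  if (checki32(ofs)) {
    emit_rmro(as, XO_MOV, r|REX_64, RID_DISPATCH, (int32_t)ofs);
  } else {
    /* Fallback mirrors emit_rma(): load through a materialized address.
    ** (Emission runs backwards, so the mov through r is written first.)
    */
    emit_rmro(as, XO_MOV, r|REX_64, r, 0);
    emit_loadu64(as, r, (uintptr_t)field);
  }
}
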
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 83fe22b2..7d07336a 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
   }
   /* Push the high byte of the exitno for each exit stub group. */
   *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
+#if !LJ_GC64
   /* Store DISPATCH at original stack slot 0. Account for the two push ops. */
   *mxp++ = XI_MOVmi;
   *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
   *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
   *mxp++ = 2*sizeof(void *);
   *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
+#endif
   /* Jump to exit handler which fills in the ExitState. */
   *mxp++ = XI_JMP; mxp += 4;
   *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
@@ -62,10 +64,14 @@ static void asm_guardcc(ASMState *as, int cc)
     target = p;
     cc ^= 1;
     if (as->realign) {
+      if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
+        as->mrm.ofs += 2;  /* Fixup RIP offset for pending fused load. */
       emit_sjcc(as, cc, target);
       return;
     }
   }
+  if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
+    as->mrm.ofs += 6;  /* Fixup RIP offset for pending fused load. */
   emit_jcc(as, cc, target);
 }
 
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
 {
   if (irref_isk(ref)) {
     IRIns *ir = IR(ref);
+#if LJ_GC64
+    if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
+      *k = ir->i;
+      return 1;
+    } else if (checki32((int64_t)ir_k64(ir)->u64)) {
+      *k = (int32_t)ir_k64(ir)->u64;
+      return 1;
+    }
+#else
     if (ir->o != IR_KINT64) {
       *k = ir->i;
       return 1;
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
       *k = (int32_t)ir_kint64(ir)->u64;
       return 1;
     }
+#endif
   }
   return 0;
 }
@@ -185,9 +201,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
     if (irref_isk(ir->op1)) {
       GCfunc *fn = ir_kfunc(IR(ir->op1));
       GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+#if LJ_GC64
+      int64_t ofs = dispofs(as, &uv->tv);
+      if (checki32(ofs) && checki32(ofs+4)) {
+        as->mrm.ofs = (int32_t)ofs;
+        as->mrm.base = RID_DISPATCH;
+        as->mrm.idx = RID_NONE;
+        return;
+      }
+#else
       as->mrm.ofs = ptr2addr(&uv->tv);
       as->mrm.base = as->mrm.idx = RID_NONE;
       return;
+#endif
     }
     break;
   default:
@@ -207,17 +233,38 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
   lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
   as->mrm.idx = RID_NONE;
   if (ir->op1 == REF_NIL) {
+#if LJ_GC64
+    as->mrm.ofs = (int32_t)ir->op2 - GG_OFS(dispatch);
+    as->mrm.base = RID_DISPATCH;
+#else
     as->mrm.ofs = (int32_t)ir->op2 + ptr2addr(J2GG(as->J));
     as->mrm.base = RID_NONE;
+#endif
     return;
   }
   as->mrm.ofs = field_ofs[ir->op2];
   if (irref_isk(ir->op1)) {
-    as->mrm.ofs += IR(ir->op1)->i;
+    IRIns *op1 = IR(ir->op1);
+#if LJ_GC64
+    if (ir->op1 == REF_NIL) {
+      as->mrm.ofs -= GG_OFS(dispatch);
+      as->mrm.base = RID_DISPATCH;
+      return;
+    } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
+      intptr_t ofs = dispofs(as, ir_kptr(op1));
+      if (checki32(as->mrm.ofs + ofs)) {
+        as->mrm.ofs += (int32_t)ofs;
+        as->mrm.base = RID_DISPATCH;
+        return;
+      }
+    }
+#else
+    as->mrm.ofs += op1->i;
     as->mrm.base = RID_NONE;
-  } else {
-    as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
+    return;
+#endif
   }
+  as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
 }
 
 /* Fuse string reference into memory operand. */
@@ -228,7 +275,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
   as->mrm.base = as->mrm.idx = RID_NONE;
   as->mrm.scale = XM_SCALE1;
   as->mrm.ofs = sizeof(GCstr);
-  if (irref_isk(ir->op1)) {
+  if (!LJ_GC64 && irref_isk(ir->op1)) {
     as->mrm.ofs += IR(ir->op1)->i;
   } else {
     Reg r = ra_alloc1(as, ir->op1, allow);
@@ -260,10 +307,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
   IRIns *ir = IR(ref);
   as->mrm.idx = RID_NONE;
   if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+#if LJ_GC64
+    intptr_t ofs = dispofs(as, ir_kptr(ir));
+    if (checki32(ofs)) {
+      as->mrm.ofs = (int32_t)ofs;
+      as->mrm.base = RID_DISPATCH;
+      return;
+    }
+  } if (0) {
+#else
     as->mrm.ofs = ir->i;
     as->mrm.base = RID_NONE;
   } else if (ir->o == IR_STRREF) {
     asm_fusestrref(as, ir, allow);
+#endif
   } else {
     as->mrm.ofs = 0;
     if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
@@ -310,13 +367,41 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
 static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
 {
   const uint64_t *k = &ir_k64(ir)->u64;
-  as->mrm.ofs = ptr2addr(k);
-  as->mrm.base = RID_NONE;
+  if (!LJ_GC64 || checki32((intptr_t)k)) {
+    as->mrm.ofs = ptr2addr(k);
+    as->mrm.base = RID_NONE;
+#if LJ_GC64
+  } else if (checki32(dispofs(as, k))) {
+    as->mrm.ofs = (int32_t)dispofs(as, k);
+    as->mrm.base = RID_DISPATCH;
+  } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) &&
+             checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
+    as->mrm.ofs = (int32_t)mcpofs(as, k);
+    as->mrm.base = RID_RIP;
+  } else {
+    if (ir->i) {
+      lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
+    } else {
+      while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
+      *(uint64_t*)as->mcbot = *k;
+      ir->i = (int32_t)(as->mctop - as->mcbot);
+      as->mcbot += 8;
+      as->mclim = as->mcbot + MCLIM_REDZONE;
+    }
+    as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
+    as->mrm.base = RID_RIP;
+#endif
+  }
   as->mrm.idx = RID_NONE;
   return RID_MRM;
 }
 
-/* Fuse load into memory operand. */
+/* Fuse load into memory operand.
+**
+** Important caveat: this may emit RIP-relative loads! So don't place any
+** code emitters between this function and the use of its result.
+** The only permitted exception is asm_guardcc().
+*/
 static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
 {
   IRIns *ir = IR(ref);
@@ -346,7 +431,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
   if (ir->o == IR_SLOAD) {
     if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
-        noconflict(as, ref, IR_RETF, 0)) {
+        noconflict(as, ref, IR_RETF, 0) &&
+        !(LJ_GC64 && irt_isaddr(ir->t))) {
       as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
       as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
                     (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
@@ -361,7 +447,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
     }
   } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
-    if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
+    if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
+        !(LJ_GC64 && irt_isaddr(ir->t))) {
       asm_fuseahuref(as, ir->op1, xallow);
       return RID_MRM;
     }
@@ -374,7 +461,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       asm_fusexref(as, ir->op1, xallow);
       return RID_MRM;
     }
-  } else if (ir->o == IR_VLOAD) {
+  } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
     asm_fuseahuref(as, ir->op1, xallow);
     return RID_MRM;
   }
@@ -499,8 +586,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
     if (r) {  /* Argument is in a register. */
       if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
 #if LJ_64
-        if (ir->o == IR_KINT64)
-          emit_loadu64(as, r, ir_kint64(ir)->u64);
+        if (LJ_GC64 ? ir->o != IR_KINT : ir->o == IR_KINT64)
+          emit_loadu64(as, r, ir_k64(ir)->u64);
        else
 #endif
          emit_loadi(as, r, ir->i);
@@ -668,7 +755,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
   emit_addptr(as, base, -8*delta);
   asm_guardcc(as, CC_NE);
 #if LJ_FR2
-  emit_rmro(as, XO_CMP, rpc, base, -8);
+  emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
   emit_loadu64(as, rpc, u64ptr(pc));
 #else
   emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
@@ -696,8 +783,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   Reg tmp = ra_noreg(IR(ir->op1)->r) ?
              ra_alloc1(as, ir->op1, RSET_FPR) :
              ra_scratch(as, RSET_FPR);
-  Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
+  Reg right;
   emit_rr(as, XO_MOVDto, tmp, dest);
+  right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
   emit_mrm(as, XO_ADDSD, tmp, right);
   ra_left(as, tmp, ir->op1);
 }
@@ -768,13 +856,12 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_rr(as, op, dest|REX_64, tmp);
       ra_left(as, tmp, lref);
     } else {
-      Reg left = asm_fuseload(as, lref, RSET_FPR);
       if (LJ_64 && irt_isu32(ir->t))
        emit_rr(as, XO_MOV, dest, dest);  /* Zero hiword. */
       emit_mrm(as, op,
               dest|((LJ_64 &&
                      (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
-              left);
+              asm_fuseload(as, lref, RSET_FPR));
     }
   }
  } else if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
@@ -952,6 +1039,24 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
     /* Otherwise use g->tmptv to hold the TValue. */
+#if LJ_GC64
+    if (irref_isk(ref)) {
+      TValue k;
+      lj_ir_kvalue(as->J->L, &k, ir);
+      emit_movmroi(as, dest, 4, k.u32.hi);
+      emit_movmroi(as, dest, 0, k.u32.lo);
+    } else {
+      /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      if (irt_is64(ir->t)) {
+        emit_u32(as, irt_toitype(ir->t) << 15);
+        emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+      } else {
+        emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15) | 0x7fff);
+      }
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    }
+#else
     if (!irref_isk(ref)) {
       Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
       emit_movtomro(as, REX_64IR(ir, src), dest, 0);
@@ -960,6 +1065,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     }
     if (!(LJ_64 && irt_islightud(ir->t)))
       emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+#endif
     emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
 }
@@ -969,9 +1075,9 @@ static void asm_aref(ASMState *as, IRIns *ir)
   Reg dest = ra_dest(as, ir, RSET_GPR);
   asm_fusearef(as, ir, RSET_GPR);
   if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
-    emit_mrm(as, XO_LEA, dest, RID_MRM);
+    emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
   else if (as->mrm.base != dest)
-    emit_rr(as, XO_MOV, dest, as->mrm.base);
+    emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
 }
 
 /* Inlined hash lookup.  Specialized for key type and for const keys.
@@ -998,7 +1104,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   if (!isk) {
     rset_clear(allow, tab);
     key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
-    if (!irt_isstr(kt))
+    if (LJ_GC64 || !irt_isstr(kt))
      tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
@@ -1011,8 +1117,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
-  emit_rr(as, XO_TEST, dest, dest);
-  emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
+  emit_rr(as, XO_TEST, dest|REX_GC64, dest);
+  emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
   l_next = emit_label(as);
 
   /* Type and value comparison. */
@@ -1033,7 +1139,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
     emit_sjcc(as, CC_AE, l_next);
     /* The type check avoids NaN penalties and complaints from Valgrind. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
     emit_u32(as, LJ_TISNUM);
     emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
 #else
@@ -1041,10 +1147,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
 #endif
   }
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (irt_islightud(kt)) {
     emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
-#endif
+#elif LJ_GC64
+  } else if (irt_isaddr(kt)) {
+    if (isk) {
+      TValue k;
+      k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+      emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
+                 k.u32.lo);
+      emit_sjcc(as, CC_NE, l_next);
+      emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
+                 k.u32.hi);
+    } else {
+      emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
+    }
+  } else {
+    lua_assert(irt_ispri(kt) && !irt_isnil(kt));
+    emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
+    emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
+#else
   } else {
     if (!irt_ispri(kt)) {
       lua_assert(irt_isaddr(kt));
@@ -1058,16 +1181,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       lua_assert(!irt_isnil(kt));
       emit_i8(as, irt_toitype(kt));
       emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
+#endif
   }
   emit_sfixup(as, l_loop);
   checkmclim(as);
+#if LJ_GC64
+  if (!isk && irt_isaddr(kt)) {
+    emit_rr(as, XO_OR, tmp|REX_64, key);
+    emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
+  }
+#endif
 
   /* Load main position relative to tab->node into dest. */
   khash = isk ? ir_khash(irkey) : 1;
   if (khash == 0) {
-    emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
+    emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
   } else {
-    emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
+    emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
     if ((as->flags & JIT_F_PREFER_IMUL)) {
       emit_i8(as, sizeof(Node));
       emit_rr(as, XO_IMULi8, dest, dest);
@@ -1122,11 +1252,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     if (ofs != 0) {
       if (dest == node && !(as->flags & JIT_F_LEA_AGU))
-        emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
+        emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
       else
-        emit_rmro(as, XO_LEA, dest, node, ofs);
+        emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
     } else if (dest != node) {
-      emit_rr(as, XO_MOV, dest, node);
+      emit_rr(as, XO_MOV, dest|REX_GC64, node);
     }
   }
   asm_guardcc(as, CC_NE);
@@ -1138,13 +1268,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
     /* Assumes -0.0 is already canonicalized to +0.0. */
     emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
+#if LJ_GC64
+                          ((uint64_t)irt_toitype(irkey->t) << 47) |
+                          (uint64_t)ir_kgc(irkey));
+#else
                           ((uint64_t)irt_toitype(irkey->t) << 32) |
                           (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
+#endif
   } else {
     lua_assert(!irt_isnil(irkey->t));
+#if LJ_GC64
+    emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
+    emit_rmro(as, XO_ARITHi, XOg_CMP, node,
+              ofs + (int32_t)offsetof(Node, key.it));
+#else
     emit_i8(as, irt_toitype(irkey->t));
     emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
               ofs + (int32_t)offsetof(Node, key.it));
+#endif
   }
 #else
   l_exit = emit_label(as);
@@ -1179,20 +1320,21 @@ static void asm_uref(ASMState *as, IRIns *ir)
   if (irref_isk(ir->op1)) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
-    emit_rma(as, XO_MOV, dest, v);
+    emit_rma(as, XO_MOV, dest|REX_GC64, v);
   } else {
     Reg uv = ra_scratch(as, RSET_GPR);
     Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
     if (ir->o == IR_UREFC) {
-      emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
+      emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
       asm_guardcc(as, CC_NE);
       emit_i8(as, 1);
       emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
     } else {
-      emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
+      emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
     }
-    emit_rmro(as, XO_MOV, uv, func,
-              (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+    emit_rmro(as, XO_MOV, uv|REX_GC64, func,
+              (int32_t)offsetof(GCfuncL, uvptr) +
+              (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
   }
 }
 
@@ -1210,9 +1352,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
   if (as->mrm.base == RID_NONE)
     emit_loadi(as, dest, as->mrm.ofs);
   else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
-    emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
+    emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
   else
-    emit_mrm(as, XO_LEA, dest, RID_MRM);
+    emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
 }
 
 /* -- Loads and stores ---------------------------------------------------- */
@@ -1281,7 +1423,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
   case IRT_NUM: xo = XO_MOVSDto; break;
   case IRT_FLOAT: xo = XO_MOVSSto; break;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   case IRT_LIGHTUD: lua_assert(0);  /* NYI: mask 64 bit lightuserdata. */
 #endif
   default:
@@ -1313,7 +1455,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
 #define asm_fstore(as, ir)	asm_fxstore(as, ir)
 #define asm_xstore(as, ir)	asm_fxstore(as, ir)
 
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
   if (ra_used(ir) || typecheck) {
@@ -1335,9 +1477,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
+#if LJ_GC64
+  Reg tmp = RID_NONE;
+#endif
   lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
              (LJ_DUALNUM && irt_isint(ir->t)));
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   if (irt_islightud(ir->t)) {
     Reg dest = asm_load_lightud64(as, ir, 1);
     if (ra_hasreg(dest)) {
@@ -1351,20 +1496,64 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
+#if LJ_GC64
+    if (irt_isaddr(ir->t)) {
+      emit_shifti(as, XOg_SHR|REX_64, dest, 17);
+      asm_guardcc(as, CC_NE);
+      emit_i8(as, irt_toitype(ir->t));
+      emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
+      emit_i8(as, XI_O16);
+      if ((as->flags & JIT_F_BMI2)) {
+        emit_i8(as, 47);
+        emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
+      } else {
+        emit_shifti(as, XOg_ROR|REX_64, dest, 47);
+        emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
+      }
+      return;
+    } else
+#endif
     emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
-    asm_fuseahuref(as, ir->op1, RSET_GPR);
+    RegSet gpr = RSET_GPR;
+#if LJ_GC64
+    if (irt_isaddr(ir->t)) {
+      tmp = ra_scratch(as, RSET_GPR);
+      gpr = rset_exclude(gpr, tmp);
+    }
+#endif
+    asm_fuseahuref(as, ir->op1, gpr);
   }
   /* Always do the type check, even if the load result is unused. */
   as->mrm.ofs += 4;
   asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
   if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
     lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
+#if LJ_GC64
+    emit_u32(as, LJ_TISNUM << 15);
+#else
     emit_u32(as, LJ_TISNUM);
+#endif
+    emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
+#if LJ_GC64
+  } else if (irt_isaddr(ir->t)) {
+    as->mrm.ofs -= 4;
+    emit_i8(as, irt_toitype(ir->t));
+    emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
+    emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+    emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM);
+  } else if (irt_isnil(ir->t)) {
+    as->mrm.ofs -= 4;
+    emit_i8(as, -1);
+    emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM);
+  } else {
+    emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
     emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
+#else
   } else {
     emit_i8(as, irt_toitype(ir->t));
     emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
+#endif
   }
 }
 
@@ -1376,12 +1565,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
     Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     emit_mrm(as, XO_MOVSDto, src, RID_MRM);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (irt_islightud(ir->t)) {
     Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
     asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
     emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
 #endif
+#if LJ_GC64
+  } else if (irref_isk(ir->op2)) {
+    TValue k;
+    lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
+    asm_fuseahuref(as, ir->op1, RSET_GPR);
+    if (tvisnil(&k)) {
+      emit_i32(as, -1);
+      emit_mrm(as, XO_MOVmi, REX_64, RID_MRM);
+    } else {
+      emit_u32(as, k.u32.lo);
+      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+      as->mrm.ofs += 4;
+      emit_u32(as, k.u32.hi);
+      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+    }
+#endif
   } else {
     IRIns *irr = IR(ir->op2);
     RegSet allow = RSET_GPR;
@@ -1392,6 +1597,17 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
     }
     asm_fuseahuref(as, ir->op1, allow);
     if (ra_hasreg(src)) {
+#if LJ_GC64
+      if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
+        /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+        as->mrm.ofs += 4;
+        emit_u32(as, irt_toitype(ir->t) << 15);
+        emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
+        as->mrm.ofs -= 4;
+        emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
+        return;
+      }
+#endif
       emit_mrm(as, XO_MOVto, src, RID_MRM);
     } else if (!irt_ispri(irr->t)) {
       lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
@@ -1399,7 +1615,12 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
       emit_mrm(as, XO_MOVmi, 0, RID_MRM);
     }
     as->mrm.ofs += 4;
+#if LJ_GC64
+    lua_assert(LJ_DUALNUM && irt_isinteger(ir->t));
+    emit_i32(as, LJ_TNUMX << 15);
+#else
     emit_i32(as, (int32_t)irt_toitype(ir->t));
+#endif
     emit_mrm(as, XO_MOVmi, 0, RID_MRM);
   }
 }
@@ -1420,7 +1641,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
     emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (irt_islightud(t)) {
     Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
     if (ra_hasreg(dest)) {
@@ -1438,6 +1659,36 @@ static void asm_sload(ASMState *as, IRIns *ir)
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
       emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
+#if LJ_GC64
+      if (irt_isaddr(t)) {
+        /* LJ_GC64 type check + tag removal without BMI2 and with BMI2:
+        **
+        **  mov r64, [addr]    rorx r64, [addr], 47
+        **  ror r64, 47
+        **  cmp r16, itype     cmp r16, itype
+        **  jne ->exit         jne ->exit
+        **  shr r64, 16        shr r64, 16
+        */
+        emit_shifti(as, XOg_SHR|REX_64, dest, 17);
+        if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+          asm_guardcc(as, CC_NE);
+          emit_i8(as, irt_toitype(t));
+          emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
+          emit_i8(as, XI_O16);
+        }
+        if ((as->flags & JIT_F_BMI2)) {
+          emit_i8(as, 47);
+          emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
+        } else {
+          if ((ir->op2 & IRSLOAD_TYPECHECK))
+            emit_shifti(as, XOg_ROR|REX_64, dest, 47);
+          else
+            emit_shifti(as, XOg_SHL|REX_64, dest, 17);
+          emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
+        }
+        return;
+      } else
+#endif
       emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
     }
   } else {
@@ -1450,11 +1701,42 @@ static void asm_sload(ASMState *as, IRIns *ir)
     asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
     if (LJ_64 && irt_type(t) >= IRT_NUM) {
       lua_assert(irt_isinteger(t) || irt_isnum(t));
+#if LJ_GC64
+      emit_u32(as, LJ_TISNUM << 15);
+#else
       emit_u32(as, LJ_TISNUM);
+#endif
       emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
+#if LJ_GC64
+    } else if (irt_isnil(t)) {
+      /* LJ_GC64 type check for nil:
+      **
+      **   cmp qword [addr], -1
+      **   jne ->exit
+      */
+      emit_i8(as, -1);
+      emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs);
+    } else if (irt_ispri(t)) {
+      emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
+      emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
+    } else {
+      /* LJ_GC64 type check only:
+      **
+      **   mov r64, [addr]
+      **   sar r64, 47
+      **   cmp r32, itype
+      **   jne ->exit
+      */
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
+      emit_i8(as, irt_toitype(t));
+      emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
+      emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+      emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4);
+#else
     } else {
       emit_i8(as, irt_toitype(t));
       emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
+#endif
     }
   }
 }
@@ -1548,7 +1830,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
   Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
   MCLabel l_end = emit_label(as);
-  emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
+  emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
   emit_setgl(as, tab, gc.grayagain);
   emit_getgl(as, tmp, gc.grayagain);
   emit_i8(as, ~LJ_GC_BLACK);
@@ -2084,7 +2366,6 @@ static void asm_comp(ASMState *as, IRIns *ir)
       cc ^= (VCC_PS|(5<<4));  /* A <-> B, AE <-> BE, PS <-> none */
     }
     left = ra_alloc1(as, lref, RSET_FPR);
-    right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
     l_around = emit_label(as);
     asm_guardcc(as, cc >> 4);
     if (cc & VCC_P) {  /* Extra CC_P branch required? */
@@ -2101,6 +2382,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
        emit_jcc(as, CC_P, as->mcp);
       }
     }
+    right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
     emit_mrm(as, XO_UCOMISD, left, right);
   } else {
     IRRef lref = ir->op1, rref = ir->op2;
@@ -2377,13 +2659,18 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
     emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
   else
     ra_modified(as, r);
-  emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
+  emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
   if (ra_hasreg(pbase) && pbase != r)
-    emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
+    emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
   else
+#if LJ_GC64
+    emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
+              (int32_t)dispofs(as, &J2G(as->J)->jit_base));
+#else
     emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
               ptr2addr(&J2G(as->J)->jit_base));
-  emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
+#endif
+  emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
   emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
@@ -2414,18 +2701,44 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
                 (LJ_DUALNUM && irt_isinteger(ir->t)));
       if (!irref_isk(ref)) {
        Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+#if LJ_GC64
+       if (irt_is64(ir->t)) {
+         /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+         emit_u32(as, irt_toitype(ir->t) << 15);
+         emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
+       } else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
+         emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
+       } else {
+         emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
+       }
+#endif
       emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
+#if LJ_GC64
+      } else {
+       TValue k;
+       lj_ir_kvalue(as->J->L, &k, ir);
+       if (tvisnil(&k)) {
+         emit_i32(as, -1);
+         emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs);
+       } else {
+         emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
+         emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
+       }
+#else
      } else if (!irt_ispri(ir->t)) {
        emit_movmroi(as, RID_BASE, ofs, ir->i);
+#endif
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
 #if !LJ_FR2
        if (s != 0)  /* Do not overwrite link to previous frame. */
          emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
 #endif
+#if !LJ_GC64
      } else {
        if (!(LJ_64 && irt_islightud(ir->t)))
          emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
+#endif
      }
    }
    checkmclim(as);
@@ -2451,11 +2764,15 @@ static void asm_gc_check(ASMState *as)
   args[1] = ASMREF_TMP2;  /* MSize steps     */
   asm_gencall(as, ci, args);
   tmp = ra_releasetmp(as, ASMREF_TMP1);
+#if LJ_GC64
+  emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
+#else
   emit_loada(as, tmp, J2G(as->J));
+#endif
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
   /* Jump around GC step if GC total < GC threshold. */
   emit_sjcc(as, CC_B, l_end);
-  emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
+  emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
   emit_getgl(as, tmp, gc.total);
   as->gcsteps = 0;
   checkmclim(as);
@@ -2520,7 +2837,7 @@ static void asm_head_root_base(ASMState *as)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (r != RID_BASE)
-      emit_rr(as, XO_MOV, r, RID_BASE);
+      emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
   }
 }
 
@@ -2536,8 +2853,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (irp->r == r) {
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+      /* Move from coalesced parent reg. */
      rset_clear(allow, irp->r);
-      emit_rr(as, XO_MOV, r, irp->r);  /* Move from coalesced parent reg. */
+      emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
@@ -2750,13 +3068,19 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
   MSize len = T->szmcode;
   MCode *px = exitstub_addr(J, exitno) - 6;
   MCode *pe = p+len-6;
-  uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
+#if LJ_GC64
+  uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
+#else
+  uint32_t statei = u32ptr(&J2G(J)->vmstate);
+#endif
   if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
     *(int32_t *)(p+len-4) = jmprel(p+len, target);
   /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
-  for (; p < pe; p += asm_x86_inslen(p))
-    if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi)
+  for (; p < pe; p += asm_x86_inslen(p)) {
+    intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64;
+    if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi)
       break;
+  }
   lua_assert(p < pe);
   for (; p < pe; p += asm_x86_inslen(p))
     if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px)
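
One subtlety from asm_guardcc() above is worth spelling out. This assembler emits machine code backwards, and a fused RIP-relative operand (as->mrm.base == RID_RIP) is only resolved when the consuming instruction is finally written. Emitting a guard branch in between moves that consumer 2 bytes (short jcc) or 6 bytes (near form, 0F 8x rel32) further down, i.e. further from the fixed target, so the pending displacement has to grow by the branch size. A sketch of that bookkeeping (the function name is invented; sizes are the standard x86 encodings):

/* Sketch of the RIP-offset fixup in asm_guardcc(). */
static void guard_branch_sketch(ASMState *as, int short_form)
{
  int size = short_form ? 2 : 6;  /* jcc rel8 vs. 0F 8x rel32 */
  if (LJ_GC64 && as->mrm.base == RID_RIP)
    as->mrm.ofs += size;  /* consumer will land 'size' bytes lower */
  /* ... then emit the branch itself (emit_sjcc/emit_jcc) ... */
}
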
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 3d6f13f4..f0bca938 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -20,6 +20,11 @@
 #define REX_64		0
 #define VEX_64		0
 #endif
+#if LJ_GC64
+#define REX_GC64	REX_64
+#else
+#define REX_GC64	0
+#endif
 
 #define emit_i8(as, i)		(*--as->mcp = (MCode)(i))
 #define emit_i32(as, i)		(*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
@@ -94,26 +99,17 @@ static int32_t ptr2addr(const void *p)
 #define ptr2addr(p)	(i32ptr((p)))
 #endif
 
-/* op r, [addr] */
-static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
-{
-  MCode *p = as->mcp;
-  *(int32_t *)(p-4) = ptr2addr(addr);
-#if LJ_64
-  p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
-  as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
-#else
-  as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
-#endif
-}
-
 /* op r, [base+ofs] */
 static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
 {
   MCode *p = as->mcp;
   x86Mode mode;
   if (ra_hasreg(rb)) {
-    if (ofs == 0 && (rb&7) != RID_EBP) {
+    if (LJ_GC64 && rb == RID_RIP) {
+      mode = XM_OFS0;
+      p -= 4;
+      *(int32_t *)p = ofs;
+    } else if (ofs == 0 && (rb&7) != RID_EBP) {
       mode = XM_OFS0;
     } else if (checki8(ofs)) {
       *--p = (MCode)ofs;
@@ -211,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
       *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
       rb = RID_ESP;
 #endif
+    } else if (LJ_GC64 && rb == RID_RIP) {
+      lua_assert(as->mrm.idx == RID_NONE);
+      mode = XM_OFS0;
+      p -= 4;
+      *(int32_t *)p = as->mrm.ofs;
     } else {
       if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
        mode = XM_OFS0;
@@ -264,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
 /* Get/set global_State fields. */
 #define emit_opgl(as, xo, r, field) \
   emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
-#define emit_getgl(as, r, field)	emit_opgl(as, XO_MOV, (r), field)
-#define emit_setgl(as, r, field)	emit_opgl(as, XO_MOVto, (r), field)
+#define emit_getgl(as, r, field)	emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
+#define emit_setgl(as, r, field)	emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
 
 #define emit_setvmstate(as, i) \
   (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
@@ -288,9 +289,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
   }
 }
 
+#if LJ_GC64
+#define dispofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
+#define mcpofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
+#define mctopofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
+/* mov r, addr */
+#define emit_loada(as, r, addr) \
+  emit_loadu64(as, (r), (uintptr_t)(addr))
+#else
 /* mov r, addr */
 #define emit_loada(as, r, addr) \
   emit_loadi(as, (r), ptr2addr((addr)))
+#endif
 
 #if LJ_64
 /* mov r, imm64 or shorter 32 bit extended load. */
@@ -302,6 +315,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
     MCode *p = as->mcp;
     *(int32_t *)(p-4) = (int32_t)u64;
     as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
+#if LJ_GC64
+  } else if (checki32(dispofs(as, u64))) {
+    emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
+  } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
+    /* Since as->realign assumes the code size doesn't change, check
+    ** RIP-relative addressing reachability for both as->mcp and as->mctop.
+    */
+    emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
+#endif
   } else {  /* Full-size 64 bit load. */
     MCode *p = as->mcp;
     *(uint64_t *)(p-8) = u64;
@@ -313,22 +335,69 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
 }
 #endif
 
+/* op r, [addr] */
+static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
+{
+#if LJ_GC64
+  if (checki32(dispofs(as, addr))) {
+    emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
+  } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
+    emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
+  } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
+    emit_rmro(as, xo, rr, rr, 0);
+    emit_loadu64(as, rr, (uintptr_t)addr);
+  } else
+#endif
+  {
+    MCode *p = as->mcp;
+    *(int32_t *)(p-4) = ptr2addr(addr);
+#if LJ_64
+    p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
+    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
+#else
+    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
+#endif
+  }
+}
+
 /* Load 64 bit IR constant into register. */
 static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
 {
+  Reg r64;
+  x86Op xo;
   const uint64_t *k = &ir_k64(ir)->u64;
   if (rset_test(RSET_FPR, r)) {
-    if (*k == 0) {
-      emit_rr(as, XO_XORPS, r, r);
-    } else {
-      emit_rma(as, XO_MOVSD, r, k);
-    }
+    r64 = r;
+    xo = XO_MOVSD;
   } else {
-    if (*k == 0) {
-      emit_rr(as, XO_ARITH(XOg_XOR), r, r);
+    r64 = r | REX_64;
+    xo = XO_MOV;
+  }
+  if (*k == 0) {
+    emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
+#if LJ_GC64
+  } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
+             (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
+    emit_rma(as, xo, r64, k);
+  } else {
+    if (ir->i) {
+      lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
+    } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
+      emit_loadu64(as, r, *k);
+      return;
     } else {
-      emit_rma(as, XO_MOV, r | REX_64, k);
+      /* If all else fails, add the FP constant at the MCode area bottom. */
+      while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
+      *(uint64_t *)as->mcbot = *k;
+      ir->i = (int32_t)(as->mctop - as->mcbot);
+      as->mcbot += 8;
+      as->mclim = as->mcbot + MCLIM_REDZONE;
    }
+    emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
+#else
+  } else {
+    emit_rma(as, xo, r64, k);
+#endif
   }
 }
 
@@ -470,9 +539,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
 {
   if (ofs) {
     if ((as->flags & JIT_F_LEA_AGU))
-      emit_rmro(as, XO_LEA, r, r, ofs);
+      emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
     else
-      emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
+      emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
   }
 }
 
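
A note on the reachability tests above: emit_loadu64(), emit_rma() and asm_fuseloadk64() check a candidate address against both as->mcp and as->mctop before committing to a RIP-relative form. Code is emitted downwards from mctop, and as->realign may later shift the finished trace while assuming instruction sizes do not change, so the 32-bit displacement must stay in range for any position the referencing instruction can end up at. A condensed sketch (the function name is invented):

/* Sketch: a constant k may be addressed RIP-relatively only if the
** 32-bit displacement works from both ends of the machine-code area.
*/
static int rip_reachable_sketch(ASMState *as, const void *k)
{
  return checki32(mcpofs(as, k))     /* from the current emit position */
      && checki32(mctopofs(as, k));  /* and from the top of the trace */
}
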
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 64a9a65d..6d141a20 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -1114,8 +1114,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
1114{ 1114{
1115 TRef tr, ud, fp; 1115 TRef tr, ud, fp;
1116 if (id) { /* io.func() */ 1116 if (id) { /* io.func() */
1117#if LJ_GC64
1118 /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
1119 ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
1120#else
1117 tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); 1121 tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
1118 ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); 1122 ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
1123#endif
1119 } else { /* fp:method() */ 1124 } else { /* fp:method() */
1120 ud = J->base[0]; 1125 ud = J->base[0];
1121 if (!tref_isudata(ud)) 1126 if (!tref_isudata(ud))
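
Under GC64 the address &J2G(J)->gcroot[id] no longer fits in a 32-bit KPTR constant. But since DISPATCH stays pinned on trace (see the lj_target_x86.h hunk below), every GG_State field is one [DISPATCH+disp32] load away, which is what lj_ir_ggfload() expresses. A standalone sketch of that reachability, with an illustrative layout (MyGG, load_gcroot are stand-ins):

#include <stddef.h>

typedef struct MyGG {          /* stand-in for GG_State */
  void *dispatch[64];
  void *gcroot[8];
} MyGG;

/* disp points at gg->dispatch, the value held in the DISPATCH register;
** the field offset folds into the signed 32-bit displacement that a
** single [DISPATCH+ofs] load encodes. */
static void *load_gcroot(char *disp, int id)
{
  ptrdiff_t ofs = (ptrdiff_t)offsetof(MyGG, gcroot)
                - (ptrdiff_t)offsetof(MyGG, dispatch)
                + (ptrdiff_t)id * (ptrdiff_t)sizeof(void *);
  return *(void **)(disp + ofs);
}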
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 4e9c85c7..e77f7b99 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -412,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
412 412
413static LJ_AINLINE uint32_t irt_toitype_(IRType t) 413static LJ_AINLINE uint32_t irt_toitype_(IRType t)
414{ 414{
415 lua_assert(!LJ_64 || t != IRT_LIGHTUD); 415 lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD);
416 if (LJ_DUALNUM && t > IRT_NUM) { 416 if (LJ_DUALNUM && t > IRT_NUM) {
417 return LJ_TISNUM; 417 return LJ_TISNUM;
418 } else { 418 } else {
@@ -568,7 +568,11 @@ typedef union IRIns {
568#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) 568#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
569#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) 569#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
570#define ir_k64(ir) \ 570#define ir_k64(ir) \
571 check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, &(ir)[1].tv) 571 check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
572 (LJ_GC64 && \
573 ((ir)->o == IR_KGC || \
574 (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \
575 &(ir)[1].tv)
572#define ir_kptr(ir) \ 576#define ir_kptr(ir) \
573 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ 577 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
574 mref((ir)[LJ_GC64].ptr, void)) 578 mref((ir)[LJ_GC64].ptr, void))
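
The widened ir_k64() check reflects where 64-bit constants live: the payload occupies the IR slot following the instruction, and under GC64 that now includes KGC/KPTR/KKPTR. ir_kptr() indexes with [LJ_GC64] because pointer constants move to the payload slot only on GC64 builds. A simplified sketch of the layout (types and names are illustrative, not the real IRIns):

#include <stdint.h>

typedef union MyIRIns {
  struct { uint16_t op1; uint16_t op2; uint8_t t; uint8_t o; uint16_t prev; } s;
  uint64_t u64;   /* 64-bit payload, held by the slot *after* a K* ins */
  void *ptr;      /* non-GC64: pointer payload overlaid on the ins itself */
} MyIRIns;

static void *my_ir_kptr(MyIRIns *ir, int gc64)
{
  /* mirrors mref((ir)[LJ_GC64].ptr, void): slot 0 or slot 1 */
  return gc64 ? (void *)(uintptr_t)ir[1].u64 : ir[0].ptr;
}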
diff --git a/src/lj_record.c b/src/lj_record.c
index f0481050..3c67e1a0 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -976,7 +976,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
976 } 976 }
977 /* The cdata metatable is treated as immutable. */ 977 /* The cdata metatable is treated as immutable. */
978 if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; 978 if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
979#if LJ_GC64
980 /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
981 ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
982 GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
983#else
979 ix->mt = mix.tab = lj_ir_ktab(J, mt); 984 ix->mt = mix.tab = lj_ir_ktab(J, mt);
985#endif
980 goto nocheck; 986 goto nocheck;
981 } 987 }
982 ix->mt = mt ? mix.tab : TREF_NIL; 988 ix->mt = mt ? mix.tab : TREF_NIL;
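
Same pattern as the lj_ffrecord.c hunk: rather than baking the metatable in as a pointer constant, which the backend would have to materialize as a full 64-bit immediate under GC64, the base metatable is re-read from its per-type gcroot slot at a fixed GG_State offset. A hedged sketch of the indexing, with an assumed base index (MY_GCROOT_BASEMT is not the real value):

enum { MY_GCROOT_BASEMT = 1 };   /* assumed index of the first basemt root */

/* Base metatables for primitive types are keyed by the value's type tag,
** so one DISPATCH-relative load per lookup replaces a 64-bit constant. */
static void *basemt_root(void **gcroot, unsigned int itype)
{
  return gcroot[MY_GCROOT_BASEMT + itype];
}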
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 33c058be..0a08d4d4 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -623,7 +623,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
623 } 623 }
624 if (LJ_UNLIKELY(bloomtest(rfilt, ref))) 624 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
625 rs = snap_renameref(T, snapno, ref, rs); 625 rs = snap_renameref(T, snapno, ref, rs);
626 lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */
627 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ 626 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
628 int32_t *sps = &ex->spill[regsp_spill(rs)]; 627 int32_t *sps = &ex->spill[regsp_spill(rs)];
629 if (irt_isinteger(t)) { 628 if (irt_isinteger(t)) {
@@ -632,9 +631,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
632 } else if (irt_isnum(t)) { 631 } else if (irt_isnum(t)) {
633 o->u64 = *(uint64_t *)sps; 632 o->u64 = *(uint64_t *)sps;
634#endif 633#endif
635 } else if (LJ_64 && irt_islightud(t)) { 634#if LJ_64 && !LJ_GC64
635 } else if (irt_islightud(t)) {
636 /* 64 bit lightuserdata which may escape already has the tag bits. */ 636 /* 64 bit lightuserdata which may escape already has the tag bits. */
637 o->u64 = *(uint64_t *)sps; 637 o->u64 = *(uint64_t *)sps;
638#endif
638 } else { 639 } else {
639 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ 640 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
640 setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); 641 setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
@@ -652,9 +653,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
652 } else if (irt_isnum(t)) { 653 } else if (irt_isnum(t)) {
653 setnumV(o, ex->fpr[r-RID_MIN_FPR]); 654 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
654#endif 655#endif
655 } else if (LJ_64 && irt_is64(t)) { 656#if LJ_64 && !LJ_GC64
657 } else if (irt_is64(t)) {
656 /* 64 bit values that already have the tag bits. */ 658 /* 64 bit values that already have the tag bits. */
657 o->u64 = ex->gpr[r-RID_MIN_GPR]; 659 o->u64 = ex->gpr[r-RID_MIN_GPR];
660#endif
658 } else if (irt_ispri(t)) { 661 } else if (irt_ispri(t)) {
659 setpriV(o, irt_toitype(t)); 662 setpriV(o, irt_toitype(t));
660 } else { 663 } else {
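
The dropped lua_assert(!LJ_GC64) was the last TODO_GC64 guard here: restoring a GC reference from a spill slot already reads a GCSize, which matches the stored width on either build. A standalone sketch of the width-correct restore (MY_GC64 and MyGCSize are stand-ins for the real config macro and type):

#include <stdint.h>

#if MY_GC64               /* assumed stand-in for LJ_GC64 */
typedef uint64_t MyGCSize;
#else
typedef uint32_t MyGCSize;
#endif

/* Spill slots are addressed as int32_t units; a GC reference occupies
** one or two of them, and the cast reads exactly the stored width
** before the value is re-tagged via irt_toitype(). */
static uintptr_t restore_gcref(const int32_t *slot)
{
  return (uintptr_t)*(const MyGCSize *)slot;
}

The lightuserdata paths, by contrast, are now compiled out entirely under GC64, since 64-bit values carrying their tag bits inline only exist in the non-GC64 object layout.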
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index e29f4748..d5429597 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -22,7 +22,7 @@
22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) 22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
23#endif 23#endif
24#define VRIDDEF(_) \ 24#define VRIDDEF(_) \
25 _(MRM) 25 _(MRM) _(RIP)
26 26
27#define RIDENUM(name) RID_##name, 27#define RIDENUM(name) RID_##name,
28 28
@@ -31,6 +31,7 @@ enum {
31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ 31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
32 RID_MAX, 32 RID_MAX,
33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ 33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
34 RID_RIP = RID_MAX+1, /* Pseudo-id for RIP (x64 only). */
34 35
35 /* Calling conventions. */ 36 /* Calling conventions. */
36 RID_SP = RID_ESP, 37 RID_SP = RID_ESP,
@@ -63,8 +64,10 @@ enum {
63 64
64/* -- Register sets ------------------------------------------------------- */ 65/* -- Register sets ------------------------------------------------------- */
65 66
66/* Make use of all registers, except the stack pointer. */ 67/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
67#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) 68#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
69 - RID2RSET(RID_ESP) \
70 - LJ_GC64*RID2RSET(RID_DISPATCH))
68#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) 71#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
69#define RSET_ALL (RSET_GPR|RSET_FPR) 72#define RSET_ALL (RSET_GPR|RSET_FPR)
70#define RSET_INIT RSET_ALL 73#define RSET_INIT RSET_ALL
@@ -200,6 +203,7 @@ typedef struct {
200*/ 203*/
201typedef enum { 204typedef enum {
202 /* Fixed length opcodes. XI_* prefix. */ 205 /* Fixed length opcodes. XI_* prefix. */
206 XI_O16 = 0x66,
203 XI_NOP = 0x90, 207 XI_NOP = 0x90,
204 XI_XCHGa = 0x90, 208 XI_XCHGa = 0x90,
205 XI_CALL = 0xe8, 209 XI_CALL = 0xe8,
@@ -217,6 +221,7 @@ typedef enum {
217 XI_PUSHi8 = 0x6a, 221 XI_PUSHi8 = 0x6a,
218 XI_TESTb = 0x84, 222 XI_TESTb = 0x84,
219 XI_TEST = 0x85, 223 XI_TEST = 0x85,
224 XI_INT3 = 0xcc,
220 XI_MOVmi = 0xc7, 225 XI_MOVmi = 0xc7,
221 XI_GROUP5 = 0xff, 226 XI_GROUP5 = 0xff,
222 227
@@ -243,6 +248,7 @@ typedef enum {
243 XV_SHRX = XV_f20f38(f7), 248 XV_SHRX = XV_f20f38(f7),
244 249
245 /* Variable-length opcodes. XO_* prefix. */ 250 /* Variable-length opcodes. XO_* prefix. */
251 XO_OR = XO_(0b),
246 XO_MOV = XO_(8b), 252 XO_MOV = XO_(8b),
247 XO_MOVto = XO_(89), 253 XO_MOVto = XO_(89),
248 XO_MOVtow = XO_66(89), 254 XO_MOVtow = XO_66(89),
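
RID_DISPATCH is reserved from the allocatable set by plain register-set arithmetic: each register contributes one mask bit, subtraction clears a bit known to be set, and the LJ_GC64 factor makes the whole term vanish on non-GC64 builds. A minimal sketch of that mask algebra (MY_* names are illustrative):

#include <stdint.h>

typedef uint32_t MyRegSet;
#define MY_RID2RSET(r)        (((MyRegSet)1) << (r))
/* Bits lo..hi-1: the usual power-of-two difference trick. */
#define MY_RSET_RANGE(lo, hi) (MY_RID2RSET(hi) - MY_RID2RSET(lo))

/* gc64 is 0 or 1; multiplying by it compiles the reservation away. */
static MyRegSet my_rset_gpr(int gc64, unsigned int rid_esp,
                            unsigned int rid_dispatch)
{
  return MY_RSET_RANGE(0, 16)
       - MY_RID2RSET(rid_esp)
       - (MyRegSet)gc64 * MY_RID2RSET(rid_dispatch);
}

With DISPATCH never handed out by the allocator, traces cannot clobber it, which is what lets the vm_x64.dasc exit handler below stop reloading it.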
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 759e30ec..d38ac907 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -2401,8 +2401,7 @@ static void build_subroutines(BuildCtx *ctx)
2401 | movzx RCd, byte [rbp-8] // Reconstruct exit number. 2401 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2402 | mov RCH, byte [rbp-16] 2402 | mov RCH, byte [rbp-16]
2403 | mov [rbp-8], r15; mov [rbp-16], r14 2403 | mov [rbp-8], r15; mov [rbp-16], r14
2404 | // Caveat: DISPATCH is rbx. 2404 | // DISPATCH is preserved on-trace in LJ_GC64 mode.
2405 | mov DISPATCH, [ebp]
2406 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. 2405 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2407 | set_vmstate EXIT 2406 | set_vmstate EXIT
2408 | mov [DISPATCH+DISPATCH_J(exitno)], RCd 2407 | mov [DISPATCH+DISPATCH_J(exitno)], RCd