commit 2868715d80b6ac497a7f08393ec325b60d71df8d
tree   2064588fe32607f19f56ed0d23d4fb225b82e068
parent 6c8258d74b7d4ae7f288897518f23c809b9395f2
author    Mike Pall <mike>  2016-05-23 06:01:54 +0200
committer Mike Pall <mike>  2016-05-23 06:01:54 +0200

x64/LJ_GC64: Add missing backend support and enable JIT compilation.
Contributed by Peter Cawley.
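
For orientation while reading the diff: under LJ_GC64 a TValue is a single 64-bit word, with the type tag in the upper 17 bits and a 47-bit payload below it. GC object references are built as `((uint64_t)itype << 47) | gcptr`, and primitive types store `(itype << 15) | 0x7fff` in the high 32 bits (nil becomes all ones, hence the `cmp qword, -1` checks). A minimal sketch of these encodings follows; the helper names are illustrative only and not part of the LuaJIT sources:

```c
#include <stdint.h>

/* Tagged 64-bit value for a GC object: type tag in bits 47..63,
** 47-bit pointer payload in bits 0..46 (cf. asm_href/asm_hrefk below).
** Hypothetical helper, mirroring the expressions emitted in the patch. */
static uint64_t gc64_tag_gcv(int32_t itype, uint64_t gcptr47)
{
  return ((uint64_t)itype << 47) | gcptr47;
}

/* Recover the tag the same way the emitted code does:
** sar r64, 47, then compare the low 32 bits against itype. */
static int32_t gc64_itype(uint64_t tv)
{
  return (int32_t)((int64_t)tv >> 47);
}
```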
 src/lj_arch.h       |   2
 src/lj_asm.c        |  11
 src/lj_asm_x86.h    | 430
 src/lj_emit_x86.h   | 121
 src/lj_ffrecord.c   |   5
 src/lj_ir.h         |   8
 src/lj_record.c     |   6
 src/lj_snap.c       |   9
 src/lj_target_x86.h |  12
 src/vm_x64.dasc     |   3
 10 files changed, 517 insertions(+), 90 deletions(-)
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 72622a21..3c3c98b1 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -453,7 +453,7 @@
 #endif
 
 /* Disable or enable the JIT compiler. */
-#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) || LJ_GC64
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
 #define LJ_HASJIT 0
 #else
 #define LJ_HASJIT 1
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 5dd7ca3a..dba5c178 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -346,6 +346,12 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -1920,8 +1926,12 @@ static void asm_tail_link(ASMState *as)
     if (bc_isret(bc_op(*retpc)))
       pc = retpc;
   }
+#if LJ_GC64
+  emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
   ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
   ra_allockreg(as, i32ptr(pc), RID_LPC);
+#endif
   mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
   switch (bc_op(*pc)) {
   case BC_CALLM: case BC_CALLMT:
@@ -2314,6 +2324,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
     as->curins = as->T->snap[0].ref;
     asm_snap_prep(as);  /* The GC check is a guard. */
     asm_gc_check(as);
+    as->curins = as->stopins;
   }
   ra_evictk(as);
   if (as->parent)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 83fe22b2..7d07336a 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
   }
   /* Push the high byte of the exitno for each exit stub group. */
   *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
+#if !LJ_GC64
   /* Store DISPATCH at original stack slot 0. Account for the two push ops. */
   *mxp++ = XI_MOVmi;
   *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
   *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
   *mxp++ = 2*sizeof(void *);
   *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
+#endif
   /* Jump to exit handler which fills in the ExitState. */
   *mxp++ = XI_JMP; mxp += 4;
   *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
@@ -62,10 +64,14 @@ static void asm_guardcc(ASMState *as, int cc)
     target = p;
     cc ^= 1;
     if (as->realign) {
+      if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
+        as->mrm.ofs += 2;  /* Fixup RIP offset for pending fused load. */
       emit_sjcc(as, cc, target);
       return;
     }
   }
+  if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
+    as->mrm.ofs += 6;  /* Fixup RIP offset for pending fused load. */
   emit_jcc(as, cc, target);
 }
 
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
 {
   if (irref_isk(ref)) {
     IRIns *ir = IR(ref);
+#if LJ_GC64
+    if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
+      *k = ir->i;
+      return 1;
+    } else if (checki32((int64_t)ir_k64(ir)->u64)) {
+      *k = (int32_t)ir_k64(ir)->u64;
+      return 1;
+    }
+#else
     if (ir->o != IR_KINT64) {
       *k = ir->i;
       return 1;
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
       *k = (int32_t)ir_kint64(ir)->u64;
       return 1;
     }
+#endif
   }
   return 0;
 }
@@ -185,9 +201,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
     if (irref_isk(ir->op1)) {
       GCfunc *fn = ir_kfunc(IR(ir->op1));
       GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+#if LJ_GC64
+      int64_t ofs = dispofs(as, &uv->tv);
+      if (checki32(ofs) && checki32(ofs+4)) {
+        as->mrm.ofs = (int32_t)ofs;
+        as->mrm.base = RID_DISPATCH;
+        as->mrm.idx = RID_NONE;
+        return;
+      }
+#else
       as->mrm.ofs = ptr2addr(&uv->tv);
       as->mrm.base = as->mrm.idx = RID_NONE;
       return;
+#endif
     }
     break;
   default:
@@ -207,17 +233,38 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
   lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
   as->mrm.idx = RID_NONE;
   if (ir->op1 == REF_NIL) {
+#if LJ_GC64
+    as->mrm.ofs = (int32_t)ir->op2 - GG_OFS(dispatch);
+    as->mrm.base = RID_DISPATCH;
+#else
     as->mrm.ofs = (int32_t)ir->op2 + ptr2addr(J2GG(as->J));
     as->mrm.base = RID_NONE;
+#endif
     return;
   }
   as->mrm.ofs = field_ofs[ir->op2];
   if (irref_isk(ir->op1)) {
-    as->mrm.ofs += IR(ir->op1)->i;
+    IRIns *op1 = IR(ir->op1);
+#if LJ_GC64
+    if (ir->op1 == REF_NIL) {
+      as->mrm.ofs -= GG_OFS(dispatch);
+      as->mrm.base = RID_DISPATCH;
+      return;
+    } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
+      intptr_t ofs = dispofs(as, ir_kptr(op1));
+      if (checki32(as->mrm.ofs + ofs)) {
+        as->mrm.ofs += (int32_t)ofs;
+        as->mrm.base = RID_DISPATCH;
+        return;
+      }
+    }
+#else
+    as->mrm.ofs += op1->i;
     as->mrm.base = RID_NONE;
-  } else {
-    as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
+    return;
+#endif
   }
+  as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
 }
 
 /* Fuse string reference into memory operand. */
@@ -228,7 +275,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
   as->mrm.base = as->mrm.idx = RID_NONE;
   as->mrm.scale = XM_SCALE1;
   as->mrm.ofs = sizeof(GCstr);
-  if (irref_isk(ir->op1)) {
+  if (!LJ_GC64 && irref_isk(ir->op1)) {
     as->mrm.ofs += IR(ir->op1)->i;
   } else {
     Reg r = ra_alloc1(as, ir->op1, allow);
@@ -260,10 +307,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
   IRIns *ir = IR(ref);
   as->mrm.idx = RID_NONE;
   if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+#if LJ_GC64
+    intptr_t ofs = dispofs(as, ir_kptr(ir));
+    if (checki32(ofs)) {
+      as->mrm.ofs = (int32_t)ofs;
+      as->mrm.base = RID_DISPATCH;
+      return;
+    }
+  } if (0) {
+#else
     as->mrm.ofs = ir->i;
    as->mrm.base = RID_NONE;
   } else if (ir->o == IR_STRREF) {
     asm_fusestrref(as, ir, allow);
+#endif
   } else {
     as->mrm.ofs = 0;
     if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
@@ -310,13 +367,41 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
 static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
 {
   const uint64_t *k = &ir_k64(ir)->u64;
-  as->mrm.ofs = ptr2addr(k);
-  as->mrm.base = RID_NONE;
+  if (!LJ_GC64 || checki32((intptr_t)k)) {
+    as->mrm.ofs = ptr2addr(k);
+    as->mrm.base = RID_NONE;
+#if LJ_GC64
+  } else if (checki32(dispofs(as, k))) {
+    as->mrm.ofs = (int32_t)dispofs(as, k);
+    as->mrm.base = RID_DISPATCH;
+  } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) &&
+             checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
+    as->mrm.ofs = (int32_t)mcpofs(as, k);
+    as->mrm.base = RID_RIP;
+  } else {
+    if (ir->i) {
+      lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
+    } else {
+      while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
+      *(uint64_t*)as->mcbot = *k;
+      ir->i = (int32_t)(as->mctop - as->mcbot);
+      as->mcbot += 8;
+      as->mclim = as->mcbot + MCLIM_REDZONE;
+    }
+    as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
+    as->mrm.base = RID_RIP;
+#endif
+  }
   as->mrm.idx = RID_NONE;
   return RID_MRM;
 }
 
-/* Fuse load into memory operand. */
+/* Fuse load into memory operand.
+**
+** Important caveat: this may emit RIP-relative loads! So don't place any
+** code emitters between this function and the use of its result.
+** The only permitted exception is asm_guardcc().
+*/
 static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
 {
   IRIns *ir = IR(ref);
@@ -346,7 +431,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
   if (ir->o == IR_SLOAD) {
     if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
-        noconflict(as, ref, IR_RETF, 0)) {
+        noconflict(as, ref, IR_RETF, 0) &&
+        !(LJ_GC64 && irt_isaddr(ir->t))) {
       as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
       as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
                     (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
@@ -361,7 +447,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       return RID_MRM;
     }
   } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
-    if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
+    if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
+        !(LJ_GC64 && irt_isaddr(ir->t))) {
      asm_fuseahuref(as, ir->op1, xallow);
      return RID_MRM;
    }
@@ -374,7 +461,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
      asm_fusexref(as, ir->op1, xallow);
      return RID_MRM;
    }
-  } else if (ir->o == IR_VLOAD) {
+  } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
     asm_fuseahuref(as, ir->op1, xallow);
     return RID_MRM;
   }
@@ -499,8 +586,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
     if (r) {  /* Argument is in a register. */
       if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
 #if LJ_64
-        if (ir->o == IR_KINT64)
-          emit_loadu64(as, r, ir_kint64(ir)->u64);
+        if (LJ_GC64 ? ir->o != IR_KINT : ir->o == IR_KINT64)
+          emit_loadu64(as, r, ir_k64(ir)->u64);
         else
 #endif
          emit_loadi(as, r, ir->i);
@@ -668,7 +755,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
   emit_addptr(as, base, -8*delta);
   asm_guardcc(as, CC_NE);
 #if LJ_FR2
-  emit_rmro(as, XO_CMP, rpc, base, -8);
+  emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
   emit_loadu64(as, rpc, u64ptr(pc));
 #else
   emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
@@ -696,8 +783,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   Reg tmp = ra_noreg(IR(ir->op1)->r) ?
               ra_alloc1(as, ir->op1, RSET_FPR) :
               ra_scratch(as, RSET_FPR);
-  Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
+  Reg right;
   emit_rr(as, XO_MOVDto, tmp, dest);
+  right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
   emit_mrm(as, XO_ADDSD, tmp, right);
   ra_left(as, tmp, ir->op1);
 }
@@ -768,13 +856,12 @@ static void asm_conv(ASMState *as, IRIns *ir)
        emit_rr(as, op, dest|REX_64, tmp);
        ra_left(as, tmp, lref);
      } else {
-       Reg left = asm_fuseload(as, lref, RSET_FPR);
        if (LJ_64 && irt_isu32(ir->t))
          emit_rr(as, XO_MOV, dest, dest);  /* Zero hiword. */
        emit_mrm(as, op,
                 dest|((LJ_64 &&
                        (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
-                left);
+                asm_fuseload(as, lref, RSET_FPR));
      }
    }
  } else if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
@@ -952,6 +1039,24 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
+#if LJ_GC64
+    if (irref_isk(ref)) {
+      TValue k;
+      lj_ir_kvalue(as->J->L, &k, ir);
+      emit_movmroi(as, dest, 4, k.u32.hi);
+      emit_movmroi(as, dest, 0, k.u32.lo);
+    } else {
+      /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      if (irt_is64(ir->t)) {
+        emit_u32(as, irt_toitype(ir->t) << 15);
+        emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+      } else {
+        emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15) | 0x7fff);
+      }
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    }
+#else
    if (!irref_isk(ref)) {
      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
@@ -960,6 +1065,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
    }
    if (!(LJ_64 && irt_islightud(ir->t)))
      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+#endif
    emit_loada(as, dest, &J2G(as->J)->tmptv);
  }
 }
@@ -969,9 +1075,9 @@ static void asm_aref(ASMState *as, IRIns *ir)
   Reg dest = ra_dest(as, ir, RSET_GPR);
   asm_fusearef(as, ir, RSET_GPR);
   if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
-    emit_mrm(as, XO_LEA, dest, RID_MRM);
+    emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
   else if (as->mrm.base != dest)
-    emit_rr(as, XO_MOV, dest, as->mrm.base);
+    emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
 }
 
 /* Inlined hash lookup. Specialized for key type and for const keys.
@@ -998,7 +1104,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   if (!isk) {
     rset_clear(allow, tab);
     key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
-    if (!irt_isstr(kt))
+    if (LJ_GC64 || !irt_isstr(kt))
       tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
@@ -1011,8 +1117,8 @@
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
-  emit_rr(as, XO_TEST, dest, dest);
-  emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
+  emit_rr(as, XO_TEST, dest|REX_GC64, dest);
+  emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
   l_next = emit_label(as);
 
   /* Type and value comparison. */
@@ -1033,7 +1139,7 @@
       emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
       emit_sjcc(as, CC_AE, l_next);
       /* The type check avoids NaN penalties and complaints from Valgrind. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
       emit_u32(as, LJ_TISNUM);
       emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
 #else
@@ -1041,10 +1147,27 @@
      emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
 #endif
    }
-#if LJ_64
+#if LJ_64 && !LJ_GC64
  } else if (irt_islightud(kt)) {
    emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
-#endif
+#elif LJ_GC64
+  } else if (irt_isaddr(kt)) {
+    if (isk) {
+      TValue k;
+      k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+      emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
+                 k.u32.lo);
+      emit_sjcc(as, CC_NE, l_next);
+      emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
+                 k.u32.hi);
+    } else {
+      emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
+    }
+  } else {
+    lua_assert(irt_ispri(kt) && !irt_isnil(kt));
+    emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
+    emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
+#else
  } else {
    if (!irt_ispri(kt)) {
      lua_assert(irt_isaddr(kt));
@@ -1058,16 +1181,23 @@
      lua_assert(!irt_isnil(kt));
      emit_i8(as, irt_toitype(kt));
      emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
+#endif
  }
  emit_sfixup(as, l_loop);
  checkmclim(as);
+#if LJ_GC64
+  if (!isk && irt_isaddr(kt)) {
+    emit_rr(as, XO_OR, tmp|REX_64, key);
+    emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
+  }
+#endif
 
  /* Load main position relative to tab->node into dest. */
  khash = isk ? ir_khash(irkey) : 1;
  if (khash == 0) {
-    emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
+    emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
  } else {
-    emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
+    emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
    if ((as->flags & JIT_F_PREFER_IMUL)) {
      emit_i8(as, sizeof(Node));
      emit_rr(as, XO_IMULi8, dest, dest);
@@ -1122,11 +1252,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
  if (ra_hasreg(dest)) {
    if (ofs != 0) {
      if (dest == node && !(as->flags & JIT_F_LEA_AGU))
-        emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
+        emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
      else
-        emit_rmro(as, XO_LEA, dest, node, ofs);
+        emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
    } else if (dest != node) {
-      emit_rr(as, XO_MOV, dest, node);
+      emit_rr(as, XO_MOV, dest|REX_GC64, node);
    }
  }
  asm_guardcc(as, CC_NE);
@@ -1138,13 +1268,24 @@
    lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
    /* Assumes -0.0 is already canonicalized to +0.0. */
    emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
+#if LJ_GC64
+                          ((uint64_t)irt_toitype(irkey->t) << 47) |
+                          (uint64_t)ir_kgc(irkey));
+#else
                          ((uint64_t)irt_toitype(irkey->t) << 32) |
                          (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
+#endif
  } else {
    lua_assert(!irt_isnil(irkey->t));
+#if LJ_GC64
+    emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
+    emit_rmro(as, XO_ARITHi, XOg_CMP, node,
+              ofs + (int32_t)offsetof(Node, key.it));
+#else
    emit_i8(as, irt_toitype(irkey->t));
    emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
              ofs + (int32_t)offsetof(Node, key.it));
+#endif
  }
 #else
  l_exit = emit_label(as);
@@ -1179,20 +1320,21 @@ static void asm_uref(ASMState *as, IRIns *ir)
  if (irref_isk(ir->op1)) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
-    emit_rma(as, XO_MOV, dest, v);
+    emit_rma(as, XO_MOV, dest|REX_GC64, v);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
-      emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
+      emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
      asm_guardcc(as, CC_NE);
      emit_i8(as, 1);
      emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
    } else {
-      emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
+      emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
    }
-    emit_rmro(as, XO_MOV, uv, func,
-              (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+    emit_rmro(as, XO_MOV, uv|REX_GC64, func,
+              (int32_t)offsetof(GCfuncL, uvptr) +
+              (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
  }
 }
 
@@ -1210,9 +1352,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
  if (as->mrm.base == RID_NONE)
    emit_loadi(as, dest, as->mrm.ofs);
  else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
-    emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
+    emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
  else
-    emit_mrm(as, XO_LEA, dest, RID_MRM);
+    emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
 }
 
 /* -- Loads and stores ---------------------------------------------------- */
@@ -1281,7 +1423,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
  case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
  case IRT_NUM: xo = XO_MOVSDto; break;
  case IRT_FLOAT: xo = XO_MOVSSto; break;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
  case IRT_LIGHTUD: lua_assert(0);  /* NYI: mask 64 bit lightuserdata. */
 #endif
  default:
@@ -1313,7 +1455,7 @@
 #define asm_fstore(as, ir)  asm_fxstore(as, ir)
 #define asm_xstore(as, ir)  asm_fxstore(as, ir)
 
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
  if (ra_used(ir) || typecheck) {
@@ -1335,9 +1477,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
+#if LJ_GC64
+  Reg tmp = RID_NONE;
+#endif
  lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
             (LJ_DUALNUM && irt_isint(ir->t)));
-#if LJ_64
+#if LJ_64 && !LJ_GC64
  if (irt_islightud(ir->t)) {
    Reg dest = asm_load_lightud64(as, ir, 1);
    if (ra_hasreg(dest)) {
@@ -1351,20 +1496,64 @@
    RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
    Reg dest = ra_dest(as, ir, allow);
    asm_fuseahuref(as, ir->op1, RSET_GPR);
+#if LJ_GC64
+    if (irt_isaddr(ir->t)) {
+      emit_shifti(as, XOg_SHR|REX_64, dest, 17);
+      asm_guardcc(as, CC_NE);
+      emit_i8(as, irt_toitype(ir->t));
+      emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
+      emit_i8(as, XI_O16);
+      if ((as->flags & JIT_F_BMI2)) {
+        emit_i8(as, 47);
+        emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
+      } else {
+        emit_shifti(as, XOg_ROR|REX_64, dest, 47);
+        emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
+      }
+      return;
+    } else
+#endif
    emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
  } else {
-    asm_fuseahuref(as, ir->op1, RSET_GPR);
+    RegSet gpr = RSET_GPR;
+#if LJ_GC64
+    if (irt_isaddr(ir->t)) {
+      tmp = ra_scratch(as, RSET_GPR);
+      gpr = rset_exclude(gpr, tmp);
+    }
+#endif
+    asm_fuseahuref(as, ir->op1, gpr);
  }
  /* Always do the type check, even if the load result is unused. */
  as->mrm.ofs += 4;
  asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
  if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
    lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
+#if LJ_GC64
+    emit_u32(as, LJ_TISNUM << 15);
+#else
    emit_u32(as, LJ_TISNUM);
+#endif
+    emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
+#if LJ_GC64
+  } else if (irt_isaddr(ir->t)) {
+    as->mrm.ofs -= 4;
+    emit_i8(as, irt_toitype(ir->t));
+    emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
+    emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+    emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM);
+  } else if (irt_isnil(ir->t)) {
+    as->mrm.ofs -= 4;
+    emit_i8(as, -1);
+    emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM);
+  } else {
+    emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
    emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
+#else
  } else {
    emit_i8(as, irt_toitype(ir->t));
    emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
+#endif
  }
 }
 
@@ -1376,12 +1565,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
    Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
    asm_fuseahuref(as, ir->op1, RSET_GPR);
    emit_mrm(as, XO_MOVSDto, src, RID_MRM);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
  } else if (irt_islightud(ir->t)) {
    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
    asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
    emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
 #endif
+#if LJ_GC64
+  } else if (irref_isk(ir->op2)) {
+    TValue k;
+    lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
+    asm_fuseahuref(as, ir->op1, RSET_GPR);
+    if (tvisnil(&k)) {
+      emit_i32(as, -1);
+      emit_mrm(as, XO_MOVmi, REX_64, RID_MRM);
+    } else {
+      emit_u32(as, k.u32.lo);
+      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+      as->mrm.ofs += 4;
+      emit_u32(as, k.u32.hi);
+      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+    }
+#endif
  } else {
    IRIns *irr = IR(ir->op2);
    RegSet allow = RSET_GPR;
@@ -1392,6 +1597,17 @@
    }
    asm_fuseahuref(as, ir->op1, allow);
    if (ra_hasreg(src)) {
+#if LJ_GC64
+      if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
+        /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+        as->mrm.ofs += 4;
+        emit_u32(as, irt_toitype(ir->t) << 15);
+        emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
+        as->mrm.ofs -= 4;
+        emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
+        return;
+      }
+#endif
      emit_mrm(as, XO_MOVto, src, RID_MRM);
    } else if (!irt_ispri(irr->t)) {
      lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
@@ -1399,7 +1615,12 @@
      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
    }
    as->mrm.ofs += 4;
+#if LJ_GC64
+    lua_assert(LJ_DUALNUM && irt_isinteger(ir->t));
+    emit_i32(as, LJ_TNUMX << 15);
+#else
    emit_i32(as, (int32_t)irt_toitype(ir->t));
+#endif
    emit_mrm(as, XO_MOVmi, 0, RID_MRM);
  }
 }
@@ -1420,7 +1641,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
    base = ra_alloc1(as, REF_BASE, RSET_GPR);
    emit_rmro(as, XO_MOVSD, left, base, ofs);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
  } else if (irt_islightud(t)) {
    Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
    if (ra_hasreg(dest)) {
@@ -1438,6 +1659,36 @@
      t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
      emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
    } else {
+#if LJ_GC64
+      if (irt_isaddr(t)) {
+        /* LJ_GC64 type check + tag removal without BMI2 and with BMI2:
+        **
+        **  mov r64, [addr]        rorx r64, [addr], 47
+        **  ror r64, 47
+        **  cmp r16, itype         cmp r16, itype
+        **  jne ->exit             jne ->exit
+        **  shr r64, 16            shr r64, 16
+        */
+        emit_shifti(as, XOg_SHR|REX_64, dest, 17);
+        if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+          asm_guardcc(as, CC_NE);
+          emit_i8(as, irt_toitype(t));
+          emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
+          emit_i8(as, XI_O16);
+        }
+        if ((as->flags & JIT_F_BMI2)) {
+          emit_i8(as, 47);
+          emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
+        } else {
+          if ((ir->op2 & IRSLOAD_TYPECHECK))
+            emit_shifti(as, XOg_ROR|REX_64, dest, 47);
+          else
+            emit_shifti(as, XOg_SHL|REX_64, dest, 17);
+          emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
+        }
+        return;
+      } else
+#endif
      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
    }
  } else {
@@ -1450,11 +1701,42 @@
    asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
    if (LJ_64 && irt_type(t) >= IRT_NUM) {
      lua_assert(irt_isinteger(t) || irt_isnum(t));
+#if LJ_GC64
+      emit_u32(as, LJ_TISNUM << 15);
+#else
      emit_u32(as, LJ_TISNUM);
+#endif
      emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
+#if LJ_GC64
+    } else if (irt_isnil(t)) {
+      /* LJ_GC64 type check for nil:
+      **
+      **   cmp qword [addr], -1
+      **   jne ->exit
+      */
+      emit_i8(as, -1);
+      emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs);
+    } else if (irt_ispri(t)) {
+      emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
+      emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
+    } else {
+      /* LJ_GC64 type check only:
+      **
+      **   mov r64, [addr]
+      **   sar r64, 47
+      **   cmp r32, itype
+      **   jne ->exit
+      */
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
+      emit_i8(as, irt_toitype(t));
+      emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
+      emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+      emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4);
+#else
    } else {
      emit_i8(as, irt_toitype(t));
      emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
+#endif
    }
  }
 }
@@ -1548,7 +1830,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  MCLabel l_end = emit_label(as);
-  emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
+  emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
  emit_setgl(as, tab, gc.grayagain);
  emit_getgl(as, tmp, gc.grayagain);
  emit_i8(as, ~LJ_GC_BLACK);
@@ -2084,7 +2366,6 @@ static void asm_comp(ASMState *as, IRIns *ir)
      cc ^= (VCC_PS|(5<<4));  /* A <-> B, AE <-> BE, PS <-> none */
    }
    left = ra_alloc1(as, lref, RSET_FPR);
-    right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
    l_around = emit_label(as);
    asm_guardcc(as, cc >> 4);
    if (cc & VCC_P) {  /* Extra CC_P branch required? */
@@ -2101,6 +2382,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
        emit_jcc(as, CC_P, as->mcp);
      }
    }
+    right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
    emit_mrm(as, XO_UCOMISD, left, right);
  } else {
    IRRef lref = ir->op1, rref = ir->op2;
@@ -2377,13 +2659,18 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
    emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
  else
    ra_modified(as, r);
-  emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
+  emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
  if (ra_hasreg(pbase) && pbase != r)
-    emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
+    emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
  else
+#if LJ_GC64
+    emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
+              (int32_t)dispofs(as, &J2G(as->J)->jit_base));
+#else
    emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
              ptr2addr(&J2G(as->J)->jit_base));
-  emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
+#endif
+  emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
  emit_getgl(as, r, cur_L);
  if (allow == RSET_EMPTY)  /* Spill temp. register. */
    emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
@@ -2414,18 +2701,44 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
                 (LJ_DUALNUM && irt_isinteger(ir->t)));
      if (!irref_isk(ref)) {
        Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+#if LJ_GC64
+        if (irt_is64(ir->t)) {
+          /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+          emit_u32(as, irt_toitype(ir->t) << 15);
+          emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
+        } else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
+          emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
+        } else {
+          emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
+        }
+#endif
        emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
+#if LJ_GC64
+      } else {
+        TValue k;
+        lj_ir_kvalue(as->J->L, &k, ir);
+        if (tvisnil(&k)) {
+          emit_i32(as, -1);
+          emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs);
+        } else {
+          emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
+          emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
+        }
+#else
      } else if (!irt_ispri(ir->t)) {
        emit_movmroi(as, RID_BASE, ofs, ir->i);
+#endif
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
 #if !LJ_FR2
        if (s != 0)  /* Do not overwrite link to previous frame. */
          emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
 #endif
+#if !LJ_GC64
      } else {
        if (!(LJ_64 && irt_islightud(ir->t)))
          emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
+#endif
      }
    }
    checkmclim(as);
@@ -2451,11 +2764,15 @@ static void asm_gc_check(ASMState *as)
  args[1] = ASMREF_TMP2;  /* MSize steps */
  asm_gencall(as, ci, args);
  tmp = ra_releasetmp(as, ASMREF_TMP1);
+#if LJ_GC64
+  emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
+#else
  emit_loada(as, tmp, J2G(as->J));
+#endif
  emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_sjcc(as, CC_B, l_end);
-  emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
+  emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
  emit_getgl(as, tmp, gc.total);
  as->gcsteps = 0;
  checkmclim(as);
@@ -2520,7 +2837,7 @@ static void asm_head_root_base(ASMState *as)
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (r != RID_BASE)
-      emit_rr(as, XO_MOV, r, RID_BASE);
+      emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
  }
 }
 
@@ -2536,8 +2853,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
    if (irp->r == r) {
      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+      /* Move from coalesced parent reg. */
      rset_clear(allow, irp->r);
-      emit_rr(as, XO_MOV, r, irp->r);  /* Move from coalesced parent reg. */
+      emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
    } else {
      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
    }
@@ -2750,13 +3068,19 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
  MSize len = T->szmcode;
  MCode *px = exitstub_addr(J, exitno) - 6;
  MCode *pe = p+len-6;
-  uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
+#if LJ_GC64
+  uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
+#else
+  uint32_t statei = u32ptr(&J2G(J)->vmstate);
+#endif
  if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
    *(int32_t *)(p+len-4) = jmprel(p+len, target);
  /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
-  for (; p < pe; p += asm_x86_inslen(p))
-    if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi)
+  for (; p < pe; p += asm_x86_inslen(p)) {
+    intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64;
+    if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi)
      break;
+  }
  lua_assert(p < pe);
  for (; p < pe; p += asm_x86_inslen(p))
    if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px)
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 3d6f13f4..f0bca938 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h | |||
| @@ -20,6 +20,11 @@ | |||
| 20 | #define REX_64 0 | 20 | #define REX_64 0 |
| 21 | #define VEX_64 0 | 21 | #define VEX_64 0 |
| 22 | #endif | 22 | #endif |
| 23 | #if LJ_GC64 | ||
| 24 | #define REX_GC64 REX_64 | ||
| 25 | #else | ||
| 26 | #define REX_GC64 0 | ||
| 27 | #endif | ||
| 23 | 28 | ||
| 24 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) | 29 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) |
| 25 | #define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4) | 30 | #define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4) |
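The new REX_GC64 flag lets a single emit call serve both pointer widths: or'ing it into a register operand requests a 64-bit operation only when GC references really are 64 bit. Illustrative effect at the call sites patched above (byte sequences for the plain reg-reg case, no high registers):

```c
/* emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE) assembles as:
**   x64, non-GC64:  8b /r     mov r32, r32   (GC refs fit in 32 bits;
**                                             a 32-bit mov zero-extends)
**   x64, LJ_GC64:   48 8b /r  mov r64, r64   (REX.W: full 64-bit copy)
** so BASE and other pointer moves stay one byte shorter on non-GC64.
*/
```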
| @@ -94,26 +99,17 @@ static int32_t ptr2addr(const void *p) | |||
| 94 | #define ptr2addr(p) (i32ptr((p))) | 99 | #define ptr2addr(p) (i32ptr((p))) |
| 95 | #endif | 100 | #endif |
| 96 | 101 | ||
| 97 | /* op r, [addr] */ | ||
| 98 | static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) | ||
| 99 | { | ||
| 100 | MCode *p = as->mcp; | ||
| 101 | *(int32_t *)(p-4) = ptr2addr(addr); | ||
| 102 | #if LJ_64 | ||
| 103 | p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | ||
| 104 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); | ||
| 105 | #else | ||
| 106 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); | ||
| 107 | #endif | ||
| 108 | } | ||
| 109 | |||
| 110 | /* op r, [base+ofs] */ | 102 | /* op r, [base+ofs] */ |
| 111 | static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) | 103 | static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) |
| 112 | { | 104 | { |
| 113 | MCode *p = as->mcp; | 105 | MCode *p = as->mcp; |
| 114 | x86Mode mode; | 106 | x86Mode mode; |
| 115 | if (ra_hasreg(rb)) { | 107 | if (ra_hasreg(rb)) { |
| 116 | if (ofs == 0 && (rb&7) != RID_EBP) { | 108 | if (LJ_GC64 && rb == RID_RIP) { |
| 109 | mode = XM_OFS0; | ||
| 110 | p -= 4; | ||
| 111 | *(int32_t *)p = ofs; | ||
| 112 | } else if (ofs == 0 && (rb&7) != RID_EBP) { | ||
| 117 | mode = XM_OFS0; | 113 | mode = XM_OFS0; |
| 118 | } else if (checki8(ofs)) { | 114 | } else if (checki8(ofs)) { |
| 119 | *--p = (MCode)ofs; | 115 | *--p = (MCode)ofs; |
| @@ -211,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) | |||
| 211 | *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | 207 | *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); |
| 212 | rb = RID_ESP; | 208 | rb = RID_ESP; |
| 213 | #endif | 209 | #endif |
| 210 | } else if (LJ_GC64 && rb == RID_RIP) { | ||
| 211 | lua_assert(as->mrm.idx == RID_NONE); | ||
| 212 | mode = XM_OFS0; | ||
| 213 | p -= 4; | ||
| 214 | *(int32_t *)p = as->mrm.ofs; | ||
| 214 | } else { | 215 | } else { |
| 215 | if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { | 216 | if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { |
| 216 | mode = XM_OFS0; | 217 | mode = XM_OFS0; |
| @@ -264,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) | |||
| 264 | /* Get/set global_State fields. */ | 265 | /* Get/set global_State fields. */ |
| 265 | #define emit_opgl(as, xo, r, field) \ | 266 | #define emit_opgl(as, xo, r, field) \ |
| 266 | emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) | 267 | emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) |
| 267 | #define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) | 268 | #define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field) |
| 268 | #define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) | 269 | #define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field) |
| 269 | 270 | ||
| 270 | #define emit_setvmstate(as, i) \ | 271 | #define emit_setvmstate(as, i) \ |
| 271 | (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) | 272 | (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) |
| @@ -288,9 +289,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) | |||
| 288 | } | 289 | } |
| 289 | } | 290 | } |
| 290 | 291 | ||
| 292 | #if LJ_GC64 | ||
| 293 | #define dispofs(as, k) \ | ||
| 294 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch)) | ||
| 295 | #define mcpofs(as, k) \ | ||
| 296 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp)) | ||
| 297 | #define mctopofs(as, k) \ | ||
| 298 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop)) | ||
| 299 | /* mov r, addr */ | ||
| 300 | #define emit_loada(as, r, addr) \ | ||
| 301 | emit_loadu64(as, (r), (uintptr_t)(addr)) | ||
| 302 | #else | ||
| 291 | /* mov r, addr */ | 303 | /* mov r, addr */ |
| 292 | #define emit_loada(as, r, addr) \ | 304 | #define emit_loada(as, r, addr) \ |
| 293 | emit_loadi(as, (r), ptr2addr((addr))) | 305 | emit_loadi(as, (r), ptr2addr((addr))) |
| 306 | #endif | ||
| 294 | 307 | ||
| 295 | #if LJ_64 | 308 | #if LJ_64 |
| 296 | /* mov r, imm64 or shorter 32 bit extended load. */ | 309 | /* mov r, imm64 or shorter 32 bit extended load. */ |
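The three distance macros measure how far a constant sits from the two anchors GC64 trace code can always reach with a disp32: the dispatch table (kept live in RID_DISPATCH on trace, see the RSET_GPR change below) and the machine code itself (RIP-relative). A detail worth noting: mcpofs() measures from as->mcp, and since the emitter works backwards, as->mcp is the address of the instruction that will *follow* the one about to be emitted — i.e. exactly the RIP value at execution time, so the displacement needs no further adjustment. A minimal sketch of the reachability test these macros feed (names illustrative):

```c
#include <stdint.h>

#define CHECKI32(x)  ((x) == (intptr_t)(int32_t)(x))  /* fits a disp32? */

/* Can `addr` be reached as [anchor + disp32]? */
static int reachable32(uintptr_t addr, uintptr_t anchor, int32_t *disp)
{
  intptr_t d = (intptr_t)(addr - anchor);
  if (!CHECKI32(d)) return 0;
  *disp = (int32_t)d;
  return 1;
}
```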
| @@ -302,6 +315,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) | |||
| 302 | MCode *p = as->mcp; | 315 | MCode *p = as->mcp; |
| 303 | *(int32_t *)(p-4) = (int32_t)u64; | 316 | *(int32_t *)(p-4) = (int32_t)u64; |
| 304 | as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); | 317 | as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); |
| 318 | #if LJ_GC64 | ||
| 319 | } else if (checki32(dispofs(as, u64))) { | ||
| 320 | emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64)); | ||
| 321 | } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) { | ||
| 322 | /* Since as->realign assumes the code size doesn't change, check | ||
| 323 | ** RIP-relative addressing reachability for both as->mcp and as->mctop. | ||
| 324 | */ | ||
| 325 | emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64)); | ||
| 326 | #endif | ||
| 305 | } else { /* Full-size 64 bit load. */ | 327 | } else { /* Full-size 64 bit load. */ |
| 306 | MCode *p = as->mcp; | 328 | MCode *p = as->mcp; |
| 307 | *(uint64_t *)(p-8) = u64; | 329 | *(uint64_t *)(p-8) = u64; |
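With these branches, emit_loadu64() tries encodings shortest-first: zero-extending `mov r32, imm32` for values that fit unsigned 32 bits, sign-extending `mov r64, simm32`, then the two new GC64 forms — `lea r64, [DISPATCH+disp32]` and `lea r64, [RIP+disp32]` — before falling back to the 10-byte `mov r64, imm64`. The double mcp/mctop check is explained by the comment above: as->realign may move the code, so the displacement must stay in range at either extreme of the reserved area. Size comparison (nominal bytes, ignoring disp8 shortcuts):

```c
/* Encoding sizes tried by the GC64 constant loader, best first:
**   fits u32:       b8+rd imm32        5 bytes (zero-extends)
**   fits s32:       48 c7 /0 imm32     7 bytes (sign-extends)
**   near DISPATCH:  48 8d /r disp32    7 bytes (lea off DISPATCH)
**   near the code:  48 8d /r disp32    7 bytes (lea, RIP-relative)
**   otherwise:      48 b8+rd imm64    10 bytes (movabs)
*/
```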
| @@ -313,22 +335,69 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) | |||
| 313 | } | 335 | } |
| 314 | #endif | 336 | #endif |
| 315 | 337 | ||
| 338 | /* op r, [addr] */ | ||
| 339 | static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) | ||
| 340 | { | ||
| 341 | #if LJ_GC64 | ||
| 342 | if (checki32(dispofs(as, addr))) { | ||
| 343 | emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); | ||
| 344 | } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { | ||
| 345 | emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); | ||
| 346 | } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { | ||
| 347 | emit_rmro(as, xo, rr, rr, 0); | ||
| 348 | emit_loadu64(as, rr, (uintptr_t)addr); | ||
| 349 | } else | ||
| 350 | #endif | ||
| 351 | { | ||
| 352 | MCode *p = as->mcp; | ||
| 353 | *(int32_t *)(p-4) = ptr2addr(addr); | ||
| 354 | #if LJ_64 | ||
| 355 | p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); | ||
| 356 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); | ||
| 357 | #else | ||
| 358 | as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); | ||
| 359 | #endif | ||
| 360 | } | ||
| 361 | } | ||
| 362 | |||
| 316 | /* Load 64 bit IR constant into register. */ | 363 | /* Load 64 bit IR constant into register. */ |
| 317 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | 364 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) |
| 318 | { | 365 | { |
| 366 | Reg r64; | ||
| 367 | x86Op xo; | ||
| 319 | const uint64_t *k = &ir_k64(ir)->u64; | 368 | const uint64_t *k = &ir_k64(ir)->u64; |
| 320 | if (rset_test(RSET_FPR, r)) { | 369 | if (rset_test(RSET_FPR, r)) { |
| 321 | if (*k == 0) { | 370 | r64 = r; |
| 322 | emit_rr(as, XO_XORPS, r, r); | 371 | xo = XO_MOVSD; |
| 323 | } else { | ||
| 324 | emit_rma(as, XO_MOVSD, r, k); | ||
| 325 | } | ||
| 326 | } else { | 372 | } else { |
| 327 | if (*k == 0) { | 373 | r64 = r | REX_64; |
| 328 | emit_rr(as, XO_ARITH(XOg_XOR), r, r); | 374 | xo = XO_MOV; |
| 375 | } | ||
| 376 | if (*k == 0) { | ||
| 377 | emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r); | ||
| 378 | #if LJ_GC64 | ||
| 379 | } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) || | ||
| 380 | (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) { | ||
| 381 | emit_rma(as, xo, r64, k); | ||
| 382 | } else { | ||
| 383 | if (ir->i) { | ||
| 384 | lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); | ||
| 385 | } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { | ||
| 386 | emit_loadu64(as, r, *k); | ||
| 387 | return; | ||
| 329 | } else { | 388 | } else { |
| 330 | emit_rma(as, XO_MOV, r | REX_64, k); | 389 | /* If all else fails, add the FP constant at the MCode area bottom. */ |
| 390 | while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; | ||
| 391 | *(uint64_t *)as->mcbot = *k; | ||
| 392 | ir->i = (int32_t)(as->mctop - as->mcbot); | ||
| 393 | as->mcbot += 8; | ||
| 394 | as->mclim = as->mcbot + MCLIM_REDZONE; | ||
| 331 | } | 395 | } |
| 396 | emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); | ||
| 397 | #else | ||
| 398 | } else { | ||
| 399 | emit_rma(as, xo, r64, k); | ||
| 400 | #endif | ||
| 332 | } | 401 | } |
| 333 | } | 402 | } |
| 334 | 403 | ||
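Two GC64 strategies land in this hunk. First, the relocated emit_rma() resolves an absolute address in preference order: DISPATCH-relative, RIP-relative, and — for plain loads only — by materializing the address in the destination register and loading through it (in execution order: `mov rr, imm64` then `mov rr, [rr]`, reversed at the call sites because the emitter works backwards); that is only legal for XO_MOV/XO_MOVSD, which overwrite rr anyway. Second, emit_loadk64() gets a small per-trace constant pool: a 64-bit FP constant that is neither inline-encodable nor near DISPATCH/RIP is appended at the bottom of the MCode area, 8-byte aligned with INT3 filler, and ir->i caches its distance from as->mctop so later uses of the same IR constant reuse the slot (the lua_assert checks exactly that; GPR loads issued during constant setup, i.e. as->curins <= as->stopins, just take the emit_loadu64() path instead). A standalone sketch of the pool append under those assumptions:

```c
#include <stdint.h>

#define XI_INT3 0xcc  /* 1-byte breakpoint opcode, used as alignment filler */

/* Append a 64-bit constant at the bottom of a code area and return its
** offset from the top -- mirrors ir->i = (int32_t)(as->mctop - as->mcbot).
** `bot`/`top` stand in for as->mcbot/as->mctop.
*/
static int32_t kpool_add(uint8_t **bot, const uint8_t *top, uint64_t k)
{
  while ((uintptr_t)*bot & 7) *(*bot)++ = XI_INT3;  /* align to 8 bytes */
  *(uint64_t *)*bot = k;
  *bot += 8;
  return (int32_t)(top - (*bot - 8));  /* distance from top to the slot */
}
```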
| @@ -470,9 +539,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | |||
| 470 | { | 539 | { |
| 471 | if (ofs) { | 540 | if (ofs) { |
| 472 | if ((as->flags & JIT_F_LEA_AGU)) | 541 | if ((as->flags & JIT_F_LEA_AGU)) |
| 473 | emit_rmro(as, XO_LEA, r, r, ofs); | 542 | emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); |
| 474 | else | 543 | else |
| 475 | emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); | 544 | emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); |
| 476 | } | 545 | } |
| 477 | } | 546 | } |
| 478 | 547 | ||
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 64a9a65d..6d141a20 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
| @@ -1114,8 +1114,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) | |||
| 1114 | { | 1114 | { |
| 1115 | TRef tr, ud, fp; | 1115 | TRef tr, ud, fp; |
| 1116 | if (id) { /* io.func() */ | 1116 | if (id) { /* io.func() */ |
| 1117 | #if LJ_GC64 | ||
| 1118 | /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ | ||
| 1119 | ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); | ||
| 1120 | #else | ||
| 1117 | tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); | 1121 | tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); |
| 1118 | ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); | 1122 | ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); |
| 1123 | #endif | ||
| 1119 | } else { /* fp:method() */ | 1124 | } else { /* fp:method() */ |
| 1120 | ud = J->base[0]; | 1125 | ud = J->base[0]; |
| 1121 | if (!tref_isudata(ud)) | 1126 | if (!tref_isudata(ud)) |
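Under GC64 the recorder can no longer embed `&J2G(J)->gcroot[id]` as a compact IR_KPTR, so recff_io_fp() switches to lj_ir_ggfload(), a load from a fixed GG_State offset that the x86 backend can fold into a DISPATCH-relative access via dispofs() above. Conceptual result (illustrative; the IR still goes through the usual lowering):

```c
/* For io.func() the emitted access effectively becomes:
**   mov r, [DISPATCH + (GG_OFS(g.gcroot[id]) - GG_OFS(dispatch))]
** instead of a 64-bit pointer constant plus an IR_XLOAD through it.
*/
```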
diff --git a/src/lj_ir.h b/src/lj_ir.h index 4e9c85c7..e77f7b99 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
| @@ -412,7 +412,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) | |||
| 412 | 412 | ||
| 413 | static LJ_AINLINE uint32_t irt_toitype_(IRType t) | 413 | static LJ_AINLINE uint32_t irt_toitype_(IRType t) |
| 414 | { | 414 | { |
| 415 | lua_assert(!LJ_64 || t != IRT_LIGHTUD); | 415 | lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD); |
| 416 | if (LJ_DUALNUM && t > IRT_NUM) { | 416 | if (LJ_DUALNUM && t > IRT_NUM) { |
| 417 | return LJ_TISNUM; | 417 | return LJ_TISNUM; |
| 418 | } else { | 418 | } else { |
| @@ -568,7 +568,11 @@ typedef union IRIns { | |||
| 568 | #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) | 568 | #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) |
| 569 | #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) | 569 | #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) |
| 570 | #define ir_k64(ir) \ | 570 | #define ir_k64(ir) \ |
| 571 | check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, &(ir)[1].tv) | 571 | check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ |
| 572 | (LJ_GC64 && \ | ||
| 573 | ((ir)->o == IR_KGC || \ | ||
| 574 | (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \ | ||
| 575 | &(ir)[1].tv) | ||
| 572 | #define ir_kptr(ir) \ | 576 | #define ir_kptr(ir) \ |
| 573 | check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ | 577 | check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ |
| 574 | mref((ir)[LJ_GC64].ptr, void)) | 578 | mref((ir)[LJ_GC64].ptr, void)) |
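The widened ir_k64() assertion documents the GC64 IR constant layout: KGC, KPTR and KKPTR now keep their 64-bit payload in the adjacent IR slot, exactly like KNUM and KINT64 — which is also why ir_kptr() indexes with `(ir)[LJ_GC64]`. Layout sketch:

```c
/* IR constant layout (one IRIns is 64 bits wide):
**   non-GC64:  [ KPTR: 32-bit address packed in the instruction ]
**   LJ_GC64:   [ KPTR ][ next slot: full 64-bit pointer payload ]
** ir_k64()/ir_kptr() read (ir)[1] -- i.e. (ir)[LJ_GC64] -- in that case.
*/
```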
diff --git a/src/lj_record.c b/src/lj_record.c index f0481050..3c67e1a0 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
| @@ -976,7 +976,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | |||
| 976 | } | 976 | } |
| 977 | /* The cdata metatable is treated as immutable. */ | 977 | /* The cdata metatable is treated as immutable. */ |
| 978 | if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; | 978 | if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; |
| 979 | #if LJ_GC64 | ||
| 980 | /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ | ||
| 981 | ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, | ||
| 982 | GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); | ||
| 983 | #else | ||
| 979 | ix->mt = mix.tab = lj_ir_ktab(J, mt); | 984 | ix->mt = mix.tab = lj_ir_ktab(J, mt); |
| 985 | #endif | ||
| 980 | goto nocheck; | 986 | goto nocheck; |
| 981 | } | 987 | } |
| 982 | ix->mt = mt ? mix.tab : TREF_NIL; | 988 | ix->mt = mt ? mix.tab : TREF_NIL; |
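Same motivation as the recff_io_fp() change above: the base metatable for a primitive type is reloaded from its fixed `g.gcroot[GCROOT_BASEMT + itypemap(&ix->tabv)]` slot via lj_ir_ggfload() instead of being embedded as an IR_KTAB constant, which under GC64 would require a 64-bit GC reference in the IR. The TODO mirrors the one in lj_ffrecord.c: once ARM32's asm_fload() handles these offsets, the ggfload form can serve all architectures.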
diff --git a/src/lj_snap.c b/src/lj_snap.c index 33c058be..0a08d4d4 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
| @@ -623,7 +623,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
| 623 | } | 623 | } |
| 624 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) | 624 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) |
| 625 | rs = snap_renameref(T, snapno, ref, rs); | 625 | rs = snap_renameref(T, snapno, ref, rs); |
| 626 | lua_assert(!LJ_GC64); /* TODO_GC64: handle 64 bit references. */ | ||
| 627 | if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ | 626 | if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ |
| 628 | int32_t *sps = &ex->spill[regsp_spill(rs)]; | 627 | int32_t *sps = &ex->spill[regsp_spill(rs)]; |
| 629 | if (irt_isinteger(t)) { | 628 | if (irt_isinteger(t)) { |
| @@ -632,9 +631,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
| 632 | } else if (irt_isnum(t)) { | 631 | } else if (irt_isnum(t)) { |
| 633 | o->u64 = *(uint64_t *)sps; | 632 | o->u64 = *(uint64_t *)sps; |
| 634 | #endif | 633 | #endif |
| 635 | } else if (LJ_64 && irt_islightud(t)) { | 634 | #if LJ_64 && !LJ_GC64 |
| 635 | } else if (irt_islightud(t)) { | ||
| 636 | /* 64 bit lightuserdata which may escape already has the tag bits. */ | 636 | /* 64 bit lightuserdata which may escape already has the tag bits. */ |
| 637 | o->u64 = *(uint64_t *)sps; | 637 | o->u64 = *(uint64_t *)sps; |
| 638 | #endif | ||
| 638 | } else { | 639 | } else { |
| 639 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ | 640 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ |
| 640 | setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); | 641 | setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); |
| @@ -652,9 +653,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
| 652 | } else if (irt_isnum(t)) { | 653 | } else if (irt_isnum(t)) { |
| 653 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); | 654 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); |
| 654 | #endif | 655 | #endif |
| 655 | } else if (LJ_64 && irt_is64(t)) { | 656 | #if LJ_64 && !LJ_GC64 |
| 657 | } else if (irt_is64(t)) { | ||
| 656 | /* 64 bit values that already have the tag bits. */ | 658 | /* 64 bit values that already have the tag bits. */ |
| 657 | o->u64 = ex->gpr[r-RID_MIN_GPR]; | 659 | o->u64 = ex->gpr[r-RID_MIN_GPR]; |
| 660 | #endif | ||
| 658 | } else if (irt_ispri(t)) { | 661 | } else if (irt_ispri(t)) { |
| 659 | setpriV(o, irt_toitype(t)); | 662 | setpriV(o, irt_toitype(t)); |
| 660 | } else { | 663 | } else { |
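The dropped `lua_assert(!LJ_GC64)` and the two now-conditional branches mark snapshot restore as GC64-clean: the "already has the tag bits" shortcuts only apply to the non-GC64 x64 lightuserdata scheme, while GC64 values flow through the generic setgcV()/irt_toitype() path — which the relaxed assertion in irt_toitype_() (lj_ir.h hunk above) now permits for IRT_LIGHTUD too. Sketch of the GC64 value layout this relies on:

```c
/* GC64 TValue sketch: 47-bit payload, type tag in the bits above it.
**   bits 63..47 : type tag (what irt_toitype() produces)
**   bits 46..0  : pointer / payload
** So a spilled 64-bit payload is re-tagged uniformly via setgcV(),
** with no lightuserdata special case left in snap_restoreval().
*/
```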
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index e29f4748..d5429597 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) | 22 | _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) |
| 23 | #endif | 23 | #endif |
| 24 | #define VRIDDEF(_) \ | 24 | #define VRIDDEF(_) \ |
| 25 | _(MRM) | 25 | _(MRM) _(RIP) |
| 26 | 26 | ||
| 27 | #define RIDENUM(name) RID_##name, | 27 | #define RIDENUM(name) RID_##name, |
| 28 | 28 | ||
| @@ -31,6 +31,7 @@ enum { | |||
| 31 | FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ | 31 | FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ |
| 32 | RID_MAX, | 32 | RID_MAX, |
| 33 | RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ | 33 | RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ |
| 34 | RID_RIP = RID_MAX+1, /* Pseudo-id for RIP (x64 only). */ | ||
| 34 | 35 | ||
| 35 | /* Calling conventions. */ | 36 | /* Calling conventions. */ |
| 36 | RID_SP = RID_ESP, | 37 | RID_SP = RID_ESP, |
| @@ -63,8 +64,10 @@ enum { | |||
| 63 | 64 | ||
| 64 | /* -- Register sets ------------------------------------------------------- */ | 65 | /* -- Register sets ------------------------------------------------------- */ |
| 65 | 66 | ||
| 66 | /* Make use of all registers, except the stack pointer. */ | 67 | /* Make use of all registers, except the stack pointer (and maybe DISPATCH). */ |
| 67 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) | 68 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \ |
| 69 | - RID2RSET(RID_ESP) \ | ||
| 70 | - LJ_GC64*RID2RSET(RID_DISPATCH)) | ||
| 68 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) | 71 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) |
| 69 | #define RSET_ALL (RSET_GPR|RSET_FPR) | 72 | #define RSET_ALL (RSET_GPR|RSET_FPR) |
| 70 | #define RSET_INIT RSET_ALL | 73 | #define RSET_INIT RSET_ALL |
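The register-allocation consequence of GC64 on x64: RID_DISPATCH is subtracted from RSET_GPR, so the dispatch-table pointer stays pinned in its register for the whole trace and every DISPATCH-relative access emitted above is valid at any trace position. The `LJ_GC64*RID2RSET(RID_DISPATCH)` idiom exploits LJ_GC64 being 0 or 1 to make the subtraction vanish on non-GC64 builds without an #if/#else pair. The same 0/1-mask trick in isolation (register ids illustrative):

```c
#include <stdint.h>

typedef uint32_t RegSet;
#define BIT(r)  ((RegSet)1u << (r))

#define GC64 1          /* 0 or 1, like LJ_GC64 */
#define RID_SP    4     /* illustrative ids only */
#define RID_DISP 14

/* GC64*BIT(...) is BIT(...) or 0 at compile time -- no #if needed. */
static const RegSet gprs =
  (RegSet)0xffff - BIT(RID_SP) - GC64*BIT(RID_DISP);
```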
| @@ -200,6 +203,7 @@ typedef struct { | |||
| 200 | */ | 203 | */ |
| 201 | typedef enum { | 204 | typedef enum { |
| 202 | /* Fixed length opcodes. XI_* prefix. */ | 205 | /* Fixed length opcodes. XI_* prefix. */ |
| 206 | XI_O16 = 0x66, | ||
| 203 | XI_NOP = 0x90, | 207 | XI_NOP = 0x90, |
| 204 | XI_XCHGa = 0x90, | 208 | XI_XCHGa = 0x90, |
| 205 | XI_CALL = 0xe8, | 209 | XI_CALL = 0xe8, |
| @@ -217,6 +221,7 @@ typedef enum { | |||
| 217 | XI_PUSHi8 = 0x6a, | 221 | XI_PUSHi8 = 0x6a, |
| 218 | XI_TESTb = 0x84, | 222 | XI_TESTb = 0x84, |
| 219 | XI_TEST = 0x85, | 223 | XI_TEST = 0x85, |
| 224 | XI_INT3 = 0xcc, | ||
| 220 | XI_MOVmi = 0xc7, | 225 | XI_MOVmi = 0xc7, |
| 221 | XI_GROUP5 = 0xff, | 226 | XI_GROUP5 = 0xff, |
| 222 | 227 | ||
| @@ -243,6 +248,7 @@ typedef enum { | |||
| 243 | XV_SHRX = XV_f20f38(f7), | 248 | XV_SHRX = XV_f20f38(f7), |
| 244 | 249 | ||
| 245 | /* Variable-length opcodes. XO_* prefix. */ | 250 | /* Variable-length opcodes. XO_* prefix. */ |
| 251 | XO_OR = XO_(0b), | ||
| 246 | XO_MOV = XO_(8b), | 252 | XO_MOV = XO_(8b), |
| 247 | XO_MOVto = XO_(89), | 253 | XO_MOVto = XO_(89), |
| 248 | XO_MOVtow = XO_66(89), | 254 | XO_MOVtow = XO_66(89), |
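Housekeeping for the new backend code: RID_RIP joins RID_MRM past RID_MAX, so the pseudo-id can never collide with an allocatable register, and three encodings are added for the code paths this commit introduces. For reference (plain x86 opcode facts):

```c
/* XI_O16  = 0x66  operand-size override prefix (16-bit operand)
** XI_INT3 = 0xcc  int3 breakpoint; used by emit_loadk64() as filler
** XO_OR   = 0x0b  or r, r/m
*/
```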
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 759e30ec..d38ac907 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc | |||
| @@ -2401,8 +2401,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2401 | | movzx RCd, byte [rbp-8] // Reconstruct exit number. | 2401 | | movzx RCd, byte [rbp-8] // Reconstruct exit number. |
| 2402 | | mov RCH, byte [rbp-16] | 2402 | | mov RCH, byte [rbp-16] |
| 2403 | | mov [rbp-8], r15; mov [rbp-16], r14 | 2403 | | mov [rbp-8], r15; mov [rbp-16], r14 |
| 2404 | | // Caveat: DISPATCH is rbx. | 2404 | | // DISPATCH is preserved on-trace in LJ_GC64 mode. |
| 2405 | | mov DISPATCH, [ebp] | ||
| 2406 | | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. | 2405 | | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. |
| 2407 | | set_vmstate EXIT | 2406 | | set_vmstate EXIT |
| 2408 | | mov [DISPATCH+DISPATCH_J(exitno)], RCd | 2407 | | mov [DISPATCH+DISPATCH_J(exitno)], RCd |
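With RID_DISPATCH excluded from RSET_GPR (lj_target_x86.h above), traces can no longer clobber DISPATCH, so the exit handler's reload — and its caveat comment — can simply go: as the replacement comment states, the register still holds the dispatch table at every trace exit in LJ_GC64 mode.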
