author    Mike Pall <mike>    2016-05-23 00:25:29 +0200
committer Mike Pall <mike>    2016-05-23 00:25:29 +0200
commit    7fb75ccc4cf17825c1c8fe9f44ebfb0668a1b033 (patch)
tree      7317a542402fa2a135b6789c7c4868089d31fd2f
parent    a4067978b6d1c2a25d91d82b1b0d384d98abdbe5 (diff)
Embed 64 bit constants directly in the IR, using two slots.
Contributed by Peter Cawley.
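To illustrate the new layout described above: a 64 bit constant now occupies two consecutive IR slots, with the opcode/type/chain fields in the first slot and the raw 64 bit payload in the second, which is what the new ir_k64() macro below reads via &(ir)[1].tv. The following is a minimal stand-alone sketch, not LuaJIT's actual headers; the SketchIns union and sketch_k64() helper are invented for this example only.

#include <stdint.h>
#include <stdio.h>

/* Simplified two-slot constant layout; fields loosely modeled on lj_ir.h. */
typedef union SketchIns {
  struct {
    uint16_t op1, op2;   /* Operands (unused for constants here). */
    uint8_t t, o;        /* IR type and opcode. */
    uint16_t prev;       /* Chain of same-opcode constants. */
  } s;                   /* First slot: instruction header. */
  uint64_t u64;          /* Second slot: the embedded 64 bit payload. */
} SketchIns;

/* Read a 64 bit constant: the payload lives in the *following* slot,
** mirroring the new ir_k64() macro, which expands to &(ir)[1].tv.
*/
static uint64_t sketch_k64(const SketchIns *ir)
{
  return ir[1].u64;
}

int main(void)
{
  SketchIns ir[2] = {{{0}}};
  ir[0].s.o = 1;                      /* Stand-in for IR_KINT64. */
  ir[1].u64 = 0x123456789abcdef0ULL;  /* Constant embedded in the next slot. */
  printf("0x%llx\n", (unsigned long long)sketch_k64(ir));
  return 0;
}

Loops that walk the constant part of the IR (see the lj_gc.c, lj_opt_sink.c and lj_opt_split.c hunks below) accordingly skip the extra payload slot with an ir++/ref++ whenever irt_is64(ir->t) holds and the opcode is not IR_KNULL.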
-rw-r--r--  src/lj_asm.c       17
-rw-r--r--  src/lj_asm_x86.h   26
-rw-r--r--  src/lj_emit_arm.h   3
-rw-r--r--  src/lj_emit_mips.h  4
-rw-r--r--  src/lj_emit_ppc.h   4
-rw-r--r--  src/lj_emit_x86.h  22
-rw-r--r--  src/lj_gc.c         2
-rw-r--r--  src/lj_ir.c        36
-rw-r--r--  src/lj_ir.h        15
-rw-r--r--  src/lj_iropt.h      2
-rw-r--r--  src/lj_jit.h        4
-rw-r--r--  src/lj_opt_fold.c   8
-rw-r--r--  src/lj_opt_mem.c    4
-rw-r--r--  src/lj_opt_sink.c   2
-rw-r--r--  src/lj_opt_split.c  2
-rw-r--r--  src/lj_record.c     5
-rw-r--r--  src/lj_snap.c       9
17 files changed, 105 insertions, 60 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9b394beb..0b3e770a 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -334,7 +334,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   RA_DBGX((as, "remat $i $r", ir, r));
 #if !LJ_SOFTFP
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@@ -695,15 +695,14 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-        cTValue *tv = ir_knum(ir);
         /* FP remat needs a load except for +0. Still better than eviction. */
-        if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-          emit_loadn(as, dest, tv);
+        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+          emit_loadk64(as, dest, ir);
           return;
         }
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-        emit_loadu64(as, dest, ir_kint64(ir)->u64);
+        emit_loadk64(as, dest, ir);
         return;
 #endif
       } else if (ir->o != IR_KPRI) {
@@ -1963,8 +1962,14 @@ static void asm_setup_regsp(ASMState *as)
   ra_setup(as);
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 69d1256e..0361a965 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -306,6 +306,16 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
   }
 }
 
+/* Fuse load of 64 bit IR constant into memory operand. */
+static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
+{
+  const uint64_t *k = &ir_k64(ir)->u64;
+  as->mrm.ofs = ptr2addr(k);
+  as->mrm.base = RID_NONE;
+  as->mrm.idx = RID_NONE;
+  return RID_MRM;
+}
+
 /* Fuse load into memory operand. */
 static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
 {
@@ -325,19 +335,13 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
     if (ir->o == IR_KNUM) {
       RegSet avail = as->freeset & ~as->modset & RSET_FPR;
       lua_assert(allow != RSET_EMPTY);
-      if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
-        as->mrm.ofs = ptr2addr(ir_knum(ir));
-        as->mrm.base = as->mrm.idx = RID_NONE;
-        return RID_MRM;
-      }
+      if (!(avail & (avail-1)))  /* Fuse if less than two regs available. */
+        return asm_fuseloadk64(as, ir);
     } else if (ir->o == IR_KINT64) {
       RegSet avail = as->freeset & ~as->modset & RSET_GPR;
       lua_assert(allow != RSET_EMPTY);
-      if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
-        as->mrm.ofs = ptr2addr(ir_kint64(ir));
-        as->mrm.base = as->mrm.idx = RID_NONE;
-        return RID_MRM;
-      }
+      if (!(avail & (avail-1)))  /* Fuse if less than two regs available. */
+        return asm_fuseloadk64(as, ir);
     } else if (mayfuse(as, ref)) {
       RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
       if (ir->o == IR_SLOAD) {
@@ -711,7 +715,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_rr(as, XO_CVTSD2SS, dest, dest);
       emit_rr(as, XO_SUBSD, dest, bias);  /* Subtract 2^52+2^51 bias. */
       emit_rr(as, XO_XORPS, dest, bias);  /* Merge bias and integer. */
-      emit_loadn(as, bias, k);
+      emit_rma(as, XO_MOVSD, bias, k);
       emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
       return;
     } else {  /* Integer to FP conversion. */
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index 47fee5fc..dff9fac4 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -219,8 +219,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
 
 #if !LJ_SOFTFP
 /* Load a number constant into an FPR. */
-static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
 {
+  cTValue *tv = ir_knum(ir);
   int32_t i;
   if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
     uint32_t hi = tv->u32.hi;
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index fdebe94b..29079ea3 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -112,8 +112,8 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
   emit_tsi(as, mi, r, base, i);
 }
 
-#define emit_loadn(as, r, tv) \
-  emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR)
+#define emit_loadk64(as, r, ir) \
+  emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
 
 /* Get/set global_State fields. */
 static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index 4eb933ea..5163012a 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
   emit_tai(as, pi, r, base, i);
 }
 
-#define emit_loadn(as, r, tv) \
-  emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR)
+#define emit_loadk64(as, r, ir) \
+  emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
 
 /* Get/set global_State fields. */
 static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs)
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index cbaf4e85..3d6f13f4 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -313,13 +313,23 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
 }
 #endif
 
-/* movsd r, [&tv->n] / xorps r, r */
-static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
+/* Load 64 bit IR constant into register. */
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
 {
-  if (tvispzero(tv))  /* Use xor only for +0. */
-    emit_rr(as, XO_XORPS, r, r);
-  else
-    emit_rma(as, XO_MOVSD, r, &tv->n);
+  const uint64_t *k = &ir_k64(ir)->u64;
+  if (rset_test(RSET_FPR, r)) {
+    if (*k == 0) {
+      emit_rr(as, XO_XORPS, r, r);
+    } else {
+      emit_rma(as, XO_MOVSD, r, k);
+    }
+  } else {
+    if (*k == 0) {
+      emit_rr(as, XO_ARITH(XOg_XOR), r, r);
+    } else {
+      emit_rma(as, XO_MOV, r | REX_64, k);
+    }
+  }
 }
 
 /* -- Emit control-flow instructions -------------------------------------- */
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 53f1d974..7c707462 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -238,6 +238,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T)
     IRIns *ir = &T->ir[ref];
     if (ir->o == IR_KGC)
       gc_markobj(g, ir_kgc(ir));
+    if (irt_is64(ir->t) && ir->o != IR_KNULL)
+      ref++;
   }
   if (T->link) gc_marktrace(g, T->link);
   if (T->nextroot) gc_marktrace(g, T->nextroot);
diff --git a/src/lj_ir.c b/src/lj_ir.c
index acb39463..124d5791 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -91,7 +91,7 @@ static void lj_ir_growbot(jit_State *J)
   IRIns *baseir = J->irbuf + J->irbotlim;
   MSize szins = J->irtoplim - J->irbotlim;
   lua_assert(szins != 0);
-  lua_assert(J->cur.nk == J->irbotlim);
+  lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim);
   if (J->cur.nins + (szins >> 1) < J->irtoplim) {
     /* More than half of the buffer is free on top: shift up by a quarter. */
     MSize ofs = szins >> 2;
@@ -173,6 +173,18 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J)
   return ref;
 }
 
+/* Get ref of next 64 bit IR constant and optionally grow IR.
+** Note: this may invalidate all IRIns *!
+*/
+static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
+{
+  IRRef ref = J->cur.nk - 2;
+  lua_assert(J->state != LJ_TRACE_ASM);
+  if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
+  J->cur.nk = ref;
+  return ref;
+}
+
 /* Intern int32_t constant. */
 TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
 {
@@ -266,19 +278,18 @@ TValue *lj_ir_k64_reserve(jit_State *J)
   return ir_k64_add(J, kp, 0);  /* Set to 0. Final value is set later. */
 }
 
-/* Intern 64 bit constant, given by its address. */
-TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
+/* Intern 64 bit constant, given by its 64 bit pattern. */
+TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64)
 {
   IRIns *ir, *cir = J->cur.ir;
   IRRef ref;
   IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64;
   for (ref = J->chain[op]; ref; ref = cir[ref].prev)
-    if (ir_k64(&cir[ref]) == tv)
+    if (ir_k64(&cir[ref])->u64 == u64)
       goto found;
-  ref = ir_nextk(J);
+  ref = ir_nextk64(J);
   ir = IR(ref);
-  lua_assert(checkptrGC(tv));
-  setmref(ir->ptr, tv);
+  ir[1].tv.u64 = u64;
   ir->t.irt = t;
   ir->o = op;
   ir->prev = J->chain[op];
@@ -290,13 +301,13 @@ found:
 /* Intern FP constant, given by its 64 bit pattern. */
 TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
 {
-  return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64));
+  return lj_ir_k64(J, IR_KNUM, u64);
 }
 
 /* Intern 64 bit integer constant. */
 TRef lj_ir_kint64(jit_State *J, uint64_t u64)
 {
-  return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64));
+  return lj_ir_k64(J, IR_KINT64, u64);
 }
 
 /* Check whether a number is int and return it. -0 is NOT considered an int. */
@@ -367,7 +378,7 @@ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr)
   IRRef ref;
   lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr);
   for (ref = J->chain[op]; ref; ref = cir[ref].prev)
-    if (mref(cir[ref].ptr, void) == ptr)
+    if (ir_kptr(&cir[ref]) == ptr)
       goto found;
   ref = ir_nextk(J);
   ir = IR(ref);
@@ -432,9 +443,8 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
   case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
   case IR_KINT: setintV(tv, ir->i); break;
   case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
-  case IR_KPTR: case IR_KKPTR: case IR_KNULL:
-    setlightudV(tv, mref(ir->ptr, void));
-    break;
+  case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break;
+  case IR_KNULL: setlightudV(tv, NULL); break;
   case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break;
 #if LJ_HASFFI
   case IR_KINT64: {
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 8a655b64..03377ec1 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -522,7 +522,9 @@ typedef uint32_t TRef;
 ** +-------+-------+---+---+---+---+
 ** |  op1  |  op2  | t | o | r | s |
 ** +-------+-------+---+---+---+---+
-** |  op12/i/gco   |   ot  | prev  | (alternative fields in union)
+** |  op12/i/gco32 |   ot  | prev  | (alternative fields in union)
+** +-------+-------+---+---+---+---+
+** |  TValue/gco64                 | (2nd IR slot for 64 bit constants)
 ** +---------------+-------+-------+
 **        32           16      16
 **
@@ -550,8 +552,9 @@ typedef union IRIns {
     )
   };
   int32_t i;   /* 32 bit signed integer literal (overlaps op12). */
-  GCRef gcr;   /* GCobj constant (overlaps op12). */
-  MRef ptr;    /* Pointer constant (overlaps op12). */
+  GCRef gcr;   /* GCobj constant (overlaps op12 or entire slot). */
+  MRef ptr;    /* Pointer constant (overlaps op12 or entire slot). */
+  TValue tv;   /* TValue constant (overlaps entire slot). */
 } IRIns;
 
 /* TODO_GC64: major changes required. */
@@ -560,10 +563,10 @@ typedef union IRIns {
 #define ir_ktab(ir)     (gco2tab(ir_kgc((ir))))
 #define ir_kfunc(ir)    (gco2func(ir_kgc((ir))))
 #define ir_kcdata(ir)   (gco2cd(ir_kgc((ir))))
-#define ir_knum(ir)     check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue))
-#define ir_kint64(ir)   check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
+#define ir_knum(ir)     check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
+#define ir_kint64(ir)   check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
 #define ir_k64(ir) \
-  check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
+  check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, &(ir)[1].tv)
 #define ir_kptr(ir) \
   check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void))
 
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index fdc5f0d2..219d391a 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -41,7 +41,7 @@ LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs);
 /* Interning of constants. */
 LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
 LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
-LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
+LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64);
 LJ_FUNC TValue *lj_ir_k64_reserve(jit_State *J);
 LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
 LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
diff --git a/src/lj_jit.h b/src/lj_jit.h
index eafbc327..e9ab319e 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -355,8 +355,8 @@ enum {
 /* Fold state is used to fold instructions on-the-fly. */
 typedef struct FoldState {
   IRIns ins;      /* Currently emitted instruction. */
-  IRIns left;     /* Instruction referenced by left operand. */
-  IRIns right;    /* Instruction referenced by right operand. */
+  IRIns left[2];  /* Instruction referenced by left operand. */
+  IRIns right[2]; /* Instruction referenced by right operand. */
 } FoldState;
 
 /* JIT compiler state. */
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index c102f2db..73a368ed 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -136,8 +136,8 @@
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)     (&J->cur.ir[(ref)])
 #define fins        (&J->fold.ins)
-#define fleft       (&J->fold.left)
-#define fright      (&J->fold.right)
+#define fleft       (J->fold.left)
+#define fright      (J->fold.right)
 #define knumleft    (ir_knum(fleft)->n)
 #define knumright   (ir_knum(fright)->n)
 
@@ -2393,10 +2393,14 @@ retry:
   if (fins->op1 >= J->cur.nk) {
     key += (uint32_t)IR(fins->op1)->o << 10;
     *fleft = *IR(fins->op1);
+    if (fins->op1 < REF_TRUE)
+      fleft[1] = IR(fins->op1)[1];
   }
   if (fins->op2 >= J->cur.nk) {
     key += (uint32_t)IR(fins->op2)->o;
     *fright = *IR(fins->op2);
+    if (fins->op2 < REF_TRUE)
+      fright[1] = IR(fins->op2)[1];
   } else {
     key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
   }
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 5549b0d0..92ecbb48 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -22,8 +22,8 @@
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)     (&J->cur.ir[(ref)])
 #define fins        (&J->fold.ins)
-#define fleft       (&J->fold.left)
-#define fright      (&J->fold.right)
+#define fleft       (J->fold.left)
+#define fright      (J->fold.right)
 
 /*
 ** Caveat #1: return value is not always a TRef -- only use with tref_ref().
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
index 49e13784..1b775f2d 100644
--- a/src/lj_opt_sink.c
+++ b/src/lj_opt_sink.c
@@ -220,6 +220,8 @@ static void sink_sweep_ins(jit_State *J)
   for (ir = IR(J->cur.nk); ir < irbase; ir++) {
     irt_clearmark(ir->t);
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL)
+      ir++;
   }
 }
 
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 49c9ae47..19818660 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -354,6 +354,8 @@ static void split_ir(jit_State *J)
       ir->prev = ref;  /* Identity substitution for loword. */
       hisubst[ref] = 0;
     }
+    if (irt_is64(ir->t) && ir->o != IR_KNULL)
+      ref++;
   }
 
   /* Process old IR instructions. */
diff --git a/src/lj_record.c b/src/lj_record.c
index b5fb6649..3b754897 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -61,7 +61,10 @@ static void rec_check_ir(jit_State *J)
     case IRMref: lua_assert(op1 >= nk);
       lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
     case IRMlit: break;
-    case IRMcst: lua_assert(i < REF_BIAS); continue;
+    case IRMcst: lua_assert(i < REF_BIAS);
+      if (irt_is64(ir->t) && ir->o != IR_KNULL)
+        i++;
+      continue;
     }
     switch (irm_op2(mode)) {
     case IRMnone: lua_assert(op2 == 0); break;
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 8638d9ed..6199b1f0 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -371,8 +371,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
   case IR_KPRI: return TREF_PRI(irt_type(ir->t));
   case IR_KINT: return lj_ir_kint(J, ir->i);
   case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
-  case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
-  case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
+  case IR_KNUM: case IR_KINT64:
+    return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
   case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
   default: lua_assert(0); return TREF_NIL; break;
   }
@@ -555,8 +555,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
         if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
           uint64_t k = (uint32_t)T->ir[irs->op2].i +
                        ((uint64_t)T->ir[(irs+1)->op2].i << 32);
-          val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
-                          lj_ir_k64_find(J, k));
+          val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
         } else {
           val = emitir_raw(IRT(IR_HIOP, t), val,
                            snap_pref(J, T, map, nent, seen, (irs+1)->op2));
@@ -651,7 +650,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
     uint64_t tmp;
     if (irref_isk(ref)) {
       if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
-        src = mref(ir->ptr, int32_t);
+        src = (int32_t *)&ir[1];
       } else if (sz == 8) {
         tmp = (uint64_t)(uint32_t)ir->i;
         src = (int32_t *)&tmp;