aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2011-02-03 04:13:51 +0100
committer Mike Pall <mike> 2011-02-03 04:22:27 +0100
commit df65b8b419c12327254dec0df116c62525aaabad (patch)
tree b4ba6ea2841692123b49b3033420dbb7282cbcd6
parent 1027018b2135caf45057c3d3b3da03ffb0c6add3 (diff)
download luajit-df65b8b419c12327254dec0df116c62525aaabad.tar.gz
luajit-df65b8b419c12327254dec0df116c62525aaabad.tar.bz2
luajit-df65b8b419c12327254dec0df116c62525aaabad.zip
FFI: Rename IR_CNEWP to IR_CNEWI and use it to box 64 bit integers.
Generates smaller IR and DCE eliminates many intermediate boxes. Needs allocation sinking to eliminate the boxes kept alive by PHIs.
-rw-r--r-- src/lj_asm.c 67
-rw-r--r-- src/lj_crecord.c 70
-rw-r--r-- src/lj_ir.h 10
-rw-r--r-- src/lj_opt_fold.c 25
-rw-r--r-- src/lj_opt_split.c 50
5 files changed, 140 insertions, 82 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 8864c9a3..77b55f0c 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2518,7 +2518,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
2518{ 2518{
2519 CTState *cts = ctype_ctsG(J2G(as->J)); 2519 CTState *cts = ctype_ctsG(J2G(as->J));
2520 CTypeID typeid = (CTypeID)IR(ir->op1)->i; 2520 CTypeID typeid = (CTypeID)IR(ir->op1)->i;
2521 CTSize sz = (ir->o == IR_CNEWP || ir->op2 == REF_NIL) ? 2521 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
2522 lj_ctype_size(cts, typeid) : (CTSize)IR(ir->op2)->i; 2522 lj_ctype_size(cts, typeid) : (CTSize)IR(ir->op2)->i;
2523 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 2523 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
2524 IRRef args[2]; 2524 IRRef args[2];
@@ -2529,33 +2529,45 @@ static void asm_cnew(ASMState *as, IRIns *ir)
2529 as->gcsteps++; 2529 as->gcsteps++;
2530 asm_setupresult(as, ir, ci); /* GCcdata * */ 2530 asm_setupresult(as, ir, ci); /* GCcdata * */
2531 2531
2532 /* Initialize pointer cdata object. */ 2532 /* Initialize immutable cdata object. */
2533 if (ir->o == IR_CNEWP) { 2533 if (ir->o == IR_CNEWI) {
2534 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
2535#if LJ_64
2536 Reg r64 = sz == 8 ? REX_64 : 0;
2534 if (irref_isk(ir->op2)) { 2537 if (irref_isk(ir->op2)) {
2535 IRIns *irk = IR(ir->op2); 2538 IRIns *irk = IR(ir->op2);
2536#if LJ_64 2539 uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 :
2537 if (irk->o == IR_KINT64) { 2540 (uint64_t)(uint32_t)irk->i;
2538 uint64_t k = ir_k64(irk)->u64; 2541 if (sz == 4 || checki32((int64_t)k)) {
2539 lua_assert(sz == 8); 2542 emit_i32(as, (int32_t)k);
2540 if (checki32((int64_t)k)) { 2543 emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
2541 emit_i32(as, (int32_t)k);
2542 emit_rmro(as, XO_MOVmi, REX_64, RID_RET, sizeof(GCcdata));
2543 } else {
2544 emit_movtomro(as, RID_ECX|REX_64, RID_RET, sizeof(GCcdata));
2545 emit_loadu64(as, RID_ECX, k);
2546 }
2547 } else { 2544 } else {
2548#endif 2545 emit_movtomro(as, RID_ECX + r64, RID_RET, sizeof(GCcdata));
2549 lua_assert(sz == 4); 2546 emit_loadu64(as, RID_ECX, k);
2550 emit_movmroi(as, RID_RET, sizeof(GCcdata), irk->i);
2551#if LJ_64
2552 } 2547 }
2553#endif
2554 } else { 2548 } else {
2555 Reg r = ra_alloc1(as, ir->op2, (RSET_GPR & ~RSET_SCRATCH)); 2549 Reg r = ra_alloc1(as, ir->op2, allow);
2556 emit_movtomro(as, r + ((LJ_64 && sz == 8) ? REX_64 : 0), 2550 emit_movtomro(as, r + r64, RID_RET, sizeof(GCcdata));
2557 RID_RET, sizeof(GCcdata)); 2551 }
2552#else
2553 int32_t ofs = sizeof(GCcdata);
2554 if (LJ_HASFFI && sz == 8) {
2555 ofs += 4; ir++;
2556 lua_assert(ir->o == IR_HIOP);
2558 } 2557 }
2558 do {
2559 if (irref_isk(ir->op2)) {
2560 emit_movmroi(as, RID_RET, ofs, IR(ir->op2)->i);
2561 } else {
2562 Reg r = ra_alloc1(as, ir->op2, allow);
2563 emit_movtomro(as, r, RID_RET, ofs);
2564 rset_clear(allow, r);
2565 }
2566 if (!LJ_HASFFI || ofs == sizeof(GCcdata)) break;
2567 ofs -= 4; ir--;
2568 } while (1);
2569#endif
2570 lua_assert(sz == 4 || (sz == 8 && (LJ_64 || LJ_HASFFI)));
2559 } 2571 }
2560 2572
2561 /* Combine initialization of marked, gct and typeid. */ 2573 /* Combine initialization of marked, gct and typeid. */
@@ -3289,6 +3301,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
3289 if (!uselo) 3301 if (!uselo)
3290 ra_allocref(as, ir->op1, RID2RSET(RID_RET)); /* Mark call as used. */ 3302 ra_allocref(as, ir->op1, RID2RSET(RID_RET)); /* Mark call as used. */
3291 break; 3303 break;
3304 case IR_CNEWI:
3305 /* Nothing to do here. Handled by CNEWI itself. */
3306 break;
3292 default: lua_assert(0); break; 3307 default: lua_assert(0); break;
3293 } 3308 }
3294#else 3309#else
@@ -4057,7 +4072,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
4057 case IR_SNEW: asm_snew(as, ir); break; 4072 case IR_SNEW: asm_snew(as, ir); break;
4058 case IR_TNEW: asm_tnew(as, ir); break; 4073 case IR_TNEW: asm_tnew(as, ir); break;
4059 case IR_TDUP: asm_tdup(as, ir); break; 4074 case IR_TDUP: asm_tdup(as, ir); break;
4060 case IR_CNEW: case IR_CNEWP: asm_cnew(as, ir); break; 4075 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
4061 4076
4062 /* Write barriers. */ 4077 /* Write barriers. */
4063 case IR_TBAR: asm_tbar(as, ir); break; 4078 case IR_TBAR: asm_tbar(as, ir); break;
@@ -4164,8 +4179,10 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
4164 } 4179 }
4165#if LJ_32 && LJ_HASFFI 4180#if LJ_32 && LJ_HASFFI
4166 case IR_HIOP: 4181 case IR_HIOP:
4167 if ((ir-1)->o == IR_CALLN) 4182 if ((ir-1)->o == IR_CALLN) {
4168 ir->prev = REGSP_HINT(RID_RETHI); 4183 ir->prev = REGSP_HINT(RID_RETHI);
4184 continue;
4185 }
4169 break; 4186 break;
4170#endif 4187#endif
4171 /* C calls evict all scratch regs and return results in RID_RET. */ 4188 /* C calls evict all scratch regs and return results in RID_RET. */
@@ -4174,7 +4191,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
4174 if (as->evenspill < 3) /* lj_str_new and lj_tab_newkey need 3 args. */ 4191 if (as->evenspill < 3) /* lj_str_new and lj_tab_newkey need 3 args. */
4175 as->evenspill = 3; 4192 as->evenspill = 3;
4176#endif 4193#endif
4177 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWP: case IR_TOSTR: 4194 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
4178 ir->prev = REGSP_HINT(RID_RET); 4195 ir->prev = REGSP_HINT(RID_RET);
4179 if (inloop) 4196 if (inloop)
4180 as->modset = RSET_SCRATCH; 4197 as->modset = RSET_SCRATCH;
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index cd5c7d49..1ba98ae8 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -137,7 +137,7 @@ static int crec_isnonzero(CType *s, void *p)
137 } 137 }
138} 138}
139 139
140static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, 140static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
141 void *svisnz) 141 void *svisnz)
142{ 142{
143 CTSize dsize = d->size, ssize = s->size; 143 CTSize dsize = d->size, ssize = s->size;
@@ -190,6 +190,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
190#endif 190#endif
191 xstore: 191 xstore:
192 if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); 192 if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J);
193 if (dp == 0) return sp;
193 emitir(IRT(IR_XSTORE, dt), dp, sp); 194 emitir(IRT(IR_XSTORE, dt), dp, sp);
194 break; 195 break;
195 case CCX(I, C): 196 case CCX(I, C):
@@ -290,6 +291,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
290 lj_trace_err(J, LJ_TRERR_NYICONV); 291 lj_trace_err(J, LJ_TRERR_NYICONV);
291 break; 292 break;
292 } 293 }
294 return 0;
293} 295}
294 296
295/* -- Convert C type to TValue (load) ------------------------------------- */ 297/* -- Convert C type to TValue (load) ------------------------------------- */
@@ -306,21 +308,18 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp)
306 goto err_nyi; /* NYI: copyval of >64 bit integers. */ 308 goto err_nyi; /* NYI: copyval of >64 bit integers. */
307 tr = emitir(IRT(IR_XLOAD, t), sp, 0); 309 tr = emitir(IRT(IR_XLOAD, t), sp, 0);
308 if (t == IRT_FLOAT || t == IRT_U32) { /* Keep uint32_t/float as numbers. */ 310 if (t == IRT_FLOAT || t == IRT_U32) { /* Keep uint32_t/float as numbers. */
309 tr = emitconv(tr, IRT_NUM, t, 0); 311 return emitconv(tr, IRT_NUM, t, 0);
310 } else if (t == IRT_I64 || t == IRT_U64) { /* Box 64 bit integer. */ 312 } else if (t == IRT_I64 || t == IRT_U64) { /* Box 64 bit integer. */
311 TRef dp = emitir(IRTG(IR_CNEW, IRT_CDATA), lj_ir_kint(J, sid), TREF_NIL); 313 sp = tr;
312 TRef ptr = emitir(IRT(IR_ADD, IRT_PTR), dp,
313 lj_ir_kintp(J, sizeof(GCcdata)));
314 emitir(IRT(IR_XSTORE, t), ptr, tr);
315 lj_needsplit(J); 314 lj_needsplit(J);
316 return dp;
317 } else if ((sinfo & CTF_BOOL)) { 315 } else if ((sinfo & CTF_BOOL)) {
318 /* Assume not equal to zero. Fixup and emit pending guard later. */ 316 /* Assume not equal to zero. Fixup and emit pending guard later. */
319 lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); 317 lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0));
320 J->postproc = LJ_POST_FIXGUARD; 318 J->postproc = LJ_POST_FIXGUARD;
321 tr = TREF_TRUE; 319 return TREF_TRUE;
320 } else {
321 return tr;
322 } 322 }
323 return tr;
324 } else if (ctype_isptr(sinfo)) { 323 } else if (ctype_isptr(sinfo)) {
325 IRType t = (LJ_64 && s->size == 8) ? IRT_P64 : IRT_P32; 324 IRType t = (LJ_64 && s->size == 8) ? IRT_P64 : IRT_P32;
326 sp = emitir(IRT(IR_XLOAD, t), sp, 0); 325 sp = emitir(IRT(IR_XLOAD, t), sp, 0);
@@ -345,13 +344,13 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp)
345 err_nyi: 344 err_nyi:
346 lj_trace_err(J, LJ_TRERR_NYICONV); 345 lj_trace_err(J, LJ_TRERR_NYICONV);
347 } 346 }
348 /* Box pointer or ref. */ 347 /* Box pointer, ref or 64 bit integer. */
349 return emitir(IRTG(IR_CNEWP, IRT_CDATA), lj_ir_kint(J, sid), sp); 348 return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, sid), sp);
350} 349}
351 350
352/* -- Convert TValue to C type (store) ------------------------------------ */ 351/* -- Convert TValue to C type (store) ------------------------------------ */
353 352
354static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) 353static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
355{ 354{
356 CTState *cts = ctype_ctsG(J2G(J)); 355 CTState *cts = ctype_ctsG(J2G(J));
357 CTypeID sid = CTID_P_VOID; 356 CTypeID sid = CTID_P_VOID;
@@ -402,6 +401,12 @@ static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
402 } else { 401 } else {
403 goto doconv; /* The pointer value was loaded, don't load number. */ 402 goto doconv; /* The pointer value was loaded, don't load number. */
404 } 403 }
404
405 } else if (ctype_isnum(s->info) && s->size == 8) {
406 IRType t = (s->info & CTF_UNSIGNED) ? IRT_U64 : IRT_I64;
407 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_INT64);
408 lj_needsplit(J);
409 goto doconv;
405 } else { 410 } else {
406 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCcdata))); 411 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCcdata)));
407 } 412 }
@@ -418,7 +423,7 @@ static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
418 s = ctype_get(cts, sid); 423 s = ctype_get(cts, sid);
419doconv: 424doconv:
420 if (ctype_isenum(d->info)) d = ctype_child(cts, d); 425 if (ctype_isenum(d->info)) d = ctype_child(cts, d);
421 crec_ct_ct(J, d, s, dp, sp, svisnz); 426 return crec_ct_ct(J, d, s, dp, sp, svisnz);
422} 427}
423 428
424/* -- C data metamethods -------------------------------------------------- */ 429/* -- C data metamethods -------------------------------------------------- */
@@ -578,15 +583,18 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
578 CTState *cts = ctype_ctsG(J2G(J)); 583 CTState *cts = ctype_ctsG(J2G(J));
579 CTSize sz; 584 CTSize sz;
580 CTInfo info = lj_ctype_info(cts, id, &sz); 585 CTInfo info = lj_ctype_info(cts, id, &sz);
586 CType *d = ctype_raw(cts, id);
581 TRef trid; 587 TRef trid;
582 if (sz == 0 || sz > 64 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) 588 if (sz == 0 || sz > 64 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN)
583 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */ 589 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */
584 trid = lj_ir_kint(J, id); 590 trid = lj_ir_kint(J, id);
585 if (ctype_isptr(info)) { 591 /* Use special instruction to box pointer or 64 bit integer. */
586 TRef sp = J->base[1] ? J->base[1] : lj_ir_kptr(J, NULL); 592 if (ctype_isptr(info) || (ctype_isnum(info) && sz == 8)) {
587 J->base[0] = emitir(IRTG(IR_CNEWP, IRT_CDATA), trid, sp); 593 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
594 ctype_isptr(info) ? lj_ir_kptr(J, NULL) :
595 (lj_needsplit(J), lj_ir_kint64(J, 0));
596 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
588 } else { 597 } else {
589 CType *d = ctype_raw(cts, id);
590 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); 598 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL);
591 J->base[0] = trcd; 599 J->base[0] = trcd;
592 if (J->base[1] && !J->base[2] && !lj_cconv_multi_init(d, &rd->argv[1])) { 600 if (J->base[1] && !J->base[2] && !lj_cconv_multi_init(d, &rd->argv[1])) {
@@ -598,7 +606,7 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
598 TValue tv; 606 TValue tv;
599 TValue *sval = &tv; 607 TValue *sval = &tv;
600 MSize i; 608 MSize i;
601 setnumV(&tv, 0); 609 tv.u64 = 0;
602 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) 610 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)))
603 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ 611 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */
604 for (i = 1, ofs = 0; ofs < sz; ofs += esize) { 612 for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
@@ -645,11 +653,16 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
645 } 653 }
646 } 654 }
647 } else { 655 } else {
648 TRef sp, dp; 656 TRef dp;
649 single_init: 657 single_init:
650 sp = J->base[1] ? J->base[1] : lj_ir_kint(J, 0);
651 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata))); 658 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
652 crec_ct_tv(J, d, dp, sp, &rd->argv[1]); 659 if (J->base[1]) {
660 crec_ct_tv(J, d, dp, J->base[1], &rd->argv[1]);
661 } else {
662 TValue tv;
663 tv.u64 = 0;
664 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
665 }
653 } 666 }
654 } 667 }
655} 668}
@@ -669,7 +682,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
669 if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) { 682 if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) {
670 IRType dt; 683 IRType dt;
671 CTypeID id; 684 CTypeID id;
672 TRef tr, dp, ptr; 685 TRef tr;
673 MSize i; 686 MSize i;
674 lj_needsplit(J); 687 lj_needsplit(J);
675 if (((s[0]->info & CTF_UNSIGNED) && s[0]->size == 8) || 688 if (((s[0]->info & CTF_UNSIGNED) && s[0]->size == 8) ||
@@ -702,10 +715,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
702 } else { 715 } else {
703 tr = emitir(IRT(mm+(int)IR_ADD-(int)MM_add, dt), sp[0], sp[1]); 716 tr = emitir(IRT(mm+(int)IR_ADD-(int)MM_add, dt), sp[0], sp[1]);
704 } 717 }
705 dp = emitir(IRTG(IR_CNEW, IRT_CDATA), lj_ir_kint(J, id), TREF_NIL); 718 return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
706 ptr = emitir(IRT(IR_ADD, IRT_PTR), dp, lj_ir_kintp(J, sizeof(GCcdata)));
707 emitir(IRT(IR_XSTORE, dt), ptr, tr);
708 return dp;
709 } 719 }
710 return 0; 720 return 0;
711} 721}
@@ -767,7 +777,7 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
767 tr = emitir(IRT(IR_ADD, IRT_PTR), sp[0], tr); 777 tr = emitir(IRT(IR_ADD, IRT_PTR), sp[0], tr);
768 id = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|ctype_cid(ctp->info)), 778 id = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|ctype_cid(ctp->info)),
769 CTSIZE_PTR); 779 CTSIZE_PTR);
770 return emitir(IRTG(IR_CNEWP, IRT_CDATA), lj_ir_kint(J, id), tr); 780 return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
771 } 781 }
772} 782}
773 783
@@ -787,6 +797,11 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
787 IRType t = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32; 797 IRType t = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32;
788 if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct); 798 if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
789 tr = emitir(IRT(IR_FLOAD, t), tr, IRFL_CDATA_PTR); 799 tr = emitir(IRT(IR_FLOAD, t), tr, IRFL_CDATA_PTR);
800 } else if (ctype_isnum(ct->info) && ct->size == 8) {
801 IRType t = (ct->info & CTF_UNSIGNED) ? IRT_U64 : IRT_I64;
802 tr = emitir(IRT(IR_FLOAD, t), tr, IRFL_CDATA_INT64);
803 lj_needsplit(J);
804 goto ok;
790 } else { 805 } else {
791 tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCcdata))); 806 tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCcdata)));
792 } 807 }
@@ -807,6 +822,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
807 } else if (!tref_isnum(tr)) { 822 } else if (!tref_isnum(tr)) {
808 goto err_type; 823 goto err_type;
809 } 824 }
825 ok:
810 s[i] = ct; 826 s[i] = ct;
811 sp[i] = tr; 827 sp[i] = tr;
812 } 828 }
diff --git a/src/lj_ir.h b/src/lj_ir.h
index dfafc5db..bde0ac04 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -113,7 +113,7 @@
113 _(TNEW, AW, lit, lit) \ 113 _(TNEW, AW, lit, lit) \
114 _(TDUP, AW, ref, ___) \ 114 _(TDUP, AW, ref, ___) \
115 _(CNEW, AW, ref, ref) \ 115 _(CNEW, AW, ref, ref) \
116 _(CNEWP, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 116 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
117 \ 117 \
118 /* Write barriers. */ \ 118 /* Write barriers. */ \
119 _(TBAR, S , ref, ___) \ 119 _(TBAR, S , ref, ___) \
@@ -188,7 +188,9 @@ IRFPMDEF(FPMENUM)
188 _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ 188 _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
189 _(UDATA_FILE, sizeof(GCudata)) \ 189 _(UDATA_FILE, sizeof(GCudata)) \
190 _(CDATA_TYPEID, offsetof(GCcdata, typeid)) \ 190 _(CDATA_TYPEID, offsetof(GCcdata, typeid)) \
191 _(CDATA_PTR, sizeof(GCcdata)) 191 _(CDATA_PTR, sizeof(GCcdata)) \
192 _(CDATA_INT64, sizeof(GCcdata)) \
193 _(CDATA_INT64HI, sizeof(GCcdata) + 4)
192 194
193typedef enum { 195typedef enum {
194#define FLENUM(name, ofs) IRFL_##name, 196#define FLENUM(name, ofs) IRFL_##name,
@@ -588,12 +590,12 @@ typedef union IRIns {
588#define ir_kptr(ir) \ 590#define ir_kptr(ir) \
589 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) 591 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void))
590 592
593LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W);
594
591/* A store or any other op with a non-weak guard has a side-effect. */ 595/* A store or any other op with a non-weak guard has a side-effect. */
592static LJ_AINLINE int ir_sideeff(IRIns *ir) 596static LJ_AINLINE int ir_sideeff(IRIns *ir)
593{ 597{
594 return (((ir->t.irt | ~IRT_GUARD) & lj_ir_mode[ir->o]) >= IRM_S); 598 return (((ir->t.irt | ~IRT_GUARD) & lj_ir_mode[ir->o]) >= IRM_S);
595} 599}
596 600
597LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W);
598
599#endif 601#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index c3b0a082..28758013 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -154,7 +154,7 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
154#define gcstep_barrier(J, ref) \ 154#define gcstep_barrier(J, ref) \
155 ((ref) < J->chain[IR_LOOP] && \ 155 ((ref) < J->chain[IR_LOOP] && \
156 (J->chain[IR_SNEW] || J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 156 (J->chain[IR_SNEW] || J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
157 J->chain[IR_CNEW] || J->chain[IR_CNEWP] || J->chain[IR_TOSTR])) 157 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
158 158
159/* -- Constant folding for FP numbers ------------------------------------- */ 159/* -- Constant folding for FP numbers ------------------------------------- */
160 160
@@ -307,7 +307,7 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
307 case IR_BOR: k1 |= k2; break; 307 case IR_BOR: k1 |= k2; break;
308 case IR_BXOR: k1 ^= k2; break; 308 case IR_BXOR: k1 ^= k2; break;
309#endif 309#endif
310 default: lua_assert(0); break; 310 default: UNUSED(k2); lua_assert(0); break;
311 } 311 }
312 return k1; 312 return k1;
313} 313}
@@ -1765,18 +1765,28 @@ LJFOLDF(fload_cdata_typeid_kgc)
1765 return NEXTFOLD; 1765 return NEXTFOLD;
1766} 1766}
1767 1767
1768/* The content of int64 cdata objects is immutable. */
1769LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
1770LJFOLDF(fload_cdata_int64_kgc)
1771{
1772 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1773 return INT64FOLD(*(uint64_t *)cdataptr(ir_kcdata(fleft)));
1774 return NEXTFOLD;
1775}
1776
1768LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID) 1777LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID)
1769LJFOLD(FLOAD CNEWP IRFL_CDATA_TYPEID) 1778LJFOLD(FLOAD CNEWI IRFL_CDATA_TYPEID)
1770LJFOLDF(fload_cdata_typeid_cnew) 1779LJFOLDF(fload_cdata_typeid_cnew)
1771{ 1780{
1772 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) 1781 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1773 return fleft->op1; /* No PHI barrier needed. CNEW/CNEWP op1 is const. */ 1782 return fleft->op1; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
1774 return NEXTFOLD; 1783 return NEXTFOLD;
1775} 1784}
1776 1785
1777/* Pointer cdata objects are immutable. */ 1786/* Pointer and int64 cdata objects are immutable. */
1778LJFOLD(FLOAD CNEWP IRFL_CDATA_PTR) 1787LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
1779LJFOLDF(fload_cdata_ptr_cnew) 1788LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
1789LJFOLDF(fload_cdata_ptr_int64_cnew)
1780{ 1790{
1781 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) 1791 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1782 return fleft->op2; /* Fold even across PHI to avoid allocations. */ 1792 return fleft->op2; /* Fold even across PHI to avoid allocations. */
@@ -1786,6 +1796,7 @@ LJFOLDF(fload_cdata_ptr_cnew)
1786LJFOLD(FLOAD any IRFL_STR_LEN) 1796LJFOLD(FLOAD any IRFL_STR_LEN)
1787LJFOLD(FLOAD any IRFL_CDATA_TYPEID) 1797LJFOLD(FLOAD any IRFL_CDATA_TYPEID)
1788LJFOLD(FLOAD any IRFL_CDATA_PTR) 1798LJFOLD(FLOAD any IRFL_CDATA_PTR)
1799LJFOLD(FLOAD any IRFL_CDATA_INT64)
1789LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */ 1800LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */
1790LJFOLDX(lj_opt_cse) 1801LJFOLDX(lj_opt_cse)
1791 1802
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index f53616b3..90b2b49c 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -159,7 +159,8 @@ static void split_ir(jit_State *J)
159 ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); 159 ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
160 hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); 160 hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
161 } else { 161 } else {
162 ir->prev = (IRRef1)ref; /* Identity substitution for loword. */ 162 ir->prev = ref; /* Identity substitution for loword. */
163 hisubst[ref] = 0;
163 } 164 }
164 } 165 }
165 166
@@ -168,6 +169,7 @@ static void split_ir(jit_State *J)
168 IRIns *ir = &oir[ref]; 169 IRIns *ir = &oir[ref];
169 IRRef nref = lj_ir_nextins(J); 170 IRRef nref = lj_ir_nextins(J);
170 IRIns *nir = IR(nref); 171 IRIns *nir = IR(nref);
172 IRRef hi = 0;
171 173
172 /* Copy-substitute old instruction to new instruction. */ 174 /* Copy-substitute old instruction to new instruction. */
173 nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; 175 nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
@@ -175,10 +177,11 @@ static void split_ir(jit_State *J)
175 ir->prev = nref; /* Loword substitution. */ 177 ir->prev = nref; /* Loword substitution. */
176 nir->o = ir->o; 178 nir->o = ir->o;
177 nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); 179 nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
180 hisubst[ref] = 0;
178 181
179 /* Split 64 bit instructions. */ 182 /* Split 64 bit instructions. */
180 if (irt_isint64(ir->t)) { 183 if (irt_isint64(ir->t)) {
181 IRRef hi = hisubst[ir->op1]; 184 IRRef hiref = hisubst[ir->op1];
182 nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ 185 nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
183 switch (ir->o) { 186 switch (ir->o) {
184 case IR_ADD: 187 case IR_ADD:
@@ -186,13 +189,13 @@ static void split_ir(jit_State *J)
186 /* Use plain op for hiword if loword cannot produce a carry/borrow. */ 189 /* Use plain op for hiword if loword cannot produce a carry/borrow. */
187 if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { 190 if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
188 ir->prev = nir->op1; /* Pass through loword. */ 191 ir->prev = nir->op1; /* Pass through loword. */
189 nir->op1 = hi; nir->op2 = hisubst[ir->op2]; 192 nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
190 hi = nref; 193 hi = nref;
191 break; 194 break;
192 } 195 }
193 /* fallthrough */ 196 /* fallthrough */
194 case IR_NEG: 197 case IR_NEG:
195 hi = split_emit(J, IRTI(IR_HIOP), hi, hisubst[ir->op2]); 198 hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
196 break; 199 break;
197 case IR_MUL: 200 case IR_MUL:
198 hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); 201 hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
@@ -212,6 +215,13 @@ static void split_ir(jit_State *J)
212 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 215 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
213 IRCALL_lj_carith_powu64); 216 IRCALL_lj_carith_powu64);
214 break; 217 break;
218 case IR_FLOAD:
219 lua_assert(ir->op2 == IRFL_CDATA_INT64);
220 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64HI);
221#if LJ_BE
222 ir->prev = hi; hi = nref;
223#endif
224 break;
215 case IR_XLOAD: 225 case IR_XLOAD:
216 hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2); 226 hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2);
217#if LJ_BE 227#if LJ_BE
@@ -220,19 +230,18 @@ static void split_ir(jit_State *J)
220 break; 230 break;
221 case IR_XSTORE: 231 case IR_XSTORE:
222#if LJ_LE 232#if LJ_LE
223 hi = hisubst[ir->op2]; 233 hiref = hisubst[ir->op2];
224#else 234#else
225 hi = nir->op2; nir->op2 = hisubst[ir->op2]; 235 hiref = nir->op2; nir->op2 = hisubst[ir->op2];
226#endif 236#endif
227 split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hi); 237 split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hiref);
228 continue; 238 break;
229 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ 239 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
230 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 240 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
231 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ 241 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
232 hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); 242 hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
233 } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ 243 } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
234 /* Drop cast, since assembler doesn't care. */ 244 /* Drop cast, since assembler doesn't care. */
235 hisubst[ref] = hi;
236 goto fwdlo; 245 goto fwdlo;
237 } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ 246 } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */
238 IRRef k31 = lj_ir_kint(J, 31); 247 IRRef k31 = lj_ir_kint(J, 31);
@@ -242,27 +251,26 @@ static void split_ir(jit_State *J)
242 nir->op2 = k31; 251 nir->op2 = k31;
243 hi = nref; 252 hi = nref;
244 } else { /* Zero-extend to 64 bit. */ 253 } else { /* Zero-extend to 64 bit. */
245 hisubst[ref] = lj_ir_kint(J, 0); 254 hi = lj_ir_kint(J, 0);
246 goto fwdlo; 255 goto fwdlo;
247 } 256 }
248 break; 257 break;
249 } 258 }
250 case IR_PHI: { 259 case IR_PHI: {
251 IRRef hi2; 260 IRRef hiref2;
252 if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || 261 if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
253 nir->op1 == nir->op2) 262 nir->op1 == nir->op2)
254 J->cur.nins--; /* Drop useless PHIs. */ 263 J->cur.nins--; /* Drop useless PHIs. */
255 hi2 = hisubst[ir->op2]; 264 hiref2 = hisubst[ir->op2];
256 if (!((irref_isk(hi) && irref_isk(hi2)) || hi == hi2)) 265 if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
257 split_emit(J, IRTI(IR_PHI), hi, hi2); 266 split_emit(J, IRTI(IR_PHI), hiref, hiref2);
258 continue; 267 break;
259 } 268 }
260 default: 269 default:
261 lua_assert(ir->o <= IR_NE); 270 lua_assert(ir->o <= IR_NE); /* Comparisons. */
262 split_emit(J, IRTGI(IR_HIOP), hi, hisubst[ir->op2]); /* Comparisons. */ 271 split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
263 continue; 272 break;
264 } 273 }
265 hisubst[ref] = hi; /* Store hiword substitution. */
266 } else if (ir->o == IR_CONV) { /* See above, too. */ 274 } else if (ir->o == IR_CONV) { /* See above, too. */
267 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 275 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
268 if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ 276 if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */
@@ -277,9 +285,13 @@ static void split_ir(jit_State *J)
277 nir->op1 = nir->op2 = 0; 285 nir->op1 = nir->op2 = 0;
278 } 286 }
279 } 287 }
288 } else if (ir->o == IR_CNEWI) {
289 if (hisubst[ir->op2])
290 split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
280 } else if (ir->o == IR_LOOP) { 291 } else if (ir->o == IR_LOOP) {
281 J->loopref = nref; /* Needed by assembler. */ 292 J->loopref = nref; /* Needed by assembler. */
282 } 293 }
294 hisubst[ref] = hi; /* Store hiword substitution. */
283 } 295 }
284 296
285 /* Add PHI marks. */ 297 /* Add PHI marks. */