about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Mike Pall <mike> 2011-02-03 04:13:51 +0100
committer Mike Pall <mike> 2011-02-03 04:22:27 +0100
commit df65b8b419c12327254dec0df116c62525aaabad (patch)
tree b4ba6ea2841692123b49b3033420dbb7282cbcd6 /src
parent 1027018b2135caf45057c3d3b3da03ffb0c6add3 (diff)
download luajit-df65b8b419c12327254dec0df116c62525aaabad.tar.gz
luajit-df65b8b419c12327254dec0df116c62525aaabad.tar.bz2
luajit-df65b8b419c12327254dec0df116c62525aaabad.zip
FFI: Rename IR_CNEWP to IR_CNEWI and use it to box 64 bit integers.
Generates smaller IR and DCE eliminates many intermediate boxes. Needs allocation sinking to eliminate the boxes kept alive by PHIs.
Diffstat (limited to 'src')
-rw-r--r-- src/lj_asm.c 67
-rw-r--r-- src/lj_crecord.c 70
-rw-r--r-- src/lj_ir.h 10
-rw-r--r-- src/lj_opt_fold.c 25
-rw-r--r-- src/lj_opt_split.c 50
5 files changed, 140 insertions, 82 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 8864c9a3..77b55f0c 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2518,7 +2518,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
2518{ 2518{
2519 CTState *cts = ctype_ctsG(J2G(as->J)); 2519 CTState *cts = ctype_ctsG(J2G(as->J));
2520 CTypeID typeid = (CTypeID)IR(ir->op1)->i; 2520 CTypeID typeid = (CTypeID)IR(ir->op1)->i;
2521 CTSize sz = (ir->o == IR_CNEWP || ir->op2 == REF_NIL) ? 2521 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
2522 lj_ctype_size(cts, typeid) : (CTSize)IR(ir->op2)->i; 2522 lj_ctype_size(cts, typeid) : (CTSize)IR(ir->op2)->i;
2523 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 2523 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
2524 IRRef args[2]; 2524 IRRef args[2];
@@ -2529,33 +2529,45 @@ static void asm_cnew(ASMState *as, IRIns *ir)
2529 as->gcsteps++; 2529 as->gcsteps++;
2530 asm_setupresult(as, ir, ci); /* GCcdata * */ 2530 asm_setupresult(as, ir, ci); /* GCcdata * */
2531 2531
2532 /* Initialize pointer cdata object. */ 2532 /* Initialize immutable cdata object. */
2533 if (ir->o == IR_CNEWP) { 2533 if (ir->o == IR_CNEWI) {
2534 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
2535#if LJ_64
2536 Reg r64 = sz == 8 ? REX_64 : 0;
2534 if (irref_isk(ir->op2)) { 2537 if (irref_isk(ir->op2)) {
2535 IRIns *irk = IR(ir->op2); 2538 IRIns *irk = IR(ir->op2);
2536#if LJ_64 2539 uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 :
2537 if (irk->o == IR_KINT64) { 2540 (uint64_t)(uint32_t)irk->i;
2538 uint64_t k = ir_k64(irk)->u64; 2541 if (sz == 4 || checki32((int64_t)k)) {
2539 lua_assert(sz == 8); 2542 emit_i32(as, (int32_t)k);
2540 if (checki32((int64_t)k)) { 2543 emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
2541 emit_i32(as, (int32_t)k);
2542 emit_rmro(as, XO_MOVmi, REX_64, RID_RET, sizeof(GCcdata));
2543 } else {
2544 emit_movtomro(as, RID_ECX|REX_64, RID_RET, sizeof(GCcdata));
2545 emit_loadu64(as, RID_ECX, k);
2546 }
2547 } else { 2544 } else {
2548#endif 2545 emit_movtomro(as, RID_ECX + r64, RID_RET, sizeof(GCcdata));
2549 lua_assert(sz == 4); 2546 emit_loadu64(as, RID_ECX, k);
2550 emit_movmroi(as, RID_RET, sizeof(GCcdata), irk->i);
2551#if LJ_64
2552 } 2547 }
2553#endif
2554 } else { 2548 } else {
2555 Reg r = ra_alloc1(as, ir->op2, (RSET_GPR & ~RSET_SCRATCH)); 2549 Reg r = ra_alloc1(as, ir->op2, allow);
2556 emit_movtomro(as, r + ((LJ_64 && sz == 8) ? REX_64 : 0), 2550 emit_movtomro(as, r + r64, RID_RET, sizeof(GCcdata));
2557 RID_RET, sizeof(GCcdata)); 2551 }
2552#else
2553 int32_t ofs = sizeof(GCcdata);
2554 if (LJ_HASFFI && sz == 8) {
2555 ofs += 4; ir++;
2556 lua_assert(ir->o == IR_HIOP);
2558 } 2557 }
2558 do {
2559 if (irref_isk(ir->op2)) {
2560 emit_movmroi(as, RID_RET, ofs, IR(ir->op2)->i);
2561 } else {
2562 Reg r = ra_alloc1(as, ir->op2, allow);
2563 emit_movtomro(as, r, RID_RET, ofs);
2564 rset_clear(allow, r);
2565 }
2566 if (!LJ_HASFFI || ofs == sizeof(GCcdata)) break;
2567 ofs -= 4; ir--;
2568 } while (1);
2569#endif
2570 lua_assert(sz == 4 || (sz == 8 && (LJ_64 || LJ_HASFFI)));
2559 } 2571 }
2560 2572
2561 /* Combine initialization of marked, gct and typeid. */ 2573 /* Combine initialization of marked, gct and typeid. */
@@ -3289,6 +3301,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
3289 if (!uselo) 3301 if (!uselo)
3290 ra_allocref(as, ir->op1, RID2RSET(RID_RET)); /* Mark call as used. */ 3302 ra_allocref(as, ir->op1, RID2RSET(RID_RET)); /* Mark call as used. */
3291 break; 3303 break;
3304 case IR_CNEWI:
3305 /* Nothing to do here. Handled by CNEWI itself. */
3306 break;
3292 default: lua_assert(0); break; 3307 default: lua_assert(0); break;
3293 } 3308 }
3294#else 3309#else
@@ -4057,7 +4072,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
4057 case IR_SNEW: asm_snew(as, ir); break; 4072 case IR_SNEW: asm_snew(as, ir); break;
4058 case IR_TNEW: asm_tnew(as, ir); break; 4073 case IR_TNEW: asm_tnew(as, ir); break;
4059 case IR_TDUP: asm_tdup(as, ir); break; 4074 case IR_TDUP: asm_tdup(as, ir); break;
4060 case IR_CNEW: case IR_CNEWP: asm_cnew(as, ir); break; 4075 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
4061 4076
4062 /* Write barriers. */ 4077 /* Write barriers. */
4063 case IR_TBAR: asm_tbar(as, ir); break; 4078 case IR_TBAR: asm_tbar(as, ir); break;
@@ -4164,8 +4179,10 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
4164 } 4179 }
4165#if LJ_32 && LJ_HASFFI 4180#if LJ_32 && LJ_HASFFI
4166 case IR_HIOP: 4181 case IR_HIOP:
4167 if ((ir-1)->o == IR_CALLN) 4182 if ((ir-1)->o == IR_CALLN) {
4168 ir->prev = REGSP_HINT(RID_RETHI); 4183 ir->prev = REGSP_HINT(RID_RETHI);
4184 continue;
4185 }
4169 break; 4186 break;
4170#endif 4187#endif
4171 /* C calls evict all scratch regs and return results in RID_RET. */ 4188 /* C calls evict all scratch regs and return results in RID_RET. */
@@ -4174,7 +4191,7 @@ static void asm_setup_regsp(ASMState *as, GCtrace *T)
4174 if (as->evenspill < 3) /* lj_str_new and lj_tab_newkey need 3 args. */ 4191 if (as->evenspill < 3) /* lj_str_new and lj_tab_newkey need 3 args. */
4175 as->evenspill = 3; 4192 as->evenspill = 3;
4176#endif 4193#endif
4177 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWP: case IR_TOSTR: 4194 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
4178 ir->prev = REGSP_HINT(RID_RET); 4195 ir->prev = REGSP_HINT(RID_RET);
4179 if (inloop) 4196 if (inloop)
4180 as->modset = RSET_SCRATCH; 4197 as->modset = RSET_SCRATCH;
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index cd5c7d49..1ba98ae8 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -137,7 +137,7 @@ static int crec_isnonzero(CType *s, void *p)
137 } 137 }
138} 138}
139 139
140static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, 140static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
141 void *svisnz) 141 void *svisnz)
142{ 142{
143 CTSize dsize = d->size, ssize = s->size; 143 CTSize dsize = d->size, ssize = s->size;
@@ -190,6 +190,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
190#endif 190#endif
191 xstore: 191 xstore:
192 if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J); 192 if (dt == IRT_I64 || dt == IRT_U64) lj_needsplit(J);
193 if (dp == 0) return sp;
193 emitir(IRT(IR_XSTORE, dt), dp, sp); 194 emitir(IRT(IR_XSTORE, dt), dp, sp);
194 break; 195 break;
195 case CCX(I, C): 196 case CCX(I, C):
@@ -290,6 +291,7 @@ static void crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
290 lj_trace_err(J, LJ_TRERR_NYICONV); 291 lj_trace_err(J, LJ_TRERR_NYICONV);
291 break; 292 break;
292 } 293 }
294 return 0;
293} 295}
294 296
295/* -- Convert C type to TValue (load) ------------------------------------- */ 297/* -- Convert C type to TValue (load) ------------------------------------- */
@@ -306,21 +308,18 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp)
306 goto err_nyi; /* NYI: copyval of >64 bit integers. */ 308 goto err_nyi; /* NYI: copyval of >64 bit integers. */
307 tr = emitir(IRT(IR_XLOAD, t), sp, 0); 309 tr = emitir(IRT(IR_XLOAD, t), sp, 0);
308 if (t == IRT_FLOAT || t == IRT_U32) { /* Keep uint32_t/float as numbers. */ 310 if (t == IRT_FLOAT || t == IRT_U32) { /* Keep uint32_t/float as numbers. */
309 tr = emitconv(tr, IRT_NUM, t, 0); 311 return emitconv(tr, IRT_NUM, t, 0);
310 } else if (t == IRT_I64 || t == IRT_U64) { /* Box 64 bit integer. */ 312 } else if (t == IRT_I64 || t == IRT_U64) { /* Box 64 bit integer. */
311 TRef dp = emitir(IRTG(IR_CNEW, IRT_CDATA), lj_ir_kint(J, sid), TREF_NIL); 313 sp = tr;
312 TRef ptr = emitir(IRT(IR_ADD, IRT_PTR), dp,
313 lj_ir_kintp(J, sizeof(GCcdata)));
314 emitir(IRT(IR_XSTORE, t), ptr, tr);
315 lj_needsplit(J); 314 lj_needsplit(J);
316 return dp;
317 } else if ((sinfo & CTF_BOOL)) { 315 } else if ((sinfo & CTF_BOOL)) {
318 /* Assume not equal to zero. Fixup and emit pending guard later. */ 316 /* Assume not equal to zero. Fixup and emit pending guard later. */
319 lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); 317 lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0));
320 J->postproc = LJ_POST_FIXGUARD; 318 J->postproc = LJ_POST_FIXGUARD;
321 tr = TREF_TRUE; 319 return TREF_TRUE;
320 } else {
321 return tr;
322 } 322 }
323 return tr;
324 } else if (ctype_isptr(sinfo)) { 323 } else if (ctype_isptr(sinfo)) {
325 IRType t = (LJ_64 && s->size == 8) ? IRT_P64 : IRT_P32; 324 IRType t = (LJ_64 && s->size == 8) ? IRT_P64 : IRT_P32;
326 sp = emitir(IRT(IR_XLOAD, t), sp, 0); 325 sp = emitir(IRT(IR_XLOAD, t), sp, 0);
@@ -345,13 +344,13 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp)
345 err_nyi: 344 err_nyi:
346 lj_trace_err(J, LJ_TRERR_NYICONV); 345 lj_trace_err(J, LJ_TRERR_NYICONV);
347 } 346 }
348 /* Box pointer or ref. */ 347 /* Box pointer, ref or 64 bit integer. */
349 return emitir(IRTG(IR_CNEWP, IRT_CDATA), lj_ir_kint(J, sid), sp); 348 return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, sid), sp);
350} 349}
351 350
352/* -- Convert TValue to C type (store) ------------------------------------ */ 351/* -- Convert TValue to C type (store) ------------------------------------ */
353 352
354static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) 353static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
355{ 354{
356 CTState *cts = ctype_ctsG(J2G(J)); 355 CTState *cts = ctype_ctsG(J2G(J));
357 CTypeID sid = CTID_P_VOID; 356 CTypeID sid = CTID_P_VOID;
@@ -402,6 +401,12 @@ static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
402 } else { 401 } else {
403 goto doconv; /* The pointer value was loaded, don't load number. */ 402 goto doconv; /* The pointer value was loaded, don't load number. */
404 } 403 }
404
405 } else if (ctype_isnum(s->info) && s->size == 8) {
406 IRType t = (s->info & CTF_UNSIGNED) ? IRT_U64 : IRT_I64;
407 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_INT64);
408 lj_needsplit(J);
409 goto doconv;
405 } else { 410 } else {
406 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCcdata))); 411 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCcdata)));
407 } 412 }
@@ -418,7 +423,7 @@ static void crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval)
418 s = ctype_get(cts, sid); 423 s = ctype_get(cts, sid);
419doconv: 424doconv:
420 if (ctype_isenum(d->info)) d = ctype_child(cts, d); 425 if (ctype_isenum(d->info)) d = ctype_child(cts, d);
421 crec_ct_ct(J, d, s, dp, sp, svisnz); 426 return crec_ct_ct(J, d, s, dp, sp, svisnz);
422} 427}
423 428
424/* -- C data metamethods -------------------------------------------------- */ 429/* -- C data metamethods -------------------------------------------------- */
@@ -578,15 +583,18 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
578 CTState *cts = ctype_ctsG(J2G(J)); 583 CTState *cts = ctype_ctsG(J2G(J));
579 CTSize sz; 584 CTSize sz;
580 CTInfo info = lj_ctype_info(cts, id, &sz); 585 CTInfo info = lj_ctype_info(cts, id, &sz);
586 CType *d = ctype_raw(cts, id);
581 TRef trid; 587 TRef trid;
582 if (sz == 0 || sz > 64 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) 588 if (sz == 0 || sz > 64 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN)
583 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */ 589 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */
584 trid = lj_ir_kint(J, id); 590 trid = lj_ir_kint(J, id);
585 if (ctype_isptr(info)) { 591 /* Use special instruction to box pointer or 64 bit integer. */
586 TRef sp = J->base[1] ? J->base[1] : lj_ir_kptr(J, NULL); 592 if (ctype_isptr(info) || (ctype_isnum(info) && sz == 8)) {
587 J->base[0] = emitir(IRTG(IR_CNEWP, IRT_CDATA), trid, sp); 593 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
594 ctype_isptr(info) ? lj_ir_kptr(J, NULL) :
595 (lj_needsplit(J), lj_ir_kint64(J, 0));
596 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
588 } else { 597 } else {
589 CType *d = ctype_raw(cts, id);
590 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); 598 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL);
591 J->base[0] = trcd; 599 J->base[0] = trcd;
592 if (J->base[1] && !J->base[2] && !lj_cconv_multi_init(d, &rd->argv[1])) { 600 if (J->base[1] && !J->base[2] && !lj_cconv_multi_init(d, &rd->argv[1])) {
@@ -598,7 +606,7 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
598 TValue tv; 606 TValue tv;
599 TValue *sval = &tv; 607 TValue *sval = &tv;
600 MSize i; 608 MSize i;
601 setnumV(&tv, 0); 609 tv.u64 = 0;
602 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) 610 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)))
603 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ 611 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */
604 for (i = 1, ofs = 0; ofs < sz; ofs += esize) { 612 for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
@@ -645,11 +653,16 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
645 } 653 }
646 } 654 }
647 } else { 655 } else {
648 TRef sp, dp; 656 TRef dp;
649 single_init: 657 single_init:
650 sp = J->base[1] ? J->base[1] : lj_ir_kint(J, 0);
651 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata))); 658 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
652 crec_ct_tv(J, d, dp, sp, &rd->argv[1]); 659 if (J->base[1]) {
660 crec_ct_tv(J, d, dp, J->base[1], &rd->argv[1]);
661 } else {
662 TValue tv;
663 tv.u64 = 0;
664 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
665 }
653 } 666 }
654 } 667 }
655} 668}
@@ -669,7 +682,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
669 if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) { 682 if (ctype_isnum(s[0]->info) && ctype_isnum(s[1]->info)) {
670 IRType dt; 683 IRType dt;
671 CTypeID id; 684 CTypeID id;
672 TRef tr, dp, ptr; 685 TRef tr;
673 MSize i; 686 MSize i;
674 lj_needsplit(J); 687 lj_needsplit(J);
675 if (((s[0]->info & CTF_UNSIGNED) && s[0]->size == 8) || 688 if (((s[0]->info & CTF_UNSIGNED) && s[0]->size == 8) ||
@@ -702,10 +715,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
702 } else { 715 } else {
703 tr = emitir(IRT(mm+(int)IR_ADD-(int)MM_add, dt), sp[0], sp[1]); 716 tr = emitir(IRT(mm+(int)IR_ADD-(int)MM_add, dt), sp[0], sp[1]);
704 } 717 }
705 dp = emitir(IRTG(IR_CNEW, IRT_CDATA), lj_ir_kint(J, id), TREF_NIL); 718 return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
706 ptr = emitir(IRT(IR_ADD, IRT_PTR), dp, lj_ir_kintp(J, sizeof(GCcdata)));
707 emitir(IRT(IR_XSTORE, dt), ptr, tr);
708 return dp;
709 } 719 }
710 return 0; 720 return 0;
711} 721}
@@ -767,7 +777,7 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
767 tr = emitir(IRT(IR_ADD, IRT_PTR), sp[0], tr); 777 tr = emitir(IRT(IR_ADD, IRT_PTR), sp[0], tr);
768 id = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|ctype_cid(ctp->info)), 778 id = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|ctype_cid(ctp->info)),
769 CTSIZE_PTR); 779 CTSIZE_PTR);
770 return emitir(IRTG(IR_CNEWP, IRT_CDATA), lj_ir_kint(J, id), tr); 780 return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
771 } 781 }
772} 782}
773 783
@@ -787,6 +797,11 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
787 IRType t = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32; 797 IRType t = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32;
788 if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct); 798 if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
789 tr = emitir(IRT(IR_FLOAD, t), tr, IRFL_CDATA_PTR); 799 tr = emitir(IRT(IR_FLOAD, t), tr, IRFL_CDATA_PTR);
800 } else if (ctype_isnum(ct->info) && ct->size == 8) {
801 IRType t = (ct->info & CTF_UNSIGNED) ? IRT_U64 : IRT_I64;
802 tr = emitir(IRT(IR_FLOAD, t), tr, IRFL_CDATA_INT64);
803 lj_needsplit(J);
804 goto ok;
790 } else { 805 } else {
791 tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCcdata))); 806 tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCcdata)));
792 } 807 }
@@ -807,6 +822,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
807 } else if (!tref_isnum(tr)) { 822 } else if (!tref_isnum(tr)) {
808 goto err_type; 823 goto err_type;
809 } 824 }
825 ok:
810 s[i] = ct; 826 s[i] = ct;
811 sp[i] = tr; 827 sp[i] = tr;
812 } 828 }
diff --git a/src/lj_ir.h b/src/lj_ir.h
index dfafc5db..bde0ac04 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -113,7 +113,7 @@
113 _(TNEW, AW, lit, lit) \ 113 _(TNEW, AW, lit, lit) \
114 _(TDUP, AW, ref, ___) \ 114 _(TDUP, AW, ref, ___) \
115 _(CNEW, AW, ref, ref) \ 115 _(CNEW, AW, ref, ref) \
116 _(CNEWP, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 116 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
117 \ 117 \
118 /* Write barriers. */ \ 118 /* Write barriers. */ \
119 _(TBAR, S , ref, ___) \ 119 _(TBAR, S , ref, ___) \
@@ -188,7 +188,9 @@ IRFPMDEF(FPMENUM)
188 _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ 188 _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
189 _(UDATA_FILE, sizeof(GCudata)) \ 189 _(UDATA_FILE, sizeof(GCudata)) \
190 _(CDATA_TYPEID, offsetof(GCcdata, typeid)) \ 190 _(CDATA_TYPEID, offsetof(GCcdata, typeid)) \
191 _(CDATA_PTR, sizeof(GCcdata)) 191 _(CDATA_PTR, sizeof(GCcdata)) \
192 _(CDATA_INT64, sizeof(GCcdata)) \
193 _(CDATA_INT64HI, sizeof(GCcdata) + 4)
192 194
193typedef enum { 195typedef enum {
194#define FLENUM(name, ofs) IRFL_##name, 196#define FLENUM(name, ofs) IRFL_##name,
@@ -588,12 +590,12 @@ typedef union IRIns {
588#define ir_kptr(ir) \ 590#define ir_kptr(ir) \
589 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) 591 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void))
590 592
593LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W);
594
591/* A store or any other op with a non-weak guard has a side-effect. */ 595/* A store or any other op with a non-weak guard has a side-effect. */
592static LJ_AINLINE int ir_sideeff(IRIns *ir) 596static LJ_AINLINE int ir_sideeff(IRIns *ir)
593{ 597{
594 return (((ir->t.irt | ~IRT_GUARD) & lj_ir_mode[ir->o]) >= IRM_S); 598 return (((ir->t.irt | ~IRT_GUARD) & lj_ir_mode[ir->o]) >= IRM_S);
595} 599}
596 600
597LJ_STATIC_ASSERT((int)IRT_GUARD == (int)IRM_W);
598
599#endif 601#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index c3b0a082..28758013 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -154,7 +154,7 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
154#define gcstep_barrier(J, ref) \ 154#define gcstep_barrier(J, ref) \
155 ((ref) < J->chain[IR_LOOP] && \ 155 ((ref) < J->chain[IR_LOOP] && \
156 (J->chain[IR_SNEW] || J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 156 (J->chain[IR_SNEW] || J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
157 J->chain[IR_CNEW] || J->chain[IR_CNEWP] || J->chain[IR_TOSTR])) 157 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
158 158
159/* -- Constant folding for FP numbers ------------------------------------- */ 159/* -- Constant folding for FP numbers ------------------------------------- */
160 160
@@ -307,7 +307,7 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
307 case IR_BOR: k1 |= k2; break; 307 case IR_BOR: k1 |= k2; break;
308 case IR_BXOR: k1 ^= k2; break; 308 case IR_BXOR: k1 ^= k2; break;
309#endif 309#endif
310 default: lua_assert(0); break; 310 default: UNUSED(k2); lua_assert(0); break;
311 } 311 }
312 return k1; 312 return k1;
313} 313}
@@ -1765,18 +1765,28 @@ LJFOLDF(fload_cdata_typeid_kgc)
1765 return NEXTFOLD; 1765 return NEXTFOLD;
1766} 1766}
1767 1767
1768/* The content of int64 cdata objects is immutable. */
1769LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
1770LJFOLDF(fload_cdata_int64_kgc)
1771{
1772 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1773 return INT64FOLD(*(uint64_t *)cdataptr(ir_kcdata(fleft)));
1774 return NEXTFOLD;
1775}
1776
1768LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID) 1777LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID)
1769LJFOLD(FLOAD CNEWP IRFL_CDATA_TYPEID) 1778LJFOLD(FLOAD CNEWI IRFL_CDATA_TYPEID)
1770LJFOLDF(fload_cdata_typeid_cnew) 1779LJFOLDF(fload_cdata_typeid_cnew)
1771{ 1780{
1772 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) 1781 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1773 return fleft->op1; /* No PHI barrier needed. CNEW/CNEWP op1 is const. */ 1782 return fleft->op1; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
1774 return NEXTFOLD; 1783 return NEXTFOLD;
1775} 1784}
1776 1785
1777/* Pointer cdata objects are immutable. */ 1786/* Pointer and int64 cdata objects are immutable. */
1778LJFOLD(FLOAD CNEWP IRFL_CDATA_PTR) 1787LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
1779LJFOLDF(fload_cdata_ptr_cnew) 1788LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
1789LJFOLDF(fload_cdata_ptr_int64_cnew)
1780{ 1790{
1781 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) 1791 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1782 return fleft->op2; /* Fold even across PHI to avoid allocations. */ 1792 return fleft->op2; /* Fold even across PHI to avoid allocations. */
@@ -1786,6 +1796,7 @@ LJFOLDF(fload_cdata_ptr_cnew)
1786LJFOLD(FLOAD any IRFL_STR_LEN) 1796LJFOLD(FLOAD any IRFL_STR_LEN)
1787LJFOLD(FLOAD any IRFL_CDATA_TYPEID) 1797LJFOLD(FLOAD any IRFL_CDATA_TYPEID)
1788LJFOLD(FLOAD any IRFL_CDATA_PTR) 1798LJFOLD(FLOAD any IRFL_CDATA_PTR)
1799LJFOLD(FLOAD any IRFL_CDATA_INT64)
1789LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */ 1800LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */
1790LJFOLDX(lj_opt_cse) 1801LJFOLDX(lj_opt_cse)
1791 1802
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index f53616b3..90b2b49c 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -159,7 +159,8 @@ static void split_ir(jit_State *J)
159 ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); 159 ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
160 hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); 160 hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
161 } else { 161 } else {
162 ir->prev = (IRRef1)ref; /* Identity substitution for loword. */ 162 ir->prev = ref; /* Identity substitution for loword. */
163 hisubst[ref] = 0;
163 } 164 }
164 } 165 }
165 166
@@ -168,6 +169,7 @@ static void split_ir(jit_State *J)
168 IRIns *ir = &oir[ref]; 169 IRIns *ir = &oir[ref];
169 IRRef nref = lj_ir_nextins(J); 170 IRRef nref = lj_ir_nextins(J);
170 IRIns *nir = IR(nref); 171 IRIns *nir = IR(nref);
172 IRRef hi = 0;
171 173
172 /* Copy-substitute old instruction to new instruction. */ 174 /* Copy-substitute old instruction to new instruction. */
173 nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; 175 nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
@@ -175,10 +177,11 @@ static void split_ir(jit_State *J)
175 ir->prev = nref; /* Loword substitution. */ 177 ir->prev = nref; /* Loword substitution. */
176 nir->o = ir->o; 178 nir->o = ir->o;
177 nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); 179 nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
180 hisubst[ref] = 0;
178 181
179 /* Split 64 bit instructions. */ 182 /* Split 64 bit instructions. */
180 if (irt_isint64(ir->t)) { 183 if (irt_isint64(ir->t)) {
181 IRRef hi = hisubst[ir->op1]; 184 IRRef hiref = hisubst[ir->op1];
182 nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ 185 nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
183 switch (ir->o) { 186 switch (ir->o) {
184 case IR_ADD: 187 case IR_ADD:
@@ -186,13 +189,13 @@ static void split_ir(jit_State *J)
186 /* Use plain op for hiword if loword cannot produce a carry/borrow. */ 189 /* Use plain op for hiword if loword cannot produce a carry/borrow. */
187 if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { 190 if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
188 ir->prev = nir->op1; /* Pass through loword. */ 191 ir->prev = nir->op1; /* Pass through loword. */
189 nir->op1 = hi; nir->op2 = hisubst[ir->op2]; 192 nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
190 hi = nref; 193 hi = nref;
191 break; 194 break;
192 } 195 }
193 /* fallthrough */ 196 /* fallthrough */
194 case IR_NEG: 197 case IR_NEG:
195 hi = split_emit(J, IRTI(IR_HIOP), hi, hisubst[ir->op2]); 198 hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
196 break; 199 break;
197 case IR_MUL: 200 case IR_MUL:
198 hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); 201 hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
@@ -212,6 +215,13 @@ static void split_ir(jit_State *J)
212 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 215 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
213 IRCALL_lj_carith_powu64); 216 IRCALL_lj_carith_powu64);
214 break; 217 break;
218 case IR_FLOAD:
219 lua_assert(ir->op2 == IRFL_CDATA_INT64);
220 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64HI);
221#if LJ_BE
222 ir->prev = hi; hi = nref;
223#endif
224 break;
215 case IR_XLOAD: 225 case IR_XLOAD:
216 hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2); 226 hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, nir->op1), ir->op2);
217#if LJ_BE 227#if LJ_BE
@@ -220,19 +230,18 @@ static void split_ir(jit_State *J)
220 break; 230 break;
221 case IR_XSTORE: 231 case IR_XSTORE:
222#if LJ_LE 232#if LJ_LE
223 hi = hisubst[ir->op2]; 233 hiref = hisubst[ir->op2];
224#else 234#else
225 hi = nir->op2; nir->op2 = hisubst[ir->op2]; 235 hiref = nir->op2; nir->op2 = hisubst[ir->op2];
226#endif 236#endif
227 split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hi); 237 split_emit(J, IRTI(IR_XSTORE), split_ptr(J, nir->op1), hiref);
228 continue; 238 break;
229 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ 239 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
230 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 240 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
231 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ 241 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
232 hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); 242 hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
233 } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ 243 } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
234 /* Drop cast, since assembler doesn't care. */ 244 /* Drop cast, since assembler doesn't care. */
235 hisubst[ref] = hi;
236 goto fwdlo; 245 goto fwdlo;
237 } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ 246 } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */
238 IRRef k31 = lj_ir_kint(J, 31); 247 IRRef k31 = lj_ir_kint(J, 31);
@@ -242,27 +251,26 @@ static void split_ir(jit_State *J)
242 nir->op2 = k31; 251 nir->op2 = k31;
243 hi = nref; 252 hi = nref;
244 } else { /* Zero-extend to 64 bit. */ 253 } else { /* Zero-extend to 64 bit. */
245 hisubst[ref] = lj_ir_kint(J, 0); 254 hi = lj_ir_kint(J, 0);
246 goto fwdlo; 255 goto fwdlo;
247 } 256 }
248 break; 257 break;
249 } 258 }
250 case IR_PHI: { 259 case IR_PHI: {
251 IRRef hi2; 260 IRRef hiref2;
252 if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || 261 if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
253 nir->op1 == nir->op2) 262 nir->op1 == nir->op2)
254 J->cur.nins--; /* Drop useless PHIs. */ 263 J->cur.nins--; /* Drop useless PHIs. */
255 hi2 = hisubst[ir->op2]; 264 hiref2 = hisubst[ir->op2];
256 if (!((irref_isk(hi) && irref_isk(hi2)) || hi == hi2)) 265 if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
257 split_emit(J, IRTI(IR_PHI), hi, hi2); 266 split_emit(J, IRTI(IR_PHI), hiref, hiref2);
258 continue; 267 break;
259 } 268 }
260 default: 269 default:
261 lua_assert(ir->o <= IR_NE); 270 lua_assert(ir->o <= IR_NE); /* Comparisons. */
262 split_emit(J, IRTGI(IR_HIOP), hi, hisubst[ir->op2]); /* Comparisons. */ 271 split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
263 continue; 272 break;
264 } 273 }
265 hisubst[ref] = hi; /* Store hiword substitution. */
266 } else if (ir->o == IR_CONV) { /* See above, too. */ 274 } else if (ir->o == IR_CONV) { /* See above, too. */
267 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 275 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
268 if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ 276 if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */
@@ -277,9 +285,13 @@ static void split_ir(jit_State *J)
277 nir->op1 = nir->op2 = 0; 285 nir->op1 = nir->op2 = 0;
278 } 286 }
279 } 287 }
288 } else if (ir->o == IR_CNEWI) {
289 if (hisubst[ir->op2])
290 split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
280 } else if (ir->o == IR_LOOP) { 291 } else if (ir->o == IR_LOOP) {
281 J->loopref = nref; /* Needed by assembler. */ 292 J->loopref = nref; /* Needed by assembler. */
282 } 293 }
294 hisubst[ref] = hi; /* Store hiword substitution. */
283 } 295 }
284 296
285 /* Add PHI marks. */ 297 /* Add PHI marks. */