path: root/src/lj_asm_arm.h
Diffstat (limited to 'src/lj_asm_arm.h')
-rw-r--r--  src/lj_asm_arm.h  656
1 files changed, 285 insertions, 371 deletions
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 262fa59e..ded63913 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow)
41 } 41 }
42 } 42 }
43 } 43 }
44 lua_assert(rset_test(RSET_GPREVEN, r)); 44 lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r);
45 ra_modified(as, r); 45 ra_modified(as, r);
46 ra_modified(as, r+1); 46 ra_modified(as, r+1);
47 RA_DBGX((as, "scratchpair $r $r", r, r+1)); 47 RA_DBGX((as, "scratchpair $r $r", r, r+1));
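
[Note: the pattern repeated throughout this patch replaces bare lua_assert() calls with lj_assertA(), which adds a printf-style message and routes through the ASMState's global state. A minimal sketch of how such macros could be layered — the real definitions live in lj_def.h and are compiled out unless LUA_USE_ASSERT; the exact shape here is assumed:

#ifdef LUA_USE_ASSERT
void lj_assert_fail(global_State *g, const char *file, int line,
                    const char *func, const char *fmt, ...);
#define lj_assertG_(g, c, ...) \
  ((c) ? (void)0 : lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__))
#define lj_assertA(c, ...)  lj_assertG_(J2G(as->J), (c), __VA_ARGS__)  /* 'as' in scope */
#else
#define lj_assertA(c, ...)  ((void)0)
#endif
]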
@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
185 *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ 185 *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */
186 return ra_allock(as, (ofs & ~255), allow); 186 return ra_allock(as, (ofs & ~255), allow);
187 } 187 }
188 } else if (ir->o == IR_TMPREF) {
189 *ofsp = 0;
190 return RID_SP;
188 } 191 }
189 } 192 }
190 *ofsp = 0; 193 *ofsp = 0;
@@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
269 return; 272 return;
270 } 273 }
271 } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { 274 } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {
272 lua_assert(ofs == 0); 275 lj_assertA(ofs == 0, "bad usage");
273 ofs = (int32_t)sizeof(GCstr); 276 ofs = (int32_t)sizeof(GCstr);
274 if (irref_isk(ir->op2)) { 277 if (irref_isk(ir->op2)) {
275 ofs += IR(ir->op2)->i; 278 ofs += IR(ir->op2)->i;
@@ -338,7 +341,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
338/* Generate a call to a C function. */ 341/* Generate a call to a C function. */
339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 342static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
340{ 343{
341 uint32_t n, nargs = CCI_NARGS(ci); 344 uint32_t n, nargs = CCI_XNARGS(ci);
342 int32_t ofs = 0; 345 int32_t ofs = 0;
343#if LJ_SOFTFP 346#if LJ_SOFTFP
344 Reg gpr = REGARG_FIRSTGPR; 347 Reg gpr = REGARG_FIRSTGPR;
@@ -389,9 +392,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
389 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); 392 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
390 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; 393 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;
391 if (gpr <= REGARG_LASTGPR) { 394 if (gpr <= REGARG_LASTGPR) {
392 lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ 395 lj_assertA(rset_test(as->freeset, gpr),
396 "reg %d not free", gpr); /* Must have been evicted. */
393 if (irt_isnum(ir->t)) { 397 if (irt_isnum(ir->t)) {
394 lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ 398 lj_assertA(rset_test(as->freeset, gpr+1),
399 "reg %d not free", gpr+1); /* Ditto. */
395 emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); 400 emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));
396 gpr += 2; 401 gpr += 2;
397 } else { 402 } else {
@@ -408,7 +413,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
408#endif 413#endif
409 { 414 {
410 if (gpr <= REGARG_LASTGPR) { 415 if (gpr <= REGARG_LASTGPR) {
411 lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ 416 lj_assertA(rset_test(as->freeset, gpr),
417 "reg %d not free", gpr); /* Must have been evicted. */
412 if (ref) ra_leftov(as, gpr, ref); 418 if (ref) ra_leftov(as, gpr, ref);
413 gpr++; 419 gpr++;
414 } else { 420 } else {
@@ -433,7 +439,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
433 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 439 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
434 ra_evictset(as, drop); /* Evictions must be performed first. */ 440 ra_evictset(as, drop); /* Evictions must be performed first. */
435 if (ra_used(ir)) { 441 if (ra_used(ir)) {
436 lua_assert(!irt_ispri(ir->t)); 442 lj_assertA(!irt_ispri(ir->t), "PRI dest");
437 if (!LJ_SOFTFP && irt_isfp(ir->t)) { 443 if (!LJ_SOFTFP && irt_isfp(ir->t)) {
438 if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { 444 if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {
439 Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); 445 Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
@@ -453,15 +459,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
453 UNUSED(ci); 459 UNUSED(ci);
454} 460}
455 461
456static void asm_call(ASMState *as, IRIns *ir)
457{
458 IRRef args[CCI_NARGS_MAX];
459 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
460 asm_collectargs(as, ir, ci, args);
461 asm_setupresult(as, ir, ci);
462 asm_gencall(as, ci, args);
463}
464
465static void asm_callx(ASMState *as, IRIns *ir) 462static void asm_callx(ASMState *as, IRIns *ir)
466{ 463{
467 IRRef args[CCI_NARGS_MAX*2]; 464 IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +487,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
490{ 487{
491 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 488 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
492 void *pc = ir_kptr(IR(ir->op2)); 489 void *pc = ir_kptr(IR(ir->op2));
493 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 490 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
494 as->topslot -= (BCReg)delta; 491 as->topslot -= (BCReg)delta;
495 if ((int32_t)as->topslot < 0) as->topslot = 0; 492 if ((int32_t)as->topslot < 0) as->topslot = 0;
496 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 493 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
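
[Note: the functional change in this hunk is the LJ_FR2 term in the slot delta: on GC64 builds the frame link occupies two stack slots, so one extra slot is popped on return. 32-bit ARM is never GC64, so LJ_FR2 is 0 here and the change only keeps this backend textually in sync with the shared definition, presumably:

/* From lj_arch.h (assumed): frame link takes two slots with 64-bit GC refs. */
#if LJ_GC64
#define LJ_FR2 1
#else
#define LJ_FR2 0
#endif
]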
@@ -504,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir)
504 emit_lso(as, ARMI_LDR, RID_TMP, base, -4); 501 emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
505} 502}
506 503
504/* -- Buffer operations --------------------------------------------------- */
505
506#if LJ_HASBUFFER
507static void asm_bufhdr_write(ASMState *as, Reg sb)
508{
509 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
510 IRIns irgc;
511 int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);
512 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
513 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
514 if ((as->flags & JIT_F_ARMV6T2)) {
515 emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);
516 } else {
517 emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
518 emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp);
519 }
520 emit_lso(as, ARMI_LDR, RID_TMP,
521 ra_allock(as, (addr & ~4095),
522 rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
523 (addr & 4095));
524 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
525}
526#endif
527
507/* -- Type conversions ---------------------------------------------------- */ 528/* -- Type conversions ---------------------------------------------------- */
508 529
509#if !LJ_SOFTFP 530#if !LJ_SOFTFP
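
[Note: the assembler emits machine code backwards, so the emit_* calls in asm_bufhdr_write() read bottom-to-top in execution order: load sb->L, keep only the flag bits stashed in its low bits (one BFI on ARMv6T2, AND+ORR otherwise), load g->cur_L, merge, store back. The net effect in C, as a sketch with accessor names assumed from lj_obj.h/lj_buf.h:

static void bufhdr_write_effect(global_State *g, SBuf *sb)
{
  uintptr_t l = (uintptr_t)mref(sb->L, void);
  l = (l & SBUF_MASK_FLAG) | (uintptr_t)gcref(g->cur_L);  /* keep flag bits */
  setmref(sb->L, (void *)l);  /* retag with the currently running lua_State */
}
]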
@@ -539,13 +560,17 @@ static void asm_conv(ASMState *as, IRIns *ir)
539#endif 560#endif
540 IRRef lref = ir->op1; 561 IRRef lref = ir->op1;
541 /* 64 bit integer conversions are handled by SPLIT. */ 562 /* 64 bit integer conversions are handled by SPLIT. */
542 lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); 563 lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64),
564 "IR %04d has unsplit 64 bit type",
565 (int)(ir - as->ir) - REF_BIAS);
543#if LJ_SOFTFP 566#if LJ_SOFTFP
544 /* FP conversions are handled by SPLIT. */ 567 /* FP conversions are handled by SPLIT. */
545 lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); 568 lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
569 "IR %04d has FP type",
570 (int)(ir - as->ir) - REF_BIAS);
546 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ 571 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
547#else 572#else
548 lua_assert(irt_type(ir->t) != st); 573 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
549 if (irt_isfp(ir->t)) { 574 if (irt_isfp(ir->t)) {
550 Reg dest = ra_dest(as, ir, RSET_FPR); 575 Reg dest = ra_dest(as, ir, RSET_FPR);
551 if (stfp) { /* FP to FP conversion. */ 576 if (stfp) { /* FP to FP conversion. */
@@ -562,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
562 } else if (stfp) { /* FP to integer conversion. */ 587 } else if (stfp) { /* FP to integer conversion. */
563 if (irt_isguard(ir->t)) { 588 if (irt_isguard(ir->t)) {
564 /* Checked conversions are only supported from number to int. */ 589 /* Checked conversions are only supported from number to int. */
565 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 590 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
591 "bad type for checked CONV");
566 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 592 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
567 } else { 593 } else {
568 Reg left = ra_alloc1(as, lref, RSET_FPR); 594 Reg left = ra_alloc1(as, lref, RSET_FPR);
@@ -581,7 +607,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
581 Reg dest = ra_dest(as, ir, RSET_GPR); 607 Reg dest = ra_dest(as, ir, RSET_GPR);
582 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 608 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
583 Reg left = ra_alloc1(as, lref, RSET_GPR); 609 Reg left = ra_alloc1(as, lref, RSET_GPR);
584 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 610 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
585 if ((as->flags & JIT_F_ARMV6)) { 611 if ((as->flags & JIT_F_ARMV6)) {
586 ARMIns ai = st == IRT_I8 ? ARMI_SXTB : 612 ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
587 st == IRT_U8 ? ARMI_UXTB : 613 st == IRT_U8 ? ARMI_UXTB :
@@ -601,31 +627,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
601 } 627 }
602} 628}
603 629
604#if !LJ_SOFTFP && LJ_HASFFI
605static void asm_conv64(ASMState *as, IRIns *ir)
606{
607 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
608 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
609 IRCallID id;
610 CCallInfo ci;
611 IRRef args[2];
612 args[0] = (ir-1)->op1;
613 args[1] = ir->op1;
614 if (st == IRT_NUM || st == IRT_FLOAT) {
615 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
616 ir--;
617 } else {
618 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
619 }
620 ci = lj_ir_callinfo[id];
621#if !LJ_ABI_SOFTFP
622 ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
623#endif
624 asm_setupresult(as, ir, &ci);
625 asm_gencall(as, &ci, args);
626}
627#endif
628
629static void asm_strto(ASMState *as, IRIns *ir) 630static void asm_strto(ASMState *as, IRIns *ir)
630{ 631{
631 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 632 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,60 +690,61 @@ static void asm_strto(ASMState *as, IRIns *ir)
689 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); 690 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
690} 691}
691 692
693/* -- Memory references --------------------------------------------------- */
694
692/* Get pointer to TValue. */ 695/* Get pointer to TValue. */
693static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 696static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
694{ 697{
695 IRIns *ir = IR(ref); 698 if ((mode & IRTMPREF_IN1)) {
696 if (irt_isnum(ir->t)) { 699 IRIns *ir = IR(ref);
697 if (irref_isk(ref)) { 700 if (irt_isnum(ir->t)) {
698 /* Use the number constant itself as a TValue. */ 701 if ((mode & IRTMPREF_OUT1)) {
699 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); 702#if LJ_SOFTFP
700 } else { 703 lj_assertA(irref_isk(ref), "unsplit FP op");
704 emit_dm(as, ARMI_MOV, dest, RID_SP);
705 emit_lso(as, ARMI_STR,
706 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
707 RID_SP, 0);
708 emit_lso(as, ARMI_STR,
709 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
710 RID_SP, 4);
711#else
712 Reg src = ra_alloc1(as, ref, RSET_FPR);
713 emit_dm(as, ARMI_MOV, dest, RID_SP);
714 emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
715#endif
716 } else if (irref_isk(ref)) {
717 /* Use the number constant itself as a TValue. */
718 ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
719 } else {
701#if LJ_SOFTFP 720#if LJ_SOFTFP
702 lua_assert(0); 721 lj_assertA(0, "unsplit FP op");
703#else 722#else
704 /* Otherwise force a spill and use the spill slot. */ 723 /* Otherwise force a spill and use the spill slot. */
705 emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); 724 emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
706#endif 725#endif
726 }
727 } else {
728 /* Otherwise use [sp] and [sp+4] to hold the TValue.
729 ** This assumes the following call has max. 4 args.
730 */
731 Reg type;
732 emit_dm(as, ARMI_MOV, dest, RID_SP);
733 if (!irt_ispri(ir->t)) {
734 Reg src = ra_alloc1(as, ref, RSET_GPR);
735 emit_lso(as, ARMI_STR, src, RID_SP, 0);
736 }
737 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
738 type = ra_alloc1(as, ref+1, RSET_GPR);
739 else
740 type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
741 emit_lso(as, ARMI_STR, type, RID_SP, 4);
707 } 742 }
708 } else { 743 } else {
709 /* Otherwise use [sp] and [sp+4] to hold the TValue. */
710 RegSet allow = rset_exclude(RSET_GPR, dest);
711 Reg type;
712 emit_dm(as, ARMI_MOV, dest, RID_SP); 744 emit_dm(as, ARMI_MOV, dest, RID_SP);
713 if (!irt_ispri(ir->t)) {
714 Reg src = ra_alloc1(as, ref, allow);
715 emit_lso(as, ARMI_STR, src, RID_SP, 0);
716 }
717 if ((ir+1)->o == IR_HIOP)
718 type = ra_alloc1(as, ref+1, allow);
719 else
720 type = ra_allock(as, irt_toitype(ir->t), allow);
721 emit_lso(as, ARMI_STR, type, RID_SP, 4);
722 } 745 }
723} 746}
724 747
725static void asm_tostr(ASMState *as, IRIns *ir)
726{
727 IRRef args[2];
728 args[0] = ASMREF_L;
729 as->gcsteps++;
730 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
731 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
732 args[1] = ASMREF_TMP1; /* const lua_Number * */
733 asm_setupresult(as, ir, ci); /* GCstr * */
734 asm_gencall(as, ci, args);
735 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
736 } else {
737 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
738 args[1] = ir->op1; /* int32_t k */
739 asm_setupresult(as, ir, ci); /* GCstr * */
740 asm_gencall(as, ci, args);
741 }
742}
743
744/* -- Memory references --------------------------------------------------- */
745
746static void asm_aref(ASMState *as, IRIns *ir) 748static void asm_aref(ASMState *as, IRIns *ir)
747{ 749{
748 Reg dest = ra_dest(as, ir, RSET_GPR); 750 Reg dest = ra_dest(as, ir, RSET_GPR);
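
[Note: asm_tvptr() now also serves IR_TMPREF, which materializes a temporary TValue at [sp]. The new mode argument distinguishes pure inputs (the operand is stored into the temp, or a number constant is referenced in place) from in/out temps: OUT1 forces a writable copy at [sp] even for constants, since the callee may overwrite it. A sketch of the mode bits — the names appear in the diff, the values are assumed from lj_ir.h:

/* IR_TMPREF mode bits, stored in op2 (values assumed): */
#define IRTMPREF_IN1   0x01  /* Temp is initialized with the op1 value. */
#define IRTMPREF_OUT1  0x02  /* Temp may be written back by the callee. */
]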
@@ -864,16 +866,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
864 *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); 866 *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu);
865 867
866 /* Load main position relative to tab->node into dest. */ 868 /* Load main position relative to tab->node into dest. */
867 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 869 khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1;
868 if (khash == 0) { 870 if (khash == 0) {
869 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 871 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
870 } else { 872 } else {
871 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); 873 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
872 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); 874 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
873 if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ 875 if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */
874 emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); 876 emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
875 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 877 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
876 emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); 878 emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid));
877 emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); 879 emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
878 } else if (irref_isk(refkey)) { 880 } else if (irref_isk(refkey)) {
879 emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, 881 emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,
@@ -920,7 +922,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
920 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 922 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
921 Reg key = RID_NONE, type = RID_TMP, idx = node; 923 Reg key = RID_NONE, type = RID_TMP, idx = node;
922 RegSet allow = rset_exclude(RSET_GPR, node); 924 RegSet allow = rset_exclude(RSET_GPR, node);
923 lua_assert(ofs % sizeof(Node) == 0); 925 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
924 if (ofs > 4095) { 926 if (ofs > 4095) {
925 idx = dest; 927 idx = dest;
926 rset_clear(allow, dest); 928 rset_clear(allow, dest);
@@ -960,20 +962,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
960 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); 962 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
961} 963}
962 964
963static void asm_newref(ASMState *as, IRIns *ir)
964{
965 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
966 IRRef args[3];
967 if (ir->r == RID_SINK)
968 return;
969 args[0] = ASMREF_L; /* lua_State *L */
970 args[1] = ir->op1; /* GCtab *t */
971 args[2] = ASMREF_TMP1; /* cTValue *key */
972 asm_setupresult(as, ir, ci); /* TValue * */
973 asm_gencall(as, ci, args);
974 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
975}
976
977static void asm_uref(ASMState *as, IRIns *ir) 965static void asm_uref(ASMState *as, IRIns *ir)
978{ 966{
979 Reg dest = ra_dest(as, ir, RSET_GPR); 967 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1001,7 +989,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
1001static void asm_fref(ASMState *as, IRIns *ir) 989static void asm_fref(ASMState *as, IRIns *ir)
1002{ 990{
1003 UNUSED(as); UNUSED(ir); 991 UNUSED(as); UNUSED(ir);
1004 lua_assert(!ra_used(ir)); 992 lj_assertA(!ra_used(ir), "unfused FREF");
1005} 993}
1006 994
1007static void asm_strref(ASMState *as, IRIns *ir) 995static void asm_strref(ASMState *as, IRIns *ir)
@@ -1038,25 +1026,27 @@ static void asm_strref(ASMState *as, IRIns *ir)
1038 1026
1039/* -- Loads and stores ---------------------------------------------------- */ 1027/* -- Loads and stores ---------------------------------------------------- */
1040 1028
1041static ARMIns asm_fxloadins(IRIns *ir) 1029static ARMIns asm_fxloadins(ASMState *as, IRIns *ir)
1042{ 1030{
1031 UNUSED(as);
1043 switch (irt_type(ir->t)) { 1032 switch (irt_type(ir->t)) {
1044 case IRT_I8: return ARMI_LDRSB; 1033 case IRT_I8: return ARMI_LDRSB;
1045 case IRT_U8: return ARMI_LDRB; 1034 case IRT_U8: return ARMI_LDRB;
1046 case IRT_I16: return ARMI_LDRSH; 1035 case IRT_I16: return ARMI_LDRSH;
1047 case IRT_U16: return ARMI_LDRH; 1036 case IRT_U16: return ARMI_LDRH;
1048 case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; 1037 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D;
1049 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ 1038 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */
1050 default: return ARMI_LDR; 1039 default: return ARMI_LDR;
1051 } 1040 }
1052} 1041}
1053 1042
1054static ARMIns asm_fxstoreins(IRIns *ir) 1043static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir)
1055{ 1044{
1045 UNUSED(as);
1056 switch (irt_type(ir->t)) { 1046 switch (irt_type(ir->t)) {
1057 case IRT_I8: case IRT_U8: return ARMI_STRB; 1047 case IRT_I8: case IRT_U8: return ARMI_STRB;
1058 case IRT_I16: case IRT_U16: return ARMI_STRH; 1048 case IRT_I16: case IRT_U16: return ARMI_STRH;
1059 case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; 1049 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D;
1060 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ 1050 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */
1061 default: return ARMI_STR; 1051 default: return ARMI_STR;
1062 } 1052 }
@@ -1065,17 +1055,23 @@ static ARMIns asm_fxstoreins(IRIns *ir)
1065static void asm_fload(ASMState *as, IRIns *ir) 1055static void asm_fload(ASMState *as, IRIns *ir)
1066{ 1056{
1067 Reg dest = ra_dest(as, ir, RSET_GPR); 1057 Reg dest = ra_dest(as, ir, RSET_GPR);
1068 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 1058 ARMIns ai = asm_fxloadins(as, ir);
1069 ARMIns ai = asm_fxloadins(ir); 1059 Reg idx;
1070 int32_t ofs; 1060 int32_t ofs;
1071 if (ir->op2 == IRFL_TAB_ARRAY) { 1061 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
1072 ofs = asm_fuseabase(as, ir->op1); 1062 idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR);
1073 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1063 ofs = 0;
1074 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); 1064 } else {
1075 return; 1065 idx = ra_alloc1(as, ir->op1, RSET_GPR);
1066 if (ir->op2 == IRFL_TAB_ARRAY) {
1067 ofs = asm_fuseabase(as, ir->op1);
1068 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1069 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
1070 return;
1071 }
1076 } 1072 }
1073 ofs = field_ofs[ir->op2];
1077 } 1074 }
1078 ofs = field_ofs[ir->op2];
1079 if ((ai & 0x04000000)) 1075 if ((ai & 0x04000000))
1080 emit_lso(as, ai, dest, idx, ofs); 1076 emit_lso(as, ai, dest, idx, ofs);
1081 else 1077 else
@@ -1089,7 +1085,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
1089 IRIns *irf = IR(ir->op1); 1085 IRIns *irf = IR(ir->op1);
1090 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); 1086 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
1091 int32_t ofs = field_ofs[irf->op2]; 1087 int32_t ofs = field_ofs[irf->op2];
1092 ARMIns ai = asm_fxstoreins(ir); 1088 ARMIns ai = asm_fxstoreins(as, ir);
1093 if ((ai & 0x04000000)) 1089 if ((ai & 0x04000000))
1094 emit_lso(as, ai, src, idx, ofs); 1090 emit_lso(as, ai, src, idx, ofs);
1095 else 1091 else
@@ -1101,20 +1097,22 @@ static void asm_xload(ASMState *as, IRIns *ir)
1101{ 1097{
1102 Reg dest = ra_dest(as, ir, 1098 Reg dest = ra_dest(as, ir,
1103 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); 1099 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1104 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 1100 lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
1105 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1101 asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
1106} 1102}
1107 1103
1108static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1104static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
1109{ 1105{
1110 if (ir->r != RID_SINK) { 1106 if (ir->r != RID_SINK) {
1111 Reg src = ra_alloc1(as, ir->op2, 1107 Reg src = ra_alloc1(as, ir->op2,
1112 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); 1108 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1113 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 1109 asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
1114 rset_exclude(RSET_GPR, src), ofs); 1110 rset_exclude(RSET_GPR, src), ofs);
1115 } 1111 }
1116} 1112}
1117 1113
1114#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1115
1118static void asm_ahuvload(ASMState *as, IRIns *ir) 1116static void asm_ahuvload(ASMState *as, IRIns *ir)
1119{ 1117{
1120 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1118 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1127,13 +1125,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1127 rset_clear(allow, type); 1125 rset_clear(allow, type);
1128 } 1126 }
1129 if (ra_used(ir)) { 1127 if (ra_used(ir)) {
1130 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1128 lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
1131 irt_isint(ir->t) || irt_isaddr(ir->t)); 1129 irt_isint(ir->t) || irt_isaddr(ir->t),
1130 "bad load type %d", irt_type(ir->t));
1132 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1131 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
1133 rset_clear(allow, dest); 1132 rset_clear(allow, dest);
1134 } 1133 }
1135 idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1134 idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
1136 (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); 1135 (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
1136 if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
1137 if (!hiop || type == RID_NONE) { 1137 if (!hiop || type == RID_NONE) {
1138 rset_clear(allow, idx); 1138 rset_clear(allow, idx);
1139 if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && 1139 if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
@@ -1194,10 +1194,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
1194 IRType t = hiop ? IRT_NUM : irt_type(ir->t); 1194 IRType t = hiop ? IRT_NUM : irt_type(ir->t);
1195 Reg dest = RID_NONE, type = RID_NONE, base; 1195 Reg dest = RID_NONE, type = RID_NONE, base;
1196 RegSet allow = RSET_GPR; 1196 RegSet allow = RSET_GPR;
1197 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1197 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1198 lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1198 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1199 lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1200 "inconsistent SLOAD variant");
1199#if LJ_SOFTFP 1201#if LJ_SOFTFP
1200 lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ 1202 lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
1203 "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
1201 if (hiop && ra_used(ir+1)) { 1204 if (hiop && ra_used(ir+1)) {
1202 type = ra_dest(as, ir+1, allow); 1205 type = ra_dest(as, ir+1, allow);
1203 rset_clear(allow, type); 1206 rset_clear(allow, type);
@@ -1213,8 +1216,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
1213 Reg tmp = RID_NONE; 1216 Reg tmp = RID_NONE;
1214 if ((ir->op2 & IRSLOAD_CONVERT)) 1217 if ((ir->op2 & IRSLOAD_CONVERT))
1215 tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); 1218 tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
1216 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1219 lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
1217 irt_isint(ir->t) || irt_isaddr(ir->t)); 1220 irt_isint(ir->t) || irt_isaddr(ir->t),
1221 "bad SLOAD type %d", irt_type(ir->t));
1218 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1222 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
1219 rset_clear(allow, dest); 1223 rset_clear(allow, dest);
1220 base = ra_alloc1(as, REF_BASE, allow); 1224 base = ra_alloc1(as, REF_BASE, allow);
@@ -1272,19 +1276,17 @@ dotypecheck:
1272static void asm_cnew(ASMState *as, IRIns *ir) 1276static void asm_cnew(ASMState *as, IRIns *ir)
1273{ 1277{
1274 CTState *cts = ctype_ctsG(J2G(as->J)); 1278 CTState *cts = ctype_ctsG(J2G(as->J));
1275 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1279 CTypeID id = (CTypeID)IR(ir->op1)->i;
1276 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1280 CTSize sz;
1277 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1281 CTInfo info = lj_ctype_info(cts, id, &sz);
1278 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1282 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1279 IRRef args[2]; 1283 IRRef args[4];
1280 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1284 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1281 RegSet drop = RSET_SCRATCH; 1285 RegSet drop = RSET_SCRATCH;
1282 lua_assert(sz != CTSIZE_INVALID); 1286 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1287 "bad CNEW/CNEWI operands");
1283 1288
1284 args[0] = ASMREF_L; /* lua_State *L */
1285 args[1] = ASMREF_TMP1; /* MSize size */
1286 as->gcsteps++; 1289 as->gcsteps++;
1287
1288 if (ra_hasreg(ir->r)) 1290 if (ra_hasreg(ir->r))
1289 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1291 rset_clear(drop, ir->r); /* Dest reg handled below. */
1290 ra_evictset(as, drop); 1292 ra_evictset(as, drop);
@@ -1294,10 +1296,10 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1294 /* Initialize immutable cdata object. */ 1296 /* Initialize immutable cdata object. */
1295 if (ir->o == IR_CNEWI) { 1297 if (ir->o == IR_CNEWI) {
1296 int32_t ofs = sizeof(GCcdata); 1298 int32_t ofs = sizeof(GCcdata);
1297 lua_assert(sz == 4 || sz == 8); 1299 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1298 if (sz == 8) { 1300 if (sz == 8) {
1299 ofs += 4; ir++; 1301 ofs += 4; ir++;
1300 lua_assert(ir->o == IR_HIOP); 1302 lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI");
1301 } 1303 }
1302 for (;;) { 1304 for (;;) {
1303 Reg r = ra_alloc1(as, ir->op2, allow); 1305 Reg r = ra_alloc1(as, ir->op2, allow);
@@ -1306,22 +1308,32 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1306 if (ofs == sizeof(GCcdata)) break; 1308 if (ofs == sizeof(GCcdata)) break;
1307 ofs -= 4; ir--; 1309 ofs -= 4; ir--;
1308 } 1310 }
1311 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1312 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1313 args[0] = ASMREF_L; /* lua_State *L */
1314 args[1] = ir->op1; /* CTypeID id */
1315 args[2] = ir->op2; /* CTSize sz */
1316 args[3] = ASMREF_TMP1; /* CTSize align */
1317 asm_gencall(as, ci, args);
1318 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1319 return;
1309 } 1320 }
1321
1310 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1322 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1311 { 1323 {
1312 uint32_t k = emit_isk12(ARMI_MOV, ctypeid); 1324 uint32_t k = emit_isk12(ARMI_MOV, id);
1313 Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); 1325 Reg r = k ? RID_R1 : ra_allock(as, id, allow);
1314 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); 1326 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1315 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); 1327 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1316 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); 1328 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
1317 if (k) emit_d(as, ARMI_MOV^k, RID_R1); 1329 if (k) emit_d(as, ARMI_MOV^k, RID_R1);
1318 } 1330 }
1331 args[0] = ASMREF_L; /* lua_State *L */
1332 args[1] = ASMREF_TMP1; /* MSize size */
1319 asm_gencall(as, ci, args); 1333 asm_gencall(as, ci, args);
1320 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1334 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1321 ra_releasetmp(as, ASMREF_TMP1)); 1335 ra_releasetmp(as, ASMREF_TMP1));
1322} 1336}
1323#else
1324#define asm_cnew(as, ir) ((void)0)
1325#endif 1337#endif
1326 1338
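
[Note: the new branch routes variable-size and over-aligned allocations (VLA/VLS cdata, ir->op2 != REF_NIL) to a dedicated runtime helper instead of lj_mem_newgco(). The argument order matches the args[] setup above; the declaration is assumed to be:

/* Assumed helper declaration (lj_cdata.h): */
GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align);
]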
1327/* -- Write barriers ------------------------------------------------------ */ 1339/* -- Write barriers ------------------------------------------------------ */
@@ -1353,7 +1365,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1353 MCLabel l_end; 1365 MCLabel l_end;
1354 Reg obj, val, tmp; 1366 Reg obj, val, tmp;
1355 /* No need for other object barriers (yet). */ 1367 /* No need for other object barriers (yet). */
1356 lua_assert(IR(ir->op1)->o == IR_UREFC); 1368 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1357 ra_evictset(as, RSET_SCRATCH); 1369 ra_evictset(as, RSET_SCRATCH);
1358 l_end = emit_label(as); 1370 l_end = emit_label(as);
1359 args[0] = ASMREF_TMP1; /* global_State *g */ 1371 args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1392,23 +1404,36 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
1392 emit_dm(as, ai, (dest & 15), (left & 15)); 1404 emit_dm(as, ai, (dest & 15), (left & 15));
1393} 1405}
1394 1406
1395static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1407static void asm_callround(ASMState *as, IRIns *ir, int id)
1396{ 1408{
1397 IRIns *irp = IR(ir->op1); 1409 /* The modified regs must match with the *.dasc implementation. */
1398 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1410 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1399 IRIns *irpp = IR(irp->op1); 1411 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1400 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1412 RegSet of;
1401 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1413 Reg dest, src;
1402 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1414 ra_evictset(as, drop);
1403 IRRef args[2]; 1415 dest = ra_dest(as, ir, RSET_FPR);
1404 args[0] = irpp->op1; 1416 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1405 args[1] = irp->op2; 1417 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1406 asm_setupresult(as, ir, ci); 1418 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1407 asm_gencall(as, ci, args); 1419 (void *)lj_vm_trunc_sf);
1408 return 1; 1420 /* Workaround to protect argument GPRs from being used for remat. */
1409 } 1421 of = as->freeset;
1410 } 1422 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1411 return 0; 1423 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1424 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1425 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1426 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1427}
1428
1429static void asm_fpmath(ASMState *as, IRIns *ir)
1430{
1431 if (ir->op2 <= IRFPM_TRUNC)
1432 asm_callround(as, ir, ir->op2);
1433 else if (ir->op2 == IRFPM_SQRT)
1434 asm_fpunary(as, ir, ARMI_VSQRT_D);
1435 else
1436 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1412} 1437}
1413#endif 1438#endif
1414 1439
@@ -1474,19 +1499,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
1474 asm_intop(as, ir, asm_drop_cmp0(as, ai)); 1499 asm_intop(as, ir, asm_drop_cmp0(as, ai));
1475} 1500}
1476 1501
1477static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1478{
1479 ai = asm_drop_cmp0(as, ai);
1480 if (ir->op2 == 0) {
1481 Reg dest = ra_dest(as, ir, RSET_GPR);
1482 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1483 emit_d(as, ai^m, dest);
1484 } else {
1485 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1486 asm_intop(as, ir, ai);
1487 }
1488}
1489
1490static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1502static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
1491{ 1503{
1492 Reg dest = ra_dest(as, ir, RSET_GPR); 1504 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1552,6 +1564,15 @@ static void asm_mul(ASMState *as, IRIns *ir)
1552 asm_intmul(as, ir); 1564 asm_intmul(as, ir);
1553} 1565}
1554 1566
1567#define asm_addov(as, ir) asm_add(as, ir)
1568#define asm_subov(as, ir) asm_sub(as, ir)
1569#define asm_mulov(as, ir) asm_mul(as, ir)
1570
1571#if !LJ_SOFTFP
1572#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1573#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1574#endif
1575
1555static void asm_neg(ASMState *as, IRIns *ir) 1576static void asm_neg(ASMState *as, IRIns *ir)
1556{ 1577{
1557#if !LJ_SOFTFP 1578#if !LJ_SOFTFP
@@ -1563,41 +1584,22 @@ static void asm_neg(ASMState *as, IRIns *ir)
1563 asm_intneg(as, ir, ARMI_RSB); 1584 asm_intneg(as, ir, ARMI_RSB);
1564} 1585}
1565 1586
1566static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 1587static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1567{ 1588{
1568 const CCallInfo *ci = &lj_ir_callinfo[id]; 1589 ai = asm_drop_cmp0(as, ai);
1569 IRRef args[2]; 1590 if (ir->op2 == 0) {
1570 args[0] = ir->op1; 1591 Reg dest = ra_dest(as, ir, RSET_GPR);
1571 args[1] = ir->op2; 1592 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1572 asm_setupresult(as, ir, ci); 1593 emit_d(as, ai^m, dest);
1573 asm_gencall(as, ci, args); 1594 } else {
1595 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1596 asm_intop(as, ir, ai);
1597 }
1574} 1598}
1575 1599
1576#if !LJ_SOFTFP 1600#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
1577static void asm_callround(ASMState *as, IRIns *ir, int id)
1578{
1579 /* The modified regs must match with the *.dasc implementation. */
1580 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1581 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1582 RegSet of;
1583 Reg dest, src;
1584 ra_evictset(as, drop);
1585 dest = ra_dest(as, ir, RSET_FPR);
1586 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1587 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1588 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1589 (void *)lj_vm_trunc_sf);
1590 /* Workaround to protect argument GPRs from being used for remat. */
1591 of = as->freeset;
1592 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1593 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1594 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1595 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1596 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1597}
1598#endif
1599 1601
1600static void asm_bitswap(ASMState *as, IRIns *ir) 1602static void asm_bswap(ASMState *as, IRIns *ir)
1601{ 1603{
1602 Reg dest = ra_dest(as, ir, RSET_GPR); 1604 Reg dest = ra_dest(as, ir, RSET_GPR);
1603 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1605 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1614,6 +1616,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1614 } 1616 }
1615} 1617}
1616 1618
1619#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
1620#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
1621#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
1622
1617static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1623static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1618{ 1624{
1619 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1625 if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1631,6 +1637,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1631 } 1637 }
1632} 1638}
1633 1639
1640#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
1641#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
1642#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
1643#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
1644#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
1645
1634static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1646static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1635{ 1647{
1636 uint32_t kcmp = 0, kmov = 0; 1648 uint32_t kcmp = 0, kmov = 0;
@@ -1704,6 +1716,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
1704 asm_intmin_max(as, ir, cc); 1716 asm_intmin_max(as, ir, cc);
1705} 1717}
1706 1718
1719#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL)
1720#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE)
1721
1707/* -- Comparisons --------------------------------------------------------- */ 1722/* -- Comparisons --------------------------------------------------------- */
1708 1723
1709/* Map of comparisons to flags. ORDER IR. */ 1724/* Map of comparisons to flags. ORDER IR. */
@@ -1777,7 +1792,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
1777 Reg left; 1792 Reg left;
1778 uint32_t m; 1793 uint32_t m;
1779 int cmpprev0 = 0; 1794 int cmpprev0 = 0;
1780 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); 1795 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
1796 "bad comparison data type %d", irt_type(ir->t));
1781 if (asm_swapops(as, lref, rref)) { 1797 if (asm_swapops(as, lref, rref)) {
1782 Reg tmp = lref; lref = rref; rref = tmp; 1798 Reg tmp = lref; lref = rref; rref = tmp;
1783 if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ 1799 if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
@@ -1819,6 +1835,18 @@ notst:
1819 as->flagmcp = as->mcp; /* Allow elimination of the compare. */ 1835 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1820} 1836}
1821 1837
1838static void asm_comp(ASMState *as, IRIns *ir)
1839{
1840#if !LJ_SOFTFP
1841 if (irt_isnum(ir->t))
1842 asm_fpcomp(as, ir);
1843 else
1844#endif
1845 asm_intcomp(as, ir);
1846}
1847
1848#define asm_equal(as, ir) asm_comp(as, ir)
1849
1822#if LJ_HASFFI 1850#if LJ_HASFFI
1823/* 64 bit integer comparisons. */ 1851/* 64 bit integer comparisons. */
1824static void asm_int64comp(ASMState *as, IRIns *ir) 1852static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1857,15 +1885,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir)
1857} 1885}
1858#endif 1886#endif
1859 1887
1860/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ 1888/* -- Split register ops -------------------------------------------------- */
1861 1889
1862/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 1890/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
1863static void asm_hiop(ASMState *as, IRIns *ir) 1891static void asm_hiop(ASMState *as, IRIns *ir)
1864{ 1892{
1865#if LJ_HASFFI || LJ_SOFTFP
1866 /* HIOP is marked as a store because it needs its own DCE logic. */ 1893 /* HIOP is marked as a store because it needs its own DCE logic. */
1867 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 1894 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1868 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 1895 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1896#if LJ_HASFFI || LJ_SOFTFP
1869 if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ 1897 if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */
1870 as->curins--; /* Always skip the loword comparison. */ 1898 as->curins--; /* Always skip the loword comparison. */
1871#if LJ_SOFTFP 1899#if LJ_SOFTFP
@@ -1882,7 +1910,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1882 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { 1910 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
1883 as->curins--; /* Always skip the loword min/max. */ 1911 as->curins--; /* Always skip the loword min/max. */
1884 if (uselo || usehi) 1912 if (uselo || usehi)
1885 asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); 1913 asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
1886 return; 1914 return;
1887#elif LJ_HASFFI 1915#elif LJ_HASFFI
1888 } else if ((ir-1)->o == IR_CONV) { 1916 } else if ((ir-1)->o == IR_CONV) {
@@ -1893,9 +1921,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1893#endif 1921#endif
1894 } else if ((ir-1)->o == IR_XSTORE) { 1922 } else if ((ir-1)->o == IR_XSTORE) {
1895 if ((ir-1)->r != RID_SINK) 1923 if ((ir-1)->r != RID_SINK)
1896 asm_xstore(as, ir, 4); 1924 asm_xstore_(as, ir, 4);
1897 return; 1925 return;
1898 } 1926 }
1927#endif
1899 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1928 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1900 switch ((ir-1)->o) { 1929 switch ((ir-1)->o) {
1901#if LJ_HASFFI 1930#if LJ_HASFFI
@@ -1914,6 +1943,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1914 asm_intneg(as, ir, ARMI_RSC); 1943 asm_intneg(as, ir, ARMI_RSC);
1915 asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); 1944 asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
1916 break; 1945 break;
1946 case IR_CNEWI:
1947 /* Nothing to do here. Handled by lo op itself. */
1948 break;
1917#endif 1949#endif
1918#if LJ_SOFTFP 1950#if LJ_SOFTFP
1919 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: 1951 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
@@ -1921,24 +1953,26 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1921 if (!uselo) 1953 if (!uselo)
1922 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ 1954 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
1923 break; 1955 break;
1956 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
1957 /* Nothing to do here. Handled by lo op itself. */
1958 break;
1924#endif 1959#endif
1925 case IR_CALLN: 1960 case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
1926 case IR_CALLS:
1927 case IR_CALLXS:
1928 if (!uselo) 1961 if (!uselo)
1929 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 1962 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
1930 break; 1963 break;
1931#if LJ_SOFTFP 1964 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
1932 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
1933#endif
1934 case IR_CNEWI:
1935 /* Nothing to do here. Handled by lo op itself. */
1936 break;
1937 default: lua_assert(0); break;
1938 } 1965 }
1939#else 1966}
1940 UNUSED(as); UNUSED(ir); lua_assert(0); 1967
1941#endif 1968/* -- Profiling ----------------------------------------------------------- */
1969
1970static void asm_prof(ASMState *as, IRIns *ir)
1971{
1972 UNUSED(ir);
1973 asm_guardcc(as, CC_NE);
1974 emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
1975 emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1942} 1976}
1943 1977
1944/* -- Stack handling ------------------------------------------------------ */ 1978/* -- Stack handling ------------------------------------------------------ */
@@ -1952,7 +1986,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1952 if (irp) { 1986 if (irp) {
1953 if (!ra_hasspill(irp->s)) { 1987 if (!ra_hasspill(irp->s)) {
1954 pbase = irp->r; 1988 pbase = irp->r;
1955 lua_assert(ra_hasreg(pbase)); 1989 lj_assertA(ra_hasreg(pbase), "base reg lost");
1956 } else if (allow) { 1990 } else if (allow) {
1957 pbase = rset_pickbot(allow); 1991 pbase = rset_pickbot(allow);
1958 } else { 1992 } else {
@@ -1964,13 +1998,13 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1964 } 1998 }
1965 emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); 1999 emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
1966 k = emit_isk12(0, (int32_t)(8*topslot)); 2000 k = emit_isk12(0, (int32_t)(8*topslot));
1967 lua_assert(k); 2001 lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
1968 emit_n(as, ARMI_CMP^k, RID_TMP); 2002 emit_n(as, ARMI_CMP^k, RID_TMP);
1969 emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); 2003 emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
1970 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, 2004 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
1971 (int32_t)offsetof(lua_State, maxstack)); 2005 (int32_t)offsetof(lua_State, maxstack));
1972 if (irp) { /* Must not spill arbitrary registers in head of side trace. */ 2006 if (irp) { /* Must not spill arbitrary registers in head of side trace. */
1973 int32_t i = i32ptr(&J2G(as->J)->jit_L); 2007 int32_t i = i32ptr(&J2G(as->J)->cur_L);
1974 if (ra_hasspill(irp->s)) 2008 if (ra_hasspill(irp->s))
1975 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); 2009 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
1976 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); 2010 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1978,7 +2012,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1978 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ 2012 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
1979 emit_loadi(as, RID_TMP, (i & ~4095)); 2013 emit_loadi(as, RID_TMP, (i & ~4095));
1980 } else { 2014 } else {
1981 emit_getgl(as, RID_TMP, jit_L); 2015 emit_getgl(as, RID_TMP, cur_L);
1982 } 2016 }
1983} 2017}
1984 2018
@@ -2001,7 +2035,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2001#if LJ_SOFTFP 2035#if LJ_SOFTFP
2002 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); 2036 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
2003 Reg tmp; 2037 Reg tmp;
2004 lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ 2038 /* LJ_SOFTFP: must be a number constant. */
2039 lj_assertA(irref_isk(ref), "unsplit FP op");
2005 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, 2040 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
2006 rset_exclude(RSET_GPREVEN, RID_BASE)); 2041 rset_exclude(RSET_GPREVEN, RID_BASE));
2007 emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); 2042 emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
@@ -2015,7 +2050,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2015 } else { 2050 } else {
2016 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); 2051 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
2017 Reg type; 2052 Reg type;
2018 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2053 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
2054 "restore of IR type %d", irt_type(ir->t));
2019 if (!irt_ispri(ir->t)) { 2055 if (!irt_ispri(ir->t)) {
2020 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); 2056 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
2021 emit_lso(as, ARMI_STR, src, RID_BASE, ofs); 2057 emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
@@ -2028,6 +2064,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2028 } else if ((sn & SNAP_SOFTFPNUM)) { 2064 } else if ((sn & SNAP_SOFTFPNUM)) {
2029 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); 2065 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
2030#endif 2066#endif
2067 } else if ((sn & SNAP_KEYINDEX)) {
2068 type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
2031 } else { 2069 } else {
2032 type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); 2070 type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
2033 } 2071 }
@@ -2035,7 +2073,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2035 } 2073 }
2036 checkmclim(as); 2074 checkmclim(as);
2037 } 2075 }
2038 lua_assert(map + nent == flinks); 2076 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
2039} 2077}
2040 2078
2041/* -- GC handling --------------------------------------------------------- */ 2079/* -- GC handling --------------------------------------------------------- */
@@ -2089,15 +2127,21 @@ static void asm_loop_fixup(ASMState *as)
2089 } 2127 }
2090} 2128}
2091 2129
2130/* Fixup the tail of the loop. */
2131static void asm_loop_tail_fixup(ASMState *as)
2132{
2133 UNUSED(as); /* Nothing to do. */
2134}
2135
2092/* -- Head of trace ------------------------------------------------------- */ 2136/* -- Head of trace ------------------------------------------------------- */
2093 2137
2094/* Reload L register from g->jit_L. */ 2138/* Reload L register from g->cur_L. */
2095static void asm_head_lreg(ASMState *as) 2139static void asm_head_lreg(ASMState *as)
2096{ 2140{
2097 IRIns *ir = IR(ASMREF_L); 2141 IRIns *ir = IR(ASMREF_L);
2098 if (ra_used(ir)) { 2142 if (ra_used(ir)) {
2099 Reg r = ra_dest(as, ir, RSET_GPR); 2143 Reg r = ra_dest(as, ir, RSET_GPR);
2100 emit_getgl(as, r, jit_L); 2144 emit_getgl(as, r, cur_L);
2101 ra_evictk(as); 2145 ra_evictk(as);
2102 } 2146 }
2103} 2147}
@@ -2125,7 +2169,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
2125 rset_clear(allow, ra_dest(as, ir, allow)); 2169 rset_clear(allow, ra_dest(as, ir, allow));
2126 } else { 2170 } else {
2127 Reg r = irp->r; 2171 Reg r = irp->r;
2128 lua_assert(ra_hasreg(r)); 2172 lj_assertA(ra_hasreg(r), "base reg lost");
2129 rset_clear(allow, r); 2173 rset_clear(allow, r);
2130 if (r != ir->r && !rset_test(as->freeset, r)) 2174 if (r != ir->r && !rset_test(as->freeset, r))
2131 ra_restore(as, regcost_ref(as->cost[r])); 2175 ra_restore(as, regcost_ref(as->cost[r]));
@@ -2147,7 +2191,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2147 } else { 2191 } else {
2148 /* Patch stack adjustment. */ 2192 /* Patch stack adjustment. */
2149 uint32_t k = emit_isk12(ARMI_ADD, spadj); 2193 uint32_t k = emit_isk12(ARMI_ADD, spadj);
2150 lua_assert(k); 2194 lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
2151 p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); 2195 p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
2152 } 2196 }
2153 /* Patch exit branch. */ 2197 /* Patch exit branch. */
@@ -2168,143 +2212,13 @@ static void asm_tail_prep(ASMState *as)
2168 *p = 0; /* Prevent load/store merging. */ 2212 *p = 0; /* Prevent load/store merging. */
2169} 2213}
2170 2214
2171/* -- Instruction dispatch ------------------------------------------------ */
2172
2173/* Assemble a single instruction. */
2174static void asm_ir(ASMState *as, IRIns *ir)
2175{
2176 switch ((IROp)ir->o) {
2177 /* Miscellaneous ops. */
2178 case IR_LOOP: asm_loop(as); break;
2179 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2180 case IR_USE:
2181 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2182 case IR_PHI: asm_phi(as, ir); break;
2183 case IR_HIOP: asm_hiop(as, ir); break;
2184 case IR_GCSTEP: asm_gcstep(as, ir); break;
2185
2186 /* Guarded assertions. */
2187 case IR_EQ: case IR_NE:
2188 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2189 as->curins--;
2190 asm_href(as, ir-1, (IROp)ir->o);
2191 break;
2192 }
2193 /* fallthrough */
2194 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2195 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2196 case IR_ABC:
2197#if !LJ_SOFTFP
2198 if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
2199#endif
2200 asm_intcomp(as, ir);
2201 break;
2202
2203 case IR_RETF: asm_retf(as, ir); break;
2204
2205 /* Bit ops. */
2206 case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
2207 case IR_BSWAP: asm_bitswap(as, ir); break;
2208
2209 case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
2210 case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
2211 case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
2212
2213 case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
2214 case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
2215 case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
2216 case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
2217 case IR_BROL: lua_assert(0); break;
2218
2219 /* Arithmetic ops. */
2220 case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
2221 case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
2222 case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
2223 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2224 case IR_NEG: asm_neg(as, ir); break;
2225
2226#if LJ_SOFTFP
2227 case IR_DIV: case IR_POW: case IR_ABS:
2228 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
2229 lua_assert(0); /* Unused for LJ_SOFTFP. */
2230 break;
2231#else
2232 case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
2233 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2234 case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
2235 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2236 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2237 case IR_FPMATH:
2238 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2239 break;
2240 if (ir->op2 <= IRFPM_TRUNC)
2241 asm_callround(as, ir, ir->op2);
2242 else if (ir->op2 == IRFPM_SQRT)
2243 asm_fpunary(as, ir, ARMI_VSQRT_D);
2244 else
2245 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2246 break;
2247 case IR_TOBIT: asm_tobit(as, ir); break;
2248#endif
2249
2250 case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
2251 case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
2252
2253 /* Memory references. */
2254 case IR_AREF: asm_aref(as, ir); break;
2255 case IR_HREF: asm_href(as, ir, 0); break;
2256 case IR_HREFK: asm_hrefk(as, ir); break;
2257 case IR_NEWREF: asm_newref(as, ir); break;
2258 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2259 case IR_FREF: asm_fref(as, ir); break;
2260 case IR_STRREF: asm_strref(as, ir); break;
2261
2262 /* Loads and stores. */
2263 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2264 asm_ahuvload(as, ir);
2265 break;
2266 case IR_FLOAD: asm_fload(as, ir); break;
2267 case IR_XLOAD: asm_xload(as, ir); break;
2268 case IR_SLOAD: asm_sload(as, ir); break;
2269
2270 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2271 case IR_FSTORE: asm_fstore(as, ir); break;
2272 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2273
2274 /* Allocations. */
2275 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2276 case IR_TNEW: asm_tnew(as, ir); break;
2277 case IR_TDUP: asm_tdup(as, ir); break;
2278 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2279
2280 /* Write barriers. */
2281 case IR_TBAR: asm_tbar(as, ir); break;
2282 case IR_OBAR: asm_obar(as, ir); break;
2283
2284 /* Type conversions. */
2285 case IR_CONV: asm_conv(as, ir); break;
2286 case IR_TOSTR: asm_tostr(as, ir); break;
2287 case IR_STRTO: asm_strto(as, ir); break;
2288
2289 /* Calls. */
2290 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2291 case IR_CALLXS: asm_callx(as, ir); break;
2292 case IR_CARG: break;
2293
2294 default:
2295 setintV(&as->J->errinfo, ir->o);
2296 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2297 break;
2298 }
2299}
2300
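
[Note: the per-backend asm_ir() switch above is deleted because IR dispatch is centralized in lj_asm.c, which now expects every backend to provide an asm_<op> entry per IR opcode — hence the #define aliases added earlier in this patch (asm_band, asm_bshl, asm_min, asm_equal, ...) and the removal of backend copies of generic helpers such as asm_call, asm_newref and asm_tostr. A sketch of what the shared dispatcher presumably looks like, reusing the error path from the removed code:

/* Shared dispatch in lj_asm.c (shape assumed, not part of this file): */
static void asm_ir(ASMState *as, IRIns *ir)
{
  switch ((IROp)ir->o) {
  case IR_BAND: asm_band(as, ir); break;  /* Macro or function per op. */
  case IR_BSHL: asm_bshl(as, ir); break;
  /* ... one case per IR opcode ... */
  default:
    setintV(&as->J->errinfo, ir->o);
    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
    break;
  }
}
]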
2301/* -- Trace setup --------------------------------------------------------- */ 2215/* -- Trace setup --------------------------------------------------------- */
2302 2216
2303/* Ensure there are enough stack slots for call arguments. */ 2217/* Ensure there are enough stack slots for call arguments. */
2304static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2218static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2305{ 2219{
2306 IRRef args[CCI_NARGS_MAX*2]; 2220 IRRef args[CCI_NARGS_MAX*2];
2307 uint32_t i, nargs = (int)CCI_NARGS(ci); 2221 uint32_t i, nargs = CCI_XNARGS(ci);
2308 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; 2222 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
2309 asm_collectargs(as, ir, ci, args); 2223 asm_collectargs(as, ir, ci, args);
2310 for (i = 0; i < nargs; i++) { 2224 for (i = 0; i < nargs; i++) {
@@ -2360,7 +2274,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2360 if (!cstart) cstart = p; 2274 if (!cstart) cstart = p;
2361 } 2275 }
2362 } 2276 }
2363 lua_assert(cstart != NULL); 2277 lj_assertJ(cstart != NULL, "exit stub %d not found", exitno);
2364 lj_mcode_sync(cstart, cend); 2278 lj_mcode_sync(cstart, cend);
2365 lj_mcode_patch(J, mcarea, 1); 2279 lj_mcode_patch(J, mcarea, 1);
2366} 2280}