Diffstat (limited to 'src/lj_asm_arm.h')

 src/lj_asm_arm.h | 656 ++++++++++++++++++++++-----------------------------
 1 file changed, 285 insertions(+), 371 deletions(-)
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 262fa59e..ded63913 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow)
       }
     }
   }
-  lua_assert(rset_test(RSET_GPREVEN, r));
+  lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r);
   ra_modified(as, r);
   ra_modified(as, r+1);
   RA_DBGX((as, "scratchpair $r $r", r, r+1));
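Throughout this commit, bare lua_assert() calls are upgraded to lj_assertA(), which carries a printf-style message (and, by its name, the ASMState context). A minimal sketch of how such a macro can be structured; this is an assumption for illustration, not the actual lj_def.h/lj_asm.c definition:

#include <stdio.h>
#include <stdlib.h>

#ifdef LUA_USE_ASSERT
#define lj_assertA(c, ...) \
  do { if (!(c)) { fprintf(stderr, __VA_ARGS__); fputc('\n', stderr); abort(); } } while (0)
#else
#define lj_assertA(c, ...)	((void)0)  /* Compiled out in release builds. */
#endif

Either way the condition and its format arguments only exist in assertion-enabled builds, so the richer messages cost nothing in release builds.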
@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
        *ofsp = (ofs & 255);  /* Mask out less bits to allow LDRD. */
        return ra_allock(as, (ofs & ~255), allow);
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = 0;
+      return RID_SP;
     }
   }
   *ofsp = 0;
@@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
       return;
     }
   } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {
-    lua_assert(ofs == 0);
+    lj_assertA(ofs == 0, "bad usage");
     ofs = (int32_t)sizeof(GCstr);
     if (irref_isk(ir->op2)) {
       ofs += IR(ir->op2)->i;
@@ -338,7 +341,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 /* Generate a call to a C function. */
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
-  uint32_t n, nargs = CCI_NARGS(ci);
+  uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 0;
 #if LJ_SOFTFP
   Reg gpr = REGARG_FIRSTGPR;
@@ -389,9 +392,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
        as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
        if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;
        if (gpr <= REGARG_LASTGPR) {
-         lua_assert(rset_test(as->freeset, gpr));  /* Must have been evicted. */
+         lj_assertA(rset_test(as->freeset, gpr),
+                    "reg %d not free", gpr);  /* Must have been evicted. */
          if (irt_isnum(ir->t)) {
-           lua_assert(rset_test(as->freeset, gpr+1));  /* Ditto. */
+           lj_assertA(rset_test(as->freeset, gpr+1),
+                      "reg %d not free", gpr+1);  /* Ditto. */
            emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));
            gpr += 2;
          } else {
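The gpr = (gpr+1) & ~1u step implements the ARM EABI rule that a 64-bit value passed in core registers must start at an even-numbered register (r0/r2). A tiny self-contained check of that rounding expression; register numbers are illustrative:

#include <assert.h>

int main(void)
{
  unsigned gpr;
  gpr = 1; gpr = (gpr+1) & ~1u; assert(gpr == 2);  /* Odd r1 rounds up to r2. */
  gpr = 2; gpr = (gpr+1) & ~1u; assert(gpr == 2);  /* Even r2 stays put. */
  return 0;
}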
@@ -408,7 +413,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 #endif
     {
       if (gpr <= REGARG_LASTGPR) {
-       lua_assert(rset_test(as->freeset, gpr));  /* Must have been evicted. */
+       lj_assertA(rset_test(as->freeset, gpr),
+                  "reg %d not free", gpr);  /* Must have been evicted. */
        if (ref) ra_leftov(as, gpr, ref);
        gpr++;
       } else {
@@ -433,7 +439,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
     rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
-    lua_assert(!irt_ispri(ir->t));
+    lj_assertA(!irt_ispri(ir->t), "PRI dest");
     if (!LJ_SOFTFP && irt_isfp(ir->t)) {
       if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {
        Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
@@ -453,15 +459,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
   UNUSED(ci);
 }
 
-static void asm_call(ASMState *as, IRIns *ir)
-{
-  IRRef args[CCI_NARGS_MAX];
-  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
-  asm_collectargs(as, ir, ci, args);
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
-}
-
 static void asm_callx(ASMState *as, IRIns *ir)
 {
   IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +487,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
   void *pc = ir_kptr(IR(ir->op2));
-  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
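The return-frame delta now includes LJ_FR2, the two-slot frame link used by GC64 builds; on 32-bit ARM LJ_FR2 is 0, so the computed value is unchanged here and the code merely stays in sync with the other backends. A sketch of the computation, with the A operand taken from bits 8-15 of the bytecode instruction:

#include <assert.h>

#define LJ_FR2	0  /* 32-bit ARM uses one-slot frame links. */
#define bc_a(i)	(((i) >> 8) & 0xff)

int main(void)
{
  unsigned ins = 0x00000302u;          /* Illustrative bytecode with A = 3. */
  int delta = 1 + LJ_FR2 + bc_a(ins);  /* Slots popped on return. */
  assert(delta == 4);
  return 0;
}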
@@ -504,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir)
   emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  if ((as->flags & JIT_F_ARMV6T2)) {
+    emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);
+  } else {
+    emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
+    emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp);
+  }
+  emit_lso(as, ARMI_LDR, RID_TMP,
+           ra_allock(as, (addr & ~4095),
+                     rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
+           (addr & 4095));
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 #if !LJ_SOFTFP
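The new asm_bufhdr_write stores g->cur_L into sb->L while preserving the SBuf flag bits kept in the low bits of that field. On ARMv6T2+ a single BFI (bitfield insert) does the merge; the pre-T2 fallback masks the old field and ORs it in. Both paths compute the same bitfield insert, sketched here with an illustrative mask (not LuaJIT's actual SBUF_MASK_FLAG value):

#include <stdint.h>
#include <assert.h>

#define MASK_FLAG 7u  /* Assumed low-bit flag mask, for illustration only. */

static uint32_t merge(uint32_t cur_L, uint32_t old_field)
{
  /* What BFI cur_L, old_field, #0, #3 computes in one instruction: */
  return (cur_L & ~MASK_FLAG) | (old_field & MASK_FLAG);
}

int main(void)
{
  assert(merge(0x1000u, 0x2003u) == 0x1003u);
  return 0;
}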
@@ -539,13 +560,17 @@ static void asm_conv(ASMState *as, IRIns *ir)
 #endif
   IRRef lref = ir->op1;
   /* 64 bit integer conversions are handled by SPLIT. */
-  lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64));
+  lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64),
+            "IR %04d has unsplit 64 bit type",
+            (int)(ir - as->ir) - REF_BIAS);
 #if LJ_SOFTFP
   /* FP conversions are handled by SPLIT. */
-  lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
+  lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
+            "IR %04d has FP type",
+            (int)(ir - as->ir) - REF_BIAS);
   /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
 #else
-  lua_assert(irt_type(ir->t) != st);
+  lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
   if (irt_isfp(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     if (stfp) {  /* FP to FP conversion. */
@@ -562,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
   } else if (stfp) {  /* FP to integer conversion. */
     if (irt_isguard(ir->t)) {
       /* Checked conversions are only supported from number to int. */
-      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+      lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+                "bad type for checked CONV");
       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
     } else {
       Reg left = ra_alloc1(as, lref, RSET_FPR);
@@ -581,7 +607,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
     Reg dest = ra_dest(as, ir, RSET_GPR);
     if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
       Reg left = ra_alloc1(as, lref, RSET_GPR);
-      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+      lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
       if ((as->flags & JIT_F_ARMV6)) {
        ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
                    st == IRT_U8 ? ARMI_UXTB :
@@ -601,31 +627,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
   }
 }
 
-#if !LJ_SOFTFP && LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
-  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
-  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
-  IRCallID id;
-  CCallInfo ci;
-  IRRef args[2];
-  args[0] = (ir-1)->op1;
-  args[1] = ir->op1;
-  if (st == IRT_NUM || st == IRT_FLOAT) {
-    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
-    ir--;
-  } else {
-    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
-  }
-  ci = lj_ir_callinfo[id];
-#if !LJ_ABI_SOFTFP
-  ci.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
-#endif
-  asm_setupresult(as, ir, &ci);
-  asm_gencall(as, &ci, args);
-}
-#endif
-
 static void asm_strto(ASMState *as, IRIns *ir)
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,60 +690,61 @@ static void asm_strto(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
 }
 
+/* -- Memory references --------------------------------------------------- */
+
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref)) {
-      /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
-    } else {
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+       lj_assertA(irref_isk(ref), "unsplit FP op");
+       emit_dm(as, ARMI_MOV, dest, RID_SP);
+       emit_lso(as, ARMI_STR,
+                ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+                RID_SP, 0);
+       emit_lso(as, ARMI_STR,
+                ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+                RID_SP, 4);
+#else
+       Reg src = ra_alloc1(as, ref, RSET_FPR);
+       emit_dm(as, ARMI_MOV, dest, RID_SP);
+       emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
+#endif
+      } else if (irref_isk(ref)) {
+       /* Use the number constant itself as a TValue. */
+       ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+      } else {
 #if LJ_SOFTFP
-      lua_assert(0);
+       lj_assertA(0, "unsplit FP op");
 #else
-      /* Otherwise force a spill and use the spill slot. */
-      emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+       /* Otherwise force a spill and use the spill slot. */
+       emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
 #endif
+      }
+    } else {
+      /* Otherwise use [sp] and [sp+4] to hold the TValue.
+      ** This assumes the following call has max. 4 args.
+      */
+      Reg type;
+      emit_dm(as, ARMI_MOV, dest, RID_SP);
+      if (!irt_ispri(ir->t)) {
+       Reg src = ra_alloc1(as, ref, RSET_GPR);
+       emit_lso(as, ARMI_STR, src, RID_SP, 0);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+       type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+       type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+      emit_lso(as, ARMI_STR, type, RID_SP, 4);
     }
   } else {
-    /* Otherwise use [sp] and [sp+4] to hold the TValue. */
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
     emit_dm(as, ARMI_MOV, dest, RID_SP);
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_lso(as, ARMI_STR, src, RID_SP, 0);
-    }
-    if ((ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, irt_toitype(ir->t), allow);
-    emit_lso(as, ARMI_STR, type, RID_SP, 4);
   }
 }
 
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
-  IRRef args[2];
-  args[0] = ASMREF_L;
-  as->gcsteps++;
-  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
-    args[1] = ASMREF_TMP1;  /* const lua_Number * */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
-  } else {
-    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
-    args[1] = ir->op1;  /* int32_t k */
-    asm_setupresult(as, ir, ci);  /* GCstr * */
-    asm_gencall(as, ci, args);
-  }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
 static void asm_aref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
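asm_tvptr now takes a mode that says whether the temporary TValue at [sp] must be initialized from the IR operand (IN1) and/or will be written back by the callee (OUT1); with both set for a number input, the value is materialized into the stack slot instead of pointing at a constant or spill slot, and with neither set the final else branch just returns the scratch slot address. The flag names come from this diff; the values below are assumptions for illustration, not copied from lj_ir.h:

#define IRTMPREF_IN1	0x01	/* [sp] must hold the input TValue. */
#define IRTMPREF_OUT1	0x02	/* The call writes a result TValue back to [sp]. */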
@@ -864,16 +866,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu);
 
   /* Load main position relative to tab->node into dest. */
-  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+  khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1;
   if (khash == 0) {
     emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
   } else {
     emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
     emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
-    if (irt_isstr(kt)) {  /* Fetch of str->hash is cheaper than ra_allock. */
+    if (irt_isstr(kt)) {  /* Fetch of str->sid is cheaper than ra_allock. */
       emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
       emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
-      emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash));
+      emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid));
       emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
     } else if (irref_isk(refkey)) {
       emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,
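The two shifted ADDs scale the masked hash by sizeof(Node), which is 24 bytes on 32-bit targets: tmp = idx*3 (ADD with LSL #1), then dest += tmp*8 (ADD with LSL #3). Because the assembler emits machine code backwards, the *3 step appears second in the source but executes first. A self-contained check of the arithmetic, with illustrative values:

#include <stdint.h>
#include <assert.h>

int main(void)
{
  uint32_t h = 0x12345u, hmask = 63u, node = 0x10000u;
  uint32_t idx = h & hmask;
  uint32_t tmp = idx + (idx << 1);    /* idx*3 */
  uint32_t dest = node + (tmp << 3);  /* node + idx*24 == &node[idx] */
  assert(dest == node + idx*24u);
  return 0;
}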
@@ -920,7 +922,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg key = RID_NONE, type = RID_TMP, idx = node;
   RegSet allow = rset_exclude(RSET_GPR, node);
-  lua_assert(ofs % sizeof(Node) == 0);
+  lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
   if (ofs > 4095) {
     idx = dest;
     rset_clear(allow, dest);
@@ -960,20 +962,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
     emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
 }
 
-static void asm_newref(ASMState *as, IRIns *ir)
-{
-  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
-  IRRef args[3];
-  if (ir->r == RID_SINK)
-    return;
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ir->op1;      /* GCtab *t */
-  args[2] = ASMREF_TMP1;  /* cTValue *key */
-  asm_setupresult(as, ir, ci);  /* TValue * */
-  asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1001,7 +989,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
 static void asm_fref(ASMState *as, IRIns *ir)
 {
   UNUSED(as); UNUSED(ir);
-  lua_assert(!ra_used(ir));
+  lj_assertA(!ra_used(ir), "unfused FREF");
 }
 
 static void asm_strref(ASMState *as, IRIns *ir)
@@ -1038,25 +1026,27 @@ static void asm_strref(ASMState *as, IRIns *ir)
 
 /* -- Loads and stores ---------------------------------------------------- */
 
-static ARMIns asm_fxloadins(IRIns *ir)
+static ARMIns asm_fxloadins(ASMState *as, IRIns *ir)
 {
+  UNUSED(as);
   switch (irt_type(ir->t)) {
   case IRT_I8: return ARMI_LDRSB;
   case IRT_U8: return ARMI_LDRB;
   case IRT_I16: return ARMI_LDRSH;
   case IRT_U16: return ARMI_LDRH;
-  case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D;
+  case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D;
   case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S;  /* fallthrough */
   default: return ARMI_LDR;
   }
 }
 
-static ARMIns asm_fxstoreins(IRIns *ir)
+static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir)
 {
+  UNUSED(as);
   switch (irt_type(ir->t)) {
   case IRT_I8: case IRT_U8: return ARMI_STRB;
   case IRT_I16: case IRT_U16: return ARMI_STRH;
-  case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D;
+  case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D;
   case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S;  /* fallthrough */
   default: return ARMI_STR;
   }
@@ -1065,17 +1055,23 @@ static ARMIns asm_fxstoreins(IRIns *ir)
 static void asm_fload(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
-  ARMIns ai = asm_fxloadins(ir);
+  ARMIns ai = asm_fxloadins(as, ir);
+  Reg idx;
   int32_t ofs;
-  if (ir->op2 == IRFL_TAB_ARRAY) {
-    ofs = asm_fuseabase(as, ir->op1);
-    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
-      emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
-      return;
+  if (ir->op1 == REF_NIL) {  /* FLOAD from GG_State with offset. */
+    idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR);
+    ofs = 0;
+  } else {
+    idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+       emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
+       return;
+      }
     }
+    ofs = field_ofs[ir->op2];
   }
-  ofs = field_ofs[ir->op2];
   if ((ai & 0x04000000))
     emit_lso(as, ai, dest, idx, ofs);
   else
@@ -1089,7 +1085,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
     IRIns *irf = IR(ir->op1);
     Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
     int32_t ofs = field_ofs[irf->op2];
-    ARMIns ai = asm_fxstoreins(ir);
+    ARMIns ai = asm_fxstoreins(as, ir);
     if ((ai & 0x04000000))
       emit_lso(as, ai, src, idx, ofs);
     else
@@ -1101,20 +1097,22 @@ static void asm_xload(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir,
                     (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
-  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
-  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
+  lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
+  asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
 }
 
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
 {
   if (ir->r != RID_SINK) {
     Reg src = ra_alloc1(as, ir->op2,
                        (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
-    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+    asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src), ofs);
   }
 }
 
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1127,13 +1125,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     rset_clear(allow, type);
   }
   if (ra_used(ir)) {
-    lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
-              irt_isint(ir->t) || irt_isaddr(ir->t));
+    lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+              irt_isint(ir->t) || irt_isaddr(ir->t),
+              "bad load type %d", irt_type(ir->t));
     dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
     rset_clear(allow, dest);
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
                       (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
   if (!hiop || type == RID_NONE) {
     rset_clear(allow, idx);
     if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
@@ -1194,10 +1194,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
   IRType t = hiop ? IRT_NUM : irt_type(ir->t);
   Reg dest = RID_NONE, type = RID_NONE, base;
   RegSet allow = RSET_GPR;
-  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
-  lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+            "bad parent SLOAD");  /* Handled by asm_head_side(). */
+  lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+            "inconsistent SLOAD variant");
 #if LJ_SOFTFP
-  lua_assert(!(ir->op2 & IRSLOAD_CONVERT));  /* Handled by LJ_SOFTFP SPLIT. */
+  lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
+            "unsplit SLOAD convert");  /* Handled by LJ_SOFTFP SPLIT. */
   if (hiop && ra_used(ir+1)) {
     type = ra_dest(as, ir+1, allow);
     rset_clear(allow, type);
@@ -1213,8 +1216,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
     Reg tmp = RID_NONE;
     if ((ir->op2 & IRSLOAD_CONVERT))
       tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
-    lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
-              irt_isint(ir->t) || irt_isaddr(ir->t));
+    lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+              irt_isint(ir->t) || irt_isaddr(ir->t),
+              "bad SLOAD type %d", irt_type(ir->t));
     dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
     rset_clear(allow, dest);
     base = ra_alloc1(as, REF_BASE, allow);
@@ -1272,19 +1276,17 @@ dotypecheck:
 static void asm_cnew(ASMState *as, IRIns *ir)
 {
   CTState *cts = ctype_ctsG(J2G(as->J));
-  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
-  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
-             lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
-  IRRef args[2];
+  IRRef args[4];
   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   RegSet drop = RSET_SCRATCH;
-  lua_assert(sz != CTSIZE_INVALID);
+  lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+            "bad CNEW/CNEWI operands");
 
-  args[0] = ASMREF_L;     /* lua_State *L */
-  args[1] = ASMREF_TMP1;  /* MSize size */
   as->gcsteps++;
-
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   ra_evictset(as, drop);
@@ -1294,10 +1296,10 @@ static void asm_cnew(ASMState *as, IRIns *ir)
   /* Initialize immutable cdata object. */
   if (ir->o == IR_CNEWI) {
     int32_t ofs = sizeof(GCcdata);
-    lua_assert(sz == 4 || sz == 8);
+    lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
     if (sz == 8) {
       ofs += 4; ir++;
-      lua_assert(ir->o == IR_HIOP);
+      lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI");
     }
     for (;;) {
       Reg r = ra_alloc1(as, ir->op2, allow);
@@ -1306,22 +1308,32 @@ static void asm_cnew(ASMState *as, IRIns *ir)
       if (ofs == sizeof(GCcdata)) break;
       ofs -= 4; ir--;
     }
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id */
+    args[2] = ir->op2;      /* CTSize sz */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
   }
+
   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   {
-    uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
-    Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+    uint32_t k = emit_isk12(ARMI_MOV, id);
+    Reg r = k ? RID_R1 : ra_allock(as, id, allow);
     emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
     emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
     emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
     if (k) emit_d(as, ARMI_MOV^k, RID_R1);
   }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size */
   asm_gencall(as, ci, args);
   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));
 }
-#else
-#define asm_cnew(as, ir)	((void)0)
 #endif
 
 /* -- Write barriers ------------------------------------------------------ */
@@ -1353,7 +1365,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
   MCLabel l_end;
   Reg obj, val, tmp;
   /* No need for other object barriers (yet). */
-  lua_assert(IR(ir->op1)->o == IR_UREFC);
+  lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
   ra_evictset(as, RSET_SCRATCH);
   l_end = emit_label(as);
   args[0] = ASMREF_TMP1;  /* global_State *g */
@@ -1392,23 +1404,36 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
   emit_dm(as, ai, (dest & 15), (left & 15));
 }
 
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+static void asm_callround(ASMState *as, IRIns *ir, int id)
 {
-  IRIns *irp = IR(ir->op1);
-  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
-    IRIns *irpp = IR(irp->op1);
-    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
-       irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
-      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-      IRRef args[2];
-      args[0] = irpp->op1;
-      args[1] = irp->op2;
-      asm_setupresult(as, ir, ci);
-      asm_gencall(as, ci, args);
-      return 1;
-    }
-  }
-  return 0;
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+               RID2RSET(RID_R3)|RID2RSET(RID_R12);
+  RegSet of;
+  Reg dest, src;
+  ra_evictset(as, drop);
+  dest = ra_dest(as, ir, RSET_FPR);
+  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+               id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+                                  (void *)lj_vm_trunc_sf);
+  /* Workaround to protect argument GPRs from being used for remat. */
+  of = as->freeset;
+  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
+  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+  if (ir->op2 <= IRFPM_TRUNC)
+    asm_callround(as, ir, ir->op2);
+  else if (ir->op2 == IRFPM_SQRT)
+    asm_fpunary(as, ir, ARMI_VSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
 #endif
 
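The new asm_fpmath dispatches floor/ceil/trunc to hand-written rounding helpers with a custom calling convention, sqrt to a single VFP instruction, and everything else through asm_callid, which relies on the IRCALL_* table being laid out in IRFPM_* order starting at IRCALL_lj_vm_floor. A sketch of that parallel-enum technique; the enum names and values here are illustrative, not copied from lj_ir.h or lj_ircall.h:

#include <assert.h>

enum { FPM_FLOOR, FPM_CEIL, FPM_TRUNC, FPM_SQRT, FPM_LOG, FPM_LOG2 };
enum { CALL_floor, CALL_ceil, CALL_trunc, CALL_sqrt, CALL_log, CALL_log2 };

int main(void)
{
  /* Mirrors asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2): as long as
  ** both tables keep the same order, the sum selects the right helper. */
  assert(CALL_floor + FPM_LOG == (int)CALL_log);
  return 0;
}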
@@ -1474,19 +1499,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
   asm_intop(as, ir, asm_drop_cmp0(as, ai));
 }
 
-static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
-{
-  ai = asm_drop_cmp0(as, ai);
-  if (ir->op2 == 0) {
-    Reg dest = ra_dest(as, ir, RSET_GPR);
-    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
-    emit_d(as, ai^m, dest);
-  } else {
-    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
-    asm_intop(as, ir, ai);
-  }
-}
-
 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1552,6 +1564,15 @@ static void asm_mul(ASMState *as, IRIns *ir)
     asm_intmul(as, ir);
 }
 
+#define asm_addov(as, ir)	asm_add(as, ir)
+#define asm_subov(as, ir)	asm_sub(as, ir)
+#define asm_mulov(as, ir)	asm_mul(as, ir)
+
+#if !LJ_SOFTFP
+#define asm_fpdiv(as, ir)	asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_abs(as, ir)		asm_fpunary(as, ir, ARMI_VABS_D)
+#endif
+
 static void asm_neg(ASMState *as, IRIns *ir)
 {
 #if !LJ_SOFTFP
@@ -1563,41 +1584,22 @@ static void asm_neg(ASMState *as, IRIns *ir)
     asm_intneg(as, ir, ARMI_RSB);
 }
 
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
 {
-  const CCallInfo *ci = &lj_ir_callinfo[id];
-  IRRef args[2];
-  args[0] = ir->op1;
-  args[1] = ir->op2;
-  asm_setupresult(as, ir, ci);
-  asm_gencall(as, ci, args);
+  ai = asm_drop_cmp0(as, ai);
+  if (ir->op2 == 0) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+    emit_d(as, ai^m, dest);
+  } else {
+    /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+    asm_intop(as, ir, ai);
+  }
 }
 
-#if !LJ_SOFTFP
-static void asm_callround(ASMState *as, IRIns *ir, int id)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
-               RID2RSET(RID_R3)|RID2RSET(RID_R12);
-  RegSet of;
-  Reg dest, src;
-  ra_evictset(as, drop);
-  dest = ra_dest(as, ir, RSET_FPR);
-  emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
-  emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
-               id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
-                                  (void *)lj_vm_trunc_sf);
-  /* Workaround to protect argument GPRs from being used for remat. */
-  of = as->freeset;
-  as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
-  as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
-  src = ra_alloc1(as, ir->op1, RSET_FPR);  /* May alloc GPR to remat FPR. */
-  as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
-  emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
-}
-#endif
+#define asm_bnot(as, ir)	asm_bitop(as, ir, ARMI_MVN)
 
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1614,6 +1616,10 @@ static void asm_bswap(ASMState *as, IRIns *ir)
   }
 }
 
+#define asm_band(as, ir)	asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir)		asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, ARMI_EOR)
+
 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
 {
   if (irref_isk(ir->op2)) {  /* Constant shifts. */
@@ -1631,6 +1637,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
   }
 }
 
+#define asm_bshl(as, ir)	asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir)	asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir)	asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir)	asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir)	lj_assertA(0, "unexpected BROL")
+
 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
 {
   uint32_t kcmp = 0, kmov = 0;
@@ -1704,6 +1716,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
     asm_intmin_max(as, ir, cc);
 }
 
+#define asm_min(as, ir)		asm_min_max(as, ir, CC_GT, CC_PL)
+#define asm_max(as, ir)		asm_min_max(as, ir, CC_LT, CC_LE)
+
 /* -- Comparisons --------------------------------------------------------- */
 
 /* Map of comparisons to flags. ORDER IR. */
@@ -1777,7 +1792,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
   Reg left;
   uint32_t m;
   int cmpprev0 = 0;
-  lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
+  lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
+            "bad comparison data type %d", irt_type(ir->t));
   if (asm_swapops(as, lref, rref)) {
     Reg tmp = lref; lref = rref; rref = tmp;
     if (cc >= CC_GE) cc ^= 7;  /* LT <-> GT, LE <-> GE */
@@ -1819,6 +1835,18 @@ notst:
     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
 }
 
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+  if (irt_isnum(ir->t))
+    asm_fpcomp(as, ir);
+  else
+#endif
+    asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir)	asm_comp(as, ir)
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1857,15 +1885,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir)
 }
 #endif
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_HASFFI || LJ_SOFTFP
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_HASFFI || LJ_SOFTFP
   if ((ir-1)->o <= IR_NE) {  /* 64 bit integer or FP comparisons. ORDER IR. */
     as->curins--;  /* Always skip the loword comparison. */
 #if LJ_SOFTFP
@@ -1882,7 +1910,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
   } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
     as->curins--;  /* Always skip the loword min/max. */
     if (uselo || usehi)
-      asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
+      asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
     return;
 #elif LJ_HASFFI
   } else if ((ir-1)->o == IR_CONV) {
@@ -1893,9 +1921,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
   } else if ((ir-1)->o == IR_XSTORE) {
     if ((ir-1)->r != RID_SINK)
-      asm_xstore(as, ir, 4);
+      asm_xstore_(as, ir, 4);
     return;
   }
+#endif
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
 #if LJ_HASFFI
@@ -1914,6 +1943,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     asm_intneg(as, ir, ARMI_RSC);
     asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
     break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
 #if LJ_SOFTFP
   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
@@ -1921,24 +1953,26 @@ static void asm_hiop(ASMState *as, IRIns *ir)
     if (!uselo)
       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
     break;
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
-  case IR_CALLN:
-  case IR_CALLS:
-  case IR_CALLXS:
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
-#if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-#endif
-  case IR_CNEWI:
-    /* Nothing to do here. Handled by lo op itself. */
-    break;
-  default: lua_assert(0); break;
+  default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
   }
-#else
-  UNUSED(as); UNUSED(ir); lua_assert(0);
-#endif
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+  UNUSED(ir);
+  asm_guardcc(as, CC_NE);
+  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
+  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
 }
 
 /* -- Stack handling ------------------------------------------------------ */
@@ -1952,7 +1986,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   if (irp) {
     if (!ra_hasspill(irp->s)) {
       pbase = irp->r;
-      lua_assert(ra_hasreg(pbase));
+      lj_assertA(ra_hasreg(pbase), "base reg lost");
     } else if (allow) {
       pbase = rset_pickbot(allow);
     } else {
@@ -1964,13 +1998,13 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
   }
   emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
   k = emit_isk12(0, (int32_t)(8*topslot));
-  lua_assert(k);
+  lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
   emit_n(as, ARMI_CMP^k, RID_TMP);
   emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
   emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
           (int32_t)offsetof(lua_State, maxstack));
   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    int32_t i = i32ptr(&J2G(as->J)->jit_L);
+    int32_t i = i32ptr(&J2G(as->J)->cur_L);
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1978,7 +2012,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
     emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
-    emit_getgl(as, RID_TMP, jit_L);
+    emit_getgl(as, RID_TMP, cur_L);
   }
 }
 
@@ -2001,7 +2035,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
 #if LJ_SOFTFP
        RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
        Reg tmp;
-       lua_assert(irref_isk(ref));  /* LJ_SOFTFP: must be a number constant. */
+       /* LJ_SOFTFP: must be a number constant. */
+       lj_assertA(irref_isk(ref), "unsplit FP op");
        tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
                        rset_exclude(RSET_GPREVEN, RID_BASE));
        emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
@@ -2015,7 +2050,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
     } else {
       RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
       Reg type;
-      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
+      lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
+                "restore of IR type %d", irt_type(ir->t));
       if (!irt_ispri(ir->t)) {
        Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
        emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
@@ -2028,6 +2064,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
       } else if ((sn & SNAP_SOFTFPNUM)) {
        type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
 #endif
+      } else if ((sn & SNAP_KEYINDEX)) {
+       type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
       } else {
        type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
       }
@@ -2035,7 +2073,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
     }
     checkmclim(as);
   }
-  lua_assert(map + nent == flinks);
+  lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
 }
 
 /* -- GC handling --------------------------------------------------------- */
@@ -2089,15 +2127,21 @@ static void asm_loop_fixup(ASMState *as) | |||
2089 | } | 2127 | } |
2090 | } | 2128 | } |
2091 | 2129 | ||
2130 | /* Fixup the tail of the loop. */ | ||
2131 | static void asm_loop_tail_fixup(ASMState *as) | ||
2132 | { | ||
2133 | UNUSED(as); /* Nothing to do. */ | ||
2134 | } | ||
2135 | |||
2092 | /* -- Head of trace ------------------------------------------------------- */ | 2136 | /* -- Head of trace ------------------------------------------------------- */ |
2093 | 2137 | ||
2094 | /* Reload L register from g->jit_L. */ | 2138 | /* Reload L register from g->cur_L. */ |
2095 | static void asm_head_lreg(ASMState *as) | 2139 | static void asm_head_lreg(ASMState *as) |
2096 | { | 2140 | { |
2097 | IRIns *ir = IR(ASMREF_L); | 2141 | IRIns *ir = IR(ASMREF_L); |
2098 | if (ra_used(ir)) { | 2142 | if (ra_used(ir)) { |
2099 | Reg r = ra_dest(as, ir, RSET_GPR); | 2143 | Reg r = ra_dest(as, ir, RSET_GPR); |
2100 | emit_getgl(as, r, jit_L); | 2144 | emit_getgl(as, r, cur_L); |
2101 | ra_evictk(as); | 2145 | ra_evictk(as); |
2102 | } | 2146 | } |
2103 | } | 2147 | } |
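The jit_L -> cur_L switch here and in asm_stack_check above presumably tracks the upstream VM change that dropped the JIT-only g->jit_L field in favor of g->cur_L, which the interpreter keeps current as well. emit_getgl's job is to load a global_State field through its absolute address; a runnable stand-in for that idea, with made-up types:

#include <stdio.h>
#include <stddef.h>

typedef struct { void *cur_L; } GlobalState;  /* Hypothetical stand-in. */
static GlobalState g;

/* Fetch a field of the global state via base address plus field offset,
** which is what the emitted LDR does at run time.
*/
static void *getgl_cur_L(void)
{
  return *(void **)((char *)&g + offsetof(GlobalState, cur_L));
}

int main(void)
{
  int dummy;
  g.cur_L = &dummy;
  printf("%d\n", (void *)&dummy == getgl_cur_L());  /* Prints 1. */
  return 0;
}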
@@ -2125,7 +2169,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) | |||
2125 | rset_clear(allow, ra_dest(as, ir, allow)); | 2169 | rset_clear(allow, ra_dest(as, ir, allow)); |
2126 | } else { | 2170 | } else { |
2127 | Reg r = irp->r; | 2171 | Reg r = irp->r; |
2128 | lua_assert(ra_hasreg(r)); | 2172 | lj_assertA(ra_hasreg(r), "base reg lost"); |
2129 | rset_clear(allow, r); | 2173 | rset_clear(allow, r); |
2130 | if (r != ir->r && !rset_test(as->freeset, r)) | 2174 | if (r != ir->r && !rset_test(as->freeset, r)) |
2131 | ra_restore(as, regcost_ref(as->cost[r])); | 2175 | ra_restore(as, regcost_ref(as->cost[r])); |
@@ -2147,7 +2191,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
2147 | } else { | 2191 | } else { |
2148 | /* Patch stack adjustment. */ | 2192 | /* Patch stack adjustment. */ |
2149 | uint32_t k = emit_isk12(ARMI_ADD, spadj); | 2193 | uint32_t k = emit_isk12(ARMI_ADD, spadj); |
2150 | lua_assert(k); | 2194 | lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); |
2151 | p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); | 2195 | p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); |
2152 | } | 2196 | } |
2153 | /* Patch exit branch. */ | 2197 | /* Patch exit branch. */ |
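The strengthened assertion spells out what emit_isk12() checks: an ARM data-processing immediate must be an 8-bit value rotated right by an even amount, so not every 32-bit constant encodes. The stack adjustment is small and kept 8-byte aligned, so it should always fit. A self-contained sketch of the encodability test:

#include <stdio.h>
#include <stdint.h>

static uint32_t rotl(uint32_t x, unsigned k)
{
  return k ? ((x << k) | (x >> (32 - k))) : x;  /* Avoid UB for k == 0. */
}

/* Sketch of the ARM "K12" immediate rule: n is encodable iff some even
** left-rotation brings all of its set bits into the low 8 bits.
*/
static int is_arm_k12(uint32_t n)
{
  unsigned rot;
  for (rot = 0; rot < 32; rot += 2)
    if (rotl(n, rot) <= 255u) return 1;
  return 0;
}

int main(void)
{
  printf("%d %d\n", is_arm_k12(264), is_arm_k12(257));  /* Prints "1 0". */
  return 0;
}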
@@ -2168,143 +2212,13 @@ static void asm_tail_prep(ASMState *as) | |||
2168 | *p = 0; /* Prevent load/store merging. */ | 2212 | *p = 0; /* Prevent load/store merging. */ |
2169 | } | 2213 | } |
2170 | 2214 | ||
2171 | /* -- Instruction dispatch ------------------------------------------------ */ | ||
2172 | |||
2173 | /* Assemble a single instruction. */ | ||
2174 | static void asm_ir(ASMState *as, IRIns *ir) | ||
2175 | { | ||
2176 | switch ((IROp)ir->o) { | ||
2177 | /* Miscellaneous ops. */ | ||
2178 | case IR_LOOP: asm_loop(as); break; | ||
2179 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | ||
2180 | case IR_USE: | ||
2181 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
2182 | case IR_PHI: asm_phi(as, ir); break; | ||
2183 | case IR_HIOP: asm_hiop(as, ir); break; | ||
2184 | case IR_GCSTEP: asm_gcstep(as, ir); break; | ||
2185 | |||
2186 | /* Guarded assertions. */ | ||
2187 | case IR_EQ: case IR_NE: | ||
2188 | if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { | ||
2189 | as->curins--; | ||
2190 | asm_href(as, ir-1, (IROp)ir->o); | ||
2191 | break; | ||
2192 | } | ||
2193 | /* fallthrough */ | ||
2194 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
2195 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | ||
2196 | case IR_ABC: | ||
2197 | #if !LJ_SOFTFP | ||
2198 | if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; } | ||
2199 | #endif | ||
2200 | asm_intcomp(as, ir); | ||
2201 | break; | ||
2202 | |||
2203 | case IR_RETF: asm_retf(as, ir); break; | ||
2204 | |||
2205 | /* Bit ops. */ | ||
2206 | case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break; | ||
2207 | case IR_BSWAP: asm_bitswap(as, ir); break; | ||
2208 | |||
2209 | case IR_BAND: asm_bitop(as, ir, ARMI_AND); break; | ||
2210 | case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break; | ||
2211 | case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break; | ||
2212 | |||
2213 | case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break; | ||
2214 | case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break; | ||
2215 | case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break; | ||
2216 | case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break; | ||
2217 | case IR_BROL: lua_assert(0); break; | ||
2218 | |||
2219 | /* Arithmetic ops. */ | ||
2220 | case IR_ADD: case IR_ADDOV: asm_add(as, ir); break; | ||
2221 | case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break; | ||
2222 | case IR_MUL: case IR_MULOV: asm_mul(as, ir); break; | ||
2223 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; | ||
2224 | case IR_NEG: asm_neg(as, ir); break; | ||
2225 | |||
2226 | #if LJ_SOFTFP | ||
2227 | case IR_DIV: case IR_POW: case IR_ABS: | ||
2228 | case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: | ||
2229 | lua_assert(0); /* Unused for LJ_SOFTFP. */ | ||
2230 | break; | ||
2231 | #else | ||
2232 | case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break; | ||
2233 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
2234 | case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break; | ||
2235 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
2236 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
2237 | case IR_FPMATH: | ||
2238 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
2239 | break; | ||
2240 | if (ir->op2 <= IRFPM_TRUNC) | ||
2241 | asm_callround(as, ir, ir->op2); | ||
2242 | else if (ir->op2 == IRFPM_SQRT) | ||
2243 | asm_fpunary(as, ir, ARMI_VSQRT_D); | ||
2244 | else | ||
2245 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
2246 | break; | ||
2247 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
2248 | #endif | ||
2249 | |||
2250 | case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break; | ||
2251 | case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break; | ||
2252 | |||
2253 | /* Memory references. */ | ||
2254 | case IR_AREF: asm_aref(as, ir); break; | ||
2255 | case IR_HREF: asm_href(as, ir, 0); break; | ||
2256 | case IR_HREFK: asm_hrefk(as, ir); break; | ||
2257 | case IR_NEWREF: asm_newref(as, ir); break; | ||
2258 | case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; | ||
2259 | case IR_FREF: asm_fref(as, ir); break; | ||
2260 | case IR_STRREF: asm_strref(as, ir); break; | ||
2261 | |||
2262 | /* Loads and stores. */ | ||
2263 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
2264 | asm_ahuvload(as, ir); | ||
2265 | break; | ||
2266 | case IR_FLOAD: asm_fload(as, ir); break; | ||
2267 | case IR_XLOAD: asm_xload(as, ir); break; | ||
2268 | case IR_SLOAD: asm_sload(as, ir); break; | ||
2269 | |||
2270 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | ||
2271 | case IR_FSTORE: asm_fstore(as, ir); break; | ||
2272 | case IR_XSTORE: asm_xstore(as, ir, 0); break; | ||
2273 | |||
2274 | /* Allocations. */ | ||
2275 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | ||
2276 | case IR_TNEW: asm_tnew(as, ir); break; | ||
2277 | case IR_TDUP: asm_tdup(as, ir); break; | ||
2278 | case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; | ||
2279 | |||
2280 | /* Write barriers. */ | ||
2281 | case IR_TBAR: asm_tbar(as, ir); break; | ||
2282 | case IR_OBAR: asm_obar(as, ir); break; | ||
2283 | |||
2284 | /* Type conversions. */ | ||
2285 | case IR_CONV: asm_conv(as, ir); break; | ||
2286 | case IR_TOSTR: asm_tostr(as, ir); break; | ||
2287 | case IR_STRTO: asm_strto(as, ir); break; | ||
2288 | |||
2289 | /* Calls. */ | ||
2290 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
2291 | case IR_CALLXS: asm_callx(as, ir); break; | ||
2292 | case IR_CARG: break; | ||
2293 | |||
2294 | default: | ||
2295 | setintV(&as->J->errinfo, ir->o); | ||
2296 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | ||
2297 | break; | ||
2298 | } | ||
2299 | } | ||
2300 | |||
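This is the bulk of the deletions: the per-architecture asm_ir() dispatch switch is removed wholesale. Dispatch presumably now happens once in the shared lj_asm.c, which calls back into per-backend asm_* helpers; that also removes stray lua_assert(0) arms like the IR_BROL case above. A generic, runnable sketch of the X-macro dispatch pattern such a shared driver can use (illustrative only, not LuaJIT's actual table):

#include <stdio.h>

#define OPDEF(_) _(ADD) _(SUB) _(MUL)  /* One entry per IR opcode. */

typedef enum {
#define OPENUM(name) IR_##name,
OPDEF(OPENUM)
#undef OPENUM
} IROp;

#define OPHANDLER(name) \
  static void asm_##name(void) { printf("assemble " #name "\n"); }
OPDEF(OPHANDLER)  /* Defines asm_ADD(), asm_SUB(), asm_MUL(). */
#undef OPHANDLER

static void asm_ir(IROp o)
{
  switch (o) {
#define OPCASE(name) case IR_##name: asm_##name(); break;
OPDEF(OPCASE)
#undef OPCASE
  }
}

int main(void)
{
  asm_ir(IR_MUL);  /* Prints "assemble MUL". */
  return 0;
}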
2301 | /* -- Trace setup --------------------------------------------------------- */ | 2215 | /* -- Trace setup --------------------------------------------------------- */ |
2302 | 2216 | ||
2303 | /* Ensure there are enough stack slots for call arguments. */ | 2217 | /* Ensure there are enough stack slots for call arguments. */ |
2304 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | 2218 | static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) |
2305 | { | 2219 | { |
2306 | IRRef args[CCI_NARGS_MAX*2]; | 2220 | IRRef args[CCI_NARGS_MAX*2]; |
2307 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2221 | uint32_t i, nargs = CCI_XNARGS(ci); |
2308 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; | 2222 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; |
2309 | asm_collectargs(as, ir, ci, args); | 2223 | asm_collectargs(as, ir, ci, args); |
2310 | for (i = 0; i < nargs; i++) { | 2224 | for (i = 0; i < nargs; i++) { |
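The switch from CCI_NARGS to CCI_XNARGS (here and in asm_gencall) presumably reflects an extended count in which each 64-bit argument occupies two 32-bit slots under the soft-float ABI, which would also be why the args buffer is dimensioned CCI_NARGS_MAX*2. A toy model of the idea, with hypothetical names:

#include <stdio.h>

/* Count argument slots when every 64-bit FP argument needs two GPR-sized
** slots (soft-float, 32-bit). Purely illustrative.
*/
static unsigned xnargs(unsigned nargs, const int *is_fp64)
{
  unsigned i, n = 0;
  for (i = 0; i < nargs; i++)
    n += is_fp64[i] ? 2 : 1;
  return n;
}

int main(void)
{
  const int args[3] = { 0, 1, 0 };  /* int, double, int */
  printf("%u\n", xnargs(3, args));  /* Prints 4. */
  return 0;
}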
@@ -2360,7 +2274,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
2360 | if (!cstart) cstart = p; | 2274 | if (!cstart) cstart = p; |
2361 | } | 2275 | } |
2362 | } | 2276 | } |
2363 | lua_assert(cstart != NULL); | 2277 | lj_assertJ(cstart != NULL, "exit stub %d not found", exitno); |
2364 | lj_mcode_sync(cstart, cend); | 2278 | lj_mcode_sync(cstart, cend); |
2365 | lj_mcode_patch(J, mcarea, 1); | 2279 | lj_mcode_patch(J, mcarea, 1); |
2366 | } | 2280 | } |
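After patching exit branches in place, lj_mcode_sync(cstart, cend) has to make the rewritten range visible to instruction fetch: ARM cores do not keep the I-cache coherent with data-side writes, so skipping the sync risks executing stale code. A minimal sketch, assuming a GCC/Clang toolchain, of what such a sync boils down to:

/* Flush a range of freshly written machine code. __builtin___clear_cache
** is the GCC/Clang portable spelling; the real lj_mcode_sync() is
** platform-specific.
*/
static void sync_code(void *start, void *end)
{
  __builtin___clear_cache((char *)start, (char *)end);
}

int main(void)
{
  static char buf[16];
  sync_code(buf, buf + sizeof(buf));  /* Harmless demo on any target. */
  return 0;
}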