Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--	src/lj_asm.c	733
1 file changed, 613 insertions(+), 120 deletions(-)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9b17421e..68d28fb0 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -90,7 +90,7 @@ typedef struct ASMState {
   MCode *realign;	/* Realign loop if not NULL. */
 
 #ifdef RID_NUM_KREF
-  int32_t krefk[RID_NUM_KREF];
+  intptr_t krefk[RID_NUM_KREF];
 #endif
   IRRef1 phireg[RID_MAX];	/* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
@@ -143,7 +143,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefk(as, ref)	(as->krefk[(ref)])
 
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
 {
   IRRef ref = (IRRef)(r - RID_MIN_KREF);
   as->krefk[ref] = k;
@@ -170,6 +170,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_emit_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_emit_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -178,6 +180,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
@@ -315,7 +323,11 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     lua_assert(!rset_test(as->freeset, r));
     ra_free(as, r);
     ra_modified(as, r);
+#if LJ_64
+    emit_loadu64(as, r, ra_krefk(as, ref));
+#else
     emit_loadi(as, r, ra_krefk(as, ref));
+#endif
     return r;
   }
   ir = IR(ref);
@@ -325,9 +337,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   ra_modified(as, r);
   ir->r = RID_INIT;  /* Do not keep any hint. */
   RA_DBGX((as, "remat     $i $r", ir, r));
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@@ -335,10 +347,16 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -511,7 +529,7 @@ static void ra_evictk(ASMState *as)
 
 #ifdef RID_NUM_KREF
 /* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 {
   /* First try to find a register which already holds the same constant. */
   RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -520,9 +538,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
     IRRef ref;
     r = rset_pickbot(work);
     ref = regcost_ref(as->cost[r]);
+#if LJ_64
+    if (ref < ASMREF_L) {
+      if (ra_iskref(ref)) {
+        if (k == ra_krefk(as, ref))
+          return r;
+      } else {
+        IRIns *ir = IR(ref);
+        if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+            (ir->o == IR_KINT && k == ir->i) ||
+            (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+            ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+             k == (intptr_t)ir_kptr(ir))
+#else
+            (ir->o != IR_KINT64 && k == ir->i)
+#endif
+           )
+          return r;
+      }
+    }
+#else
     if (ref < ASMREF_L &&
         k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
       return r;
+#endif
     rset_clear(work, r);
   }
   pick = as->freeset & allow;
@@ -542,7 +582,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
 }
 
 /* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
 {
   Reg kr = ra_allock(as, k, RID2RSET(r));
   if (kr != r) {
@@ -612,10 +652,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
@@ -628,11 +678,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename    $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    ra_addrename(as, down, ref, as->snapno);
   }
 }
 
@@ -682,18 +728,22 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-        cTValue *tv = ir_knum(ir);
         /* FP remat needs a load except for +0. Still better than eviction. */
-        if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-          emit_loadn(as, dest, tv);
+        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+          emit_loadk64(as, dest, ir);
           return;
         }
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-        emit_loadu64(as, dest, ir_kint64(ir)->u64);
+        emit_loadk64(as, dest, ir);
+        return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+        emit_loadk64(as, dest, ir);
         return;
 #endif
-      } else {
+#endif
+      } else if (ir->o != IR_KPRI) {
         lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
                    ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
         emit_loadi(as, dest, ir->i);
@@ -934,7 +984,7 @@ static void asm_snap_prep(ASMState *as)
   } else {
     /* Process any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
        ir->op2 = REF_BIAS-1;  /* Kill rename. */
    }
@@ -943,44 +993,6 @@
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-                            const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -1004,7 +1016,11 @@ static uint32_t ir_khash(IRIns *ir)
   } else {
     lua_assert(irt_isgcv(ir->t));
     lo = u32ptr(ir_kgc(ir));
+#if LJ_GC64
+    hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
+#else
     hi = lo + HASH_BIAS;
+#endif
   }
   return hashrot(lo, hi);
 }
@@ -1065,6 +1081,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
   as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+          (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+        irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+        ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -129;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = (int8_t)strdata(s)[0];  /* Signed! */
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+        args[1] = ASMREF_TMP1;  /* TValue * */
+        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+        lua_assert(irt_isinteger(IR(irs->op1)->t));
+        args[1] = irs->op1;  /* int */
+        if (irs->op2 == IRTOSTR_INT)
+          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+        else
+          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -129)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+                            const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP32
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      asm_fppow(as, ir, irpp->op1, irp->op2);
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1250,12 +1519,7 @@ static void asm_phi_fixup(ASMState *as)
       irt_clearmark(ir->t);
       /* Left PHI gained a spill slot before the loop? */
       if (ra_hasspill(ir->s)) {
-        IRRef ren;
-        lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-        ren = tref_ref(lj_ir_emit(as->J));
-        as->ir = as->T->ir;  /* The IR may have been reallocated. */
-        IR(ren)->r = (uint8_t)r;
-        IR(ren)->s = SPS_NONE;
+        ra_addrename(as, r, lref, as->loopsnapno);
       }
     }
     rset_clear(work, r);
@@ -1330,6 +1594,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_asm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_asm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_asm_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -1338,6 +1604,136 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP32
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    lua_assert(0);  /* Unused for LJ_SOFTFP32. */
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
@@ -1536,7 +1932,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
@@ -1556,19 +1952,23 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
        pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
     case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1580,6 +1980,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
@@ -1605,17 +2010,23 @@ static void asm_setup_regsp(ASMState *as)
   ra_setup(as);
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
-  ir = IR(nins-1);
-  if (ir->o == IR_RENAME) {
-    do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
-  }
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
@@ -1676,7 +2087,7 @@ static void asm_setup_regsp(ASMState *as)
        as->modset |= RSET_SCRATCH;
       continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1701,8 +2112,8 @@ static void asm_setup_regsp(ASMState *as)
        ir->prev = REGSP_HINT(RID_FPRET);
        continue;
       }
-      /* fallthrough */
 #endif
+      /* fallthrough */
     case IR_CALLN: case IR_CALLXS:
 #if LJ_SOFTFP
     case IR_MIN: case IR_MAX:
@@ -1721,11 +2132,23 @@ static void asm_setup_regsp(ASMState *as)
 #endif
       /* fallthrough */
       /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
        as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+        if (ir->op2 != REF_NIL && as->evenspill < 4)
+          as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+      /* fallthrough */
+#else
+      /* fallthrough */
+    case IR_CNEW:
+#endif
       /* fallthrough */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
        as->modset = RSET_SCRATCH;
@@ -1734,21 +2157,27 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
        as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+        as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
+      /* fallthrough */
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-        ir->prev = REGSP_HINT(RID_XMM0);
        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+          as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+        break;
 #else
        ir->prev = REGSP_HINT(RID_FPRET);
-        if (inloop)
-          as->modset |= RSET_SCRATCH;
-#endif
        continue;
+#endif
       }
       /* fallthrough */  /* for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1761,31 +2190,34 @@ static void asm_setup_regsp(ASMState *as)
       break;
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-        ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-        if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+        if (!(as->flags & JIT_F_SSE4_1)) {
+          ir->prev = REGSP_HINT(RID_XMM0);
+          if (inloop)
+            as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+          continue;
+        }
+        break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+        if (as->evenspill < 4)  /* Leave room to call pow(). */
          as->evenspill = 4;
-#endif
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-        continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-        ir->prev = REGSP_HINT(RID_XMM0);
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-        continue;
       }
+#endif
+      if (inloop)
+        as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-        as->modset |= RSET_SCRATCH;
       continue;
 #endif
 #if LJ_TARGET_X86ORX64
       /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
-    case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+    case IR_BSHL: case IR_BSHR: case IR_BSAR:
+      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
+        break;
+      /* fallthrough */
+    case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
        IR(ir->op2)->r = REGSP_HINT(RID_ECX);
        if (inloop)
@@ -1831,14 +2263,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   ASMState *as = &as_;
   MCode *origtop;
 
+  /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
+  {
+    IRRef nins = T->nins;
+    IRIns *ir = &T->ir[nins-1];
+    if (ir->o == IR_NOP || ir->o == IR_RENAME) {
+      do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
+      T->nins = nins;
+    }
+  }
+
   /* Ensure an initialized instruction beyond the last one for HIOP checks. */
-  J->cur.nins = lj_ir_nextins(J);
-  J->cur.ir[J->cur.nins].o = IR_NOP;
+  /* This also allows one RENAME to be added without reallocating curfinal. */
+  as->orignins = lj_ir_nextins(J);
+  J->cur.ir[as->orignins].o = IR_NOP;
 
   /* Setup initial state. Copy some fields to reduce indirections. */
   as->J = J;
   as->T = T;
-  as->ir = T->ir;
+  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
   as->flags = J->flags;
   as->loopref = J->loopref;
   as->realign = NULL;
@@ -1851,12 +2294,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
 
-  do {
+  /*
+  ** This is a loop, because the MCode may have to be (re-)assembled
+  ** multiple times:
+  **
+  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
+  **    backend wants the MCode to be aligned differently.
+  **
+  **    This is currently only the case on x86/x64, where small loops get
+  **    an aligned loop body plus a short branch. Not much effort is wasted,
+  **    because the abort happens very quickly and only once.
+  **
+  ** 2. The IR is immovable, since the MCode embeds pointers to various
+  **    constants inside the IR. But RENAMEs may need to be added to the IR
+  **    during assembly, which might grow and reallocate the IR. We check
+  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
+  **    copy (in J->curfinal.ir) and try again.
+  **
+  **    95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
+  **    2 RENAMEs and only 0.5% have more than that. That's why we opt to
+  **    always have one spare slot in the IR (see above), which means we
+  **    have to redo the assembly for only ~2% of all traces.
+  **
+  **    Very, very rarely, this needs to be done repeatedly, since the
+  **    location of constants inside the IR (actually, reachability from
+  **    a global pointer) may affect register allocation and thus the
+  **    number of RENAMEs.
+  */
+  for (;;) {
     as->mcp = as->mctop;
 #ifdef LUA_USE_ASSERT
     as->mcp_prev = as->mcp;
 #endif
-    as->curins = T->nins;
+    as->ir = J->curfinal->ir;  /* Use the copied IR. */
+    as->curins = J->cur.nins = as->orignins;
+
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
 
@@ -1884,22 +2356,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       checkmclim(as);
       asm_ir(as, ir);
     }
-  } while (as->realign);  /* Retry in case the MCode needs to be realigned. */
 
-  /* Emit head of trace. */
-  RA_DBG_REF();
-  checkmclim(as);
-  if (as->gcsteps > 0) {
-    as->curins = as->T->snap[0].ref;
-    asm_snap_prep(as);  /* The GC check is a guard. */
-    asm_gc_check(as);
+    if (as->realign && J->curfinal->nins >= T->nins)
+      continue;  /* Retry in case only the MCode needs to be realigned. */
+
+    /* Emit head of trace. */
+    RA_DBG_REF();
+    checkmclim(as);
+    if (as->gcsteps > 0) {
+      as->curins = as->T->snap[0].ref;
+      asm_snap_prep(as);  /* The GC check is a guard. */
+      asm_gc_check(as);
+      as->curins = as->stopins;
+    }
+    ra_evictk(as);
+    if (as->parent)
+      asm_head_side(as);
+    else
+      asm_head_root(as);
+    asm_phi_fixup(as);
+
+    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
+      lua_assert(J->curfinal->nk == T->nk);
+      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+             (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
+      T->nins = J->curfinal->nins;
+      break;  /* Done. */
+    }
+
+    /* Otherwise try again with a bigger IR. */
+    lj_trace_free(J2G(J), J->curfinal);
+    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
+    J->curfinal = lj_trace_alloc(J->L, T);
+    as->realign = NULL;
   }
-  ra_evictk(as);
-  if (as->parent)
-    asm_head_side(as);
-  else
-    asm_head_root(as);
-  asm_phi_fixup(as);
 
   RA_DBGX((as, "===== START ===="));
   RA_DBG_FLUSH();
@@ -1912,6 +2402,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   if (!as->loopref)
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+#if LJ_TARGET_MCODE_FIXUP
+  asm_mcode_fixup(T->mcode, T->szmcode);
+#endif
   lj_mcode_sync(T->mcode, origtop);
 }
 