Diffstat (limited to 'src/lj_asm.c')
-rw-r--r--  src/lj_asm.c  720
1 file changed, 602 insertions, 118 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 02714d4e..c2cf5a95 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -91,7 +91,7 @@ typedef struct ASMState {
   MCode *realign;  /* Realign loop if not NULL. */
 
 #ifdef RID_NUM_KREF
-  int32_t krefk[RID_NUM_KREF];
+  intptr_t krefk[RID_NUM_KREF];
 #endif
   IRRef1 phireg[RID_MAX];  /* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
@@ -144,7 +144,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
 #define ra_krefreg(ref)  ((Reg)(RID_MIN_KREF + (Reg)(ref)))
 #define ra_krefk(as, ref)  (as->krefk[(ref)])
 
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
 {
   IRRef ref = (IRRef)(r - RID_MIN_KREF);
   as->krefk[ref] = k;
@@ -171,6 +171,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_emit_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_emit_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -179,6 +181,12 @@ IRFLDEF(FLOFS)
 #error "Missing instruction emitter for target CPU"
 #endif
 
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+  emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+  emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
 /* -- Register allocator debugging ---------------------------------------- */
 
 /* #define LUAJIT_DEBUG_RA */
@@ -316,7 +324,11 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     lua_assert(!rset_test(as->freeset, r));
     ra_free(as, r);
     ra_modified(as, r);
+#if LJ_64
+    emit_loadu64(as, r, ra_krefk(as, ref));
+#else
     emit_loadi(as, r, ra_krefk(as, ref));
+#endif
     return r;
   }
   ir = IR(ref);
@@ -328,7 +340,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
   RA_DBGX((as, "remat $i $r", ir, r));
 #if !LJ_SOFTFP
   if (ir->o == IR_KNUM) {
-    emit_loadn(as, r, ir_knum(ir));
+    emit_loadk64(as, r, ir);
   } else
 #endif
   if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@@ -336,10 +348,16 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
     lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
-    emit_getgl(as, r, jit_L);
+    emit_getgl(as, r, cur_L);
 #if LJ_64
   } else if (ir->o == IR_KINT64) {
     emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+  } else if (ir->o == IR_KGC) {
+    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
 #endif
   } else {
     lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -512,7 +530,7 @@ static void ra_evictk(ASMState *as)
 
 #ifdef RID_NUM_KREF
 /* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 {
   /* First try to find a register which already holds the same constant. */
   RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -521,9 +539,31 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
     IRRef ref;
     r = rset_pickbot(work);
     ref = regcost_ref(as->cost[r]);
+#if LJ_64
+    if (ref < ASMREF_L) {
+      if (ra_iskref(ref)) {
+        if (k == ra_krefk(as, ref))
+          return r;
+      } else {
+        IRIns *ir = IR(ref);
+        if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+            (ir->o == IR_KINT && k == ir->i) ||
+            (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+            ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+             k == (intptr_t)ir_kptr(ir))
+#else
+            (ir->o != IR_KINT64 && k == ir->i)
+#endif
+           )
+          return r;
+      }
+    }
+#else
     if (ref < ASMREF_L &&
         k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
       return r;
+#endif
     rset_clear(work, r);
   }
   pick = as->freeset & allow;
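
The widened ra_allock() above reuses an already-occupied register only when the constant matches over all 64 bits; on LJ_GC64 it additionally matches GC object and pointer constants by address. A standalone sketch of the reuse idea, with hypothetical reg_value/reg_isconst/NREGS names (not LuaJIT source):

#include <stdint.h>

enum { NREGS = 16, NO_REG = -1 };
static int64_t reg_value[NREGS];  /* Constant currently cached per register. */
static int reg_isconst[NREGS];    /* Nonzero if that register holds a known constant. */

/* Return a register that already holds k, or NO_REG to force a fresh load. */
static int find_const_reg(int64_t k)
{
  int r;
  for (r = 0; r < NREGS; r++)
    if (reg_isconst[r] && reg_value[r] == k)
      return r;  /* All 64 bits must match; a 32-bit compare is no longer enough. */
  return NO_REG;
}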
@@ -543,7 +583,7 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
 }
 
 /* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
 {
   Reg kr = ra_allock(as, k, RID2RSET(r));
   if (kr != r) {
@@ -613,10 +653,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
   return r;
 }
 
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+  IRRef ren;
+  lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+  ren = tref_ref(lj_ir_emit(as->J));
+  as->J->cur.ir[ren].r = (uint8_t)down;
+  as->J->cur.ir[ren].s = SPS_NONE;
+}
+
 /* Rename register allocation and emit move. */
 static void ra_rename(ASMState *as, Reg down, Reg up)
 {
-  IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+  IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
   IRIns *ir = IR(ref);
   ir->r = (uint8_t)up;
   as->cost[down] = 0;
@@ -629,11 +679,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
   RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
-    ren = tref_ref(lj_ir_emit(as->J));
-    as->ir = as->T->ir;  /* The IR may have been reallocated. */
-    IR(ren)->r = (uint8_t)down;
-    IR(ren)->s = SPS_NONE;
+    ra_addrename(as, down, ref, as->snapno);
   }
 }
 
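
The new ra_addrename() helper centralizes what was previously open-coded in two places: a RENAME instruction records that, for snapshots up to `snapno`, the value of `ref` is still found in the old `down` register. A simplified standalone sketch of this bookkeeping, using a hypothetical RenameRec type instead of LuaJIT's IRIns encoding:

#include <stdint.h>

typedef struct RenameRec {
  uint32_t ref;     /* IR reference whose home register changed. */
  uint32_t snapno;  /* Snapshots up to here still see the old register. */
  uint8_t down;     /* Old register holding the value at that point. */
} RenameRec;

static RenameRec renames[64];  /* Fixed size for the sketch; the real IR grows. */
static unsigned nrenames;

static void add_rename(uint32_t ref, uint32_t snapno, uint8_t down)
{
  RenameRec *rn = &renames[nrenames++];  /* No overflow check in this sketch. */
  rn->ref = ref;
  rn->snapno = snapno;
  rn->down = down;
}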
@@ -683,18 +729,22 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
   if (ra_noreg(left)) {
     if (irref_isk(lref)) {
       if (ir->o == IR_KNUM) {
-        cTValue *tv = ir_knum(ir);
         /* FP remat needs a load except for +0. Still better than eviction. */
-        if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
-          emit_loadn(as, dest, tv);
+        if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+          emit_loadk64(as, dest, ir);
           return;
         }
 #if LJ_64
       } else if (ir->o == IR_KINT64) {
-        emit_loadu64(as, dest, ir_kint64(ir)->u64);
+        emit_loadk64(as, dest, ir);
+        return;
+#if LJ_GC64
+      } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+        emit_loadk64(as, dest, ir);
         return;
 #endif
-      } else {
+#endif
+      } else if (ir->o != IR_KPRI) {
         lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
                    ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
         emit_loadi(as, dest, ir->i);
@@ -935,7 +985,7 @@ static void asm_snap_prep(ASMState *as)
   } else {
     /* Process any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
-      IRIns *ir = IR(as->snaprename);
+      IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
         ir->op2 = REF_BIAS-1;  /* Kill rename. */
     }
@@ -944,44 +994,6 @@ static void asm_snap_prep(ASMState *as)
 
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
-                            const CCallInfo *ci, IRRef *args)
-{
-  uint32_t n = CCI_NARGS(ci);
-  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
-  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
-  while (n-- > 1) {
-    ir = IR(ir->op1);
-    lua_assert(ir->o == IR_CARG);
-    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
-  }
-  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
-  lua_assert(IR(ir->op1)->o != IR_CARG);
-}
-
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
-{
-  uint32_t nargs = 0;
-  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
-    IRIns *ira = IR(ir->op1);
-    nargs++;
-    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
-  }
-#if LJ_HASFFI
-  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
-    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
-    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
-    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
-    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
-  }
-#endif
-  return (nargs | (ir->t.irt << CCI_OTSHIFT));
-}
-
 /* Calculate stack adjustment. */
 static int32_t asm_stack_adjust(ASMState *as)
 {
@@ -1066,6 +1078,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
     as->gcsteps = 0x80000000;  /* Prevent implicit GC check further up. */
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+  Reg sb = ra_dest(as, ir, RSET_GPR);
+  if ((ir->op2 & IRBUFHDR_APPEND)) {
+    /* Rematerialize const buffer pointer instead of likely spill. */
+    IRIns *irp = IR(ir->op1);
+    if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+          (irp == ir-2 && !ra_used(ir-1)))) {
+      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+        irp = IR(irp->op1);
+      if (irref_isk(irp->op1)) {
+        ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+        ir = irp;
+      }
+    }
+  } else {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
+    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+  }
+#if LJ_TARGET_X86ORX64
+  ra_left(as, sb, ir->op1);
+#else
+  ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+  IRRef args[3];
+  IRIns *irs;
+  int kchar = -1;
+  args[0] = ir->op1;  /* SBuf * */
+  args[1] = ir->op2;  /* GCstr * */
+  irs = IR(ir->op2);
+  lua_assert(irt_isstr(irs->t));
+  if (irs->o == IR_KGC) {
+    GCstr *s = ir_kstr(irs);
+    if (s->len == 1) {  /* Optimize put of single-char string constant. */
+      kchar = strdata(s)[0];
+      args[1] = ASMREF_TMP1;  /* int, truncated to char */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+    }
+  } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+    if (irs->o == IR_TOSTR) {  /* Fuse number to string conversions. */
+      if (irs->op2 == IRTOSTR_NUM) {
+        args[1] = ASMREF_TMP1;  /* TValue * */
+        ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+      } else {
+        lua_assert(irt_isinteger(IR(irs->op1)->t));
+        args[1] = irs->op1;  /* int */
+        if (irs->op2 == IRTOSTR_INT)
+          ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+        else
+          ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+      }
+    } else if (irs->o == IR_SNEW) {  /* Fuse string allocation. */
+      args[1] = irs->op1;  /* const void * */
+      args[2] = irs->op2;  /* MSize */
+      ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+    }
+  }
+  asm_setupresult(as, ir, ci);  /* SBuf * */
+  asm_gencall(as, ci, args);
+  if (args[1] == ASMREF_TMP1) {
+    Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+    if (kchar == -1)
+      asm_tvptr(as, tmp, irs->op1);
+    else
+      ra_allockreg(as, kchar, tmp);
+  }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+  IRRef args[1];
+  args[0] = ir->op1;  /* SBuf *sb */
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci;
+  IRRef args[2];
+  args[0] = ASMREF_L;
+  as->gcsteps++;
+  if (ir->op2 == IRTOSTR_NUM) {
+    args[1] = ASMREF_TMP1;  /* cTValue * */
+    ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+  } else {
+    args[1] = ir->op1;  /* int32_t k */
+    if (ir->op2 == IRTOSTR_INT)
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+    else
+      ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+  }
+  asm_setupresult(as, ir, ci);  /* GCstr * */
+  asm_gencall(as, ci, args);
+  if (ir->op2 == IRTOSTR_NUM)
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  IRCallID id;
+  IRRef args[2];
+  lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
+  args[LJ_BE] = (ir-1)->op1;
+  args[LJ_LE] = ir->op1;
+  if (st == IRT_NUM || st == IRT_FLOAT) {
+    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+    ir--;
+  } else {
+    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+  }
+  {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+    CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+    cim.flags |= CCI_VARARG;  /* These calls don't use the hard-float ABI! */
+#else
+    const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+    asm_setupresult(as, ir, ci);
+    asm_gencall(as, ci, args);
+  }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+  IRRef args[3];
+  if (ir->r == RID_SINK)
+    return;
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ir->op1;      /* GCtab *t */
+  args[2] = ASMREF_TMP1;  /* cTValue *key */
+  asm_setupresult(as, ir, ci);  /* TValue * */
+  asm_gencall(as, ci, args);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+  ra_left(as, r, ASMREF_L);
+#else
+  ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+                            const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n = CCI_XNARGS(ci);
+  lua_assert(n <= CCI_NARGS_MAX*2);  /* Account for split args. */
+  if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+  while (n-- > 1) {
+    ir = IR(ir->op1);
+    lua_assert(ir->o == IR_CARG);
+    args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+  }
+  args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+  lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+  uint32_t nargs = 0;
+  if (ir->op1 != REF_NIL) {  /* Count number of arguments first. */
+    IRIns *ira = IR(ir->op1);
+    nargs++;
+    while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+  }
+#if LJ_HASFFI
+  if (IR(ir->op2)->o == IR_CARG) {  /* Copy calling convention info. */
+    CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+    CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+    nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+    nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+  }
+#endif
+  return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[id];
+  IRRef args[2];
+  args[0] = ir->op1;
+  args[1] = ir->op2;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX];
+  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+  asm_collectargs(as, ir, ci, args);
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+#if !LJ_SOFTFP
+static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+  IRRef args[2];
+  args[0] = lref;
+  args[1] = rref;
+  asm_setupresult(as, ir, ci);
+  asm_gencall(as, ci, args);
+}
+
+static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+{
+  IRIns *irp = IR(ir->op1);
+  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+    IRIns *irpp = IR(irp->op1);
+    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+      asm_fppow(as, ir, irpp->op1, irp->op2);
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
 /* -- PHI and loop handling ----------------------------------------------- */
 
 /* Break a PHI cycle by renaming to a free register (evict if needed). */
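
asm_collectargs(), moved into the new Calls section above, fills args[] backwards: each IR_CARG node carries one argument in op2 and links to the previous one through op1, with the first argument left in the final op1. A simplified standalone sketch of that backwards walk, using a hypothetical CArg list rather than the real IR encoding:

#include <assert.h>
#include <stddef.h>

typedef struct CArg {
  struct CArg *prev;  /* Link to the previous argument node (op1 in the IR). */
  int value;          /* The argument itself (op2 in the IR). */
} CArg;

/* Fill args[0..n-1] from the last node backwards, mirroring asm_collectargs(). */
static void collect_args(const CArg *last, int *args, unsigned n)
{
  while (n-- > 1) {
    args[n] = last->value;
    assert(last->prev != NULL);  /* Chain must supply all n arguments. */
    last = last->prev;
  }
  args[0] = last->value;
}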
@@ -1251,12 +1516,7 @@ static void asm_phi_fixup(ASMState *as)
         irt_clearmark(ir->t);
         /* Left PHI gained a spill slot before the loop? */
         if (ra_hasspill(ir->s)) {
-          IRRef ren;
-          lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
-          ren = tref_ref(lj_ir_emit(as->J));
-          as->ir = as->T->ir;  /* The IR may have been reallocated. */
-          IR(ren)->r = (uint8_t)r;
-          IR(ren)->s = SPS_NONE;
+          ra_addrename(as, r, lref, as->loopsnapno);
         }
       }
       rset_clear(work, r);
@@ -1331,6 +1591,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_asm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_asm_arm64.h"
 #elif LJ_TARGET_PPC
 #include "lj_asm_ppc.h"
 #elif LJ_TARGET_MIPS
@@ -1339,6 +1601,136 @@ static void asm_loop(ASMState *as)
 #error "Missing assembler for target CPU"
 #endif
 
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+  switch ((IROp)ir->o) {
+  /* Miscellaneous ops. */
+  case IR_LOOP: asm_loop(as); break;
+  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
+  case IR_USE:
+    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+  case IR_PHI: asm_phi(as, ir); break;
+  case IR_HIOP: asm_hiop(as, ir); break;
+  case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
+
+  /* Guarded assertions. */
+  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+  case IR_ABC:
+    asm_comp(as, ir);
+    break;
+  case IR_EQ: case IR_NE:
+    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+      as->curins--;
+      asm_href(as, ir-1, (IROp)ir->o);
+    } else {
+      asm_equal(as, ir);
+    }
+    break;
+
+  case IR_RETF: asm_retf(as, ir); break;
+
+  /* Bit ops. */
+  case IR_BNOT: asm_bnot(as, ir); break;
+  case IR_BSWAP: asm_bswap(as, ir); break;
+  case IR_BAND: asm_band(as, ir); break;
+  case IR_BOR: asm_bor(as, ir); break;
+  case IR_BXOR: asm_bxor(as, ir); break;
+  case IR_BSHL: asm_bshl(as, ir); break;
+  case IR_BSHR: asm_bshr(as, ir); break;
+  case IR_BSAR: asm_bsar(as, ir); break;
+  case IR_BROL: asm_brol(as, ir); break;
+  case IR_BROR: asm_bror(as, ir); break;
+
+  /* Arithmetic ops. */
+  case IR_ADD: asm_add(as, ir); break;
+  case IR_SUB: asm_sub(as, ir); break;
+  case IR_MUL: asm_mul(as, ir); break;
+  case IR_MOD: asm_mod(as, ir); break;
+  case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP
+  case IR_DIV: case IR_POW: case IR_ABS:
+  case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+    lua_assert(0);  /* Unused for LJ_SOFTFP. */
+    break;
+#else
+  case IR_DIV: asm_div(as, ir); break;
+  case IR_POW: asm_pow(as, ir); break;
+  case IR_ABS: asm_abs(as, ir); break;
+  case IR_ATAN2: asm_atan2(as, ir); break;
+  case IR_LDEXP: asm_ldexp(as, ir); break;
+  case IR_FPMATH: asm_fpmath(as, ir); break;
+  case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+  case IR_MIN: asm_min(as, ir); break;
+  case IR_MAX: asm_max(as, ir); break;
+
+  /* Overflow-checking arithmetic ops. */
+  case IR_ADDOV: asm_addov(as, ir); break;
+  case IR_SUBOV: asm_subov(as, ir); break;
+  case IR_MULOV: asm_mulov(as, ir); break;
+
+  /* Memory references. */
+  case IR_AREF: asm_aref(as, ir); break;
+  case IR_HREF: asm_href(as, ir, 0); break;
+  case IR_HREFK: asm_hrefk(as, ir); break;
+  case IR_NEWREF: asm_newref(as, ir); break;
+  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+  case IR_FREF: asm_fref(as, ir); break;
+  case IR_STRREF: asm_strref(as, ir); break;
+  case IR_LREF: asm_lref(as, ir); break;
+
+  /* Loads and stores. */
+  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+    asm_ahuvload(as, ir);
+    break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
+  case IR_SLOAD: asm_sload(as, ir); break;
+
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
+
+  /* Allocations. */
+  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+  case IR_TNEW: asm_tnew(as, ir); break;
+  case IR_TDUP: asm_tdup(as, ir); break;
+  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
+
+  /* Buffer operations. */
+  case IR_BUFHDR: asm_bufhdr(as, ir); break;
+  case IR_BUFPUT: asm_bufput(as, ir); break;
+  case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+  /* Write barriers. */
+  case IR_TBAR: asm_tbar(as, ir); break;
+  case IR_OBAR: asm_obar(as, ir); break;
+
+  /* Type conversions. */
+  case IR_CONV: asm_conv(as, ir); break;
+  case IR_TOSTR: asm_tostr(as, ir); break;
+  case IR_STRTO: asm_strto(as, ir); break;
+
+  /* Calls. */
+  case IR_CALLA:
+    as->gcsteps++;
+    /* fallthrough */
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+  case IR_CALLXS: asm_callx(as, ir); break;
+  case IR_CARG: break;
+
+  default:
+    setintV(&as->J->errinfo, ir->o);
+    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+    break;
+  }
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Head of a root trace. */
@@ -1537,7 +1929,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
     SnapEntry sn = map[n-1];
     if ((sn & SNAP_FRAME)) {
       *gotframe = 1;
-      return snap_slot(sn);
+      return snap_slot(sn) - LJ_FR2;
     }
   }
   return 0;
@@ -1557,19 +1949,23 @@ static void asm_tail_link(ASMState *as)
 
   if (as->T->link == 0) {
     /* Setup fixed registers for exit to interpreter. */
-    const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+    const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
     int32_t mres;
     if (bc_op(*pc) == BC_JLOOP) {  /* NYI: find a better way to do this. */
       BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
       if (bc_isret(bc_op(*retpc)))
         pc = retpc;
     }
+#if LJ_GC64
+    emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
     ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
     ra_allockreg(as, i32ptr(pc), RID_LPC);
-    mres = (int32_t)(snap->nslots - baseslot);
+#endif
+    mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
     switch (bc_op(*pc)) {
     case BC_CALLM: case BC_CALLMT:
-      mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+      mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
     case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
    case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
     default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
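
mres is the number of pending multiple results handed back to the interpreter; the LJ_FR2 adjustments account for the extra frame slot used by two-slot frame info on GC64 builds. A worked example of the BC_CALLM case with made-up operand values (illustrative only, not LuaJIT source):

#include <assert.h>
#include <stdint.h>

int main(void)
{
  int fr2 = 1;            /* Stands in for LJ_FR2 on a GC64 build (assumption). */
  int nslots = 8, baseslot = 2;
  int a = 1, c = 2;       /* Stand-ins for bc_a(*pc) and bc_c(*pc). */
  int32_t mres = (int32_t)(nslots - baseslot - fr2);
  mres -= (int32_t)(1 + fr2 + a + c);  /* Callee, frame link and fixed args. */
  assert(mres == 0);      /* 8-2-1 = 5 pending slots; 1+1+1+2 = 5 consumed. */
  return 0;
}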
@@ -1581,6 +1977,11 @@ static void asm_tail_link(ASMState *as)
   }
   emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
 
+  if (as->J->ktrace) {  /* Patch ktrace slot with the final GCtrace pointer. */
+    setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+    IR(as->J->ktrace)->o = IR_KGC;
+  }
+
   /* Sync the interpreter state with the on-trace state. */
   asm_stack_restore(as, snap);
 
@@ -1606,17 +2007,22 @@ static void asm_setup_regsp(ASMState *as)
   ra_setup(as);
 
   /* Clear reg/sp for constants. */
-  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+  for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
     ir->prev = REGSP_INIT;
+    if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+      ir->i = 0;  /* Will become non-zero only for RIP-relative addresses. */
+#else
+      /* Make life easier for backends by putting address of constant in i. */
+      ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+      ir++;
+    }
+  }
 
   /* REF_BASE is used for implicit references to the BASE register. */
   lastir->prev = REGSP_HINT(RID_BASE);
 
-  ir = IR(nins-1);
-  if (ir->o == IR_RENAME) {
-    do { ir--; nins--; } while (ir->o == IR_RENAME);
-    T->nins = nins;  /* Remove any renames left over from ASM restart. */
-  }
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
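
The new constant-setup loop above recognizes 64-bit constants, which occupy two IR slots: the opcode sits in the first slot and the raw 64-bit payload in the one that follows (hence the extra ir++). On non-GC64 64-bit builds it caches the payload's address in ir->i, which only fits in an int32_t because LuaJIT's allocator keeps the IR in the low 2GB. A standalone sketch of the two-slot idea with a hypothetical Slot type, using intptr_t to stay portable:

#include <assert.h>
#include <stdint.h>

typedef struct Slot {
  uint8_t op;             /* Opcode of the constant instruction. */
  intptr_t payload_addr;  /* Cached address of the 64-bit payload; LuaJIT
                             narrows this to int32_t under its low-2GB
                             allocator assumption. */
} Slot;

int main(void)
{
  uint64_t payload = 0x123456789abcdef0ULL;  /* The "second slot". */
  Slot slot0 = { 0, 0 };
  slot0.payload_addr = (intptr_t)&payload;   /* "Address of constant in i". */
  assert(*(uint64_t *)slot0.payload_addr == payload);
  return 0;
}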
@@ -1677,7 +2083,7 @@ static void asm_setup_regsp(ASMState *as)
         as->modset |= RSET_SCRATCH;
         continue;
       }
-    case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -1722,10 +2128,20 @@ static void asm_setup_regsp(ASMState *as)
       /* fallthrough */
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
-    case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+    case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
       if (REGARG_NUMGPR < 3 && as->evenspill < 3)
         as->evenspill = 3;  /* lj_str_new and lj_tab_newkey need 3 args. */
-    case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#if LJ_TARGET_X86 && LJ_HASFFI
+      if (0) {
+    case IR_CNEW:
+        if (ir->op2 != REF_NIL && as->evenspill < 4)
+          as->evenspill = 4;  /* lj_cdata_newv needs 4 args. */
+      }
+#else
+    case IR_CNEW:
+#endif
+    case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+    case IR_BUFSTR:
       ir->prev = REGSP_HINT(RID_RET);
       if (inloop)
         as->modset = RSET_SCRATCH;
@@ -1734,21 +2150,26 @@ static void asm_setup_regsp(ASMState *as)
       if (inloop)
         as->modset = RSET_SCRATCH;
       break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
-    case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+    case IR_ATAN2:
+#if LJ_TARGET_X86
+      if (as->evenspill < 4)  /* Leave room to call atan2(). */
+        as->evenspill = 4;
+#endif
+#if !LJ_TARGET_X86ORX64
+    case IR_LDEXP:
+#endif
 #endif
     case IR_POW:
       if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
-        ir->prev = REGSP_HINT(RID_XMM0);
         if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+          as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+        break;
 #else
         ir->prev = REGSP_HINT(RID_FPRET);
-        if (inloop)
-          as->modset |= RSET_SCRATCH;
-#endif
         continue;
+#endif
       }
       /* fallthrough for integer POW */
     case IR_DIV: case IR_MOD:
@@ -1761,31 +2182,33 @@ static void asm_setup_regsp(ASMState *as)
       break;
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
-      if (ir->op2 == IRFPM_EXP2) {  /* May be joined to lj_vm_pow_sse. */
-        ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
-        if (as->evenspill < 4)  /* Leave room for 16 byte scratch area. */
+      if (ir->op2 <= IRFPM_TRUNC) {
+        if (!(as->flags & JIT_F_SSE4_1)) {
+          ir->prev = REGSP_HINT(RID_XMM0);
+          if (inloop)
+            as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+          continue;
+        }
+        break;
+      } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+        if (as->evenspill < 4)  /* Leave room to call pow(). */
          as->evenspill = 4;
-#endif
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
-        continue;
-      } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
-        ir->prev = REGSP_HINT(RID_XMM0);
-        if (inloop)
-          as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
-        continue;
       }
+#endif
+      if (inloop)
+        as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
       break;
 #else
       ir->prev = REGSP_HINT(RID_FPRET);
-      if (inloop)
-        as->modset |= RSET_SCRATCH;
       continue;
 #endif
 #if LJ_TARGET_X86ORX64
     /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
-    case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+    case IR_BSHL: case IR_BSHR: case IR_BSAR:
+      if ((as->flags & JIT_F_BMI2))  /* Except if BMI2 is available. */
+        break;
+    case IR_BROL: case IR_BROR:
       if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
         IR(ir->op2)->r = REGSP_HINT(RID_ECX);
         if (inloop)
@@ -1831,14 +2254,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   ASMState *as = &as_;
   MCode *origtop;
 
+  /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
+  {
+    IRRef nins = T->nins;
+    IRIns *ir = &T->ir[nins-1];
+    if (ir->o == IR_NOP || ir->o == IR_RENAME) {
+      do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
+      T->nins = nins;
+    }
+  }
+
   /* Ensure an initialized instruction beyond the last one for HIOP checks. */
-  J->cur.nins = lj_ir_nextins(J);
-  J->cur.ir[J->cur.nins].o = IR_NOP;
+  /* This also allows one RENAME to be added without reallocating curfinal. */
+  as->orignins = lj_ir_nextins(J);
+  J->cur.ir[as->orignins].o = IR_NOP;
 
   /* Setup initial state. Copy some fields to reduce indirections. */
   as->J = J;
   as->T = T;
-  as->ir = T->ir;
+  J->curfinal = lj_trace_alloc(J->L, T);  /* This copies the IR, too. */
   as->flags = J->flags;
   as->loopref = J->loopref;
   as->realign = NULL;
@@ -1851,12 +2285,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
 
-  do {
+  /*
+  ** This is a loop, because the MCode may have to be (re-)assembled
+  ** multiple times:
+  **
+  ** 1. as->realign is set (and the assembly aborted), if the arch-specific
+  **    backend wants the MCode to be aligned differently.
+  **
+  **    This is currently only the case on x86/x64, where small loops get
+  **    an aligned loop body plus a short branch. Not much effort is wasted,
+  **    because the abort happens very quickly and only once.
+  **
+  ** 2. The IR is immovable, since the MCode embeds pointers to various
+  **    constants inside the IR. But RENAMEs may need to be added to the IR
+  **    during assembly, which might grow and reallocate the IR. We check
+  **    at the end if the IR (in J->cur.ir) has actually grown, resize the
+  **    copy (in J->curfinal.ir) and try again.
+  **
+  **    95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
+  **    2 RENAMEs and only 0.5% have more than that. That's why we opt to
+  **    always have one spare slot in the IR (see above), which means we
+  **    have to redo the assembly for only ~2% of all traces.
+  **
+  **    Very, very rarely, this needs to be done repeatedly, since the
+  **    location of constants inside the IR (actually, reachability from
+  **    a global pointer) may affect register allocation and thus the
+  **    number of RENAMEs.
+  */
+  for (;;) {
     as->mcp = as->mctop;
 #ifdef LUA_USE_ASSERT
     as->mcp_prev = as->mcp;
 #endif
-    as->curins = T->nins;
+    as->ir = J->curfinal->ir;  /* Use the copied IR. */
+    as->curins = J->cur.nins = as->orignins;
+
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
 
@@ -1884,22 +2347,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
       checkmclim(as);
       asm_ir(as, ir);
     }
-  } while (as->realign);  /* Retry in case the MCode needs to be realigned. */
 
-  /* Emit head of trace. */
-  RA_DBG_REF();
-  checkmclim(as);
-  if (as->gcsteps > 0) {
-    as->curins = as->T->snap[0].ref;
-    asm_snap_prep(as);  /* The GC check is a guard. */
-    asm_gc_check(as);
+    if (as->realign && J->curfinal->nins >= T->nins)
+      continue;  /* Retry in case only the MCode needs to be realigned. */
+
+    /* Emit head of trace. */
+    RA_DBG_REF();
+    checkmclim(as);
+    if (as->gcsteps > 0) {
+      as->curins = as->T->snap[0].ref;
+      asm_snap_prep(as);  /* The GC check is a guard. */
+      asm_gc_check(as);
+      as->curins = as->stopins;
+    }
+    ra_evictk(as);
+    if (as->parent)
+      asm_head_side(as);
+    else
+      asm_head_root(as);
+    asm_phi_fixup(as);
+
+    if (J->curfinal->nins >= T->nins) {  /* IR didn't grow? */
+      lua_assert(J->curfinal->nk == T->nk);
+      memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+             (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
+      T->nins = J->curfinal->nins;
+      break;  /* Done. */
+    }
+
+    /* Otherwise try again with a bigger IR. */
+    lj_trace_free(J2G(J), J->curfinal);
+    J->curfinal = NULL;  /* In case lj_trace_alloc() OOMs. */
+    J->curfinal = lj_trace_alloc(J->L, T);
+    as->realign = NULL;
   }
-  ra_evictk(as);
-  if (as->parent)
-    asm_head_side(as);
-  else
-    asm_head_root(as);
-  asm_phi_fixup(as);
 
   RA_DBGX((as, "===== START ===="));
   RA_DBG_FLUSH();
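
The restructured loop above assembles into a fixed copy of the IR (J->curfinal) and redoes the whole pass only when more RENAMEs were appended than the copy can hold. A self-contained sketch of that retry pattern with hypothetical names (assemble_once() stands in for the real assembly pass):

#include <stdio.h>

static unsigned passes;  /* Counts how many assembly passes were needed. */

/* Pretend each pass discovers it needs 3 extra slots beyond the base 10. */
static unsigned assemble_once(unsigned cap)
{
  (void)cap;
  passes++;
  return 10 + 3;  /* Slots actually required by this pass. */
}

int main(void)
{
  unsigned cap = 10 + 1;  /* One spare slot covers ~98% of traces (see comment). */
  for (;;) {
    unsigned need = assemble_once(cap);
    if (need <= cap) break;  /* The copy was big enough: done. */
    cap = need;              /* Otherwise grow the copy and retry the pass. */
  }
  printf("assembled in %u passes with capacity %u\n", passes, cap);
  return 0;
}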
@@ -1912,6 +2393,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   if (!as->loopref)
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+#if LJ_TARGET_MCODE_FIXUP
+  asm_mcode_fixup(T->mcode, T->szmcode);
+#endif
   lj_mcode_sync(T->mcode, origtop);
 }
 