diff options
-rw-r--r-- | src/lj_asm.c | 65 | ||||
-rw-r--r-- | src/lj_emit_arm.h | 2 | ||||
-rw-r--r-- | src/lj_emit_mips.h | 6 | ||||
-rw-r--r-- | src/lj_emit_ppc.h | 14 | ||||
-rw-r--r-- | src/lj_emit_x86.h | 9 | ||||
-rw-r--r-- | src/lj_target_arm.h | 2 | ||||
-rw-r--r-- | src/lj_target_mips.h | 2 | ||||
-rw-r--r-- | src/lj_target_x86.h | 2 |
8 files changed, 97 insertions, 5 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 1a78e32a..4da1a0a3 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -1079,6 +1079,64 @@ static void asm_phi_shuffle(ASMState *as) | |||
1079 | } | 1079 | } |
1080 | } | 1080 | } |
1081 | 1081 | ||
1082 | /* Copy unsynced left/right PHI spill slots. Rarely needed. */ | ||
1083 | static void asm_phi_copyspill(ASMState *as) | ||
1084 | { | ||
1085 | int need = 0; | ||
1086 | IRIns *ir; | ||
1087 | for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) | ||
1088 | if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s)) | ||
1089 | need |= irt_isfp(ir->t) ? 2 : 1; /* Unsynced spill slot? */ | ||
1090 | if ((need & 1)) { /* Copy integer spill slots. */ | ||
1091 | #if !LJ_TARGET_X86ORX64 | ||
1092 | Reg r = RID_TMP; | ||
1093 | #else | ||
1094 | Reg r = RID_RET; | ||
1095 | if ((as->freeset & RSET_GPR)) | ||
1096 | r = rset_pickbot((as->freeset & RSET_GPR)); | ||
1097 | else | ||
1098 | emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); | ||
1099 | #endif | ||
1100 | for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) { | ||
1101 | if (ra_hasspill(ir->s)) { | ||
1102 | IRIns *irl = IR(ir->op1); | ||
1103 | if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) { | ||
1104 | emit_spstore(as, irl, r, sps_scale(irl->s)); | ||
1105 | emit_spload(as, ir, r, sps_scale(ir->s)); | ||
1106 | } | ||
1107 | } | ||
1108 | } | ||
1109 | #if LJ_TARGET_X86ORX64 | ||
1110 | if (!rset_test(as->freeset, r)) | ||
1111 | emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); | ||
1112 | #endif | ||
1113 | } | ||
1114 | #if !LJ_SOFTFP | ||
1115 | if ((need & 2)) { /* Copy FP spill slots. */ | ||
1116 | #if LJ_TARGET_X86 | ||
1117 | Reg r = RID_XMM0; | ||
1118 | #else | ||
1119 | Reg r = RID_FPRET; | ||
1120 | #endif | ||
1121 | if ((as->freeset & RSET_FPR)) | ||
1122 | r = rset_pickbot((as->freeset & RSET_FPR)); | ||
1123 | if (!rset_test(as->freeset, r)) | ||
1124 | emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); | ||
1125 | for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) { | ||
1126 | if (ra_hasspill(ir->s)) { | ||
1127 | IRIns *irl = IR(ir->op1); | ||
1128 | if (ra_hasspill(irl->s) && irt_isfp(ir->t)) { | ||
1129 | emit_spstore(as, irl, r, sps_scale(irl->s)); | ||
1130 | emit_spload(as, ir, r, sps_scale(ir->s)); | ||
1131 | } | ||
1132 | } | ||
1133 | } | ||
1134 | if (!rset_test(as->freeset, r)) | ||
1135 | emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); | ||
1136 | } | ||
1137 | #endif | ||
1138 | } | ||
1139 | |||
1082 | /* Emit renames for left PHIs which are only spilled outside the loop. */ | 1140 | /* Emit renames for left PHIs which are only spilled outside the loop. */ |
1083 | static void asm_phi_fixup(ASMState *as) | 1141 | static void asm_phi_fixup(ASMState *as) |
1084 | { | 1142 | { |
@@ -1132,7 +1190,7 @@ static void asm_phi(ASMState *as, IRIns *ir) | |||
1132 | if (ra_hasreg(irl->r) || ra_hasreg(irr->r)) | 1190 | if (ra_hasreg(irl->r) || ra_hasreg(irr->r)) |
1133 | lj_trace_err(as->J, LJ_TRERR_NYIPHI); | 1191 | lj_trace_err(as->J, LJ_TRERR_NYIPHI); |
1134 | ra_spill(as, ir); | 1192 | ra_spill(as, ir); |
1135 | irl->s = irr->s = ir->s; /* Sync left/right PHI spill slots. */ | 1193 | irr->s = ir->s; /* Set right PHI spill slot. Sync left slot later. */ |
1136 | } | 1194 | } |
1137 | } | 1195 | } |
1138 | 1196 | ||
@@ -1142,6 +1200,7 @@ static void asm_loop_fixup(ASMState *as); | |||
1142 | /* Middle part of a loop. */ | 1200 | /* Middle part of a loop. */ |
1143 | static void asm_loop(ASMState *as) | 1201 | static void asm_loop(ASMState *as) |
1144 | { | 1202 | { |
1203 | MCode *mcspill; | ||
1145 | /* LOOP is a guard, so the snapno is up to date. */ | 1204 | /* LOOP is a guard, so the snapno is up to date. */ |
1146 | as->loopsnapno = as->snapno; | 1205 | as->loopsnapno = as->snapno; |
1147 | if (as->gcsteps) | 1206 | if (as->gcsteps) |
@@ -1151,10 +1210,14 @@ static void asm_loop(ASMState *as) | |||
1151 | as->sectref = 0; | 1210 | as->sectref = 0; |
1152 | if (!neverfuse(as)) as->fuseref = 0; | 1211 | if (!neverfuse(as)) as->fuseref = 0; |
1153 | asm_phi_shuffle(as); | 1212 | asm_phi_shuffle(as); |
1213 | mcspill = as->mcp; | ||
1214 | asm_phi_copyspill(as); | ||
1154 | asm_loop_fixup(as); | 1215 | asm_loop_fixup(as); |
1155 | as->mcloop = as->mcp; | 1216 | as->mcloop = as->mcp; |
1156 | RA_DBGX((as, "===== LOOP =====")); | 1217 | RA_DBGX((as, "===== LOOP =====")); |
1157 | if (!as->realign) RA_DBG_FLUSH(); | 1218 | if (!as->realign) RA_DBG_FLUSH(); |
1219 | if (as->mcp != mcspill) | ||
1220 | emit_jmp(as, mcspill); | ||
1158 | } | 1221 | } |
1159 | 1222 | ||
1160 | /* -- Target-specific assembler ------------------------------------------- */ | 1223 | /* -- Target-specific assembler ------------------------------------------- */ |
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index 7654c19b..21ece88e 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h | |||
@@ -231,6 +231,8 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target) | |||
231 | as->mcp = p; | 231 | as->mcp = p; |
232 | } | 232 | } |
233 | 233 | ||
/* Emit an unconditional branch (ARMI_B) to target via emit_branch(). */
#define emit_jmp(as, target) \
  emit_branch((as), ARMI_B, (target))
235 | |||
234 | static void emit_call(ASMState *as, void *target) | 236 | static void emit_call(ASMState *as, void *target) |
235 | { | 237 | { |
236 | MCode *p = --as->mcp; | 238 | MCode *p = --as->mcp; |
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index 59f0640b..3edf8851 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h | |||
@@ -146,6 +146,12 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target) | |||
146 | as->mcp = p; | 146 | as->mcp = p; |
147 | } | 147 | } |
148 | 148 | ||
149 | static void emit_jmp(ASMState *as, MCode *target) | ||
150 | { | ||
151 | *--as->mcp = MIPSI_NOP; | ||
152 | emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); | ||
153 | } | ||
154 | |||
149 | static void emit_call(ASMState *as, void *target) | 155 | static void emit_call(ASMState *as, void *target) |
150 | { | 156 | { |
151 | MCode *p = as->mcp; | 157 | MCode *p = as->mcp; |
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index bc361c5b..f2bf0a94 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h | |||
@@ -142,12 +142,18 @@ typedef MCode *MCLabel; | |||
142 | 142 | ||
143 | static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) | 143 | static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) |
144 | { | 144 | { |
145 | MCode *p = as->mcp; | 145 | MCode *p = --as->mcp; |
146 | ptrdiff_t delta = ((char *)target - (char *)p) + 4; | 146 | ptrdiff_t delta = (char *)target - (char *)p; |
147 | lua_assert(((delta + 0x8000) >> 16) == 0); | 147 | lua_assert(((delta + 0x8000) >> 16) == 0); |
148 | pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); | 148 | pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); |
149 | *--p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); | 149 | *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); |
150 | as->mcp = p; | 150 | } |
151 | |||
152 | static void emit_jmp(ASMState *as, MCode *target) | ||
153 | { | ||
154 | MCode *p = --as->mcp; | ||
155 | ptrdiff_t delta = (char *)target - (char *)p; | ||
156 | *p = PPCI_B | (delta & 0x03fffffcu); | ||
151 | } | 157 | } |
152 | 158 | ||
153 | static void emit_call(ASMState *as, void *target) | 159 | static void emit_call(ASMState *as, void *target) |
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 4f3a08a1..dfb70574 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h | |||
@@ -383,6 +383,15 @@ static void emit_jcc(ASMState *as, int cc, MCode *target) | |||
383 | as->mcp = p - 6; | 383 | as->mcp = p - 6; |
384 | } | 384 | } |
385 | 385 | ||
386 | /* jmp target */ | ||
387 | static void emit_jmp(ASMState *as, MCode *target) | ||
388 | { | ||
389 | MCode *p = as->mcp; | ||
390 | *(int32_t *)(p-4) = jmprel(p, target); | ||
391 | p[-5] = XI_JMP; | ||
392 | as->mcp = p - 5; | ||
393 | } | ||
394 | |||
386 | /* call target */ | 395 | /* call target */ |
387 | static void emit_call_(ASMState *as, MCode *target) | 396 | static void emit_call_(ASMState *as, MCode *target) |
388 | { | 397 | { |
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index 96fc85e3..a24fc819 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h | |||
@@ -101,6 +101,8 @@ enum { | |||
101 | #define SPS_FIXED 2 | 101 | #define SPS_FIXED 2 |
102 | #define SPS_FIRST 2 | 102 | #define SPS_FIRST 2 |
103 | 103 | ||
104 | #define SPOFS_TMP 0 | ||
105 | |||
104 | #define sps_scale(slot) (4 * (int32_t)(slot)) | 106 | #define sps_scale(slot) (4 * (int32_t)(slot)) |
105 | #define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) | 107 | #define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) |
106 | 108 | ||
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h index e81d55bd..1b7727d0 100644 --- a/src/lj_target_mips.h +++ b/src/lj_target_mips.h | |||
@@ -102,6 +102,8 @@ enum { | |||
102 | #define SPS_FIXED 5 | 102 | #define SPS_FIXED 5 |
103 | #define SPS_FIRST 4 | 103 | #define SPS_FIRST 4 |
104 | 104 | ||
105 | #define SPOFS_TMP 0 | ||
106 | |||
105 | #define sps_scale(slot) (4 * (int32_t)(slot)) | 107 | #define sps_scale(slot) (4 * (int32_t)(slot)) |
106 | #define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) | 108 | #define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) |
107 | 109 | ||
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 3de408cc..cc15490b 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
@@ -139,6 +139,8 @@ enum { | |||
139 | #define SPS_FIRST 2 | 139 | #define SPS_FIRST 2 |
140 | #endif | 140 | #endif |
141 | 141 | ||
142 | #define SPOFS_TMP 0 | ||
143 | |||
142 | #define sps_scale(slot) (4 * (int32_t)(slot)) | 144 | #define sps_scale(slot) (4 * (int32_t)(slot)) |
143 | #define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) | 145 | #define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) |
144 | 146 | ||