diff options
author | Mike Pall <mike> | 2023-09-09 16:30:14 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2023-09-09 16:30:14 +0200 |
commit | 5149b0a3a2809fef155ff2b2f01c667d920db3c2 (patch) | |
tree | 9e35cab02b970a44715e8168a6530e0b468fb9a2 /src | |
parent | dfc122e45ce0dd76a47794789b413aeaa4cc3773 (diff) | |
download | luajit-5149b0a3a2809fef155ff2b2f01c667d920db3c2.tar.gz luajit-5149b0a3a2809fef155ff2b2f01c667d920db3c2.tar.bz2 luajit-5149b0a3a2809fef155ff2b2f01c667d920db3c2.zip |
ARM64: Consolidate 32/64-bit constant handling in assembler.
Thanks to Peter Cawley. #1065
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm.c | 4 | ||||
-rw-r--r-- | src/lj_emit_arm64.h | 73 |
2 files changed, 41 insertions, 36 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index c02a1b9e..844910ad 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -606,7 +606,11 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) | |||
606 | IRIns *ir = IR(ref); | 606 | IRIns *ir = IR(ref); |
607 | if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || | 607 | if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || |
608 | #if LJ_GC64 | 608 | #if LJ_GC64 |
609 | #if LJ_TARGET_ARM64 | ||
610 | (ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) || | ||
611 | #else | ||
609 | (ir->o == IR_KINT && k == ir->i) || | 612 | (ir->o == IR_KINT && k == ir->i) || |
613 | #endif | ||
610 | (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || | 614 | (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || |
611 | ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && | 615 | ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && |
612 | k == (intptr_t)ir_kptr(ir)) | 616 | k == (intptr_t)ir_kptr(ir)) |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index 86626177..50e658dd 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h | |||
@@ -20,7 +20,7 @@ static uint64_t get_k64val(ASMState *as, IRRef ref) | |||
20 | } else { | 20 | } else { |
21 | lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, | 21 | lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, |
22 | "bad 64 bit const IR op %d", ir->o); | 22 | "bad 64 bit const IR op %d", ir->o); |
23 | return ir->i; /* Sign-extended. */ | 23 | return (uint32_t)ir->i; /* Zero-extended. */ |
24 | } | 24 | } |
25 | } | 25 | } |
26 | 26 | ||
@@ -152,11 +152,10 @@ nopair: | |||
152 | /* Prefer rematerialization of BASE/L from global_State over spills. */ | 152 | /* Prefer rematerialization of BASE/L from global_State over spills. */ |
153 | #define emit_canremat(ref) ((ref) <= ASMREF_L) | 153 | #define emit_canremat(ref) ((ref) <= ASMREF_L) |
154 | 154 | ||
155 | /* Try to find an N-step delta relative to other consts with N < lim. */ | 155 | /* Try to find a one-step delta relative to other consts. */ |
156 | static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) | 156 | static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64) |
157 | { | 157 | { |
158 | RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL); | 158 | RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL); |
159 | if (lim <= 1) return 0; /* Can't beat that. */ | ||
160 | while (work) { | 159 | while (work) { |
161 | Reg r = rset_picktop(work); | 160 | Reg r = rset_picktop(work); |
162 | IRRef ref = regcost_ref(as->cost[r]); | 161 | IRRef ref = regcost_ref(as->cost[r]); |
@@ -165,13 +164,14 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) | |||
165 | uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : | 164 | uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : |
166 | get_k64val(as, ref); | 165 | get_k64val(as, ref); |
167 | int64_t delta = (int64_t)(k - kx); | 166 | int64_t delta = (int64_t)(k - kx); |
167 | if (!is64) delta = (int64_t)(int32_t)delta; /* Sign-extend. */ | ||
168 | if (delta == 0) { | 168 | if (delta == 0) { |
169 | emit_dm(as, A64I_MOVx, rd, r); | 169 | emit_dm(as, is64|A64I_MOVw, rd, r); |
170 | return 1; | 170 | return 1; |
171 | } else { | 171 | } else { |
172 | uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta); | 172 | uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta); |
173 | if (k12) { | 173 | if (k12) { |
174 | emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); | 174 | emit_dn(as, (delta < 0 ? A64I_SUBw : A64I_ADDw)^is64^k12, rd, r); |
175 | return 1; | 175 | return 1; |
176 | } | 176 | } |
177 | /* Do other ops or multi-step deltas pay off? Probably not. | 177 | /* Do other ops or multi-step deltas pay off? Probably not. |
@@ -184,51 +184,52 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) | |||
184 | return 0; /* Failed. */ | 184 | return 0; /* Failed. */ |
185 | } | 185 | } |
186 | 186 | ||
187 | static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) | 187 | static void emit_loadk(ASMState *as, Reg rd, uint64_t u64) |
188 | { | 188 | { |
189 | int i, zeros = 0, ones = 0, neg; | 189 | int zeros = 0, ones = 0, neg, lshift = 0; |
190 | if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ | 190 | int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2; |
191 | /* Count homogeneous 16 bit fragments. */ | 191 | /* Count non-homogeneous 16 bit fragments. */ |
192 | for (i = 0; i < 4; i++) { | 192 | while (--i >= 0) { |
193 | uint64_t frag = (u64 >> i*16) & 0xffff; | 193 | uint32_t frag = (u64 >> i*16) & 0xffff; |
194 | zeros += (frag == 0); | 194 | zeros += (frag != 0); |
195 | ones += (frag == 0xffff); | 195 | ones += (frag != 0xffff); |
196 | } | 196 | } |
197 | neg = ones > zeros; /* Use MOVN if it pays off. */ | 197 | neg = ones < zeros; /* Use MOVN if it pays off. */ |
198 | if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */ | 198 | if ((neg ? ones : zeros) > 1) { /* Need 2+ ins. Try 1 ins encodings. */ |
199 | uint32_t k13 = emit_isk13(u64, is64); | 199 | uint32_t k13 = emit_isk13(u64, is64); |
200 | if (k13) { | 200 | if (k13) { |
201 | emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); | 201 | emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); |
202 | return; | 202 | return; |
203 | } | 203 | } |
204 | } | 204 | if (emit_kdelta(as, rd, u64, is64)) { |
205 | if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { | 205 | return; |
206 | int shift = 0, lshift = 0; | ||
207 | uint64_t n64 = neg ? ~u64 : u64; | ||
208 | if (n64 != 0) { | ||
209 | /* Find first/last fragment to be filled. */ | ||
210 | shift = (63-emit_clz64(n64)) & ~15; | ||
211 | lshift = emit_ctz64(n64) & ~15; | ||
212 | } | 206 | } |
213 | /* MOVK requires the original value (u64). */ | 207 | } |
214 | while (shift > lshift) { | 208 | if (neg) { |
215 | uint32_t u16 = (u64 >> shift) & 0xffff; | 209 | u64 = ~u64; |
216 | /* Skip fragments that are correctly filled by MOVN/MOVZ. */ | 210 | if (!is64) u64 = (uint32_t)u64; |
217 | if (u16 != (neg ? 0xffff : 0)) | 211 | } |
218 | emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); | 212 | if (u64) { |
219 | shift -= 16; | 213 | /* Find first/last fragment to be filled. */ |
214 | int shift = (63-emit_clz64(u64)) & ~15; | ||
215 | lshift = emit_ctz64(u64) & ~15; | ||
216 | for (; shift > lshift; shift -= 16) { | ||
217 | uint32_t frag = (u64 >> shift) & 0xffff; | ||
218 | if (frag == 0) continue; /* Will be correctly filled by MOVN/MOVZ. */ | ||
219 | if (neg) frag ^= 0xffff; /* MOVK requires the original value. */ | ||
220 | emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd); | ||
220 | } | 221 | } |
221 | /* But MOVN needs an inverted value (n64). */ | ||
222 | emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | | ||
223 | A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); | ||
224 | } | 222 | } |
223 | /* But MOVN needs an inverted value. */ | ||
224 | emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) | | ||
225 | A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); | ||
225 | } | 226 | } |
226 | 227 | ||
227 | /* Load a 32 bit constant into a GPR. */ | 228 | /* Load a 32 bit constant into a GPR. */ |
228 | #define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) | 229 | #define emit_loadi(as, rd, i) emit_loadk(as, rd, (uint32_t)i) |
229 | 230 | ||
230 | /* Load a 64 bit constant into a GPR. */ | 231 | /* Load a 64 bit constant into a GPR. */ |
231 | #define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) | 232 | #define emit_loadu64(as, rd, i) emit_loadk(as, rd, i) |
232 | 233 | ||
233 | #define glofs(as, k) \ | 234 | #define glofs(as, k) \ |
234 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) | 235 | ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) |