author     Mike Pall <mike>  2023-09-09 16:30:14 +0200
committer  Mike Pall <mike>  2023-09-09 16:30:14 +0200
commit     5149b0a3a2809fef155ff2b2f01c667d920db3c2 (patch)
tree       9e35cab02b970a44715e8168a6530e0b468fb9a2 /src
parent     dfc122e45ce0dd76a47794789b413aeaa4cc3773 (diff)
ARM64: Consolidate 32/64-bit constant handling in assembler.
Thanks to Peter Cawley. #1065
Diffstat (limited to 'src')
-rw-r--r--  src/lj_asm.c         |  4
-rw-r--r--  src/lj_emit_arm64.h  | 73
2 files changed, 41 insertions, 36 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index c02a1b9e..844910ad 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -606,7 +606,11 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
     IRIns *ir = IR(ref);
     if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
 #if LJ_GC64
+#if LJ_TARGET_ARM64
+        (ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) ||
+#else
         (ir->o == IR_KINT && k == ir->i) ||
+#endif
         (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
         ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
          k == (intptr_t)ir_kptr(ir))
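
Note on the hunk above: since get_k64val() now zero-extends 32-bit IR_KINT constants on ARM64 (see the lj_emit_arm64.h hunk below), ra_allock() must also match a candidate register against the zero-extended value, hence the (uint64_t)k == (uint32_t)ir->i comparison under LJ_TARGET_ARM64. A minimal standalone sketch of the difference this makes for a negative constant (illustrative values only, not LuaJIT code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
  int32_t i = -1;                             /* A 32-bit IR_KINT constant. */
  uint64_t sign_ext = (uint64_t)(int64_t)i;   /* Old key: 0xffffffffffffffff */
  uint64_t zero_ext = (uint64_t)(uint32_t)i;  /* New key: 0x00000000ffffffff */
  uint64_t k = 0xffffffffu;                   /* Value being looked up. */
  printf("sign-extended match: %d\n", k == sign_ext);  /* 0 */
  printf("zero-extended match: %d\n", k == zero_ext);  /* 1 */
  return 0;
}
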
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index 86626177..50e658dd 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -20,7 +20,7 @@ static uint64_t get_k64val(ASMState *as, IRRef ref)
   } else {
     lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
                "bad 64 bit const IR op %d", ir->o);
-    return ir->i;  /* Sign-extended. */
+    return (uint32_t)ir->i;  /* Zero-extended. */
   }
 }
 
@@ -152,11 +152,10 @@ nopair:
 /* Prefer rematerialization of BASE/L from global_State over spills. */
 #define emit_canremat(ref)  ((ref) <= ASMREF_L)
 
-/* Try to find an N-step delta relative to other consts with N < lim. */
-static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
+/* Try to find a one-step delta relative to other consts. */
+static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64)
 {
   RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
-  if (lim <= 1) return 0;  /* Can't beat that. */
   while (work) {
     Reg r = rset_picktop(work);
     IRRef ref = regcost_ref(as->cost[r]);
@@ -165,13 +164,14 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
       uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
                                      get_k64val(as, ref);
       int64_t delta = (int64_t)(k - kx);
+      if (!is64) delta = (int64_t)(int32_t)delta;  /* Sign-extend. */
       if (delta == 0) {
-        emit_dm(as, A64I_MOVx, rd, r);
+        emit_dm(as, is64|A64I_MOVw, rd, r);
         return 1;
       } else {
         uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta);
         if (k12) {
-          emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
+          emit_dn(as, (delta < 0 ? A64I_SUBw : A64I_ADDw)^is64^k12, rd, r);
           return 1;
         }
         /* Do other ops or multi-step deltas pay off? Probably not.
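
In the reworked emit_kdelta() above, is64 is either 0 or A64I_X, so XOR-ing it into the 32-bit (w-register) opcode turns A64I_ADDw/A64I_SUBw/A64I_MOVw into their 64-bit forms when needed; for 32-bit constants the delta is first narrowed by a sign-extending cast so that wrap-around confined to the low 32 bits still counts as a small delta. A rough sketch of the width-selection trick, using made-up placeholder encodings rather than the real A64I_* values from lj_target_arm64.h:

#include <stdio.h>
#include <stdint.h>

/* Placeholder encodings for illustration only; the assumption mirrored here
** is that the 64-bit (x-register) form is the 32-bit (w-register) form with
** one extra bit (the sf bit) set, so a single XOR selects the width. */
#define DEMO_ADDw  0x11000000u
#define DEMO_SUBw  0x51000000u
#define DEMO_X     0x80000000u  /* sf bit: selects the 64-bit form. */

static uint32_t demo_addsub(int64_t delta, uint32_t is64)
{
  /* Same shape as emit_kdelta: pick ADD or SUB in w-form, widen via XOR. */
  return (delta < 0 ? DEMO_SUBw : DEMO_ADDw) ^ is64;
}

int main(void)
{
  printf("32-bit add: 0x%08x\n", demo_addsub(8, 0));       /* 0x11000000 */
  printf("64-bit sub: 0x%08x\n", demo_addsub(-8, DEMO_X)); /* 0xd1000000 */
  return 0;
}
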
@@ -184,51 +184,52 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
   return 0;  /* Failed. */
 }
 
-static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
+static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
 {
-  int i, zeros = 0, ones = 0, neg;
-  if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
-  /* Count homogeneous 16 bit fragments. */
-  for (i = 0; i < 4; i++) {
-    uint64_t frag = (u64 >> i*16) & 0xffff;
-    zeros += (frag == 0);
-    ones += (frag == 0xffff);
+  int zeros = 0, ones = 0, neg, lshift = 0;
+  int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2;
+  /* Count non-homogeneous 16 bit fragments. */
+  while (--i >= 0) {
+    uint32_t frag = (u64 >> i*16) & 0xffff;
+    zeros += (frag != 0);
+    ones += (frag != 0xffff);
   }
-  neg = ones > zeros;  /* Use MOVN if it pays off. */
-  if ((neg ? ones : zeros) < 3) {  /* Need 2+ ins. Try shorter K13 encoding. */
+  neg = ones < zeros;  /* Use MOVN if it pays off. */
+  if ((neg ? ones : zeros) > 1) {  /* Need 2+ ins. Try 1 ins encodings. */
     uint32_t k13 = emit_isk13(u64, is64);
     if (k13) {
       emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
       return;
     }
-  }
-  if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
-    int shift = 0, lshift = 0;
-    uint64_t n64 = neg ? ~u64 : u64;
-    if (n64 != 0) {
-      /* Find first/last fragment to be filled. */
-      shift = (63-emit_clz64(n64)) & ~15;
-      lshift = emit_ctz64(n64) & ~15;
+    if (emit_kdelta(as, rd, u64, is64)) {
+      return;
     }
-    /* MOVK requires the original value (u64). */
-    while (shift > lshift) {
-      uint32_t u16 = (u64 >> shift) & 0xffff;
-      /* Skip fragments that are correctly filled by MOVN/MOVZ. */
-      if (u16 != (neg ? 0xffff : 0))
-        emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
-      shift -= 16;
+  }
+  if (neg) {
+    u64 = ~u64;
+    if (!is64) u64 = (uint32_t)u64;
+  }
+  if (u64) {
+    /* Find first/last fragment to be filled. */
+    int shift = (63-emit_clz64(u64)) & ~15;
+    lshift = emit_ctz64(u64) & ~15;
+    for (; shift > lshift; shift -= 16) {
+      uint32_t frag = (u64 >> shift) & 0xffff;
+      if (frag == 0) continue;  /* Will be correctly filled by MOVN/MOVZ. */
+      if (neg) frag ^= 0xffff;  /* MOVK requires the original value. */
+      emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd);
     }
-    /* But MOVN needs an inverted value (n64). */
-    emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
-           A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
   }
+  /* But MOVN needs an inverted value. */
+  emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) |
+         A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
 }
 
 /* Load a 32 bit constant into a GPR. */
-#define emit_loadi(as, rd, i)    emit_loadk(as, rd, i, 0)
+#define emit_loadi(as, rd, i)    emit_loadk(as, rd, (uint32_t)i)
 
 /* Load a 64 bit constant into a GPR. */
-#define emit_loadu64(as, rd, i)  emit_loadk(as, rd, i, A64I_X)
+#define emit_loadu64(as, rd, i)  emit_loadk(as, rd, i)
 
 #define glofs(as, k) \
   ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
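
For reference, a rough, self-contained sketch of the materialization strategy the consolidated emit_loadk() above implements: infer 32/64-bit width from the value, count the 16-bit fragments that a plain MOVZ (or MOVN) would leave wrong, pick whichever base instruction leaves fewer fragments to patch, and fill in the rest with MOVK. The helper names (clz64/ctz64), the register name, and the printed mnemonics are illustrative only; the real emitter also tries the single-instruction ORR-immediate (K13) and emit_kdelta paths first, and, because LuaJIT's assembler fills in machine code backwards, it emits the MOVZ/MOVN last so that it executes first.

#include <stdio.h>
#include <stdint.h>

/* Portable stand-ins for emit_clz64/emit_ctz64; both require x != 0. */
static int clz64(uint64_t x) { int n = 0; while (!(x >> 63)) { x <<= 1; n++; } return n; }
static int ctz64(uint64_t x) { int n = 0; while (!(x & 1)) { x >>= 1; n++; } return n; }

/* Print a MOVZ/MOVN + MOVK sequence in emission order (reverse of execution
** order), mirroring the control flow of the new emit_loadk without the
** ORR(K13) and emit_kdelta shortcuts. */
static void sketch_loadk(uint64_t u64)
{
  int zeros = 0, ones = 0, neg, lshift = 0;
  int is64 = (u64 >> 32) != 0, i = is64 ? 4 : 2;
  const char *rd = is64 ? "x0" : "w0";
  while (--i >= 0) {  /* Count non-homogeneous 16 bit fragments. */
    uint32_t frag = (uint32_t)(u64 >> i*16) & 0xffff;
    zeros += (frag != 0);
    ones += (frag != 0xffff);
  }
  neg = ones < zeros;  /* Use MOVN if it pays off. */
  if (neg) {
    u64 = ~u64;
    if (!is64) u64 = (uint32_t)u64;
  }
  if (u64) {
    /* Find first/last fragment still to be filled. */
    int shift = (63 - clz64(u64)) & ~15;
    lshift = ctz64(u64) & ~15;
    for (; shift > lshift; shift -= 16) {
      uint32_t frag = (uint32_t)(u64 >> shift) & 0xffff;
      if (frag == 0) continue;   /* Already covered by the MOVN/MOVZ. */
      if (neg) frag ^= 0xffff;   /* MOVK takes the original (uninverted) bits. */
      printf("  MOVK %s, #0x%04x, lsl #%d\n", rd, frag, shift);
    }
  }
  printf("  MOV%c %s, #0x%04x, lsl #%d\n", neg ? 'N' : 'Z', rd,
         (unsigned)((u64 >> lshift) & 0xffff), lshift);
}

int main(void)
{
  printf("0x00000000fffffffe:\n"); sketch_loadk(0x00000000fffffffeull);
  printf("0x1234000000005678:\n"); sketch_loadk(0x1234000000005678ull);
  printf("0xffffffffffff1234:\n"); sketch_loadk(0xffffffffffff1234ull);
  return 0;
}
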