aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/lj_emit_arm64.h64
1 file changed, 33 insertions, 31 deletions
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index abc32be7..938486ca 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -194,39 +194,41 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
194 194
195static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) 195static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
196{ 196{
197 uint32_t k13 = emit_isk13(u64, is64); 197 int i, zeros = 0, ones = 0, neg;
198 if (k13) { /* Can the constant be represented as a bitmask immediate? */ 198 if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
199 emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); 199 /* Count homogeneous 16 bit fragments. */
200 } else { 200 for (i = 0; i < 4; i++) {
201 int i, zeros = 0, ones = 0, neg; 201 uint64_t frag = (u64 >> i*16) & 0xffff;
202 if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ 202 zeros += (frag == 0);
203 /* Count homogeneous 16 bit fragments. */ 203 ones += (frag == 0xffff);
204 for (i = 0; i < 4; i++) { 204 }
205 uint64_t frag = (u64 >> i*16) & 0xffff; 205 neg = ones > zeros; /* Use MOVN if it pays off. */
206 zeros += (frag == 0); 206 if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */
207 ones += (frag == 0xffff); 207 uint32_t k13 = emit_isk13(u64, is64);
208 if (k13) {
209 emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
210 return;
208 } 211 }
209 neg = ones > zeros; /* Use MOVN if it pays off. */ 212 }
210 if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { 213 if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
211 int shift = 0, lshift = 0; 214 int shift = 0, lshift = 0;
212 uint64_t n64 = neg ? ~u64 : u64; 215 uint64_t n64 = neg ? ~u64 : u64;
213 if (n64 != 0) { 216 if (n64 != 0) {
214 /* Find first/last fragment to be filled. */ 217 /* Find first/last fragment to be filled. */
215 shift = (63-emit_clz64(n64)) & ~15; 218 shift = (63-emit_clz64(n64)) & ~15;
216 lshift = emit_ctz64(n64) & ~15; 219 lshift = emit_ctz64(n64) & ~15;
217 } 220 }
218 /* MOVK requires the original value (u64). */ 221 /* MOVK requires the original value (u64). */
219 while (shift > lshift) { 222 while (shift > lshift) {
220 uint32_t u16 = (u64 >> shift) & 0xffff; 223 uint32_t u16 = (u64 >> shift) & 0xffff;
221 /* Skip fragments that are correctly filled by MOVN/MOVZ. */ 224 /* Skip fragments that are correctly filled by MOVN/MOVZ. */
222 if (u16 != (neg ? 0xffff : 0)) 225 if (u16 != (neg ? 0xffff : 0))
223 emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); 226 emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
224 shift -= 16; 227 shift -= 16;
225 }
226 /* But MOVN needs an inverted value (n64). */
227 emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
228 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
229 } 228 }
229 /* But MOVN needs an inverted value (n64). */
230 emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
231 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
230 } 232 }
231} 233}
232 234