diff options
| -rw-r--r-- | src/lj_emit_arm64.h | 64 |
1 file changed, 33 insertions, 31 deletions
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index abc32be7..938486ca 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h | |||
| @@ -194,39 +194,41 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) | |||
| 194 | 194 | ||
| 195 | static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) | 195 | static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) |
| 196 | { | 196 | { |
| 197 | uint32_t k13 = emit_isk13(u64, is64); | 197 | int i, zeros = 0, ones = 0, neg; |
| 198 | if (k13) { /* Can the constant be represented as a bitmask immediate? */ | 198 | if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ |
| 199 | emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); | 199 | /* Count homogeneous 16 bit fragments. */ |
| 200 | } else { | 200 | for (i = 0; i < 4; i++) { |
| 201 | int i, zeros = 0, ones = 0, neg; | 201 | uint64_t frag = (u64 >> i*16) & 0xffff; |
| 202 | if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ | 202 | zeros += (frag == 0); |
| 203 | /* Count homogeneous 16 bit fragments. */ | 203 | ones += (frag == 0xffff); |
| 204 | for (i = 0; i < 4; i++) { | 204 | } |
| 205 | uint64_t frag = (u64 >> i*16) & 0xffff; | 205 | neg = ones > zeros; /* Use MOVN if it pays off. */ |
| 206 | zeros += (frag == 0); | 206 | if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */ |
| 207 | ones += (frag == 0xffff); | 207 | uint32_t k13 = emit_isk13(u64, is64); |
| 208 | if (k13) { | ||
| 209 | emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); | ||
| 210 | return; | ||
| 208 | } | 211 | } |
| 209 | neg = ones > zeros; /* Use MOVN if it pays off. */ | 212 | } |
| 210 | if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { | 213 | if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { |
| 211 | int shift = 0, lshift = 0; | 214 | int shift = 0, lshift = 0; |
| 212 | uint64_t n64 = neg ? ~u64 : u64; | 215 | uint64_t n64 = neg ? ~u64 : u64; |
| 213 | if (n64 != 0) { | 216 | if (n64 != 0) { |
| 214 | /* Find first/last fragment to be filled. */ | 217 | /* Find first/last fragment to be filled. */ |
| 215 | shift = (63-emit_clz64(n64)) & ~15; | 218 | shift = (63-emit_clz64(n64)) & ~15; |
| 216 | lshift = emit_ctz64(n64) & ~15; | 219 | lshift = emit_ctz64(n64) & ~15; |
| 217 | } | 220 | } |
| 218 | /* MOVK requires the original value (u64). */ | 221 | /* MOVK requires the original value (u64). */ |
| 219 | while (shift > lshift) { | 222 | while (shift > lshift) { |
| 220 | uint32_t u16 = (u64 >> shift) & 0xffff; | 223 | uint32_t u16 = (u64 >> shift) & 0xffff; |
| 221 | /* Skip fragments that are correctly filled by MOVN/MOVZ. */ | 224 | /* Skip fragments that are correctly filled by MOVN/MOVZ. */ |
| 222 | if (u16 != (neg ? 0xffff : 0)) | 225 | if (u16 != (neg ? 0xffff : 0)) |
| 223 | emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); | 226 | emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); |
| 224 | shift -= 16; | 227 | shift -= 16; |
| 225 | } | ||
| 226 | /* But MOVN needs an inverted value (n64). */ | ||
| 227 | emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | | ||
| 228 | A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); | ||
| 229 | } | 228 | } |
| 229 | /* But MOVN needs an inverted value (n64). */ | ||
| 230 | emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | | ||
| 231 | A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); | ||
| 230 | } | 232 | } |
| 231 | } | 233 | } |
| 232 | 234 | ||
