author    Mike Pall <mike>    2021-03-23 02:08:44 +0100
committer Mike Pall <mike>    2021-03-23 02:08:44 +0100
commit    ebc4919affbc0f9e8adfb5dede378017c7dd1fdd
tree      23f509367510e5f173fd24e97dbd0cfffbb6ebbc /src
parent    a4c9fc3d6cca87cc6ff9c6343ddb9b4716823201
ARM64: Improve generation of immediates.
Diffstat (limited to 'src')
-rw-r--r--  src/lj_emit_arm64.h | 64
1 file changed, 33 insertions(+), 31 deletions(-)
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index abc32be7..938486ca 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -194,39 +194,41 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
 
 static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
 {
-  uint32_t k13 = emit_isk13(u64, is64);
-  if (k13) {  /* Can the constant be represented as a bitmask immediate? */
-    emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
-  } else {
-    int i, zeros = 0, ones = 0, neg;
-    if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
-    /* Count homogeneous 16 bit fragments. */
-    for (i = 0; i < 4; i++) {
-      uint64_t frag = (u64 >> i*16) & 0xffff;
-      zeros += (frag == 0);
-      ones += (frag == 0xffff);
+  int i, zeros = 0, ones = 0, neg;
+  if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
+  /* Count homogeneous 16 bit fragments. */
+  for (i = 0; i < 4; i++) {
+    uint64_t frag = (u64 >> i*16) & 0xffff;
+    zeros += (frag == 0);
+    ones += (frag == 0xffff);
+  }
+  neg = ones > zeros;  /* Use MOVN if it pays off. */
+  if ((neg ? ones : zeros) < 3) {  /* Need 2+ ins. Try shorter K13 encoding. */
+    uint32_t k13 = emit_isk13(u64, is64);
+    if (k13) {
+      emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
+      return;
     }
-    neg = ones > zeros;  /* Use MOVN if it pays off. */
-    if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
-      int shift = 0, lshift = 0;
-      uint64_t n64 = neg ? ~u64 : u64;
-      if (n64 != 0) {
-        /* Find first/last fragment to be filled. */
-        shift = (63-emit_clz64(n64)) & ~15;
-        lshift = emit_ctz64(n64) & ~15;
-      }
-      /* MOVK requires the original value (u64). */
-      while (shift > lshift) {
-        uint32_t u16 = (u64 >> shift) & 0xffff;
-        /* Skip fragments that are correctly filled by MOVN/MOVZ. */
-        if (u16 != (neg ? 0xffff : 0))
-          emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
-        shift -= 16;
-      }
-      /* But MOVN needs an inverted value (n64). */
-      emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
-             A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
+  }
+  if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
+    int shift = 0, lshift = 0;
+    uint64_t n64 = neg ? ~u64 : u64;
+    if (n64 != 0) {
+      /* Find first/last fragment to be filled. */
+      shift = (63-emit_clz64(n64)) & ~15;
+      lshift = emit_ctz64(n64) & ~15;
+    }
+    /* MOVK requires the original value (u64). */
+    while (shift > lshift) {
+      uint32_t u16 = (u64 >> shift) & 0xffff;
+      /* Skip fragments that are correctly filled by MOVN/MOVZ. */
+      if (u16 != (neg ? 0xffff : 0))
+        emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
+      shift -= 16;
     }
+    /* But MOVN needs an inverted value (n64). */
+    emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
+           A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
   }
 }
 
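The rewritten emit_loadk() always counts how many of the constant's four 16-bit fragments are all-zeros or all-ones, prefers MOVN over MOVZ when the all-ones fragments dominate, and only probes the bitmask-immediate (K13) ORR encoding when a plain MOVZ/MOVN+MOVK sequence would need two or more instructions. The standalone sketch below is not LuaJIT code: the helper name movzn_cost and the sample constants are made up for illustration, and it reproduces only the instruction-count estimate (the value the patch passes as the limit to emit_kdelta); it ignores the ORR and ADD/SUB-delta fallbacks that the real emitter may still use to do better.

/* Sketch of the fragment-counting heuristic from the patched emit_loadk(). */
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: estimate how many MOVZ/MOVN+MOVK instructions are
** needed to materialize u64, and report via *use_movn whether building the
** constant from its inverse (MOVN) pays off. */
static int movzn_cost(uint64_t u64, int *use_movn)
{
  int i, zeros = 0, ones = 0, n;
  for (i = 0; i < 4; i++) {
    uint64_t frag = (u64 >> i*16) & 0xffff;
    zeros += (frag == 0);      /* Fragment already covered by the MOVZ fill. */
    ones += (frag == 0xffff);  /* Fragment already covered by the MOVN fill. */
  }
  *use_movn = ones > zeros;
  n = 4 - (*use_movn ? ones : zeros);  /* Fragments the fill pattern misses:
                                       ** one MOVZ/MOVN plus a MOVK for each
                                       ** further mismatching fragment. */
  return n ? n : 1;  /* All-zeros/all-ones still needs one MOVZ/MOVN. */
}

int main(void)
{
  /* Illustrative sample constants; comments give the expected estimate. */
  uint64_t k[4] = {
    0x0000000000001234ull,  /* 1: MOVZ */
    0xffffffffffff1234ull,  /* 1: MOVN */
    0x0000123400005678ull,  /* 2: MOVZ + MOVK */
    0x1234567812345678ull   /* 4: MOVZ + 3x MOVK */
  };
  int i;
  for (i = 0; i < 4; i++) {
    int neg, cost = movzn_cost(k[i], &neg);
    printf("0x%016llx -> %d instruction(s), %s-based\n",
           (unsigned long long)k[i], cost, neg ? "MOVN" : "MOVZ");
  }
  return 0;
}

For a mostly-ones constant such as 0xffffffffffff1234 the estimate is a single MOVN, which is why treating such values as negated pays off; and whenever the estimate is a single instruction, the new code skips the K13 probe entirely, since an ORR from the zero register could not be any shorter.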