diff options
author | Mike Pall <mike> | 2024-08-21 11:31:29 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2024-08-21 11:31:29 +0200 |
commit | c68711cc872e6626dc9e653e94df7bf21691d38e (patch) | |
tree | 2c6aa4c04e14ba06b474e5329b60309b0802d4e1 | |
parent | 304da39cc5ee43491f7b1f4e0c9c52d477ce0d98 (diff) | |
download | luajit-c68711cc872e6626dc9e653e94df7bf21691d38e.tar.gz luajit-c68711cc872e6626dc9e653e94df7bf21691d38e.tar.bz2 luajit-c68711cc872e6626dc9e653e94df7bf21691d38e.zip |
ARM64: Use ldr literal to load FP constants.
Thanks to Peter Cawley. #1255
-rw-r--r-- | src/lj_emit_arm64.h | 17 | ||||
-rw-r--r-- | src/lj_target_arm64.h | 1 |
2 files changed, 12 insertions, 6 deletions
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index 0967f6e4..6838693e 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h | |||
@@ -109,6 +109,11 @@ static void emit_d(ASMState *as, A64Ins ai, Reg rd) | |||
109 | *--as->mcp = ai | A64F_D(rd); | 109 | *--as->mcp = ai | A64F_D(rd); |
110 | } | 110 | } |
111 | 111 | ||
112 | static void emit_dl(ASMState *as, A64Ins ai, Reg rd, uint32_t l) | ||
113 | { | ||
114 | *--as->mcp = ai | A64F_D(rd) | A64F_S19(l >> 2); | ||
115 | } | ||
116 | |||
112 | static void emit_n(ASMState *as, A64Ins ai, Reg rn) | 117 | static void emit_n(ASMState *as, A64Ins ai, Reg rn) |
113 | { | 118 | { |
114 | *--as->mcp = ai | A64F_N(rn); | 119 | *--as->mcp = ai | A64F_N(rn); |
@@ -226,7 +231,7 @@ static int emit_kadrp(ASMState *as, Reg rd, uint64_t k) | |||
226 | emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd); | 231 | emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd); |
227 | ai = A64I_ADRP; | 232 | ai = A64I_ADRP; |
228 | } | 233 | } |
229 | emit_d(as, ai|(((uint32_t)ofs&3)<<29)|A64F_S19(ofs>>2), rd); | 234 | emit_dl(as, ai|(((uint32_t)ofs&3)<<29), rd, ofs); |
230 | return 1; | 235 | return 1; |
231 | } | 236 | } |
232 | 237 | ||
@@ -291,7 +296,7 @@ static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) | |||
291 | /* GL + offset, might subsequently fuse to LDP/STP. */ | 296 | /* GL + offset, might subsequently fuse to LDP/STP. */ |
292 | } else if (ai == A64I_LDRx && checkmcpofs(as, p)) { | 297 | } else if (ai == A64I_LDRx && checkmcpofs(as, p)) { |
293 | /* IP + offset is cheaper than allock, but address must be in range. */ | 298 | /* IP + offset is cheaper than allock, but address must be in range. */ |
294 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); | 299 | emit_dl(as, A64I_LDRLx, r, mcpofs(as, p)); |
295 | return; | 300 | return; |
296 | } else { /* Split up into base reg + offset. */ | 301 | } else { /* Split up into base reg + offset. */ |
297 | int64_t i64 = i64ptr(p); | 302 | int64_t i64 = i64ptr(p); |
@@ -320,15 +325,15 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | |||
320 | if (emit_checkofs(A64I_LDRx, ofs)) { | 325 | if (emit_checkofs(A64I_LDRx, ofs)) { |
321 | emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, | 326 | emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, |
322 | (r & 31), RID_GL, ofs); | 327 | (r & 31), RID_GL, ofs); |
328 | } else if (checkmcpofs(as, k)) { | ||
329 | emit_dl(as, r >= RID_MAX_GPR ? A64I_LDRLd : A64I_LDRLx, | ||
330 | (r & 31), mcpofs(as, k)); | ||
323 | } else { | 331 | } else { |
324 | if (r >= RID_MAX_GPR) { | 332 | if (r >= RID_MAX_GPR) { |
325 | emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); | 333 | emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); |
326 | r = RID_TMP; | 334 | r = RID_TMP; |
327 | } | 335 | } |
328 | if (checkmcpofs(as, k)) | 336 | emit_loadu64(as, r, *k); |
329 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); | ||
330 | else | ||
331 | emit_loadu64(as, r, *k); | ||
332 | } | 337 | } |
333 | } | 338 | } |
334 | 339 | ||
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 8ed8851c..7d11395d 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h | |||
@@ -281,6 +281,7 @@ typedef enum A64Ins { | |||
281 | A64I_FSQRTd = 0x1e61c000, | 281 | A64I_FSQRTd = 0x1e61c000, |
282 | A64I_LDRs = 0xbd400000, | 282 | A64I_LDRs = 0xbd400000, |
283 | A64I_LDRd = 0xfd400000, | 283 | A64I_LDRd = 0xfd400000, |
284 | A64I_LDRLd = 0x5c000000, | ||
284 | A64I_STRs = 0xbd000000, | 285 | A64I_STRs = 0xbd000000, |
285 | A64I_STRd = 0xfd000000, | 286 | A64I_STRd = 0xfd000000, |
286 | A64I_LDPs = 0x2d400000, | 287 | A64I_LDPs = 0x2d400000, |