diff options
| author | Mike Pall <mike> | 2024-08-21 11:31:29 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2024-08-21 11:31:29 +0200 |
| commit | c68711cc872e6626dc9e653e94df7bf21691d38e (patch) | |
| tree | 2c6aa4c04e14ba06b474e5329b60309b0802d4e1 /src | |
| parent | 304da39cc5ee43491f7b1f4e0c9c52d477ce0d98 (diff) | |
| download | luajit-c68711cc872e6626dc9e653e94df7bf21691d38e.tar.gz luajit-c68711cc872e6626dc9e653e94df7bf21691d38e.tar.bz2 luajit-c68711cc872e6626dc9e653e94df7bf21691d38e.zip | |
ARM64: Use ldr literal to load FP constants.
Thanks to Peter Cawley. #1255
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_emit_arm64.h | 17 | ||||
| -rw-r--r-- | src/lj_target_arm64.h | 1 |
2 files changed, 12 insertions, 6 deletions
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index 0967f6e4..6838693e 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h | |||
| @@ -109,6 +109,11 @@ static void emit_d(ASMState *as, A64Ins ai, Reg rd) | |||
| 109 | *--as->mcp = ai | A64F_D(rd); | 109 | *--as->mcp = ai | A64F_D(rd); |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | static void emit_dl(ASMState *as, A64Ins ai, Reg rd, uint32_t l) | ||
| 113 | { | ||
| 114 | *--as->mcp = ai | A64F_D(rd) | A64F_S19(l >> 2); | ||
| 115 | } | ||
| 116 | |||
| 112 | static void emit_n(ASMState *as, A64Ins ai, Reg rn) | 117 | static void emit_n(ASMState *as, A64Ins ai, Reg rn) |
| 113 | { | 118 | { |
| 114 | *--as->mcp = ai | A64F_N(rn); | 119 | *--as->mcp = ai | A64F_N(rn); |
| @@ -226,7 +231,7 @@ static int emit_kadrp(ASMState *as, Reg rd, uint64_t k) | |||
| 226 | emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd); | 231 | emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd); |
| 227 | ai = A64I_ADRP; | 232 | ai = A64I_ADRP; |
| 228 | } | 233 | } |
| 229 | emit_d(as, ai|(((uint32_t)ofs&3)<<29)|A64F_S19(ofs>>2), rd); | 234 | emit_dl(as, ai|(((uint32_t)ofs&3)<<29), rd, ofs); |
| 230 | return 1; | 235 | return 1; |
| 231 | } | 236 | } |
| 232 | 237 | ||
| @@ -291,7 +296,7 @@ static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) | |||
| 291 | /* GL + offset, might subsequently fuse to LDP/STP. */ | 296 | /* GL + offset, might subsequently fuse to LDP/STP. */ |
| 292 | } else if (ai == A64I_LDRx && checkmcpofs(as, p)) { | 297 | } else if (ai == A64I_LDRx && checkmcpofs(as, p)) { |
| 293 | /* IP + offset is cheaper than allock, but address must be in range. */ | 298 | /* IP + offset is cheaper than allock, but address must be in range. */ |
| 294 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); | 299 | emit_dl(as, A64I_LDRLx, r, mcpofs(as, p)); |
| 295 | return; | 300 | return; |
| 296 | } else { /* Split up into base reg + offset. */ | 301 | } else { /* Split up into base reg + offset. */ |
| 297 | int64_t i64 = i64ptr(p); | 302 | int64_t i64 = i64ptr(p); |
| @@ -320,15 +325,15 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | |||
| 320 | if (emit_checkofs(A64I_LDRx, ofs)) { | 325 | if (emit_checkofs(A64I_LDRx, ofs)) { |
| 321 | emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, | 326 | emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, |
| 322 | (r & 31), RID_GL, ofs); | 327 | (r & 31), RID_GL, ofs); |
| 328 | } else if (checkmcpofs(as, k)) { | ||
| 329 | emit_dl(as, r >= RID_MAX_GPR ? A64I_LDRLd : A64I_LDRLx, | ||
| 330 | (r & 31), mcpofs(as, k)); | ||
| 323 | } else { | 331 | } else { |
| 324 | if (r >= RID_MAX_GPR) { | 332 | if (r >= RID_MAX_GPR) { |
| 325 | emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); | 333 | emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); |
| 326 | r = RID_TMP; | 334 | r = RID_TMP; |
| 327 | } | 335 | } |
| 328 | if (checkmcpofs(as, k)) | 336 | emit_loadu64(as, r, *k); |
| 329 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); | ||
| 330 | else | ||
| 331 | emit_loadu64(as, r, *k); | ||
| 332 | } | 337 | } |
| 333 | } | 338 | } |
| 334 | 339 | ||
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 8ed8851c..7d11395d 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h | |||
| @@ -281,6 +281,7 @@ typedef enum A64Ins { | |||
| 281 | A64I_FSQRTd = 0x1e61c000, | 281 | A64I_FSQRTd = 0x1e61c000, |
| 282 | A64I_LDRs = 0xbd400000, | 282 | A64I_LDRs = 0xbd400000, |
| 283 | A64I_LDRd = 0xfd400000, | 283 | A64I_LDRd = 0xfd400000, |
| 284 | A64I_LDRLd = 0x5c000000, | ||
| 284 | A64I_STRs = 0xbd000000, | 285 | A64I_STRs = 0xbd000000, |
| 285 | A64I_STRd = 0xfd000000, | 286 | A64I_STRd = 0xfd000000, |
| 286 | A64I_LDPs = 0x2d400000, | 287 | A64I_LDPs = 0x2d400000, |
