author    Mike Pall <mike>  2023-09-09 17:31:06 +0200
committer Mike Pall <mike>  2023-09-09 17:31:06 +0200
commit    a5ee35867c6dd359a04f58913e9a21f1649d68b3 (patch)
tree      1962c6923c735a8ca36a0e962742227aa68a2656 /src
parent    c1877e648a5eeb96deda7080c6a43aed1b1a35ea (diff)
ARM64: Use RID_TMP instead of scratch register in more places.
Thanks to Peter Cawley. #1070
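
In short: asm_hrefk, asm_ahuvload and the asm_sload type check previously
allocated a scratch GPR via ra_scratch() for a short-lived key or type
value; they now use the fixed temporary register RID_TMP instead, so one
more GPR stays available to the register allocator. Since RID_TMP
consequently appears in more emitted code, the ARM64_NOPATCH_GC_CHECK
marker instruction is moved from RID_TMP to RID_ZERO (an ORR on the zero
register, effectively a no-op), presumably so the marker cannot collide
with a genuinely emitted instruction. An annotated before/after excerpt of
the asm_hrefk hunk (comments added here, not part of the source):

  /* Before: burn a scratch GPR for the loaded key, and exclude it
  ** from the allocation of the constant to compare against. */
  key = ra_scratch(as, allow);
  emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
  emit_lso(as, A64I_LDRx, key, idx, kofs);

  /* After: load and compare via the fixed temporary RID_TMP. */
  emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
  emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);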
Diffstat (limited to 'src')
-rw-r--r--  src/lj_asm_arm64.h  23 ++++++++++-------------
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 5fd7bf07..9ea2d405 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -890,7 +890,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   int bigofs = !emit_checkofs(A64I_LDRx, kofs);
   Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
   Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
-  Reg key, idx = node;
+  Reg idx = node;
   RegSet allow = rset_exclude(RSET_GPR, node);
   uint64_t k;
   lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
@@ -909,9 +909,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   } else {
     k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
   }
-  key = ra_scratch(as, allow);
-  emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
-  emit_lso(as, A64I_LDRx, key, idx, kofs);
+  emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
+  emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);
   if (bigofs)
     emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
 }
@@ -1039,7 +1038,7 @@ static void asm_xstore(ASMState *as, IRIns *ir)
 
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
-  Reg idx, tmp, type;
+  Reg idx, tmp;
   int32_t ofs = 0;
   RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
   lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
@@ -1058,8 +1057,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
   } else {
     tmp = ra_scratch(as, gpr);
   }
-  type = ra_scratch(as, rset_clear(gpr, tmp));
-  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
+  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx);
   rset_clear(gpr, idx);
   if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
   if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
@@ -1071,8 +1069,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
             ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
   } else if (irt_isaddr(ir->t)) {
-    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
-    emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
+    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP);
+    emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
   } else if (irt_isnil(ir->t)) {
     emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
   } else {
@@ -1195,9 +1193,8 @@ dotypecheck:
       emit_nm(as, A64I_CMPx,
               ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
     } else {
-      Reg type = ra_scratch(as, allow);
-      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
-      emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
+      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP);
+      emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
     }
     emit_lso(as, A64I_LDRx, tmp, base, ofs);
     return;
@@ -1805,7 +1802,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
 
 /* Marker to prevent patching the GC check exit. */
 #define ARM64_NOPATCH_GC_CHECK \
-  (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
+  (A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))
 
 /* Check GC threshold and do one or more GC steps. */
 static void asm_gc_check(ASMState *as)