aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Mike Pall <mike> 2023-09-09 17:15:26 +0200
committer: Mike Pall <mike> 2023-09-09 17:15:26 +0200
commit: 435d8c630135d4f6a54f2ecf7be2d7e805652f80 (patch)
tree: 1c43be3566069a6abb78205b564cc0182a8b4986 /src
parent: 315dc3e776d3199269a464b17d07c48064d3fd09 (diff)
download: luajit-435d8c630135d4f6a54f2ecf7be2d7e805652f80.tar.gz
luajit-435d8c630135d4f6a54f2ecf7be2d7e805652f80.tar.bz2
luajit-435d8c630135d4f6a54f2ecf7be2d7e805652f80.zip
ARM64: Improve IR_HREF code generation.
Thanks to Peter Cawley. #1070
Diffstat (limited to 'src')
-rw-r--r-- src/lj_asm_arm64.h 126
1 file changed, 40 insertions, 86 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index b8fbf69b..c5ebd324 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -773,57 +773,36 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
773 int destused = ra_used(ir); 773 int destused = ra_used(ir);
774 Reg dest = ra_dest(as, ir, allow); 774 Reg dest = ra_dest(as, ir, allow);
775 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 775 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
776 Reg key = 0, tmp = RID_TMP; 776 Reg key = 0, tmp = RID_TMP, type = RID_NONE, tkey;
777 Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
778 IRRef refkey = ir->op2; 777 IRRef refkey = ir->op2;
779 IRIns *irkey = IR(refkey); 778 IRIns *irkey = IR(refkey);
780 int isk = irref_isk(ir->op2); 779 int isk = irref_isk(refkey);
781 IRType1 kt = irkey->t; 780 IRType1 kt = irkey->t;
782 uint32_t k = 0; 781 uint32_t k = 0;
783 uint32_t khash; 782 uint32_t khash;
784 MCLabel l_end, l_loop, l_next; 783 MCLabel l_end, l_loop;
785 rset_clear(allow, tab); 784 rset_clear(allow, tab);
786 785
787 if (!isk) { 786 /* Allocate registers outside of the loop. */
788 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); 787 if (irkey->o != IR_KNUM || !(k = emit_isk12((int64_t)ir_knum(irkey)->u64))) {
788 key = ra_alloc1(as, refkey, irt_isnum(kt) ? RSET_FPR : allow);
789 rset_clear(allow, key); 789 rset_clear(allow, key);
790 if (!irt_isstr(kt)) {
791 tmp = ra_scratch(as, allow);
792 rset_clear(allow, tmp);
793 }
794 } else if (irt_isnum(kt)) {
795 int64_t val = (int64_t)ir_knum(irkey)->u64;
796 if (!(k = emit_isk12(val))) {
797 key = ra_allock(as, val, allow);
798 rset_clear(allow, key);
799 }
800 } else if (!irt_ispri(kt)) {
801 if (!(k = emit_isk12(irkey->i))) {
802 key = ra_alloc1(as, refkey, allow);
803 rset_clear(allow, key);
804 }
805 } 790 }
806 791 if (!isk) {
807 /* Allocate constants early. */ 792 tkey = ra_scratch(as, allow);
808 if (irt_isnum(kt)) { 793 rset_clear(allow, tkey);
809 if (!isk) { 794 } else if (irt_isnum(kt)) {
810 tisnum = ra_allock(as, LJ_TISNUM << 15, allow); 795 tkey = key; /* Assumes -0.0 is already canonicalized to +0.0. */
811 ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); 796 } else {
812 rset_clear(allow, tisnum); 797 int64_t kk;
813 } 798 if (irt_isaddr(kt)) {
814 } else if (irt_isaddr(kt)) { 799 kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
815 if (isk) {
816 int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
817 scr = ra_allock(as, kk, allow);
818 } else { 800 } else {
819 scr = ra_scratch(as, allow); 801 lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
802 kk = ~((int64_t)~irt_toitype(kt) << 47);
820 } 803 }
821 rset_clear(allow, scr); 804 tkey = ra_allock(as, kk, allow);
822 } else { 805 rset_clear(allow, tkey);
823 lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
824 type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
825 scr = ra_scratch(as, rset_clear(allow, type));
826 rset_clear(allow, scr);
827 } 806 }
828 807
829 /* Key not found in chain: jump to exit (if merged) or load niltv. */ 808 /* Key not found in chain: jump to exit (if merged) or load niltv. */
@@ -839,50 +818,31 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
839 818
840 /* Follow hash chain until the end. */ 819 /* Follow hash chain until the end. */
841 l_loop = --as->mcp; 820 l_loop = --as->mcp;
842 emit_n(as, A64I_CMPx^A64I_K12^0, dest); 821 if (destused)
843 emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); 822 emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
844 l_next = emit_label(as);
845 823
846 /* Type and value comparison. */ 824 /* Type and value comparison. */
847 if (merge == IR_EQ) 825 if (merge == IR_EQ)
848 asm_guardcc(as, CC_EQ); 826 asm_guardcc(as, CC_EQ);
849 else 827 else
850 emit_cond_branch(as, CC_EQ, l_end); 828 emit_cond_branch(as, CC_EQ, l_end);
829 emit_nm(as, A64I_CMPx^k, tmp, tkey);
830 if (!destused)
831 emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
832 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
833 *l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
851 834
852 if (irt_isnum(kt)) { 835 /* Construct tkey as canonicalized or tagged key. */
853 if (isk) { 836 if (!isk) {
854 /* Assumes -0.0 is already canonicalized to +0.0. */ 837 if (irt_isnum(kt)) {
855 if (k) 838 emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
856 emit_n(as, A64I_CMPx^k, tmp);
857 else
858 emit_nm(as, A64I_CMPx, key, tmp);
859 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
860 } else {
861 emit_nm(as, A64I_FCMPd, key, ftmp);
862 emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
863 emit_cond_branch(as, CC_LO, l_next);
864 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
865 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
866 }
867 } else if (irt_isaddr(kt)) {
868 if (isk) {
869 emit_nm(as, A64I_CMPx, scr, tmp);
870 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
871 } else { 839 } else {
872 emit_nm(as, A64I_CMPx, tmp, scr); 840 lj_assertA(irt_isaddr(kt), "bad HREF key type");
873 emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); 841 type = ra_allock(as, irt_toitype(kt) << 15, allow);
842 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
874 } 843 }
875 } else {
876 emit_nm(as, A64I_CMPx, scr, type);
877 emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
878 } 844 }
879 845
880 *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
881 if (!isk && irt_isaddr(kt)) {
882 type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
883 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
884 rset_clear(allow, type);
885 }
886 /* Load main position relative to tab->node into dest. */ 846 /* Load main position relative to tab->node into dest. */
887 khash = isk ? ir_khash(as, irkey) : 1; 847 khash = isk ? ir_khash(as, irkey) : 1;
888 if (khash == 0) { 848 if (khash == 0) {
@@ -896,7 +856,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
896 emit_dnm(as, A64I_ANDw, dest, dest, tmphash); 856 emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
897 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); 857 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
898 } else if (irt_isstr(kt)) { 858 } else if (irt_isstr(kt)) {
899 /* Fetch of str->sid is cheaper than ra_allock. */
900 emit_dnm(as, A64I_ANDw, dest, dest, tmp); 859 emit_dnm(as, A64I_ANDw, dest, dest, tmp);
901 emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); 860 emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
902 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); 861 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
@@ -905,23 +864,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
905 emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); 864 emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
906 emit_dnm(as, A64I_SUBw, dest, dest, tmp); 865 emit_dnm(as, A64I_SUBw, dest, dest, tmp);
907 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); 866 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
908 emit_dnm(as, A64I_EORw, dest, dest, tmp); 867 emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
909 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
910 emit_dnm(as, A64I_SUBw, tmp, tmp, dest); 868 emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
911 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); 869 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
912 emit_dnm(as, A64I_EORw, tmp, tmp, dest);
913 if (irt_isnum(kt)) { 870 if (irt_isnum(kt)) {
871 emit_dnm(as, A64I_EORw, tmp, tkey, dest);
914 emit_dnm(as, A64I_ADDw, dest, dest, dest); 872 emit_dnm(as, A64I_ADDw, dest, dest, dest);
915 emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); 873 emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
916 emit_dm(as, A64I_MOVw, tmp, dest); 874 emit_nm(as, A64I_FCMPZd, (key & 31), 0);
917 emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); 875 emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
918 } else { 876 } else {
919 checkmclim(as); 877 emit_dnm(as, A64I_EORw, tmp, key, dest);
920 emit_dm(as, A64I_MOVw, tmp, key); 878 emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
921 emit_dnm(as, A64I_EORw, dest, dest,
922 ra_allock(as, irt_toitype(kt) << 15, allow));
923 emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
924 emit_dm(as, A64I_MOVx, dest, key);
925 } 879 }
926 } 880 }
927 } 881 }