diff options
author | Mike Pall <mike> | 2023-09-09 17:15:26 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2023-09-09 17:15:26 +0200 |
commit | 435d8c630135d4f6a54f2ecf7be2d7e805652f80 (patch) | |
tree | 1c43be3566069a6abb78205b564cc0182a8b4986 /src | |
parent | 315dc3e776d3199269a464b17d07c48064d3fd09 (diff) | |
download | luajit-435d8c630135d4f6a54f2ecf7be2d7e805652f80.tar.gz luajit-435d8c630135d4f6a54f2ecf7be2d7e805652f80.tar.bz2 luajit-435d8c630135d4f6a54f2ecf7be2d7e805652f80.zip |
ARM64: Improve IR_HREF code generation.
Thanks to Peter Cawley. #1070
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm_arm64.h | 126 |
1 file changed, 40 insertions, 86 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index b8fbf69b..c5ebd324 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
@@ -773,57 +773,36 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
773 | int destused = ra_used(ir); | 773 | int destused = ra_used(ir); |
774 | Reg dest = ra_dest(as, ir, allow); | 774 | Reg dest = ra_dest(as, ir, allow); |
775 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); | 775 | Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); |
776 | Reg key = 0, tmp = RID_TMP; | 776 | Reg key = 0, tmp = RID_TMP, type = RID_NONE, tkey; |
777 | Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE; | ||
778 | IRRef refkey = ir->op2; | 777 | IRRef refkey = ir->op2; |
779 | IRIns *irkey = IR(refkey); | 778 | IRIns *irkey = IR(refkey); |
780 | int isk = irref_isk(ir->op2); | 779 | int isk = irref_isk(refkey); |
781 | IRType1 kt = irkey->t; | 780 | IRType1 kt = irkey->t; |
782 | uint32_t k = 0; | 781 | uint32_t k = 0; |
783 | uint32_t khash; | 782 | uint32_t khash; |
784 | MCLabel l_end, l_loop, l_next; | 783 | MCLabel l_end, l_loop; |
785 | rset_clear(allow, tab); | 784 | rset_clear(allow, tab); |
786 | 785 | ||
787 | if (!isk) { | 786 | /* Allocate registers outside of the loop. */ |
788 | key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); | 787 | if (irkey->o != IR_KNUM || !(k = emit_isk12((int64_t)ir_knum(irkey)->u64))) { |
788 | key = ra_alloc1(as, refkey, irt_isnum(kt) ? RSET_FPR : allow); | ||
789 | rset_clear(allow, key); | 789 | rset_clear(allow, key); |
790 | if (!irt_isstr(kt)) { | ||
791 | tmp = ra_scratch(as, allow); | ||
792 | rset_clear(allow, tmp); | ||
793 | } | ||
794 | } else if (irt_isnum(kt)) { | ||
795 | int64_t val = (int64_t)ir_knum(irkey)->u64; | ||
796 | if (!(k = emit_isk12(val))) { | ||
797 | key = ra_allock(as, val, allow); | ||
798 | rset_clear(allow, key); | ||
799 | } | ||
800 | } else if (!irt_ispri(kt)) { | ||
801 | if (!(k = emit_isk12(irkey->i))) { | ||
802 | key = ra_alloc1(as, refkey, allow); | ||
803 | rset_clear(allow, key); | ||
804 | } | ||
805 | } | 790 | } |
806 | 791 | if (!isk) { | |
807 | /* Allocate constants early. */ | 792 | tkey = ra_scratch(as, allow); |
808 | if (irt_isnum(kt)) { | 793 | rset_clear(allow, tkey); |
809 | if (!isk) { | 794 | } else if (irt_isnum(kt)) { |
810 | tisnum = ra_allock(as, LJ_TISNUM << 15, allow); | 795 | tkey = key; /* Assumes -0.0 is already canonicalized to +0.0. */ |
811 | ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); | 796 | } else { |
812 | rset_clear(allow, tisnum); | 797 | int64_t kk; |
813 | } | 798 | if (irt_isaddr(kt)) { |
814 | } else if (irt_isaddr(kt)) { | 799 | kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; |
815 | if (isk) { | ||
816 | int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; | ||
817 | scr = ra_allock(as, kk, allow); | ||
818 | } else { | 800 | } else { |
819 | scr = ra_scratch(as, allow); | 801 | lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); |
802 | kk = ~((int64_t)~irt_toitype(kt) << 47); | ||
820 | } | 803 | } |
821 | rset_clear(allow, scr); | 804 | tkey = ra_allock(as, kk, allow); |
822 | } else { | 805 | rset_clear(allow, tkey); |
823 | lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); | ||
824 | type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow); | ||
825 | scr = ra_scratch(as, rset_clear(allow, type)); | ||
826 | rset_clear(allow, scr); | ||
827 | } | 806 | } |
828 | 807 | ||
829 | /* Key not found in chain: jump to exit (if merged) or load niltv. */ | 808 | /* Key not found in chain: jump to exit (if merged) or load niltv. */ |
@@ -839,50 +818,31 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
839 | 818 | ||
840 | /* Follow hash chain until the end. */ | 819 | /* Follow hash chain until the end. */ |
841 | l_loop = --as->mcp; | 820 | l_loop = --as->mcp; |
842 | emit_n(as, A64I_CMPx^A64I_K12^0, dest); | 821 | if (destused) |
843 | emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); | 822 | emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); |
844 | l_next = emit_label(as); | ||
845 | 823 | ||
846 | /* Type and value comparison. */ | 824 | /* Type and value comparison. */ |
847 | if (merge == IR_EQ) | 825 | if (merge == IR_EQ) |
848 | asm_guardcc(as, CC_EQ); | 826 | asm_guardcc(as, CC_EQ); |
849 | else | 827 | else |
850 | emit_cond_branch(as, CC_EQ, l_end); | 828 | emit_cond_branch(as, CC_EQ, l_end); |
829 | emit_nm(as, A64I_CMPx^k, tmp, tkey); | ||
830 | if (!destused) | ||
831 | emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); | ||
832 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key)); | ||
833 | *l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest; | ||
851 | 834 | ||
852 | if (irt_isnum(kt)) { | 835 | /* Construct tkey as canonicalized or tagged key. */ |
853 | if (isk) { | 836 | if (!isk) { |
854 | /* Assumes -0.0 is already canonicalized to +0.0. */ | 837 | if (irt_isnum(kt)) { |
855 | if (k) | 838 | emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey); |
856 | emit_n(as, A64I_CMPx^k, tmp); | ||
857 | else | ||
858 | emit_nm(as, A64I_CMPx, key, tmp); | ||
859 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); | ||
860 | } else { | ||
861 | emit_nm(as, A64I_FCMPd, key, ftmp); | ||
862 | emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); | ||
863 | emit_cond_branch(as, CC_LO, l_next); | ||
864 | emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); | ||
865 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); | ||
866 | } | ||
867 | } else if (irt_isaddr(kt)) { | ||
868 | if (isk) { | ||
869 | emit_nm(as, A64I_CMPx, scr, tmp); | ||
870 | emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); | ||
871 | } else { | 839 | } else { |
872 | emit_nm(as, A64I_CMPx, tmp, scr); | 840 | lj_assertA(irt_isaddr(kt), "bad HREF key type"); |
873 | emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); | 841 | type = ra_allock(as, irt_toitype(kt) << 15, allow); |
842 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type); | ||
874 | } | 843 | } |
875 | } else { | ||
876 | emit_nm(as, A64I_CMPx, scr, type); | ||
877 | emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); | ||
878 | } | 844 | } |
879 | 845 | ||
880 | *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; | ||
881 | if (!isk && irt_isaddr(kt)) { | ||
882 | type = ra_allock(as, (int32_t)irt_toitype(kt), allow); | ||
883 | emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); | ||
884 | rset_clear(allow, type); | ||
885 | } | ||
886 | /* Load main position relative to tab->node into dest. */ | 846 | /* Load main position relative to tab->node into dest. */ |
887 | khash = isk ? ir_khash(as, irkey) : 1; | 847 | khash = isk ? ir_khash(as, irkey) : 1; |
888 | if (khash == 0) { | 848 | if (khash == 0) { |
@@ -896,7 +856,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
896 | emit_dnm(as, A64I_ANDw, dest, dest, tmphash); | 856 | emit_dnm(as, A64I_ANDw, dest, dest, tmphash); |
897 | emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); | 857 | emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); |
898 | } else if (irt_isstr(kt)) { | 858 | } else if (irt_isstr(kt)) { |
899 | /* Fetch of str->sid is cheaper than ra_allock. */ | ||
900 | emit_dnm(as, A64I_ANDw, dest, dest, tmp); | 859 | emit_dnm(as, A64I_ANDw, dest, dest, tmp); |
901 | emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); | 860 | emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); |
902 | emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); | 861 | emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); |
@@ -905,23 +864,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
905 | emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); | 864 | emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); |
906 | emit_dnm(as, A64I_SUBw, dest, dest, tmp); | 865 | emit_dnm(as, A64I_SUBw, dest, dest, tmp); |
907 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); | 866 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); |
908 | emit_dnm(as, A64I_EORw, dest, dest, tmp); | 867 | emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest); |
909 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); | ||
910 | emit_dnm(as, A64I_SUBw, tmp, tmp, dest); | 868 | emit_dnm(as, A64I_SUBw, tmp, tmp, dest); |
911 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); | 869 | emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); |
912 | emit_dnm(as, A64I_EORw, tmp, tmp, dest); | ||
913 | if (irt_isnum(kt)) { | 870 | if (irt_isnum(kt)) { |
871 | emit_dnm(as, A64I_EORw, tmp, tkey, dest); | ||
914 | emit_dnm(as, A64I_ADDw, dest, dest, dest); | 872 | emit_dnm(as, A64I_ADDw, dest, dest, dest); |
915 | emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); | 873 | emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey); |
916 | emit_dm(as, A64I_MOVw, tmp, dest); | 874 | emit_nm(as, A64I_FCMPZd, (key & 31), 0); |
917 | emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); | 875 | emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31)); |
918 | } else { | 876 | } else { |
919 | checkmclim(as); | 877 | emit_dnm(as, A64I_EORw, tmp, key, dest); |
920 | emit_dm(as, A64I_MOVw, tmp, key); | 878 | emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key); |
921 | emit_dnm(as, A64I_EORw, dest, dest, | ||
922 | ra_allock(as, irt_toitype(kt) << 15, allow)); | ||
923 | emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); | ||
924 | emit_dm(as, A64I_MOVx, dest, key); | ||
925 | } | 879 | } |
926 | } | 880 | } |
927 | } | 881 | } |