| author | Mike Pall <mike> | 2011-06-05 18:59:35 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2011-06-05 18:59:35 +0200 |
| commit | 7ad4908a4880523c5822d006055dc5d6db4556ea (patch) | |
| tree | ed031bb0133708d1eae03720f28d02d293b95a9c /src | |
| parent | b88eaa11f351257612657352daa0a76e60e66a75 (diff) | |
ARM: Fuse XLOAD/XSTORE operands.
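Until now the ARM backend loaded XLOAD/XSTORE addresses into a plain base register with a zero offset (the old "NYI: Fuse xload operands" path). The new asm_fusexref() folds an ADD of the base plus a small constant, an index register, or an index pre-shifted by BSHL 2 directly into the load/store addressing mode, and asm_fxload/asm_fxstore are split into dedicated FLOAD/FSTORE and XLOAD/XSTORE handlers so the fusion only applies where it is legal.

As a rough illustration (plain C, not LuaJIT code), the addressing form being targeted is the same one a C compiler uses for a scaled array index, where the shift by 2 ends up inside the load itself rather than in a separate instruction:

#include <stdint.h>

/* On ARM, this typically compiles to a single
 *   ldr r0, [r0, r1, lsl #2]
 * i.e. base register plus index register shifted left by 2,
 * which is the operand form asm_fusexref now emits for a fused XLOAD. */
int32_t load_elem(const int32_t *base, int idx)
{
  return base[idx];
}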
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_asm_arm.h | 190 |
| -rw-r--r-- | src/lj_crecord.c | 7 |
2 files changed, 145 insertions, 52 deletions
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 74a3a927..31b300bf 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -261,6 +261,83 @@ static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow)
   return ra_allocref(as, ref, allow);
 }
 
+/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */
+static IRRef asm_fuselsl2(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL &&
+      irref_isk(ir->op2) && IR(ir->op2)->i == 2)
+    return ir->op1;
+  return 0; /* No fusion. */
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
+static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
+                         RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  int32_t ofs = 0;
+  Reg base;
+  if (ra_noreg(ir->r) && mayfuse(as, ref)) {
+    int32_t lim = (ai & 0x04000000) ? 4096 : 256;
+    if (ir->o == IR_ADD) {
+      if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i) > -lim && ofs < lim) {
+        ref = ir->op1;
+      } else {
+        IRRef lref = ir->op1, rref = ir->op2;
+        Reg rn, rm;
+        if ((ai & 0x04000000)) {
+          IRRef sref = asm_fuselsl2(as, rref);
+          if (sref) {
+            rref = sref;
+            ai |= ARMF_SH(ARMSH_LSL, 2);
+          } else if ((sref = asm_fuselsl2(as, lref)) != 0) {
+            lref = rref;
+            rref = sref;
+            ai |= ARMF_SH(ARMSH_LSL, 2);
+          }
+        }
+        rn = ra_alloc1(as, lref, allow);
+        rm = ra_alloc1(as, rref, rset_exclude(allow, rn));
+        if ((ai & 0x04000000)) ai |= ARMI_LS_R;
+        emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
+        return;
+      }
+    } else if (ir->o == IR_STRREF) {
+      ofs = (int32_t)sizeof(GCstr);
+      if (irref_isk(ir->op2)) {
+        ofs += IR(ir->op2)->i;
+        ref = ir->op1;
+      } else if (irref_isk(ir->op1)) {
+        ofs += IR(ir->op1)->i;
+        ref = ir->op2;
+      } else {
+        /* NYI: Fuse ADD with constant. */
+        Reg rn = ra_alloc1(as, ir->op1, allow);
+        uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
+        if ((ai & 0x04000000))
+          emit_lso(as, ai, rd, rd, ofs);
+        else
+          emit_lsox(as, ai, rd, rd, ofs);
+        emit_dn(as, ARMI_ADD^m, rd, rn);
+        return;
+      }
+      if (ofs <= -lim || ofs >= lim) {
+        Reg rn = ra_alloc1(as, ref, allow);
+        Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
+        if ((ai & 0x04000000)) ai |= ARMI_LS_R;
+        emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
+        return;
+      }
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  if ((ai & 0x04000000))
+    emit_lso(as, ai, rd, base, ofs);
+  else
+    emit_lsox(as, ai, rd, base, ofs);
+}
+
 /* -- Calls --------------------------------------------------------------- */
 
 /* Generate a call to a C function. */
@@ -749,68 +826,75 @@ static void asm_strref(ASMState *as, IRIns *ir)
 
 /* -- Loads and stores ---------------------------------------------------- */
 
-static void asm_fxload(ASMState *as, IRIns *ir)
+static ARMIns asm_fxloadins(IRIns *ir)
 {
-  Reg idx, dest = ra_dest(as, ir, RSET_GPR);
-  int32_t ofs;
-  ARMIns ai;
-  if (ir->o == IR_FLOAD) {
-    idx = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->op2 == IRFL_TAB_ARRAY) {
-      ofs = asm_fuseabase(as, ir->op1);
-      if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
-        emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
-        return;
-      }
-    }
-    ofs = field_ofs[ir->op2];
-  } else {
-    /* NYI: Fuse xload operands. */
-    lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
-    idx = ra_alloc1(as, ir->op1, RSET_GPR);
-    ofs = 0;
-  }
   switch (irt_type(ir->t)) {
-  case IRT_I8: ai = ARMI_LDRSB; break;
-  case IRT_U8: ai = ARMI_LDRB; goto use_lso;
-  case IRT_I16: ai = ARMI_LDRSH; break;
-  case IRT_U16: ai = ARMI_LDRH; break;
+  case IRT_I8: return ARMI_LDRSB;
+  case IRT_U8: return ARMI_LDRB;
+  case IRT_I16: return ARMI_LDRSH;
+  case IRT_U16: return ARMI_LDRH;
   case IRT_NUM: lua_assert(0);
   case IRT_FLOAT:
-  default: ai = ARMI_LDR;
-  use_lso:
-    emit_lso(as, ai, dest, idx, ofs);
-    return;
+  default: return ARMI_LDR;
   }
-  emit_lsox(as, ai, dest, idx, ofs);
 }
 
-static void asm_fxstore(ASMState *as, IRIns *ir)
+static ARMIns asm_fxstoreins(IRIns *ir)
 {
-  Reg idx, src = ra_alloc1(as, ir->op2, RSET_GPR);
-  RegSet allow = rset_exclude(RSET_GPR, src);
-  int32_t ofs;
-  ARMIns ai;
-  if (ir->o == IR_FSTORE) {
-    IRIns *irf = IR(ir->op1);
-    idx = ra_alloc1(as, irf->op1, allow);
-    ofs = field_ofs[irf->op2];
-  } else {
-    /* NYI: Fuse xstore operands. */
-    idx = ra_alloc1(as, ir->op1, allow);
-    ofs = 0;
-  }
   switch (irt_type(ir->t)) {
-  case IRT_I8: case IRT_U8: ai = ARMI_STRB; goto use_lso;
-  case IRT_I16: case IRT_U16: ai = ARMI_STRH; break;
+  case IRT_I8: case IRT_U8: return ARMI_STRB;
+  case IRT_I16: case IRT_U16: return ARMI_STRH;
   case IRT_NUM: lua_assert(0);
   case IRT_FLOAT:
-  default: ai = ARMI_STR;
-  use_lso:
-    emit_lso(as, ai, src, idx, ofs);
-    return;
+  default: return ARMI_STR;
+  }
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+  ARMIns ai = asm_fxloadins(ir);
+  int32_t ofs;
+  if (ir->op2 == IRFL_TAB_ARRAY) {
+    ofs = asm_fuseabase(as, ir->op1);
+    if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
+      emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
+      return;
+    }
   }
-  emit_lsox(as, ai, src, idx, ofs);
+  ofs = field_ofs[ir->op2];
+  if ((ai & 0x04000000))
+    emit_lso(as, ai, dest, idx, ofs);
+  else
+    emit_lsox(as, ai, dest, idx, ofs);
+}
+
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+  Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+  IRIns *irf = IR(ir->op1);
+  Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+  int32_t ofs = field_ofs[irf->op2];
+  ARMIns ai = asm_fxstoreins(ir);
+  if ((ai & 0x04000000))
+    emit_lso(as, ai, src, idx, ofs);
+  else
+    emit_lsox(as, ai, src, idx, ofs);
+}
+
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+}
+
+static void asm_xstore(ASMState *as, IRIns *ir)
+{
+  Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+  asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+               rset_exclude(RSET_GPR, src));
 }
 
 static void asm_ahuvload(ASMState *as, IRIns *ir)
@@ -1687,11 +1771,13 @@ static void asm_ir(ASMState *as, IRIns *ir)
   case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
     asm_ahuvload(as, ir);
     break;
-  case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
+  case IR_FLOAD: asm_fload(as, ir); break;
+  case IR_XLOAD: asm_xload(as, ir); break;
   case IR_SLOAD: asm_sload(as, ir); break;
 
   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
-  case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
+  case IR_FSTORE: asm_fstore(as, ir); break;
+  case IR_XSTORE: asm_xstore(as, ir); break;
 
   /* Allocations. */
   case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
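A note on the repeated (ai & 0x04000000) test above, for readers without the ARM manual at hand: bit 26 is set in the encodings of the word/byte transfers (LDR, LDRB, STR, STRB), which take a 12-bit immediate offset and allow a shifted register offset, while the halfword/signed-byte forms (LDRH, LDRSH, LDRSB, STRH) keep bit 26 clear and only carry an 8-bit immediate with no shifted index. That is where the 4096 vs. 256 limit and the emit_lso/emit_lsox split come from, and why the lsl #2 fusion is only attempted for the bit-26 group. A minimal sketch of the same range check, with the mask written out as a plain constant (the ARMIns values themselves are LuaJIT-internal):

#include <stdint.h>

/* Sketch only, assuming the instruction layout described above:
 * word/byte transfers (bit 26 set) accept immediate offsets in
 * (-4096, 4096), halfword/signed-byte transfers only in (-256, 256).
 * Offsets outside the range must fall back to a register operand. */
static int imm_offset_fits(uint32_t ins, int32_t ofs)
{
  int32_t lim = (ins & 0x04000000u) ? 4096 : 256;
  return ofs > -lim && ofs < lim;
}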
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index ed5e8272..8373faaa 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -526,6 +526,13 @@ again:
     idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1));
     sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info)));
     idx = crec_reassoc_ofs(J, idx, &ofs, sz);
+#if LJ_TARGET_ARM
+    /* Hoist base add to allow fusion of shifts into operands. */
+    if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs && (sz == 1 || sz == 4)) {
+      ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs));
+      ofs = 0;
+    }
+#endif
     idx = emitir(IRT(IR_MUL, IRT_INTP), idx, lj_ir_kintp(J, sz));
     ptr = emitir(IRT(IR_ADD, IRT_PTR), idx, ptr);
   }
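The lj_crecord.c change complements the backend: by adding the constant part of the address to the base pointer before the scaled-index multiply, the add becomes loop-invariant (hence the JIT_F_OPT_LOOP guard) and the per-element XLOAD/XSTORE is left in the base + index*size shape that asm_fusexref can encode as a single [rn, rm, lsl #2] operand. A hand-written C analogue of the effect (illustrative only; the struct layout below is made up, and the real rewrite happens on LuaJIT IR):

#include <stdint.h>

/* Hypothetical layout, chosen only so the element access carries a
 * constant byte offset that can be folded into the base pointer. */
typedef struct { int32_t hdr[2]; int32_t data[64]; } box_t;

int32_t sum(const box_t *p, int n)
{
  const int32_t *base = p->data;  /* constant offset folded in once, outside the loop */
  int32_t s = 0;
  int i;
  for (i = 0; i < n; i++)
    s += base[i];  /* per-iteration load is base + i*4: ldr rX, [rbase, ri, lsl #2] */
  return s;
}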
