diff options
author | Mike Pall <mike> | 2011-06-05 18:59:35 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2011-06-05 18:59:35 +0200 |
commit | 7ad4908a4880523c5822d006055dc5d6db4556ea (patch) | |
tree | ed031bb0133708d1eae03720f28d02d293b95a9c | |
parent | b88eaa11f351257612657352daa0a76e60e66a75 (diff) | |
download | luajit-7ad4908a4880523c5822d006055dc5d6db4556ea.tar.gz luajit-7ad4908a4880523c5822d006055dc5d6db4556ea.tar.bz2 luajit-7ad4908a4880523c5822d006055dc5d6db4556ea.zip |
ARM: Fuse XLOAD/XSTORE operands.
-rw-r--r-- | src/lj_asm_arm.h | 190 | ||||
-rw-r--r-- | src/lj_crecord.c | 7 |
2 files changed, 145 insertions, 52 deletions
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 74a3a927..31b300bf 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -261,6 +261,83 @@ static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow) | |||
261 | return ra_allocref(as, ref, allow); | 261 | return ra_allocref(as, ref, allow); |
262 | } | 262 | } |
263 | 263 | ||
264 | /* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */ | ||
265 | static IRRef asm_fuselsl2(ASMState *as, IRRef ref) | ||
266 | { | ||
267 | IRIns *ir = IR(ref); | ||
268 | if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL && | ||
269 | irref_isk(ir->op2) && IR(ir->op2)->i == 2) | ||
270 | return ir->op1; | ||
271 | return 0; /* No fusion. */ | ||
272 | } | ||
273 | |||
274 | /* Fuse XLOAD/XSTORE reference into load/store operand. */ | ||
275 | static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, | ||
276 | RegSet allow) | ||
277 | { | ||
278 | IRIns *ir = IR(ref); | ||
279 | int32_t ofs = 0; | ||
280 | Reg base; | ||
281 | if (ra_noreg(ir->r) && mayfuse(as, ref)) { | ||
282 | int32_t lim = (ai & 0x04000000) ? 4096 : 256; | ||
283 | if (ir->o == IR_ADD) { | ||
284 | if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i) > -lim && ofs < lim) { | ||
285 | ref = ir->op1; | ||
286 | } else { | ||
287 | IRRef lref = ir->op1, rref = ir->op2; | ||
288 | Reg rn, rm; | ||
289 | if ((ai & 0x04000000)) { | ||
290 | IRRef sref = asm_fuselsl2(as, rref); | ||
291 | if (sref) { | ||
292 | rref = sref; | ||
293 | ai |= ARMF_SH(ARMSH_LSL, 2); | ||
294 | } else if ((sref = asm_fuselsl2(as, lref)) != 0) { | ||
295 | lref = rref; | ||
296 | rref = sref; | ||
297 | ai |= ARMF_SH(ARMSH_LSL, 2); | ||
298 | } | ||
299 | } | ||
300 | rn = ra_alloc1(as, lref, allow); | ||
301 | rm = ra_alloc1(as, rref, rset_exclude(allow, rn)); | ||
302 | if ((ai & 0x04000000)) ai |= ARMI_LS_R; | ||
303 | emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); | ||
304 | return; | ||
305 | } | ||
306 | } else if (ir->o == IR_STRREF) { | ||
307 | ofs = (int32_t)sizeof(GCstr); | ||
308 | if (irref_isk(ir->op2)) { | ||
309 | ofs += IR(ir->op2)->i; | ||
310 | ref = ir->op1; | ||
311 | } else if (irref_isk(ir->op1)) { | ||
312 | ofs += IR(ir->op1)->i; | ||
313 | ref = ir->op2; | ||
314 | } else { | ||
315 | /* NYI: Fuse ADD with constant. */ | ||
316 | Reg rn = ra_alloc1(as, ir->op1, allow); | ||
317 | uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); | ||
318 | if ((ai & 0x04000000)) | ||
319 | emit_lso(as, ai, rd, rd, ofs); | ||
320 | else | ||
321 | emit_lsox(as, ai, rd, rd, ofs); | ||
322 | emit_dn(as, ARMI_ADD^m, rd, rn); | ||
323 | return; | ||
324 | } | ||
325 | if (ofs <= -lim || ofs >= lim) { | ||
326 | Reg rn = ra_alloc1(as, ref, allow); | ||
327 | Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); | ||
328 | if ((ai & 0x04000000)) ai |= ARMI_LS_R; | ||
329 | emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); | ||
330 | return; | ||
331 | } | ||
332 | } | ||
333 | } | ||
334 | base = ra_alloc1(as, ref, allow); | ||
335 | if ((ai & 0x04000000)) | ||
336 | emit_lso(as, ai, rd, base, ofs); | ||
337 | else | ||
338 | emit_lsox(as, ai, rd, base, ofs); | ||
339 | } | ||
340 | |||
264 | /* -- Calls --------------------------------------------------------------- */ | 341 | /* -- Calls --------------------------------------------------------------- */ |
265 | 342 | ||
266 | /* Generate a call to a C function. */ | 343 | /* Generate a call to a C function. */ |
@@ -749,68 +826,75 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
749 | 826 | ||
750 | /* -- Loads and stores ---------------------------------------------------- */ | 827 | /* -- Loads and stores ---------------------------------------------------- */ |
751 | 828 | ||
752 | static void asm_fxload(ASMState *as, IRIns *ir) | 829 | static ARMIns asm_fxloadins(IRIns *ir) |
753 | { | 830 | { |
754 | Reg idx, dest = ra_dest(as, ir, RSET_GPR); | ||
755 | int32_t ofs; | ||
756 | ARMIns ai; | ||
757 | if (ir->o == IR_FLOAD) { | ||
758 | idx = ra_alloc1(as, ir->op1, RSET_GPR); | ||
759 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
760 | ofs = asm_fuseabase(as, ir->op1); | ||
761 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
762 | emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); | ||
763 | return; | ||
764 | } | ||
765 | } | ||
766 | ofs = field_ofs[ir->op2]; | ||
767 | } else { | ||
768 | /* NYI: Fuse xload operands. */ | ||
769 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | ||
770 | idx = ra_alloc1(as, ir->op1, RSET_GPR); | ||
771 | ofs = 0; | ||
772 | } | ||
773 | switch (irt_type(ir->t)) { | 831 | switch (irt_type(ir->t)) { |
774 | case IRT_I8: ai = ARMI_LDRSB; break; | 832 | case IRT_I8: return ARMI_LDRSB; |
775 | case IRT_U8: ai = ARMI_LDRB; goto use_lso; | 833 | case IRT_U8: return ARMI_LDRB; |
776 | case IRT_I16: ai = ARMI_LDRSH; break; | 834 | case IRT_I16: return ARMI_LDRSH; |
777 | case IRT_U16: ai = ARMI_LDRH; break; | 835 | case IRT_U16: return ARMI_LDRH; |
778 | case IRT_NUM: lua_assert(0); | 836 | case IRT_NUM: lua_assert(0); |
779 | case IRT_FLOAT: | 837 | case IRT_FLOAT: |
780 | default: ai = ARMI_LDR; | 838 | default: return ARMI_LDR; |
781 | use_lso: | ||
782 | emit_lso(as, ai, dest, idx, ofs); | ||
783 | return; | ||
784 | } | 839 | } |
785 | emit_lsox(as, ai, dest, idx, ofs); | ||
786 | } | 840 | } |
787 | 841 | ||
788 | static void asm_fxstore(ASMState *as, IRIns *ir) | 842 | static ARMIns asm_fxstoreins(IRIns *ir) |
789 | { | 843 | { |
790 | Reg idx, src = ra_alloc1(as, ir->op2, RSET_GPR); | ||
791 | RegSet allow = rset_exclude(RSET_GPR, src); | ||
792 | int32_t ofs; | ||
793 | ARMIns ai; | ||
794 | if (ir->o == IR_FSTORE) { | ||
795 | IRIns *irf = IR(ir->op1); | ||
796 | idx = ra_alloc1(as, irf->op1, allow); | ||
797 | ofs = field_ofs[irf->op2]; | ||
798 | } else { | ||
799 | /* NYI: Fuse xstore operands. */ | ||
800 | idx = ra_alloc1(as, ir->op1, allow); | ||
801 | ofs = 0; | ||
802 | } | ||
803 | switch (irt_type(ir->t)) { | 844 | switch (irt_type(ir->t)) { |
804 | case IRT_I8: case IRT_U8: ai = ARMI_STRB; goto use_lso; | 845 | case IRT_I8: case IRT_U8: return ARMI_STRB; |
805 | case IRT_I16: case IRT_U16: ai = ARMI_STRH; break; | 846 | case IRT_I16: case IRT_U16: return ARMI_STRH; |
806 | case IRT_NUM: lua_assert(0); | 847 | case IRT_NUM: lua_assert(0); |
807 | case IRT_FLOAT: | 848 | case IRT_FLOAT: |
808 | default: ai = ARMI_STR; | 849 | default: return ARMI_STR; |
809 | use_lso: | 850 | } |
810 | emit_lso(as, ai, src, idx, ofs); | 851 | } |
811 | return; | 852 | |
853 | static void asm_fload(ASMState *as, IRIns *ir) | ||
854 | { | ||
855 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
856 | Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); | ||
857 | ARMIns ai = asm_fxloadins(ir); | ||
858 | int32_t ofs; | ||
859 | if (ir->op2 == IRFL_TAB_ARRAY) { | ||
860 | ofs = asm_fuseabase(as, ir->op1); | ||
861 | if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ | ||
862 | emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); | ||
863 | return; | ||
864 | } | ||
812 | } | 865 | } |
813 | emit_lsox(as, ai, src, idx, ofs); | 866 | ofs = field_ofs[ir->op2]; |
867 | if ((ai & 0x04000000)) | ||
868 | emit_lso(as, ai, dest, idx, ofs); | ||
869 | else | ||
870 | emit_lsox(as, ai, dest, idx, ofs); | ||
871 | } | ||
872 | |||
873 | static void asm_fstore(ASMState *as, IRIns *ir) | ||
874 | { | ||
875 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | ||
876 | IRIns *irf = IR(ir->op1); | ||
877 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | ||
878 | int32_t ofs = field_ofs[irf->op2]; | ||
879 | ARMIns ai = asm_fxstoreins(ir); | ||
880 | if ((ai & 0x04000000)) | ||
881 | emit_lso(as, ai, src, idx, ofs); | ||
882 | else | ||
883 | emit_lsox(as, ai, src, idx, ofs); | ||
884 | } | ||
885 | |||
886 | static void asm_xload(ASMState *as, IRIns *ir) | ||
887 | { | ||
888 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
889 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | ||
890 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); | ||
891 | } | ||
892 | |||
893 | static void asm_xstore(ASMState *as, IRIns *ir) | ||
894 | { | ||
895 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | ||
896 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
897 | rset_exclude(RSET_GPR, src)); | ||
814 | } | 898 | } |
815 | 899 | ||
816 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 900 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
@@ -1687,11 +1771,13 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
1687 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | 1771 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: |
1688 | asm_ahuvload(as, ir); | 1772 | asm_ahuvload(as, ir); |
1689 | break; | 1773 | break; |
1690 | case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; | 1774 | case IR_FLOAD: asm_fload(as, ir); break; |
1775 | case IR_XLOAD: asm_xload(as, ir); break; | ||
1691 | case IR_SLOAD: asm_sload(as, ir); break; | 1776 | case IR_SLOAD: asm_sload(as, ir); break; |
1692 | 1777 | ||
1693 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | 1778 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; |
1694 | case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break; | 1779 | case IR_FSTORE: asm_fstore(as, ir); break; |
1780 | case IR_XSTORE: asm_xstore(as, ir); break; | ||
1695 | 1781 | ||
1696 | /* Allocations. */ | 1782 | /* Allocations. */ |
1697 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; | 1783 | case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; |
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index ed5e8272..8373faaa 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -526,6 +526,13 @@ again: | |||
526 | idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1)); | 526 | idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1)); |
527 | sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info))); | 527 | sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info))); |
528 | idx = crec_reassoc_ofs(J, idx, &ofs, sz); | 528 | idx = crec_reassoc_ofs(J, idx, &ofs, sz); |
529 | #if LJ_TARGET_ARM | ||
530 | /* Hoist base add to allow fusion of shifts into operands. */ | ||
531 | if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs && (sz == 1 || sz == 4)) { | ||
532 | ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); | ||
533 | ofs = 0; | ||
534 | } | ||
535 | #endif | ||
529 | idx = emitir(IRT(IR_MUL, IRT_INTP), idx, lj_ir_kintp(J, sz)); | 536 | idx = emitir(IRT(IR_MUL, IRT_INTP), idx, lj_ir_kintp(J, sz)); |
530 | ptr = emitir(IRT(IR_ADD, IRT_PTR), idx, ptr); | 537 | ptr = emitir(IRT(IR_ADD, IRT_PTR), idx, ptr); |
531 | } | 538 | } |