aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Pall <mike>2011-06-05 18:59:35 +0200
committerMike Pall <mike>2011-06-05 18:59:35 +0200
commit7ad4908a4880523c5822d006055dc5d6db4556ea (patch)
treeed031bb0133708d1eae03720f28d02d293b95a9c
parentb88eaa11f351257612657352daa0a76e60e66a75 (diff)
downloadluajit-7ad4908a4880523c5822d006055dc5d6db4556ea.tar.gz
luajit-7ad4908a4880523c5822d006055dc5d6db4556ea.tar.bz2
luajit-7ad4908a4880523c5822d006055dc5d6db4556ea.zip
ARM: Fuse XLOAD/XSTORE operands.
-rw-r--r--src/lj_asm_arm.h190
-rw-r--r--src/lj_crecord.c7
2 files changed, 145 insertions, 52 deletions
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 74a3a927..31b300bf 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -261,6 +261,83 @@ static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow)
261 return ra_allocref(as, ref, allow); 261 return ra_allocref(as, ref, allow);
262} 262}
263 263
264/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */
/* Returns the pre-shift operand (ir->op1) when the referenced instruction is
** a fusible "x << 2", so the caller can encode the shift into the load/store
** operand itself (lsl #2 register-offset form). Returns 0 when no fusion is
** possible and the operand must be materialized normally.
*/
265static IRRef asm_fuselsl2(ASMState *as, IRRef ref)
266{
267 IRIns *ir = IR(ref);
  /* Fusible only if: no register assigned yet (ra_noreg), fusion allowed for
  ** this ref (mayfuse), opcode is BSHL, and the shift amount is the
  ** constant 2.
  */
268 if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL &&
269 irref_isk(ir->op2) && IR(ir->op2)->i == 2)
270 return ir->op1;
271 return 0; /* No fusion. */
272}
273
274/* Fuse XLOAD/XSTORE reference into load/store operand. */
/* Emits the load/store instruction ai with destination/source register rd,
** fusing the address computation rooted at ref (ADD with constant or
** shifted-register operand, STRREF with constant part) into the addressing
** mode where possible. allow restricts the registers used for operands.
** NOTE(review): the test (ai & 0x04000000) appears to distinguish the
** word/byte LDR/STR encoding (12-bit immediate, register-offset-with-shift
** capable) from the halfword/signed-byte encoding (8-bit immediate, no
** shifted register offset) -- confirm against the ARMI_* definitions.
*/
275static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
276 RegSet allow)
277{
278 IRIns *ir = IR(ref);
279 int32_t ofs = 0;
280 Reg base;
  /* Only fuse if the address has no register yet and fusion is allowed. */
281 if (ra_noreg(ir->r) && mayfuse(as, ref)) {
    /* Immediate-offset range depends on the encoding: +-4095 for the
    ** word/byte form, +-255 for the halfword/signed form.
    */
282 int32_t lim = (ai & 0x04000000) ? 4096 : 256;
283 if (ir->o == IR_ADD) {
      /* base + constant within immediate range: fold into the offset. */
284 if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i) > -lim && ofs < lim) {
285 ref = ir->op1;
286 } else {
        /* base + index: use the register-offset addressing mode. */
287 IRRef lref = ir->op1, rref = ir->op2;
288 Reg rn, rm;
        /* Only the word/byte encoding supports a shifted register offset,
        ** so try to fold a "<< 2" into the index operand (lsl #2).
        */
289 if ((ai & 0x04000000)) {
290 IRRef sref = asm_fuselsl2(as, rref);
291 if (sref) {
292 rref = sref;
293 ai |= ARMF_SH(ARMSH_LSL, 2);
          /* Shift was on the left operand: swap so the shifted one is rm. */
294 } else if ((sref = asm_fuselsl2(as, lref)) != 0) {
295 lref = rref;
296 rref = sref;
297 ai |= ARMF_SH(ARMSH_LSL, 2);
298 }
299 }
300 rn = ra_alloc1(as, lref, allow);
301 rm = ra_alloc1(as, rref, rset_exclude(allow, rn));
302 if ((ai & 0x04000000)) ai |= ARMI_LS_R;
        /* Pre-indexed, add-offset register addressing: [rn, rm {, lsl #2}]. */
303 emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
304 return;
305 }
306 } else if (ir->o == IR_STRREF) {
      /* String reference: data starts after the GCstr header. */
307 ofs = (int32_t)sizeof(GCstr);
308 if (irref_isk(ir->op2)) {
309 ofs += IR(ir->op2)->i;
310 ref = ir->op1;
311 } else if (irref_isk(ir->op1)) {
312 ofs += IR(ir->op1)->i;
313 ref = ir->op2;
314 } else {
315 /* NYI: Fuse ADD with constant. */
        /* Neither operand constant: compute str+index into rd first, then
        ** load/store from [rd, #sizeof(GCstr)]. Emitted in reverse order
        ** (this is a backwards code emitter): ADD is placed before the
        ** load/store in the final instruction stream.
        */
316 Reg rn = ra_alloc1(as, ir->op1, allow);
317 uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
318 if ((ai & 0x04000000))
319 emit_lso(as, ai, rd, rd, ofs);
320 else
321 emit_lsox(as, ai, rd, rd, ofs);
322 emit_dn(as, ARMI_ADD^m, rd, rn);
323 return;
324 }
      /* Folded offset out of immediate range: materialize it in a register
      ** and use the register-offset form instead.
      */
325 if (ofs <= -lim || ofs >= lim) {
326 Reg rn = ra_alloc1(as, ref, allow);
327 Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
328 if ((ai & 0x04000000)) ai |= ARMI_LS_R;
329 emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);
330 return;
331 }
332 }
333 }
  /* Fallback / fused-constant path: [base, #ofs] with the proper encoding. */
334 base = ra_alloc1(as, ref, allow);
335 if ((ai & 0x04000000))
336 emit_lso(as, ai, rd, base, ofs);
337 else
338 emit_lsox(as, ai, rd, base, ofs);
339}
340
264/* -- Calls --------------------------------------------------------------- */ 341/* -- Calls --------------------------------------------------------------- */
265 342
266/* Generate a call to a C function. */ 343/* Generate a call to a C function. */
@@ -749,68 +826,75 @@ static void asm_strref(ASMState *as, IRIns *ir)
749 826
750/* -- Loads and stores ---------------------------------------------------- */ 827/* -- Loads and stores ---------------------------------------------------- */
751 828
752static void asm_fxload(ASMState *as, IRIns *ir) 829static ARMIns asm_fxloadins(IRIns *ir)
753{ 830{
754 Reg idx, dest = ra_dest(as, ir, RSET_GPR);
755 int32_t ofs;
756 ARMIns ai;
757 if (ir->o == IR_FLOAD) {
758 idx = ra_alloc1(as, ir->op1, RSET_GPR);
759 if (ir->op2 == IRFL_TAB_ARRAY) {
760 ofs = asm_fuseabase(as, ir->op1);
761 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
762 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
763 return;
764 }
765 }
766 ofs = field_ofs[ir->op2];
767 } else {
768 /* NYI: Fuse xload operands. */
769 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
770 idx = ra_alloc1(as, ir->op1, RSET_GPR);
771 ofs = 0;
772 }
773 switch (irt_type(ir->t)) { 831 switch (irt_type(ir->t)) {
774 case IRT_I8: ai = ARMI_LDRSB; break; 832 case IRT_I8: return ARMI_LDRSB;
775 case IRT_U8: ai = ARMI_LDRB; goto use_lso; 833 case IRT_U8: return ARMI_LDRB;
776 case IRT_I16: ai = ARMI_LDRSH; break; 834 case IRT_I16: return ARMI_LDRSH;
777 case IRT_U16: ai = ARMI_LDRH; break; 835 case IRT_U16: return ARMI_LDRH;
778 case IRT_NUM: lua_assert(0); 836 case IRT_NUM: lua_assert(0);
779 case IRT_FLOAT: 837 case IRT_FLOAT:
780 default: ai = ARMI_LDR; 838 default: return ARMI_LDR;
781 use_lso:
782 emit_lso(as, ai, dest, idx, ofs);
783 return;
784 } 839 }
785 emit_lsox(as, ai, dest, idx, ofs);
786} 840}
787 841
788static void asm_fxstore(ASMState *as, IRIns *ir) 842static ARMIns asm_fxstoreins(IRIns *ir)
789{ 843{
790 Reg idx, src = ra_alloc1(as, ir->op2, RSET_GPR);
791 RegSet allow = rset_exclude(RSET_GPR, src);
792 int32_t ofs;
793 ARMIns ai;
794 if (ir->o == IR_FSTORE) {
795 IRIns *irf = IR(ir->op1);
796 idx = ra_alloc1(as, irf->op1, allow);
797 ofs = field_ofs[irf->op2];
798 } else {
799 /* NYI: Fuse xstore operands. */
800 idx = ra_alloc1(as, ir->op1, allow);
801 ofs = 0;
802 }
803 switch (irt_type(ir->t)) { 844 switch (irt_type(ir->t)) {
804 case IRT_I8: case IRT_U8: ai = ARMI_STRB; goto use_lso; 845 case IRT_I8: case IRT_U8: return ARMI_STRB;
805 case IRT_I16: case IRT_U16: ai = ARMI_STRH; break; 846 case IRT_I16: case IRT_U16: return ARMI_STRH;
806 case IRT_NUM: lua_assert(0); 847 case IRT_NUM: lua_assert(0);
807 case IRT_FLOAT: 848 case IRT_FLOAT:
808 default: ai = ARMI_STR; 849 default: return ARMI_STR;
809 use_lso: 850 }
810 emit_lso(as, ai, src, idx, ofs); 851}
811 return; 852
853static void asm_fload(ASMState *as, IRIns *ir)
854{
855 Reg dest = ra_dest(as, ir, RSET_GPR);
856 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
857 ARMIns ai = asm_fxloadins(ir);
858 int32_t ofs;
859 if (ir->op2 == IRFL_TAB_ARRAY) {
860 ofs = asm_fuseabase(as, ir->op1);
861 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
862 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
863 return;
864 }
812 } 865 }
813 emit_lsox(as, ai, src, idx, ofs); 866 ofs = field_ofs[ir->op2];
867 if ((ai & 0x04000000))
868 emit_lso(as, ai, dest, idx, ofs);
869 else
870 emit_lsox(as, ai, dest, idx, ofs);
871}
872
873static void asm_fstore(ASMState *as, IRIns *ir)
874{
875 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
876 IRIns *irf = IR(ir->op1);
877 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
878 int32_t ofs = field_ofs[irf->op2];
879 ARMIns ai = asm_fxstoreins(ir);
880 if ((ai & 0x04000000))
881 emit_lso(as, ai, src, idx, ofs);
882 else
883 emit_lsox(as, ai, src, idx, ofs);
884}
885
886static void asm_xload(ASMState *as, IRIns *ir)
887{
888 Reg dest = ra_dest(as, ir, RSET_GPR);
889 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
890 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
891}
892
893static void asm_xstore(ASMState *as, IRIns *ir)
894{
895 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
896 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
897 rset_exclude(RSET_GPR, src));
814} 898}
815 899
816static void asm_ahuvload(ASMState *as, IRIns *ir) 900static void asm_ahuvload(ASMState *as, IRIns *ir)
@@ -1687,11 +1771,13 @@ static void asm_ir(ASMState *as, IRIns *ir)
1687 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: 1771 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1688 asm_ahuvload(as, ir); 1772 asm_ahuvload(as, ir);
1689 break; 1773 break;
1690 case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; 1774 case IR_FLOAD: asm_fload(as, ir); break;
1775 case IR_XLOAD: asm_xload(as, ir); break;
1691 case IR_SLOAD: asm_sload(as, ir); break; 1776 case IR_SLOAD: asm_sload(as, ir); break;
1692 1777
1693 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; 1778 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1694 case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break; 1779 case IR_FSTORE: asm_fstore(as, ir); break;
1780 case IR_XSTORE: asm_xstore(as, ir); break;
1695 1781
1696 /* Allocations. */ 1782 /* Allocations. */
1697 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; 1783 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index ed5e8272..8373faaa 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -526,6 +526,13 @@ again:
526 idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1)); 526 idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1));
527 sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info))); 527 sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info)));
528 idx = crec_reassoc_ofs(J, idx, &ofs, sz); 528 idx = crec_reassoc_ofs(J, idx, &ofs, sz);
529#if LJ_TARGET_ARM
530 /* Hoist base add to allow fusion of shifts into operands. */
531 if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs && (sz == 1 || sz == 4)) {
532 ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs));
533 ofs = 0;
534 }
535#endif
529 idx = emitir(IRT(IR_MUL, IRT_INTP), idx, lj_ir_kintp(J, sz)); 536 idx = emitir(IRT(IR_MUL, IRT_INTP), idx, lj_ir_kintp(J, sz));
530 ptr = emitir(IRT(IR_ADD, IRT_PTR), idx, ptr); 537 ptr = emitir(IRT(IR_ADD, IRT_PTR), idx, ptr);
531 } 538 }