diff options
author | Mike Pall <mike> | 2010-12-08 03:26:53 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2010-12-08 03:26:53 +0100 |
commit | 3c78a7f46877e2c151ba7043b2b50d35e09704c5 (patch) | |
tree | 24a8679b122c7ea948909a51db94ed4c328ee629 /src | |
parent | 72744dabd90f23a3d9bd3070c1c5cc537da86867 (diff) | |
download | luajit-3c78a7f46877e2c151ba7043b2b50d35e09704c5.tar.gz luajit-3c78a7f46877e2c151ba7043b2b50d35e09704c5.tar.bz2 luajit-3c78a7f46877e2c151ba7043b2b50d35e09704c5.zip |
Avoid fusing loads if there are multiple references.
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm.c | 21 |
1 file changed, 13 insertions, 8 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index 85587a98..70d94c39 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -1144,16 +1144,21 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | |||
1144 | return 0; | 1144 | return 0; |
1145 | } | 1145 | } |
1146 | 1146 | ||
1147 | /* Check if there's no conflicting instruction between curins and ref. */ | 1147 | /* Check if there's no conflicting instruction between curins and ref. |
1148 | static int noconflict(ASMState *as, IRRef ref, IROp conflict) | 1148 | ** Also avoid fusing loads if there are multiple references. |
1149 | */ | ||
1150 | static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) | ||
1149 | { | 1151 | { |
1150 | IRIns *ir = as->ir; | 1152 | IRIns *ir = as->ir; |
1151 | IRRef i = as->curins; | 1153 | IRRef i = as->curins; |
1152 | if (i > ref + CONFLICT_SEARCH_LIM) | 1154 | if (i > ref + CONFLICT_SEARCH_LIM) |
1153 | return 0; /* Give up, ref is too far away. */ | 1155 | return 0; /* Give up, ref is too far away. */ |
1154 | while (--i > ref) | 1156 | while (--i > ref) { |
1155 | if (ir[i].o == conflict) | 1157 | if (ir[i].o == conflict) |
1156 | return 0; /* Conflict found. */ | 1158 | return 0; /* Conflict found. */ |
1159 | else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref)) | ||
1160 | return 0; | ||
1161 | } | ||
1157 | return 1; /* Ok, no conflict. */ | 1162 | return 1; /* Ok, no conflict. */ |
1158 | } | 1163 | } |
1159 | 1164 | ||
@@ -1167,7 +1172,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) | |||
1167 | lua_assert(irb->op2 == IRFL_TAB_ARRAY); | 1172 | lua_assert(irb->op2 == IRFL_TAB_ARRAY); |
1168 | /* We can avoid the FLOAD of t->array for colocated arrays. */ | 1173 | /* We can avoid the FLOAD of t->array for colocated arrays. */ |
1169 | if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && | 1174 | if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && |
1170 | noconflict(as, irb->op1, IR_NEWREF)) { | 1175 | noconflict(as, irb->op1, IR_NEWREF, 1)) { |
1171 | as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ | 1176 | as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ |
1172 | return irb->op1; /* Table obj. */ | 1177 | return irb->op1; /* Table obj. */ |
1173 | } | 1178 | } |
@@ -1377,7 +1382,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
1377 | RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; | 1382 | RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; |
1378 | if (ir->o == IR_SLOAD) { | 1383 | if (ir->o == IR_SLOAD) { |
1379 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && | 1384 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && |
1380 | noconflict(as, ref, IR_RETF)) { | 1385 | noconflict(as, ref, IR_RETF, 0)) { |
1381 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); | 1386 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); |
1382 | as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); | 1387 | as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); |
1383 | as->mrm.idx = RID_NONE; | 1388 | as->mrm.idx = RID_NONE; |
@@ -1386,12 +1391,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
1386 | } else if (ir->o == IR_FLOAD) { | 1391 | } else if (ir->o == IR_FLOAD) { |
1387 | /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ | 1392 | /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ |
1388 | if ((irt_isint(ir->t) || irt_isaddr(ir->t)) && | 1393 | if ((irt_isint(ir->t) || irt_isaddr(ir->t)) && |
1389 | noconflict(as, ref, IR_FSTORE)) { | 1394 | noconflict(as, ref, IR_FSTORE, 0)) { |
1390 | asm_fusefref(as, ir, xallow); | 1395 | asm_fusefref(as, ir, xallow); |
1391 | return RID_MRM; | 1396 | return RID_MRM; |
1392 | } | 1397 | } |
1393 | } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { | 1398 | } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { |
1394 | if (noconflict(as, ref, ir->o + IRDELTA_L2S)) { | 1399 | if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { |
1395 | asm_fuseahuref(as, ir->op1, xallow); | 1400 | asm_fuseahuref(as, ir->op1, xallow); |
1396 | return RID_MRM; | 1401 | return RID_MRM; |
1397 | } | 1402 | } |
@@ -1400,7 +1405,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
1400 | ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). | 1405 | ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). |
1401 | */ | 1406 | */ |
1402 | if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && | 1407 | if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && |
1403 | noconflict(as, ref, IR_XSTORE)) { | 1408 | noconflict(as, ref, IR_XSTORE, 0)) { |
1404 | asm_fusexref(as, ir->op1, xallow); | 1409 | asm_fusexref(as, ir->op1, xallow); |
1405 | return RID_MRM; | 1410 | return RID_MRM; |
1406 | } | 1411 | } |