aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2023-11-14 22:50:21 +0100
committer: Mike Pall <mike> 2023-11-14 22:50:21 +0100
commit: 644723649ea04cb23b72c814b88b72a29e4afed4 (patch)
tree: e994573ad17a726d73b48af0d623036c5f33e91b
parent: d854d00ce94b274359e5181bed13e977420daf5c (diff)
download: luajit-644723649ea04cb23b72c814b88b72a29e4afed4.tar.gz
luajit-644723649ea04cb23b72c814b88b72a29e4afed4.tar.bz2
luajit-644723649ea04cb23b72c814b88b72a29e4afed4.zip
x86/x64: Don't fuse loads across IR_NEWREF.
Reported by Peter Cawley. #1117
-rw-r--r-- src/lj_asm_x86.h 16
1 files changed, 9 insertions, 7 deletions
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 6b114802..ddbe9c55 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -93,7 +93,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
93/* Check if there's no conflicting instruction between curins and ref. 93/* Check if there's no conflicting instruction between curins and ref.
94** Also avoid fusing loads if there are multiple references. 94** Also avoid fusing loads if there are multiple references.
95*/ 95*/
96static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) 96static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
97{ 97{
98 IRIns *ir = as->ir; 98 IRIns *ir = as->ir;
99 IRRef i = as->curins; 99 IRRef i = as->curins;
@@ -102,7 +102,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
102 while (--i > ref) { 102 while (--i > ref) {
103 if (ir[i].o == conflict) 103 if (ir[i].o == conflict)
104 return 0; /* Conflict found. */ 104 return 0; /* Conflict found. */
105 else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref)) 105 else if ((check & 1) && ir[i].o == IR_NEWREF)
106 return 0;
107 else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
106 return 0; 108 return 0;
107 } 109 }
108 return 1; /* Ok, no conflict. */ 110 return 1; /* Ok, no conflict. */
@@ -118,7 +120,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
118 lua_assert(irb->op2 == IRFL_TAB_ARRAY); 120 lua_assert(irb->op2 == IRFL_TAB_ARRAY);
119 /* We can avoid the FLOAD of t->array for colocated arrays. */ 121 /* We can avoid the FLOAD of t->array for colocated arrays. */
120 if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && 122 if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
121 !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { 123 !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
122 as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ 124 as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
123 return irb->op1; /* Table obj. */ 125 return irb->op1; /* Table obj. */
124 } 126 }
@@ -337,7 +339,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
337 RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; 339 RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
338 if (ir->o == IR_SLOAD) { 340 if (ir->o == IR_SLOAD) {
339 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && 341 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
340 noconflict(as, ref, IR_RETF, 0)) { 342 noconflict(as, ref, IR_RETF, 2)) {
341 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); 343 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
342 as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); 344 as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
343 as->mrm.idx = RID_NONE; 345 as->mrm.idx = RID_NONE;
@@ -346,12 +348,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
346 } else if (ir->o == IR_FLOAD) { 348 } else if (ir->o == IR_FLOAD) {
347 /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ 349 /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
348 if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) && 350 if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
349 noconflict(as, ref, IR_FSTORE, 0)) { 351 noconflict(as, ref, IR_FSTORE, 2)) {
350 asm_fusefref(as, ir, xallow); 352 asm_fusefref(as, ir, xallow);
351 return RID_MRM; 353 return RID_MRM;
352 } 354 }
353 } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { 355 } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
354 if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { 356 if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD))) {
355 asm_fuseahuref(as, ir->op1, xallow); 357 asm_fuseahuref(as, ir->op1, xallow);
356 return RID_MRM; 358 return RID_MRM;
357 } 359 }
@@ -360,7 +362,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
360 ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). 362 ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
361 */ 363 */
362 if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && 364 if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
363 noconflict(as, ref, IR_XSTORE, 0)) { 365 noconflict(as, ref, IR_XSTORE, 2)) {
364 asm_fusexref(as, ir->op1, xallow); 366 asm_fusexref(as, ir->op1, xallow);
365 return RID_MRM; 367 return RID_MRM;
366 } 368 }