diff options
author | Mike Pall <mike> | 2023-11-14 22:50:21 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2023-11-14 22:50:21 +0100 |
commit | 644723649ea04cb23b72c814b88b72a29e4afed4 (patch) | |
tree | e994573ad17a726d73b48af0d623036c5f33e91b | |
parent | d854d00ce94b274359e5181bed13e977420daf5c (diff) | |
download | luajit-644723649ea04cb23b72c814b88b72a29e4afed4.tar.gz luajit-644723649ea04cb23b72c814b88b72a29e4afed4.tar.bz2 luajit-644723649ea04cb23b72c814b88b72a29e4afed4.zip |
x86/x64: Don't fuse loads across IR_NEWREF.
Reported by Peter Cawley. #1117
-rw-r--r-- | src/lj_asm_x86.h | 16 |
1 file changed, 9 insertions, 7 deletions
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 6b114802..ddbe9c55 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -93,7 +93,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) | |||
93 | /* Check if there's no conflicting instruction between curins and ref. | 93 | /* Check if there's no conflicting instruction between curins and ref. |
94 | ** Also avoid fusing loads if there are multiple references. | 94 | ** Also avoid fusing loads if there are multiple references. |
95 | */ | 95 | */ |
96 | static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) | 96 | static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check) |
97 | { | 97 | { |
98 | IRIns *ir = as->ir; | 98 | IRIns *ir = as->ir; |
99 | IRRef i = as->curins; | 99 | IRRef i = as->curins; |
@@ -102,7 +102,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload) | |||
102 | while (--i > ref) { | 102 | while (--i > ref) { |
103 | if (ir[i].o == conflict) | 103 | if (ir[i].o == conflict) |
104 | return 0; /* Conflict found. */ | 104 | return 0; /* Conflict found. */ |
105 | else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref)) | 105 | else if ((check & 1) && ir[i].o == IR_NEWREF) |
106 | return 0; | ||
107 | else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref)) | ||
106 | return 0; | 108 | return 0; |
107 | } | 109 | } |
108 | return 1; /* Ok, no conflict. */ | 110 | return 1; /* Ok, no conflict. */ |
@@ -118,7 +120,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) | |||
118 | lua_assert(irb->op2 == IRFL_TAB_ARRAY); | 120 | lua_assert(irb->op2 == IRFL_TAB_ARRAY); |
119 | /* We can avoid the FLOAD of t->array for colocated arrays. */ | 121 | /* We can avoid the FLOAD of t->array for colocated arrays. */ |
120 | if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && | 122 | if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && |
121 | !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { | 123 | !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) { |
122 | as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ | 124 | as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */ |
123 | return irb->op1; /* Table obj. */ | 125 | return irb->op1; /* Table obj. */ |
124 | } | 126 | } |
@@ -337,7 +339,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
337 | RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; | 339 | RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; |
338 | if (ir->o == IR_SLOAD) { | 340 | if (ir->o == IR_SLOAD) { |
339 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && | 341 | if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && |
340 | noconflict(as, ref, IR_RETF, 0)) { | 342 | noconflict(as, ref, IR_RETF, 2)) { |
341 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); | 343 | as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); |
342 | as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); | 344 | as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); |
343 | as->mrm.idx = RID_NONE; | 345 | as->mrm.idx = RID_NONE; |
@@ -346,12 +348,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
346 | } else if (ir->o == IR_FLOAD) { | 348 | } else if (ir->o == IR_FLOAD) { |
347 | /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ | 349 | /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ |
348 | if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) && | 350 | if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) && |
349 | noconflict(as, ref, IR_FSTORE, 0)) { | 351 | noconflict(as, ref, IR_FSTORE, 2)) { |
350 | asm_fusefref(as, ir, xallow); | 352 | asm_fusefref(as, ir, xallow); |
351 | return RID_MRM; | 353 | return RID_MRM; |
352 | } | 354 | } |
353 | } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { | 355 | } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { |
354 | if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { | 356 | if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD))) { |
355 | asm_fuseahuref(as, ir->op1, xallow); | 357 | asm_fuseahuref(as, ir->op1, xallow); |
356 | return RID_MRM; | 358 | return RID_MRM; |
357 | } | 359 | } |
@@ -360,7 +362,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
360 | ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). | 362 | ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). |
361 | */ | 363 | */ |
362 | if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && | 364 | if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) && |
363 | noconflict(as, ref, IR_XSTORE, 0)) { | 365 | noconflict(as, ref, IR_XSTORE, 2)) { |
364 | asm_fusexref(as, ir->op1, xallow); | 366 | asm_fusexref(as, ir->op1, xallow); |
365 | return RID_MRM; | 367 | return RID_MRM; |
366 | } | 368 | } |