diff options
author | Mike Pall <mike> | 2023-11-05 16:34:46 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2023-11-05 16:34:46 +0100 |
commit | 07b3cd3cf9b57a3801a1ebc48144767e31671f21 (patch) | |
tree | f8d8eeed931dc75a7f3853271c14e087f76731ec /src | |
parent | 0afa1676b2d2aabf1f3101a2692eb0f1e291076a (diff) | |
download | luajit-07b3cd3cf9b57a3801a1ebc48144767e31671f21.tar.gz luajit-07b3cd3cf9b57a3801a1ebc48144767e31671f21.tar.bz2 luajit-07b3cd3cf9b57a3801a1ebc48144767e31671f21.zip |
Check for upvalue state transition in IR_UREFO.
Thanks to Peter Cawley. #1085
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm_arm.h | 28 | ||||
-rw-r--r-- | src/lj_asm_arm64.h | 20 | ||||
-rw-r--r-- | src/lj_asm_mips.h | 27 | ||||
-rw-r--r-- | src/lj_asm_ppc.h | 27 | ||||
-rw-r--r-- | src/lj_asm_x86.h | 25 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 47 | ||||
-rw-r--r-- | src/lj_opt_mem.c | 15 | ||||
-rw-r--r-- | src/lj_record.c | 13 | ||||
-rw-r--r-- | src/lj_state.c | 7 |
9 files changed, 146 insertions, 63 deletions
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index ac3d1b58..348cd79f 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h | |||
@@ -969,24 +969,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
969 | static void asm_uref(ASMState *as, IRIns *ir) | 969 | static void asm_uref(ASMState *as, IRIns *ir) |
970 | { | 970 | { |
971 | Reg dest = ra_dest(as, ir, RSET_GPR); | 971 | Reg dest = ra_dest(as, ir, RSET_GPR); |
972 | if (irref_isk(ir->op1)) { | 972 | int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); |
973 | if (irref_isk(ir->op1) && !guarded) { | ||
973 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 974 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
974 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | 975 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; |
975 | emit_lsptr(as, ARMI_LDR, dest, v); | 976 | emit_lsptr(as, ARMI_LDR, dest, v); |
976 | } else { | 977 | } else { |
977 | Reg uv = ra_scratch(as, RSET_GPR); | 978 | if (guarded) { |
978 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | 979 | asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ); |
979 | if (ir->o == IR_UREFC) { | ||
980 | asm_guardcc(as, CC_NE); | ||
981 | emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); | 980 | emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); |
982 | emit_opk(as, ARMI_ADD, dest, uv, | 981 | } |
982 | if (ir->o == IR_UREFC) | ||
983 | emit_opk(as, ARMI_ADD, dest, dest, | ||
983 | (int32_t)offsetof(GCupval, tv), RSET_GPR); | 984 | (int32_t)offsetof(GCupval, tv), RSET_GPR); |
984 | emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); | 985 | else |
986 | emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v)); | ||
987 | if (guarded) | ||
988 | emit_lso(as, ARMI_LDRB, RID_TMP, dest, | ||
989 | (int32_t)offsetof(GCupval, closed)); | ||
990 | if (irref_isk(ir->op1)) { | ||
991 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
992 | int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]); | ||
993 | emit_loadi(as, dest, k); | ||
985 | } else { | 994 | } else { |
986 | emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); | 995 | emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR), |
996 | (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); | ||
987 | } | 997 | } |
988 | emit_lso(as, ARMI_LDR, uv, func, | ||
989 | (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); | ||
990 | } | 998 | } |
991 | } | 999 | } |
992 | 1000 | ||
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 9f165fa8..5b40f4cc 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
@@ -931,22 +931,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
931 | static void asm_uref(ASMState *as, IRIns *ir) | 931 | static void asm_uref(ASMState *as, IRIns *ir) |
932 | { | 932 | { |
933 | Reg dest = ra_dest(as, ir, RSET_GPR); | 933 | Reg dest = ra_dest(as, ir, RSET_GPR); |
934 | if (irref_isk(ir->op1)) { | 934 | int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); |
935 | if (irref_isk(ir->op1) && !guarded) { | ||
935 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 936 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
936 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | 937 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; |
937 | emit_lsptr(as, A64I_LDRx, dest, v); | 938 | emit_lsptr(as, A64I_LDRx, dest, v); |
938 | } else { | 939 | } else { |
939 | if (ir->o == IR_UREFC) { | 940 | if (guarded) |
940 | asm_guardcnb(as, A64I_CBZ, RID_TMP); | 941 | asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP); |
942 | if (ir->o == IR_UREFC) | ||
941 | emit_opk(as, A64I_ADDx, dest, dest, | 943 | emit_opk(as, A64I_ADDx, dest, dest, |
942 | (int32_t)offsetof(GCupval, tv), RSET_GPR); | 944 | (int32_t)offsetof(GCupval, tv), RSET_GPR); |
945 | else | ||
946 | emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v)); | ||
947 | if (guarded) | ||
943 | emit_lso(as, A64I_LDRB, RID_TMP, dest, | 948 | emit_lso(as, A64I_LDRB, RID_TMP, dest, |
944 | (int32_t)offsetof(GCupval, closed)); | 949 | (int32_t)offsetof(GCupval, closed)); |
950 | if (irref_isk(ir->op1)) { | ||
951 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
952 | uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]); | ||
953 | emit_loadu64(as, dest, k); | ||
945 | } else { | 954 | } else { |
946 | emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v)); | 955 | emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR), |
956 | (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); | ||
947 | } | 957 | } |
948 | emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR), | ||
949 | (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); | ||
950 | } | 958 | } |
951 | } | 959 | } |
952 | 960 | ||
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index b02da663..d4e40c91 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h | |||
@@ -1207,22 +1207,29 @@ nolo: | |||
1207 | static void asm_uref(ASMState *as, IRIns *ir) | 1207 | static void asm_uref(ASMState *as, IRIns *ir) |
1208 | { | 1208 | { |
1209 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1209 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1210 | if (irref_isk(ir->op1)) { | 1210 | int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); |
1211 | if (irref_isk(ir->op1) && !guarded) { | ||
1211 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 1212 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
1212 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | 1213 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; |
1213 | emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); | 1214 | emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); |
1214 | } else { | 1215 | } else { |
1215 | Reg uv = ra_scratch(as, RSET_GPR); | 1216 | if (guarded) |
1216 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | 1217 | asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO); |
1217 | if (ir->o == IR_UREFC) { | 1218 | if (ir->o == IR_UREFC) |
1218 | asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); | 1219 | emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv)); |
1219 | emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); | 1220 | else |
1220 | emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); | 1221 | emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v)); |
1222 | if (guarded) | ||
1223 | emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); | ||
1224 | if (irref_isk(ir->op1)) { | ||
1225 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
1226 | GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); | ||
1227 | emit_loada(as, dest, o); | ||
1221 | } else { | 1228 | } else { |
1222 | emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); | 1229 | emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR), |
1230 | (int32_t)offsetof(GCfuncL, uvptr) + | ||
1231 | (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); | ||
1223 | } | 1232 | } |
1224 | emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + | ||
1225 | (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); | ||
1226 | } | 1233 | } |
1227 | } | 1234 | } |
1228 | 1235 | ||
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 6555312d..8e9a92a4 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
@@ -840,23 +840,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
840 | static void asm_uref(ASMState *as, IRIns *ir) | 840 | static void asm_uref(ASMState *as, IRIns *ir) |
841 | { | 841 | { |
842 | Reg dest = ra_dest(as, ir, RSET_GPR); | 842 | Reg dest = ra_dest(as, ir, RSET_GPR); |
843 | if (irref_isk(ir->op1)) { | 843 | int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); |
844 | if (irref_isk(ir->op1) && !guarded) { | ||
844 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 845 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
845 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | 846 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; |
846 | emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); | 847 | emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); |
847 | } else { | 848 | } else { |
848 | Reg uv = ra_scratch(as, RSET_GPR); | 849 | if (guarded) { |
849 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | 850 | asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ); |
850 | if (ir->o == IR_UREFC) { | ||
851 | asm_guardcc(as, CC_NE); | ||
852 | emit_ai(as, PPCI_CMPWI, RID_TMP, 1); | 851 | emit_ai(as, PPCI_CMPWI, RID_TMP, 1); |
853 | emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv)); | 852 | } |
854 | emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); | 853 | if (ir->o == IR_UREFC) |
854 | emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); | ||
855 | else | ||
856 | emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v)); | ||
857 | if (guarded) | ||
858 | emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); | ||
859 | if (irref_isk(ir->op1)) { | ||
860 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
861 | int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]); | ||
862 | emit_loadi(as, dest, k); | ||
855 | } else { | 863 | } else { |
856 | emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v)); | 864 | emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR), |
865 | (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); | ||
857 | } | 866 | } |
858 | emit_tai(as, PPCI_LWZ, uv, func, | ||
859 | (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); | ||
860 | } | 867 | } |
861 | } | 868 | } |
862 | 869 | ||
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index c92de3d8..0e0b28a4 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -1373,24 +1373,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1373 | static void asm_uref(ASMState *as, IRIns *ir) | 1373 | static void asm_uref(ASMState *as, IRIns *ir) |
1374 | { | 1374 | { |
1375 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1375 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1376 | if (irref_isk(ir->op1)) { | 1376 | int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); |
1377 | if (irref_isk(ir->op1) && !guarded) { | ||
1377 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | 1378 | GCfunc *fn = ir_kfunc(IR(ir->op1)); |
1378 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; | 1379 | MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; |
1379 | emit_rma(as, XO_MOV, dest|REX_GC64, v); | 1380 | emit_rma(as, XO_MOV, dest|REX_GC64, v); |
1380 | } else { | 1381 | } else { |
1381 | Reg uv = ra_scratch(as, RSET_GPR); | 1382 | Reg uv = ra_scratch(as, RSET_GPR); |
1382 | Reg func = ra_alloc1(as, ir->op1, RSET_GPR); | 1383 | if (ir->o == IR_UREFC) |
1383 | if (ir->o == IR_UREFC) { | ||
1384 | emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); | 1384 | emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); |
1385 | asm_guardcc(as, CC_NE); | 1385 | else |
1386 | emit_i8(as, 1); | 1386 | emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); |
1387 | if (guarded) { | ||
1388 | asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE); | ||
1389 | emit_i8(as, 0); | ||
1387 | emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); | 1390 | emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); |
1391 | } | ||
1392 | if (irref_isk(ir->op1)) { | ||
1393 | GCfunc *fn = ir_kfunc(IR(ir->op1)); | ||
1394 | GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); | ||
1395 | emit_loada(as, uv, o); | ||
1388 | } else { | 1396 | } else { |
1389 | emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); | 1397 | emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR), |
1398 | (int32_t)offsetof(GCfuncL, uvptr) + | ||
1399 | (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); | ||
1390 | } | 1400 | } |
1391 | emit_rmro(as, XO_MOV, uv|REX_GC64, func, | ||
1392 | (int32_t)offsetof(GCfuncL, uvptr) + | ||
1393 | (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); | ||
1394 | } | 1401 | } |
1395 | } | 1402 | } |
1396 | 1403 | ||
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 743dfb07..ce78505b 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -2134,8 +2134,26 @@ LJFOLDX(lj_opt_fwd_uload) | |||
2134 | LJFOLD(ALEN any any) | 2134 | LJFOLD(ALEN any any) |
2135 | LJFOLDX(lj_opt_fwd_alen) | 2135 | LJFOLDX(lj_opt_fwd_alen) |
2136 | 2136 | ||
2137 | /* Try to merge UREFO/UREFC into referenced instruction. */ | ||
2138 | static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir) | ||
2139 | { | ||
2140 | if (ir->o == IR_UREFO && irt_isguard(ir->t)) { | ||
2141 | /* Might be pointing to some other coroutine's stack. | ||
2142 | ** And GC might shrink said stack, thereby repointing the upvalue. | ||
2143 | ** GC might even collect said coroutine, thereby closing the upvalue. | ||
2144 | */ | ||
2145 | if (gcstep_barrier(J, ref)) | ||
2146 | return EMITFOLD; /* So cannot merge. */ | ||
2147 | /* Current fins wants a check, but ir doesn't have one. */ | ||
2148 | if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) && | ||
2149 | irt_type(ir->t) == IRT_IGC) | ||
2150 | ir->t.irt += IRT_PGC-IRT_IGC; /* So install a check. */ | ||
2151 | } | ||
2152 | return ref; /* Not a TRef, but the caller doesn't care. */ | ||
2153 | } | ||
2154 | |||
2137 | /* Upvalue refs are really loads, but there are no corresponding stores. | 2155 | /* Upvalue refs are really loads, but there are no corresponding stores. |
2138 | ** So CSE is ok for them, except for UREFO across a GC step (see below). | 2156 | ** So CSE is ok for them, except for guarded UREFO across a GC step. |
2139 | ** If the referenced function is const, its upvalue addresses are const, too. | 2157 | ** If the referenced function is const, its upvalue addresses are const, too. |
2140 | ** This can be used to improve CSE by looking for the same address, | 2158 | ** This can be used to improve CSE by looking for the same address, |
2141 | ** even if the upvalues originate from a different function. | 2159 | ** even if the upvalues originate from a different function. |
@@ -2153,9 +2171,7 @@ LJFOLDF(cse_uref) | |||
2153 | if (irref_isk(ir->op1)) { | 2171 | if (irref_isk(ir->op1)) { |
2154 | GCfunc *fn2 = ir_kfunc(IR(ir->op1)); | 2172 | GCfunc *fn2 = ir_kfunc(IR(ir->op1)); |
2155 | if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) { | 2173 | if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) { |
2156 | if (fins->o == IR_UREFO && gcstep_barrier(J, ref)) | 2174 | return merge_uref(J, ref, ir); |
2157 | break; | ||
2158 | return ref; | ||
2159 | } | 2175 | } |
2160 | } | 2176 | } |
2161 | ref = ir->prev; | 2177 | ref = ir->prev; |
@@ -2164,6 +2180,24 @@ LJFOLDF(cse_uref) | |||
2164 | return EMITFOLD; | 2180 | return EMITFOLD; |
2165 | } | 2181 | } |
2166 | 2182 | ||
2183 | /* Custom CSE for UREFO. */ | ||
2184 | LJFOLD(UREFO any any) | ||
2185 | LJFOLDF(cse_urefo) | ||
2186 | { | ||
2187 | if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { | ||
2188 | IRRef ref = J->chain[IR_UREFO]; | ||
2189 | IRRef lim = fins->op1; | ||
2190 | IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); | ||
2191 | while (ref > lim) { | ||
2192 | IRIns *ir = IR(ref); | ||
2193 | if (ir->op12 == op12) | ||
2194 | return merge_uref(J, ref, ir); | ||
2195 | ref = ir->prev; | ||
2196 | } | ||
2197 | } | ||
2198 | return EMITFOLD; | ||
2199 | } | ||
2200 | |||
2167 | LJFOLD(HREFK any any) | 2201 | LJFOLD(HREFK any any) |
2168 | LJFOLDX(lj_opt_fwd_hrefk) | 2202 | LJFOLDX(lj_opt_fwd_hrefk) |
2169 | 2203 | ||
@@ -2384,14 +2418,9 @@ LJFOLDF(fold_base) | |||
2384 | 2418 | ||
2385 | /* Write barriers are amenable to CSE, but not across any incremental | 2419 | /* Write barriers are amenable to CSE, but not across any incremental |
2386 | ** GC steps. | 2420 | ** GC steps. |
2387 | ** | ||
2388 | ** The same logic applies to open upvalue references, because a stack | ||
2389 | ** may be resized during a GC step (not the current stack, but maybe that | ||
2390 | ** of a coroutine). | ||
2391 | */ | 2421 | */ |
2392 | LJFOLD(TBAR any) | 2422 | LJFOLD(TBAR any) |
2393 | LJFOLD(OBAR any any) | 2423 | LJFOLD(OBAR any any) |
2394 | LJFOLD(UREFO any any) | ||
2395 | LJFOLDF(barrier_tab) | 2424 | LJFOLDF(barrier_tab) |
2396 | { | 2425 | { |
2397 | TRef tr = lj_opt_cse(J); | 2426 | TRef tr = lj_opt_cse(J); |
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 351d958c..631ac9e4 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c | |||
@@ -464,18 +464,23 @@ doemit: | |||
464 | */ | 464 | */ |
465 | static AliasRet aa_uref(IRIns *refa, IRIns *refb) | 465 | static AliasRet aa_uref(IRIns *refa, IRIns *refb) |
466 | { | 466 | { |
467 | if (refa->o != refb->o) | ||
468 | return ALIAS_NO; /* Different UREFx type. */ | ||
469 | if (refa->op1 == refb->op1) { /* Same function. */ | 467 | if (refa->op1 == refb->op1) { /* Same function. */ |
470 | if (refa->op2 == refb->op2) | 468 | if (refa->op2 == refb->op2) |
471 | return ALIAS_MUST; /* Same function, same upvalue idx. */ | 469 | return ALIAS_MUST; /* Same function, same upvalue idx. */ |
472 | else | 470 | else |
473 | return ALIAS_NO; /* Same function, different upvalue idx. */ | 471 | return ALIAS_NO; /* Same function, different upvalue idx. */ |
474 | } else { /* Different functions, check disambiguation hash values. */ | 472 | } else { /* Different functions, check disambiguation hash values. */ |
475 | if (((refa->op2 ^ refb->op2) & 0xff)) | 473 | if (((refa->op2 ^ refb->op2) & 0xff)) { |
476 | return ALIAS_NO; /* Upvalues with different hash values cannot alias. */ | 474 | return ALIAS_NO; /* Upvalues with different hash values cannot alias. */ |
477 | else | 475 | } else if (refa->o != refb->o) { |
478 | return ALIAS_MAY; /* No conclusion can be drawn for same hash value. */ | 476 | /* Different UREFx type, but need to confirm the UREFO really is open. */ |
477 | if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC; | ||
478 | else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC; | ||
479 | return ALIAS_NO; | ||
480 | } else { | ||
481 | /* No conclusion can be drawn for same hash value and same UREFx type. */ | ||
482 | return ALIAS_MAY; | ||
483 | } | ||
479 | } | 484 | } |
480 | } | 485 | } |
481 | 486 | ||
diff --git a/src/lj_record.c b/src/lj_record.c index d44f7737..1dd310d4 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -1772,12 +1772,12 @@ noconstify: | |||
1772 | /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ | 1772 | /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ |
1773 | uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); | 1773 | uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); |
1774 | if (!uvp->closed) { | 1774 | if (!uvp->closed) { |
1775 | uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv)); | ||
1776 | /* In current stack? */ | 1775 | /* In current stack? */ |
1777 | if (uvval(uvp) >= tvref(J->L->stack) && | 1776 | if (uvval(uvp) >= tvref(J->L->stack) && |
1778 | uvval(uvp) < tvref(J->L->maxstack)) { | 1777 | uvval(uvp) < tvref(J->L->maxstack)) { |
1779 | int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); | 1778 | int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); |
1780 | if (slot >= 0) { /* Aliases an SSA slot? */ | 1779 | if (slot >= 0) { /* Aliases an SSA slot? */ |
1780 | uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv)); | ||
1781 | emitir(IRTG(IR_EQ, IRT_PGC), | 1781 | emitir(IRTG(IR_EQ, IRT_PGC), |
1782 | REF_BASE, | 1782 | REF_BASE, |
1783 | emitir(IRT(IR_ADD, IRT_PGC), uref, | 1783 | emitir(IRT(IR_ADD, IRT_PGC), uref, |
@@ -1792,12 +1792,21 @@ noconstify: | |||
1792 | } | 1792 | } |
1793 | } | 1793 | } |
1794 | } | 1794 | } |
1795 | /* IR_UREFO+IRT_IGC is not checked for open-ness at runtime. | ||
1796 | ** Always marked as a guard, since it might get promoted to IRT_PGC later. | ||
1797 | */ | ||
1798 | uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv); | ||
1799 | uref = tref_ref(uref); | ||
1795 | emitir(IRTG(IR_UGT, IRT_PGC), | 1800 | emitir(IRTG(IR_UGT, IRT_PGC), |
1796 | emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), | 1801 | emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), |
1797 | lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8)); | 1802 | lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8)); |
1798 | } else { | 1803 | } else { |
1804 | /* If fn is constant, then so is the GCupval*, and the upvalue cannot | ||
1805 | ** transition back to open, so no guard is required in this case. | ||
1806 | */ | ||
1807 | IRType t = (tref_isk(fn) ? 0 : IRT_GUARD) | IRT_PGC; | ||
1808 | uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv)); | ||
1799 | needbarrier = 1; | 1809 | needbarrier = 1; |
1800 | uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); | ||
1801 | } | 1810 | } |
1802 | if (val == 0) { /* Upvalue load */ | 1811 | if (val == 0) { /* Upvalue load */ |
1803 | IRType t = itype2irt(uvval(uvp)); | 1812 | IRType t = itype2irt(uvval(uvp)); |
diff --git a/src/lj_state.c b/src/lj_state.c index 6efe189d..7e4961bd 100644 --- a/src/lj_state.c +++ b/src/lj_state.c | |||
@@ -346,8 +346,11 @@ void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) | |||
346 | lj_assertG(L != mainthread(g), "free of main thread"); | 346 | lj_assertG(L != mainthread(g), "free of main thread"); |
347 | if (obj2gco(L) == gcref(g->cur_L)) | 347 | if (obj2gco(L) == gcref(g->cur_L)) |
348 | setgcrefnull(g->cur_L); | 348 | setgcrefnull(g->cur_L); |
349 | lj_func_closeuv(L, tvref(L->stack)); | 349 | if (gcref(L->openupval) != NULL) { |
350 | lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues"); | 350 | lj_func_closeuv(L, tvref(L->stack)); |
351 | lj_trace_abort(g); /* For aa_uref soundness. */ | ||
352 | lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues"); | ||
353 | } | ||
351 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); | 354 | lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); |
352 | lj_mem_freet(g, L); | 355 | lj_mem_freet(g, L); |
353 | } | 356 | } |