aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/lib_jit.c15
-rw-r--r--src/lj_arch.h3
-rw-r--r--src/lj_asm.c6
-rw-r--r--src/lj_asm_arm.h76
-rw-r--r--src/lj_asm_arm64.h56
-rw-r--r--src/lj_asm_mips.h60
-rw-r--r--src/lj_asm_ppc.h68
-rw-r--r--src/lj_asm_x86.h88
-rw-r--r--src/lj_emit_arm.h11
-rw-r--r--src/lj_emit_mips.h3
-rw-r--r--src/lj_emit_ppc.h3
-rw-r--r--src/lj_emit_x86.h13
-rw-r--r--src/lj_jit.h23
-rw-r--r--src/lj_mcode.c255
-rw-r--r--src/lj_target_arm.h2
-rw-r--r--src/lj_target_arm64.h1
-rw-r--r--src/lj_target_ppc.h1
-rw-r--r--src/lj_trace.c17
18 files changed, 475 insertions, 226 deletions
diff --git a/src/lib_jit.c b/src/lib_jit.c
index fd8e585b..1b74d957 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -479,12 +479,21 @@ static int jitopt_param(jit_State *J, const char *str)
479 size_t len = *(const uint8_t *)lst; 479 size_t len = *(const uint8_t *)lst;
480 lj_assertJ(len != 0, "bad JIT_P_STRING"); 480 lj_assertJ(len != 0, "bad JIT_P_STRING");
481 if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { 481 if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
482 int32_t n = 0; 482 uint32_t n = 0;
483 const char *p = &str[len+1]; 483 const char *p = &str[len+1];
484 while (*p >= '0' && *p <= '9') 484 while (*p >= '0' && *p <= '9')
485 n = n*10 + (*p++ - '0'); 485 n = n*10 + (*p++ - '0');
486 if (*p) return 0; /* Malformed number. */ 486 if (*p || (int32_t)n < 0) return 0; /* Malformed number. */
487 J->param[i] = n; 487 if (i == JIT_P_sizemcode) { /* Adjust to required range here. */
488#if LJ_TARGET_JUMPRANGE
489 uint32_t maxkb = ((1 << (LJ_TARGET_JUMPRANGE - 10)) - 64);
490#else
491 uint32_t maxkb = ((1 << (31 - 10)) - 64);
492#endif
493 n = (n + (LJ_PAGESIZE >> 10) - 1) & ~((LJ_PAGESIZE >> 10) - 1);
494 if (n > maxkb) n = maxkb;
495 }
496 J->param[i] = (int32_t)n;
488 if (i == JIT_P_hotloop) 497 if (i == JIT_P_hotloop)
489 lj_dispatch_init_hotcount(J2G(J)); 498 lj_dispatch_init_hotcount(J2G(J));
490 return 1; /* Ok. */ 499 return 1; /* Ok. */
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 6d1a9271..799f9c6c 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -301,6 +301,7 @@
301#define LJ_TARGET_MASKROT 1 301#define LJ_TARGET_MASKROT 1
302#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 302#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
303#define LJ_TARGET_GC64 1 303#define LJ_TARGET_GC64 1
304#define LJ_PAGESIZE 16384
304#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL 305#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
305 306
306#define LJ_ARCH_VERSION 80 307#define LJ_ARCH_VERSION 80
@@ -456,7 +457,7 @@
456#define LJ_TARGET_MIPS 1 457#define LJ_TARGET_MIPS 1
457#define LJ_TARGET_EHRETREG 4 458#define LJ_TARGET_EHRETREG 4
458#define LJ_TARGET_EHRAREG 31 459#define LJ_TARGET_EHRAREG 31
459#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ 460#define LJ_TARGET_JUMPRANGE 28 /* 2^28 = 256MB-aligned region */
460#define LJ_TARGET_MASKSHIFT 1 461#define LJ_TARGET_MASKSHIFT 1
461#define LJ_TARGET_MASKROT 1 462#define LJ_TARGET_MASKROT 1
462#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 463#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 8f558a03..0e888c29 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -93,6 +93,10 @@ typedef struct ASMState {
93 MCode *invmcp; /* Points to invertible loop branch (or NULL). */ 93 MCode *invmcp; /* Points to invertible loop branch (or NULL). */
94 MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ 94 MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
95 MCode *realign; /* Realign loop if not NULL. */ 95 MCode *realign; /* Realign loop if not NULL. */
96 MCode *mctail; /* Tail of trace before stack adjust + jmp. */
97#if LJ_TARGET_PPC || LJ_TARGET_ARM64
98 MCode *mcexit; /* Pointer to exit stubs. */
99#endif
96 100
97#ifdef LUAJIT_RANDOM_RA 101#ifdef LUAJIT_RANDOM_RA
98 /* Randomize register allocation. OK for fuzz testing, not for production. */ 102 /* Randomize register allocation. OK for fuzz testing, not for production. */
@@ -2541,7 +2545,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
2541 RA_DBGX((as, "===== STOP =====")); 2545 RA_DBGX((as, "===== STOP ====="));
2542 2546
2543 /* General trace setup. Emit tail of trace. */ 2547 /* General trace setup. Emit tail of trace. */
2544 asm_tail_prep(as); 2548 asm_tail_prep(as, T->link);
2545 as->mcloop = NULL; 2549 as->mcloop = NULL;
2546 as->flagmcp = NULL; 2550 as->flagmcp = NULL;
2547 as->topslot = 0; 2551 as->topslot = 0;
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 24deaeae..406360d2 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -79,18 +79,43 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
79/* Generate an exit stub group at the bottom of the reserved MCode memory. */ 79/* Generate an exit stub group at the bottom of the reserved MCode memory. */
80static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) 80static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
81{ 81{
82 ExitNo i;
83 int ind = 0;
84 MCode *target = (MCode *)(void *)lj_vm_exit_handler;
82 MCode *mxp = as->mcbot; 85 MCode *mxp = as->mcbot;
83 int i; 86 if (mxp + 6+EXITSTUBS_PER_GROUP >= as->mctop)
84 if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop)
85 asm_mclimit(as); 87 asm_mclimit(as);
86 /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ 88 if ((((target - mxp - 2) + 0x00800000u) >> 24) == 0) {
87 *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP); 89 /* str lr, [sp]; bl ->vm_exit_handler;
88 *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu); 90 ** .long DISPATCH_address, group.
89 mxp++; 91 */
92 *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
93 *mxp = ARMI_BL | ((target - mxp - 2) & 0x00ffffffu); mxp++;
94 } else if ((as->flags & JIT_F_ARMV6T2)) {
95 /*
96 ** str lr, [sp]; movw/movt lr, vm_exit_handler; blx lr;
97 ** .long DISPATCH_address, group;
98 */
99 *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
100 *mxp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR);
101 *mxp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR);
102 *mxp++ = ARMI_BLXr | ARMF_M(RID_LR);
103 ind = 2;
104 } else {
105 /* .long vm_exit_handler;
106 ** str lr, [sp]; ldr lr, [pc, #-16]; blx lr;
107 ** .long DISPATCH_address, group;
108 */
109 *mxp++ = (MCode)target;
110 *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
111 *mxp++ = ARMI_LDRL | ARMF_D(RID_LR) | 16;
112 *mxp++ = ARMI_BLXr | ARMF_M(RID_LR);
113 ind = 1;
114 }
90 *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ 115 *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */
91 *mxp++ = group*EXITSTUBS_PER_GROUP; 116 *mxp++ = group*EXITSTUBS_PER_GROUP;
92 for (i = 0; i < EXITSTUBS_PER_GROUP; i++) 117 for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
93 *mxp++ = ARMI_B|((-6-i)&0x00ffffffu); 118 *mxp++ = ARMI_B | ((-6-ind-i) & 0x00ffffffu);
94 lj_mcode_sync(as->mcbot, mxp); 119 lj_mcode_sync(as->mcbot, mxp);
95 lj_mcode_commitbot(as->J, mxp); 120 lj_mcode_commitbot(as->J, mxp);
96 as->mcbot = mxp; 121 as->mcbot = mxp;
@@ -2210,33 +2235,46 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
2210/* Fixup the tail code. */ 2235/* Fixup the tail code. */
2211static void asm_tail_fixup(ASMState *as, TraceNo lnk) 2236static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2212{ 2237{
2213 MCode *p = as->mctop; 2238 MCode *target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
2214 MCode *target; 2239 MCode *mcp = as->mctail;
2215 int32_t spadj = as->T->spadjust; 2240 int32_t spadj = as->T->spadjust;
2216 if (spadj == 0) { 2241 if (spadj) { /* Emit stack adjustment. */
2217 as->mctop = --p;
2218 } else {
2219 /* Patch stack adjustment. */
2220 uint32_t k = emit_isk12(ARMI_ADD, spadj); 2242 uint32_t k = emit_isk12(ARMI_ADD, spadj);
2221 lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); 2243 lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
2222 p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); 2244 *mcp++ = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
2245 }
2246 if ((((target - mcp - 2) + 0x00800000u) >> 24) == 0) {
2247 *mcp = ARMI_B | ((target - mcp - 2) & 0x00ffffffu); mcp++;
2248 } else if ((as->flags & JIT_F_ARMV6T2)) {
2249 *mcp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR);
2250 *mcp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR);
2251 *mcp++ = ARMI_BX | ARMF_M(RID_LR);
2252 } else {
2253 *mcp++ = ARMI_LDRL | ARMI_LS_U | ARMF_D(RID_LR) | 0;
2254 *mcp++ = ARMI_BX | ARMF_M(RID_LR);
2255 *mcp++ = (MCode)target;
2223 } 2256 }
2224 /* Patch exit branch. */ 2257 while (as->mctop > mcp) *--as->mctop = ARMI_NOP;
2225 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
2226 p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu);
2227} 2258}
2228 2259
2229/* Prepare tail of code. */ 2260/* Prepare tail of code. */
2230static void asm_tail_prep(ASMState *as) 2261static void asm_tail_prep(ASMState *as, TraceNo lnk)
2231{ 2262{
2232 MCode *p = as->mctop - 1; /* Leave room for exit branch. */ 2263 MCode *p = as->mctop - 1; /* Leave room for exit branch. */
2233 if (as->loopref) { 2264 if (as->loopref) {
2234 as->invmcp = as->mcp = p; 2265 as->invmcp = as->mcp = p;
2235 } else { 2266 } else {
2236 as->mcp = p-1; /* Leave room for stack pointer adjustment. */ 2267 if (!lnk) {
2268 MCode *target = (MCode *)(void *)lj_vm_exit_interp;
2269 if ((((target - p - 2) + 0x00800000u) >> 24) ||
2270 (((target - p - 1) + 0x00800000u) >> 24)) p -= 2;
2271 }
2272 p--; /* Leave room for stack pointer adjustment. */
2273 as->mcp = p;
2237 as->invmcp = NULL; 2274 as->invmcp = NULL;
2238 } 2275 }
2239 *p = 0; /* Prevent load/store merging. */ 2276 *p = 0; /* Prevent load/store merging. */
2277 as->mctail = p;
2240} 2278}
2241 2279
2242/* -- Trace setup --------------------------------------------------------- */ 2280/* -- Trace setup --------------------------------------------------------- */
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 4feaa3b0..085f9357 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -51,15 +51,27 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
51static void asm_exitstub_setup(ASMState *as, ExitNo nexits) 51static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
52{ 52{
53 ExitNo i; 53 ExitNo i;
54 int ind;
55 MCode *target = (MCode *)(void *)lj_vm_exit_handler;
54 MCode *mxp = as->mctop; 56 MCode *mxp = as->mctop;
55 if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) 57 if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
56 asm_mclimit(as); 58 asm_mclimit(as);
57 /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ 59 ind = !A64F_S_OK(target - (mxp - nexits - 2), 26);
60 /* !ind: 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno;
61 ** ind: 1: str lr,[sp]; ldr lr, [gl, K64_VXH]; blr lr; movz w0,traceno;
62 ** bl <1; bl <1; ...
63 */
58 for (i = nexits-1; (int32_t)i >= 0; i--) 64 for (i = nexits-1; (int32_t)i >= 0; i--)
59 *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); 65 *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-ind-i));
66 as->mcexit = mxp;
60 *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); 67 *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
61 mxp--; 68 if (ind) {
62 *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); 69 *--mxp = A64I_LE(A64I_BLR_AUTH | A64F_N(RID_LR));
70 *--mxp = A64I_LE(A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]) >> 3));
71 } else {
72 mxp--;
73 *mxp = A64I_LE(A64I_BL | A64F_S26(target-mxp));
74 }
63 *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); 75 *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
64 as->mctop = mxp; 76 as->mctop = mxp;
65} 77}
@@ -67,7 +79,7 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
67static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) 79static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
68{ 80{
69 /* Keep this in-sync with exitstub_trace_addr(). */ 81 /* Keep this in-sync with exitstub_trace_addr(). */
70 return as->mctop + exitno + 3; 82 return as->mcexit + exitno;
71} 83}
72 84
73/* Emit conditional branch to exit for guard. */ 85/* Emit conditional branch to exit for guard. */
@@ -1917,34 +1929,42 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
1917/* Fixup the tail code. */ 1929/* Fixup the tail code. */
1918static void asm_tail_fixup(ASMState *as, TraceNo lnk) 1930static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1919{ 1931{
1920 MCode *p = as->mctop; 1932 MCode *mcp = as->mctail;
1921 MCode *target; 1933 MCode *target;
1922 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ 1934 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
1923 int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); 1935 int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
1924 if (spadj == 0) { 1936 if (spadj) { /* Emit stack adjustment. */
1925 *--p = A64I_LE(A64I_NOP);
1926 as->mctop = p;
1927 } else {
1928 /* Patch stack adjustment. */
1929 uint32_t k = emit_isk12(spadj); 1937 uint32_t k = emit_isk12(spadj);
1930 lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); 1938 lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
1931 p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); 1939 *mcp++ = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
1932 } 1940 }
1933 /* Patch exit branch. */ 1941 /* Emit exit branch. */
1934 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; 1942 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
1935 p[-1] = A64I_B | A64F_S26((target-p)+1); 1943 if (lnk || A64F_S_OK(target - mcp, 26)) {
1944 *mcp = A64I_B | A64F_S26(target - mcp); mcp++;
1945 } else {
1946 *mcp++ = A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]) >> 3);
1947 *mcp++ = A64I_BR_AUTH | A64F_N(RID_LR);
1948 }
1949 while (as->mctop > mcp) *--as->mctop = A64I_LE(A64I_NOP);
1936} 1950}
1937 1951
1938/* Prepare tail of code. */ 1952/* Prepare tail of code. */
1939static void asm_tail_prep(ASMState *as) 1953static void asm_tail_prep(ASMState *as, TraceNo lnk)
1940{ 1954{
1941 MCode *p = as->mctop - 1; /* Leave room for exit branch. */ 1955 MCode *p = as->mctop - 1; /* Leave room for exit branch. */
1942 if (as->loopref) { 1956 if (as->loopref) {
1943 as->invmcp = as->mcp = p; 1957 as->invmcp = as->mcp = p;
1944 } else { 1958 } else {
1945 as->mcp = p-1; /* Leave room for stack pointer adjustment. */ 1959 if (!lnk) {
1960 MCode *target = (MCode *)(void *)lj_vm_exit_interp;
1961 if (!A64F_S_OK(target - p, 26) || !A64F_S_OK(target - (p+1), 26)) p--;
1962 }
1963 p--; /* Leave room for stack pointer adjustment. */
1964 as->mcp = p;
1946 as->invmcp = NULL; 1965 as->invmcp = NULL;
1947 } 1966 }
1967 as->mctail = p;
1948 *p = 0; /* Prevent load/store merging. */ 1968 *p = 0; /* Prevent load/store merging. */
1949} 1969}
1950 1970
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index af0e714f..8dadabe4 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -92,13 +92,23 @@ static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
92/* Setup exit stub after the end of each trace. */ 92/* Setup exit stub after the end of each trace. */
93static void asm_exitstub_setup(ASMState *as) 93static void asm_exitstub_setup(ASMState *as)
94{ 94{
95 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler;
95 MCode *mxp = as->mctop; 96 MCode *mxp = as->mctop;
96 /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ 97 *--mxp = MIPSI_LI | MIPSF_T(RID_TMP) | as->T->traceno;
97 *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; 98 if (((uintptr_t)(mxp-1) ^ target) >> 28 == 0) {
98 *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); 99 /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
99 lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, 100 *--mxp = MIPSI_J | ((target >> 2) & 0x03ffffffu);
100 "branch target out of range"); 101 } else {
101 *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; 102 /* sw TMP, 0(sp); li TMP, K*_VXH(jgl); jr TMP ; li TMP, traceno */
103 *--mxp = MIPSI_JR | MIPSF_S(RID_TMP);
104 *--mxp = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) |
105#if LJ_64
106 jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]);
107#else
108 jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]);
109#endif
110 }
111 *--mxp = MIPSI_SW | MIPSF_T(RID_TMP) | MIPSF_S(RID_SP) | 0;
102 as->mctop = mxp; 112 as->mctop = mxp;
103} 113}
104 114
@@ -428,7 +438,8 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
428{ 438{
429 /* The modified regs must match with the *.dasc implementation. */ 439 /* The modified regs must match with the *.dasc implementation. */
430 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| 440 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
431 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) 441 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)|
442 RID2RSET(RID_CFUNCADDR)
432#if LJ_TARGET_MIPSR6 443#if LJ_TARGET_MIPSR6
433 |RID2RSET(RID_F21) 444 |RID2RSET(RID_F21)
434#endif 445#endif
@@ -514,7 +525,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
514{ 525{
515 /* The modified regs must match with the *.dasc implementation. */ 526 /* The modified regs must match with the *.dasc implementation. */
516 RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| 527 RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
517 RID2RSET(RID_R1)|RID2RSET(RID_R12); 528 RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_CFUNCADDR);
518 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); 529 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
519 ra_evictset(as, drop); 530 ra_evictset(as, drop);
520 /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ 531 /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
@@ -2699,18 +2710,37 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
2699/* Fixup the tail code. */ 2710/* Fixup the tail code. */
2700static void asm_tail_fixup(ASMState *as, TraceNo lnk) 2711static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2701{ 2712{
2702 MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; 2713 uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp;
2714 MCode *mcp = as->mctail;
2703 int32_t spadj = as->T->spadjust; 2715 int32_t spadj = as->T->spadjust;
2704 MCode *p = as->mctop-1; 2716 if (((uintptr_t)mcp ^ target) >> 28 == 0) {
2705 *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; 2717 *mcp++ = MIPSI_J | ((target >> 2) & 0x03ffffffu);
2706 p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); 2718 } else {
2719 *mcp++ = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) |
2720#if LJ_64
2721 jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]);
2722#else
2723 jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]);
2724#endif
2725 *mcp++ = MIPSI_JR | MIPSF_S(RID_TMP);
2726 }
2727 *mcp++ = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
2707} 2728}
2708 2729
2709/* Prepare tail of code. */ 2730/* Prepare tail of code. */
2710static void asm_tail_prep(ASMState *as) 2731static void asm_tail_prep(ASMState *as, TraceNo lnk)
2711{ 2732{
2712 as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ 2733 as->mcp = as->mctop - 2; /* Leave room for branch plus nop or stack adj. */
2713 as->invmcp = as->loopref ? as->mcp : NULL; 2734 if (as->loopref) {
2735 as->invmcp = as->mcp;
2736 } else {
2737 if (!lnk) {
2738 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp;
2739 if (((uintptr_t)as->mcp ^ target) >> 28 != 0) as->mcp--;
2740 }
2741 as->invmcp = NULL;
2742 }
2743 as->mctail = as->mcp;
2714} 2744}
2715 2745
2716/* -- Trace setup --------------------------------------------------------- */ 2746/* -- Trace setup --------------------------------------------------------- */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index df1ac42f..d77c45ce 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -48,23 +48,38 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
48static void asm_exitstub_setup(ASMState *as, ExitNo nexits) 48static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
49{ 49{
50 ExitNo i; 50 ExitNo i;
51 int ind;
52 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler;
51 MCode *mxp = as->mctop; 53 MCode *mxp = as->mctop;
52 if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) 54 if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
53 asm_mclimit(as); 55 asm_mclimit(as);
54 /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */ 56 ind = ((target - (uintptr_t)(mxp - nexits - 2) + 0x02000000u) >> 26) ? 2 : 0;
57 /* !ind: 1: mflr r0; bl ->vm_exit_handler; li r0, traceno;
58 ** ind: 1: lwz r0, K32_VXH(jgl); mtctr r0; mflr r0; bctrl; li r0, traceno;
59 ** bl <1; bl <1; ...
60 */
55 for (i = nexits-1; (int32_t)i >= 0; i--) 61 for (i = nexits-1; (int32_t)i >= 0; i--)
56 *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2); 62 *--mxp = PPCI_BL | (((-3-ind-i) & 0x00ffffffu) << 2);
63 as->mcexit = mxp;
57 *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */ 64 *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */
58 mxp--; 65 if (ind) {
59 *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2); 66 *--mxp = PPCI_BCTRL;
60 *--mxp = PPCI_MFLR|PPCF_T(RID_TMP); 67 *--mxp = PPCI_MFLR | PPCF_T(RID_TMP);
68 *--mxp = PPCI_MTCTR | PPCF_T(RID_TMP);
69 *--mxp = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) |
70 jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]);
71 } else {
72 mxp--;
73 *mxp = PPCI_BL | ((target - (uintptr_t)mxp) & 0x03fffffcu);
74 *--mxp = PPCI_MFLR | PPCF_T(RID_TMP);
75 }
61 as->mctop = mxp; 76 as->mctop = mxp;
62} 77}
63 78
64static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) 79static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
65{ 80{
66 /* Keep this in-sync with exitstub_trace_addr(). */ 81 /* Keep this in-sync with exitstub_trace_addr(). */
67 return as->mctop + exitno + 3; 82 return as->mcexit + exitno;
68} 83}
69 84
70/* Emit conditional branch to exit for guard. */ 85/* Emit conditional branch to exit for guard. */
@@ -2218,34 +2233,43 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
2218/* Fixup the tail code. */ 2233/* Fixup the tail code. */
2219static void asm_tail_fixup(ASMState *as, TraceNo lnk) 2234static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2220{ 2235{
2221 MCode *p = as->mctop; 2236 uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp;
2222 MCode *target; 2237 MCode *mcp = as->mctail;
2223 int32_t spadj = as->T->spadjust; 2238 int32_t spadj = as->T->spadjust;
2224 if (spadj == 0) { 2239 if (spadj) { /* Emit stack adjustment. */
2225 *--p = PPCI_NOP;
2226 *--p = PPCI_NOP;
2227 as->mctop = p;
2228 } else {
2229 /* Patch stack adjustment. */
2230 lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); 2240 lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
2231 p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); 2241 *mcp++ = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
2232 p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; 2242 *mcp++ = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
2233 } 2243 }
2234 /* Patch exit branch. */ 2244 /* Emit exit branch. */
2235 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; 2245 if ((((target - (uintptr_t)mcp) + 0x02000000u) >> 26) == 0) {
2236 p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); 2246 *mcp = PPCI_B | ((target - (uintptr_t)mcp) & 0x03fffffcu); mcp++;
2247 } else {
2248 *mcp++ = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) |
2249 jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]);
2250 *mcp++ = PPCI_MTCTR | PPCF_T(RID_TMP);
2251 *mcp++ = PPCI_BCTR;
2252 }
2253 while (as->mctop > mcp) *--as->mctop = PPCI_NOP;
2237} 2254}
2238 2255
2239/* Prepare tail of code. */ 2256/* Prepare tail of code. */
2240static void asm_tail_prep(ASMState *as) 2257static void asm_tail_prep(ASMState *as, TraceNo lnk)
2241{ 2258{
2242 MCode *p = as->mctop - 1; /* Leave room for exit branch. */ 2259 MCode *p = as->mctop - 1; /* Leave room for exit branch. */
2243 if (as->loopref) { 2260 if (as->loopref) {
2244 as->invmcp = as->mcp = p; 2261 as->invmcp = as->mcp = p;
2245 } else { 2262 } else {
2246 as->mcp = p-2; /* Leave room for stack pointer adjustment. */ 2263 if (!lnk) {
2264 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp;
2265 if ((((target - (uintptr_t)p) + 0x02000000u) >> 26) ||
2266 (((target - (uintptr_t)(p-2)) + 0x02000000u) >> 26)) p -= 2;
2267 }
2268 p -= 2; /* Leave room for stack pointer adjustment. */
2269 as->mcp = p;
2247 as->invmcp = NULL; 2270 as->invmcp = NULL;
2248 } 2271 }
2272 as->mctail = p;
2249} 2273}
2250 2274
2251/* -- Trace setup --------------------------------------------------------- */ 2275/* -- Trace setup --------------------------------------------------------- */
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 774e77b4..f3c2238a 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -9,9 +9,12 @@
9static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) 9static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
10{ 10{
11 ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff; 11 ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff;
12 MCode *target = (MCode *)(void *)lj_vm_exit_handler;
12 MCode *mxp = as->mcbot; 13 MCode *mxp = as->mcbot;
13 MCode *mxpstart = mxp; 14 MCode *mxpstart = mxp;
14 if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop) 15 if (mxp + ((2+2)*EXITSTUBS_PER_GROUP +
16 (LJ_GC64 ? 0 : 8) +
17 (LJ_64 ? 6 : 5)) >= as->mctop)
15 asm_mclimit(as); 18 asm_mclimit(as);
16 /* Push low byte of exitno for each exit stub. */ 19 /* Push low byte of exitno for each exit stub. */
17 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs; 20 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs;
@@ -30,8 +33,13 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
30 *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; 33 *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
31#endif 34#endif
32 /* Jump to exit handler which fills in the ExitState. */ 35 /* Jump to exit handler which fills in the ExitState. */
33 *mxp++ = XI_JMP; mxp += 4; 36 if (jmprel_ok(mxp + 5, target)) { /* Direct jump. */
34 *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); 37 *mxp++ = XI_JMP; mxp += 4;
38 *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, target);
39 } else { /* RIP-relative indirect jump. */
40 *mxp++ = XI_GROUP5; *mxp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mxp += 4;
41 *((int32_t *)(mxp-4)) = (int32_t)((group ? as->J->exitstubgroup[0] : mxpstart) - 8 - mxp);
42 }
35 /* Commit the code for this group (even if assembly fails later on). */ 43 /* Commit the code for this group (even if assembly fails later on). */
36 lj_mcode_commitbot(as->J, mxp); 44 lj_mcode_commitbot(as->J, mxp);
37 as->mcbot = mxp; 45 as->mcbot = mxp;
@@ -45,6 +53,16 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
45 ExitNo i; 53 ExitNo i;
46 if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) 54 if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
47 lj_trace_err(as->J, LJ_TRERR_SNAPOV); 55 lj_trace_err(as->J, LJ_TRERR_SNAPOV);
56#if LJ_64
57 if (as->J->exitstubgroup[0] == NULL) {
58 /* Store the two potentially out-of-range targets below group 0. */
59 MCode *mxp = as->mcbot;
60 while ((uintptr_t)mxp & 7) *mxp++ = XI_INT3;
61 *((void **)mxp) = (void *)lj_vm_exit_interp; mxp += 8;
62 *((void **)mxp) = (void *)lj_vm_exit_handler; mxp += 8;
63 as->mcbot = mxp; /* Don't bother to commit, done in asm_exitstub_gen. */
64 }
65#endif
48 for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) 66 for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
49 if (as->J->exitstubgroup[i] == NULL) 67 if (as->J->exitstubgroup[i] == NULL)
50 as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); 68 as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
@@ -396,7 +414,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
396 "bad interned 64 bit constant"); 414 "bad interned 64 bit constant");
397 } else { 415 } else {
398 while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; 416 while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
399 *(uint64_t*)as->mcbot = *k; 417 *(uint64_t *)as->mcbot = *k;
400 ir->i = (int32_t)(as->mctop - as->mcbot); 418 ir->i = (int32_t)(as->mctop - as->mcbot);
401 as->mcbot += 8; 419 as->mcbot += 8;
402 as->mclim = as->mcbot + MCLIM_REDZONE; 420 as->mclim = as->mcbot + MCLIM_REDZONE;
@@ -728,7 +746,7 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
728 p = (MCode *)(void *)ir_k64(irf)->u64; 746 p = (MCode *)(void *)ir_k64(irf)->u64;
729 else 747 else
730 p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i; 748 p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
731 if (p - as->mcp == (int32_t)(p - as->mcp)) 749 if (jmprel_ok(p, as->mcp))
732 return p; /* Call target is still in +-2GB range. */ 750 return p; /* Call target is still in +-2GB range. */
733 /* Avoid the indirect case of emit_call(). Try to hoist func addr. */ 751 /* Avoid the indirect case of emit_call(). Try to hoist func addr. */
734 } 752 }
@@ -2806,6 +2824,8 @@ static void asm_gc_check(ASMState *as)
2806 emit_rr(as, XO_TEST, RID_RET, RID_RET); 2824 emit_rr(as, XO_TEST, RID_RET, RID_RET);
2807 args[0] = ASMREF_TMP1; /* global_State *g */ 2825 args[0] = ASMREF_TMP1; /* global_State *g */
2808 args[1] = ASMREF_TMP2; /* MSize steps */ 2826 args[1] = ASMREF_TMP2; /* MSize steps */
2827 /* Insert nop to simplify GC exit recognition in lj_asm_patchexit. */
2828 if (!jmprel_ok(as->mcp, (MCode *)(void *)ci->func)) *--as->mcp = XI_NOP;
2809 asm_gencall(as, ci, args); 2829 asm_gencall(as, ci, args);
2810 tmp = ra_releasetmp(as, ASMREF_TMP1); 2830 tmp = ra_releasetmp(as, ASMREF_TMP1);
2811#if LJ_GC64 2831#if LJ_GC64
@@ -2919,40 +2939,36 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
2919static void asm_tail_fixup(ASMState *as, TraceNo lnk) 2939static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2920{ 2940{
2921 /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */ 2941 /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
2922 MCode *p = as->mctop; 2942 MCode *mcp = as->mctail;
2923 MCode *target, *q; 2943 MCode *target;
2924 int32_t spadj = as->T->spadjust; 2944 int32_t spadj = as->T->spadjust;
2925 if (spadj == 0) { 2945 if (spadj) { /* Emit stack adjustment. */
2926 p -= LJ_64 ? 7 : 6; 2946 if (LJ_64) *mcp++ = 0x48;
2927 } else {
2928 MCode *p1;
2929 /* Patch stack adjustment. */
2930 if (checki8(spadj)) { 2947 if (checki8(spadj)) {
2931 p -= 3; 2948 *mcp++ = XI_ARITHi8;
2932 p1 = p-6; 2949 *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP);
2933 *p1 = (MCode)spadj; 2950 *mcp++ = (MCode)spadj;
2934 } else { 2951 } else {
2935 p1 = p-9; 2952 *mcp++ = XI_ARITHi;
2936 *(int32_t *)p1 = spadj; 2953 *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP);
2954 *(int32_t *)mcp = spadj; mcp += 4;
2937 } 2955 }
2938#if LJ_64
2939 p1[-3] = 0x48;
2940#endif
2941 p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
2942 p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
2943 } 2956 }
2944 /* Patch exit branch. */ 2957 /* Emit exit branch. */
2945 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; 2958 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
2946 *(int32_t *)(p-4) = jmprel(as->J, p, target); 2959 if (lnk || jmprel_ok(mcp + 5, target)) { /* Direct jump. */
2947 p[-5] = XI_JMP; 2960 *mcp++ = XI_JMP; mcp += 4;
2961 *(int32_t *)(mcp-4) = jmprel(as->J, mcp, target);
2962 } else { /* RIP-relative indirect jump. */
2963 *mcp++ = XI_GROUP5; *mcp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mcp += 4;
2964 *((int32_t *)(mcp-4)) = (int32_t)(as->J->exitstubgroup[0] - 16 - mcp);
2965 }
2948 /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ 2966 /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
2949 for (q = as->mctop-1; q >= p; q--) 2967 while (as->mctop > mcp) *--as->mctop = XI_NOP;
2950 *q = XI_NOP;
2951 as->mctop = p;
2952} 2968}
2953 2969
2954/* Prepare tail of code. */ 2970/* Prepare tail of code. */
2955static void asm_tail_prep(ASMState *as) 2971static void asm_tail_prep(ASMState *as, TraceNo lnk)
2956{ 2972{
2957 MCode *p = as->mctop; 2973 MCode *p = as->mctop;
2958 /* Realign and leave room for backwards loop branch or exit branch. */ 2974 /* Realign and leave room for backwards loop branch or exit branch. */
@@ -2964,15 +2980,17 @@ static void asm_tail_prep(ASMState *as)
2964 as->mctop = p; 2980 as->mctop = p;
2965 p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */ 2981 p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */
2966 } else { 2982 } else {
2967 p -= 5; /* Space for exit branch (near jmp). */ 2983 p -= (LJ_64 && !lnk) ? 6 : 5; /* Space for exit branch. */
2968 } 2984 }
2969 if (as->loopref) { 2985 if (as->loopref) {
2970 as->invmcp = as->mcp = p; 2986 as->invmcp = as->mcp = p;
2971 } else { 2987 } else {
2972 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ 2988 /* Leave room for ESP adjustment: add esp, imm */
2973 as->mcp = p - (LJ_64 ? 7 : 6); 2989 p -= LJ_64 ? 7 : 6;
2990 as->mcp = p;
2974 as->invmcp = NULL; 2991 as->invmcp = NULL;
2975 } 2992 }
2993 as->mctail = p;
2976} 2994}
2977 2995
2978/* -- Trace setup --------------------------------------------------------- */ 2996/* -- Trace setup --------------------------------------------------------- */
@@ -3132,6 +3150,10 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
3132 } else if (*p == XI_CALL && 3150 } else if (*p == XI_CALL &&
3133 (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { 3151 (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) {
3134 pgc = p+7; /* Do not patch GC check exit. */ 3152 pgc = p+7; /* Do not patch GC check exit. */
3153 } else if (LJ_64 && *p == 0xff &&
3154 p[1] == MODRM(XM_REG, XOg_CALL, RID_RET) &&
3155 p[2] == XI_NOP) {
3156 pgc = p+5; /* Do not patch GC check exit. */
3135 } 3157 }
3136 } 3158 }
3137 lj_mcode_sync(T->mcode, T->mcode + T->szmcode); 3159 lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index c60e7d75..3e1eb64b 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -173,6 +173,11 @@ static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
173 return 0; /* Failed. */ 173 return 0; /* Failed. */
174} 174}
175 175
176#define emit_movw_k(k) \
177 (ARMI_MOVW | ((k) & 0x0fffu) | (((k) & 0xf000u) << 4))
178#define emit_movt_k(k) \
179 (ARMI_MOVT | (((k) >> 16) & 0x0fffu) | ((((k) >> 16) & 0xf000u) << 4))
180
176/* Load a 32 bit constant into a GPR. */ 181/* Load a 32 bit constant into a GPR. */
177static void emit_loadi(ASMState *as, Reg rd, int32_t i) 182static void emit_loadi(ASMState *as, Reg rd, int32_t i)
178{ 183{
@@ -184,13 +189,13 @@ static void emit_loadi(ASMState *as, Reg rd, int32_t i)
184 emit_d(as, ARMI_MOV^k, rd); 189 emit_d(as, ARMI_MOV^k, rd);
185 } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { 190 } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
186 /* 16 bit loword constant for ARMv6T2. */ 191 /* 16 bit loword constant for ARMv6T2. */
187 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); 192 emit_d(as, emit_movw_k(i), rd);
188 } else if (emit_kdelta1(as, rd, i)) { 193 } else if (emit_kdelta1(as, rd, i)) {
189 /* One step delta relative to another constant. */ 194 /* One step delta relative to another constant. */
190 } else if ((as->flags & JIT_F_ARMV6T2)) { 195 } else if ((as->flags & JIT_F_ARMV6T2)) {
191 /* 32 bit hiword/loword constant for ARMv6T2. */ 196 /* 32 bit hiword/loword constant for ARMv6T2. */
192 emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); 197 emit_d(as, emit_movt_k(i), rd);
193 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); 198 emit_d(as, emit_movw_k(i), rd);
194 } else if (emit_kdelta2(as, rd, i)) { 199 } else if (emit_kdelta2(as, rd, i)) {
195 /* Two step delta relative to another constant. */ 200 /* Two step delta relative to another constant. */
196 } else { 201 } else {
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index d8104959..d65b1c57 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -80,6 +80,9 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
80 80
81/* -- Emit loads/stores --------------------------------------------------- */ 81/* -- Emit loads/stores --------------------------------------------------- */
82 82
83#define jglofs(as, k) \
84 (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff)
85
83/* Prefer rematerialization of BASE/L from global_State over spills. */ 86/* Prefer rematerialization of BASE/L from global_State over spills. */
84#define emit_canremat(ref) ((ref) <= REF_BASE) 87#define emit_canremat(ref) ((ref) <= REF_BASE)
85 88
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index b13f00fe..56928e42 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -53,6 +53,9 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
53 53
54/* -- Emit loads/stores --------------------------------------------------- */ 54/* -- Emit loads/stores --------------------------------------------------- */
55 55
56#define jglofs(as, k) \
57 (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff)
58
56/* Prefer rematerialization of BASE/L from global_State over spills. */ 59/* Prefer rematerialization of BASE/L from global_State over spills. */
57#define emit_canremat(ref) ((ref) <= REF_BASE) 60#define emit_canremat(ref) ((ref) <= REF_BASE)
58 61
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 5fd6cfa7..858fe753 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -478,6 +478,17 @@ static void emit_sfixup(ASMState *as, MCLabel source)
478/* Return label pointing to current PC. */ 478/* Return label pointing to current PC. */
479#define emit_label(as) ((as)->mcp) 479#define emit_label(as) ((as)->mcp)
480 480
 481/* Check if two addresses are in relative jump range. */
482static LJ_AINLINE int jmprel_ok(MCode *a, MCode *b)
483{
484#if LJ_64
485 return a - b == (int32_t)(a - b);
486#else
487 UNUSED(a); UNUSED(b);
488 return 1;
489#endif
490}
491
481/* Compute relative 32 bit offset for jump and call instructions. */ 492/* Compute relative 32 bit offset for jump and call instructions. */
482static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) 493static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
483{ 494{
@@ -511,7 +522,7 @@ static void emit_call_(ASMState *as, MCode *target)
511{ 522{
512 MCode *p = as->mcp; 523 MCode *p = as->mcp;
513#if LJ_64 524#if LJ_64
514 if (target-p != (int32_t)(target-p)) { 525 if (!jmprel_ok(target, p)) {
515 /* Assumes RID_RET is never an argument to calls and always clobbered. */ 526 /* Assumes RID_RET is never an argument to calls and always clobbered. */
516 emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET); 527 emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET);
517 emit_loadu64(as, RID_RET, (uint64_t)target); 528 emit_loadu64(as, RID_RET, (uint64_t)target);
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 102ba0b4..05a8e9bb 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -104,14 +104,6 @@
104 104
105/* -- JIT engine parameters ----------------------------------------------- */ 105/* -- JIT engine parameters ----------------------------------------------- */
106 106
107#if LJ_TARGET_WINDOWS || LJ_64
108/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */
109#define JIT_P_sizemcode_DEFAULT 64
110#else
111/* Could go as low as 4K, but the mmap() overhead would be rather high. */
112#define JIT_P_sizemcode_DEFAULT 32
113#endif
114
115/* Optimization parameters and their defaults. Length is a char in octal! */ 107/* Optimization parameters and their defaults. Length is a char in octal! */
116#define JIT_PARAMDEF(_) \ 108#define JIT_PARAMDEF(_) \
117 _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \ 109 _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \
@@ -131,9 +123,9 @@
131 _(\011, recunroll, 2) /* Min. unroll for true recursion. */ \ 123 _(\011, recunroll, 2) /* Min. unroll for true recursion. */ \
132 \ 124 \
133 /* Size of each machine code area (in KBytes). */ \ 125 /* Size of each machine code area (in KBytes). */ \
134 _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \ 126 _(\011, sizemcode, 64) \
135 /* Max. total size of all machine code areas (in KBytes). */ \ 127 /* Max. total size of all machine code areas (in KBytes). */ \
136 _(\010, maxmcode, 512) \ 128 _(\010, maxmcode, 2048) \
137 /* End of list. */ 129 /* End of list. */
138 130
139enum { 131enum {
@@ -375,9 +367,13 @@ enum {
375 LJ_K64_M2P64, /* -2^64 */ 367 LJ_K64_M2P64, /* -2^64 */
376#endif 368#endif
377#endif 369#endif
370#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
371 LJ_K64_VM_EXIT_HANDLER,
372 LJ_K64_VM_EXIT_INTERP,
373#endif
378 LJ_K64__MAX, 374 LJ_K64__MAX,
379}; 375};
380#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS) 376#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS)
381 377
382enum { 378enum {
383#if LJ_TARGET_X86ORX64 379#if LJ_TARGET_X86ORX64
@@ -394,6 +390,10 @@ enum {
394 LJ_K32_2P63, /* 2^63 */ 390 LJ_K32_2P63, /* 2^63 */
395 LJ_K32_M2P64, /* -2^64 */ 391 LJ_K32_M2P64, /* -2^64 */
396#endif 392#endif
393#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
394 LJ_K32_VM_EXIT_HANDLER,
395 LJ_K32_VM_EXIT_INTERP,
396#endif
397 LJ_K32__MAX 397 LJ_K32__MAX
398}; 398};
399#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS) 399#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS)
@@ -513,6 +513,7 @@ typedef struct jit_State {
513 MCode *mcbot; /* Bottom of current mcode area. */ 513 MCode *mcbot; /* Bottom of current mcode area. */
514 size_t szmcarea; /* Size of current mcode area. */ 514 size_t szmcarea; /* Size of current mcode area. */
515 size_t szallmcarea; /* Total size of all allocated mcode areas. */ 515 size_t szallmcarea; /* Total size of all allocated mcode areas. */
516 uintptr_t mcmin, mcmax; /* Mcode allocation range. */
516 517
517 TValue errinfo; /* Additional info element for trace errors. */ 518 TValue errinfo; /* Additional info element for trace errors. */
518 519
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 2b8ac2df..c3032f4e 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -63,31 +63,46 @@ void lj_mcode_sync(void *start, void *end)
63 63
64#if LJ_HASJIT 64#if LJ_HASJIT
65 65
66#if LUAJIT_SECURITY_MCODE != 0
67/* Protection twiddling failed. Probably due to kernel security. */
68static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J)
69{
70 lua_CFunction panic = J2G(J)->panic;
71 if (panic) {
72 lua_State *L = J->L;
73 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT));
74 panic(L);
75 }
76 exit(EXIT_FAILURE);
77}
78#endif
79
66#if LJ_TARGET_WINDOWS 80#if LJ_TARGET_WINDOWS
67 81
68#define MCPROT_RW PAGE_READWRITE 82#define MCPROT_RW PAGE_READWRITE
69#define MCPROT_RX PAGE_EXECUTE_READ 83#define MCPROT_RX PAGE_EXECUTE_READ
70#define MCPROT_RWX PAGE_EXECUTE_READWRITE 84#define MCPROT_RWX PAGE_EXECUTE_READWRITE
71 85
72static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) 86static void *mcode_alloc_at(uintptr_t hint, size_t sz, DWORD prot)
73{ 87{
74 void *p = LJ_WIN_VALLOC((void *)hint, sz, 88 return LJ_WIN_VALLOC((void *)hint, sz,
75 MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); 89 MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
76 if (!p && !hint)
77 lj_trace_err(J, LJ_TRERR_MCODEAL);
78 return p;
79} 90}
80 91
81static void mcode_free(jit_State *J, void *p, size_t sz) 92static void mcode_free(void *p, size_t sz)
82{ 93{
83 UNUSED(J); UNUSED(sz); 94 UNUSED(sz);
84 VirtualFree(p, 0, MEM_RELEASE); 95 VirtualFree(p, 0, MEM_RELEASE);
85} 96}
86 97
87static int mcode_setprot(void *p, size_t sz, DWORD prot) 98static void mcode_setprot(jit_State *J, void *p, size_t sz, DWORD prot)
88{ 99{
100#if LUAJIT_SECURITY_MCODE != 0
89 DWORD oprot; 101 DWORD oprot;
90 return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); 102 if (!LJ_WIN_VPROTECT(p, sz, prot, &oprot)) mcode_protfail(J);
103#else
104 UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot);
105#endif
91} 106}
92 107
93#elif LJ_TARGET_POSIX 108#elif LJ_TARGET_POSIX
@@ -117,33 +132,33 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
117#define MCPROT_CREATE 0 132#define MCPROT_CREATE 0
118#endif 133#endif
119 134
120static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) 135static void *mcode_alloc_at(uintptr_t hint, size_t sz, int prot)
121{ 136{
122 void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS|MCMAP_CREATE, -1, 0); 137 void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS|MCMAP_CREATE, -1, 0);
123 if (p == MAP_FAILED) { 138 if (p == MAP_FAILED) return NULL;
124 if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
125 p = NULL;
126#if MCMAP_CREATE 139#if MCMAP_CREATE
127 } else { 140 pthread_jit_write_protect_np(0);
128 pthread_jit_write_protect_np(0);
129#endif 141#endif
130 }
131 return p; 142 return p;
132} 143}
133 144
134static void mcode_free(jit_State *J, void *p, size_t sz) 145static void mcode_free(void *p, size_t sz)
135{ 146{
136 UNUSED(J);
137 munmap(p, sz); 147 munmap(p, sz);
138} 148}
139 149
140static int mcode_setprot(void *p, size_t sz, int prot) 150static void mcode_setprot(jit_State *J, void *p, size_t sz, int prot)
141{ 151{
152#if LUAJIT_SECURITY_MCODE != 0
142#if MCMAP_CREATE 153#if MCMAP_CREATE
154 UNUSED(J); UNUSED(p); UNUSED(sz);
143 pthread_jit_write_protect_np((prot & PROT_EXEC)); 155 pthread_jit_write_protect_np((prot & PROT_EXEC));
144 return 0; 156 return 0;
145#else 157#else
146 return mprotect(p, sz, prot); 158 if (mprotect(p, sz, prot)) mcode_protfail(J);
159#endif
160#else
161 UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot);
147#endif 162#endif
148} 163}
149 164
@@ -153,6 +168,49 @@ static int mcode_setprot(void *p, size_t sz, int prot)
153 168
154#endif 169#endif
155 170
171#ifdef LUAJIT_MCODE_TEST
172/* Test wrapper for mcode allocation. DO NOT ENABLE in production! Try:
173** LUAJIT_MCODE_TEST=hhhhhhhhhhhhhhhh luajit -jv main.lua
174** LUAJIT_MCODE_TEST=F luajit -jv main.lua
175*/
176static void *mcode_alloc_at_TEST(jit_State *J, uintptr_t hint, size_t sz, int prot)
177{
178 static int test_ofs = 0;
179 static const char *test_str;
180 if (!test_str) {
181 test_str = getenv("LUAJIT_MCODE_TEST");
182 if (!test_str) test_str = "";
183 }
184 switch (test_str[test_ofs]) {
185 case 'a': /* OK for one allocation. */
186 test_ofs++;
187 /* fallthrough */
188 case '\0': /* EOS: OK for any further allocations. */
189 break;
190 case 'h': /* Ignore one hint. */
191 test_ofs++;
192 /* fallthrough */
193 case 'H': /* Ignore any further hints. */
194 hint = 0u;
195 break;
196 case 'r': /* Randomize one hint. */
197 test_ofs++;
198 /* fallthrough */
199 case 'R': /* Randomize any further hints. */
200 hint = lj_prng_u64(&J2G(J)->prng) & ~(uintptr_t)0xffffu;
201 hint &= ((uintptr_t)1 << (LJ_64 ? 47 : 31)) - 1;
202 break;
203 case 'f': /* Fail one allocation. */
204 test_ofs++;
205 /* fallthrough */
206 default: /* 'F' or unknown: Fail any further allocations. */
207 return NULL;
208 }
209 return mcode_alloc_at(hint, sz, prot);
210}
211#define mcode_alloc_at(hint, sz, prot) mcode_alloc_at_TEST(J, hint, sz, prot)
212#endif
213
156/* -- MCode area protection ----------------------------------------------- */ 214/* -- MCode area protection ----------------------------------------------- */
157 215
158#if LUAJIT_SECURITY_MCODE == 0 216#if LUAJIT_SECURITY_MCODE == 0
@@ -174,7 +232,7 @@ static int mcode_setprot(void *p, size_t sz, int prot)
174 232
175static void mcode_protect(jit_State *J, int prot) 233static void mcode_protect(jit_State *J, int prot)
176{ 234{
177 UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); 235 UNUSED(J); UNUSED(prot);
178} 236}
179 237
180#else 238#else
@@ -190,24 +248,11 @@ static void mcode_protect(jit_State *J, int prot)
190#define MCPROT_GEN MCPROT_RW 248#define MCPROT_GEN MCPROT_RW
191#define MCPROT_RUN MCPROT_RX 249#define MCPROT_RUN MCPROT_RX
192 250
193/* Protection twiddling failed. Probably due to kernel security. */
194static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J)
195{
196 lua_CFunction panic = J2G(J)->panic;
197 if (panic) {
198 lua_State *L = J->L;
199 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT));
200 panic(L);
201 }
202 exit(EXIT_FAILURE);
203}
204
205/* Change protection of MCode area. */ 251/* Change protection of MCode area. */
206static void mcode_protect(jit_State *J, int prot) 252static void mcode_protect(jit_State *J, int prot)
207{ 253{
208 if (J->mcprot != prot) { 254 if (J->mcprot != prot) {
209 if (LJ_UNLIKELY(mcode_setprot(J->mcarea, J->szmcarea, prot))) 255 mcode_setprot(J, J->mcarea, J->szmcarea, prot);
210 mcode_protfail(J);
211 J->mcprot = prot; 256 J->mcprot = prot;
212 } 257 }
213} 258}
@@ -216,47 +261,74 @@ static void mcode_protect(jit_State *J, int prot)
216 261
217/* -- MCode area allocation ----------------------------------------------- */ 262/* -- MCode area allocation ----------------------------------------------- */
218 263
219#if LJ_64
220#define mcode_validptr(p) (p)
221#else
222#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000)
223#endif
224
225#ifdef LJ_TARGET_JUMPRANGE 264#ifdef LJ_TARGET_JUMPRANGE
226 265
227/* Get memory within relative jump distance of our code in 64 bit mode. */ 266#define MCODE_RANGE64 ((1u << LJ_TARGET_JUMPRANGE) - 0x10000u)
228static void *mcode_alloc(jit_State *J, size_t sz) 267
268/* Set a memory range for mcode allocation with addr in the middle. */
269static void mcode_setrange(jit_State *J, uintptr_t addr)
229{ 270{
230 /* Target an address in the static assembler code (64K aligned).
231 ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB.
232 ** Use half the jump range so every address in the range can reach any other.
233 */
234#if LJ_TARGET_MIPS 271#if LJ_TARGET_MIPS
235 /* Use the middle of the 256MB-aligned region. */ 272 /* Use the whole 256MB-aligned region. */
236 uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 273 J->mcmin = addr & ~(uintptr_t)((1u << LJ_TARGET_JUMPRANGE) - 1);
237 ~(uintptr_t)0x0fffffffu) + 0x08000000u; 274 J->mcmax = J->mcmin + (1u << LJ_TARGET_JUMPRANGE);
238#else 275#else
239 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; 276 /* Every address in the 64KB-aligned range should be able to reach
277 ** any other, so MCODE_RANGE64 is only half the (signed) branch range.
278 */
279 J->mcmin = (addr - (MCODE_RANGE64 >> 1) + 0xffffu) & ~(uintptr_t)0xffffu;
280 J->mcmax = J->mcmin + MCODE_RANGE64;
240#endif 281#endif
241 const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21); 282 /* Avoid wrap-around and the 64KB corners. */
242 /* First try a contiguous area below the last one. */ 283 if (addr < J->mcmin || !J->mcmin) J->mcmin = 0x10000u;
243 uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0; 284 if (addr > J->mcmax) J->mcmax = ~(uintptr_t)0xffffu;
244 int i; 285}
245 /* Limit probing iterations, depending on the available pool size. */ 286
 246 for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) { 287/* Check if an address range lies within the mcode allocation range. */
247 if (mcode_validptr(hint)) { 288static LJ_AINLINE int mcode_inrange(jit_State *J, uintptr_t addr, size_t sz)
248 void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN); 289{
249 290 /* Take care of unsigned wrap-around of addr + sz, too. */
250 if (mcode_validptr(p) && 291 return addr >= J->mcmin && addr + sz >= J->mcmin && addr + sz <= J->mcmax;
251 ((uintptr_t)p + sz - target < range || target - (uintptr_t)p < range)) 292}
252 return p; 293
253 if (p) mcode_free(J, p, sz); /* Free badly placed area. */ 294/* Get memory within a specific jump range in 64 bit mode. */
254 } 295static void *mcode_alloc(jit_State *J, size_t sz)
255 /* Next try probing 64K-aligned pseudo-random addresses. */ 296{
297 uintptr_t hint;
298 int i = 0, j;
299 if (!J->mcmin) /* Place initial range near the interpreter code. */
300 mcode_setrange(J, (uintptr_t)(void *)lj_vm_exit_handler);
301 else if (!J->mcmax) /* Switch to a new range (already flushed). */
302 goto newrange;
303 /* First try a contiguous area below the last one (if in range). */
304 hint = (uintptr_t)J->mcarea - sz;
305 if (!mcode_inrange(J, hint, sz)) /* Also takes care of NULL J->mcarea. */
306 goto probe;
307 for (; i < 16; i++) {
308 void *p = mcode_alloc_at(hint, sz, MCPROT_GEN);
309 if (mcode_inrange(J, (uintptr_t)p, sz))
310 return p; /* Success. */
311 else if (p)
312 mcode_free(p, sz); /* Free badly placed area. */
313 probe:
314 /* Next try probing 64KB-aligned pseudo-random addresses. */
315 j = 0;
256 do { 316 do {
257 hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000); 317 hint = J->mcmin + (lj_prng_u64(&J2G(J)->prng) & MCODE_RANGE64);
258 } while (!(hint + sz < range+range)); 318 if (++j > 15) goto fail;
259 hint = target + hint - range; 319 } while (!mcode_inrange(J, hint, sz));
320 }
321fail:
322 if (!J->mcarea) { /* Switch to a new range now. */
323 void *p;
324 newrange:
325 p = mcode_alloc_at(0, sz, MCPROT_GEN);
326 if (p) {
327 mcode_setrange(J, (uintptr_t)p + (sz >> 1));
328 return p; /* Success. */
329 }
330 } else {
331 J->mcmax = 0; /* Switch to a new range after the flush. */
260 } 332 }
261 lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */ 333 lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */
262 return NULL; 334 return NULL;
@@ -269,15 +341,13 @@ static void *mcode_alloc(jit_State *J, size_t sz)
269{ 341{
270#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP 342#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP
271 /* Allow better executable memory allocation for OpenBSD W^X mode. */ 343 /* Allow better executable memory allocation for OpenBSD W^X mode. */
272 void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); 344 void *p = mcode_alloc_at(0, sz, MCPROT_RUN);
273 if (p && mcode_setprot(p, sz, MCPROT_GEN)) { 345 if (p) mcode_setprot(J, p, sz, MCPROT_GEN);
274 mcode_free(J, p, sz);
275 return NULL;
276 }
277 return p;
278#else 346#else
279 return mcode_alloc_at(J, 0, sz, MCPROT_GEN); 347 void *p = mcode_alloc_at(0, sz, MCPROT_GEN);
280#endif 348#endif
349 if (!p) lj_trace_err(J, LJ_TRERR_MCODEAL);
350 return p;
281} 351}
282 352
283#endif 353#endif
@@ -289,7 +359,6 @@ static void mcode_allocarea(jit_State *J)
289{ 359{
290 MCode *oldarea = J->mcarea; 360 MCode *oldarea = J->mcarea;
291 size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10; 361 size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10;
292 sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
293 J->mcarea = (MCode *)mcode_alloc(J, sz); 362 J->mcarea = (MCode *)mcode_alloc(J, sz);
294 J->szmcarea = sz; 363 J->szmcarea = sz;
295 J->mcprot = MCPROT_GEN; 364 J->mcprot = MCPROT_GEN;
@@ -311,7 +380,7 @@ void lj_mcode_free(jit_State *J)
311 MCode *next = ((MCLink *)mc)->next; 380 MCode *next = ((MCLink *)mc)->next;
312 size_t sz = ((MCLink *)mc)->size; 381 size_t sz = ((MCLink *)mc)->size;
313 lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink)); 382 lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
314 mcode_free(J, mc, sz); 383 mcode_free(mc, sz);
315 mc = next; 384 mc = next;
316 } 385 }
317} 386}
@@ -347,32 +416,25 @@ void lj_mcode_abort(jit_State *J)
347MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) 416MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
348{ 417{
349 if (finish) { 418 if (finish) {
350#if LUAJIT_SECURITY_MCODE
351 if (J->mcarea == ptr) 419 if (J->mcarea == ptr)
352 mcode_protect(J, MCPROT_RUN); 420 mcode_protect(J, MCPROT_RUN);
353 else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) 421 else
354 mcode_protfail(J); 422 mcode_setprot(J, ptr, ((MCLink *)ptr)->size, MCPROT_RUN);
355#endif
356 return NULL; 423 return NULL;
357 } else { 424 } else {
358 MCode *mc = J->mcarea; 425 uintptr_t base = (uintptr_t)J->mcarea, addr = (uintptr_t)ptr;
359 /* Try current area first to use the protection cache. */ 426 /* Try current area first to use the protection cache. */
360 if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { 427 if (addr >= base && addr < base + J->szmcarea) {
361#if LUAJIT_SECURITY_MCODE
362 mcode_protect(J, MCPROT_GEN); 428 mcode_protect(J, MCPROT_GEN);
363#endif 429 return (MCode *)base;
364 return mc;
365 } 430 }
366 /* Otherwise search through the list of MCode areas. */ 431 /* Otherwise search through the list of MCode areas. */
367 for (;;) { 432 for (;;) {
368 mc = ((MCLink *)mc)->next; 433 base = (uintptr_t)(((MCLink *)base)->next);
369 lj_assertJ(mc != NULL, "broken MCode area chain"); 434 lj_assertJ(base != 0, "broken MCode area chain");
370 if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { 435 if (addr >= base && addr < base + ((MCLink *)base)->size) {
371#if LUAJIT_SECURITY_MCODE 436 mcode_setprot(J, (MCode *)base, ((MCLink *)base)->size, MCPROT_GEN);
372 if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) 437 return (MCode *)base;
373 mcode_protfail(J);
374#endif
375 return mc;
376 } 438 }
377 } 439 }
378 } 440 }
@@ -384,7 +446,6 @@ void lj_mcode_limiterr(jit_State *J, size_t need)
384 size_t sizemcode, maxmcode; 446 size_t sizemcode, maxmcode;
385 lj_mcode_abort(J); 447 lj_mcode_abort(J);
386 sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; 448 sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10;
387 sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
388 maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; 449 maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10;
389 if (need * sizeof(MCode) > sizemcode) 450 if (need * sizeof(MCode) > sizemcode)
390 lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */ 451 lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index d0bbc5a5..947545f8 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -190,6 +190,7 @@ typedef enum ARMIns {
190 ARMI_LDRSB = 0xe01000d0, 190 ARMI_LDRSB = 0xe01000d0,
191 ARMI_LDRSH = 0xe01000f0, 191 ARMI_LDRSH = 0xe01000f0,
192 ARMI_LDRD = 0xe00000d0, 192 ARMI_LDRD = 0xe00000d0,
193 ARMI_LDRL = 0xe51f0000,
193 ARMI_STR = 0xe4000000, 194 ARMI_STR = 0xe4000000,
194 ARMI_STRB = 0xe4400000, 195 ARMI_STRB = 0xe4400000,
195 ARMI_STRH = 0xe00000b0, 196 ARMI_STRH = 0xe00000b0,
@@ -200,6 +201,7 @@ typedef enum ARMIns {
200 ARMI_BL = 0xeb000000, 201 ARMI_BL = 0xeb000000,
201 ARMI_BLX = 0xfa000000, 202 ARMI_BLX = 0xfa000000,
202 ARMI_BLXr = 0xe12fff30, 203 ARMI_BLXr = 0xe12fff30,
204 ARMI_BX = 0xe12fff10,
203 205
204 /* ARMv6 */ 206 /* ARMv6 */
205 ARMI_REV = 0xe6bf0f30, 207 ARMI_REV = 0xe6bf0f30,
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
index 30aff478..3113d141 100644
--- a/src/lj_target_arm64.h
+++ b/src/lj_target_arm64.h
@@ -110,6 +110,7 @@ typedef struct {
110static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) 110static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
111{ 111{
112 while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ 112 while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
113 if ((LJ_LE ? p[1] >> 28 : p[1] & 0xf) == 0xf) p++; /* Skip A64I_LDRx. */
113 return p + 3 + exitno; 114 return p + 3 + exitno;
114} 115}
115/* Avoid dependence on lj_jit.h if only including lj_target.h. */ 116/* Avoid dependence on lj_jit.h if only including lj_target.h. */
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index 5a1b5a7c..58f31188 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -115,6 +115,7 @@ typedef struct {
115static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) 115static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
116{ 116{
117 while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */ 117 while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */
118 if (p[3] == 0x4e800421) p += 2; /* Indirect branch PPCI_BCTRL. */
118 return p + 3 + exitno; 119 return p + 3 + exitno;
119} 120}
120/* Avoid dependence on lj_jit.h if only including lj_target.h. */ 121/* Avoid dependence on lj_jit.h if only including lj_target.h. */
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 0e948e8d..3e2cd0b3 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -343,6 +343,14 @@ void lj_trace_initstate(global_State *g)
343 J->k32[LJ_K32_M2P64] = 0xdf800000; 343 J->k32[LJ_K32_M2P64] = 0xdf800000;
344#endif 344#endif
345#endif 345#endif
346#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
347 J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler;
348 J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp;
349#endif
350#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
351 J->k64[LJ_K64_VM_EXIT_HANDLER].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_handler, 0);
352 J->k64[LJ_K64_VM_EXIT_INTERP].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_interp, 0);
353#endif
346} 354}
347 355
348/* Free everything associated with the JIT compiler state. */ 356/* Free everything associated with the JIT compiler state. */
@@ -637,10 +645,15 @@ static int trace_abort(jit_State *J)
637 J->cur.traceno = 0; 645 J->cur.traceno = 0;
638 } 646 }
639 L->top--; /* Remove error object */ 647 L->top--; /* Remove error object */
640 if (e == LJ_TRERR_DOWNREC) 648 if (e == LJ_TRERR_DOWNREC) {
641 return trace_downrec(J); 649 return trace_downrec(J);
642 else if (e == LJ_TRERR_MCODEAL) 650 } else if (e == LJ_TRERR_MCODEAL) {
651 if (!J->mcarea) { /* Disable JIT compiler if first mcode alloc fails. */
652 J->flags &= ~JIT_F_ON;
653 lj_dispatch_update(J2G(J));
654 }
643 lj_trace_flushall(L); 655 lj_trace_flushall(L);
656 }
644 return 0; 657 return 0;
645} 658}
646 659