about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Mike Pall <mike>, 2025-10-28 00:27:15 +0100
committer: Mike Pall <mike>, 2025-10-28 00:27:15 +0100
commit: e34a78acf6b8656874b1c25a12a7cd1813d73af9 (patch)
tree: 52755c28fb727cc2296ed4f889b52378e802abf4 /src
parent: 25a61a182166fec06f1a1a025eb8fabbb6cf483e (diff)
download: luajit-e34a78acf6b8656874b1c25a12a7cd1813d73af9.tar.gz
          luajit-e34a78acf6b8656874b1c25a12a7cd1813d73af9.tar.bz2
          luajit-e34a78acf6b8656874b1c25a12a7cd1813d73af9.zip
x64: Various fixes for CET IBT.
Also add ELF notes. #1391
Diffstat (limited to 'src')
-rw-r--r--  src/Makefile        | 10
-rw-r--r--  src/lj_arch.h       | 18
-rw-r--r--  src/lj_asm.c        |  4
-rw-r--r--  src/lj_ccallback.c  | 24
-rw-r--r--  src/lj_emit_x86.h   |  4
-rw-r--r--  src/vm_x64.dasc     | 79
6 files changed, 101 insertions, 38 deletions
diff --git a/src/Makefile b/src/Makefile
index d23e0db2..e657af13 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -446,9 +446,13 @@ ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH)))
446 DASM_AFLAGS+= -D PAUTH 446 DASM_AFLAGS+= -D PAUTH
447 TARGET_ARCH+= -DLJ_ABI_PAUTH=1 447 TARGET_ARCH+= -DLJ_ABI_PAUTH=1
448endif 448endif
449ifneq (,$(findstring LJ_CET_BR 1,$(TARGET_TESTARCH))) 449ifneq (,$(findstring LJ_ABI_BRANCH_TRACK 1,$(TARGET_TESTARCH)))
450 DASM_AFLAGS+= -D CET_BR 450 DASM_AFLAGS+= -D BRANCH_TRACK
451 TARGET_ARCH+= -DLJ_CET_BR=1 451 TARGET_ARCH+= -DLJ_ABI_BRANCH_TRACK=1
452endif
453ifneq (,$(findstring LJ_ABI_SHADOW_STACK 1,$(TARGET_TESTARCH)))
454 DASM_AFLAGS+= -D SHADOW_STACK
455 TARGET_ARCH+= -DLJ_ABI_SHADOW_STACK=1
452endif 456endif
453DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) 457DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
454ifeq (Windows,$(TARGET_SYS)) 458ifeq (Windows,$(TARGET_SYS))
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 42c65879..a775b51f 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -219,15 +219,27 @@
219#error "macOS requires GC64 -- don't disable it" 219#error "macOS requires GC64 -- don't disable it"
220#endif 220#endif
221 221
222#if (__CET__ & 1) && defined(LUAJIT_ENABLE_CET_BR) 222#if !defined(LJ_ABI_BRANCH_TRACK) && (__CET__ & 1) && \
223 LJ_TARGET_GC64 && defined(LUAJIT_ENABLE_CET_BR)
223/* 224/*
224** Control-Flow Enforcement Technique (CET) indirect branch tracking (IBT). 225** Control-Flow Enforcement Technique (CET) indirect branch tracking (IBT).
225** This is not enabled by default because it causes a notable slowdown of 226** This is not enabled by default because it causes a notable slowdown of
226** the interpreter on all x64 CPUs, whether they have CET enabled or not. 227** the interpreter on all x64 CPUs, whether they have CET enabled or not.
227** If your toolchain enables -fcf-protection=branch by default, you need 228** If your toolchain enables -fcf-protection=branch by default, you need
228** to build with: make XCFLAGS=-DLUAJIT_ENABLE_CET_BR 229** to build with: make amalg XCFLAGS=-DLUAJIT_ENABLE_CET_BR
229*/ 230*/
230#define LJ_CET_BR 1 231#define LJ_ABI_BRANCH_TRACK 1
232#endif
233
234#if !defined(LJ_ABI_SHADOW_STACK) && (__CET__ & 2)
235/*
236** Control-Flow Enforcement Technique (CET) shadow stack (CET-SS).
237** It has no code overhead and doesn't cause any slowdowns when unused.
238** It can also be unconditionally enabled since all code already follows
239** a strict CALL to RET correspondence for performance reasons (all modern
240** CPUs use a (non-enforcing) shadow stack for return branch prediction).
241*/
242#define LJ_ABI_SHADOW_STACK 1
231#endif 243#endif
232 244
233#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM 245#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
diff --git a/src/lj_asm.c b/src/lj_asm.c
index e7f3ec1c..8f558a03 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -2586,8 +2586,8 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
2586 asm_head_side(as); 2586 asm_head_side(as);
2587 else 2587 else
2588 asm_head_root(as); 2588 asm_head_root(as);
2589#if LJ_CET_BR 2589#if LJ_ABI_BRANCH_TRACK
2590 emit_endbr(as); 2590 emit_branch_track(as);
2591#endif 2591#endif
2592 asm_phi_fixup(as); 2592 asm_phi_fixup(as);
2593 2593
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 7f08f0a8..5594a731 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -34,22 +34,29 @@
34 34
35#elif LJ_TARGET_X86ORX64 35#elif LJ_TARGET_X86ORX64
36 36
37#if LJ_ABI_BRANCH_TRACK
38#define CALLBACK_MCODE_SLOTSZ 8
39#else
40#define CALLBACK_MCODE_SLOTSZ 4
41#endif
42#define CALLBACK_MCODE_NSLOT (128 / CALLBACK_MCODE_SLOTSZ)
43
37#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) 44#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0)
38#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5)) 45#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
39 46
40#define CALLBACK_SLOT2OFS(slot) \ 47#define CALLBACK_SLOT2OFS(slot) \
41 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) 48 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/CALLBACK_MCODE_NSLOT) + CALLBACK_MCODE_SLOTSZ*(slot))
42 49
43static MSize CALLBACK_OFS2SLOT(MSize ofs) 50static MSize CALLBACK_OFS2SLOT(MSize ofs)
44{ 51{
45 MSize group; 52 MSize group;
46 ofs -= CALLBACK_MCODE_HEAD; 53 ofs -= CALLBACK_MCODE_HEAD;
47 group = ofs / (32*4 + CALLBACK_MCODE_GROUP); 54 group = ofs / (128 + CALLBACK_MCODE_GROUP);
48 return (ofs % (32*4 + CALLBACK_MCODE_GROUP))/4 + group*32; 55 return (ofs % (128 + CALLBACK_MCODE_GROUP))/CALLBACK_MCODE_SLOTSZ + group*CALLBACK_MCODE_NSLOT;
49} 56}
50 57
51#define CALLBACK_MAX_SLOT \ 58#define CALLBACK_MAX_SLOT \
52 (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32) 59 (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+128))*CALLBACK_MCODE_NSLOT)
53 60
54#elif LJ_TARGET_ARM 61#elif LJ_TARGET_ARM
55 62
@@ -118,9 +125,13 @@ static void *callback_mcode_init(global_State *g, uint8_t *page)
118 *(void **)p = target; p += 8; 125 *(void **)p = target; p += 8;
119#endif 126#endif
120 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { 127 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
128#if LJ_ABI_BRANCH_TRACK
129 *(uint32_t *)p = XI_ENDBR64; p += 4;
130#endif
121 /* mov al, slot; jmp group */ 131 /* mov al, slot; jmp group */
122 *p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot; 132 *p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot;
123 if ((slot & 31) == 31 || slot == CALLBACK_MAX_SLOT-1) { 133 if ((slot & (CALLBACK_MCODE_NSLOT-1)) == (CALLBACK_MCODE_NSLOT-1) ||
134 slot == CALLBACK_MAX_SLOT-1) {
124 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ 135 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
125 *p++ = XI_PUSH + RID_EBP; 136 *p++ = XI_PUSH + RID_EBP;
126 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); 137 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
@@ -140,7 +151,8 @@ static void *callback_mcode_init(global_State *g, uint8_t *page)
140 *p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4; 151 *p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4;
141#endif 152#endif
142 } else { 153 } else {
143 *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); 154 *p++ = XI_JMPs;
155 *p++ = (uint8_t)(CALLBACK_MCODE_SLOTSZ*(CALLBACK_MCODE_NSLOT-1-(slot&(CALLBACK_MCODE_NSLOT-1))) - 2);
144 } 156 }
145 } 157 }
146 return p; 158 return p;
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 848301bc..5fd6cfa7 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -70,8 +70,8 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
70 return p; 70 return p;
71} 71}
72 72
73#if LJ_CET_BR 73#if LJ_ABI_BRANCH_TRACK
74static void emit_endbr(ASMState *as) 74static void emit_branch_track(ASMState *as)
75{ 75{
76 emit_u32(as, XI_ENDBR64); 76 emit_u32(as, XI_ENDBR64);
77} 77}
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 52ef88af..2e9f0505 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -191,7 +191,7 @@
191| 191|
192|//-- Control-Flow Enforcement Technique (CET) --------------------------- 192|//-- Control-Flow Enforcement Technique (CET) ---------------------------
193| 193|
194|.if CET_BR 194|.if BRANCH_TRACK
195|.macro endbr; endbr64; .endmacro 195|.macro endbr; endbr64; .endmacro
196|.else 196|.else
197|.macro endbr; .endmacro 197|.macro endbr; .endmacro
@@ -200,13 +200,13 @@
200|//----------------------------------------------------------------------- 200|//-----------------------------------------------------------------------
201| 201|
202|// Instruction headers. 202|// Instruction headers.
203|.macro ins_A; endbr; .endmacro 203|.macro ins_A; .endmacro
204|.macro ins_AD; endbr; .endmacro 204|.macro ins_AD; .endmacro
205|.macro ins_AJ; endbr; .endmacro 205|.macro ins_AJ; .endmacro
206|.macro ins_ABC; endbr; movzx RBd, RCH; movzx RCd, RCL; .endmacro 206|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro
207|.macro ins_AB_; endbr; movzx RBd, RCH; .endmacro 207|.macro ins_AB_; movzx RBd, RCH; .endmacro
208|.macro ins_A_C; endbr; movzx RCd, RCL; .endmacro 208|.macro ins_A_C; movzx RCd, RCL; .endmacro
209|.macro ins_AND; endbr; not RD; .endmacro 209|.macro ins_AND; not RD; .endmacro
210| 210|
211|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). 211|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
212|.macro ins_NEXT 212|.macro ins_NEXT
@@ -487,13 +487,12 @@ static void build_subroutines(BuildCtx *ctx)
487 | jmp <3 487 | jmp <3
488 | 488 |
489 |->vm_unwind_yield: 489 |->vm_unwind_yield:
490 | endbr
491 | mov al, LUA_YIELD 490 | mov al, LUA_YIELD
492 | jmp ->vm_unwind_c_eh 491 | jmp ->vm_unwind_c_eh
493 | 492 |
494 |->vm_unwind_c: // Unwind C stack, return from vm_pcall. 493 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
495 | endbr
496 | // (void *cframe, int errcode) 494 | // (void *cframe, int errcode)
495 | endbr
497 | mov eax, CARG2d // Error return status for vm_pcall. 496 | mov eax, CARG2d // Error return status for vm_pcall.
498 | mov rsp, CARG1 497 | mov rsp, CARG1
499 |->vm_unwind_c_eh: // Landing pad for external unwinder. 498 |->vm_unwind_c_eh: // Landing pad for external unwinder.
@@ -513,8 +512,8 @@ static void build_subroutines(BuildCtx *ctx)
513 |.endif 512 |.endif
514 | 513 |
515 |->vm_unwind_ff: // Unwind C stack, return from ff pcall. 514 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
516 | endbr
517 | // (void *cframe) 515 | // (void *cframe)
516 | endbr
518 | and CARG1, CFRAME_RAWMASK 517 | and CARG1, CFRAME_RAWMASK
519 | mov rsp, CARG1 518 | mov rsp, CARG1
520 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 519 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
@@ -689,7 +688,6 @@ static void build_subroutines(BuildCtx *ctx)
689 |//-- Continuation dispatch ---------------------------------------------- 688 |//-- Continuation dispatch ----------------------------------------------
690 | 689 |
691 |->cont_dispatch: 690 |->cont_dispatch:
692 | endbr
693 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) 691 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
694 | add RA, BASE 692 | add RA, BASE
695 | and PC, -8 693 | and PC, -8
@@ -1152,7 +1150,7 @@ static void build_subroutines(BuildCtx *ctx)
1152 | 1150 |
1153 |.macro .ffunc, name 1151 |.macro .ffunc, name
1154 |->ff_ .. name: 1152 |->ff_ .. name:
1155 | endbr 1153 | endbr
1156 |.endmacro 1154 |.endmacro
1157 | 1155 |
1158 |.macro .ffunc_1, name 1156 |.macro .ffunc_1, name
@@ -2338,8 +2336,8 @@ static void build_subroutines(BuildCtx *ctx)
2338 | 2336 |
2339 |->cont_stitch: // Trace stitching. 2337 |->cont_stitch: // Trace stitching.
2340 |.if JIT 2338 |.if JIT
2341 | endbr
2342 | // BASE = base, RC = result, RB = mbase 2339 | // BASE = base, RC = result, RB = mbase
2340 | endbr
2343 | mov TRACE:ITYPE, [RB-40] // Save previous trace. 2341 | mov TRACE:ITYPE, [RB-40] // Save previous trace.
2344 | cleartp TRACE:ITYPE 2342 | cleartp TRACE:ITYPE
2345 | mov TMPRd, MULTRES 2343 | mov TMPRd, MULTRES
@@ -2460,8 +2458,8 @@ static void build_subroutines(BuildCtx *ctx)
2460 | jmp >1 2458 | jmp >1
2461 |.endif 2459 |.endif
2462 |->vm_exit_interp: 2460 |->vm_exit_interp:
2463 | endbr
2464 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. 2461 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2462 | endbr
2465 |.if JIT 2463 |.if JIT
2466 | // Restore additional callee-save registers only used in compiled code. 2464 | // Restore additional callee-save registers only used in compiled code.
2467 |.if X64WIN 2465 |.if X64WIN
@@ -2849,6 +2847,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2849 |=>defop: 2847 |=>defop:
2850 2848
2851 switch (op) { 2849 switch (op) {
2850#if !LJ_HASJIT
2851 case BC_FORL:
2852 case BC_JFORI:
2853 case BC_JFORL:
2854 case BC_ITERL:
2855 case BC_JITERL:
2856 case BC_LOOP:
2857 case BC_JLOOP:
2858 case BC_FUNCF:
2859 case BC_JFUNCF:
2860 case BC_JFUNCV:
2861#endif
2862 case BC_FUNCV: /* NYI: compiled vararg functions. */
2863 break; /* Avoid redundant endbr instructions. */
2864 default:
2865 | endbr
2866 break;
2867 }
2868
2869 switch (op) {
2852 2870
2853 /* -- Comparison ops ---------------------------------------------------- */ 2871 /* -- Comparison ops ---------------------------------------------------- */
2854 2872
@@ -4119,7 +4137,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4119 4137
4120 case BC_ITERN: 4138 case BC_ITERN:
4121 |.if JIT 4139 |.if JIT
4122 | endbr
4123 | hotloop RBd 4140 | hotloop RBd
4124 |.endif 4141 |.endif
4125 |->vm_IITERN: 4142 |->vm_IITERN:
@@ -4299,7 +4316,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4299 | jnz >7 // Not returning to a fixarg Lua func? 4316 | jnz >7 // Not returning to a fixarg Lua func?
4300 switch (op) { 4317 switch (op) {
4301 case BC_RET: 4318 case BC_RET:
4302 | endbr
4303 |->BC_RET_Z: 4319 |->BC_RET_Z:
4304 | mov KBASE, BASE // Use KBASE for result move. 4320 | mov KBASE, BASE // Use KBASE for result move.
4305 | sub RDd, 1 4321 | sub RDd, 1
@@ -4318,12 +4334,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4318 | ja >6 4334 | ja >6
4319 break; 4335 break;
4320 case BC_RET1: 4336 case BC_RET1:
4321 | endbr
4322 | mov RB, [BASE+RA] 4337 | mov RB, [BASE+RA]
4323 | mov [BASE-16], RB 4338 | mov [BASE-16], RB
4324 /* fallthrough */ 4339 /* fallthrough */
4325 case BC_RET0: 4340 case BC_RET0:
4326 | endbr
4327 |5: 4341 |5:
4328 | cmp PC_RB, RDL // More results expected? 4342 | cmp PC_RB, RDL // More results expected?
4329 | ja >6 4343 | ja >6
@@ -4370,7 +4384,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4370 4384
4371 case BC_FORL: 4385 case BC_FORL:
4372 |.if JIT 4386 |.if JIT
4373 | endbr
4374 | hotloop RBd 4387 | hotloop RBd
4375 |.endif 4388 |.endif
4376 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 4389 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
@@ -4522,7 +4535,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4522 4535
4523 case BC_ITERL: 4536 case BC_ITERL:
4524 |.if JIT 4537 |.if JIT
4525 | endbr
4526 | hotloop RBd 4538 | hotloop RBd
4527 |.endif 4539 |.endif
4528 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. 4540 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
@@ -4616,7 +4628,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4616 4628
4617 case BC_FUNCF: 4629 case BC_FUNCF:
4618 |.if JIT 4630 |.if JIT
4619 | endbr
4620 | hotcall RBd 4631 | hotcall RBd
4621 |.endif 4632 |.endif
4622 case BC_FUNCV: /* NYI: compiled vararg functions. */ 4633 case BC_FUNCV: /* NYI: compiled vararg functions. */
@@ -4887,6 +4898,30 @@ static void emit_asm_debug(BuildCtx *ctx)
4887 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); 4898 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4888#endif 4899#endif
4889#endif 4900#endif
4901#if LJ_TARGET_LINUX && (LJ_ABI_BRANCH_TRACK || LJ_ABI_SHADOW_STACK)
4902 fprintf(ctx->fp,
4903 "\t.section .note.gnu.property,\"a\"\n"
4904 "\t.align 8\n"
4905 "\t.long 4\n"
4906 "\t.long 16\n"
4907 "\t.long 5\n"
4908 "\t.long 0x00554e47\n"
4909 "\t.long 0xc0000002\n"
4910 "\t.long 4\n"
4911 "\t.long %d\n"
4912 "\t.long 0\n",
4913#if LJ_ABI_BRANCH_TRACK
4914 1|
4915#else
4916 0|
4917#endif
4918#if LJ_ABI_SHADOW_STACK
4919 2
4920#else
4921 0
4922#endif
4923 );
4924#endif
4890 break; 4925 break;
4891#if !LJ_NO_UNWIND 4926#if !LJ_NO_UNWIND
4892 /* Mental note: never let Apple design an assembler. 4927 /* Mental note: never let Apple design an assembler.