aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Mike Pall <mike> 2023-08-29 02:12:13 +0200
committer: Mike Pall <mike> 2023-08-29 02:12:13 +0200
commit: cf903edb30e0cbd620ebd4bac02d4e2b4410fd02 (patch)
tree: 5480b7ecf44a9bd980a1fd91e7113fcafe1f15ca /src
parent: 7cc53f0b85f834dfba1516ea79d59db463e856fa (diff)
download: luajit-cf903edb30e0cbd620ebd4bac02d4e2b4410fd02.tar.gz
download: luajit-cf903edb30e0cbd620ebd4bac02d4e2b4410fd02.tar.bz2
download: luajit-cf903edb30e0cbd620ebd4bac02d4e2b4410fd02.zip
FFI: Unify stack setup for C calls in interpreter.
Diffstat (limited to 'src')
-rw-r--r-- src/lj_ccall.c 57
-rw-r--r-- src/lj_ccall.h 7
-rw-r--r-- src/vm_arm.dasc 8
-rw-r--r-- src/vm_arm64.dasc 8
-rw-r--r-- src/vm_mips.dasc 1
-rw-r--r-- src/vm_mips64.dasc 1
-rw-r--r-- src/vm_ppc.dasc 3
-rw-r--r-- src/vm_x64.dasc 8
-rw-r--r-- src/vm_x86.dasc 22
9 files changed, 62 insertions, 53 deletions
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 04e306eb..9001cb5a 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -20,12 +20,15 @@
20#if LJ_TARGET_X86 20#if LJ_TARGET_X86
21/* -- x86 calling conventions --------------------------------------------- */ 21/* -- x86 calling conventions --------------------------------------------- */
22 22
23#define CCALL_PUSH(arg) \
24 *(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
25
23#if LJ_ABI_WIN 26#if LJ_ABI_WIN
24 27
25#define CCALL_HANDLE_STRUCTRET \ 28#define CCALL_HANDLE_STRUCTRET \
26 /* Return structs bigger than 8 by reference (on stack only). */ \ 29 /* Return structs bigger than 8 by reference (on stack only). */ \
27 cc->retref = (sz > 8); \ 30 cc->retref = (sz > 8); \
28 if (cc->retref) cc->stack[nsp++] = (GPRArg)dp; 31 if (cc->retref) CCALL_PUSH(dp);
29 32
30#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET 33#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
31 34
@@ -40,7 +43,7 @@
40 if (ngpr < maxgpr) \ 43 if (ngpr < maxgpr) \
41 cc->gpr[ngpr++] = (GPRArg)dp; \ 44 cc->gpr[ngpr++] = (GPRArg)dp; \
42 else \ 45 else \
43 cc->stack[nsp++] = (GPRArg)dp; \ 46 CCALL_PUSH(dp); \
44 } else { /* Struct with single FP field ends up in FPR. */ \ 47 } else { /* Struct with single FP field ends up in FPR. */ \
45 cc->resx87 = ccall_classify_struct(cts, ctr); \ 48 cc->resx87 = ccall_classify_struct(cts, ctr); \
46 } 49 }
@@ -56,7 +59,7 @@
56 if (ngpr < maxgpr) \ 59 if (ngpr < maxgpr) \
57 cc->gpr[ngpr++] = (GPRArg)dp; \ 60 cc->gpr[ngpr++] = (GPRArg)dp; \
58 else \ 61 else \
59 cc->stack[nsp++] = (GPRArg)dp; 62 CCALL_PUSH(dp);
60 63
61#endif 64#endif
62 65
@@ -67,7 +70,7 @@
67 if (ngpr < maxgpr) \ 70 if (ngpr < maxgpr) \
68 cc->gpr[ngpr++] = (GPRArg)dp; \ 71 cc->gpr[ngpr++] = (GPRArg)dp; \
69 else \ 72 else \
70 cc->stack[nsp++] = (GPRArg)dp; \ 73 CCALL_PUSH(dp); \
71 } 74 }
72 75
73#endif 76#endif
@@ -278,8 +281,8 @@
278 if (ngpr < maxgpr) { \ 281 if (ngpr < maxgpr) { \
279 dp = &cc->gpr[ngpr]; \ 282 dp = &cc->gpr[ngpr]; \
280 if (ngpr + n > maxgpr) { \ 283 if (ngpr + n > maxgpr) { \
281 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ 284 nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
282 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ 285 if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
283 ngpr = maxgpr; \ 286 ngpr = maxgpr; \
284 } else { \ 287 } else { \
285 ngpr += n; \ 288 ngpr += n; \
@@ -471,8 +474,8 @@
471 if (ngpr < maxgpr) { \ 474 if (ngpr < maxgpr) { \
472 dp = &cc->gpr[ngpr]; \ 475 dp = &cc->gpr[ngpr]; \
473 if (ngpr + n > maxgpr) { \ 476 if (ngpr + n > maxgpr) { \
474 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ 477 nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
475 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ 478 if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
476 ngpr = maxgpr; \ 479 ngpr = maxgpr; \
477 } else { \ 480 } else { \
478 ngpr += n; \ 481 ngpr += n; \
@@ -565,8 +568,8 @@
565 if (ngpr < maxgpr) { \ 568 if (ngpr < maxgpr) { \
566 dp = &cc->gpr[ngpr]; \ 569 dp = &cc->gpr[ngpr]; \
567 if (ngpr + n > maxgpr) { \ 570 if (ngpr + n > maxgpr) { \
568 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ 571 nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
569 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ 572 if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
570 ngpr = maxgpr; \ 573 ngpr = maxgpr; \
571 } else { \ 574 } else { \
572 ngpr += n; \ 575 ngpr += n; \
@@ -698,10 +701,11 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
698 lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); 701 lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
699 if (ccall_struct_reg(cc, cts, dp, rcl)) { 702 if (ccall_struct_reg(cc, cts, dp, rcl)) {
700 /* Register overflow? Pass on stack. */ 703 /* Register overflow? Pass on stack. */
701 MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; 704 MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
702 if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ 705 if (nsp + sz > CCALL_SIZE_STACK)
703 cc->nsp = nsp + n; 706 return 1; /* Too many arguments. */
704 memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR); 707 cc->nsp = nsp + sz;
708 memcpy((uint8_t *)cc->stack + nsp, dp, sz);
705 } 709 }
706 return 0; /* Ok. */ 710 return 0; /* Ok. */
707} 711}
@@ -1022,22 +1026,23 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
1022 } else { 1026 } else {
1023 sz = CTSIZE_PTR; 1027 sz = CTSIZE_PTR;
1024 } 1028 }
1025 sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); 1029 n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
1026 n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
1027 1030
1028 CCALL_HANDLE_REGARG /* Handle register arguments. */ 1031 CCALL_HANDLE_REGARG /* Handle register arguments. */
1029 1032
1030 /* Otherwise pass argument on stack. */ 1033 /* Otherwise pass argument on stack. */
1031 if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) { 1034 if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
1032 MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1; 1035 MSize align = (1u << ctype_align(d->info)) - 1;
1033 nsp = (nsp + align) & ~align; /* Align argument on stack. */ 1036 if (rp)
1037 align = CTSIZE_PTR-1;
1038 nsp = (nsp + align) & ~align;
1034 } 1039 }
1035 if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */ 1040 dp = ((uint8_t *)cc->stack) + nsp;
1041 nsp += n * CTSIZE_PTR;
1042 if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
1036 err_nyi: 1043 err_nyi:
1037 lj_err_caller(L, LJ_ERR_FFI_NYICALL); 1044 lj_err_caller(L, LJ_ERR_FFI_NYICALL);
1038 } 1045 }
1039 dp = &cc->stack[nsp];
1040 nsp += n;
1041 isva = 0; 1046 isva = 0;
1042 1047
1043 done: 1048 done:
@@ -1099,10 +1104,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
1099#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) 1104#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
1100 cc->nfpr = nfpr; /* Required for vararg functions. */ 1105 cc->nfpr = nfpr; /* Required for vararg functions. */
1101#endif 1106#endif
1102 cc->nsp = nsp; 1107 cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
1103 cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR; 1108 cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
1104 if (nsp > CCALL_SPS_FREE) 1109 if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
1105 cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u); 1110 cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
1106 return gcsteps; 1111 return gcsteps;
1107} 1112}
1108 1113
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 547415f7..57300817 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -152,14 +152,15 @@ typedef union FPRArg {
152LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR); 152LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
153LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR); 153LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
154 154
155#define CCALL_MAXSTACK 32 155#define CCALL_NUM_STACK 31
156#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)
156 157
157/* -- C call state -------------------------------------------------------- */ 158/* -- C call state -------------------------------------------------------- */
158 159
159typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { 160typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
160 void (*func)(void); /* Pointer to called function. */ 161 void (*func)(void); /* Pointer to called function. */
161 uint32_t spadj; /* Stack pointer adjustment. */ 162 uint32_t spadj; /* Stack pointer adjustment. */
162 uint8_t nsp; /* Number of stack slots. */ 163 uint8_t nsp; /* Number of bytes on stack. */
163 uint8_t retref; /* Return value by reference. */ 164 uint8_t retref; /* Return value by reference. */
164#if LJ_TARGET_X64 165#if LJ_TARGET_X64
165 uint8_t ngpr; /* Number of arguments in GPRs. */ 166 uint8_t ngpr; /* Number of arguments in GPRs. */
@@ -178,7 +179,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
178 FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ 179 FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
179#endif 180#endif
180 GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */ 181 GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
181 GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */ 182 GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */
182} CCallState; 183} CCallState;
183 184
184/* -- C call handling ----------------------------------------------------- */ 185/* -- C call handling ----------------------------------------------------- */
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 4f0798e0..0d1ea95f 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2571,16 +2571,16 @@ static void build_subroutines(BuildCtx *ctx)
2571 |.endif 2571 |.endif
2572 | mov r11, sp 2572 | mov r11, sp
2573 | sub sp, sp, CARG1 // Readjust stack. 2573 | sub sp, sp, CARG1 // Readjust stack.
2574 | subs CARG2, CARG2, #1 2574 | subs CARG2, CARG2, #4
2575 |.if HFABI 2575 |.if HFABI
2576 | vldm RB, {d0-d7} 2576 | vldm RB, {d0-d7}
2577 |.endif 2577 |.endif
2578 | ldr RB, CCSTATE->func 2578 | ldr RB, CCSTATE->func
2579 | bmi >2 2579 | bmi >2
2580 |1: // Copy stack slots. 2580 |1: // Copy stack slots.
2581 | ldr CARG4, [CARG3, CARG2, lsl #2] 2581 | ldr CARG4, [CARG3, CARG2]
2582 | str CARG4, [sp, CARG2, lsl #2] 2582 | str CARG4, [sp, CARG2]
2583 | subs CARG2, CARG2, #1 2583 | subs CARG2, CARG2, #4
2584 | bpl <1 2584 | bpl <1
2585 |2: 2585 |2:
2586 | ldrd CARG12, CCSTATE->gpr[0] 2586 | ldrd CARG12, CCSTATE->gpr[0]
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index a7a9392c..698b4210 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -2222,14 +2222,14 @@ static void build_subroutines(BuildCtx *ctx)
2222 | ldr TMP0w, CCSTATE:x0->spadj 2222 | ldr TMP0w, CCSTATE:x0->spadj
2223 | ldrb TMP1w, CCSTATE->nsp 2223 | ldrb TMP1w, CCSTATE->nsp
2224 | add TMP2, CCSTATE, #offsetof(CCallState, stack) 2224 | add TMP2, CCSTATE, #offsetof(CCallState, stack)
2225 | subs TMP1, TMP1, #1 2225 | subs TMP1, TMP1, #8
2226 | ldr TMP3, CCSTATE->func 2226 | ldr TMP3, CCSTATE->func
2227 | sub sp, sp, TMP0 2227 | sub sp, sp, TMP0
2228 | bmi >2 2228 | bmi >2
2229 |1: // Copy stack slots 2229 |1: // Copy stack slots
2230 | ldr TMP0, [TMP2, TMP1, lsl #3] 2230 | ldr TMP0, [TMP2, TMP1]
2231 | str TMP0, [sp, TMP1, lsl #3] 2231 | str TMP0, [sp, TMP1]
2232 | subs TMP1, TMP1, #1 2232 | subs TMP1, TMP1, #8
2233 | bpl <1 2233 | bpl <1
2234 |2: 2234 |2:
2235 | ldp x0, x1, CCSTATE->gpr[0] 2235 | ldp x0, x1, CCSTATE->gpr[0]
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 94a878b9..f276745c 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -2951,7 +2951,6 @@ static void build_subroutines(BuildCtx *ctx)
2951 | move TMP2, sp 2951 | move TMP2, sp
2952 | subu sp, sp, TMP1 2952 | subu sp, sp, TMP1
2953 | sw ra, -4(TMP2) 2953 | sw ra, -4(TMP2)
2954 | sll CARG2, CARG2, 2
2955 | sw r16, -8(TMP2) 2954 | sw r16, -8(TMP2)
2956 | sw CCSTATE, -12(TMP2) 2955 | sw CCSTATE, -12(TMP2)
2957 | move r16, TMP2 2956 | move r16, TMP2
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index f8e181ee..6c215f2b 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -3065,7 +3065,6 @@ static void build_subroutines(BuildCtx *ctx)
3065 | move TMP2, sp 3065 | move TMP2, sp
3066 | dsubu sp, sp, TMP1 3066 | dsubu sp, sp, TMP1
3067 | sd ra, -8(TMP2) 3067 | sd ra, -8(TMP2)
3068 | sll CARG2, CARG2, 3
3069 | sd r16, -16(TMP2) 3068 | sd r16, -16(TMP2)
3070 | sd CCSTATE, -24(TMP2) 3069 | sd CCSTATE, -24(TMP2)
3071 | move r16, TMP2 3070 | move r16, TMP2
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 73a70a00..f2e5a08f 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -3269,14 +3269,13 @@ static void build_subroutines(BuildCtx *ctx)
3269 | stw TMP0, 4(sp) 3269 | stw TMP0, 4(sp)
3270 | cmpwi cr1, CARG3, 0 3270 | cmpwi cr1, CARG3, 0
3271 | mr TMP2, sp 3271 | mr TMP2, sp
3272 | addic. CARG2, CARG2, -1 3272 | addic. CARG2, CARG2, -4
3273 | stwux sp, sp, TMP1 3273 | stwux sp, sp, TMP1
3274 | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. 3274 | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
3275 | stw r14, -4(TMP2) 3275 | stw r14, -4(TMP2)
3276 | stw CCSTATE, -8(TMP2) 3276 | stw CCSTATE, -8(TMP2)
3277 | mr r14, TMP2 3277 | mr r14, TMP2
3278 | la TMP1, CCSTATE->stack 3278 | la TMP1, CCSTATE->stack
3279 | slwi CARG2, CARG2, 2
3280 | blty >2 3279 | blty >2
3281 | la TMP2, 8(sp) 3280 | la TMP2, 8(sp)
3282 |1: 3281 |1:
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index a8649b4e..3635ba28 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -2755,12 +2755,12 @@ static void build_subroutines(BuildCtx *ctx)
2755 | 2755 |
2756 | // Copy stack slots. 2756 | // Copy stack slots.
2757 | movzx ecx, byte CCSTATE->nsp 2757 | movzx ecx, byte CCSTATE->nsp
2758 | sub ecx, 1 2758 | sub ecx, 8
2759 | js >2 2759 | js >2
2760 |1: 2760 |1:
2761 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] 2761 | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
2762 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax 2762 | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
2763 | sub ecx, 1 2763 | sub ecx, 8
2764 | jns <1 2764 | jns <1
2765 |2: 2765 |2:
2766 | 2766 |
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index bda9d7d7..c44a24ff 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -3314,19 +3314,25 @@ static void build_subroutines(BuildCtx *ctx)
3314 | 3314 |
3315 | // Copy stack slots. 3315 | // Copy stack slots.
3316 | movzx ecx, byte CCSTATE->nsp 3316 | movzx ecx, byte CCSTATE->nsp
3317 | sub ecx, 1 3317 |.if X64
3318 | sub ecx, 8
3318 | js >2 3319 | js >2
3319 |1: 3320 |1:
3320 |.if X64 3321 | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
3321 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] 3322 | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
3322 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax 3323 | sub ecx, 8
3324 | jns <1
3325 |2:
3323 |.else 3326 |.else
3324 | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] 3327 | sub ecx, 4
3325 | mov [esp+ecx*4], eax 3328 | js >2
3326 |.endif 3329 |1:
3327 | sub ecx, 1 3330 | mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)]
3331 | mov [esp+ecx], eax
3332 | sub ecx, 4
3328 | jns <1 3333 | jns <1
3329 |2: 3334 |2:
3335 |.endif
3330 | 3336 |
3331 |.if X64 3337 |.if X64
3332 | movzx eax, byte CCSTATE->nfpr 3338 | movzx eax, byte CCSTATE->nfpr