about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/lj_ccall.c 116
-rw-r--r-- src/lj_ccall.h 15
-rw-r--r-- src/lj_ccallback.c 42
-rw-r--r-- src/lj_ctype.h 2
-rw-r--r-- src/vm_arm.dasc 37
5 files changed, 193 insertions, 19 deletions
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index c3eb25f6..71331f39 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -168,6 +168,8 @@
168#elif LJ_TARGET_ARM 168#elif LJ_TARGET_ARM
169/* -- ARM calling conventions --------------------------------------------- */ 169/* -- ARM calling conventions --------------------------------------------- */
170 170
171#if LJ_ABI_SOFTFP
172
171#define CCALL_HANDLE_STRUCTRET \ 173#define CCALL_HANDLE_STRUCTRET \
172 /* Return structs of size <= 4 in a GPR. */ \ 174 /* Return structs of size <= 4 in a GPR. */ \
173 cc->retref = !(sz <= 4); \ 175 cc->retref = !(sz <= 4); \
@@ -186,13 +188,70 @@
186#define CCALL_HANDLE_COMPLEXARG \ 188#define CCALL_HANDLE_COMPLEXARG \
187 /* Pass complex by value in 2 or 4 GPRs. */ 189 /* Pass complex by value in 2 or 4 GPRs. */
188 190
189/* ARM has a softfp ABI. */ 191#define CCALL_HANDLE_REGARG_FP1
192#define CCALL_HANDLE_REGARG_FP2
193
194#else
195
196#define CCALL_HANDLE_STRUCTRET \
197 cc->retref = !ccall_classify_struct(cts, ctr, ct); \
198 if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
199
200#define CCALL_HANDLE_STRUCTRET2 \
201 if (ccall_classify_struct(cts, ctr, ct) > 1) sp = (uint8_t *)&cc->fpr[0]; \
202 memcpy(dp, sp, ctr->size);
203
204#define CCALL_HANDLE_COMPLEXRET \
205 if (!(ct->info & CTF_VARARG)) cc->retref = 0; /* Return complex in FPRs. */
206
207#define CCALL_HANDLE_COMPLEXRET2 \
208 if (!(ct->info & CTF_VARARG)) memcpy(dp, &cc->fpr[0], ctr->size);
209
210#define CCALL_HANDLE_STRUCTARG \
211 isfp = (ccall_classify_struct(cts, d, ct) > 1);
212 /* Pass all structs by value in registers and/or on the stack. */
213
214#define CCALL_HANDLE_COMPLEXARG \
215 isfp = 1; /* Pass complex by value in FPRs or on stack. */
216
/* Hard-float ABI: try to place an FP argument in the FPR argument slots.
**
** Expands to the opening half of an if/else. The first branch handles the
** argument when isfp is set and the called function is not vararg; otherwise
** control enters the trailing "else {", which CCALL_HANDLE_REGARG_FP2
** closes. isfp, ct, d, n, sz, dp, nfpr, fprodd and the "done" label all come
** from the expansion site.
**
** - Alignment above CTALIGN_PTR: take (n >> 1) whole cc->fpr[] slots starting
**   at nfpr, if they fit within CCALL_NARG_FPR.
** - Otherwise each slot is treated as a pair of floats (f[0]/f[1]):
**   a non-zero fprodd selects the f[1] half of cc->fpr[fprodd-1] as the start
**   of the argument; with fprodd == 0 the argument starts at cc->fpr[nfpr]
**   and, if n is odd, fprodd is set to the index of the half-filled slot + 1.
** - If nothing fits, fprodd is cleared and control leaves the branch without
**   jumping to done.
**
** NOTE(review): n looks like a count of 4-byte units -- confirm at the
** expansion site.
** NOTE(review): the fprodd == 0 path stores slot index + 1 in fprodd, while
** the f[1] path stores nfpr-1 for even n -- check that both follow the same
** encoding.
*/
217#define CCALL_HANDLE_REGARG_FP1 \
218 if (isfp && !(ct->info & CTF_VARARG)) { \
219 if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \
220 if (nfpr + (n >> 1) <= CCALL_NARG_FPR) { \
221 dp = &cc->fpr[nfpr]; \
222 nfpr += (n >> 1); \
223 goto done; \
224 } \
225 } else { \
226 if (sz > 1 && fprodd != nfpr) fprodd = 0; \
227 if (fprodd) { \
228 if (2*nfpr+n <= 2*CCALL_NARG_FPR+1) { \
229 dp = (void *)&cc->fpr[fprodd-1].f[1]; \
230 nfpr += (n >> 1); \
231 if ((n & 1)) fprodd = 0; else fprodd = nfpr-1; \
232 goto done; \
233 } \
234 } else { \
235 if (2*nfpr+n <= 2*CCALL_NARG_FPR) { \
236 dp = (void *)&cc->fpr[nfpr]; \
237 nfpr += (n >> 1); \
238 if ((n & 1)) fprodd = ++nfpr; else fprodd = 0; \
239 goto done; \
240 } \
241 } \
242 } \
243 fprodd = 0; /* No reordering after the first FP value is on stack. */ \
244 } else {
245
246#define CCALL_HANDLE_REGARG_FP2 }
247
248#endif
249
190#define CCALL_HANDLE_REGARG \ 250#define CCALL_HANDLE_REGARG \
251 CCALL_HANDLE_REGARG_FP1 \
191 if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \ 252 if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \
192 if (ngpr < maxgpr) \ 253 if (ngpr < maxgpr) \
193 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ 254 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
194 else \
195 nsp = (nsp + 1u) & ~1u; /* Align argument on stack. */ \
196 } \ 255 } \
197 if (ngpr < maxgpr) { \ 256 if (ngpr < maxgpr) { \
198 dp = &cc->gpr[ngpr]; \ 257 dp = &cc->gpr[ngpr]; \
@@ -204,7 +263,10 @@
204 ngpr += n; \ 263 ngpr += n; \
205 } \ 264 } \
206 goto done; \ 265 goto done; \
207 } 266 } CCALL_HANDLE_REGARG_FP2
267
268#define CCALL_HANDLE_RET \
269 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
208 270
209#elif LJ_TARGET_PPC 271#elif LJ_TARGET_PPC
210/* -- PPC calling conventions --------------------------------------------- */ 272/* -- PPC calling conventions --------------------------------------------- */
@@ -453,6 +515,49 @@ static void ccall_struct_ret(CCallState *cc, int *rcl, uint8_t *dp, CTSize sz)
453} 515}
454#endif 516#endif
455 517
518/* -- ARM hard-float ABI struct classification ---------------------------- */
519
520#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
521
522/* Classify a struct based on its fields.
**
** ct is the struct/union type to inspect; ctf is the function type of the
** call, of which only the CTF_VARARG flag is checked here.
**
** Returns r + (n << 8) when every field is a float, double or complex (or a
** CTA_SUBTYPE entry that itself classifies this way), the accumulated element
** size r is 4 or 8 and the element count n is at most 4. For unions n is the
** maximum over the members rather than the sum.
** In all other cases, and always for vararg functions, returns 1 if the
** aggregate size is <= 4 and 0 if not.
** The macros above test the result against zero and against > 1.
*/
523static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
524{
525 CTSize sz = ct->size; /* Saved now: ct is reassigned while walking the siblings. */
526 unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
527 if ((ctf->info & CTF_VARARG)) goto noth;
528 while (ct->sib) {
529 ct = ctype_get(cts, ct->sib);
530 if (ctype_isfield(ct->info)) {
531 CType *sct = ctype_rawchild(cts, ct);
532 if (ctype_isfp(sct->info)) {
/* OR the element sizes together: mixing 4 and 8 fails the final r test. */
533 r |= sct->size;
534 if (!isu) n++; else if (n == 0) n = 1;
535 } else if (ctype_iscomplex(sct->info)) {
/* A complex counts as two elements of half its size. */
536 r |= (sct->size >> 1);
537 if (!isu) n += 2; else if (n < 2) n = 2;
538 } else {
539 goto noth;
540 }
541 } else if (ctype_isbitfield(ct->info)) {
542 goto noth;
543 } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
544 CType *sct = ctype_child(cts, ct);
545 if (sct->size > 0) {
/* Recurse: low 8 bits of the result are the element size, the rest the count. */
546 unsigned int s = ccall_classify_struct(cts, sct, ctf);
547 if (s <= 1) goto noth;
548 r |= (s & 255);
549 if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
550 }
551 }
552 }
553 if ((r == 4 || r == 8) && n <= 4)
554 return r + (n << 8);
555noth: /* Not a homogeneous float/double aggregate. */
556 return (sz <= 4); /* Return structs of size <= 4 in a GPR. */
557}
558
559#endif
560
456/* -- Common C call handling ---------------------------------------------- */ 561/* -- Common C call handling ---------------------------------------------- */
457 562
458/* Infer the destination CTypeID for a vararg argument. */ 563/* Infer the destination CTypeID for a vararg argument. */
@@ -494,6 +599,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
494 MSize maxgpr, ngpr = 0, nsp = 0, narg; 599 MSize maxgpr, ngpr = 0, nsp = 0, narg;
495#if CCALL_NARG_FPR 600#if CCALL_NARG_FPR
496 MSize nfpr = 0; 601 MSize nfpr = 0;
602#if LJ_TARGET_ARM
603 MSize fprodd = 0;
604#endif
497#endif 605#endif
498 606
499 /* Clear unused regs to get some determinism in case of misdeclaration. */ 607 /* Clear unused regs to get some determinism in case of misdeclaration. */
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 5985c4a9..62f963e1 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -51,12 +51,21 @@ typedef intptr_t GPRArg;
51#elif LJ_TARGET_ARM 51#elif LJ_TARGET_ARM
52 52
53#define CCALL_NARG_GPR 4 53#define CCALL_NARG_GPR 4
54#define CCALL_NARG_FPR 0
55#define CCALL_NRET_GPR 2 /* For softfp double. */ 54#define CCALL_NRET_GPR 2 /* For softfp double. */
55#if LJ_ABI_SOFTFP
56#define CCALL_NARG_FPR 0
56#define CCALL_NRET_FPR 0 57#define CCALL_NRET_FPR 0
58#else
59#define CCALL_NARG_FPR 8
60#define CCALL_NRET_FPR 4
61#endif
57#define CCALL_SPS_FREE 0 62#define CCALL_SPS_FREE 0
58 63
59typedef intptr_t GPRArg; 64typedef intptr_t GPRArg;
/* One FPR argument/result slot, viewed as one double or as two floats. */
65typedef union FPRArg {
66 double d;
67 float f[2];
68} FPRArg;
60 69
61#elif LJ_TARGET_PPC 70#elif LJ_TARGET_PPC
62 71
@@ -122,7 +131,7 @@ LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
122 131
123/* -- C call state -------------------------------------------------------- */ 132/* -- C call state -------------------------------------------------------- */
124 133
125typedef struct CCallState { 134typedef LJ_ALIGN(8) struct CCallState {
126 void (*func)(void); /* Pointer to called function. */ 135 void (*func)(void); /* Pointer to called function. */
127 uint32_t spadj; /* Stack pointer adjustment. */ 136 uint32_t spadj; /* Stack pointer adjustment. */
128 uint8_t nsp; /* Number of stack slots. */ 137 uint8_t nsp; /* Number of stack slots. */
@@ -135,10 +144,10 @@ typedef struct CCallState {
135#elif LJ_TARGET_PPC 144#elif LJ_TARGET_PPC
136 uint8_t nfpr; /* Number of arguments in FPRs. */ 145 uint8_t nfpr; /* Number of arguments in FPRs. */
137#endif 146#endif
138#if CCALL_NUM_FPR
139#if LJ_32 147#if LJ_32
140 int32_t align1; 148 int32_t align1;
141#endif 149#endif
150#if CCALL_NUM_FPR
142 FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ 151 FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
143#endif 152#endif
144 GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */ 153 GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index a9567bc5..430643ee 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -310,22 +310,53 @@ void lj_ccallback_mcode_free(CTState *cts)
310 310
311#elif LJ_TARGET_ARM 311#elif LJ_TARGET_ARM
312 312
313#if LJ_ABI_SOFTFP
314
315#define CALLBACK_HANDLE_REGARG_FP1 UNUSED(isfp);
316#define CALLBACK_HANDLE_REGARG_FP2
317
318#else
319
/* Hard-float ABI: locate an FP callback argument in the saved FPR slots.
**
** Expands to the opening half of an if/else. The first branch handles
** arguments with isfp set; everything else enters the trailing "else {",
** which CALLBACK_HANDLE_REGARG_FP2 closes. isfp, n, sp, nfpr, fprodd and the
** "done" label come from the expansion site.
**
** - n == 1: if fprodd is non-zero, use slot cts->cb.fpr[fprodd-1] and clear
**   fprodd; otherwise take the next slot (if one is left) and set fprodd to
**   that slot's index + 1.
** - n != 1: take the next slot, if one is left.
** - If no slot is available, fprodd is cleared and control leaves the branch
**   without jumping to done.
**
** NOTE(review): the fprodd case points sp at the base of the slot, whereas
** CCALL_HANDLE_REGARG_FP1 in lj_ccall.c addresses the .f[1] half -- confirm
** that the second float is read from the intended half.
*/
320#define CALLBACK_HANDLE_REGARG_FP1 \
321 if (isfp) { \
322 if (n == 1) { \
323 if (fprodd) { \
324 sp = &cts->cb.fpr[fprodd-1]; \
325 fprodd = 0; \
326 goto done; \
327 } else if (nfpr + 1 <= CCALL_NARG_FPR) { \
328 sp = &cts->cb.fpr[nfpr++]; \
329 fprodd = nfpr; \
330 goto done; \
331 } \
332 } else { \
333 if (nfpr + 1 <= CCALL_NARG_FPR) { \
334 sp = &cts->cb.fpr[nfpr++]; \
335 goto done; \
336 } \
337 } \
338 fprodd = 0; /* No reordering after the first FP value is on stack. */ \
339 } else {
340
341#define CALLBACK_HANDLE_REGARG_FP2 }
342
343#endif
344
313#define CALLBACK_HANDLE_REGARG \ 345#define CALLBACK_HANDLE_REGARG \
314 UNUSED(isfp); \ 346 CALLBACK_HANDLE_REGARG_FP1 \
315 if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ 347 if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
316 if (ngpr + n <= maxgpr) { \ 348 if (ngpr + n <= maxgpr) { \
317 sp = &cts->cb.gpr[ngpr]; \ 349 sp = &cts->cb.gpr[ngpr]; \
318 ngpr += n; \ 350 ngpr += n; \
319 goto done; \ 351 goto done; \
320 } 352 } CALLBACK_HANDLE_REGARG_FP2
321 353
322#elif LJ_TARGET_PPC 354#elif LJ_TARGET_PPC
323 355
324#define CALLBACK_HANDLE_REGARG \ 356#define CALLBACK_HANDLE_REGARG \
325 if (isfp) { \ 357 if (isfp) { \
326 if (nfpr + 1 <= CCALL_NARG_FPR) { \ 358 if (nfpr + 1 <= CCALL_NARG_FPR) { \
327 sp = &cts->cb.fpr[nfpr]; \ 359 sp = &cts->cb.fpr[nfpr++]; \
328 nfpr += 1; \
329 cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ 360 cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
330 goto done; \ 361 goto done; \
331 } \ 362 } \
@@ -382,6 +413,9 @@ static void callback_conv_args(CTState *cts, lua_State *L)
382 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; 413 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
383#if CCALL_NARG_FPR 414#if CCALL_NARG_FPR
384 MSize nfpr = 0; 415 MSize nfpr = 0;
416#if LJ_TARGET_ARM
417 MSize fprodd = 0;
418#endif
385#endif 419#endif
386 420
387 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { 421 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 7953654f..7c3b667c 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -155,7 +155,7 @@ typedef struct CType {
155#define CCALL_MAX_GPR 8 155#define CCALL_MAX_GPR 8
156#define CCALL_MAX_FPR 8 156#define CCALL_MAX_FPR 8
157 157
158typedef LJ_ALIGN(8) union FPRCBArg { double d; float f; } FPRCBArg; 158typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg;
159 159
160/* C callback state. Defined here, to avoid dragging in lj_ccall.h. */ 160/* C callback state. Defined here, to avoid dragging in lj_ccall.h. */
161 161
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 26f97aa3..355a53e6 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -2414,8 +2414,19 @@ static void build_subroutines(BuildCtx *ctx)
2414 |.type CTSTATE, CTState, PC 2414 |.type CTSTATE, CTState, PC
2415 | ldr CTSTATE, GL:r12->ctype_state 2415 | ldr CTSTATE, GL:r12->ctype_state
2416 | add DISPATCH, r12, #GG_G2DISP 2416 | add DISPATCH, r12, #GG_G2DISP
2417 | strd CARG12, CTSTATE->cb.gpr[0] 2417 |.if FPU
2418 | str r4, SAVE_R4
2419 | add r4, sp, CFRAME_SPACE+4+8*8
2420 | vstmdb r4!, {d8-d15}
2421 |.endif
2422 |.if HFABI
2423 | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8])
2424 |.endif
2418 | strd CARG34, CTSTATE->cb.gpr[2] 2425 | strd CARG34, CTSTATE->cb.gpr[2]
2426 | strd CARG12, CTSTATE->cb.gpr[0]
2427 |.if HFABI
2428 | vstmdb r12!, {d0-d7}
2429 |.endif
2419 | ldr CARG4, [sp] 2430 | ldr CARG4, [sp]
2420 | add CARG3, sp, #CFRAME_SIZE 2431 | add CARG3, sp, #CFRAME_SIZE
2421 | mov CARG1, CTSTATE 2432 | mov CARG1, CTSTATE
@@ -2448,6 +2459,9 @@ static void build_subroutines(BuildCtx *ctx)
2448 | mov CARG2, RA 2459 | mov CARG2, RA
2449 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 2460 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2450 | ldrd CARG12, CTSTATE->cb.gpr[0] 2461 | ldrd CARG12, CTSTATE->cb.gpr[0]
2462 |.if HFABI
2463 | vldr d0, CTSTATE->cb.fpr[0]
2464 |.endif
2451 | b ->vm_leave_unw 2465 | b ->vm_leave_unw
2452 |.endif 2466 |.endif
2453 | 2467 |
@@ -2460,9 +2474,15 @@ static void build_subroutines(BuildCtx *ctx)
2460 | ldr CARG1, CCSTATE:CARG1->spadj 2474 | ldr CARG1, CCSTATE:CARG1->spadj
2461 | ldrb CARG2, CCSTATE->nsp 2475 | ldrb CARG2, CCSTATE->nsp
2462 | add CARG3, CCSTATE, #offsetof(CCallState, stack) 2476 | add CARG3, CCSTATE, #offsetof(CCallState, stack)
2477 |.if HFABI
2478 | add RB, CCSTATE, #offsetof(CCallState, fpr[0])
2479 |.endif
2463 | mov r11, sp 2480 | mov r11, sp
2464 | sub sp, sp, CARG1 // Readjust stack. 2481 | sub sp, sp, CARG1 // Readjust stack.
2465 | subs CARG2, CARG2, #1 2482 | subs CARG2, CARG2, #1
2483 |.if HFABI
2484 | vldm RB, {d0-d7}
2485 |.endif
2466 | ldr RB, CCSTATE->func 2486 | ldr RB, CCSTATE->func
2467 | bmi >2 2487 | bmi >2
2468 |1: // Copy stack slots. 2488 |1: // Copy stack slots.
@@ -2471,14 +2491,17 @@ static void build_subroutines(BuildCtx *ctx)
2471 | subs CARG2, CARG2, #1 2491 | subs CARG2, CARG2, #1
2472 | bpl <1 2492 | bpl <1
2473 |2: 2493 |2:
2474 | ldr CARG1, CCSTATE->gpr[0] 2494 | ldrd CARG12, CCSTATE->gpr[0]
2475 | ldr CARG2, CCSTATE->gpr[1] 2495 | ldrd CARG34, CCSTATE->gpr[2]
2476 | ldr CARG3, CCSTATE->gpr[2]
2477 | ldr CARG4, CCSTATE->gpr[3]
2478 | blx RB 2496 | blx RB
2479 | mov sp, r11 2497 | mov sp, r11
2480 | str CRET1, CCSTATE->gpr[0] 2498 |.if HFABI
2481 | str CRET2, CCSTATE->gpr[1] 2499 | add r12, CCSTATE, #offsetof(CCallState, fpr[4])
2500 |.endif
2501 | strd CRET1, CCSTATE->gpr[0]
2502 |.if HFABI
2503 | vstmdb r12!, {d0-d3}
2504 |.endif
2482 | pop {CCSTATE, r5, r11, pc} 2505 | pop {CCSTATE, r5, r11, pc}
2483 |.endif 2506 |.endif
2484 |// Note: vm_ffi_call must be the last function in this object file! 2507 |// Note: vm_ffi_call must be the last function in this object file!