aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Pall <mike>2015-01-07 21:06:40 +0100
committerMike Pall <mike>2015-01-07 21:06:40 +0100
commit33f0c24f06d38ea618429c2ea2f7a849e8d7439c (patch)
treed1893a1e30a9a2a8b8972345da3cb89f1b51db9a /src
parentce1a5ee535aea909f297a56bce8ff113e1763403 (diff)
downloadluajit-33f0c24f06d38ea618429c2ea2f7a849e8d7439c.tar.gz
luajit-33f0c24f06d38ea618429c2ea2f7a849e8d7439c.tar.bz2
luajit-33f0c24f06d38ea618429c2ea2f7a849e8d7439c.zip
ARM64: Add FFI support.
Diffstat (limited to 'src')
-rw-r--r--src/lj_arch.h1
-rw-r--r--src/lj_ccall.c121
-rw-r--r--src/lj_ccall.h17
-rw-r--r--src/lj_ccallback.c64
-rw-r--r--src/lj_target.h2
-rw-r--r--src/lj_target_arm64.h97
-rw-r--r--src/vm_arm64.dasc130
7 files changed, 412 insertions, 20 deletions
diff --git a/src/lj_arch.h b/src/lj_arch.h
index e919c1a4..2b8fa7fe 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -202,7 +202,6 @@
202#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 202#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
203#define LJ_TARGET_GC64 1 203#define LJ_TARGET_GC64 1
204#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL 204#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
205#define LJ_ARCH_NOFFI 1 /* NYI */
206#define LJ_ARCH_NOJIT 1 /* NYI */ 205#define LJ_ARCH_NOJIT 1 /* NYI */
207 206
208#define LJ_ARCH_VERSION 80 207#define LJ_ARCH_VERSION 80
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 4885820c..5ab5b60d 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -290,6 +290,75 @@
290#define CCALL_HANDLE_RET \ 290#define CCALL_HANDLE_RET \
291 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; 291 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
292 292
293#elif LJ_TARGET_ARM64
294/* -- ARM64 calling conventions ------------------------------------------- */
295
/*
** Struct return, before the call: a classification of 0 means the struct
** is returned through memory, so the destination is handed over in retp.
*/
#define CCALL_HANDLE_STRUCTRET \
  cc->retref = (ccall_classify_struct(cts, ctr) == 0); \
  if (cc->retref) { cc->retp = dp; }

/*
** Struct return, after the call: copy the register result into dp.
** Bit 2 of the classification marks a float HFA, whose elements sit in
** separate FPR slots and are gathered one 32 bit word at a time.
*/
#define CCALL_HANDLE_STRUCTRET2 \
  unsigned int cl = ccall_classify_struct(cts, ctr); \
  if ((cl & 4) == 0) { \
    /* Class 1 copies from sp as is, any other class copies from the FPRs. */ \
    if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
    memcpy(dp, sp, ctr->size); \
  } else {  /* Combine float HFA from separate registers. */ \
    CTSize i = (cl >> 8); \
    while (i-- > 0) ((uint32_t *)dp)[i] = cc->fpr[i].u32; \
  }
309
/* Complex return, before the call: never returned by reference. */
#define CCALL_HANDLE_COMPLEXRET \
  /* Complex values are returned in one or two FPRs. */ \
  cc->retref = 0;

/*
** Complex return, after the call: both parts are read from fpr[0] and
** fpr[1], as floats or as doubles depending on the result size.
*/
#define CCALL_HANDLE_COMPLEXRET2 \
  if (ctr->size != 2*sizeof(float)) {  /* Copy complex double from FPRs. */ \
    double *cd = (double *)dp; \
    cd[0] = cc->fpr[0].d; \
    cd[1] = cc->fpr[1].d; \
  } else {  /* Copy complex float from FPRs. */ \
    float *cf = (float *)dp; \
    cf[0] = cc->fpr[0].f; \
    cf[1] = cc->fpr[1].f; \
  }
322
/*
** Struct argument: pick the passing mode from the struct classification.
**   0   pass by reference: rp is set to newly allocated cdata (did, sz)
**       and the argument itself shrinks to pointer size.
**   1   pass in GPRs or on the stack (nothing to set up here).
**   >1  pass in FPRs or on the stack; isfp is 2 when bit 2 of the
**       classification is set, otherwise 1.
*/
#define CCALL_HANDLE_STRUCTARG \
  unsigned int cl = ccall_classify_struct(cts, d); \
  if (cl > 1) {  /* Pass struct in FPRs or on stack. */ \
    isfp = (cl & 4) ? 2 : 1; \
  } else if (cl == 0) {  /* Pass struct by reference. */ \
    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
    sz = CTSIZE_PTR; \
  }  /* else: Pass struct in GPRs or on stack. */
331
/*
** Complex argument: isfp = 2 marks a complex float (its two 32 bit parts
** are later split into separate registers), isfp = 1 a complex double.
** The decision must be based on the size of the *argument* (sz), not on
** the size of the function's return type (ctr).
*/
#define CCALL_HANDLE_COMPLEXARG \
  /* Pass complex by value in separate (!) FPRs or on stack. */ \
  isfp = sz == 2*sizeof(float) ? 2 : 1;
335
/*
** Try to place an argument of n slots into argument registers.
** On success dp points at the register slot and control jumps to 'done'.
** Otherwise the exhausted register class is marked as full and control
** falls out of the macro.
*/
#define CCALL_HANDLE_REGARG \
  if (!(LJ_TARGET_IOS && isva)) { \
    /* IOS: All variadic arguments are on the stack (skipped above). */ \
    if (isfp) {  /* Try to pass argument in FPRs. */ \
      int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \
      if (nfpr + n2 <= CCALL_NARG_FPR) { \
        dp = &cc->fpr[nfpr]; \
        nfpr += n2; \
        goto done; \
      } \
      nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
    } else {  /* Try to pass argument in GPRs. */ \
      if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
        ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
      if (ngpr + n <= maxgpr) { \
        dp = &cc->gpr[ngpr]; \
        ngpr += n; \
        goto done; \
      } \
      ngpr = maxgpr;  /* Prevent reordering. */ \
      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
    } \
  }
361
293#elif LJ_TARGET_PPC 362#elif LJ_TARGET_PPC
294/* -- PPC calling conventions --------------------------------------------- */ 363/* -- PPC calling conventions --------------------------------------------- */
295 364
@@ -584,6 +653,52 @@ noth: /* Not a homogeneous float/double aggregate. */
584 653
585#endif 654#endif
586 655
656/* -- ARM64 ABI struct classification ------------------------------------- */
657
658#if LJ_TARGET_ARM64
659
660/* Classify a struct based on its fields. */
661static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
662{
663 CTSize sz = ct->size;
664 unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
665 while (ct->sib) {
666 CType *sct;
667 ct = ctype_get(cts, ct->sib);
668 if (ctype_isfield(ct->info)) {
669 sct = ctype_rawchild(cts, ct);
670 if (ctype_isfp(sct->info)) {
671 r |= sct->size;
672 if (!isu) n++; else if (n == 0) n = 1;
673 } else if (ctype_iscomplex(sct->info)) {
674 r |= (sct->size >> 1);
675 if (!isu) n += 2; else if (n < 2) n = 2;
676 } else if (ctype_isstruct(sct->info)) {
677 goto substruct;
678 } else {
679 goto noth;
680 }
681 } else if (ctype_isbitfield(ct->info)) {
682 goto noth;
683 } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
684 sct = ctype_rawchild(cts, ct);
685 substruct:
686 if (sct->size > 0) {
687 unsigned int s = ccall_classify_struct(cts, sct);
688 if (s <= 1) goto noth;
689 r |= (s & 255);
690 if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
691 }
692 }
693 }
694 if ((r == 4 || r == 8) && n <= 4)
695 return r + (n << 8);
696noth: /* Not a homogeneous float/double aggregate. */
697 return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
698}
699
700#endif
701
587/* -- Common C call handling ---------------------------------------------- */ 702/* -- Common C call handling ---------------------------------------------- */
588 703
589/* Infer the destination CTypeID for a vararg argument. */ 704/* Infer the destination CTypeID for a vararg argument. */
@@ -766,6 +881,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
766 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ 881 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
767 cc->fpr[nfpr-2].d[1] = 0; 882 cc->fpr[nfpr-2].d[1] = 0;
768 } 883 }
884#elif LJ_TARGET_ARM64
885 if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
886 /* Split float HFA or complex float into separate registers. */
887 CTSize i = (sz >> 2) - 1;
888 do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
889 }
769#else 890#else
770 UNUSED(isfp); 891 UNUSED(isfp);
771#endif 892#endif
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 21af04ef..91983fee 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -68,6 +68,21 @@ typedef union FPRArg {
68 float f[2]; 68 float f[2];
69} FPRArg; 69} FPRArg;
70 70
71#elif LJ_TARGET_ARM64
72
/* Number of GPR/FPR slots used for arguments and for return values. */
#define CCALL_NARG_GPR		8
#define CCALL_NRET_GPR		2
#define CCALL_NARG_FPR		8
#define CCALL_NRET_FPR		4
#define CCALL_SPS_FREE		0

/* One general-purpose register slot. */
typedef intptr_t GPRArg;

/* One floating-point register slot, viewed as double, float or raw word. */
typedef union FPRArg {
  double d;
  float f;
  uint32_t u32;
} FPRArg;
85
71#elif LJ_TARGET_PPC 86#elif LJ_TARGET_PPC
72 87
73#define CCALL_NARG_GPR 8 88#define CCALL_NARG_GPR 8
@@ -135,6 +150,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
135 uint8_t nfpr; /* Number of arguments in FPRs. */ 150 uint8_t nfpr; /* Number of arguments in FPRs. */
136#elif LJ_TARGET_X86 151#elif LJ_TARGET_X86
137 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ 152 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
153#elif LJ_TARGET_ARM64
154 void *retp; /* Aggregate return pointer in x8. */
138#elif LJ_TARGET_PPC 155#elif LJ_TARGET_PPC
139 uint8_t nfpr; /* Number of arguments in FPRs. */ 156 uint8_t nfpr; /* Number of arguments in FPRs. */
140#endif 157#endif
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 799dcd0e..66a09440 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -27,7 +27,7 @@
27 27
28#if LJ_OS_NOJIT 28#if LJ_OS_NOJIT
29 29
30/* Disabled callback support. */ 30/* Callbacks disabled. */
31#define CALLBACK_SLOT2OFS(slot) (0*(slot)) 31#define CALLBACK_SLOT2OFS(slot) (0*(slot))
32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) 32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs))
33#define CALLBACK_MAX_SLOT 0 33#define CALLBACK_MAX_SLOT 0
@@ -54,23 +54,18 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
54#elif LJ_TARGET_ARM 54#elif LJ_TARGET_ARM
55 55
56#define CALLBACK_MCODE_HEAD 32 56#define CALLBACK_MCODE_HEAD 32
57#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) 57
58#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) 58#elif LJ_TARGET_ARM64
59#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) 59
60#define CALLBACK_MCODE_HEAD 32
60 61
61#elif LJ_TARGET_PPC 62#elif LJ_TARGET_PPC
62 63
63#define CALLBACK_MCODE_HEAD 24 64#define CALLBACK_MCODE_HEAD 24
64#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
65#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
66#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
67 65
68#elif LJ_TARGET_MIPS 66#elif LJ_TARGET_MIPS
69 67
70#define CALLBACK_MCODE_HEAD 24 68#define CALLBACK_MCODE_HEAD 24
71#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
72#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
73#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
74 69
75#else 70#else
76 71
@@ -81,6 +76,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
81 76
82#endif 77#endif
83 78
/*
** Default slot layout for targets that only define CALLBACK_MCODE_HEAD:
** a fixed-size head followed by 8 bytes of machine code per slot.
*/
#if !defined(CALLBACK_SLOT2OFS)
#define CALLBACK_SLOT2OFS(slot)	(CALLBACK_MCODE_HEAD + 8*(slot))
#define CALLBACK_OFS2SLOT(ofs)	(((ofs)-CALLBACK_MCODE_HEAD)/8)
#define CALLBACK_MAX_SLOT	(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
#endif
84
84/* Convert callback slot number to callback function pointer. */ 85/* Convert callback slot number to callback function pointer. */
85static void *callback_slot2ptr(CTState *cts, MSize slot) 86static void *callback_slot2ptr(CTState *cts, MSize slot)
86{ 87{
@@ -157,6 +158,26 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
157 } 158 }
158 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 159 lua_assert(p - page <= CALLBACK_MCODE_SIZE);
159} 160}
161#elif LJ_TARGET_ARM64
162static void callback_mcode_init(global_State *g, uint32_t *page)
163{
164 uint32_t *p = page;
165 void *target = (void *)lj_vm_ffi_callback;
166 MSize slot;
167 *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4);
168 *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5);
169 *p++ = A64I_BR | A64F_N(RID_X11);
170 *p++ = A64I_NOP;
171 ((void **)p)[0] = target;
172 ((void **)p)[1] = g;
173 p += 4;
174 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
175 *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot);
176 *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu);
177 p++;
178 }
179 lua_assert(p - page <= CALLBACK_MCODE_SIZE);
180}
160#elif LJ_TARGET_PPC 181#elif LJ_TARGET_PPC
161static void callback_mcode_init(global_State *g, uint32_t *page) 182static void callback_mcode_init(global_State *g, uint32_t *page)
162{ 183{
@@ -351,6 +372,29 @@ void lj_ccallback_mcode_free(CTState *cts)
351 goto done; \ 372 goto done; \
352 } CALLBACK_HANDLE_REGARG_FP2 373 } CALLBACK_HANDLE_REGARG_FP2
353 374
375#elif LJ_TARGET_ARM64
376
/*
** Try to fetch a callback argument of n slots from the saved registers.
** On success sp points at the saved register slot and control jumps to
** 'done'. Otherwise the register class is marked as exhausted and control
** falls out of the macro.
*/
#define CALLBACK_HANDLE_REGARG \
  if (isfp) { \
    if (nfpr + n <= CCALL_NARG_FPR) { \
      sp = &cts->cb.fpr[nfpr]; \
      nfpr += n; \
      goto done; \
    } \
    nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
  } else { \
    if (!LJ_TARGET_IOS && n > 1) \
      ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
    if (ngpr + n <= maxgpr) { \
      sp = &cts->cb.gpr[ngpr]; \
      ngpr += n; \
      goto done; \
    } \
    ngpr = CCALL_NARG_GPR;  /* Prevent reordering. */ \
  }
397
354#elif LJ_TARGET_PPC 398#elif LJ_TARGET_PPC
355 399
356#define CALLBACK_HANDLE_REGARG \ 400#define CALLBACK_HANDLE_REGARG \
diff --git a/src/lj_target.h b/src/lj_target.h
index 1a242325..0daecb11 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -138,6 +138,8 @@ typedef uint32_t RegCost;
138#include "lj_target_x86.h" 138#include "lj_target_x86.h"
139#elif LJ_TARGET_ARM 139#elif LJ_TARGET_ARM
140#include "lj_target_arm.h" 140#include "lj_target_arm.h"
141#elif LJ_TARGET_ARM64
142#include "lj_target_arm64.h"
141#elif LJ_TARGET_PPC 143#elif LJ_TARGET_PPC
142#include "lj_target_ppc.h" 144#include "lj_target_ppc.h"
143#elif LJ_TARGET_MIPS 145#elif LJ_TARGET_MIPS
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
new file mode 100644
index 00000000..99e0adc9
--- /dev/null
+++ b/src/lj_target_arm64.h
@@ -0,0 +1,97 @@
1/*
2** Definitions for ARM64 CPUs.
3** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_TARGET_ARM64_H
7#define _LJ_TARGET_ARM64_H
8
9/* -- Registers IDs ------------------------------------------------------- */
10
/* X-macro lists of register names; the list order defines the ID order. */
#define GPRDEF(_) \
  _(X0) _(X1) _(X2) _(X3) \
  _(X4) _(X5) _(X6) _(X7) \
  _(X8) _(X9) _(X10) _(X11) \
  _(X12) _(X13) _(X14) _(X15) \
  _(X16) _(X17) _(X18) _(X19) \
  _(X20) _(X21) _(X22) _(X23) \
  _(X24) _(X25) _(X26) _(X27) \
  _(X28) _(FP) _(LR) _(SP)
#define FPRDEF(_) \
  _(D0) _(D1) _(D2) _(D3) \
  _(D4) _(D5) _(D6) _(D7) \
  _(D8) _(D9) _(D10) _(D11) \
  _(D12) _(D13) _(D14) _(D15) \
  _(D16) _(D17) _(D18) _(D19) \
  _(D20) _(D21) _(D22) _(D23) \
  _(D24) _(D25) _(D26) _(D27) \
  _(D28) _(D29) _(D30) _(D31)
#define VRIDDEF(_)

#define RIDENUM(name)	RID_##name,

enum {
  GPRDEF(RIDENUM)		/* General-purpose registers (GPRs). */
  FPRDEF(RIDENUM)		/* Floating-point registers (FPRs). */
  RID_MAX,			/* Total number of register IDs. */

  /* Register ranges [min, max) and number of registers. */
  RID_MIN_GPR = RID_X0,
  RID_MAX_GPR = RID_SP+1,
  RID_MIN_FPR = RID_MAX_GPR,
  RID_MAX_FPR = RID_D31+1,
  RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
  RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,

  /* Aliases: the temporary shares lr, the zero register shares sp's ID. */
  RID_TMP = RID_LR,
  RID_ZERO = RID_SP,

  /* Calling conventions. */
  RID_RET = RID_X0,
  RID_FPRET = RID_D0,

  /* These definitions must match with the *.dasc file(s): */
  RID_BASE = RID_X19,		/* Interpreter BASE. */
  RID_LPC = RID_X21,		/* Interpreter PC. */
  RID_GL = RID_X22,		/* Interpreter GL. */
  RID_LREG = RID_X23		/* Interpreter L. */
};

#define RID_NUM_KREF		RID_NUM_GPR
#define RID_MIN_KREF		RID_X0
53
54/* -- Register sets ------------------------------------------------------- */
55
/* Make use of all registers, except for x18, fp, lr and sp. */
#define RSET_FIXED \
  (RID2RSET(RID_SP)|RID2RSET(RID_LR)|RID2RSET(RID_FP)|RID2RSET(RID_X18))
#define RSET_GPR	(RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
#define RSET_FPR	RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
#define RSET_ALL	(RSET_GPR|RSET_FPR)
#define RSET_INIT	RSET_ALL

/* Scratch sets: x0-x17, d0-d7 and d16-d31. */
/* lr is an implicit scratch register. */
#define RSET_SCRATCH_GPR	(RSET_RANGE(RID_X0, RID_X17+1))
#define RSET_SCRATCH_FPR \
  (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1))
#define RSET_SCRATCH	(RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)

/* Argument registers: x0-x7 and d0-d7. */
#define REGARG_FIRSTGPR	RID_X0
#define REGARG_LASTGPR	RID_X7
#define REGARG_NUMGPR	8
#define REGARG_FIRSTFPR	RID_D0
#define REGARG_LASTFPR	RID_D7
#define REGARG_NUMFPR	8
75
76/* -- Instructions -------------------------------------------------------- */
77
/*
** Instruction fields. Each macro shifts an operand to its bit position in
** a 32 bit A64 instruction word, ready to be ORed into an opcode.
**   D, N, A, M  register numbers at bits 0, 5, 10 and 16
**   U16         unsigned 16 bit immediate at bit 5
**   S26         signed 26 bit displacement at bit 0
**   S19         signed 19 bit displacement at bit 5
** The signed fields are converted to unsigned and masked to their width.
** This keeps a negative displacement from spilling sign bits into the
** opcode and avoids left-shifting a negative value.
*/
#define A64F_D(r)	(r)
#define A64F_N(r)	((r) << 5)
#define A64F_A(r)	((r) << 10)
#define A64F_M(r)	((r) << 16)
#define A64F_U16(x)	((x) << 5)
#define A64F_S26(x)	((unsigned int)(x) & 0x03ffffffu)
#define A64F_S19(x)	(((unsigned int)(x) & 0x7ffffu) << 5)
86
/* Base opcodes; operands are ORed in with the A64F_* field macros. */
typedef enum A64Ins {
  /* Move wide with zero (32 bit and 64 bit destination). */
  A64I_MOVZw = 0x52800000,
  A64I_MOVZx = 0xd2800000,

  /* Load register from PC-relative literal (32 bit and 64 bit). */
  A64I_LDRLw = 0x18000000,
  A64I_LDRLx = 0x58000000,

  /* Branches: PC-relative and register-indirect. */
  A64I_B = 0x14000000,
  A64I_BR = 0xd61f0000,

  A64I_NOP = 0xd503201f,
} A64Ins;
96
97#endif
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 895262e0..a31cbb3a 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -853,7 +853,8 @@ static void build_subroutines(BuildCtx *ctx)
853 | str PC, SAVE_PC 853 | str PC, SAVE_PC
854 | add CARG3, RA, NARGS8:RC 854 | add CARG3, RA, NARGS8:RC
855 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) 855 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
856 | ldp LFUNC:CARG3, PC, [RA, FRAME_FUNC] // Guaranteed to be a function here. 856 | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here.
857 | ldr PC, [BASE, FRAME_PC]
857 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. 858 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
858 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK 859 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
859 | b ->BC_CALLT2_Z 860 | b ->BC_CALLT2_Z
@@ -1859,18 +1860,89 @@ static void build_subroutines(BuildCtx *ctx)
1859 |// Saveregs already performed. Callback slot number in w9, g in x10. 1860 |// Saveregs already performed. Callback slot number in w9, g in x10.
1860 |->vm_ffi_callback: 1861 |->vm_ffi_callback:
1861 |.if FFI 1862 |.if FFI
1862 | NYI 1863 |.type CTSTATE, CTState, PC
1864 | saveregs
1865 | ldr CTSTATE, GL:x10->ctype_state
1866 | mov GL, x10
1867 | add x10, sp, # CFRAME_SPACE
1868 | str w9, CTSTATE->cb.slot
1869 | stp x0, x1, CTSTATE->cb.gpr[0]
1870 | stp d0, d1, CTSTATE->cb.fpr[0]
1871 | stp x2, x3, CTSTATE->cb.gpr[2]
1872 | stp d2, d3, CTSTATE->cb.fpr[2]
1873 | stp x4, x5, CTSTATE->cb.gpr[4]
1874 | stp d4, d5, CTSTATE->cb.fpr[4]
1875 | stp x6, x7, CTSTATE->cb.gpr[6]
1876 | stp d6, d7, CTSTATE->cb.fpr[6]
1877 | str x10, CTSTATE->cb.stack
1878 | mov CARG1, CTSTATE
1879 | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
1880 | mov CARG2, sp
1881 | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
1882 | // Returns lua_State *.
1883 | ldp BASE, RC, L:CRET1->base
1884 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
1885 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
1886 | movn TISNIL, #0
1887 | mov L, CRET1
1888 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
1889 | sub RC, RC, BASE
1890 | st_vmstate ST_INTERP
1891 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1892 | ins_callt
1863 |.endif 1893 |.endif
1864 | 1894 |
1865 |->cont_ffi_callback: // Return from FFI callback. 1895 |->cont_ffi_callback: // Return from FFI callback.
1866 |.if FFI 1896 |.if FFI
1867 | NYI 1897 | ldr CTSTATE, GL->ctype_state
1898 | stp BASE, CARG4, L->base
1899 | str L, CTSTATE->L
1900 | mov CARG1, CTSTATE
1901 | mov CARG2, RA
1902 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
1903 | ldp x0, x1, CTSTATE->cb.gpr[0]
1904 | ldp d0, d1, CTSTATE->cb.fpr[0]
1905 | b ->vm_leave_unw
1868 |.endif 1906 |.endif
1869 | 1907 |
1870 |->vm_ffi_call: // Call C function via FFI. 1908 |->vm_ffi_call: // Call C function via FFI.
1871 | // Caveat: needs special frame unwinding, see below. 1909 | // Caveat: needs special frame unwinding, see below.
1872 |.if FFI 1910 |.if FFI
1873 | NYI 1911 | .type CCSTATE, CCallState, x19
1912 | stp fp, lr, [sp, #-32]!
1913 | add fp, sp, #0
1914 | str CCSTATE, [sp, #16]
1915 | mov CCSTATE, x0
1916 | ldr TMP0w, CCSTATE:x0->spadj
1917 | ldrb TMP1w, CCSTATE->nsp
1918 | add TMP2, CCSTATE, #offsetof(CCallState, stack)
1919 | subs TMP1, TMP1, #1
1920 | ldr TMP3, CCSTATE->func
1921 | sub sp, fp, TMP0
1922 | bmi >2
1923 |1: // Copy stack slots
1924 | ldr TMP0, [TMP2, TMP1, lsl #3]
1925 | str TMP0, [sp, TMP1, lsl #3]
1926 | subs TMP1, TMP1, #1
1927 | bpl <1
1928 |2:
1929 | ldp x0, x1, CCSTATE->gpr[0]
1930 | ldp d0, d1, CCSTATE->fpr[0]
1931 | ldp x2, x3, CCSTATE->gpr[2]
1932 | ldp d2, d3, CCSTATE->fpr[2]
1933 | ldp x4, x5, CCSTATE->gpr[4]
1934 | ldp d4, d5, CCSTATE->fpr[4]
1935 | ldp x6, x7, CCSTATE->gpr[6]
1936 | ldp d6, d7, CCSTATE->fpr[6]
1937 | ldr x8, CCSTATE->retp
1938 | blr TMP3
1939 | mov sp, fp
1940 | stp x0, x1, CCSTATE->gpr[0]
1941 | stp d0, d1, CCSTATE->fpr[0]
1942 | stp d2, d3, CCSTATE->fpr[2]
1943 | ldr CCSTATE, [sp, #16]
1944 | ldp fp, lr, [sp], #32
1945 | ret
1874 |.endif 1946 |.endif
1875 |// Note: vm_ffi_call must be the last function in this object file! 1947 |// Note: vm_ffi_call must be the last function in this object file!
1876 | 1948 |
@@ -2087,7 +2159,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2087 | 2159 |
2088 |.if FFI 2160 |.if FFI
2089 |7: 2161 |7:
2090 | asr ITYPE, TMP0, #47 2162 | asr ITYPE, CARG1, #47
2091 | cmn ITYPE, #-LJ_TCDATA 2163 | cmn ITYPE, #-LJ_TCDATA
2092 | bne <2 2164 | bne <2
2093 | b ->vmeta_equal_cd 2165 | b ->vmeta_equal_cd
@@ -3600,7 +3672,19 @@ static void emit_asm_debug(BuildCtx *ctx)
3600 "\t.align 3\n" 3672 "\t.align 3\n"
3601 ".LEFDE0:\n\n"); 3673 ".LEFDE0:\n\n");
3602#if LJ_HASFFI 3674#if LJ_HASFFI
3603#error "NYI" 3675 fprintf(ctx->fp,
3676 ".LSFDE1:\n"
3677 "\t.long .LEFDE1-.LASFDE1\n"
3678 ".LASFDE1:\n"
3679 "\t.long .Lframe0\n"
3680 "\t.quad lj_vm_ffi_call\n"
3681 "\t.quad %d\n"
3682 "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
3683 "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
3684 "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
3685 "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
3686 "\t.align 3\n"
3687 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
3604#endif 3688#endif
3605 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); 3689 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
3606 fprintf(ctx->fp, 3690 fprintf(ctx->fp,
@@ -3615,7 +3699,7 @@ static void emit_asm_debug(BuildCtx *ctx)
3615 "\t.byte 30\n" /* Return address is in lr. */ 3699 "\t.byte 30\n" /* Return address is in lr. */
3616 "\t.uleb128 6\n" /* augmentation length */ 3700 "\t.uleb128 6\n" /* augmentation length */
3617 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 3701 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3618 "\t.long lj_err_unwind_dwarf-.\n" 3702 "\t.long lj_err_unwind_dwarf-.\n"
3619 "\t.byte 0x1b\n" /* pcrel|sdata4 */ 3703 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3620 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ 3704 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3621 "\t.align 3\n" 3705 "\t.align 3\n"
@@ -3627,7 +3711,7 @@ static void emit_asm_debug(BuildCtx *ctx)
3627 "\t.long .LASFDE2-.Lframe1\n" 3711 "\t.long .LASFDE2-.Lframe1\n"
3628 "\t.long .Lbegin-.\n" 3712 "\t.long .Lbegin-.\n"
3629 "\t.long %d\n" 3713 "\t.long %d\n"
3630 "\t.uleb128 0\n" /* augmentation length */ 3714 "\t.uleb128 0\n" /* augmentation length */
3631 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ 3715 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
3632 "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ 3716 "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
3633 "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ 3717 "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
@@ -3641,7 +3725,35 @@ static void emit_asm_debug(BuildCtx *ctx)
3641 "\t.align 3\n" 3725 "\t.align 3\n"
3642 ".LEFDE2:\n\n"); 3726 ".LEFDE2:\n\n");
3643#if LJ_HASFFI 3727#if LJ_HASFFI
3644#error "NYI" 3728 fprintf(ctx->fp,
3729 ".Lframe2:\n"
3730 "\t.long .LECIE2-.LSCIE2\n"
3731 ".LSCIE2:\n"
3732 "\t.long 0\n"
3733 "\t.byte 0x1\n"
3734 "\t.string \"zR\"\n"
3735 "\t.uleb128 0x1\n"
3736 "\t.sleb128 -8\n"
3737 "\t.byte 30\n" /* Return address is in lr. */
3738 "\t.uleb128 1\n" /* augmentation length */
3739 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3740 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3741 "\t.align 3\n"
3742 ".LECIE2:\n\n");
3743 fprintf(ctx->fp,
3744 ".LSFDE3:\n"
3745 "\t.long .LEFDE3-.LASFDE3\n"
3746 ".LASFDE3:\n"
3747 "\t.long .LASFDE3-.Lframe2\n"
3748 "\t.long lj_vm_ffi_call-.\n"
3749 "\t.long %d\n"
3750 "\t.uleb128 0\n" /* augmentation length */
3751 "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
3752 "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
3753 "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
3754 "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
3755 "\t.align 3\n"
3756 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
3645#endif 3757#endif
3646 break; 3758 break;
3647 default: 3759 default: