diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_arch.h | 1 | ||||
| -rw-r--r-- | src/lj_ccall.c | 121 | ||||
| -rw-r--r-- | src/lj_ccall.h | 17 | ||||
| -rw-r--r-- | src/lj_ccallback.c | 64 | ||||
| -rw-r--r-- | src/lj_target.h | 2 | ||||
| -rw-r--r-- | src/lj_target_arm64.h | 97 | ||||
| -rw-r--r-- | src/vm_arm64.dasc | 130 |
7 files changed, 412 insertions, 20 deletions
diff --git a/src/lj_arch.h b/src/lj_arch.h index e919c1a4..2b8fa7fe 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h | |||
| @@ -202,7 +202,6 @@ | |||
| 202 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ | 202 | #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ |
| 203 | #define LJ_TARGET_GC64 1 | 203 | #define LJ_TARGET_GC64 1 |
| 204 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL | 204 | #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL |
| 205 | #define LJ_ARCH_NOFFI 1 /* NYI */ | ||
| 206 | #define LJ_ARCH_NOJIT 1 /* NYI */ | 205 | #define LJ_ARCH_NOJIT 1 /* NYI */ |
| 207 | 206 | ||
| 208 | #define LJ_ARCH_VERSION 80 | 207 | #define LJ_ARCH_VERSION 80 |
diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 4885820c..5ab5b60d 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c | |||
| @@ -290,6 +290,75 @@ | |||
| 290 | #define CCALL_HANDLE_RET \ | 290 | #define CCALL_HANDLE_RET \ |
| 291 | if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; | 291 | if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; |
| 292 | 292 | ||
| 293 | #elif LJ_TARGET_ARM64 | ||
| 294 | /* -- ARM64 calling conventions ------------------------------------------- */ | ||
| 295 | |||
| 296 | #define CCALL_HANDLE_STRUCTRET /* Before the call: a classification of 0 means the struct is returned in memory, so the destination dp is handed over via cc->retp. */ \ | ||
| 297 | cc->retref = !ccall_classify_struct(cts, ctr); \ | ||
| 298 | if (cc->retref) cc->retp = dp; | ||
| 299 | |||
| 300 | #define CCALL_HANDLE_STRUCTRET2 /* After the call: copy a struct that was returned in registers into dp. */ \ | ||
| 301 | unsigned int cl = ccall_classify_struct(cts, ctr); \ | ||
| 302 | if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ | ||
| 303 | CTSize i = (cl >> 8) - 1; /* cl >> 8 is the HFA element count. */ \ | ||
| 304 | do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \ | ||
| 305 | } else { \ | ||
| 306 | if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; /* Double HFA: copy from cc->fpr instead of the sp set up by the user of this macro. */ \ | ||
| 307 | memcpy(dp, sp, ctr->size); \ | ||
| 308 | } | ||
| 309 | |||
| 310 | #define CCALL_HANDLE_COMPLEXRET \ | ||
| 311 | /* Complex values are returned in two separate FPRs, never by reference. */ \ | ||
| 312 | cc->retref = 0; | ||
| 313 | |||
| 314 | #define CCALL_HANDLE_COMPLEXRET2 /* After the call: real part comes from fpr[0], imaginary part from fpr[1]. */ \ | ||
| 315 | if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ | ||
| 316 | ((float *)dp)[0] = cc->fpr[0].f; \ | ||
| 317 | ((float *)dp)[1] = cc->fpr[1].f; \ | ||
| 318 | } else { /* Copy complex double from FPRs. */ \ | ||
| 319 | ((double *)dp)[0] = cc->fpr[0].d; \ | ||
| 320 | ((double *)dp)[1] = cc->fpr[1].d; \ | ||
| 321 | } | ||
| 322 | |||
| 323 | #define CCALL_HANDLE_STRUCTARG /* Decide how a struct argument of type d is passed, based on its classification. */ \ | ||
| 324 | unsigned int cl = ccall_classify_struct(cts, d); \ | ||
| 325 | if (cl == 0) { /* Pass struct by reference. */ \ | ||
| 326 | rp = cdataptr(lj_cdata_new(cts, did, sz)); /* New cdata of type did and size sz; presumably receives the converted copy later -- confirm in ccall_set_args. */ \ | ||
| 327 | sz = CTSIZE_PTR; /* From here on the argument itself is only pointer-sized. */ \ | ||
| 328 | } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \ | ||
| 329 | isfp = (cl & 4) ? 2 : 1; /* 2: HFA of 4-byte floats, 1: HFA of doubles. */ \ | ||
| 330 | } /* else: Pass struct in GPRs or on stack. */ | ||
| 331 | |||
| 332 | #define CCALL_HANDLE_COMPLEXARG \ | ||
| 333 | /* Pass complex by value in separate (!) FPRs or on stack. Classify by the argument type d (ctr is the return type): isfp = 2 for complex float, 1 for complex double. */ \ | ||
| 334 | isfp = d->size == 2*sizeof(float) ? 2 : 1; | ||
| 335 | |||
| 336 | #define CCALL_HANDLE_REGARG /* Try to place the argument in FPRs or GPRs; jumps to done on success, otherwise falls through to the code following the macro. */ \ | ||
| 337 | if (LJ_TARGET_IOS && isva) { \ | ||
| 338 | /* IOS: All variadic arguments are on the stack. */ \ | ||
| 339 | } else if (isfp) { /* Try to pass argument in FPRs. */ \ | ||
| 340 | int n2 = ctype_isvector(d->info) ? 1 : n*isfp; /* FPRs needed: 1 for a vector, else n scaled by isfp. NOTE(review): for a float HFA with an odd element count this may differ from the element count -- confirm how n is derived. */ \ | ||
| 341 | if (nfpr + n2 <= CCALL_NARG_FPR) { \ | ||
| 342 | dp = &cc->fpr[nfpr]; \ | ||
| 343 | nfpr += n2; \ | ||
| 344 | goto done; \ | ||
| 345 | } else { \ | ||
| 346 | nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ | ||
| 347 | if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; /* Small stack arguments are rejected on iOS; presumably their packed stack layout is not implemented -- confirm. */ \ | ||
| 348 | } \ | ||
| 349 | } else { /* Try to pass argument in GPRs. */ \ | ||
| 350 | if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ | ||
| 351 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | ||
| 352 | if (ngpr + n <= maxgpr) { \ | ||
| 353 | dp = &cc->gpr[ngpr]; \ | ||
| 354 | ngpr += n; \ | ||
| 355 | goto done; \ | ||
| 356 | } else { \ | ||
| 357 | ngpr = maxgpr; /* Prevent reordering. */ \ | ||
| 358 | if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; /* Same iOS restriction as in the FPR overflow path. */ \ | ||
| 359 | } \ | ||
| 360 | } | ||
| 361 | |||
| 293 | #elif LJ_TARGET_PPC | 362 | #elif LJ_TARGET_PPC |
| 294 | /* -- PPC calling conventions --------------------------------------------- */ | 363 | /* -- PPC calling conventions --------------------------------------------- */ |
| 295 | 364 | ||
| @@ -584,6 +653,52 @@ noth: /* Not a homogeneous float/double aggregate. */ | |||
| 584 | 653 | ||
| 585 | #endif | 654 | #endif |
| 586 | 655 | ||
| 656 | /* -- ARM64 ABI struct classification ------------------------------------- */ | ||
| 657 | |||
| 658 | #if LJ_TARGET_ARM64 | ||
| 659 | |||
| 660 | /* Classify a struct based on its fields. Returns 0 if it is not a homogeneous float/double aggregate (HFA) and larger than 16 bytes, 1 if it is not an HFA but fits in 16 bytes, and otherwise r + (n << 8), where r is the FP element size (4 or 8) and n <= 4 is the element count. For unions n is the maximum over the members instead of the sum. */ | ||
| 661 | static unsigned int ccall_classify_struct(CTState *cts, CType *ct) | ||
| 662 | { | ||
| 663 | CTSize sz = ct->size; | ||
| 664 | unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); | ||
| 665 | while (ct->sib) { /* Walk the sibling chain of the struct's members. */ | ||
| 666 | CType *sct; | ||
| 667 | ct = ctype_get(cts, ct->sib); | ||
| 668 | if (ctype_isfield(ct->info)) { | ||
| 669 | sct = ctype_rawchild(cts, ct); | ||
| 670 | if (ctype_isfp(sct->info)) { | ||
| 671 | r |= sct->size; /* OR-ing different element sizes (e.g. 4|8) yields a value that fails the r == 4 || r == 8 test below. */ | ||
| 672 | if (!isu) n++; else if (n == 0) n = 1; | ||
| 673 | } else if (ctype_iscomplex(sct->info)) { | ||
| 674 | r |= (sct->size >> 1); /* A complex counts as two elements of half its size. */ | ||
| 675 | if (!isu) n += 2; else if (n < 2) n = 2; | ||
| 676 | } else if (ctype_isstruct(sct->info)) { | ||
| 677 | goto substruct; | ||
| 678 | } else { | ||
| 679 | goto noth; /* Any other field type disqualifies the struct as an HFA. */ | ||
| 680 | } | ||
| 681 | } else if (ctype_isbitfield(ct->info)) { | ||
| 682 | goto noth; | ||
| 683 | } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { /* Presumably a transparent (unnamed) sub-struct/union -- confirm against lj_ctype.h. */ | ||
| 684 | sct = ctype_rawchild(cts, ct); | ||
| 685 | substruct: | ||
| 686 | if (sct->size > 0) { /* Zero-sized sub-structs are ignored. */ | ||
| 687 | unsigned int s = ccall_classify_struct(cts, sct); | ||
| 688 | if (s <= 1) goto noth; /* A non-HFA member makes the whole struct a non-HFA. */ | ||
| 689 | r |= (s & 255); /* Low byte: element size of the nested HFA. */ | ||
| 690 | if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); | ||
| 691 | } | ||
| 692 | } | ||
| 693 | } | ||
| 694 | if ((r == 4 || r == 8) && n <= 4) /* All elements float or all double, and at most four of them. */ | ||
| 695 | return r + (n << 8); | ||
| 696 | noth: /* Not a homogeneous float/double aggregate. */ | ||
| 697 | return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ | ||
| 698 | } | ||
| 699 | |||
| 700 | #endif | ||
| 701 | |||
| 587 | /* -- Common C call handling ---------------------------------------------- */ | 702 | /* -- Common C call handling ---------------------------------------------- */ |
| 588 | 703 | ||
| 589 | /* Infer the destination CTypeID for a vararg argument. */ | 704 | /* Infer the destination CTypeID for a vararg argument. */ |
| @@ -766,6 +881,12 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, | |||
| 766 | cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ | 881 | cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ |
| 767 | cc->fpr[nfpr-2].d[1] = 0; | 882 | cc->fpr[nfpr-2].d[1] = 0; |
| 768 | } | 883 | } |
| 884 | #elif LJ_TARGET_ARM64 | ||
| 885 | if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { | ||
| 886 | /* Split float HFA or complex float into separate registers. */ | ||
| 887 | CTSize i = (sz >> 2) - 1; | ||
| 888 | do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); | ||
| 889 | } | ||
| 769 | #else | 890 | #else |
| 770 | UNUSED(isfp); | 891 | UNUSED(isfp); |
| 771 | #endif | 892 | #endif |
diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 21af04ef..91983fee 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h | |||
| @@ -68,6 +68,21 @@ typedef union FPRArg { | |||
| 68 | float f[2]; | 68 | float f[2]; |
| 69 | } FPRArg; | 69 | } FPRArg; |
| 70 | 70 | ||
| 71 | #elif LJ_TARGET_ARM64 | ||
| 72 | |||
| 73 | #define CCALL_NARG_GPR 8 /* Arguments in x0-x7. */ | ||
| 74 | #define CCALL_NRET_GPR 2 /* Results in x0-x1. */ | ||
| 75 | #define CCALL_NARG_FPR 8 /* Arguments in d0-d7. */ | ||
| 76 | #define CCALL_NRET_FPR 4 /* Results in d0-d3. */ | ||
| 77 | #define CCALL_SPS_FREE 0 | ||
| 78 | |||
| 79 | typedef intptr_t GPRArg; | ||
| 80 | typedef union FPRArg { /* One save slot per FPR; different views of the same register value. */ | ||
| 81 | double d; | ||
| 82 | float f; | ||
| 83 | uint32_t u32; /* Raw bits of a float, used when copying float HFA elements. */ | ||
| 84 | } FPRArg; | ||
| 85 | |||
| 71 | #elif LJ_TARGET_PPC | 86 | #elif LJ_TARGET_PPC |
| 72 | 87 | ||
| 73 | #define CCALL_NARG_GPR 8 | 88 | #define CCALL_NARG_GPR 8 |
| @@ -135,6 +150,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { | |||
| 135 | uint8_t nfpr; /* Number of arguments in FPRs. */ | 150 | uint8_t nfpr; /* Number of arguments in FPRs. */ |
| 136 | #elif LJ_TARGET_X86 | 151 | #elif LJ_TARGET_X86 |
| 137 | uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ | 152 | uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ |
| 153 | #elif LJ_TARGET_ARM64 | ||
| 154 | void *retp; /* Aggregate return pointer in x8. */ | ||
| 138 | #elif LJ_TARGET_PPC | 155 | #elif LJ_TARGET_PPC |
| 139 | uint8_t nfpr; /* Number of arguments in FPRs. */ | 156 | uint8_t nfpr; /* Number of arguments in FPRs. */ |
| 140 | #endif | 157 | #endif |
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 799dcd0e..66a09440 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | 27 | ||
| 28 | #if LJ_OS_NOJIT | 28 | #if LJ_OS_NOJIT |
| 29 | 29 | ||
| 30 | /* Disabled callback support. */ | 30 | /* Callbacks disabled. */ |
| 31 | #define CALLBACK_SLOT2OFS(slot) (0*(slot)) | 31 | #define CALLBACK_SLOT2OFS(slot) (0*(slot)) |
| 32 | #define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) | 32 | #define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) |
| 33 | #define CALLBACK_MAX_SLOT 0 | 33 | #define CALLBACK_MAX_SLOT 0 |
| @@ -54,23 +54,18 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) | |||
| 54 | #elif LJ_TARGET_ARM | 54 | #elif LJ_TARGET_ARM |
| 55 | 55 | ||
| 56 | #define CALLBACK_MCODE_HEAD 32 | 56 | #define CALLBACK_MCODE_HEAD 32 |
| 57 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | 57 | |
| 58 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | 58 | #elif LJ_TARGET_ARM64 |
| 59 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | 59 | |
| 60 | #define CALLBACK_MCODE_HEAD 32 | ||
| 60 | 61 | ||
| 61 | #elif LJ_TARGET_PPC | 62 | #elif LJ_TARGET_PPC |
| 62 | 63 | ||
| 63 | #define CALLBACK_MCODE_HEAD 24 | 64 | #define CALLBACK_MCODE_HEAD 24 |
| 64 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | ||
| 65 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | ||
| 66 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | ||
| 67 | 65 | ||
| 68 | #elif LJ_TARGET_MIPS | 66 | #elif LJ_TARGET_MIPS |
| 69 | 67 | ||
| 70 | #define CALLBACK_MCODE_HEAD 24 | 68 | #define CALLBACK_MCODE_HEAD 24 |
| 71 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | ||
| 72 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | ||
| 73 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) | ||
| 74 | 69 | ||
| 75 | #else | 70 | #else |
| 76 | 71 | ||
| @@ -81,6 +76,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) | |||
| 81 | 76 | ||
| 82 | #endif | 77 | #endif |
| 83 | 78 | ||
| 79 | #ifndef CALLBACK_SLOT2OFS /* Shared default for targets that only define CALLBACK_MCODE_HEAD: a head followed by 8-byte slots. */ | ||
| 80 | #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) | ||
| 81 | #define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) | ||
| 82 | #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) /* Number of slots that fit behind the head. */ | ||
| 83 | #endif | ||
| 84 | |||
| 84 | /* Convert callback slot number to callback function pointer. */ | 85 | /* Convert callback slot number to callback function pointer. */ |
| 85 | static void *callback_slot2ptr(CTState *cts, MSize slot) | 86 | static void *callback_slot2ptr(CTState *cts, MSize slot) |
| 86 | { | 87 | { |
| @@ -157,6 +158,26 @@ static void callback_mcode_init(global_State *g, uint32_t *page) | |||
| 157 | } | 158 | } |
| 158 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); | 159 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); |
| 159 | } | 160 | } |
| 161 | #elif LJ_TARGET_ARM64 | ||
| 162 | static void callback_mcode_init(global_State *g, uint32_t *page) | ||
| 163 | { /* Fill the callback page: a 32 byte head (4 instructions + 2 pointer literals) followed by one 2-instruction stub per slot. */ | ||
| 164 | uint32_t *p = page; | ||
| 165 | void *target = (void *)lj_vm_ffi_callback; | ||
| 166 | MSize slot; | ||
| 167 | *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); /* x11 = literal 4 words ahead (target). */ | ||
| 168 | *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); /* x10 = literal 5 words ahead (g). */ | ||
| 169 | *p++ = A64I_BR | A64F_N(RID_X11); /* Jump to lj_vm_ffi_callback. */ | ||
| 170 | *p++ = A64I_NOP; /* Pad so the literals start at byte offset 16. */ | ||
| 171 | ((void **)p)[0] = target; | ||
| 172 | ((void **)p)[1] = g; | ||
| 173 | p += 4; /* Skip the two 8-byte literals (4 instruction words). */ | ||
| 174 | for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { | ||
| 175 | *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); /* w9 = slot number. */ | ||
| 176 | *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); /* Branch back to the head; negative word offset masked to 26 bits. */ | ||
| 177 | p++; | ||
| 178 | } | ||
| 179 | lua_assert(p - page <= CALLBACK_MCODE_SIZE); | ||
| 180 | } | ||
| 160 | #elif LJ_TARGET_PPC | 181 | #elif LJ_TARGET_PPC |
| 161 | static void callback_mcode_init(global_State *g, uint32_t *page) | 182 | static void callback_mcode_init(global_State *g, uint32_t *page) |
| 162 | { | 183 | { |
| @@ -351,6 +372,29 @@ void lj_ccallback_mcode_free(CTState *cts) | |||
| 351 | goto done; \ | 372 | goto done; \ |
| 352 | } CALLBACK_HANDLE_REGARG_FP2 | 373 | } CALLBACK_HANDLE_REGARG_FP2 |
| 353 | 374 | ||
| 375 | #elif LJ_TARGET_ARM64 | ||
| 376 | |||
| 377 | #define CALLBACK_HANDLE_REGARG /* Locate an incoming callback argument in the saved FPRs/GPRs; jumps to done if found there, otherwise falls through to the code following the macro. */ \ | ||
| 378 | if (isfp) { \ | ||
| 379 | if (nfpr + n <= CCALL_NARG_FPR) { \ | ||
| 380 | sp = &cts->cb.fpr[nfpr]; \ | ||
| 381 | nfpr += n; \ | ||
| 382 | goto done; \ | ||
| 383 | } else { \ | ||
| 384 | nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ | ||
| 385 | } \ | ||
| 386 | } else { \ | ||
| 387 | if (!LJ_TARGET_IOS && n > 1) \ | ||
| 388 | ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ | ||
| 389 | if (ngpr + n <= maxgpr) { \ | ||
| 390 | sp = &cts->cb.gpr[ngpr]; \ | ||
| 391 | ngpr += n; \ | ||
| 392 | goto done; \ | ||
| 393 | } else { \ | ||
| 394 | ngpr = CCALL_NARG_GPR; /* Prevent reordering. NOTE(review): the bound check above uses maxgpr -- confirm both are equal on this target. */ \ | ||
| 395 | } \ | ||
| 396 | } | ||
| 397 | |||
| 354 | #elif LJ_TARGET_PPC | 398 | #elif LJ_TARGET_PPC |
| 355 | 399 | ||
| 356 | #define CALLBACK_HANDLE_REGARG \ | 400 | #define CALLBACK_HANDLE_REGARG \ |
diff --git a/src/lj_target.h b/src/lj_target.h index 1a242325..0daecb11 100644 --- a/src/lj_target.h +++ b/src/lj_target.h | |||
| @@ -138,6 +138,8 @@ typedef uint32_t RegCost; | |||
| 138 | #include "lj_target_x86.h" | 138 | #include "lj_target_x86.h" |
| 139 | #elif LJ_TARGET_ARM | 139 | #elif LJ_TARGET_ARM |
| 140 | #include "lj_target_arm.h" | 140 | #include "lj_target_arm.h" |
| 141 | #elif LJ_TARGET_ARM64 | ||
| 142 | #include "lj_target_arm64.h" | ||
| 141 | #elif LJ_TARGET_PPC | 143 | #elif LJ_TARGET_PPC |
| 142 | #include "lj_target_ppc.h" | 144 | #include "lj_target_ppc.h" |
| 143 | #elif LJ_TARGET_MIPS | 145 | #elif LJ_TARGET_MIPS |
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h new file mode 100644 index 00000000..99e0adc9 --- /dev/null +++ b/src/lj_target_arm64.h | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | /* | ||
| 2 | ** Definitions for ARM64 CPUs. | ||
| 3 | ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h | ||
| 4 | */ | ||
| 5 | |||
| 6 | #ifndef _LJ_TARGET_ARM64_H | ||
| 7 | #define _LJ_TARGET_ARM64_H | ||
| 8 | |||
| 9 | /* -- Registers IDs ------------------------------------------------------- */ | ||
| 10 | |||
| 11 | #define GPRDEF(_) \ | ||
| 12 | _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \ | ||
| 13 | _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ | ||
| 14 | _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ | ||
| 15 | _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP) | ||
| 16 | #define FPRDEF(_) \ | ||
| 17 | _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ | ||
| 18 | _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \ | ||
| 19 | _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \ | ||
| 20 | _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31) | ||
| 21 | #define VRIDDEF(_) | ||
| 22 | |||
| 23 | #define RIDENUM(name) RID_##name, /* Expands each register name of GPRDEF/FPRDEF into an enum constant. */ | ||
| 24 | |||
| 25 | enum { | ||
| 26 | GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ | ||
| 27 | FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ | ||
| 28 | RID_MAX, | ||
| 29 | RID_TMP = RID_LR, /* lr doubles as the temporary register. */ | ||
| 30 | RID_ZERO = RID_SP, /* Same register id as sp (the last GPR entry). */ | ||
| 31 | |||
| 32 | /* Calling conventions. */ | ||
| 33 | RID_RET = RID_X0, | ||
| 34 | RID_FPRET = RID_D0, | ||
| 35 | |||
| 36 | /* These definitions must match with the *.dasc file(s): */ | ||
| 37 | RID_BASE = RID_X19, /* Interpreter BASE. */ | ||
| 38 | RID_LPC = RID_X21, /* Interpreter PC. */ | ||
| 39 | RID_GL = RID_X22, /* Interpreter GL. */ | ||
| 40 | RID_LREG = RID_X23, /* Interpreter L. */ | ||
| 41 | |||
| 42 | /* Register ranges [min, max) and number of registers. */ | ||
| 43 | RID_MIN_GPR = RID_X0, | ||
| 44 | RID_MAX_GPR = RID_SP+1, | ||
| 45 | RID_MIN_FPR = RID_MAX_GPR, /* FPR ids directly follow the GPR ids. */ | ||
| 46 | RID_MAX_FPR = RID_D31+1, | ||
| 47 | RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, | ||
| 48 | RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR | ||
| 49 | }; | ||
| 50 | |||
| 51 | #define RID_NUM_KREF RID_NUM_GPR | ||
| 52 | #define RID_MIN_KREF RID_X0 | ||
| 53 | |||
| 54 | /* -- Register sets ------------------------------------------------------- */ | ||
| 55 | |||
| 56 | /* Make use of all registers, except for x18, fp, lr and sp. */ | ||
| 57 | #define RSET_FIXED \ | ||
| 58 | (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)) | ||
| 59 | #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) /* All GPRs minus the fixed ones. */ | ||
| 60 | #define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) /* All FPRs. */ | ||
| 61 | #define RSET_ALL (RSET_GPR|RSET_FPR) | ||
| 62 | #define RSET_INIT RSET_ALL | ||
| 63 | |||
| 64 | /* lr is an implicit scratch register. */ | ||
| 65 | #define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1)) /* x0-x17. */ | ||
| 66 | #define RSET_SCRATCH_FPR \ | ||
| 67 | (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1)) | ||
| 68 | #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) /* Scratch FPRs are d0-d7 and d16-d31; d8-d15 are excluded. */ | ||
| 69 | #define REGARG_FIRSTGPR RID_X0 /* Argument GPRs: x0-x7. */ | ||
| 70 | #define REGARG_LASTGPR RID_X7 | ||
| 71 | #define REGARG_NUMGPR 8 | ||
| 72 | #define REGARG_FIRSTFPR RID_D0 /* Argument FPRs: d0-d7. */ | ||
| 73 | #define REGARG_LASTFPR RID_D7 | ||
| 74 | #define REGARG_NUMFPR 8 | ||
| 75 | |||
| 76 | /* -- Instructions -------------------------------------------------------- */ | ||
| 77 | |||
| 78 | /* Instruction fields. The macros only shift their operand into place; they do not mask it, so negative immediates must be masked by the user. */ | ||
| 79 | #define A64F_D(r) (r) /* Register field at bit 0. */ | ||
| 80 | #define A64F_N(r) ((r) << 5) /* Register field at bit 5. */ | ||
| 81 | #define A64F_A(r) ((r) << 10) /* Register field at bit 10. */ | ||
| 82 | #define A64F_M(r) ((r) << 16) /* Register field at bit 16. */ | ||
| 83 | #define A64F_U16(x) ((x) << 5) /* Unsigned 16 bit immediate at bit 5. */ | ||
| 84 | #define A64F_S26(x) (x) /* Signed 26 bit word offset at bit 0. */ | ||
| 85 | #define A64F_S19(x) ((x) << 5) /* Signed 19 bit word offset at bit 5. */ | ||
| 86 | |||
| 87 | typedef enum A64Ins { /* Base opcodes; operands are OR-ed in with the A64F_* macros. */ | ||
| 88 | A64I_MOVZw = 0x52800000, /* movz Wd, #imm16 */ | ||
| 89 | A64I_MOVZx = 0xd2800000, /* movz Xd, #imm16 */ | ||
| 90 | A64I_LDRLw = 0x18000000, /* ldr Wt, <pc-relative literal> */ | ||
| 91 | A64I_LDRLx = 0x58000000, /* ldr Xt, <pc-relative literal> */ | ||
| 92 | A64I_NOP = 0xd503201f, /* nop */ | ||
| 93 | A64I_B = 0x14000000, /* b <pc-relative target> */ | ||
| 94 | A64I_BR = 0xd61f0000, /* br Xn */ | ||
| 95 | } A64Ins; | ||
| 96 | |||
| 97 | #endif | ||
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 895262e0..a31cbb3a 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
| @@ -853,7 +853,8 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 853 | | str PC, SAVE_PC | 853 | | str PC, SAVE_PC |
| 854 | | add CARG3, RA, NARGS8:RC | 854 | | add CARG3, RA, NARGS8:RC |
| 855 | | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | 855 | | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) |
| 856 | | ldp LFUNC:CARG3, PC, [RA, FRAME_FUNC] // Guaranteed to be a function here. | 856 | | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here. |
| 857 | | ldr PC, [BASE, FRAME_PC] | ||
| 857 | | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. | 858 | | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. |
| 858 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | 859 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK |
| 859 | | b ->BC_CALLT2_Z | 860 | | b ->BC_CALLT2_Z |
| @@ -1859,18 +1860,89 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1859 | |// Callback slot number in w9, g in x10. Saveregs is performed here. | 1860 | |// Callback slot number in w9, g in x10. Saveregs is performed here. |
| 1860 | |->vm_ffi_callback: | 1861 | |->vm_ffi_callback: |
| 1861 | |.if FFI | 1862 | |.if FFI |
| 1862 | | NYI | 1863 | |.type CTSTATE, CTState, PC |
| 1864 | | saveregs | ||
| 1865 | | ldr CTSTATE, GL:x10->ctype_state | ||
| 1866 | | mov GL, x10 | ||
| 1867 | | add x10, sp, # CFRAME_SPACE | ||
| 1868 | | str w9, CTSTATE->cb.slot | ||
| 1869 | | stp x0, x1, CTSTATE->cb.gpr[0] | ||
| 1870 | | stp d0, d1, CTSTATE->cb.fpr[0] | ||
| 1871 | | stp x2, x3, CTSTATE->cb.gpr[2] | ||
| 1872 | | stp d2, d3, CTSTATE->cb.fpr[2] | ||
| 1873 | | stp x4, x5, CTSTATE->cb.gpr[4] | ||
| 1874 | | stp d4, d5, CTSTATE->cb.fpr[4] | ||
| 1875 | | stp x6, x7, CTSTATE->cb.gpr[6] | ||
| 1876 | | stp d6, d7, CTSTATE->cb.fpr[6] | ||
| 1877 | | str x10, CTSTATE->cb.stack | ||
| 1878 | | mov CARG1, CTSTATE | ||
| 1879 | | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. | ||
| 1880 | | mov CARG2, sp | ||
| 1881 | | bl extern lj_ccallback_enter // (CTState *cts, void *cf) | ||
| 1882 | | // Returns lua_State *. | ||
| 1883 | | ldp BASE, RC, L:CRET1->base | ||
| 1884 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
| 1885 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
| 1886 | | movn TISNIL, #0 | ||
| 1887 | | mov L, CRET1 | ||
| 1888 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ||
| 1889 | | sub RC, RC, BASE | ||
| 1890 | | st_vmstate ST_INTERP | ||
| 1891 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
| 1892 | | ins_callt | ||
| 1863 | |.endif | 1893 | |.endif |
| 1864 | | | 1894 | | |
| 1865 | |->cont_ffi_callback: // Return from FFI callback. | 1895 | |->cont_ffi_callback: // Return from FFI callback. |
| 1866 | |.if FFI | 1896 | |.if FFI |
| 1867 | | NYI | 1897 | | ldr CTSTATE, GL->ctype_state |
| 1898 | | stp BASE, CARG4, L->base | ||
| 1899 | | str L, CTSTATE->L | ||
| 1900 | | mov CARG1, CTSTATE | ||
| 1901 | | mov CARG2, RA | ||
| 1902 | | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) | ||
| 1903 | | ldp x0, x1, CTSTATE->cb.gpr[0] | ||
| 1904 | | ldp d0, d1, CTSTATE->cb.fpr[0] | ||
| 1905 | | b ->vm_leave_unw | ||
| 1868 | |.endif | 1906 | |.endif |
| 1869 | | | 1907 | | |
| 1870 | |->vm_ffi_call: // Call C function via FFI. | 1908 | |->vm_ffi_call: // Call C function via FFI. |
| 1871 | | // Caveat: needs special frame unwinding, see below. | 1909 | | // Caveat: needs special frame unwinding, see below. |
| 1872 | |.if FFI | 1910 | |.if FFI |
| 1873 | | NYI | 1911 | | .type CCSTATE, CCallState, x19 |
| 1912 | | stp fp, lr, [sp, #-32]! | ||
| 1913 | | add fp, sp, #0 | ||
| 1914 | | str CCSTATE, [sp, #16] | ||
| 1915 | | mov CCSTATE, x0 | ||
| 1916 | | ldr TMP0w, CCSTATE:x0->spadj | ||
| 1917 | | ldrb TMP1w, CCSTATE->nsp | ||
| 1918 | | add TMP2, CCSTATE, #offsetof(CCallState, stack) | ||
| 1919 | | subs TMP1, TMP1, #1 | ||
| 1920 | | ldr TMP3, CCSTATE->func | ||
| 1921 | | sub sp, fp, TMP0 | ||
| 1922 | | bmi >2 | ||
| 1923 | |1: // Copy stack slots | ||
| 1924 | | ldr TMP0, [TMP2, TMP1, lsl #3] | ||
| 1925 | | str TMP0, [sp, TMP1, lsl #3] | ||
| 1926 | | subs TMP1, TMP1, #1 | ||
| 1927 | | bpl <1 | ||
| 1928 | |2: | ||
| 1929 | | ldp x0, x1, CCSTATE->gpr[0] | ||
| 1930 | | ldp d0, d1, CCSTATE->fpr[0] | ||
| 1931 | | ldp x2, x3, CCSTATE->gpr[2] | ||
| 1932 | | ldp d2, d3, CCSTATE->fpr[2] | ||
| 1933 | | ldp x4, x5, CCSTATE->gpr[4] | ||
| 1934 | | ldp d4, d5, CCSTATE->fpr[4] | ||
| 1935 | | ldp x6, x7, CCSTATE->gpr[6] | ||
| 1936 | | ldp d6, d7, CCSTATE->fpr[6] | ||
| 1937 | | ldr x8, CCSTATE->retp | ||
| 1938 | | blr TMP3 | ||
| 1939 | | mov sp, fp | ||
| 1940 | | stp x0, x1, CCSTATE->gpr[0] | ||
| 1941 | | stp d0, d1, CCSTATE->fpr[0] | ||
| 1942 | | stp d2, d3, CCSTATE->fpr[2] | ||
| 1943 | | ldr CCSTATE, [sp, #16] | ||
| 1944 | | ldp fp, lr, [sp], #32 | ||
| 1945 | | ret | ||
| 1874 | |.endif | 1946 | |.endif |
| 1875 | |// Note: vm_ffi_call must be the last function in this object file! | 1947 | |// Note: vm_ffi_call must be the last function in this object file! |
| 1876 | | | 1948 | | |
| @@ -2087,7 +2159,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2087 | | | 2159 | | |
| 2088 | |.if FFI | 2160 | |.if FFI |
| 2089 | |7: | 2161 | |7: |
| 2090 | | asr ITYPE, TMP0, #47 | 2162 | | asr ITYPE, CARG1, #47 |
| 2091 | | cmn ITYPE, #-LJ_TCDATA | 2163 | | cmn ITYPE, #-LJ_TCDATA |
| 2092 | | bne <2 | 2164 | | bne <2 |
| 2093 | | b ->vmeta_equal_cd | 2165 | | b ->vmeta_equal_cd |
| @@ -3600,7 +3672,19 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
| 3600 | "\t.align 3\n" | 3672 | "\t.align 3\n" |
| 3601 | ".LEFDE0:\n\n"); | 3673 | ".LEFDE0:\n\n"); |
| 3602 | #if LJ_HASFFI | 3674 | #if LJ_HASFFI |
| 3603 | #error "NYI" | 3675 | fprintf(ctx->fp, |
| 3676 | ".LSFDE1:\n" | ||
| 3677 | "\t.long .LEFDE1-.LASFDE1\n" | ||
| 3678 | ".LASFDE1:\n" | ||
| 3679 | "\t.long .Lframe0\n" | ||
| 3680 | "\t.quad lj_vm_ffi_call\n" | ||
| 3681 | "\t.quad %d\n" | ||
| 3682 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ | ||
| 3683 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ | ||
| 3684 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ | ||
| 3685 | "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ | ||
| 3686 | "\t.align 3\n" | ||
| 3687 | ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | ||
| 3604 | #endif | 3688 | #endif |
| 3605 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); | 3689 | fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); |
| 3606 | fprintf(ctx->fp, | 3690 | fprintf(ctx->fp, |
| @@ -3615,7 +3699,7 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
| 3615 | "\t.byte 30\n" /* Return address is in lr. */ | 3699 | "\t.byte 30\n" /* Return address is in lr. */ |
| 3616 | "\t.uleb128 6\n" /* augmentation length */ | 3700 | "\t.uleb128 6\n" /* augmentation length */ |
| 3617 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | 3701 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ |
| 3618 | "\t.long lj_err_unwind_dwarf-.\n" | 3702 | "\t.long lj_err_unwind_dwarf-.\n" |
| 3619 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | 3703 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ |
| 3620 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ | 3704 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ |
| 3621 | "\t.align 3\n" | 3705 | "\t.align 3\n" |
| @@ -3627,7 +3711,7 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
| 3627 | "\t.long .LASFDE2-.Lframe1\n" | 3711 | "\t.long .LASFDE2-.Lframe1\n" |
| 3628 | "\t.long .Lbegin-.\n" | 3712 | "\t.long .Lbegin-.\n" |
| 3629 | "\t.long %d\n" | 3713 | "\t.long %d\n" |
| 3630 | "\t.uleb128 0\n" /* augmentation length */ | 3714 | "\t.uleb128 0\n" /* augmentation length */ |
| 3631 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | 3715 | "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ |
| 3632 | "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ | 3716 | "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ |
| 3633 | "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ | 3717 | "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ |
| @@ -3641,7 +3725,35 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
| 3641 | "\t.align 3\n" | 3725 | "\t.align 3\n" |
| 3642 | ".LEFDE2:\n\n"); | 3726 | ".LEFDE2:\n\n"); |
| 3643 | #if LJ_HASFFI | 3727 | #if LJ_HASFFI |
| 3644 | #error "NYI" | 3728 | fprintf(ctx->fp, |
| 3729 | ".Lframe2:\n" | ||
| 3730 | "\t.long .LECIE2-.LSCIE2\n" | ||
| 3731 | ".LSCIE2:\n" | ||
| 3732 | "\t.long 0\n" | ||
| 3733 | "\t.byte 0x1\n" | ||
| 3734 | "\t.string \"zR\"\n" | ||
| 3735 | "\t.uleb128 0x1\n" | ||
| 3736 | "\t.sleb128 -8\n" | ||
| 3737 | "\t.byte 30\n" /* Return address is in lr. */ | ||
| 3738 | "\t.uleb128 1\n" /* augmentation length */ | ||
| 3739 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | ||
| 3740 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ | ||
| 3741 | "\t.align 3\n" | ||
| 3742 | ".LECIE2:\n\n"); | ||
| 3743 | fprintf(ctx->fp, | ||
| 3744 | ".LSFDE3:\n" | ||
| 3745 | "\t.long .LEFDE3-.LASFDE3\n" | ||
| 3746 | ".LASFDE3:\n" | ||
| 3747 | "\t.long .LASFDE3-.Lframe2\n" | ||
| 3748 | "\t.long lj_vm_ffi_call-.\n" | ||
| 3749 | "\t.long %d\n" | ||
| 3750 | "\t.uleb128 0\n" /* augmentation length */ | ||
| 3751 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ | ||
| 3752 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ | ||
| 3753 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ | ||
| 3754 | "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ | ||
| 3755 | "\t.align 3\n" | ||
| 3756 | ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); | ||
| 3645 | #endif | 3757 | #endif |
| 3646 | break; | 3758 | break; |
| 3647 | default: | 3759 | default: |
