aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/ext_ffi_semantics.html1
-rw-r--r--lib/dump.lua24
-rw-r--r--src/Makefile.dep8
-rw-r--r--src/lj_asm.c11
-rw-r--r--src/lj_asm_arm.h10
-rw-r--r--src/lj_asm_ppc.h12
-rw-r--r--src/lj_asm_x86.h140
-rw-r--r--src/lj_ccall.c4
-rw-r--r--src/lj_ccall.h3
-rw-r--r--src/lj_crecord.c103
-rw-r--r--src/lj_ctype.h2
-rw-r--r--src/lj_ircall.h16
-rw-r--r--src/lj_target_ppc.h2
-rw-r--r--src/lj_target_x86.h4
14 files changed, 262 insertions, 78 deletions
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
index 9016a8a6..b8c839c2 100644
--- a/doc/ext_ffi_semantics.html
+++ b/doc/ext_ffi_semantics.html
@@ -985,7 +985,6 @@ alignment &gt; 8&nbsp;bytes.</li>
985<li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li> 985<li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
986<li>Pointer differences for element sizes that are not a power of 986<li>Pointer differences for element sizes that are not a power of
987two.</li> 987two.</li>
988<li>Calls to non-cdecl or vararg C&nbsp;functions.</li>
989<li>Calls to C&nbsp;functions with aggregates passed or returned by 988<li>Calls to C&nbsp;functions with aggregates passed or returned by
990value.</li> 989value.</li>
991<li>Calls to ctype metamethods which are not plain functions.</li> 990<li>Calls to ctype metamethods which are not plain functions.</li>
diff --git a/lib/dump.lua b/lib/dump.lua
index 3a88ef45..0f0e9058 100644
--- a/lib/dump.lua
+++ b/lib/dump.lua
@@ -378,6 +378,24 @@ local function ridsp_name(ridsp)
378 return "" 378 return ""
379end 379end
380 380
381-- Dump CALL* function ref and return optional ctype.
382local function dumpcallfunc(tr, ins)
383 local ctype
384 if ins > 0 then
385 local m, ot, op1, op2 = traceir(tr, ins)
386 if band(ot, 31) == 0 then -- nil type means CARG(func, ctype).
387 ins = op1
388 ctype = formatk(tr, op2)
389 end
390 end
391 if ins < 0 then
392 out:write(format("[0x%x](", tonumber((tracek(tr, ins)))))
393 else
394 out:write(format("%04d (", ins))
395 end
396 return ctype
397end
398
381-- Recursively gather CALL* args and dump them. 399-- Recursively gather CALL* args and dump them.
382local function dumpcallargs(tr, ins) 400local function dumpcallargs(tr, ins)
383 if ins < 0 then 401 if ins < 0 then
@@ -447,15 +465,15 @@ local function dump_ir(tr, dumpsnap, dumpreg)
447 irtype[t], op)) 465 irtype[t], op))
448 local m1, m2 = band(m, 3), band(m, 3*4) 466 local m1, m2 = band(m, 3), band(m, 3*4)
449 if sub(op, 1, 4) == "CALL" then 467 if sub(op, 1, 4) == "CALL" then
468 local ctype
450 if m2 == 1*4 then -- op2 == IRMlit 469 if m2 == 1*4 then -- op2 == IRMlit
451 out:write(format("%-10s (", vmdef.ircall[op2])) 470 out:write(format("%-10s (", vmdef.ircall[op2]))
452 elseif op2 < 0 then
453 out:write(format("[0x%x](", tonumber((tracek(tr, op2)))))
454 else 471 else
455 out:write(format("%04d (", op2)) 472 ctype = dumpcallfunc(tr, op2)
456 end 473 end
457 if op1 ~= -1 then dumpcallargs(tr, op1) end 474 if op1 ~= -1 then dumpcallargs(tr, op1) end
458 out:write(")") 475 out:write(")")
476 if ctype then out:write(" ctype ", ctype) end
459 elseif op == "CNEW " and op2 == -1 then 477 elseif op == "CNEW " and op2 == -1 then
460 out:write(formatk(tr, op1)) 478 out:write(formatk(tr, op1))
461 elseif m1 ~= 3 then -- op1 != IRMnone 479 elseif m1 ~= 3 then -- op1 != IRMnone
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 9c866050..81bbed29 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -1,6 +1,6 @@
1buildvm.o: buildvm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \ 1buildvm.o: buildvm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \
2 lj_obj.h lj_gc.h lj_bc.h lj_ir.h lj_ircall.h lj_jit.h lj_frame.h \ 2 lj_obj.h lj_gc.h lj_bc.h lj_ir.h lj_ircall.h lj_jit.h lj_frame.h \
3 lj_dispatch.h lj_ccall.h luajit.h \ 3 lj_dispatch.h lj_ccall.h lj_ctype.h luajit.h \
4 lj_traceerr.h 4 lj_traceerr.h
5buildvm_asm.o: buildvm_asm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \ 5buildvm_asm.o: buildvm_asm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \
6 lj_bc.h 6 lj_bc.h
@@ -86,9 +86,9 @@ lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
86 lj_bc.h lj_vm.h lj_char.h 86 lj_bc.h lj_vm.h lj_char.h
87lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 87lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
88 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ 88 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
89 lj_gc.h lj_cparse.h lj_cconv.h lj_clib.h lj_ir.h lj_jit.h lj_ircall.h \ 89 lj_gc.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ir.h lj_jit.h \
90 lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ 90 lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
91 lj_ffrecord.h lj_crecord.h 91 lj_record.h lj_ffrecord.h lj_crecord.h
92lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 92lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
93 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h 93 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h
94lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 94lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 2e204239..9fe53416 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -888,7 +888,16 @@ static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
888 nargs++; 888 nargs++;
889 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } 889 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
890 } 890 }
891 /* NYI: fastcall etc. */ 891#if LJ_HASFFI
892 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
893 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
894 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
895 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
896#if LJ_TARGET_X86
897 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
898#endif
899 }
900#endif
892 return (nargs | (ir->t.irt << CCI_OTSHIFT)); 901 return (nargs | (ir->t.irt << CCI_OTSHIFT));
893} 902}
894 903
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 1963f3ba..2d4b8bae 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -331,13 +331,17 @@ static void asm_callx(ASMState *as, IRIns *ir)
331{ 331{
332 IRRef args[CCI_NARGS_MAX]; 332 IRRef args[CCI_NARGS_MAX];
333 CCallInfo ci; 333 CCallInfo ci;
334 IRRef func;
335 IRIns *irf;
334 ci.flags = asm_callx_flags(as, ir); 336 ci.flags = asm_callx_flags(as, ir);
335 asm_collectargs(as, ir, &ci, args); 337 asm_collectargs(as, ir, &ci, args);
336 asm_setupresult(as, ir, &ci); 338 asm_setupresult(as, ir, &ci);
337 if (irref_isk(ir->op2)) { /* Call to constant address. */ 339 func = ir->op2; irf = IR(func);
338 ci.func = (ASMFunction)(void *)(IR(ir->op2)->i); 340 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
341 if (irref_isk(func)) { /* Call to constant address. */
342 ci.func = (ASMFunction)(void *)(irf->i);
339 } else { /* Need a non-argument register for indirect calls. */ 343 } else { /* Need a non-argument register for indirect calls. */
340 Reg freg = ra_alloc1(as, ir->op2, RSET_RANGE(RID_R4, RID_R12+1)); 344 Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12+1));
341 emit_m(as, ARMI_BLXr, freg); 345 emit_m(as, ARMI_BLXr, freg);
342 ci.func = (ASMFunction)(void *)0; 346 ci.func = (ASMFunction)(void *)0;
343 } 347 }
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 166cf2e4..196ca2ed 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -284,6 +284,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
284 ofs += 4; 284 ofs += 4;
285 } 285 }
286 } 286 }
287 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
288 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
287} 289}
288 290
289/* Setup result reg/sp for call. Evict scratch regs. */ 291/* Setup result reg/sp for call. Evict scratch regs. */
@@ -336,14 +338,18 @@ static void asm_callx(ASMState *as, IRIns *ir)
336{ 338{
337 IRRef args[CCI_NARGS_MAX]; 339 IRRef args[CCI_NARGS_MAX];
338 CCallInfo ci; 340 CCallInfo ci;
341 IRRef func;
342 IRIns *irf;
339 ci.flags = asm_callx_flags(as, ir); 343 ci.flags = asm_callx_flags(as, ir);
340 asm_collectargs(as, ir, &ci, args); 344 asm_collectargs(as, ir, &ci, args);
341 asm_setupresult(as, ir, &ci); 345 asm_setupresult(as, ir, &ci);
342 if (irref_isk(ir->op2)) { /* Call to constant address. */ 346 func = ir->op2; irf = IR(func);
343 ci.func = (ASMFunction)(void *)(IR(ir->op2)->i); 347 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
348 if (irref_isk(func)) { /* Call to constant address. */
349 ci.func = (ASMFunction)(void *)(irf->i);
344 } else { /* Need a non-argument register for indirect calls. */ 350 } else { /* Need a non-argument register for indirect calls. */
345 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); 351 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
346 Reg freg = ra_alloc1(as, ir->op2, allow); 352 Reg freg = ra_alloc1(as, func, allow);
347 *--as->mcp = PPCI_BCTRL; 353 *--as->mcp = PPCI_BCTRL;
348 *--as->mcp = PPCI_MTCTR | PPCF_T(freg); 354 *--as->mcp = PPCI_MTCTR | PPCF_T(freg);
349 ci.func = (ASMFunction)(void *)0; 355 ci.func = (ASMFunction)(void *)0;
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 154ca890..391e2de9 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -369,18 +369,76 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
369 369
370/* -- Calls --------------------------------------------------------------- */ 370/* -- Calls --------------------------------------------------------------- */
371 371
372/* Count the required number of stack slots for a call. */
373static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
374{
375 uint32_t i, nargs = CCI_NARGS(ci);
376 int nslots = 0;
377#if LJ_64
378 if (LJ_ABI_WIN) {
379 nslots = (int)(nargs*2); /* Only matters for more than four args. */
380 } else {
381 int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
382 for (i = 0; i < nargs; i++)
383 if (args[i] && irt_isfp(IR(args[i])->t)) {
384 if (nfpr > 0) nfpr--; else nslots += 2;
385 } else {
386 if (ngpr > 0) ngpr--; else nslots += 2;
387 }
388 }
389#else
390 int ngpr = 0;
391 if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
392 ngpr = 2;
393 else if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
394 ngpr = 1;
395 for (i = 0; i < nargs; i++)
396 if (args[i] && irt_isfp(IR(args[i])->t)) {
397 nslots += irt_isnum(IR(args[i])->t) ? 2 : 1;
398 } else {
399 if (ngpr > 0) ngpr--; else nslots++;
400 }
401#endif
402 return nslots;
403}
404
372/* Generate a call to a C function. */ 405/* Generate a call to a C function. */
373static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 406static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
374{ 407{
375 uint32_t n, nargs = CCI_NARGS(ci); 408 uint32_t n, nargs = CCI_NARGS(ci);
376 int32_t ofs = STACKARG_OFS; 409 int32_t ofs = STACKARG_OFS;
377 uint32_t gprs = REGARG_GPRS;
378#if LJ_64 410#if LJ_64
411 uint32_t gprs = REGARG_GPRS;
379 Reg fpr = REGARG_FIRSTFPR; 412 Reg fpr = REGARG_FIRSTFPR;
413#if !LJ_ABI_WIN
414 MCode *patchnfpr = NULL;
415#endif
416#else
417 uint32_t gprs = 0;
418 if ((ci->flags & CCI_CC_MASK) != CCI_CC_CDECL) {
419 if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
420 gprs = (REGARG_GPRS & 31);
421 else if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
422 gprs = REGARG_GPRS;
423 }
380#endif 424#endif
381 lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
382 if ((void *)ci->func) 425 if ((void *)ci->func)
383 emit_call(as, ci->func); 426 emit_call(as, ci->func);
427#if LJ_64
428 if ((ci->flags & CCI_VARARG)) { /* Special handling for vararg calls. */
429#if LJ_ABI_WIN
430 for (n = 0; n < 4 && n < nargs; n++) {
431 IRIns *ir = IR(args[n]);
432 if (irt_isfp(ir->t)) /* Duplicate FPRs in GPRs. */
433 emit_rr(as, XO_MOVDto, (irt_isnum(ir->t) ? REX_64 : 0) | (fpr+n),
434 ((gprs >> (n*5)) & 31)); /* Either MOVD or MOVQ. */
435 }
436#else
437 patchnfpr = --as->mcp; /* Indicate number of used FPRs in register al. */
438 *--as->mcp = XI_MOVrib | RID_EAX;
439#endif
440 }
441#endif
384 for (n = 0; n < nargs; n++) { /* Setup args. */ 442 for (n = 0; n < nargs; n++) { /* Setup args. */
385 IRRef ref = args[n]; 443 IRRef ref = args[n];
386 IRIns *ir = IR(ref); 444 IRIns *ir = IR(ref);
@@ -392,15 +450,16 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
392#elif LJ_64 450#elif LJ_64
393 /* POSIX/x64 argument registers are used in order of appearance. */ 451 /* POSIX/x64 argument registers are used in order of appearance. */
394 if (irt_isfp(ir->t)) { 452 if (irt_isfp(ir->t)) {
395 r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++; 453 r = fpr <= REGARG_LASTFPR ? fpr++ : 0;
396 } else { 454 } else {
397 r = gprs & 31; gprs >>= 5; 455 r = gprs & 31; gprs >>= 5;
398 } 456 }
399#else 457#else
400 if (irt_isfp(ir->t) || !(ci->flags & CCI_FASTCALL)) { 458 if (ref && irt_isfp(ir->t)) {
401 r = 0; 459 r = 0;
402 } else { 460 } else {
403 r = gprs & 31; gprs >>= 5; 461 r = gprs & 31; gprs >>= 5;
462 if (!ref) continue;
404 } 463 }
405#endif 464#endif
406 if (r) { /* Argument is in a register. */ 465 if (r) { /* Argument is in a register. */
@@ -442,6 +501,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
442 ofs += sizeof(intptr_t); 501 ofs += sizeof(intptr_t);
443 } 502 }
444 } 503 }
504#if LJ_64 && !LJ_ABI_WIN
505 if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR;
506#endif
445} 507}
446 508
447/* Setup result reg/sp for call. Evict scratch regs. */ 509/* Setup result reg/sp for call. Evict scratch regs. */
@@ -503,23 +565,50 @@ static void asm_call(ASMState *as, IRIns *ir)
503 asm_gencall(as, ci, args); 565 asm_gencall(as, ci, args);
504} 566}
505 567
568/* Return a constant function pointer or NULL for indirect calls. */
569static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
570{
571#if LJ_32
572 UNUSED(as);
573 if (irref_isk(func))
574 return (void *)irf->i;
575#else
576 if (irref_isk(func)) {
577 MCode *p;
578 if (irf->o == IR_KINT64)
579 p = (MCode *)(void *)ir_k64(irf)->u64;
580 else
581 p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
582 if (p - as->mcp == (int32_t)(p - as->mcp))
583 return p; /* Call target is still in +-2GB range. */
584 /* Avoid the indirect case of emit_call(). Try to hoist func addr. */
585 }
586#endif
587 return NULL;
588}
589
506static void asm_callx(ASMState *as, IRIns *ir) 590static void asm_callx(ASMState *as, IRIns *ir)
507{ 591{
508 IRRef args[CCI_NARGS_MAX]; 592 IRRef args[CCI_NARGS_MAX];
509 CCallInfo ci; 593 CCallInfo ci;
594 IRRef func;
510 IRIns *irf; 595 IRIns *irf;
511 ci.flags = asm_callx_flags(as, ir); 596 ci.flags = asm_callx_flags(as, ir);
512 asm_collectargs(as, ir, &ci, args); 597 asm_collectargs(as, ir, &ci, args);
513 asm_setupresult(as, ir, &ci); 598 asm_setupresult(as, ir, &ci);
514 irf = IR(ir->op2); 599#if LJ_32
515 if (LJ_32 && irref_isk(ir->op2)) { /* Call to constant address on x86. */ 600 /* Have to readjust stack after non-cdecl calls due to callee cleanup. */
516 ci.func = (ASMFunction)(void *)(uintptr_t)(uint32_t)irf->i; 601 if ((ci.flags & CCI_CC_MASK) != CCI_CC_CDECL)
517 } else { 602 emit_spsub(as, 4 * asm_count_call_slots(as, &ci, args));
518 /* Prefer a non-argument register or RID_RET for indirect calls. */ 603#endif
519 RegSet allow = (RSET_GPR & ~RSET_SCRATCH)|RID2RSET(RID_RET); 604 func = ir->op2; irf = IR(func);
520 Reg r = ra_alloc1(as, ir->op2, allow); 605 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
606 ci.func = (ASMFunction)asm_callx_func(as, irf, func);
607 if (!(void *)ci.func) {
608 /* Use a (hoistable) non-scratch register for indirect calls. */
609 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
610 Reg r = ra_alloc1(as, func, allow);
521 emit_rr(as, XO_GROUP5, XOg_CALL, r); 611 emit_rr(as, XO_GROUP5, XOg_CALL, r);
522 ci.func = (ASMFunction)(void *)0;
523 } 612 }
524 asm_gencall(as, &ci, args); 613 asm_gencall(as, &ci, args);
525} 614}
@@ -2608,35 +2697,14 @@ static void asm_ir(ASMState *as, IRIns *ir)
2608static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2697static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2609{ 2698{
2610 IRRef args[CCI_NARGS_MAX]; 2699 IRRef args[CCI_NARGS_MAX];
2611 uint32_t nargs = (int)CCI_NARGS(ci); 2700 int nslots;
2612 int nslots = 0;
2613 asm_collectargs(as, ir, ci, args); 2701 asm_collectargs(as, ir, ci, args);
2614#if LJ_64 2702 nslots = asm_count_call_slots(as, ci, args);
2615 if (LJ_ABI_WIN) {
2616 nslots = (int)(nargs*2); /* Only matters for more than four args. */
2617 } else {
2618 uint32_t i;
2619 int ngpr = 6, nfpr = 8;
2620 for (i = 0; i < nargs; i++)
2621 if (args[i] && irt_isfp(IR(args[i])->t)) {
2622 if (nfpr > 0) nfpr--; else nslots += 2;
2623 } else {
2624 if (ngpr > 0) ngpr--; else nslots += 2;
2625 }
2626 }
2627 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2703 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
2628 as->evenspill = nslots; 2704 as->evenspill = nslots;
2705#if LJ_64
2629 return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); 2706 return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
2630#else 2707#else
2631 if ((ci->flags & CCI_FASTCALL)) {
2632 lua_assert(nargs <= 2);
2633 } else {
2634 uint32_t i;
2635 for (i = 0; i < nargs; i++)
2636 nslots += (args[i] && irt_isnum(IR(args[i])->t)) ? 2 : 1;
2637 if (nslots > as->evenspill) /* Leave room for args. */
2638 as->evenspill = nslots;
2639 }
2640 return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET); 2708 return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET);
2641#endif 2709#endif
2642} 2710}
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 281b45a6..c1c04b6f 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -402,7 +402,7 @@ static void ccall_struct_ret(CCallState *cc, int *rcl, uint8_t *dp, CTSize sz)
402/* -- Common C call handling ---------------------------------------------- */ 402/* -- Common C call handling ---------------------------------------------- */
403 403
404/* Infer the destination CTypeID for a vararg argument. */ 404/* Infer the destination CTypeID for a vararg argument. */
405static CTypeID ccall_ctid_vararg(CTState *cts, cTValue *o) 405CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o)
406{ 406{
407 if (tvisnumber(o)) { 407 if (tvisnumber(o)) {
408 return CTID_DOUBLE; 408 return CTID_DOUBLE;
@@ -506,7 +506,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
506 } else { 506 } else {
507 if (!(ct->info & CTF_VARARG)) 507 if (!(ct->info & CTF_VARARG))
508 lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too many arguments. */ 508 lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too many arguments. */
509 did = ccall_ctid_vararg(cts, o); /* Infer vararg type. */ 509 did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */
510 isva = 1; 510 isva = 1;
511 } 511 }
512 d = ctype_raw(cts, did); 512 d = ctype_raw(cts, did);
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index d9b1e42c..890f665d 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -7,6 +7,7 @@
7#define _LJ_CCALL_H 7#define _LJ_CCALL_H
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_ctype.h"
10 11
11#if LJ_HASFFI 12#if LJ_HASFFI
12 13
@@ -129,6 +130,8 @@ typedef struct CCallState {
129 130
130/* Really belongs to lj_vm.h. */ 131/* Really belongs to lj_vm.h. */
131LJ_ASMF void LJ_FASTCALL lj_vm_ffi_call(CCallState *cc); 132LJ_ASMF void LJ_FASTCALL lj_vm_ffi_call(CCallState *cc);
133
134LJ_FUNC CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o);
132LJ_FUNC int lj_ccall_func(lua_State *L, GCcdata *cd); 135LJ_FUNC int lj_ccall_func(lua_State *L, GCcdata *cd);
133 136
134#endif 137#endif
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 4e6a644a..3dd6f495 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -18,6 +18,7 @@
18#include "lj_cparse.h" 18#include "lj_cparse.h"
19#include "lj_cconv.h" 19#include "lj_cconv.h"
20#include "lj_clib.h" 20#include "lj_clib.h"
21#include "lj_ccall.h"
21#include "lj_ir.h" 22#include "lj_ir.h"
22#include "lj_jit.h" 23#include "lj_jit.h"
23#include "lj_ircall.h" 24#include "lj_ircall.h"
@@ -364,7 +365,7 @@ static TRef crec_tv_ct(jit_State *J, CType *s, CTypeID sid, TRef sp)
364 365
365/* -- Convert TValue to C type (store) ------------------------------------ */ 366/* -- Convert TValue to C type (store) ------------------------------------ */
366 367
367static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, TValue *sval) 368static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
368{ 369{
369 CTState *cts = ctype_ctsG(J2G(J)); 370 CTState *cts = ctype_ctsG(J2G(J));
370 CTypeID sid = CTID_P_VOID; 371 CTypeID sid = CTID_P_VOID;
@@ -747,29 +748,88 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
747 CTState *cts, CType *ct) 748 CTState *cts, CType *ct)
748{ 749{
749 TRef args[CCI_NARGS_MAX]; 750 TRef args[CCI_NARGS_MAX];
751 CTypeID fid;
750 MSize i, n; 752 MSize i, n;
751 TRef tr; 753 TRef tr, *base;
754 cTValue *o;
755#if LJ_TARGET_X86
756#if LJ_ABI_WIN
757 TRef *arg0 = NULL, *arg1 = NULL;
758#endif
759 int ngpr = 0;
760 if (ctype_cconv(ct->info) == CTCC_THISCALL)
761 ngpr = 1;
762 else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
763 ngpr = 2;
764#endif
765
766 /* Skip initial attributes. */
767 fid = ct->sib;
768 while (fid) {
769 CType *ctf = ctype_get(cts, fid);
770 if (!ctype_isattrib(ctf->info)) break;
771 fid = ctf->sib;
772 }
752 args[0] = TREF_NIL; 773 args[0] = TREF_NIL;
753 for (n = 0; J->base[n+1]; n++) { 774 for (n = 0, base = J->base+1, o = rd->argv+1; *base; n++, base++, o++) {
775 CTypeID did;
754 CType *d; 776 CType *d;
755 do { 777
756 if (!ct->sib || n >= CCI_NARGS_MAX) 778 if (n >= CCI_NARGS_MAX)
757 lj_trace_err(J, LJ_TRERR_NYICALL);
758 ct = ctype_get(cts, ct->sib);
759 } while (ctype_isattrib(ct->info));
760 if (!ctype_isfield(ct->info))
761 lj_trace_err(J, LJ_TRERR_NYICALL); 779 lj_trace_err(J, LJ_TRERR_NYICALL);
762 d = ctype_rawchild(cts, ct); 780
781 if (fid) { /* Get argument type from field. */
782 CType *ctf = ctype_get(cts, fid);
783 fid = ctf->sib;
784 lua_assert(ctype_isfield(ctf->info));
785 did = ctype_cid(ctf->info);
786 } else {
787 if (!(ct->info & CTF_VARARG))
788 lj_trace_err(J, LJ_TRERR_NYICALL); /* Too many arguments. */
789 did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */
790 }
791 d = ctype_raw(cts, did);
763 if (!(ctype_isnum(d->info) || ctype_isptr(d->info) || 792 if (!(ctype_isnum(d->info) || ctype_isptr(d->info) ||
764 ctype_isenum(d->info))) 793 ctype_isenum(d->info)))
765 lj_trace_err(J, LJ_TRERR_NYICALL); 794 lj_trace_err(J, LJ_TRERR_NYICALL);
766 tr = crec_ct_tv(J, d, 0, J->base[n+1], &rd->argv[n+1]); 795 tr = crec_ct_tv(J, d, 0, *base, o);
767 if (ctype_isinteger_or_bool(d->info) && d->size < 4) { 796 if (ctype_isinteger_or_bool(d->info)) {
768 if ((d->info & CTF_UNSIGNED)) 797 if (d->size < 4) {
769 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0); 798 if ((d->info & CTF_UNSIGNED))
770 else 799 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0);
771 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16, IRCONV_SEXT); 800 else
801 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT);
802 }
772 } 803 }
804#if LJ_TARGET_X86
805 /* 64 bit args must not end up in registers for fastcall/thiscall. */
806#if LJ_ABI_WIN
807 if (!ctype_isfp(d->info)) {
808 /* Sigh, the Windows/x86 ABI allows reordering across 64 bit args. */
809 if (tref_typerange(tr, IRT_I64, IRT_U64)) {
810 if (ngpr) {
811 arg0 = &args[n]; args[n++] = TREF_NIL; ngpr--;
812 if (ngpr) {
813 arg1 = &args[n]; args[n++] = TREF_NIL; ngpr--;
814 }
815 }
816 } else {
817 if (arg0) { *arg0 = tr; arg0 = NULL; n--; continue; }
818 if (arg1) { *arg1 = tr; arg1 = NULL; n--; continue; }
819 if (ngpr) ngpr--;
820 }
821 }
822#else
823 if (!ctype_isfp(d->info) && ngpr) {
824 if (tref_typerange(tr, IRT_I64, IRT_U64)) {
825 /* No reordering for other x86 ABIs. Simply add alignment args. */
826 do { args[n++] = TREF_NIL; } while (--ngpr);
827 } else {
828 ngpr--;
829 }
830 }
831#endif
832#endif
773 args[n] = tr; 833 args[n] = tr;
774 } 834 }
775 tr = args[0]; 835 tr = args[0];
@@ -801,12 +861,15 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
801 } 861 }
802 if (!(ctype_isnum(ctr->info) || ctype_isptr(ctr->info) || 862 if (!(ctype_isnum(ctr->info) || ctype_isptr(ctr->info) ||
803 ctype_isvoid(ctr->info)) || 863 ctype_isvoid(ctr->info)) ||
804 ctype_isbool(ctr->info) || (ct->info & CTF_VARARG) || 864 ctype_isbool(ctr->info) || t == IRT_CDATA)
865 lj_trace_err(J, LJ_TRERR_NYICALL);
866 if ((ct->info & CTF_VARARG)
805#if LJ_TARGET_X86 867#if LJ_TARGET_X86
806 ctype_cconv(ct->info) != CTCC_CDECL || 868 || ctype_cconv(ct->info) != CTCC_CDECL
807#endif 869#endif
808 t == IRT_CDATA) 870 )
809 lj_trace_err(J, LJ_TRERR_NYICALL); 871 func = emitir(IRT(IR_CARG, IRT_NIL), func,
872 lj_ir_kint(J, ctype_typeid(cts, ct)));
810 tr = emitir(IRT(IR_CALLXS, t), crec_call_args(J, rd, cts, ct), func); 873 tr = emitir(IRT(IR_CALLXS, t), crec_call_args(J, rd, cts, ct), func);
811 if (t == IRT_FLOAT || t == IRT_U32) { 874 if (t == IRT_FLOAT || t == IRT_U32) {
812 tr = emitconv(tr, IRT_NUM, t, 0); 875 tr = emitconv(tr, IRT_NUM, t, 0);
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 82c4427a..49f28108 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -117,7 +117,7 @@ LJ_STATIC_ASSERT(((int)CT_STRUCT & (int)CT_ARRAY) == CT_STRUCT);
117 info = (info & ~(CTMASK_##field<<CTSHIFT_##field)) | \ 117 info = (info & ~(CTMASK_##field<<CTSHIFT_##field)) | \
118 (((CTSize)(val) & CTMASK_##field) << CTSHIFT_##field) 118 (((CTSize)(val) & CTMASK_##field) << CTSHIFT_##field)
119 119
120/* Calling conventions. */ 120/* Calling conventions. ORDER CC */
121enum { CTCC_CDECL, CTCC_THISCALL, CTCC_FASTCALL, CTCC_STDCALL }; 121enum { CTCC_CDECL, CTCC_THISCALL, CTCC_FASTCALL, CTCC_STDCALL };
122 122
123/* Attribute numbers. */ 123/* Attribute numbers. */
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index b1e0e446..8ccc852e 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -27,15 +27,23 @@ typedef struct CCallInfo {
27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) 27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
28#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) 28#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
29#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) 29#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
30#define CCI_CALL_FN (CCI_CALL_N|CCI_FASTCALL) 30#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL)
31#define CCI_CALL_FL (CCI_CALL_L|CCI_FASTCALL) 31#define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL)
32#define CCI_CALL_FS (CCI_CALL_S|CCI_FASTCALL) 32#define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL)
33 33
34/* C call info flags. */ 34/* C call info flags. */
35#define CCI_L 0x0100 /* Implicit L arg. */ 35#define CCI_L 0x0100 /* Implicit L arg. */
36#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ 36#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */
37#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ 37#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */
38#define CCI_FASTCALL 0x0800 /* Fastcall convention. */ 38#define CCI_VARARG 0x0800 /* Vararg function. */
39
40#define CCI_CC_MASK 0x3000 /* Calling convention mask. */
41#define CCI_CC_SHIFT 12
42/* ORDER CC */
43#define CCI_CC_CDECL 0x0000 /* Default cdecl calling convention. */
44#define CCI_CC_THISCALL 0x1000 /* Thiscall calling convention. */
45#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */
46#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */
39 47
40/* Helpers for conditional function definitions. */ 48/* Helpers for conditional function definitions. */
41#define IRCALLCOND_ANY(x) x 49#define IRCALLCOND_ANY(x) x
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index 8abc38fd..f754a8d3 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -207,7 +207,9 @@ typedef enum PPCIns {
207 PPCI_BCTRL = 0x4e800421, 207 PPCI_BCTRL = 0x4e800421,
208 208
209 PPCI_CRANDC = 0x4c000102, 209 PPCI_CRANDC = 0x4c000102,
210 PPCI_CRXOR = 0x4c000182,
210 PPCI_CRAND = 0x4c000202, 211 PPCI_CRAND = 0x4c000202,
212 PPCI_CREQV = 0x4c000242,
211 PPCI_CRORC = 0x4c000342, 213 PPCI_CRORC = 0x4c000342,
212 PPCI_CROR = 0x4c000382, 214 PPCI_CROR = 0x4c000382,
213 215
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index c4445ba4..233b77e5 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -85,6 +85,7 @@ enum {
85#define REGARG_GPRS \ 85#define REGARG_GPRS \
86 (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5)) 86 (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
87#define REGARG_NUMGPR 4 87#define REGARG_NUMGPR 4
88#define REGARG_NUMFPR 4
88#define REGARG_FIRSTFPR RID_XMM0 89#define REGARG_FIRSTFPR RID_XMM0
89#define REGARG_LASTFPR RID_XMM3 90#define REGARG_LASTFPR RID_XMM3
90#define STACKARG_OFS (4*8) 91#define STACKARG_OFS (4*8)
@@ -96,6 +97,7 @@ enum {
96 (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \ 97 (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
97 <<5))<<5))<<5))<<5))<<5)) 98 <<5))<<5))<<5))<<5))<<5))
98#define REGARG_NUMGPR 6 99#define REGARG_NUMGPR 6
100#define REGARG_NUMFPR 8
99#define REGARG_FIRSTFPR RID_XMM0 101#define REGARG_FIRSTFPR RID_XMM0
100#define REGARG_LASTFPR RID_XMM7 102#define REGARG_LASTFPR RID_XMM7
101#define STACKARG_OFS 0 103#define STACKARG_OFS 0
@@ -105,6 +107,7 @@ enum {
105#define RSET_SCRATCH (RSET_ACD|RSET_FPR) 107#define RSET_SCRATCH (RSET_ACD|RSET_FPR)
106#define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */ 108#define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */
107#define REGARG_NUMGPR 2 /* Fastcall only. */ 109#define REGARG_NUMGPR 2 /* Fastcall only. */
110#define REGARG_NUMFPR 0
108#define STACKARG_OFS 0 111#define STACKARG_OFS 0
109#endif 112#endif
110 113
@@ -192,6 +195,7 @@ typedef enum {
192 XI_JCCs = 0x70, /* Really 7x. */ 195 XI_JCCs = 0x70, /* Really 7x. */
193 XI_JCCn = 0x80, /* Really 0f8x. */ 196 XI_JCCn = 0x80, /* Really 0f8x. */
194 XI_LEA = 0x8d, 197 XI_LEA = 0x8d,
198 XI_MOVrib = 0xb0, /* Really b0+r. */
195 XI_MOVri = 0xb8, /* Really b8+r. */ 199 XI_MOVri = 0xb8, /* Really b8+r. */
196 XI_ARITHib = 0x80, 200 XI_ARITHib = 0x80,
197 XI_ARITHi = 0x81, 201 XI_ARITHi = 0x81,