aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Mike Pall <mike> 2010-02-25 03:35:07 +0100
committer Mike Pall <mike> 2010-02-25 03:35:07 +0100
commit 3c6cec0846ae25dc9d0c2495cb02316694725add (patch)
tree 6900f1555f4c920f87e7d2ef23fce65051ddaf65 /src
parent b95294572ce8efa527e0b0118bb7168117afd171 (diff)
download luajit-3c6cec0846ae25dc9d0c2495cb02316694725add.tar.gz
luajit-3c6cec0846ae25dc9d0c2495cb02316694725add.tar.bz2
luajit-3c6cec0846ae25dc9d0c2495cb02316694725add.zip
Add x64 call argument setup. More 32/64 bit cleanups in assembler.
Diffstat (limited to 'src')
-rw-r--r-- src/lj_asm.c 100
-rw-r--r-- src/lj_target_x86.h 26
2 files changed, 79 insertions, 47 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 0b6ebc09..f38ceaef 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1292,21 +1292,52 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
1292{ 1292{
1293 RegSet allow = RSET_ALL; 1293 RegSet allow = RSET_ALL;
1294 uint32_t n, nargs = CCI_NARGS(ci); 1294 uint32_t n, nargs = CCI_NARGS(ci);
1295 int32_t ofs = 0; 1295 int32_t ofs = STACKARG_OFS;
1296 uint32_t gprs = REGARG_GPRS;
1297#if LJ_64
1298 Reg fpr = REGARG_FIRSTFPR;
1299#endif
1296 lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */ 1300 lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
1297 emit_call(as, ci->func); 1301 emit_call(as, ci->func);
1298 for (n = 0; n < nargs; n++) { /* Setup args. */ 1302 for (n = 0; n < nargs; n++) { /* Setup args. */
1299#if LJ_64
1300#error "NYI: 64 bit mode call argument setup"
1301#endif
1302 IRIns *ir = IR(args[n]); 1303 IRIns *ir = IR(args[n]);
1304 Reg r;
1305#if LJ_64 && defined(_WIN64)
1306 /* Windows/x64 argument registers are strictly positional. */
1307 r = irt_isnum(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
1308 fpr++; gprs >>= 5;
1309#elif LJ_64
1310 /* POSIX/x64 argument registers are used in order of appearance. */
1303 if (irt_isnum(ir->t)) { 1311 if (irt_isnum(ir->t)) {
1304 if ((ofs & 4) && irref_isk(args[n])) { 1312 r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++;
1313 } else {
1314 r = gprs & 31; gprs >>= 5;
1315 }
1316#else
1317 if (irt_isnum(ir->t) || !(ci->flags & CCI_FASTCALL)) {
1318 r = 0;
1319 } else {
1320 r = gprs & 31; gprs >>= 5;
1321 }
1322#endif
1323 if (r) { /* Argument is in a register. */
1324 if (args[n] < ASMREF_TMP1) {
1325 emit_loadi(as, r, ir->i);
1326 } else {
1327 lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
1328 if (ra_hasreg(ir->r)) {
1329 ra_noweak(as, ir->r);
1330 ra_movrr(as, ir, r, ir->r);
1331 } else {
1332 ra_allocref(as, args[n], RID2RSET(r));
1333 }
1334 }
1335 } else if (irt_isnum(ir->t)) { /* FP argument is on stack. */
1336 if (!LJ_64 && (ofs & 4) && irref_isk(args[n])) {
1305 /* Split stores for unaligned FP consts. */ 1337 /* Split stores for unaligned FP consts. */
1306 emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); 1338 emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
1307 emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi); 1339 emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
1308 } else { 1340 } else {
1309 Reg r;
1310 if ((allow & RSET_FPR) == RSET_EMPTY) 1341 if ((allow & RSET_FPR) == RSET_EMPTY)
1311 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 1342 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1312 r = ra_alloc1(as, args[n], allow & RSET_FPR); 1343 r = ra_alloc1(as, args[n], allow & RSET_FPR);
@@ -1314,34 +1345,18 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
1314 emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs); 1345 emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
1315 } 1346 }
1316 ofs += 8; 1347 ofs += 8;
1317 } else { 1348 } else { /* Non-FP argument is on stack. */
1318 if ((ci->flags & CCI_FASTCALL) && n < 2) { 1349 /* NYI: no widening for 64 bit parameters on x64. */
1319 Reg r = n == 0 ? RID_ECX : RID_EDX; 1350 if (args[n] < ASMREF_TMP1) {
1320 if (args[n] < ASMREF_TMP1) { 1351 emit_movmroi(as, RID_ESP, ofs, ir->i);
1321 emit_loadi(as, r, ir->i);
1322 } else {
1323 lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
1324 allow &= ~RID2RSET(r);
1325 if (ra_hasreg(ir->r)) {
1326 ra_noweak(as, ir->r);
1327 ra_movrr(as, ir, r, ir->r);
1328 } else {
1329 ra_allocref(as, args[n], RID2RSET(r));
1330 }
1331 }
1332 } else { 1352 } else {
1333 if (args[n] < ASMREF_TMP1) { 1353 if ((allow & RSET_GPR) == RSET_EMPTY)
1334 emit_movmroi(as, RID_ESP, ofs, ir->i); 1354 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1335 } else { 1355 r = ra_alloc1(as, args[n], allow & RSET_GPR);
1336 Reg r; 1356 allow &= ~RID2RSET(r);
1337 if ((allow & RSET_GPR) == RSET_EMPTY) 1357 emit_movtomro(as, REX_64LU(ir, r), RID_ESP, ofs);
1338 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1339 r = ra_alloc1(as, args[n], allow & RSET_GPR);
1340 allow &= ~RID2RSET(r);
1341 emit_movtomro(as, r, RID_ESP, ofs);
1342 }
1343 ofs += 4;
1344 } 1358 }
1359 ofs += sizeof(intptr_t);
1345 } 1360 }
1346 } 1361 }
1347} 1362}
@@ -2561,7 +2576,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2561 asm_guardcc(as, cc); 2576 asm_guardcc(as, cc);
2562 if (usetest && left != RID_MRM) { 2577 if (usetest && left != RID_MRM) {
2563 /* Use test r,r instead of cmp r,0. */ 2578 /* Use test r,r instead of cmp r,0. */
2564 emit_rr(as, XO_TEST, left, left); 2579 emit_rr(as, XO_TEST, REX_64LU(ir, left), left);
2565 if (irl+1 == ir) /* Referencing previous ins? */ 2580 if (irl+1 == ir) /* Referencing previous ins? */
2566 as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */ 2581 as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */
2567 } else { 2582 } else {
@@ -2580,11 +2595,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2580 Reg left = ra_alloc1(as, lref, RSET_GPR); 2595 Reg left = ra_alloc1(as, lref, RSET_GPR);
2581 Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left)); 2596 Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
2582 asm_guardcc(as, cc); 2597 asm_guardcc(as, cc);
2583#if LJ_64 2598 emit_mrm(as, XO_CMP, REX_64LU(ir, left), right);
2584 if (irt_islightud(ir->t))
2585 left |= REX_64;
2586#endif
2587 emit_mrm(as, XO_CMP, left, right);
2588 } 2599 }
2589 } 2600 }
2590} 2601}
@@ -2732,14 +2743,14 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
2732 /* We don't know spadj yet, so get the C frame from L->cframe. */ 2743 /* We don't know spadj yet, so get the C frame from L->cframe. */
2733 emit_movmroi(as, tmp, CFRAME_OFS_PC, 2744 emit_movmroi(as, tmp, CFRAME_OFS_PC,
2734 (int32_t)as->T->snapmap[snap->mapofs+snap->nent]); 2745 (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
2735 emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); 2746 emit_gri(as, XG_ARITHi(XOg_AND), tmp|REX_64, CFRAME_RAWMASK);
2736 lstate = IR(ASMREF_L)->r; 2747 lstate = IR(ASMREF_L)->r;
2737 emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe)); 2748 emit_rmro(as, XO_MOV, tmp|REX_64, lstate, offsetof(lua_State, cframe));
2738 /* It's ok if lstate is already in a non-scratch reg. But all allocations 2749 /* It's ok if lstate is already in a non-scratch reg. But all allocations
2739 ** in the non-fast path must use a scratch reg. See comment above. 2750 ** in the non-fast path must use a scratch reg. See comment above.
2740 */ 2751 */
2741 base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate)); 2752 base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
2742 emit_movtomro(as, base, lstate, offsetof(lua_State, base)); 2753 emit_movtomro(as, base|REX_64, lstate, offsetof(lua_State, base));
2743 asm_gc_sync(as, snap, base); 2754 asm_gc_sync(as, snap, base);
2744 /* BASE/L get restored anyway, better do it inside the slow path. */ 2755 /* BASE/L get restored anyway, better do it inside the slow path. */
2745 if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE); 2756 if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
@@ -3447,7 +3458,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
3447 case IR_CALLN: case IR_CALLL: case IR_CALLS: { 3458 case IR_CALLN: case IR_CALLL: case IR_CALLS: {
3448 const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; 3459 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
3449#if LJ_64 3460#if LJ_64
3450 /* NYI: add stack slots for calls with more than 4/6 args. */ 3461 /* NYI: add stack slots for x64 calls with many args. */
3462#ifdef _WIN64
3463 lua_assert(CCI_NARGS(ci) <= 4);
3464#else
3465 lua_assert(CCI_NARGS(ci) <= 6); /* Safe lower bound. */
3466#endif
3451 ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET); 3467 ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
3452#else 3468#else
3453 /* NYI: not fastcall-aware, but doesn't matter (yet). */ 3469 /* NYI: not fastcall-aware, but doesn't matter (yet). */
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 8e9a8788..83eba0ec 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -78,14 +78,27 @@ enum {
78/* Windows x64 ABI. */ 78/* Windows x64 ABI. */
79#define RSET_SCRATCH \ 79#define RSET_SCRATCH \
80 (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1)) 80 (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
81#define REGARG_GPRS \
82 (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
83#define REGARG_FIRSTFPR RID_XMM0
84#define REGARG_LASTFPR RID_XMM3
85#define STACKARG_OFS (4*8)
81#else 86#else
82/* The rest of the civilized x64 world has a common ABI. */ 87/* The rest of the civilized x64 world has a common ABI. */
83#define RSET_SCRATCH \ 88#define RSET_SCRATCH \
84 (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR) 89 (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
90#define REGARG_GPRS \
91 (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
92 <<5))<<5))<<5))<<5))<<5))
93#define REGARG_FIRSTFPR RID_XMM0
94#define REGARG_LASTFPR RID_XMM7
95#define STACKARG_OFS 0
85#endif 96#endif
86#else 97#else
87/* Common x86 ABI. */ 98/* Common x86 ABI. */
88#define RSET_SCRATCH (RSET_ACD|RSET_FPR) 99#define RSET_SCRATCH (RSET_ACD|RSET_FPR)
100#define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */
101#define STACKARG_OFS 0
89#endif 102#endif
90 103
91#if LJ_64 104#if LJ_64
@@ -96,23 +109,26 @@ enum {
96 109
97/* -- Spill slots --------------------------------------------------------- */ 110/* -- Spill slots --------------------------------------------------------- */
98 111
99/* Available fixed spill slots in interpreter frame. 112/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
113**
114** SPS_FIXED: Available fixed spill slots in interpreter frame.
100** This definition must match with the *.dasc file(s). 115** This definition must match with the *.dasc file(s).
116**
117** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
101*/ 118*/
102#if LJ_64 119#if LJ_64
103#ifdef _WIN64 120#ifdef _WIN64
104#define SPS_FIXED (5*2) 121#define SPS_FIXED (5*2)
122#define SPS_FIRST (4*2) /* Don't use callee register save area. */
105#else 123#else
106#define SPS_FIXED 2 124#define SPS_FIXED 2
125#define SPS_FIRST 2
107#endif 126#endif
108#else 127#else
109#define SPS_FIXED 6 128#define SPS_FIXED 6
110#endif
111
112/* First spill slot for general use. Reserve one 64 bit slot. */
113#define SPS_FIRST 2 129#define SPS_FIRST 2
130#endif
114 131
115/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
116#define sps_scale(slot) (4 * (int32_t)(slot)) 132#define sps_scale(slot) (4 * (int32_t)(slot))
117 133
118/* -- Exit state ---------------------------------------------------------- */ 134/* -- Exit state ---------------------------------------------------------- */