author     Mike Pall <mike>    2010-02-25 03:35:07 +0100
committer  Mike Pall <mike>    2010-02-25 03:35:07 +0100
commit     3c6cec0846ae25dc9d0c2495cb02316694725add (patch)
tree       6900f1555f4c920f87e7d2ef23fce65051ddaf65 /src
parent     b95294572ce8efa527e0b0118bb7168117afd171 (diff)
download   luajit-3c6cec0846ae25dc9d0c2495cb02316694725add.tar.gz
           luajit-3c6cec0846ae25dc9d0c2495cb02316694725add.tar.bz2
           luajit-3c6cec0846ae25dc9d0c2495cb02316694725add.zip
Add x64 call argument setup. More 32/64 bit cleanups in assembler.
Diffstat (limited to 'src')

 -rw-r--r--  src/lj_asm.c         | 100
 -rw-r--r--  src/lj_target_x86.h  |  26

 2 files changed, 79 insertions, 47 deletions
```diff
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 0b6ebc09..f38ceaef 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1292,21 +1292,52 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
   RegSet allow = RSET_ALL;
   uint32_t n, nargs = CCI_NARGS(ci);
-  int32_t ofs = 0;
+  int32_t ofs = STACKARG_OFS;
+  uint32_t gprs = REGARG_GPRS;
+#if LJ_64
+  Reg fpr = REGARG_FIRSTFPR;
+#endif
   lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL)));  /* Avoid stack adj. */
   emit_call(as, ci->func);
   for (n = 0; n < nargs; n++) {  /* Setup args. */
-#if LJ_64
-#error "NYI: 64 bit mode call argument setup"
-#endif
     IRIns *ir = IR(args[n]);
+    Reg r;
+#if LJ_64 && defined(_WIN64)
+    /* Windows/x64 argument registers are strictly positional. */
+    r = irt_isnum(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
+    fpr++; gprs >>= 5;
+#elif LJ_64
+    /* POSIX/x64 argument registers are used in order of appearance. */
     if (irt_isnum(ir->t)) {
-      if ((ofs & 4) && irref_isk(args[n])) {
+      r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++;
+    } else {
+      r = gprs & 31; gprs >>= 5;
+    }
+#else
+    if (irt_isnum(ir->t) || !(ci->flags & CCI_FASTCALL)) {
+      r = 0;
+    } else {
+      r = gprs & 31; gprs >>= 5;
+    }
+#endif
+    if (r) {  /* Argument is in a register. */
+      if (args[n] < ASMREF_TMP1) {
+        emit_loadi(as, r, ir->i);
+      } else {
+        lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
+        if (ra_hasreg(ir->r)) {
+          ra_noweak(as, ir->r);
+          ra_movrr(as, ir, r, ir->r);
+        } else {
+          ra_allocref(as, args[n], RID2RSET(r));
+        }
+      }
+    } else if (irt_isnum(ir->t)) {  /* FP argument is on stack. */
+      if (!LJ_64 && (ofs & 4) && irref_isk(args[n])) {
         /* Split stores for unaligned FP consts. */
         emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
         emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
       } else {
-        Reg r;
         if ((allow & RSET_FPR) == RSET_EMPTY)
           lj_trace_err(as->J, LJ_TRERR_NYICOAL);
         r = ra_alloc1(as, args[n], allow & RSET_FPR);
@@ -1314,34 +1345,18 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
         emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
       }
       ofs += 8;
-    } else {
-      if ((ci->flags & CCI_FASTCALL) && n < 2) {
-        Reg r = n == 0 ? RID_ECX : RID_EDX;
-        if (args[n] < ASMREF_TMP1) {
-          emit_loadi(as, r, ir->i);
-        } else {
-          lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
-          allow &= ~RID2RSET(r);
-          if (ra_hasreg(ir->r)) {
-            ra_noweak(as, ir->r);
-            ra_movrr(as, ir, r, ir->r);
-          } else {
-            ra_allocref(as, args[n], RID2RSET(r));
-          }
-        }
+    } else {  /* Non-FP argument is on stack. */
+      /* NYI: no widening for 64 bit parameters on x64. */
+      if (args[n] < ASMREF_TMP1) {
+        emit_movmroi(as, RID_ESP, ofs, ir->i);
       } else {
-        if (args[n] < ASMREF_TMP1) {
-          emit_movmroi(as, RID_ESP, ofs, ir->i);
-        } else {
-          Reg r;
-          if ((allow & RSET_GPR) == RSET_EMPTY)
-            lj_trace_err(as->J, LJ_TRERR_NYICOAL);
-          r = ra_alloc1(as, args[n], allow & RSET_GPR);
-          allow &= ~RID2RSET(r);
-          emit_movtomro(as, r, RID_ESP, ofs);
-        }
-        ofs += 4;
+        if ((allow & RSET_GPR) == RSET_EMPTY)
+          lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+        r = ra_alloc1(as, args[n], allow & RSET_GPR);
+        allow &= ~RID2RSET(r);
+        emit_movtomro(as, REX_64LU(ir, r), RID_ESP, ofs);
       }
+      ofs += sizeof(intptr_t);
     }
   }
 }
```
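The core of the patch is the register selection above, which replaces the old `#error "NYI: 64 bit mode call argument setup"` stub. Below is a small standalone sketch (not LuaJIT source) of that selection logic: it consumes a packed GPR list five bits at a time with `& 31` / `>>= 5` and advances an FPR counter, with the Windows/x64 path burning one positional slot per argument while the POSIX/x64 path hands out GPRs and FPRs independently. The function name `pick_arg_reg`, the struct, and the simplified register IDs are invented for illustration, and a single FPR cut-off is used for both ABIs (the real code uses REGARG_LASTFPR, XMM3 on Windows and XMM7 elsewhere).

```c
#include <stdint.h>
#include <stdio.h>

/* Standalone sketch, not LuaJIT source.  Register IDs are illustrative
** stand-ins for the RID_* values in lj_target_x86.h; 0 plays the role of
** "no register, pass on the stack", mirroring r == 0 in asm_gencall.
*/
enum { RNONE = 0, RCX = 1, RDX = 2, RSI = 6, RDI = 7, R8 = 8, R9 = 9,
       XMM0 = 16, XMM_LAST = 23 };

/* Packed GPR argument lists: five bits per register, first argument in the
** lowest bits -- the same encoding REGARG_GPRS uses.
*/
#define GPRS_WIN64  (RCX | (RDX << 5) | (R8 << 10) | (R9 << 15))
#define GPRS_POSIX  (RDI | (RSI << 5) | (RDX << 10) | (RCX << 15) | \
                     (R8 << 20) | (R9 << 25))

typedef struct { uint32_t gprs; int fpr; int win64; } ArgState;

/* Pick the register for the next call argument (is_fp: FP argument?). */
static int pick_arg_reg(ArgState *st, int is_fp)
{
  int r;
  if (st->win64) {  /* Windows/x64: strictly positional, every arg uses a slot. */
    r = is_fp ? (st->fpr <= XMM_LAST ? st->fpr : RNONE) : (int)(st->gprs & 31);
    st->fpr++; st->gprs >>= 5;
  } else {          /* POSIX/x64: GPRs and FPRs are consumed independently. */
    if (is_fp) { r = st->fpr <= XMM_LAST ? st->fpr : RNONE; st->fpr++; }
    else { r = (int)(st->gprs & 31); st->gprs >>= 5; }
  }
  return r;
}

int main(void)
{
  ArgState win = { GPRS_WIN64, XMM0, 1 };
  ArgState psx = { GPRS_POSIX, XMM0, 0 };
  int is_fp[4] = { 0, 1, 0, 1 };  /* e.g. call(int, double, int, double) */
  for (int i = 0; i < 4; i++)
    printf("arg %d: win64 r=%d  posix r=%d\n", i,
           pick_arg_reg(&win, is_fp[i]), pick_arg_reg(&psx, is_fp[i]));
  return 0;  /* win64: 1 17 8 19 (rcx xmm1 r8 xmm3); posix: 7 16 6 17 (rdi xmm0 rsi xmm1) */
}
```

For such a hypothetical call(int, double, int, double), the Windows/x64 side assigns rcx, xmm1, r8, xmm3 (one slot per argument), while the POSIX side assigns rdi, xmm0, rsi, xmm1.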
```diff
@@ -2561,7 +2576,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       asm_guardcc(as, cc);
       if (usetest && left != RID_MRM) {
         /* Use test r,r instead of cmp r,0. */
-        emit_rr(as, XO_TEST, left, left);
+        emit_rr(as, XO_TEST, REX_64LU(ir, left), left);
         if (irl+1 == ir)  /* Referencing previous ins? */
           as->testmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
       } else {
@@ -2580,11 +2595,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       Reg left = ra_alloc1(as, lref, RSET_GPR);
       Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
       asm_guardcc(as, cc);
-#if LJ_64
-      if (irt_islightud(ir->t))
-        left |= REX_64;
-#endif
-      emit_mrm(as, XO_CMP, left, right);
+      emit_mrm(as, XO_CMP, REX_64LU(ir, left), right);
     }
   }
 }
```
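Both comparison hunks fold the explicit `#if LJ_64` / `irt_islightud` / `left |= REX_64` sequence into a single `REX_64LU()` macro. The macro's definition is not part of this diff; judging from the code it replaces, a plausible reconstruction (an assumption, not the actual definition in the LuaJIT headers) is:

```c
/* Hypothetical reconstruction of REX_64LU -- not taken from this commit.
** It ORs the REX.W (64 bit operand size) flag into a register operand when
** the IR result is a 64 bit lightuserdata, and is a no-op on x86.
*/
#if LJ_64
#define REX_64LU(ir, r)  ((r) | (irt_islightud((ir)->t) ? REX_64 : 0))
#else
#define REX_64LU(ir, r)  (r)
#endif
```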
```diff
@@ -2732,14 +2743,14 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
   /* We don't know spadj yet, so get the C frame from L->cframe. */
   emit_movmroi(as, tmp, CFRAME_OFS_PC,
                (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
-  emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
+  emit_gri(as, XG_ARITHi(XOg_AND), tmp|REX_64, CFRAME_RAWMASK);
   lstate = IR(ASMREF_L)->r;
-  emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe));
+  emit_rmro(as, XO_MOV, tmp|REX_64, lstate, offsetof(lua_State, cframe));
   /* It's ok if lstate is already in a non-scratch reg. But all allocations
   ** in the non-fast path must use a scratch reg. See comment above.
   */
   base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
-  emit_movtomro(as, base, lstate, offsetof(lua_State, base));
+  emit_movtomro(as, base|REX_64, lstate, offsetof(lua_State, base));
   asm_gc_sync(as, snap, base);
   /* BASE/L get restored anyway, better do it inside the slow path. */
   if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
```
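In asm_gc_check the `|REX_64` annotations make the cframe load, the mask, and the base store pointer-sized on x64. The reason the AND in particular needs the 64 bit operand size is that a 32 bit AND would also zero the upper half of the pointer. A minimal standalone illustration of that effect (the mask below is a stand-in, not the real CFRAME_RAWMASK value):

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in mask: strip low tag bits from a C-frame pointer (illustrative). */
#define RAWMASK_GUESS  (~(uintptr_t)3)

int main(void)
{
  /* Hypothetical tagged cframe pointer on x64. */
  uintptr_t cframe = (uintptr_t)0x00007f1234567010ULL | 1;
  uintptr_t ok  = cframe & RAWMASK_GUESS;                        /* 64 bit AND keeps the pointer. */
  uintptr_t bad = (uint32_t)(cframe & (uint32_t)RAWMASK_GUESS);  /* 32 bit AND truncates it. */
  printf("64 bit AND: 0x%" PRIxPTR "\n32 bit AND: 0x%" PRIxPTR "\n", ok, bad);
  return 0;
}
```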
```diff
@@ -3447,7 +3458,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
     case IR_CALLN: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
 #if LJ_64
-      /* NYI: add stack slots for calls with more than 4/6 args. */
+      /* NYI: add stack slots for x64 calls with many args. */
+#ifdef _WIN64
+      lua_assert(CCI_NARGS(ci) <= 4);
+#else
+      lua_assert(CCI_NARGS(ci) <= 6);  /* Safe lower bound. */
+#endif
       ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
 #else
       /* NYI: not fastcall-aware, but doesn't matter (yet). */
```
```diff
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 8e9a8788..83eba0ec 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -78,14 +78,27 @@ enum {
 /* Windows x64 ABI. */
 #define RSET_SCRATCH \
   (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
+#define REGARG_GPRS \
+  (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
+#define REGARG_FIRSTFPR RID_XMM0
+#define REGARG_LASTFPR RID_XMM3
+#define STACKARG_OFS (4*8)
 #else
 /* The rest of the civilized x64 world has a common ABI. */
 #define RSET_SCRATCH \
   (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
+#define REGARG_GPRS \
+  (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
+   <<5))<<5))<<5))<<5))<<5))
+#define REGARG_FIRSTFPR RID_XMM0
+#define REGARG_LASTFPR RID_XMM7
+#define STACKARG_OFS 0
 #endif
 #else
 /* Common x86 ABI. */
 #define RSET_SCRATCH (RSET_ACD|RSET_FPR)
+#define REGARG_GPRS (RID_ECX|(RID_EDX<<5))  /* Fastcall only. */
+#define STACKARG_OFS 0
 #endif
 
 #if LJ_64
```
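REGARG_GPRS packs the GPR argument registers of each ABI into a single 32 bit constant, five bits per register ID with the first argument register in the lowest bits; this is exactly what the `gprs & 31` / `gprs >>= 5` pattern in asm_gencall peels off. A quick standalone decoder, using illustrative stand-ins for the RID_* numbering (the real IDs live in the RID_* enum in lj_target_x86.h):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative RID_* stand-ins; the real IDs live in lj_target_x86.h. */
enum { RID_EAX = 0, RID_ECX = 1, RID_EDX = 2, RID_EBX = 3, RID_ESP = 4,
       RID_EBP = 5, RID_ESI = 6, RID_EDI = 7, RID_R8D = 8, RID_R9D = 9 };
static const char *ridname[] = { "eax", "ecx", "edx", "ebx", "esp",
                                 "ebp", "esi", "edi", "r8d", "r9d" };

/* Same packing scheme as REGARG_GPRS: 5 bits per register, first arg lowest. */
#define REGARG_GPRS_WIN64  (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
#define REGARG_GPRS_POSIX \
  (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D<<5))<<5))<<5))<<5))<<5))
#define REGARG_GPRS_FASTCALL  (RID_ECX|(RID_EDX<<5))

static void decode(const char *abi, uint32_t gprs, int n)
{
  printf("%s:", abi);
  while (n--) {                       /* Mirror the asm_gencall consumption. */
    printf(" %s", ridname[gprs & 31]);
    gprs >>= 5;
  }
  printf("\n");
}

int main(void)
{
  decode("win64   ", REGARG_GPRS_WIN64, 4);     /* ecx edx r8d r9d */
  decode("posix   ", REGARG_GPRS_POSIX, 6);     /* edi esi edx ecx r8d r9d */
  decode("fastcall", REGARG_GPRS_FASTCALL, 2);  /* ecx edx */
  return 0;
}
```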
```diff
@@ -96,23 +109,26 @@ enum {
 
 /* -- Spill slots --------------------------------------------------------- */
 
-/* Available fixed spill slots in interpreter frame.
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
 ** This definition must match with the *.dasc file(s).
+**
+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
 */
 #if LJ_64
 #ifdef _WIN64
 #define SPS_FIXED (5*2)
+#define SPS_FIRST (4*2)  /* Don't use callee register save area. */
 #else
 #define SPS_FIXED 2
+#define SPS_FIRST 2
 #endif
 #else
 #define SPS_FIXED 6
-#endif
-
-/* First spill slot for general use. Reserve one 64 bit slot. */
 #define SPS_FIRST 2
+#endif
 
-/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
 #define sps_scale(slot) (4 * (int32_t)(slot))
 
 /* -- Exit state ---------------------------------------------------------- */
```
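With the spill-slot block reorganized, each target now defines SPS_FIRST next to SPS_FIXED. Since `sps_scale()` converts a 32 bit slot index into a byte offset (4 bytes per slot), the Windows/x64 value `SPS_FIRST (4*2)` leaves the first 32 bytes of the frame untouched, which matches the "Don't use callee register save area" comment (the Win64 register parameter area is 32 bytes), and `STACKARG_OFS (4*8)` from the first hunk skips the same 32 bytes when laying out outgoing stack arguments. A small worked check using the definitions exactly as they appear in the diff; reading the 32 byte area as the Win64 shadow space is my interpretation of the comment:

```c
#include <stdint.h>
#include <stdio.h>

/* Definitions copied from the Windows/x64 branch of the diff. */
#define SPS_FIXED     (5*2)
#define SPS_FIRST     (4*2)   /* Don't use callee register save area. */
#define STACKARG_OFS  (4*8)

/* Spill slots are 32 bit wide; an even/odd pair is used for FPRs. */
#define sps_scale(slot)  (4 * (int32_t)(slot))

int main(void)
{
  printf("first general-purpose spill slot at byte offset %d\n",
         sps_scale(SPS_FIRST));        /* 32: skips the 32 byte area */
  printf("SPS_FIXED slots occupy %d bytes\n",
         sps_scale(SPS_FIXED));        /* 40 */
  printf("outgoing stack arguments start at byte offset %d\n",
         STACKARG_OFS);                /* 32 */
  return 0;
}
```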