| field | value | date |
|---|---|---|
| author | Mike Pall <mike> | 2010-02-25 03:35:07 +0100 |
| committer | Mike Pall <mike> | 2010-02-25 03:35:07 +0100 |
| commit | 3c6cec0846ae25dc9d0c2495cb02316694725add (patch) | |
| tree | 6900f1555f4c920f87e7d2ef23fce65051ddaf65 /src | |
| parent | b95294572ce8efa527e0b0118bb7168117afd171 (diff) | |
Add x64 call argument setup. More 32/64 bit cleanups in assembler.
Diffstat (limited to 'src')

| mode | file | changes |
|---|---|---|
| -rw-r--r-- | src/lj_asm.c | 100 |
| -rw-r--r-- | src/lj_target_x86.h | 26 |

2 files changed, 79 insertions, 47 deletions
```diff
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 0b6ebc09..f38ceaef 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1292,21 +1292,52 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
   RegSet allow = RSET_ALL;
   uint32_t n, nargs = CCI_NARGS(ci);
-  int32_t ofs = 0;
+  int32_t ofs = STACKARG_OFS;
+  uint32_t gprs = REGARG_GPRS;
+#if LJ_64
+  Reg fpr = REGARG_FIRSTFPR;
+#endif
   lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL)));  /* Avoid stack adj. */
   emit_call(as, ci->func);
   for (n = 0; n < nargs; n++) {  /* Setup args. */
-#if LJ_64
-#error "NYI: 64 bit mode call argument setup"
-#endif
     IRIns *ir = IR(args[n]);
+    Reg r;
+#if LJ_64 && defined(_WIN64)
+    /* Windows/x64 argument registers are strictly positional. */
+    r = irt_isnum(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
+    fpr++; gprs >>= 5;
+#elif LJ_64
+    /* POSIX/x64 argument registers are used in order of appearance. */
     if (irt_isnum(ir->t)) {
-      if ((ofs & 4) && irref_isk(args[n])) {
+      r = fpr <= REGARG_LASTFPR ? fpr : 0; fpr++;
+    } else {
+      r = gprs & 31; gprs >>= 5;
+    }
+#else
+    if (irt_isnum(ir->t) || !(ci->flags & CCI_FASTCALL)) {
+      r = 0;
+    } else {
+      r = gprs & 31; gprs >>= 5;
+    }
+#endif
+    if (r) {  /* Argument is in a register. */
+      if (args[n] < ASMREF_TMP1) {
+        emit_loadi(as, r, ir->i);
+      } else {
+        lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
+        if (ra_hasreg(ir->r)) {
+          ra_noweak(as, ir->r);
+          ra_movrr(as, ir, r, ir->r);
+        } else {
+          ra_allocref(as, args[n], RID2RSET(r));
+        }
+      }
+    } else if (irt_isnum(ir->t)) {  /* FP argument is on stack. */
+      if (!LJ_64 && (ofs & 4) && irref_isk(args[n])) {
         /* Split stores for unaligned FP consts. */
         emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
         emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
       } else {
-        Reg r;
         if ((allow & RSET_FPR) == RSET_EMPTY)
           lj_trace_err(as->J, LJ_TRERR_NYICOAL);
         r = ra_alloc1(as, args[n], allow & RSET_FPR);
@@ -1314,34 +1345,18 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
         emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
       }
       ofs += 8;
-    } else {
-      if ((ci->flags & CCI_FASTCALL) && n < 2) {
-        Reg r = n == 0 ? RID_ECX : RID_EDX;
-        if (args[n] < ASMREF_TMP1) {
-          emit_loadi(as, r, ir->i);
-        } else {
-          lua_assert(rset_test(as->freeset, r));  /* Must have been evicted. */
-          allow &= ~RID2RSET(r);
-          if (ra_hasreg(ir->r)) {
-            ra_noweak(as, ir->r);
-            ra_movrr(as, ir, r, ir->r);
-          } else {
-            ra_allocref(as, args[n], RID2RSET(r));
-          }
-        }
-      } else {
-        if (args[n] < ASMREF_TMP1) {
-          emit_movmroi(as, RID_ESP, ofs, ir->i);
-        } else {
-          Reg r;
-          if ((allow & RSET_GPR) == RSET_EMPTY)
-            lj_trace_err(as->J, LJ_TRERR_NYICOAL);
-          r = ra_alloc1(as, args[n], allow & RSET_GPR);
-          allow &= ~RID2RSET(r);
-          emit_movtomro(as, r, RID_ESP, ofs);
-        }
-        ofs += 4;
+    } else {  /* Non-FP argument is on stack. */
+      /* NYI: no widening for 64 bit parameters on x64. */
+      if (args[n] < ASMREF_TMP1) {
+        emit_movmroi(as, RID_ESP, ofs, ir->i);
+      } else {
+        if ((allow & RSET_GPR) == RSET_EMPTY)
+          lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+        r = ra_alloc1(as, args[n], allow & RSET_GPR);
+        allow &= ~RID2RSET(r);
+        emit_movtomro(as, REX_64LU(ir, r), RID_ESP, ofs);
       }
+      ofs += sizeof(intptr_t);
     }
   }
 }
```
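The rewrite replaces the old x86-only logic, which hard-coded fastcall's ECX/EDX and 4-byte stack slots, with a single walk that pulls argument GPRs out of REGARG_GPRS, tracks FP registers with an fpr counter, and widens stack slots to sizeof(intptr_t). The Windows and POSIX branches differ in one key way: Win64 consumes a GPR slot and an XMM slot per argument (fpr++ and gprs >>= 5 always advance together), while POSIX/x64 advances only the register file the argument actually uses. A standalone sketch of the resulting assignments for a hypothetical signature f(int a, double b, int c, double d); the register names are illustrative, not taken from the diff:

```c
#include <stdio.h>

int main(void)
{
  /* POSIX/x64: GPR and XMM argument registers advance independently. */
  const char *posix[] = { "rdi", "xmm0", "rsi", "xmm1" };
  /* Windows/x64: the argument's position selects the register, so every
  ** argument burns both a GPR slot and an XMM slot. */
  const char *win64[] = { "rcx", "xmm1", "r8", "xmm3" };
  int n;
  for (n = 0; n < 4; n++)
    printf("arg %d: posix -> %-4s  win64 -> %s\n", n, posix[n], win64[n]);
  return 0;
}
```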
```diff
@@ -2561,7 +2576,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       asm_guardcc(as, cc);
       if (usetest && left != RID_MRM) {
         /* Use test r,r instead of cmp r,0. */
-        emit_rr(as, XO_TEST, left, left);
+        emit_rr(as, XO_TEST, REX_64LU(ir, left), left);
         if (irl+1 == ir)  /* Referencing previous ins? */
           as->testmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
       } else {
@@ -2580,11 +2595,7 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
       Reg left = ra_alloc1(as, lref, RSET_GPR);
       Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
       asm_guardcc(as, cc);
-#if LJ_64
-      if (irt_islightud(ir->t))
-        left |= REX_64;
-#endif
-      emit_mrm(as, XO_CMP, left, right);
+      emit_mrm(as, XO_CMP, REX_64LU(ir, left), right);
     }
   }
 }
```
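Both comparison paths now route the operand through REX_64LU, replacing the open-coded `#if LJ_64` block that OR-ed REX_64 into the register for lightuserdata. A sketch of what the helper plausibly expands to (hypothetical; the real definition lives in lj_target_x86.h), together with the encoding difference it controls:

```c
/* Hypothetical sketch, not the actual LuaJIT definition. Tagging a
** register operand with REX_64 makes the emitter prepend a REX.W prefix,
** widening the instruction to 64 bit:
**   test eax, eax  ->  85 C0      (32 bit, no prefix)
**   test rax, rax  ->  48 85 C0   (64 bit, REX.W = 0x48)
** Lightuserdata values are full 64 bit pointers, so only they need it.
*/
#define REX_64LU(ir, r) \
  ((r) | (LJ_64 && irt_islightud((ir)->t) ? REX_64 : 0))
```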
```diff
@@ -2732,14 +2743,14 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
   /* We don't know spadj yet, so get the C frame from L->cframe. */
   emit_movmroi(as, tmp, CFRAME_OFS_PC,
                (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
-  emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
+  emit_gri(as, XG_ARITHi(XOg_AND), tmp|REX_64, CFRAME_RAWMASK);
   lstate = IR(ASMREF_L)->r;
-  emit_rmro(as, XO_MOV, tmp, lstate, offsetof(lua_State, cframe));
+  emit_rmro(as, XO_MOV, tmp|REX_64, lstate, offsetof(lua_State, cframe));
   /* It's ok if lstate is already in a non-scratch reg. But all allocations
   ** in the non-fast path must use a scratch reg. See comment above.
   */
   base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
-  emit_movtomro(as, base, lstate, offsetof(lua_State, base));
+  emit_movtomro(as, base|REX_64, lstate, offsetof(lua_State, base));
   asm_gc_sync(as, snap, base);
   /* BASE/L get restored anyway, better do it inside the slow path. */
   if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
```
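The `|REX_64` annotations here are correctness fixes for x64: lua_State's cframe and base fields hold pointers, and on x86-64 a 32 bit operation zero-extends into the upper half of the destination register, so a 32 bit AND or MOV would truncate a 64 bit pointer. A minimal demonstration of the failure mode (the tag mask below is made up; LuaJIT's real mask is CFRAME_RAWMASK):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
  uint64_t cframe = 0x00007fffdeadbef3ULL;   /* Made-up tagged pointer. */
  uint64_t ok  = cframe & ~(uint64_t)7;      /* 64 bit AND: tag stripped. */
  uint64_t bad = (uint32_t)(cframe & ~7U);   /* 32 bit AND zero-extends. */
  printf("64 bit AND: %#llx\n", (unsigned long long)ok);
  printf("32 bit AND: %#llx\n", (unsigned long long)bad); /* Top half gone. */
  return 0;
}
```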
```diff
@@ -3447,7 +3458,12 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
     case IR_CALLN: case IR_CALLL: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
 #if LJ_64
-      /* NYI: add stack slots for calls with more than 4/6 args. */
+      /* NYI: add stack slots for x64 calls with many args. */
+#ifdef _WIN64
+      lua_assert(CCI_NARGS(ci) <= 4);
+#else
+      lua_assert(CCI_NARGS(ci) <= 6);  /* Safe lower bound. */
+#endif
       ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
 #else
       /* NYI: not fastcall-aware, but doesn't matter (yet). */
```
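These asserts document the register-argument budget the new asm_gencall can satisfy without stack slots, which asm_setup_regsp does not reserve yet: four positional slots on Win64, six on POSIX/x64. Six is a "safe lower bound" because FP arguments draw from xmm0-xmm7, so any mix of up to six arguments always fits in registers even though pure-FP calls could pass more. A compact restatement (the macro name is hypothetical):

```c
/* Hypothetical summary of the per-ABI register argument budgets. */
#ifdef _WIN64
#define MAX_REGARGS 4  /* Positional slots: rcx/rdx/r8/r9 or xmm0-xmm3. */
#else
#define MAX_REGARGS 6  /* GPRs rdi,rsi,rdx,rcx,r8,r9; FP args may also use
                       ** xmm0-xmm7, so six always fits, whatever the mix. */
#endif
```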
```diff
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 8e9a8788..83eba0ec 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -78,14 +78,27 @@ enum {
 /* Windows x64 ABI. */
 #define RSET_SCRATCH \
   (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
+#define REGARG_GPRS \
+  (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
+#define REGARG_FIRSTFPR RID_XMM0
+#define REGARG_LASTFPR RID_XMM3
+#define STACKARG_OFS (4*8)
 #else
 /* The rest of the civilized x64 world has a common ABI. */
 #define RSET_SCRATCH \
   (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
+#define REGARG_GPRS \
+  (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
+   <<5))<<5))<<5))<<5))<<5))
+#define REGARG_FIRSTFPR RID_XMM0
+#define REGARG_LASTFPR RID_XMM7
+#define STACKARG_OFS 0
 #endif
 #else
 /* Common x86 ABI. */
 #define RSET_SCRATCH (RSET_ACD|RSET_FPR)
+#define REGARG_GPRS (RID_ECX|(RID_EDX<<5))  /* Fastcall only. */
+#define STACKARG_OFS 0
 #endif
 
 #if LJ_64
```
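REGARG_GPRS packs the argument GPR numbers into a single constant, five bits per register, in call order; asm_gencall peels one off per argument with `gprs & 31` and `gprs >>= 5`. STACKARG_OFS makes stack arguments start above Win64's 32-byte register home area (4*8), while POSIX/x64 and x86 start at offset 0. A standalone decode of the POSIX constant, assuming illustrative RID values that match the hardware register numbers:

```c
#include <stdio.h>
#include <stdint.h>

/* Illustrative register ids (same as the hardware register numbers). */
enum { RID_ECX = 1, RID_EDX = 2, RID_ESI = 6, RID_EDI = 7,
       RID_R8D = 8, RID_R9D = 9 };

/* POSIX/x64 order: rdi, rsi, rdx, rcx, r8, r9 -- packed 5 bits each. */
#define REGARG_GPRS \
  (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
   <<5))<<5))<<5))<<5))<<5))

int main(void)
{
  const char *name[] = { [1] = "rcx", [2] = "rdx", [6] = "rsi",
                         [7] = "rdi", [8] = "r8", [9] = "r9" };
  uint32_t gprs = REGARG_GPRS;
  int n;
  for (n = 0; n < 6; n++) {  /* Same extract-and-shift walk as asm_gencall. */
    printf("gpr arg %d -> %s\n", n, name[gprs & 31]);
    gprs >>= 5;
  }
  return 0;
}
```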
```diff
@@ -96,23 +109,26 @@ enum {
 
 /* -- Spill slots --------------------------------------------------------- */
 
-/* Available fixed spill slots in interpreter frame.
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
 ** This definition must match with the *.dasc file(s).
+**
+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
 */
 #if LJ_64
 #ifdef _WIN64
 #define SPS_FIXED (5*2)
+#define SPS_FIRST (4*2)  /* Don't use callee register save area. */
 #else
 #define SPS_FIXED 2
+#define SPS_FIRST 2
 #endif
 #else
 #define SPS_FIXED 6
-#endif
-
-/* First spill slot for general use. Reserve one 64 bit slot. */
 #define SPS_FIRST 2
+#endif
 
-/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
 #define sps_scale(slot) (4 * (int32_t)(slot))
 
 /* -- Exit state ---------------------------------------------------------- */
```
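SPS_FIRST is now per-ABI, with Win64 keeping the first 4*2 slots free so generated code stays out of the callee register save area, but slot indices still convert to byte offsets through the unchanged sps_scale: four bytes per 32 bit slot, with FPR spills occupying an even/odd pair. A small usage sketch:

```c
#include <stdio.h>
#include <stdint.h>

#define sps_scale(slot) (4 * (int32_t)(slot))  /* From lj_target_x86.h. */

int main(void)
{
  int slot;
  for (slot = 2; slot < 6; slot++)  /* e.g. SPS_FIRST == 2 on POSIX/x64. */
    printf("slot %d -> [esp+%d]%s\n", slot, sps_scale(slot),
           (slot & 1) == 0 ? "  (even: may start an FPR pair)" : "");
  return 0;
}
```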
