diff options
author | Mike Pall <mike> | 2009-12-16 01:29:07 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2009-12-16 01:29:07 +0100 |
commit | 4cb357d30ff96b59a4bf2421b97d4fbcd2231db9 (patch) | |
tree | 7c3cc9b2d59d1517f7e9cc701d0ae2eae822d341 /src/buildvm_x86.dasc | |
parent | 8df960388870e9d5e53cf4e1504bf2a8325e17a1 (diff) | |
download | luajit-4cb357d30ff96b59a4bf2421b97d4fbcd2231db9.tar.gz luajit-4cb357d30ff96b59a4bf2421b97d4fbcd2231db9.tar.bz2 luajit-4cb357d30ff96b59a4bf2421b97d4fbcd2231db9.zip |
Define x64 interpreter frame and cleanup use of stack temps.
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r-- | src/buildvm_x86.dasc | 245 |
1 files changed, 157 insertions, 88 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index 615a83d3..7638cf9b 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc | |||
@@ -19,8 +19,13 @@ | |||
19 | |// Fixed register assignments for the interpreter. | 19 | |// Fixed register assignments for the interpreter. |
20 | |// This is very fragile and has many dependencies. Caveat emptor. | 20 | |// This is very fragile and has many dependencies. Caveat emptor. |
21 | |.define BASE, edx // Not C callee-save, refetched anyway. | 21 | |.define BASE, edx // Not C callee-save, refetched anyway. |
22 | |.if not X64 or X64WIN | ||
22 | |.define KBASE, edi // Must be C callee-save. | 23 | |.define KBASE, edi // Must be C callee-save. |
23 | |.define PC, esi // Must be C callee-save. | 24 | |.define PC, esi // Must be C callee-save. |
25 | |.else | ||
26 | |.define KBASE, r13d // Must be C callee-save. | ||
27 | |.define PC, r12d // Must be C callee-save. | ||
28 | |.endif | ||
24 | |.define DISPATCH, ebx // Must be C callee-save. | 29 | |.define DISPATCH, ebx // Must be C callee-save. |
25 | | | 30 | | |
26 | |.define RA, ecx | 31 | |.define RA, ecx |
@@ -82,32 +87,23 @@ | |||
82 | |.macro pop_eax; .if X64; pop rax; .else; pop eax; .endif; .endmacro | 87 | |.macro pop_eax; .if X64; pop rax; .else; pop eax; .endif; .endmacro |
83 | | | 88 | | |
84 | |// Stack layout while in interpreter. Must match with lj_frame.h. | 89 | |// Stack layout while in interpreter. Must match with lj_frame.h. |
90 | |//----------------------------------------------------------------------- | ||
91 | |.if not X64 // x86 stack layout. | ||
92 | | | ||
85 | |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). | 93 | |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). |
86 | |.macro saveregs | 94 | |.macro saveregs |
87 | | .if X64 | 95 | | push ebp; push edi; push esi; push ebx |
88 | | .if X64WIN; push rdi; push rsi; .endif | 96 | | sub esp, CFRAME_SPACE |
89 | | push rbp; push rbx; push r12; push r13; push r14; push r15 | ||
90 | | sub rsp, CFRAME_SPACE | ||
91 | | .else | ||
92 | | push ebp; push edi; push esi; push ebx | ||
93 | | sub esp, CFRAME_SPACE | ||
94 | | .endif | ||
95 | |.endmacro | 97 | |.endmacro |
96 | |.macro restoreregs | 98 | |.macro restoreregs |
97 | | .if X64 | 99 | | add esp, CFRAME_SPACE |
98 | | add rsp, CFRAME_SPACE | 100 | | pop ebx; pop esi; pop edi; pop ebp |
99 | | pop r15; pop r14; pop r13; pop r12; pop rbx; pop rbp | ||
100 | | .if X64WIN; pop rsi; pop rdi; .endif | ||
101 | | .else | ||
102 | | add esp, CFRAME_SPACE | ||
103 | | pop ebx; pop esi; pop edi; pop ebp | ||
104 | | .endif | ||
105 | |.endmacro | 101 | |.endmacro |
106 | | | 102 | | |
107 | |.define INARG_4, aword [esp+aword*15] | 103 | |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. |
108 | |.define INARG_3, aword [esp+aword*14] | 104 | |.define SAVE_NRES, aword [esp+aword*14] |
109 | |.define INARG_2, aword [esp+aword*13] | 105 | |.define SAVE_CFRAME, aword [esp+aword*13] |
110 | |.define INARG_1, aword [esp+aword*12] | 106 | |.define SAVE_L, aword [esp+aword*12] |
111 | |//----- 16 byte aligned, ^^^ arguments from C caller | 107 | |//----- 16 byte aligned, ^^^ arguments from C caller |
112 | |.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. | 108 | |.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. |
113 | |.define SAVE_R4, aword [esp+aword*10] | 109 | |.define SAVE_R4, aword [esp+aword*10] |
@@ -116,8 +112,8 @@ | |||
116 | |//----- 16 byte aligned | 112 | |//----- 16 byte aligned |
117 | |.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. | 113 | |.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. |
118 | |.define SAVE_PC, aword [esp+aword*6] | 114 | |.define SAVE_PC, aword [esp+aword*6] |
119 | |.define ARG6, aword [esp+aword*5] | 115 | |.define TMP2, aword [esp+aword*5] |
120 | |.define ARG5, aword [esp+aword*4] | 116 | |.define TMP1, aword [esp+aword*4] |
121 | |//----- 16 byte aligned | 117 | |//----- 16 byte aligned |
122 | |.define ARG4, aword [esp+aword*3] | 118 | |.define ARG4, aword [esp+aword*3] |
123 | |.define ARG3, aword [esp+aword*2] | 119 | |.define ARG3, aword [esp+aword*2] |
@@ -126,24 +122,93 @@ | |||
126 | |//----- 16 byte aligned, ^^^ arguments for C callee | 122 | |//----- 16 byte aligned, ^^^ arguments for C callee |
127 | | | 123 | | |
128 | |// FPARGx overlaps ARGx and ARG(x+1) on x86. | 124 | |// FPARGx overlaps ARGx and ARG(x+1) on x86. |
129 | |.define FPARG5, qword [esp+qword*2] | ||
130 | |.define FPARG3, qword [esp+qword*1] | 125 | |.define FPARG3, qword [esp+qword*1] |
131 | |.define FPARG1, qword [esp] | 126 | |.define FPARG1, qword [esp] |
132 | |// NRESULTS overlaps ARG6 (and FPARG5) | 127 | |// TMPQ overlaps TMP1/TMP2. ARG5/NRESULTS overlap TMP1/TMP2 (and TMPQ). |
133 | |.define NRESULTS, ARG6 | 128 | |.define TMPQ, qword [esp+aword*4] |
129 | |.define ARG5, TMP1 | ||
130 | |.define NRESULTS, TMP2 | ||
134 | | | 131 | | |
135 | |// Arguments for vm_call and vm_pcall. | 132 | |// Arguments for vm_call and vm_pcall. |
136 | |.define INARG_P_ERRF, INARG_4 // vm_pcall only. | 133 | |.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME! |
137 | |.define INARG_NRES, INARG_3 | ||
138 | |.define INARG_BASE, INARG_2 | ||
139 | |.define SAVE_L, INARG_1 | ||
140 | | | ||
141 | |.define SAVE_CFRAME, INARG_BASE // Overwrites INARG_BASE! | ||
142 | | | 134 | | |
143 | |// Arguments for vm_cpcall. | 135 | |// Arguments for vm_cpcall. |
144 | |.define INARG_CP_UD, INARG_4 | 136 | |.define INARG_CP_UD, SAVE_ERRF |
145 | |.define INARG_CP_FUNC, INARG_3 | 137 | |.define INARG_CP_FUNC, SAVE_NRES |
146 | |.define INARG_CP_CALL, INARG_2 | 138 | |.define INARG_CP_CALL, SAVE_CFRAME |
139 | | | ||
140 | |//----------------------------------------------------------------------- | ||
141 | |.elif X64WIN // x64/Windows stack layout | ||
142 | | | ||
143 | |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). | ||
144 | |.macro saveregs | ||
145 | | push rbp; push rdi; push rsi; push rbx // Land in SAVE_R4..SAVE_R1. | ||
146 | | sub rsp, CFRAME_SPACE // Reserve SAVE_CFRAME and CSAVE_1..CSAVE_4. | ||
147 | |.endmacro | ||
148 | |.macro restoreregs | ||
149 | | add rsp, CFRAME_SPACE | ||
150 | | pop rbx; pop rsi; pop rdi; pop rbp // Reverse order of saveregs. | ||
151 | |.endmacro | ||
152 | | | ||
153 | |.define UNUSED1, aword [esp+dword*26] | ||
154 | |.define SAVE_PC, dword [esp+dword*25] | ||
155 | |.define SAVE_L, dword [esp+dword*24] | ||
156 | |.define SAVE_ERRF, dword [esp+dword*23] | ||
157 | |.define SAVE_NRES, dword [esp+dword*22] | ||
158 | |.define TMP2, dword [esp+dword*21] // High dword of TMPQ. | ||
159 | |.define TMP1, dword [esp+dword*20] // Low dword of TMPQ. | ||
160 | |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter | ||
161 | |.define SAVE_RET, aword [esp+aword*9] //<-- rsp entering interpreter. | ||
162 | |.define SAVE_R4, aword [esp+aword*8] // rbp | ||
163 | |.define SAVE_R3, aword [esp+aword*7] // rdi | ||
164 | |.define SAVE_R2, aword [esp+aword*6] // rsi | ||
165 | |.define SAVE_R1, aword [esp+aword*5] //<-- rsp after register saves (rbx). | ||
166 | |.define SAVE_CFRAME, aword [esp+aword*4] | ||
167 | |.define CSAVE_4, aword [esp+aword*3] | ||
168 | |.define CSAVE_3, aword [esp+aword*2] | ||
169 | |.define CSAVE_2, aword [esp+aword*1] | ||
170 | |.define CSAVE_1, aword [esp] //<-- rsp while in interpreter. | ||
171 | |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee | ||
172 | | | ||
173 | |// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ). | ||
174 | |.define TMPQ, qword [esp+dword*20] // Must alias TMP1/TMP2; [esp] is CSAVE_1 here. | ||
175 | |.define NRESULTS, TMP2 | ||
176 | | | ||
177 | |//----------------------------------------------------------------------- | ||
178 | |.else // x64/POSIX stack layout (saveregs preserves rbp, r12, r13, rbx) | ||
179 | | | ||
180 | |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--): SAVE_CFRAME, UNUSED1 and the six dword slots. | ||
181 | |.macro saveregs | ||
182 | | push rbp; push r12; push r13; push rbx | ||
183 | | sub rsp, CFRAME_SPACE | ||
184 | |.endmacro | ||
185 | |.macro restoreregs | ||
186 | | add rsp, CFRAME_SPACE | ||
187 | | pop rbx; pop r13; pop r12; pop rbp | ||
188 | |.endmacro | ||
189 | | | ||
190 | |//----- 16 byte aligned | ||
191 | |.define SAVE_RET, aword [esp+aword*9] //<-- rsp entering interpreter. | ||
192 | |.define SAVE_R4, aword [esp+aword*8] | ||
193 | |.define SAVE_R3, aword [esp+aword*7] | ||
194 | |.define SAVE_R2, aword [esp+aword*6] | ||
195 | |.define SAVE_R1, aword [esp+aword*5] //<-- rsp after register saves (rbx; R2..R4 hold r13, r12, rbp). | ||
196 | |.define SAVE_CFRAME, aword [esp+aword*4] | ||
197 | |.define UNUSED1, aword [esp+aword*3] | ||
198 | |//----- ^^^ awords above, vvv dwords below | ||
199 | |.define SAVE_PC, dword [esp+dword*5] | ||
200 | |.define SAVE_L, dword [esp+dword*4] | ||
201 | |.define SAVE_ERRF, dword [esp+dword*3] | ||
202 | |.define SAVE_NRES, dword [esp+dword*2] | ||
203 | |.define TMP2, dword [esp+dword*1] | ||
204 | |.define TMP1, dword [esp] //<-- rsp while in interpreter. | ||
205 | |//----- 16 byte aligned | ||
206 | | | ||
207 | |// TMPQ is the qword at [esp], so it overlaps TMP1 (low dword) and TMP2 (high dword). NRESULTS overlaps TMP2 (and TMPQ). | ||
208 | |.define TMPQ, qword [esp] | ||
209 | |.define NRESULTS, TMP2 | ||
210 | | | ||
211 | |.endif | ||
147 | | | 212 | | |
148 | |//----------------------------------------------------------------------- | 213 | |//----------------------------------------------------------------------- |
149 | | | 214 | | |
@@ -163,7 +228,11 @@ | |||
163 | | movzx OP, RCL | 228 | | movzx OP, RCL |
164 | | add PC, 4 | 229 | | add PC, 4 |
165 | | shr RC, 16 | 230 | | shr RC, 16 |
231 | |.if not X64 | ||
166 | | jmp aword [DISPATCH+OP*4] | 232 | | jmp aword [DISPATCH+OP*4] |
233 | |.else | ||
234 | | jmp aword [DISPATCH+OP*8] | ||
235 | |.endif | ||
167 | |.endmacro | 236 | |.endmacro |
168 | | | 237 | | |
169 | |// Instruction footer. | 238 | |// Instruction footer. |
@@ -420,7 +489,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
420 | | mov L:RB->base, PC | 489 | | mov L:RB->base, PC |
421 | |3: | 490 | |3: |
422 | | mov RD, NRESULTS | 491 | | mov RD, NRESULTS |
423 | | mov RA, INARG_NRES // RA = wanted nresults+1 | 492 | | mov RA, SAVE_NRES // RA = wanted nresults+1 |
424 | |4: | 493 | |4: |
425 | | cmp RA, RD | 494 | | cmp RA, RD |
426 | | jne >6 // More/less results wanted? | 495 | | jne >6 // More/less results wanted? |
@@ -633,8 +702,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
633 | | // Caveat: INARG_P_* and INARG_CP_* overlap! | 702 | | // Caveat: INARG_P_* and INARG_CP_* overlap! |
634 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | 703 | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). |
635 | | sub KBASE, L:RB->top | 704 | | sub KBASE, L:RB->top |
636 | | mov INARG_P_ERRF, 0 // No error function. | 705 | | mov SAVE_ERRF, 0 // No error function. |
637 | | mov INARG_NRES, KBASE // Neg. delta means cframe w/o frame. | 706 | | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. |
638 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | 707 | | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). |
639 | | | 708 | | |
640 | | mov ARG3, RC | 709 | | mov ARG3, RC |
@@ -693,9 +762,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
693 | |//-- Table indexing metamethods ----------------------------------------- | 762 | |//-- Table indexing metamethods ----------------------------------------- |
694 | | | 763 | | |
695 | |->vmeta_tgets: | 764 | |->vmeta_tgets: |
696 | | mov ARG5, RC // RC = GCstr * | 765 | | mov TMP1, RC // RC = GCstr * |
697 | | mov ARG6, LJ_TSTR | 766 | | mov TMP2, LJ_TSTR |
698 | | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. | 767 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. |
699 | | cmp PC_OP, BC_GGET | 768 | | cmp PC_OP, BC_GGET |
700 | | jne >1 | 769 | | jne >1 |
701 | | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | 770 | | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. |
@@ -708,8 +777,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
708 | | movzx RC, PC_RC // Ugly, cannot fild from a byte. | 777 | | movzx RC, PC_RC // Ugly, cannot fild from a byte. |
709 | | mov ARG4, RC | 778 | | mov ARG4, RC |
710 | | fild ARG4 | 779 | | fild ARG4 |
711 | | fstp FPARG5 | 780 | | fstp TMPQ |
712 | | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. | 781 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. |
713 | | jmp >1 | 782 | | jmp >1 |
714 | | | 783 | | |
715 | |->vmeta_tgetv: | 784 | |->vmeta_tgetv: |
@@ -751,9 +820,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
751 | |//----------------------------------------------------------------------- | 820 | |//----------------------------------------------------------------------- |
752 | | | 821 | | |
753 | |->vmeta_tsets: | 822 | |->vmeta_tsets: |
754 | | mov ARG5, RC // RC = GCstr * | 823 | | mov TMP1, RC // RC = GCstr * |
755 | | mov ARG6, LJ_TSTR | 824 | | mov TMP2, LJ_TSTR |
756 | | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. | 825 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. |
757 | | cmp PC_OP, BC_GSET | 826 | | cmp PC_OP, BC_GSET |
758 | | jne >1 | 827 | | jne >1 |
759 | | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | 828 | | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. |
@@ -766,8 +835,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
766 | | movzx RC, PC_RC // Ugly, cannot fild from a byte. | 835 | | movzx RC, PC_RC // Ugly, cannot fild from a byte. |
767 | | mov ARG4, RC | 836 | | mov ARG4, RC |
768 | | fild ARG4 | 837 | | fild ARG4 |
769 | | fstp FPARG5 | 838 | | fstp TMPQ |
770 | | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. | 839 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. |
771 | | jmp >1 | 840 | | jmp >1 |
772 | | | 841 | | |
773 | |->vmeta_tsetv: | 842 | |->vmeta_tsetv: |
@@ -930,8 +999,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
930 | | | 999 | | |
931 | |->vmeta_call: // Resolve and call __call metamethod. | 1000 | |->vmeta_call: // Resolve and call __call metamethod. |
932 | | // RA = new base, RC = nargs+1, BASE = old base, PC = return | 1001 | | // RA = new base, RC = nargs+1, BASE = old base, PC = return |
933 | | mov ARG4, RA // Save RA, RC for us. | 1002 | | mov TMP2, RA // Save RA, RC for us. |
934 | | mov ARG5, NARGS:RC | 1003 | | mov TMP1, NARGS:RC |
935 | | sub RA, 8 | 1004 | | sub RA, 8 |
936 | | lea RC, [RA+NARGS:RC*8] | 1005 | | lea RC, [RA+NARGS:RC*8] |
937 | | mov L:RB, SAVE_L | 1006 | | mov L:RB, SAVE_L |
@@ -942,8 +1011,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
942 | | mov L:RB->base, BASE // This is the callers base! | 1011 | | mov L:RB->base, BASE // This is the callers base! |
943 | | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | 1012 | | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) |
944 | | mov BASE, L:RB->base | 1013 | | mov BASE, L:RB->base |
945 | | mov RA, ARG4 | 1014 | | mov RA, TMP2 |
946 | | mov NARGS:RC, ARG5 | 1015 | | mov NARGS:RC, TMP1 |
947 | | mov LFUNC:RB, [RA-8] | 1016 | | mov LFUNC:RB, [RA-8] |
948 | | add NARGS:RC, 1 | 1017 | | add NARGS:RC, 1 |
949 | | // This is fragile. L->base must not move, KBASE must always be defined. | 1018 | | // This is fragile. L->base must not move, KBASE must always be defined. |
@@ -1137,13 +1206,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1137 | | mov ARG2, TAB:RC | 1206 | | mov ARG2, TAB:RC |
1138 | | mov ARG1, L:RB | 1207 | | mov ARG1, L:RB |
1139 | | mov RB, RA | 1208 | | mov RB, RA |
1140 | | mov ARG4, BASE // Save BASE and RA. | 1209 | | mov TMP1, BASE // Save BASE and RA. |
1141 | | add RA, 8 | 1210 | | add RA, 8 |
1142 | | mov ARG3, RA | 1211 | | mov ARG3, RA |
1143 | | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | 1212 | | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) |
1144 | | // cTValue * returned in eax (RC). | 1213 | | // cTValue * returned in eax (RC). |
1145 | | mov RA, RB | 1214 | | mov RA, RB |
1146 | | mov BASE, ARG4 | 1215 | | mov BASE, TMP1 |
1147 | | mov RB, [RC] // Copy table slot. | 1216 | | mov RB, [RC] // Copy table slot. |
1148 | | mov RC, [RC+4] | 1217 | | mov RC, [RC+4] |
1149 | | mov [RA-8], RB | 1218 | | mov [RA-8], RB |
@@ -1199,13 +1268,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1199 | | mov L:RB->base, RA // Add frame since C call can throw. | 1268 | | mov L:RB->base, RA // Add frame since C call can throw. |
1200 | | mov [RA-4], PC | 1269 | | mov [RA-4], PC |
1201 | | mov SAVE_PC, PC // Redundant (but a defined value). | 1270 | | mov SAVE_PC, PC // Redundant (but a defined value). |
1202 | | mov ARG4, BASE // Save BASE. | 1271 | | mov TMP1, BASE // Save BASE. |
1203 | | add RA, 8 | 1272 | | add RA, 8 |
1204 | | mov ARG3, RA | 1273 | | mov ARG3, RA |
1205 | | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | 1274 | | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) |
1206 | | // Flag returned in eax (RC). | 1275 | | // Flag returned in eax (RC). |
1207 | | mov RA, L:RB->base | 1276 | | mov RA, L:RB->base |
1208 | | mov BASE, ARG4 | 1277 | | mov BASE, TMP1 |
1209 | | test RC, RC; jz >3 // End of traversal? | 1278 | | test RC, RC; jz >3 // End of traversal? |
1210 | | mov RB, [RA+8] // Copy key and value to results. | 1279 | | mov RB, [RA+8] // Copy key and value to results. |
1211 | | mov RC, [RA+12] | 1280 | | mov RC, [RA+12] |
@@ -1526,11 +1595,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1526 | | | 1595 | | |
1527 | |.macro math_extern, func | 1596 | |.macro math_extern, func |
1528 | |.ffunc_n math_ .. func | 1597 | |.ffunc_n math_ .. func |
1529 | | mov ARG5, RA | 1598 | | mov TMP1, RA |
1530 | | fstp FPARG1 | 1599 | | fstp FPARG1 |
1531 | | mov RB, BASE | 1600 | | mov RB, BASE |
1532 | | call extern lj_wrapper_ .. func | 1601 | | call extern lj_wrapper_ .. func |
1533 | | mov RA, ARG5 | 1602 | | mov RA, TMP1 |
1534 | | mov BASE, RB | 1603 | | mov BASE, RB |
1535 | | jmp ->fff_resn | 1604 | | jmp ->fff_resn |
1536 | |.endmacro | 1605 | |.endmacro |
@@ -1645,10 +1714,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1645 | | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg. | 1714 | | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg. |
1646 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | 1715 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback |
1647 | | fld qword [RA] | 1716 | | fld qword [RA] |
1648 | | fistp ARG4 | 1717 | | fistp TMP2 |
1649 | | cmp ARG4, 255; ja ->fff_fallback | 1718 | | cmp TMP2, 255; ja ->fff_fallback |
1650 | | lea RC, ARG4 // Little-endian. | 1719 | | lea RC, TMP2 // Little-endian. |
1651 | | mov ARG5, RA // Save RA. | 1720 | | mov TMP1, RA // Save RA. |
1652 | | mov ARG3, 1 | 1721 | | mov ARG3, 1 |
1653 | | mov ARG2, RC | 1722 | | mov ARG2, RC |
1654 | |->fff_newstr: | 1723 | |->fff_newstr: |
@@ -1658,7 +1727,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1658 | | mov L:RB->base, BASE | 1727 | | mov L:RB->base, BASE |
1659 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) | 1728 | | call extern lj_str_new // (lua_State *L, char *str, size_t l) |
1660 | | // GCstr * returned in eax (RC). | 1729 | | // GCstr * returned in eax (RC). |
1661 | | mov RA, ARG5 | 1730 | | mov RA, TMP1 |
1662 | | mov BASE, L:RB->base | 1731 | | mov BASE, L:RB->base |
1663 | | mov dword [RA-4], LJ_TSTR | 1732 | | mov dword [RA-4], LJ_TSTR |
1664 | | mov [RA-8], STR:RC | 1733 | | mov [RA-8], STR:RC |
@@ -1666,13 +1735,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1666 | | | 1735 | | |
1667 | |.ffunc string_sub | 1736 | |.ffunc string_sub |
1668 | | ffgccheck | 1737 | | ffgccheck |
1669 | | mov ARG5, RA // Save RA. | 1738 | | mov TMP1, RA // Save RA. |
1670 | | mov ARG4, -1 | 1739 | | mov TMP2, -1 |
1671 | | cmp NARGS:RC, 1+2; jb ->fff_fallback | 1740 | | cmp NARGS:RC, 1+2; jb ->fff_fallback |
1672 | | jna >1 | 1741 | | jna >1 |
1673 | | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback | 1742 | | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback |
1674 | | fld qword [RA+16] | 1743 | | fld qword [RA+16] |
1675 | | fistp ARG4 | 1744 | | fistp TMP2 |
1676 | |1: | 1745 | |1: |
1677 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | 1746 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback |
1678 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback | 1747 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback |
@@ -1681,7 +1750,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1681 | | mov RB, STR:RB->len | 1750 | | mov RB, STR:RB->len |
1682 | | fld qword [RA+8] | 1751 | | fld qword [RA+8] |
1683 | | fistp ARG3 | 1752 | | fistp ARG3 |
1684 | | mov RC, ARG4 | 1753 | | mov RC, TMP2 |
1685 | | cmp RB, RC // len < end? (unsigned compare) | 1754 | | cmp RB, RC // len < end? (unsigned compare) |
1686 | | jb >5 | 1755 | | jb >5 |
1687 | |2: | 1756 | |2: |
@@ -1722,13 +1791,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1722 | | | 1791 | | |
1723 | |.ffunc_2 string_rep // Only handle the 1-char case inline. | 1792 | |.ffunc_2 string_rep // Only handle the 1-char case inline. |
1724 | | ffgccheck | 1793 | | ffgccheck |
1725 | | mov ARG5, RA // Save RA. | 1794 | | mov TMP1, RA // Save RA. |
1726 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | 1795 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback |
1727 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback | 1796 | | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback |
1728 | | mov STR:RB, [RA] | 1797 | | mov STR:RB, [RA] |
1729 | | fld qword [RA+8] | 1798 | | fld qword [RA+8] |
1730 | | fistp ARG4 | 1799 | | fistp TMP2 |
1731 | | mov RC, ARG4 | 1800 | | mov RC, TMP2 |
1732 | | test RC, RC | 1801 | | test RC, RC |
1733 | | jle ->fff_emptystr // Count <= 0? (or non-int) | 1802 | | jle ->fff_emptystr // Count <= 0? (or non-int) |
1734 | | cmp dword STR:RB->len, 1 | 1803 | | cmp dword STR:RB->len, 1 |
@@ -1748,7 +1817,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1748 | | | 1817 | | |
1749 | |.ffunc_1 string_reverse | 1818 | |.ffunc_1 string_reverse |
1750 | | ffgccheck | 1819 | | ffgccheck |
1751 | | mov ARG5, RA // Save RA. | 1820 | | mov TMP1, RA // Save RA. |
1752 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | 1821 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback |
1753 | | mov STR:RB, [RA] | 1822 | | mov STR:RB, [RA] |
1754 | | mov RC, STR:RB->len | 1823 | | mov RC, STR:RB->len |
@@ -1756,7 +1825,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1756 | | jz ->fff_emptystr // Zero length string? | 1825 | | jz ->fff_emptystr // Zero length string? |
1757 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | 1826 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 |
1758 | | add RB, #STR | 1827 | | add RB, #STR |
1759 | | mov ARG4, PC // Need another temp register. | 1828 | | mov TMP2, PC // Need another temp register. |
1760 | | mov ARG3, RC | 1829 | | mov ARG3, RC |
1761 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | 1830 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] |
1762 | | mov ARG2, PC | 1831 | | mov ARG2, PC |
@@ -1766,19 +1835,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1766 | | sub RC, 1 | 1835 | | sub RC, 1 |
1767 | | mov [PC+RC], RAL | 1836 | | mov [PC+RC], RAL |
1768 | | jnz <1 | 1837 | | jnz <1 |
1769 | | mov PC, ARG4 | 1838 | | mov PC, TMP2 |
1770 | | jmp ->fff_newstr | 1839 | | jmp ->fff_newstr |
1771 | | | 1840 | | |
1772 | |.macro ffstring_case, name, lo, hi | 1841 | |.macro ffstring_case, name, lo, hi |
1773 | | .ffunc_1 name | 1842 | | .ffunc_1 name |
1774 | | ffgccheck | 1843 | | ffgccheck |
1775 | | mov ARG5, RA // Save RA. | 1844 | | mov TMP1, RA // Save RA. |
1776 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback | 1845 | | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback |
1777 | | mov STR:RB, [RA] | 1846 | | mov STR:RB, [RA] |
1778 | | mov RC, STR:RB->len | 1847 | | mov RC, STR:RB->len |
1779 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 | 1848 | | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 |
1780 | | add RB, #STR | 1849 | | add RB, #STR |
1781 | | mov ARG4, PC // Need another temp register. | 1850 | | mov TMP2, PC // Need another temp register. |
1782 | | mov ARG3, RC | 1851 | | mov ARG3, RC |
1783 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] | 1852 | | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] |
1784 | | mov ARG2, PC | 1853 | | mov ARG2, PC |
@@ -1795,7 +1864,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1795 | |3: | 1864 | |3: |
1796 | | sub RC, 1 | 1865 | | sub RC, 1 |
1797 | | jns <1 | 1866 | | jns <1 |
1798 | | mov PC, ARG4 | 1867 | | mov PC, TMP2 |
1799 | | jmp ->fff_newstr | 1868 | | jmp ->fff_newstr |
1800 | |.endmacro | 1869 | |.endmacro |
1801 | | | 1870 | | |
@@ -1822,16 +1891,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1822 | |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). | 1891 | |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). |
1823 | | | 1892 | | |
1824 | |.ffunc_n bit_tobit | 1893 | |.ffunc_n bit_tobit |
1825 | | mov ARG5, TOBIT_BIAS | 1894 | | mov TMP1, TOBIT_BIAS |
1826 | | fadd ARG5 | 1895 | | fadd TMP1 |
1827 | | fstp FPARG1 // 64 bit FP store. | 1896 | | fstp FPARG1 // 64 bit FP store. |
1828 | | fild ARG1 // 32 bit integer load (s2lfwd ok). | 1897 | | fild ARG1 // 32 bit integer load (s2lfwd ok). |
1829 | | jmp ->fff_resn | 1898 | | jmp ->fff_resn |
1830 | | | 1899 | | |
1831 | |.macro .ffunc_bit, name | 1900 | |.macro .ffunc_bit, name |
1832 | | .ffunc_n name | 1901 | | .ffunc_n name |
1833 | | mov ARG5, TOBIT_BIAS | 1902 | | mov TMP1, TOBIT_BIAS |
1834 | | fadd ARG5 | 1903 | | fadd TMP1 |
1835 | | fstp FPARG1 | 1904 | | fstp FPARG1 |
1836 | | mov RB, ARG1 | 1905 | | mov RB, ARG1 |
1837 | |.endmacro | 1906 | |.endmacro |
@@ -1845,7 +1914,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1845 | | jbe ->fff_resbit | 1914 | | jbe ->fff_resbit |
1846 | | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op | 1915 | | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op |
1847 | | fld qword [RC] | 1916 | | fld qword [RC] |
1848 | | fadd ARG5 | 1917 | | fadd TMP1 |
1849 | | fstp FPARG1 | 1918 | | fstp FPARG1 |
1850 | | ins RB, ARG1 | 1919 | | ins RB, ARG1 |
1851 | | sub RC, 8 | 1920 | | sub RC, 8 |
@@ -1873,10 +1942,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1873 | | | 1942 | | |
1874 | |.macro .ffunc_bit_sh, name, ins | 1943 | |.macro .ffunc_bit_sh, name, ins |
1875 | | .ffunc_nn name | 1944 | | .ffunc_nn name |
1876 | | mov ARG5, TOBIT_BIAS | 1945 | | mov TMP1, TOBIT_BIAS |
1877 | | fadd ARG5 | 1946 | | fadd TMP1 |
1878 | | fstp FPARG3 | 1947 | | fstp FPARG3 |
1879 | | fadd ARG5 | 1948 | | fadd TMP1 |
1880 | | fstp FPARG1 | 1949 | | fstp FPARG1 |
1881 | | mov RC, RA // Assumes RA is ecx. | 1950 | | mov RC, RA // Assumes RA is ecx. |
1882 | | mov RA, ARG3 | 1951 | | mov RA, ARG3 |
@@ -3121,9 +3190,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
3121 | | test byte TAB:RA->nomm, 1<<MM_newindex | 3190 | | test byte TAB:RA->nomm, 1<<MM_newindex |
3122 | | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. | 3191 | | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. |
3123 | |6: | 3192 | |6: |
3124 | | mov ARG5, STR:RC | 3193 | | mov TMP1, STR:RC |
3125 | | mov ARG6, LJ_TSTR | 3194 | | mov TMP2, LJ_TSTR |
3126 | | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. | 3195 | | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. |
3127 | | mov ARG4, TAB:RB // Save TAB:RB for us. | 3196 | | mov ARG4, TAB:RB // Save TAB:RB for us. |
3128 | | mov ARG2, TAB:RB | 3197 | | mov ARG2, TAB:RB |
3129 | | mov L:RB, SAVE_L | 3198 | | mov L:RB, SAVE_L |
@@ -3179,7 +3248,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
3179 | 3248 | ||
3180 | case BC_TSETM: | 3249 | case BC_TSETM: |
3181 | | ins_AD // RA = base (table at base-1), RD = num const (start index) | 3250 | | ins_AD // RA = base (table at base-1), RD = num const (start index) |
3182 | | mov ARG5, KBASE // Need one more free register. | 3251 | | mov TMP1, KBASE // Need one more free register. |
3183 | | fld qword [KBASE+RD*8] | 3252 | | fld qword [KBASE+RD*8] |
3184 | | fistp ARG4 // Const is guaranteed to be an int. | 3253 | | fistp ARG4 // Const is guaranteed to be an int. |
3185 | |1: | 3254 | |1: |
@@ -3208,7 +3277,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
3208 | | sub RD, 1 | 3277 | | sub RD, 1 |
3209 | | jnz <3 | 3278 | | jnz <3 |
3210 | |4: | 3279 | |4: |
3211 | | mov KBASE, ARG5 | 3280 | | mov KBASE, TMP1 |
3212 | | ins_next | 3281 | | ins_next |
3213 | | | 3282 | | |
3214 | |5: // Need to resize array part. | 3283 | |5: // Need to resize array part. |