aboutsummaryrefslogtreecommitdiff
path: root/src/buildvm_x86.dasc
diff options
context:
space:
mode:
authorMike Pall <mike>2009-12-16 01:29:07 +0100
committerMike Pall <mike>2009-12-16 01:29:07 +0100
commit4cb357d30ff96b59a4bf2421b97d4fbcd2231db9 (patch)
tree7c3cc9b2d59d1517f7e9cc701d0ae2eae822d341 /src/buildvm_x86.dasc
parent8df960388870e9d5e53cf4e1504bf2a8325e17a1 (diff)
downloadluajit-4cb357d30ff96b59a4bf2421b97d4fbcd2231db9.tar.gz
luajit-4cb357d30ff96b59a4bf2421b97d4fbcd2231db9.tar.bz2
luajit-4cb357d30ff96b59a4bf2421b97d4fbcd2231db9.zip
Define x64 interpreter frame and cleanup use of stack temps.
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r--src/buildvm_x86.dasc245
1 files changed, 157 insertions, 88 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index 615a83d3..7638cf9b 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -19,8 +19,13 @@
19|// Fixed register assignments for the interpreter. 19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor. 20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, edx // Not C callee-save, refetched anyway. 21|.define BASE, edx // Not C callee-save, refetched anyway.
22|.if not X64 or X64WIN
22|.define KBASE, edi // Must be C callee-save. 23|.define KBASE, edi // Must be C callee-save.
23|.define PC, esi // Must be C callee-save. 24|.define PC, esi // Must be C callee-save.
25|.else
26|.define KBASE, r13d // Must be C callee-save.
27|.define PC, r12d // Must be C callee-save.
28|.endif
24|.define DISPATCH, ebx // Must be C callee-save. 29|.define DISPATCH, ebx // Must be C callee-save.
25| 30|
26|.define RA, ecx 31|.define RA, ecx
@@ -82,32 +87,23 @@
82|.macro pop_eax; .if X64; pop rax; .else; pop eax; .endif; .endmacro 87|.macro pop_eax; .if X64; pop rax; .else; pop eax; .endif; .endmacro
83| 88|
84|// Stack layout while in interpreter. Must match with lj_frame.h. 89|// Stack layout while in interpreter. Must match with lj_frame.h.
90|//-----------------------------------------------------------------------
91|.if not X64 // x86 stack layout.
92|
85|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). 93|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
86|.macro saveregs 94|.macro saveregs
87| .if X64 95| push ebp; push edi; push esi; push ebx
88| .if X64WIN; push rdi; push rsi; .endif 96| sub esp, CFRAME_SPACE
89| push rbp; push rbx; push r12; push r13; push r14; push r15
90| sub rsp, CFRAME_SPACE
91| .else
92| push ebp; push edi; push esi; push ebx
93| sub esp, CFRAME_SPACE
94| .endif
95|.endmacro 97|.endmacro
96|.macro restoreregs 98|.macro restoreregs
97| .if X64 99| add esp, CFRAME_SPACE
98| add rsp, CFRAME_SPACE 100| pop ebx; pop esi; pop edi; pop ebp
99| pop r15; pop r14; pop r13; pop r12; pop rbx; pop rbp
100| .if X64WIN; pop rsi; pop rdi; .endif
101| .else
102| add esp, CFRAME_SPACE
103| pop ebx; pop esi; pop edi; pop ebp
104| .endif
105|.endmacro 101|.endmacro
106| 102|
107|.define INARG_4, aword [esp+aword*15] 103|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
108|.define INARG_3, aword [esp+aword*14] 104|.define SAVE_NRES, aword [esp+aword*14]
109|.define INARG_2, aword [esp+aword*13] 105|.define SAVE_CFRAME, aword [esp+aword*13]
110|.define INARG_1, aword [esp+aword*12] 106|.define SAVE_L, aword [esp+aword*12]
111|//----- 16 byte aligned, ^^^ arguments from C caller 107|//----- 16 byte aligned, ^^^ arguments from C caller
112|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. 108|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
113|.define SAVE_R4, aword [esp+aword*10] 109|.define SAVE_R4, aword [esp+aword*10]
@@ -116,8 +112,8 @@
116|//----- 16 byte aligned 112|//----- 16 byte aligned
117|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. 113|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
118|.define SAVE_PC, aword [esp+aword*6] 114|.define SAVE_PC, aword [esp+aword*6]
119|.define ARG6, aword [esp+aword*5] 115|.define TMP2, aword [esp+aword*5]
120|.define ARG5, aword [esp+aword*4] 116|.define TMP1, aword [esp+aword*4]
121|//----- 16 byte aligned 117|//----- 16 byte aligned
122|.define ARG4, aword [esp+aword*3] 118|.define ARG4, aword [esp+aword*3]
123|.define ARG3, aword [esp+aword*2] 119|.define ARG3, aword [esp+aword*2]
@@ -126,24 +122,93 @@
126|//----- 16 byte aligned, ^^^ arguments for C callee 122|//----- 16 byte aligned, ^^^ arguments for C callee
127| 123|
128|// FPARGx overlaps ARGx and ARG(x+1) on x86. 124|// FPARGx overlaps ARGx and ARG(x+1) on x86.
129|.define FPARG5, qword [esp+qword*2]
130|.define FPARG3, qword [esp+qword*1] 125|.define FPARG3, qword [esp+qword*1]
131|.define FPARG1, qword [esp] 126|.define FPARG1, qword [esp]
132|// NRESULTS overlaps ARG6 (and FPARG5) 127|// TMPQ overlaps TMP1/TMP2. ARG5/NRESULTS overlap TMP1/TMP2 (and TMPQ).
133|.define NRESULTS, ARG6 128|.define TMPQ, qword [esp+aword*4]
129|.define ARG5, TMP1
130|.define NRESULTS, TMP2
134| 131|
135|// Arguments for vm_call and vm_pcall. 132|// Arguments for vm_call and vm_pcall.
136|.define INARG_P_ERRF, INARG_4 // vm_pcall only. 133|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
137|.define INARG_NRES, INARG_3
138|.define INARG_BASE, INARG_2
139|.define SAVE_L, INARG_1
140|
141|.define SAVE_CFRAME, INARG_BASE // Overwrites INARG_BASE!
142| 134|
143|// Arguments for vm_cpcall. 135|// Arguments for vm_cpcall.
144|.define INARG_CP_UD, INARG_4 136|.define INARG_CP_UD, SAVE_ERRF
145|.define INARG_CP_FUNC, INARG_3 137|.define INARG_CP_FUNC, SAVE_NRES
146|.define INARG_CP_CALL, INARG_2 138|.define INARG_CP_CALL, SAVE_CFRAME
139|
140|//-----------------------------------------------------------------------
141|.elif X64WIN // x64/Windows stack layout
142|
143|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
144|.macro saveregs
145| push rbp; push rdi; push rsi; push rbx // rdi/rsi/rbx back KBASE/PC/DISPATCH here.
146| sub rsp, CFRAME_SPACE
147|.endmacro
148|.macro restoreregs
149| add rsp, CFRAME_SPACE
150| pop rbx; pop rsi; pop rdi; pop rbp // Reverse order of saveregs.
151|.endmacro
152|
153|.define UNUSED1, aword [esp+dword*26]
154|.define SAVE_PC, dword [esp+dword*25]
155|.define SAVE_L, dword [esp+dword*24]
156|.define SAVE_ERRF, dword [esp+dword*23]
157|.define SAVE_NRES, dword [esp+dword*22]
158|.define TMP2, dword [esp+dword*21]
159|.define TMP1, dword [esp+dword*20]
160|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
161|.define SAVE_RET, aword [esp+aword*9] //<-- rsp entering interpreter.
162|.define SAVE_R4, aword [esp+aword*8]
163|.define SAVE_R3, aword [esp+aword*7]
164|.define SAVE_R2, aword [esp+aword*6]
165|.define SAVE_R1, aword [esp+aword*5] //<-- rsp after register saves.
166|.define SAVE_CFRAME, aword [esp+aword*4]
167|.define CSAVE_4, aword [esp+aword*3]
168|.define CSAVE_3, aword [esp+aword*2]
169|.define CSAVE_2, aword [esp+aword*1]
170|.define CSAVE_1, aword [esp] //<-- rsp while in interpreter.
171|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
172|
173|// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ).
174|.define TMPQ, qword [esp+dword*20] // Same address as TMP1; [esp] would alias CSAVE_1.
175|.define NRESULTS, TMP2
176|
177|//-----------------------------------------------------------------------
178|.else // x64/POSIX stack layout
179|
180|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
181|.macro saveregs
182| push rbp; push r12; push r13; push rbx // r12/r13/rbx back PC/KBASE/DISPATCH here.
183| sub rsp, CFRAME_SPACE
184|.endmacro
185|.macro restoreregs
186| add rsp, CFRAME_SPACE
187| pop rbx; pop r13; pop r12; pop rbp // Reverse order of saveregs.
188|.endmacro
189|
190|//----- 16 byte aligned
191|.define SAVE_RET, aword [esp+aword*9] //<-- rsp entering interpreter.
192|.define SAVE_R4, aword [esp+aword*8]
193|.define SAVE_R3, aword [esp+aword*7]
194|.define SAVE_R2, aword [esp+aword*6]
195|.define SAVE_R1, aword [esp+aword*5] //<-- rsp after register saves.
196|.define SAVE_CFRAME, aword [esp+aword*4]
197|.define UNUSED1, aword [esp+aword*3] // Fills the gap between SAVE_PC and SAVE_CFRAME.
198|//----- ^^^ awords above, vvv dwords below
199|.define SAVE_PC, dword [esp+dword*5]
200|.define SAVE_L, dword [esp+dword*4]
201|.define SAVE_ERRF, dword [esp+dword*3]
202|.define SAVE_NRES, dword [esp+dword*2]
203|.define TMP2, dword [esp+dword*1]
204|.define TMP1, dword [esp] //<-- rsp while in interpreter.
205|//----- 16 byte aligned
206|
207|// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ).
208|.define TMPQ, qword [esp] // Same address as TMP1.
209|.define NRESULTS, TMP2
210|
211|.endif
147| 212|
148|//----------------------------------------------------------------------- 213|//-----------------------------------------------------------------------
149| 214|
@@ -163,7 +228,11 @@
163| movzx OP, RCL 228| movzx OP, RCL
164| add PC, 4 229| add PC, 4
165| shr RC, 16 230| shr RC, 16
231|.if not X64
166| jmp aword [DISPATCH+OP*4] 232| jmp aword [DISPATCH+OP*4]
233|.else
234| jmp aword [DISPATCH+OP*8]
235|.endif
167|.endmacro 236|.endmacro
168| 237|
169|// Instruction footer. 238|// Instruction footer.
@@ -420,7 +489,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
420 | mov L:RB->base, PC 489 | mov L:RB->base, PC
421 |3: 490 |3:
422 | mov RD, NRESULTS 491 | mov RD, NRESULTS
423 | mov RA, INARG_NRES // RA = wanted nresults+1 492 | mov RA, SAVE_NRES // RA = wanted nresults+1
424 |4: 493 |4:
425 | cmp RA, RD 494 | cmp RA, RD
426 | jne >6 // More/less results wanted? 495 | jne >6 // More/less results wanted?
@@ -633,8 +702,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
633 | // Caveat: INARG_P_* and INARG_CP_* overlap! 702 | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
634 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 703 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
635 | sub KBASE, L:RB->top 704 | sub KBASE, L:RB->top
636 | mov INARG_P_ERRF, 0 // No error function. 705 | mov SAVE_ERRF, 0 // No error function.
637 | mov INARG_NRES, KBASE // Neg. delta means cframe w/o frame. 706 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
638 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 707 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
639 | 708 |
640 | mov ARG3, RC 709 | mov ARG3, RC
@@ -693,9 +762,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
693 |//-- Table indexing metamethods ----------------------------------------- 762 |//-- Table indexing metamethods -----------------------------------------
694 | 763 |
695 |->vmeta_tgets: 764 |->vmeta_tgets:
696 | mov ARG5, RC // RC = GCstr * 765 | mov TMP1, RC // RC = GCstr *
697 | mov ARG6, LJ_TSTR 766 | mov TMP2, LJ_TSTR
698 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. 767 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
699 | cmp PC_OP, BC_GGET 768 | cmp PC_OP, BC_GGET
700 | jne >1 769 | jne >1
701 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. 770 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
@@ -708,8 +777,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
708 | movzx RC, PC_RC // Ugly, cannot fild from a byte. 777 | movzx RC, PC_RC // Ugly, cannot fild from a byte.
709 | mov ARG4, RC 778 | mov ARG4, RC
710 | fild ARG4 779 | fild ARG4
711 | fstp FPARG5 780 | fstp TMPQ
712 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. 781 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
713 | jmp >1 782 | jmp >1
714 | 783 |
715 |->vmeta_tgetv: 784 |->vmeta_tgetv:
@@ -751,9 +820,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
751 |//----------------------------------------------------------------------- 820 |//-----------------------------------------------------------------------
752 | 821 |
753 |->vmeta_tsets: 822 |->vmeta_tsets:
754 | mov ARG5, RC // RC = GCstr * 823 | mov TMP1, RC // RC = GCstr *
755 | mov ARG6, LJ_TSTR 824 | mov TMP2, LJ_TSTR
756 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. 825 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
757 | cmp PC_OP, BC_GSET 826 | cmp PC_OP, BC_GSET
758 | jne >1 827 | jne >1
759 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. 828 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
@@ -766,8 +835,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
766 | movzx RC, PC_RC // Ugly, cannot fild from a byte. 835 | movzx RC, PC_RC // Ugly, cannot fild from a byte.
767 | mov ARG4, RC 836 | mov ARG4, RC
768 | fild ARG4 837 | fild ARG4
769 | fstp FPARG5 838 | fstp TMPQ
770 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. 839 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
771 | jmp >1 840 | jmp >1
772 | 841 |
773 |->vmeta_tsetv: 842 |->vmeta_tsetv:
@@ -930,8 +999,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
930 | 999 |
931 |->vmeta_call: // Resolve and call __call metamethod. 1000 |->vmeta_call: // Resolve and call __call metamethod.
932 | // RA = new base, RC = nargs+1, BASE = old base, PC = return 1001 | // RA = new base, RC = nargs+1, BASE = old base, PC = return
933 | mov ARG4, RA // Save RA, RC for us. 1002 | mov TMP2, RA // Save RA, RC for us.
934 | mov ARG5, NARGS:RC 1003 | mov TMP1, NARGS:RC
935 | sub RA, 8 1004 | sub RA, 8
936 | lea RC, [RA+NARGS:RC*8] 1005 | lea RC, [RA+NARGS:RC*8]
937 | mov L:RB, SAVE_L 1006 | mov L:RB, SAVE_L
@@ -942,8 +1011,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
942 | mov L:RB->base, BASE // This is the callers base! 1011 | mov L:RB->base, BASE // This is the callers base!
943 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) 1012 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
944 | mov BASE, L:RB->base 1013 | mov BASE, L:RB->base
945 | mov RA, ARG4 1014 | mov RA, TMP2
946 | mov NARGS:RC, ARG5 1015 | mov NARGS:RC, TMP1
947 | mov LFUNC:RB, [RA-8] 1016 | mov LFUNC:RB, [RA-8]
948 | add NARGS:RC, 1 1017 | add NARGS:RC, 1
949 | // This is fragile. L->base must not move, KBASE must always be defined. 1018 | // This is fragile. L->base must not move, KBASE must always be defined.
@@ -1137,13 +1206,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1137 | mov ARG2, TAB:RC 1206 | mov ARG2, TAB:RC
1138 | mov ARG1, L:RB 1207 | mov ARG1, L:RB
1139 | mov RB, RA 1208 | mov RB, RA
1140 | mov ARG4, BASE // Save BASE and RA. 1209 | mov TMP1, BASE // Save BASE and RA.
1141 | add RA, 8 1210 | add RA, 8
1142 | mov ARG3, RA 1211 | mov ARG3, RA
1143 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1212 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1144 | // cTValue * returned in eax (RC). 1213 | // cTValue * returned in eax (RC).
1145 | mov RA, RB 1214 | mov RA, RB
1146 | mov BASE, ARG4 1215 | mov BASE, TMP1
1147 | mov RB, [RC] // Copy table slot. 1216 | mov RB, [RC] // Copy table slot.
1148 | mov RC, [RC+4] 1217 | mov RC, [RC+4]
1149 | mov [RA-8], RB 1218 | mov [RA-8], RB
@@ -1199,13 +1268,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1199 | mov L:RB->base, RA // Add frame since C call can throw. 1268 | mov L:RB->base, RA // Add frame since C call can throw.
1200 | mov [RA-4], PC 1269 | mov [RA-4], PC
1201 | mov SAVE_PC, PC // Redundant (but a defined value). 1270 | mov SAVE_PC, PC // Redundant (but a defined value).
1202 | mov ARG4, BASE // Save BASE. 1271 | mov TMP1, BASE // Save BASE.
1203 | add RA, 8 1272 | add RA, 8
1204 | mov ARG3, RA 1273 | mov ARG3, RA
1205 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1274 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1206 | // Flag returned in eax (RC). 1275 | // Flag returned in eax (RC).
1207 | mov RA, L:RB->base 1276 | mov RA, L:RB->base
1208 | mov BASE, ARG4 1277 | mov BASE, TMP1
1209 | test RC, RC; jz >3 // End of traversal? 1278 | test RC, RC; jz >3 // End of traversal?
1210 | mov RB, [RA+8] // Copy key and value to results. 1279 | mov RB, [RA+8] // Copy key and value to results.
1211 | mov RC, [RA+12] 1280 | mov RC, [RA+12]
@@ -1526,11 +1595,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1526 | 1595 |
1527 |.macro math_extern, func 1596 |.macro math_extern, func
1528 |.ffunc_n math_ .. func 1597 |.ffunc_n math_ .. func
1529 | mov ARG5, RA 1598 | mov TMP1, RA
1530 | fstp FPARG1 1599 | fstp FPARG1
1531 | mov RB, BASE 1600 | mov RB, BASE
1532 | call extern lj_wrapper_ .. func 1601 | call extern lj_wrapper_ .. func
1533 | mov RA, ARG5 1602 | mov RA, TMP1
1534 | mov BASE, RB 1603 | mov BASE, RB
1535 | jmp ->fff_resn 1604 | jmp ->fff_resn
1536 |.endmacro 1605 |.endmacro
@@ -1645,10 +1714,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1645 | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg. 1714 | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1646 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1715 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
1647 | fld qword [RA] 1716 | fld qword [RA]
1648 | fistp ARG4 1717 | fistp TMP2
1649 | cmp ARG4, 255; ja ->fff_fallback 1718 | cmp TMP2, 255; ja ->fff_fallback
1650 | lea RC, ARG4 // Little-endian. 1719 | lea RC, TMP2 // Little-endian.
1651 | mov ARG5, RA // Save RA. 1720 | mov TMP1, RA // Save RA.
1652 | mov ARG3, 1 1721 | mov ARG3, 1
1653 | mov ARG2, RC 1722 | mov ARG2, RC
1654 |->fff_newstr: 1723 |->fff_newstr:
@@ -1658,7 +1727,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1658 | mov L:RB->base, BASE 1727 | mov L:RB->base, BASE
1659 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 1728 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1660 | // GCstr * returned in eax (RC). 1729 | // GCstr * returned in eax (RC).
1661 | mov RA, ARG5 1730 | mov RA, TMP1
1662 | mov BASE, L:RB->base 1731 | mov BASE, L:RB->base
1663 | mov dword [RA-4], LJ_TSTR 1732 | mov dword [RA-4], LJ_TSTR
1664 | mov [RA-8], STR:RC 1733 | mov [RA-8], STR:RC
@@ -1666,13 +1735,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1666 | 1735 |
1667 |.ffunc string_sub 1736 |.ffunc string_sub
1668 | ffgccheck 1737 | ffgccheck
1669 | mov ARG5, RA // Save RA. 1738 | mov TMP1, RA // Save RA.
1670 | mov ARG4, -1 1739 | mov TMP2, -1
1671 | cmp NARGS:RC, 1+2; jb ->fff_fallback 1740 | cmp NARGS:RC, 1+2; jb ->fff_fallback
1672 | jna >1 1741 | jna >1
1673 | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback 1742 | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback
1674 | fld qword [RA+16] 1743 | fld qword [RA+16]
1675 | fistp ARG4 1744 | fistp TMP2
1676 |1: 1745 |1:
1677 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 1746 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1678 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 1747 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
@@ -1681,7 +1750,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1681 | mov RB, STR:RB->len 1750 | mov RB, STR:RB->len
1682 | fld qword [RA+8] 1751 | fld qword [RA+8]
1683 | fistp ARG3 1752 | fistp ARG3
1684 | mov RC, ARG4 1753 | mov RC, TMP2
1685 | cmp RB, RC // len < end? (unsigned compare) 1754 | cmp RB, RC // len < end? (unsigned compare)
1686 | jb >5 1755 | jb >5
1687 |2: 1756 |2:
@@ -1722,13 +1791,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1722 | 1791 |
1723 |.ffunc_2 string_rep // Only handle the 1-char case inline. 1792 |.ffunc_2 string_rep // Only handle the 1-char case inline.
1724 | ffgccheck 1793 | ffgccheck
1725 | mov ARG5, RA // Save RA. 1794 | mov TMP1, RA // Save RA.
1726 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 1795 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1727 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 1796 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
1728 | mov STR:RB, [RA] 1797 | mov STR:RB, [RA]
1729 | fld qword [RA+8] 1798 | fld qword [RA+8]
1730 | fistp ARG4 1799 | fistp TMP2
1731 | mov RC, ARG4 1800 | mov RC, TMP2
1732 | test RC, RC 1801 | test RC, RC
1733 | jle ->fff_emptystr // Count <= 0? (or non-int) 1802 | jle ->fff_emptystr // Count <= 0? (or non-int)
1734 | cmp dword STR:RB->len, 1 1803 | cmp dword STR:RB->len, 1
@@ -1748,7 +1817,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1748 | 1817 |
1749 |.ffunc_1 string_reverse 1818 |.ffunc_1 string_reverse
1750 | ffgccheck 1819 | ffgccheck
1751 | mov ARG5, RA // Save RA. 1820 | mov TMP1, RA // Save RA.
1752 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 1821 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1753 | mov STR:RB, [RA] 1822 | mov STR:RB, [RA]
1754 | mov RC, STR:RB->len 1823 | mov RC, STR:RB->len
@@ -1756,7 +1825,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1756 | jz ->fff_emptystr // Zero length string? 1825 | jz ->fff_emptystr // Zero length string?
1757 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 1826 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
1758 | add RB, #STR 1827 | add RB, #STR
1759 | mov ARG4, PC // Need another temp register. 1828 | mov TMP2, PC // Need another temp register.
1760 | mov ARG3, RC 1829 | mov ARG3, RC
1761 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 1830 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
1762 | mov ARG2, PC 1831 | mov ARG2, PC
@@ -1766,19 +1835,19 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1766 | sub RC, 1 1835 | sub RC, 1
1767 | mov [PC+RC], RAL 1836 | mov [PC+RC], RAL
1768 | jnz <1 1837 | jnz <1
1769 | mov PC, ARG4 1838 | mov PC, TMP2
1770 | jmp ->fff_newstr 1839 | jmp ->fff_newstr
1771 | 1840 |
1772 |.macro ffstring_case, name, lo, hi 1841 |.macro ffstring_case, name, lo, hi
1773 | .ffunc_1 name 1842 | .ffunc_1 name
1774 | ffgccheck 1843 | ffgccheck
1775 | mov ARG5, RA // Save RA. 1844 | mov TMP1, RA // Save RA.
1776 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 1845 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1777 | mov STR:RB, [RA] 1846 | mov STR:RB, [RA]
1778 | mov RC, STR:RB->len 1847 | mov RC, STR:RB->len
1779 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 1848 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
1780 | add RB, #STR 1849 | add RB, #STR
1781 | mov ARG4, PC // Need another temp register. 1850 | mov TMP2, PC // Need another temp register.
1782 | mov ARG3, RC 1851 | mov ARG3, RC
1783 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 1852 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
1784 | mov ARG2, PC 1853 | mov ARG2, PC
@@ -1795,7 +1864,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1795 |3: 1864 |3:
1796 | sub RC, 1 1865 | sub RC, 1
1797 | jns <1 1866 | jns <1
1798 | mov PC, ARG4 1867 | mov PC, TMP2
1799 | jmp ->fff_newstr 1868 | jmp ->fff_newstr
1800 |.endmacro 1869 |.endmacro
1801 | 1870 |
@@ -1822,16 +1891,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1822 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). 1891 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
1823 | 1892 |
1824 |.ffunc_n bit_tobit 1893 |.ffunc_n bit_tobit
1825 | mov ARG5, TOBIT_BIAS 1894 | mov TMP1, TOBIT_BIAS
1826 | fadd ARG5 1895 | fadd TMP1
1827 | fstp FPARG1 // 64 bit FP store. 1896 | fstp FPARG1 // 64 bit FP store.
1828 | fild ARG1 // 32 bit integer load (s2lfwd ok). 1897 | fild ARG1 // 32 bit integer load (s2lfwd ok).
1829 | jmp ->fff_resn 1898 | jmp ->fff_resn
1830 | 1899 |
1831 |.macro .ffunc_bit, name 1900 |.macro .ffunc_bit, name
1832 | .ffunc_n name 1901 | .ffunc_n name
1833 | mov ARG5, TOBIT_BIAS 1902 | mov TMP1, TOBIT_BIAS
1834 | fadd ARG5 1903 | fadd TMP1
1835 | fstp FPARG1 1904 | fstp FPARG1
1836 | mov RB, ARG1 1905 | mov RB, ARG1
1837 |.endmacro 1906 |.endmacro
@@ -1845,7 +1914,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1845 | jbe ->fff_resbit 1914 | jbe ->fff_resbit
1846 | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op 1915 | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op
1847 | fld qword [RC] 1916 | fld qword [RC]
1848 | fadd ARG5 1917 | fadd TMP1
1849 | fstp FPARG1 1918 | fstp FPARG1
1850 | ins RB, ARG1 1919 | ins RB, ARG1
1851 | sub RC, 8 1920 | sub RC, 8
@@ -1873,10 +1942,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1873 | 1942 |
1874 |.macro .ffunc_bit_sh, name, ins 1943 |.macro .ffunc_bit_sh, name, ins
1875 | .ffunc_nn name 1944 | .ffunc_nn name
1876 | mov ARG5, TOBIT_BIAS 1945 | mov TMP1, TOBIT_BIAS
1877 | fadd ARG5 1946 | fadd TMP1
1878 | fstp FPARG3 1947 | fstp FPARG3
1879 | fadd ARG5 1948 | fadd TMP1
1880 | fstp FPARG1 1949 | fstp FPARG1
1881 | mov RC, RA // Assumes RA is ecx. 1950 | mov RC, RA // Assumes RA is ecx.
1882 | mov RA, ARG3 1951 | mov RA, ARG3
@@ -3121,9 +3190,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
3121 | test byte TAB:RA->nomm, 1<<MM_newindex 3190 | test byte TAB:RA->nomm, 1<<MM_newindex
3122 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. 3191 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3123 |6: 3192 |6:
3124 | mov ARG5, STR:RC 3193 | mov TMP1, STR:RC
3125 | mov ARG6, LJ_TSTR 3194 | mov TMP2, LJ_TSTR
3126 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. 3195 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
3127 | mov ARG4, TAB:RB // Save TAB:RB for us. 3196 | mov ARG4, TAB:RB // Save TAB:RB for us.
3128 | mov ARG2, TAB:RB 3197 | mov ARG2, TAB:RB
3129 | mov L:RB, SAVE_L 3198 | mov L:RB, SAVE_L
@@ -3179,7 +3248,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
3179 3248
3180 case BC_TSETM: 3249 case BC_TSETM:
3181 | ins_AD // RA = base (table at base-1), RD = num const (start index) 3250 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3182 | mov ARG5, KBASE // Need one more free register. 3251 | mov TMP1, KBASE // Need one more free register.
3183 | fld qword [KBASE+RD*8] 3252 | fld qword [KBASE+RD*8]
3184 | fistp ARG4 // Const is guaranteed to be an int. 3253 | fistp ARG4 // Const is guaranteed to be an int.
3185 |1: 3254 |1:
@@ -3208,7 +3277,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
3208 | sub RD, 1 3277 | sub RD, 1
3209 | jnz <3 3278 | jnz <3
3210 |4: 3279 |4:
3211 | mov KBASE, ARG5 3280 | mov KBASE, TMP1
3212 | ins_next 3281 | ins_next
3213 | 3282 |
3214 |5: // Need to resize array part. 3283 |5: // Need to resize array part.