aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Pall <mike>2015-05-04 06:30:57 +0200
committerMike Pall <mike>2015-05-04 06:30:57 +0200
commita92e73023353e59405ebfdb2837b5742d17946a5 (patch)
tree19dca7d5ffe0b5d133d6fd5434a923aa976aaaeb
parent5caf53d502dbe051dac11d86e1f9ad69bbe3eabf (diff)
downloadluajit-a92e73023353e59405ebfdb2837b5742d17946a5.tar.gz
luajit-a92e73023353e59405ebfdb2837b5742d17946a5.tar.bz2
luajit-a92e73023353e59405ebfdb2837b5742d17946a5.zip
x64: Add LJ_GC64 mode interpreter.
Enable this mode with: make XCFLAGS=-DLUAJIT_ENABLE_GC64
-rw-r--r--src/lib_base.c2
-rw-r--r--src/lj_arch.h3
-rw-r--r--src/lj_ccallback.c7
-rw-r--r--src/lj_cdata.c7
-rw-r--r--src/lj_frame.h16
-rw-r--r--src/lj_gdbjit.c4
-rw-r--r--src/lj_target_x86.h4
-rw-r--r--src/vm_x64.dasc4868
8 files changed, 4906 insertions, 5 deletions
diff --git a/src/lib_base.c b/src/lib_base.c
index 35ccdbc7..887fea7a 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -428,7 +428,7 @@ LJLIB_CF(dofile)
428 428
429LJLIB_CF(gcinfo) 429LJLIB_CF(gcinfo)
430{ 430{
431 setintV(L->top++, (G(L)->gc.total >> 10)); 431 setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
432 return 1; 432 return 1;
433} 433}
434 434
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 25c82fd0..442ba4fc 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -158,6 +158,9 @@
158#define LJ_TARGET_MASKROT 1 158#define LJ_TARGET_MASKROT 1
159#define LJ_TARGET_UNALIGNED 1 159#define LJ_TARGET_UNALIGNED 1
160#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL 160#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
161#ifdef LUAJIT_ENABLE_GC64
162#define LJ_TARGET_GC64 1
163#endif
161 164
162#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM 165#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
163 166
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 66a09440..065c329f 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -35,7 +35,7 @@
35#elif LJ_TARGET_X86ORX64 35#elif LJ_TARGET_X86ORX64
36 36
37#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) 37#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0)
38#define CALLBACK_MCODE_GROUP (-2+1+2+5+(LJ_64 ? 6 : 5)) 38#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
39 39
40#define CALLBACK_SLOT2OFS(slot) \ 40#define CALLBACK_SLOT2OFS(slot) \
41 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) 41 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
@@ -120,8 +120,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
120 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ 120 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
121 *p++ = XI_PUSH + RID_EBP; 121 *p++ = XI_PUSH + RID_EBP;
122 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); 122 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
123#if LJ_GC64
124 *p++ = 0x48; *p++ = XI_MOVri | RID_EBP;
125 *(uint64_t *)p = (uint64_t)(g); p += 8;
126#else
123 *p++ = XI_MOVri | RID_EBP; 127 *p++ = XI_MOVri | RID_EBP;
124 *(int32_t *)p = i32ptr(g); p += 4; 128 *(int32_t *)p = i32ptr(g); p += 4;
129#endif
125#if LJ_64 130#if LJ_64
126 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ 131 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
127 *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; 132 *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index fccf7f14..5cd2c114 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -121,7 +121,12 @@ collect_attrib:
121 idx = (ptrdiff_t)intV(key); 121 idx = (ptrdiff_t)intV(key);
122 goto integer_key; 122 goto integer_key;
123 } else if (tvisnum(key)) { /* Numeric key. */ 123 } else if (tvisnum(key)) { /* Numeric key. */
124 idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key)); 124#ifdef _MSC_VER
125 /* Workaround for MSVC bug. */
126 volatile
127#endif
128 lua_Number n = numV(key);
129 idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
125 integer_key: 130 integer_key:
126 if (ctype_ispointer(ct->info)) { 131 if (ctype_ispointer(ct->info)) {
127 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ 132 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */
diff --git a/src/lj_frame.h b/src/lj_frame.h
index b9595a5a..517f4684 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -127,21 +127,37 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
127#elif LJ_TARGET_X64 127#elif LJ_TARGET_X64
128#if LJ_ABI_WIN 128#if LJ_ABI_WIN
129#define CFRAME_OFS_PREV (13*8) 129#define CFRAME_OFS_PREV (13*8)
130#if LJ_GC64
131#define CFRAME_OFS_PC (12*8)
132#define CFRAME_OFS_L (11*8)
133#define CFRAME_OFS_ERRF (21*4)
134#define CFRAME_OFS_NRES (20*4)
135#define CFRAME_OFS_MULTRES (8*4)
136#else
130#define CFRAME_OFS_PC (25*4) 137#define CFRAME_OFS_PC (25*4)
131#define CFRAME_OFS_L (24*4) 138#define CFRAME_OFS_L (24*4)
132#define CFRAME_OFS_ERRF (23*4) 139#define CFRAME_OFS_ERRF (23*4)
133#define CFRAME_OFS_NRES (22*4) 140#define CFRAME_OFS_NRES (22*4)
134#define CFRAME_OFS_MULTRES (21*4) 141#define CFRAME_OFS_MULTRES (21*4)
142#endif
135#define CFRAME_SIZE (10*8) 143#define CFRAME_SIZE (10*8)
136#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) 144#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8)
137#define CFRAME_SHIFT_MULTRES 0 145#define CFRAME_SHIFT_MULTRES 0
138#else 146#else
139#define CFRAME_OFS_PREV (4*8) 147#define CFRAME_OFS_PREV (4*8)
148#if LJ_GC64
149#define CFRAME_OFS_PC (3*8)
150#define CFRAME_OFS_L (2*8)
151#define CFRAME_OFS_ERRF (3*4)
152#define CFRAME_OFS_NRES (2*4)
153#define CFRAME_OFS_MULTRES (0*4)
154#else
140#define CFRAME_OFS_PC (7*4) 155#define CFRAME_OFS_PC (7*4)
141#define CFRAME_OFS_L (6*4) 156#define CFRAME_OFS_L (6*4)
142#define CFRAME_OFS_ERRF (5*4) 157#define CFRAME_OFS_ERRF (5*4)
143#define CFRAME_OFS_NRES (4*4) 158#define CFRAME_OFS_NRES (4*4)
144#define CFRAME_OFS_MULTRES (1*4) 159#define CFRAME_OFS_MULTRES (1*4)
160#endif
145#define CFRAME_SIZE (10*8) 161#define CFRAME_SIZE (10*8)
146#define CFRAME_SIZE_JIT (CFRAME_SIZE + 16) 162#define CFRAME_SIZE_JIT (CFRAME_SIZE + 16)
147#define CFRAME_SHIFT_MULTRES 0 163#define CFRAME_SHIFT_MULTRES 0
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index c289cd8e..9b95e525 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -556,8 +556,8 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
556 DB(DW_CFA_offset|DW_REG_15); DUV(4); 556 DB(DW_CFA_offset|DW_REG_15); DUV(4);
557 DB(DW_CFA_offset|DW_REG_14); DUV(5); 557 DB(DW_CFA_offset|DW_REG_14); DUV(5);
558 /* Extra registers saved for JIT-compiled code. */ 558 /* Extra registers saved for JIT-compiled code. */
559 DB(DW_CFA_offset|DW_REG_13); DUV(9); 559 DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9);
560 DB(DW_CFA_offset|DW_REG_12); DUV(10); 560 DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10);
561#elif LJ_TARGET_ARM 561#elif LJ_TARGET_ARM
562 { 562 {
563 int i; 563 int i;
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 65e438fd..fc9d3702 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -132,7 +132,11 @@ enum {
132#define SPS_FIXED (4*2) 132#define SPS_FIXED (4*2)
133#define SPS_FIRST (4*2) /* Don't use callee register save area. */ 133#define SPS_FIRST (4*2) /* Don't use callee register save area. */
134#else 134#else
135#if LJ_GC64
136#define SPS_FIXED 2
137#else
135#define SPS_FIXED 4 138#define SPS_FIXED 4
139#endif
136#define SPS_FIRST 2 140#define SPS_FIRST 2
137#endif 141#endif
138#else 142#else
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
new file mode 100644
index 00000000..bcd3184d
--- /dev/null
+++ b/src/vm_x64.dasc
@@ -0,0 +1,4868 @@
1|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch x64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|//-----------------------------------------------------------------------
14|
15|.if WIN
16|.define X64WIN, 1 // Windows/x64 calling conventions.
17|.endif
18|
19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, rdx // Not C callee-save, refetched anyway.
22|.if X64WIN
23|.define KBASE, rdi // Must be C callee-save.
24|.define PC, rsi // Must be C callee-save.
25|.define DISPATCH, rbx // Must be C callee-save.
26|.define KBASEd, edi
27|.define PCd, esi
28|.define DISPATCHd, ebx
29|.else
30|.define KBASE, r15 // Must be C callee-save.
31|.define PC, rbx // Must be C callee-save.
32|.define DISPATCH, r14 // Must be C callee-save.
33|.define KBASEd, r15d
34|.define PCd, ebx
35|.define DISPATCHd, r14d
36|.endif
37|
38|.define RA, rcx
39|.define RAd, ecx
40|.define RAH, ch
41|.define RAL, cl
42|.define RB, rbp // Must be rbp (C callee-save).
43|.define RBd, ebp
44|.define RC, rax // Must be rax.
45|.define RCd, eax
46|.define RCW, ax
47|.define RCH, ah
48|.define RCL, al
49|.define OP, RBd
50|.define RD, RC
51|.define RDd, RCd
52|.define RDW, RCW
53|.define RDL, RCL
54|.define TMPR, r10
55|.define TMPRd, r10d
56|.define ITYPE, r11
57|.define ITYPEd, r11d
58|
59|.if X64WIN
60|.define CARG1, rcx // x64/WIN64 C call arguments.
61|.define CARG2, rdx
62|.define CARG3, r8
63|.define CARG4, r9
64|.define CARG1d, ecx
65|.define CARG2d, edx
66|.define CARG3d, r8d
67|.define CARG4d, r9d
68|.else
69|.define CARG1, rdi // x64/POSIX C call arguments.
70|.define CARG2, rsi
71|.define CARG3, rdx
72|.define CARG4, rcx
73|.define CARG5, r8
74|.define CARG6, r9
75|.define CARG1d, edi
76|.define CARG2d, esi
77|.define CARG3d, edx
78|.define CARG4d, ecx
79|.define CARG5d, r8d
80|.define CARG6d, r9d
81|.endif
82|
83|// Type definitions. Some of these are only used for documentation.
84|.type L, lua_State
85|.type GL, global_State
86|.type TVALUE, TValue
87|.type GCOBJ, GCobj
88|.type STR, GCstr
89|.type TAB, GCtab
90|.type LFUNC, GCfuncL
91|.type CFUNC, GCfuncC
92|.type PROTO, GCproto
93|.type UPVAL, GCupval
94|.type NODE, Node
95|.type NARGS, int
96|.type TRACE, GCtrace
97|.type SBUF, SBuf
98|
99|// Stack layout while in interpreter. Must match with lj_frame.h.
100|//-----------------------------------------------------------------------
101|.if X64WIN // x64/Windows stack layout
102|
103|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
104|.macro saveregs_ // Push rdi, rsi, rbx, then reserve CFRAME_SPACE bytes below them.
105| push rdi; push rsi; push rbx
106| sub rsp, CFRAME_SPACE
107|.endmacro
108|.macro saveregs // Push rbp first, then do saveregs_: four pushes plus CFRAME_SPACE in total.
109| push rbp; saveregs_
110|.endmacro
111|.macro restoreregs // Exact inverse of saveregs: release CFRAME_SPACE, pop in reverse push order.
112| add rsp, CFRAME_SPACE
113| pop rbx; pop rsi; pop rdi; pop rbp
114|.endmacro
115|
116|.define SAVE_CFRAME, aword [rsp+aword*13]
117|.define SAVE_PC, aword [rsp+aword*12]
118|.define SAVE_L, aword [rsp+aword*11]
119|.define SAVE_ERRF, dword [rsp+dword*21]
120|.define SAVE_NRES, dword [rsp+dword*20]
121|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123|.define SAVE_R4, aword [rsp+aword*8]
124|.define SAVE_R3, aword [rsp+aword*7]
125|.define SAVE_R2, aword [rsp+aword*6]
126|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
127|.define ARG5, aword [rsp+aword*4]
128|.define CSAVE_4, aword [rsp+aword*3]
129|.define CSAVE_3, aword [rsp+aword*2]
130|.define CSAVE_2, aword [rsp+aword*1]
131|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
133|
134|.define ARG5d, dword [rsp+dword*8]
135|.define TMP1, ARG5 // TMP1 overlaps ARG5
136|.define TMP1d, ARG5d
137|.define TMP1hi, dword [rsp+dword*9]
138|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
139|
140|//-----------------------------------------------------------------------
141|.else // x64/POSIX stack layout
142|
143|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
144|.macro saveregs_ // Push rbx, r15, r14, then reserve CFRAME_SPACE bytes below them.
145| push rbx; push r15; push r14
146| sub rsp, CFRAME_SPACE
147|.endmacro
148|.macro saveregs // Push rbp first, then do saveregs_: four pushes plus CFRAME_SPACE in total.
149| push rbp; saveregs_
150|.endmacro
151|.macro restoreregs // Exact inverse of saveregs: release CFRAME_SPACE, pop in reverse push order.
152| add rsp, CFRAME_SPACE
153| pop r14; pop r15; pop rbx; pop rbp
154|.endmacro
155|
156|//----- 16 byte aligned,
157|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
158|.define SAVE_R4, aword [rsp+aword*8]
159|.define SAVE_R3, aword [rsp+aword*7]
160|.define SAVE_R2, aword [rsp+aword*6]
161|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
162|.define SAVE_CFRAME, aword [rsp+aword*4]
163|.define SAVE_PC, aword [rsp+aword*3]
164|.define SAVE_L, aword [rsp+aword*2]
165|.define SAVE_ERRF, dword [rsp+dword*3]
166|.define SAVE_NRES, dword [rsp+dword*2]
167|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
168|//----- 16 byte aligned
169|
170|.define TMP1d, dword [rsp]
171|.define TMP1hi, dword [rsp+dword*1]
172|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
173|
174|.endif
175|
176|//-----------------------------------------------------------------------
177|
178|// Instruction headers.
179|.macro ins_A; .endmacro
180|.macro ins_AD; .endmacro
181|.macro ins_AJ; .endmacro
182|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro
183|.macro ins_AB_; movzx RBd, RCH; .endmacro
184|.macro ins_A_C; movzx RCd, RCL; .endmacro
185|.macro ins_AND; not RD; .endmacro
186|
187|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
188|.macro ins_NEXT
189| mov RCd, [PC] // Fetch the 32 bit instruction word at PC.
190| movzx RAd, RCH // RA = bits 8-15 of the instruction word.
191| movzx OP, RCL // OP = bits 0-7 of the instruction word. OP aliases RBd.
192| add PC, 4 // Advance PC past the fetched instruction.
193| shr RCd, 16 // RC (= RD) keeps bits 16-31 of the instruction word.
194| jmp aword [DISPATCH+OP*8] // Indirect jump through the table at DISPATCH, one 8 byte entry per OP value.
195|.endmacro
196|
197|// Instruction footer.
198|.if 1
199| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
200| .define ins_next, ins_NEXT
201| .define ins_next_, ins_NEXT
202|.else
203| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
204| // Affects only certain kinds of benchmarks (and only with -j off).
205| // Around 10%-30% slower on Core2, a lot slower on P4.
206| .macro ins_next
207| jmp ->ins_next
208| .endmacro
209| .macro ins_next_
210| ->ins_next:
211| ins_NEXT
212| .endmacro
213|.endif
214|
215|// Call decode and dispatch.
216|.macro ins_callt
217| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
218| mov PC, LFUNC:RB->pc // PC = pc field of the function object in RB.
219| mov RAd, [PC] // Fetch the instruction word at the new PC into RA (RD is left untouched).
220| movzx OP, RAL // OP = bits 0-7 of the instruction word. OP aliases RBd, so RB is overwritten here.
221| movzx RAd, RAH // RA = bits 8-15 of the instruction word.
222| add PC, 4 // Advance PC past the fetched instruction.
223| jmp aword [DISPATCH+OP*8] // Indirect jump through the table at DISPATCH, one 8 byte entry per OP value.
224|.endmacro
225|
226|.macro ins_call // Like ins_callt, but first stores the current PC below the new base.
227| // BASE = new base, RB = LFUNC, RD = nargs+1
228| mov [BASE-8], PC // Establishes the [BASE-8] = PC precondition listed for ins_callt.
229| ins_callt
230|.endmacro
231|
232|//-----------------------------------------------------------------------
233|
234|// Macros to clear or set tags. The tag occupies the upper 17 bits (bits 47-63) of a 64 bit value.
235|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro // Zero bits 47-63 of reg, keep bits 0-46.
236|.macro settp, reg, tp // In-place form: reg |= (tp << 47). Clobbers ITYPE.
237| mov64 ITYPE, ((int64_t)tp<<47)
238| or reg, ITYPE
239|.endmacro
240|.macro settp, dst, reg, tp // Three-operand form: dst = (tp << 47) | reg. dst is loaded first, so it must not be the same register as reg.
241| mov64 dst, ((int64_t)tp<<47)
242| or dst, reg
243|.endmacro
244|.macro setint, reg // In-place settp with tp = LJ_TISNUM. Clobbers ITYPE.
245| settp reg, LJ_TISNUM
246|.endmacro
247|.macro setint, dst, reg // Three-operand settp with tp = LJ_TISNUM.
248| settp dst, reg, LJ_TISNUM
249|.endmacro
250|
251|// Macros to test operand types. All of them clobber ITYPE.
252|.macro checktp_nc, reg, tp, target // Jump to target if the tag of reg differs from tp. reg is left unchanged.
253| mov ITYPE, reg
254| sar ITYPE, 47 // ITYPE = bits 47-63 of reg, sign-extended.
255| cmp ITYPEd, tp // Only the low 32 bits of ITYPE are compared against tp.
256| jne target
257|.endmacro
258|.macro checktp, reg, tp, target // Like checktp_nc, but additionally strips the tag bits from reg.
259| mov ITYPE, reg
260| cleartp reg // Runs before the branch, so reg is cleared on both the taken and the fall-through path.
261| sar ITYPE, 47
262| cmp ITYPEd, tp
263| jne target
264|.endmacro
265|.macro checktptp, src, tp, target // Same instruction sequence as checktp_nc; src is left unchanged.
266| mov ITYPE, src
267| sar ITYPE, 47
268| cmp ITYPEd, tp
269| jne target
270|.endmacro
271|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro // checktp against LJ_TSTR.
272|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro // checktp against LJ_TTAB.
273|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro // checktp against LJ_TFUNC.
274|
275|.macro checknumx, reg, target, jump // Compare the tag of reg with LJ_TISNUM, then branch to target using the given jump mnemonic. Clobbers ITYPE, reg is left unchanged.
276| mov ITYPE, reg
277| sar ITYPE, 47 // ITYPE = bits 47-63 of reg, sign-extended.
278| cmp ITYPEd, LJ_TISNUM
279| jump target
280|.endmacro
281|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro // Branch if tag != LJ_TISNUM.
282|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro // Same expansion as checkint.
283|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro // Branch if tag >= LJ_TISNUM (unsigned compare).
284|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro // Same expansion as checknum.
285|.macro checknumber, src, target; checknumx src, target, ja; .endmacro // Branch if tag > LJ_TISNUM (unsigned compare).
286|
287|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro // reg = all bits set except bit 47.
288|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro // reg = all bits set except bit 48.
289|
290|// These operands must be used with movzx.
291|.define PC_OP, byte [PC-4]
292|.define PC_RA, byte [PC-3]
293|.define PC_RB, byte [PC-1]
294|.define PC_RC, byte [PC-2]
295|.define PC_RD, word [PC-2]
296|
297|.macro branchPC, reg // PC += (reg - BCBIAS_J) * 4, i.e. a biased jump counted in 4 byte instruction units.
298| lea PC, [PC+reg*4-BCBIAS_J*4]
299|.endmacro
300|
301|// Assumes DISPATCH is relative to GL.
302#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
303#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
304|
305#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
306|
307|// Decrement hashed hotcount and trigger trace recorder if zero.
308|.macro hotloop, reg
309| mov reg, PCd // Hash input is the low 32 bits of PC.
310| shr reg, 1
311| and reg, HOTCOUNT_PCMASK // reg = (PCd >> 1) & HOTCOUNT_PCMASK, used as a byte offset below.
312| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP // Subtract from the 16 bit counter at DISPATCH+GG_DISP2HOT+reg.
313| jb ->vm_hotloop // Taken when the subtraction borrows (counter was below HOTCOUNT_LOOP).
314|.endmacro
315|
316|.macro hotcall, reg // Same counter update as hotloop, but uses HOTCOUNT_CALL and branches to vm_hotcall.
317| mov reg, PCd // Hash input is the low 32 bits of PC.
318| shr reg, 1
319| and reg, HOTCOUNT_PCMASK // reg = (PCd >> 1) & HOTCOUNT_PCMASK, used as a byte offset below.
320| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL // Subtract from the 16 bit counter at DISPATCH+GG_DISP2HOT+reg.
321| jb ->vm_hotcall // Taken when the subtraction borrows (counter was below HOTCOUNT_CALL).
322|.endmacro
323|
324|// Set current VM state.
325|.macro set_vmstate, st // st is pasted onto LJ_VMST_ to form the constant name, e.g. set_vmstate C uses LJ_VMST_C.
326| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st // Store the bitwise complement of the constant in the vmstate field of global_State.
327|.endmacro
328|
329|.macro fpop1; fstp st1; .endmacro
330|
331|// Synthesize SSE FP constants. Each macro builds the bit pattern in the scratch register tmp and then moves it to reg.
332|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
333| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp // NOTE(review): U64x(hi,lo) presumably forms a 64 bit constant from two 32 bit hex halves -- confirm against its definition.
334|.endmacro
335|
336|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
337| mov64 tmp, U64x(val,00000000); movd reg, tmp // val is given as bare hex digits (e.g. 3ff00000); the second U64x argument is all zero.
338|.endmacro
339|
340|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
341| sseconst_hi reg, tmp, 80000000
342|.endmacro
343|.macro sseconst_1, reg, tmp // Synthesize 1.0.
344| sseconst_hi reg, tmp, 3ff00000
345|.endmacro
346|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
347| sseconst_hi reg, tmp, bff00000
348|.endmacro
349|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
350| sseconst_hi reg, tmp, 43300000
351|.endmacro
352|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
353| sseconst_hi reg, tmp, 43380000
354|.endmacro
355|
356|// Move table write barrier back. Overwrites reg.
357|.macro barrierback, tab, reg
358| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab): clear the LJ_GC_BLACK bit(s) in tab->marked.
359| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current head of the gc.grayagain list in global_State.
360| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new list head.
361| mov tab->gclist, reg // Link the previous head behind tab.
362|.endmacro
363|
364|//-----------------------------------------------------------------------
365
366/* Generate subroutines used by opcodes and other parts of the VM. */
367/* The .code_sub section should be last to help static branch prediction. */
368static void build_subroutines(BuildCtx *ctx)
369{
370 |.code_sub
371 |
372 |//-----------------------------------------------------------------------
373 |//-- Return handling ----------------------------------------------------
374 |//-----------------------------------------------------------------------
375 |
376 |->vm_returnp:
377 | test PCd, FRAME_P
378 | jz ->cont_dispatch
379 |
380 | // Return from pcall or xpcall fast func.
381 | and PC, -8
382 | sub BASE, PC // Restore caller base.
383 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
384 | mov PC, [BASE-8] // Fetch PC of previous frame.
385 | // Prepending may overwrite the pcall frame, so do it at the end.
386 | mov_true ITYPE
387 | mov aword [BASE+RA], ITYPE // Prepend true to results.
388 |
389 |->vm_returnc:
390 | add RDd, 1 // RD = nresults+1
391 | jz ->vm_unwind_yield
392 | mov MULTRES, RDd
393 | test PC, FRAME_TYPE
394 | jz ->BC_RET_Z // Handle regular return to Lua.
395 |
396 |->vm_return:
397 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
398 | xor PC, FRAME_C
399 | test PCd, FRAME_TYPE
400 | jnz ->vm_returnp
401 |
402 | // Return to C.
403 | set_vmstate C
404 | and PC, -8
405 | sub PC, BASE
406 | neg PC // Previous base = BASE - delta.
407 |
408 | sub RDd, 1
409 | jz >2
410 |1: // Move results down.
411 | mov RB, [BASE+RA]
412 | mov [BASE-16], RB
413 | add BASE, 8
414 | sub RDd, 1
415 | jnz <1
416 |2:
417 | mov L:RB, SAVE_L
418 | mov L:RB->base, PC
419 |3:
420 | mov RDd, MULTRES
421 | mov RAd, SAVE_NRES // RA = wanted nresults+1
422 |4:
423 | cmp RAd, RDd
424 | jne >6 // More/less results wanted?
425 |5:
426 | sub BASE, 16
427 | mov L:RB->top, BASE
428 |
429 |->vm_leave_cp:
430 | mov RA, SAVE_CFRAME // Restore previous C frame.
431 | mov L:RB->cframe, RA
432 | xor eax, eax // Ok return status for vm_pcall.
433 |
434 |->vm_leave_unw:
435 | restoreregs
436 | ret
437 |
438 |6:
439 | jb >7 // Less results wanted?
440 | // More results wanted. Check stack size and fill up results with nil.
441 | cmp BASE, L:RB->maxstack
442 | ja >8
443 | mov aword [BASE-16], LJ_TNIL
444 | add BASE, 8
445 | add RDd, 1
446 | jmp <4
447 |
448 |7: // Less results wanted.
449 | test RAd, RAd
450 | jz <5 // But check for LUA_MULTRET+1.
451 | sub RA, RD // Negative result!
452 | lea BASE, [BASE+RA*8] // Correct top.
453 | jmp <5
454 |
455 |8: // Corner case: need to grow stack for filling up results.
456 | // This can happen if:
457 | // - A C function grows the stack (a lot).
458 | // - The GC shrinks the stack in between.
459 | // - A return back from a lua_call() with (high) nresults adjustment.
460 | mov L:RB->top, BASE // Save current top held in BASE (yes).
461 | mov MULTRES, RDd // Need to fill only remainder with nil.
462 | mov CARG2d, RAd
463 | mov CARG1, L:RB
464 | call extern lj_state_growstack // (lua_State *L, int n)
465 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
466 | jmp <3
467 |
468 |->vm_unwind_yield:
469 | mov al, LUA_YIELD
470 | jmp ->vm_unwind_c_eh
471 |
472 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
473 | // (void *cframe, int errcode)
474 | mov eax, CARG2d // Error return status for vm_pcall.
475 | mov rsp, CARG1
476 |->vm_unwind_c_eh: // Landing pad for external unwinder.
477 | mov L:RB, SAVE_L
478 | mov GL:RB, L:RB->glref
479 | mov dword GL:RB->vmstate, ~LJ_VMST_C
480 | jmp ->vm_leave_unw
481 |
482 |->vm_unwind_rethrow:
483 |.if not X64WIN
484 | mov CARG1, SAVE_L
485 | mov CARG2d, eax
486 | restoreregs
487 | jmp extern lj_err_throw // (lua_State *L, int errcode)
488 |.endif
489 |
490 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
491 | // (void *cframe)
492 | and CARG1, CFRAME_RAWMASK
493 | mov rsp, CARG1
494 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
495 | mov L:RB, SAVE_L
496 | mov RDd, 1+1 // Really 1+2 results, incr. later.
497 | mov BASE, L:RB->base
498 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
499 | add DISPATCH, GG_G2DISP
500 | mov PC, [BASE-8] // Fetch PC of previous frame.
501 | mov_false RA
502 | mov RB, [BASE]
503 | mov [BASE-16], RA // Prepend false to error message.
504 | mov [BASE-8], RB
505 | mov RA, -16 // Results start at BASE+RA = BASE-16.
506 | set_vmstate INTERP
507 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
508 |
509 |//-----------------------------------------------------------------------
510 |//-- Grow stack for calls -----------------------------------------------
511 |//-----------------------------------------------------------------------
512 |
513 |->vm_growstack_c: // Grow stack for C function.
514 | mov CARG2d, LUA_MINSTACK
515 | jmp >2
516 |
517 |->vm_growstack_v: // Grow stack for vararg Lua function.
518 | sub RD, 8
519 | jmp >1
520 |
521 |->vm_growstack_f: // Grow stack for fixarg Lua function.
522 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
523 | lea RD, [BASE+NARGS:RD*8-8]
524 |1:
525 | movzx RAd, byte [PC-4+PC2PROTO(framesize)]
526 | add PC, 4 // Must point after first instruction.
527 | mov L:RB->base, BASE
528 | mov L:RB->top, RD
529 | mov SAVE_PC, PC
530 | mov CARG2, RA
531 |2:
532 | // RB = L, L->base = new base, L->top = top
533 | mov CARG1, L:RB
534 | call extern lj_state_growstack // (lua_State *L, int n)
535 | mov BASE, L:RB->base
536 | mov RD, L:RB->top
537 | mov LFUNC:RB, [BASE-16]
538 | cleartp LFUNC:RB
539 | sub RD, BASE
540 | shr RDd, 3
541 | add NARGS:RDd, 1
542 | // BASE = new base, RB = LFUNC, RD = nargs+1
543 | ins_callt // Just retry the call.
544 |
545 |//-----------------------------------------------------------------------
546 |//-- Entry points into the assembler VM ---------------------------------
547 |//-----------------------------------------------------------------------
548 |
549 |->vm_resume: // Setup C frame and resume thread.
550 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
551 | saveregs
552 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
553 | mov SAVE_L, CARG1
554 | mov RA, CARG2
555 | mov PCd, FRAME_CP
556 | xor RDd, RDd
557 | lea KBASE, [esp+CFRAME_RESUME]
558 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
559 | add DISPATCH, GG_G2DISP
560 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
561 | mov SAVE_CFRAME, RD
562 | mov SAVE_NRES, RDd
563 | mov SAVE_ERRF, RDd
564 | mov L:RB->cframe, KBASE
565 | cmp byte L:RB->status, RDL
566 | je >2 // Initial resume (like a call).
567 |
568 | // Resume after yield (like a return).
569 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
570 | set_vmstate INTERP
571 | mov byte L:RB->status, RDL
572 | mov BASE, L:RB->base
573 | mov RD, L:RB->top
574 | sub RD, RA
575 | shr RDd, 3
576 | add RDd, 1 // RD = nresults+1
577 | sub RA, BASE // RA = resultofs
578 | mov PC, [BASE-8]
579 | mov MULTRES, RDd
580 | test PCd, FRAME_TYPE
581 | jz ->BC_RET_Z
582 | jmp ->vm_return
583 |
584 |->vm_pcall: // Setup protected C frame and enter VM.
585 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
586 | saveregs
587 | mov PCd, FRAME_CP
588 | mov SAVE_ERRF, CARG4d
589 | jmp >1
590 |
591 |->vm_call: // Setup C frame and enter VM.
592 | // (lua_State *L, TValue *base, int nres1)
593 | saveregs
594 | mov PCd, FRAME_C
595 |
596 |1: // Entry point for vm_pcall above (PC = ftype).
597 | mov SAVE_NRES, CARG3d
598 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
599 | mov SAVE_L, CARG1
600 | mov RA, CARG2
601 |
602 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
603 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
604 | mov SAVE_CFRAME, KBASE
605 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
606 | add DISPATCH, GG_G2DISP
607 | mov L:RB->cframe, rsp
608 |
609 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
610 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
611 | set_vmstate INTERP
612 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
613 | add PC, RA
614 | sub PC, BASE // PC = frame delta + frame type
615 |
616 | mov RD, L:RB->top
617 | sub RD, RA
618 | shr NARGS:RDd, 3
619 | add NARGS:RDd, 1 // RD = nargs+1
620 |
621 |->vm_call_dispatch:
622 | mov LFUNC:RB, [RA-16]
623 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
624 |
625 |->vm_call_dispatch_f:
626 | mov BASE, RA
627 | ins_call
628 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
629 |
630 |->vm_cpcall: // Setup protected C frame, call C.
631 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
632 | saveregs
633 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
634 | mov SAVE_L, CARG1
635 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
636 |
637 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
638 | sub KBASE, L:RB->top
639 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
640 | mov SAVE_ERRF, 0 // No error function.
641 | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
642 | add DISPATCH, GG_G2DISP
643 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
644 |
645 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
646 | mov SAVE_CFRAME, KBASE
647 | mov L:RB->cframe, rsp
648 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
649 |
650 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
651 | // TValue * (new base) or NULL returned in eax (RC).
652 | test RC, RC
653 | jz ->vm_leave_cp // No base? Just remove C frame.
654 | mov RA, RC
655 | mov PCd, FRAME_CP
656 | jmp <2 // Else continue with the call.
657 |
658 |//-----------------------------------------------------------------------
659 |//-- Metamethod handling ------------------------------------------------
660 |//-----------------------------------------------------------------------
661 |
662 |//-- Continuation dispatch ----------------------------------------------
663 |
664 |->cont_dispatch:
665 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
666 | add RA, BASE
667 | and PC, -8
668 | mov RB, BASE
669 | sub BASE, PC // Restore caller BASE.
670 | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
671 | mov RC, RA // ... in [RC]
672 | mov PC, [RB-24] // Restore PC from [cont|PC].
673 | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
674 |.if FFI
675 | cmp RA, 1
676 | jbe >1
677 |.endif
678 | mov LFUNC:KBASE, [BASE-16]
679 | cleartp LFUNC:KBASE
680 | mov KBASE, LFUNC:KBASE->pc
681 | mov KBASE, [KBASE+PC2PROTO(k)]
682 | // BASE = base, RC = result, RB = meta base
683 | jmp RA // Jump to continuation.
684 |
685 |.if FFI
686 |1:
687 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
688 | // cont = 0: Tail call from C function.
689 | sub RB, BASE
690 | shr RBd, 3
691 | lea RDd, [RBd-3]
692 | jmp ->vm_call_tail
693 |.endif
694 |
695 |->cont_cat: // BASE = base, RC = result, RB = mbase
696 | movzx RAd, PC_RB
697 | sub RB, 32
698 | lea RA, [BASE+RA*8]
699 | sub RA, RB
700 | je ->cont_ra
701 | neg RA
702 | shr RAd, 3
703 |.if X64WIN
704 | mov CARG3d, RAd
705 | mov L:CARG1, SAVE_L
706 | mov L:CARG1->base, BASE
707 | mov RC, [RC]
708 | mov [RB], RC
709 | mov CARG2, RB
710 |.else
711 | mov L:CARG1, SAVE_L
712 | mov L:CARG1->base, BASE
713 | mov CARG3d, RAd
714 | mov RA, [RC]
715 | mov [RB], RA
716 | mov CARG2, RB
717 |.endif
718 | jmp ->BC_CAT_Z
719 |
720 |//-- Table indexing metamethods -----------------------------------------
721 |
722 |->vmeta_tgets:
723 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
724 | mov TMP1, STR:RC
725 | lea RC, TMP1
726 | cmp PC_OP, BC_GGET
727 | jne >1
728 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
729 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
730 | mov [RB], TAB:RA
731 | jmp >2
732 |
733 |->vmeta_tgetb:
734 | movzx RCd, PC_RC
735 |.if DUALNUM
736 | setint RC
737 | mov TMP1, RC
738 |.else
739 | cvtsi2sd xmm0, RCd
740 | movsd TMP1, xmm0
741 |.endif
742 | lea RC, TMP1
743 | jmp >1
744 |
745 |->vmeta_tgetv:
746 | movzx RCd, PC_RC // Reload TValue *k from RC.
747 | lea RC, [BASE+RC*8]
748 |1:
749 | movzx RBd, PC_RB // Reload TValue *t from RB.
750 | lea RB, [BASE+RB*8]
751 |2:
752 | mov L:CARG1, SAVE_L
753 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
754 | mov CARG2, RB
755 | mov CARG3, RC
756 | mov L:RB, L:CARG1
757 | mov SAVE_PC, PC
758 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
759 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
760 | mov BASE, L:RB->base
761 | test RC, RC
762 | jz >3
763 |->cont_ra: // BASE = base, RC = result
764 | movzx RAd, PC_RA
765 | mov RB, [RC]
766 | mov [BASE+RA*8], RB
767 | ins_next
768 |
769 |3: // Call __index metamethod.
770 | // BASE = base, L->top = new base, stack = cont/func/t/k
771 | mov RA, L:RB->top
772 | mov [RA-24], PC // [cont|PC]
773 | lea PC, [RA+FRAME_CONT]
774 | sub PC, BASE
775 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
776 | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
777 | cleartp LFUNC:RB
778 | jmp ->vm_call_dispatch_f
779 |
780 |->vmeta_tgetr: // Integer-key lookup via lj_tab_getinth(TAB:RB, RCd); a NULL result yields nil.
781 | mov CARG1, TAB:RB
782 | mov RB, BASE // Save BASE.
783 | mov CARG2d, RCd // Caveat: CARG2 == BASE
784 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
785 | // cTValue * or NULL returned in eax (RC).
786 | movzx RAd, PC_RA
787 | mov BASE, RB // Restore BASE.
788 | test RC, RC
789 | jnz ->BC_TGETR_Z // Found a slot: continue at BC_TGETR_Z with RC = slot pointer.
790 | mov ITYPE, LJ_TNIL // Not found: continue at BC_TGETR2_Z with ITYPE = nil.
791 | jmp ->BC_TGETR2_Z
792 |
793 |//-----------------------------------------------------------------------
794 |
795 |->vmeta_tsets: // Table set fallback, string key: box STR:RC as a tagged TValue in TMP1.
796 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
797 | mov TMP1, STR:RC
798 | lea RC, TMP1 // RC = address of the temporary key.
799 | cmp PC_OP, BC_GSET
800 | jne >1 // Not GSET: reload the table operand at label 1.
801 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
802 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
803 | mov [RB], TAB:RA
804 | jmp >2
805 |
806 |->vmeta_tsetb: // Table set fallback, key is the operand read from PC_RC, boxed as a number in TMP1.
807 | movzx RCd, PC_RC
808 |.if DUALNUM
809 | setint RC
810 | mov TMP1, RC
811 |.else
812 | cvtsi2sd xmm0, RCd
813 | movsd TMP1, xmm0
814 |.endif
815 | lea RC, TMP1
816 | jmp >1
817 |
818 |->vmeta_tsetv: // Table set fallback, key is a stack slot.
819 | movzx RCd, PC_RC // Reload TValue *k from RC.
820 | lea RC, [BASE+RC*8]
821 |1:
822 | movzx RBd, PC_RB // Reload TValue *t from RB.
823 | lea RB, [BASE+RB*8]
824 |2: // Common tail: RB = TValue *t, RC = TValue *k.
825 | mov L:CARG1, SAVE_L
826 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
827 | mov CARG2, RB
828 | mov CARG3, RC
829 | mov L:RB, L:CARG1 // Keep L in RB across the call.
830 | mov SAVE_PC, PC
831 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
832 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
833 | mov BASE, L:RB->base // Refetch BASE from L after the call.
834 | test RC, RC
835 | jz >3
836 | // NOBARRIER: lj_meta_tset ensures the table is not black.
837 | movzx RAd, PC_RA
838 | mov RB, [BASE+RA*8] // Copy the value from stack slot RA into the returned slot.
839 | mov [RC], RB
840 |->cont_nop: // BASE = base, (RC = result)
841 | ins_next
842 |
843 |3: // Call __newindex metamethod.
844 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
845 | mov RA, L:RB->top
846 | mov [RA-24], PC // [cont|PC]
847 | movzx RCd, PC_RA
848 | // Copy value to third argument.
849 | mov RB, [BASE+RC*8]
850 | mov [RA+16], RB
851 | lea PC, [RA+FRAME_CONT] // PC = (new base - BASE) + FRAME_CONT after the sub below.
852 | sub PC, BASE
853 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
854 | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
855 | cleartp LFUNC:RB
856 | jmp ->vm_call_dispatch_f
857 |
858 |->vmeta_tsetr: // Integer-key store slot lookup via lj_tab_setinth(L, TAB:RB, RCd).
859 |.if X64WIN
860 | mov L:CARG1, SAVE_L
861 | mov CARG3d, RCd
862 | mov L:CARG1->base, BASE
863 | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. The xchg also leaves BASE saved in RB.
864 |.else
865 | mov L:CARG1, SAVE_L
866 | mov CARG2, TAB:RB
867 | mov L:CARG1->base, BASE
868 | mov RB, BASE // Save BASE.
869 | mov CARG3d, RCd // Caveat: CARG3 == BASE.
870 |.endif
871 | mov SAVE_PC, PC
872 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
873 | // TValue * returned in eax (RC).
874 | movzx RAd, PC_RA
875 | mov BASE, RB // Restore BASE.
876 | jmp ->BC_TSETR_Z
877 |
878 |//-- Comparison metamethods ---------------------------------------------
879 |
880 |->vmeta_comp: // Comparison fallback: calls lj_meta_comp on stack slots RA and RD with the current opcode.
881 | movzx RDd, PC_RD
882 | movzx RAd, PC_RA
883 | mov L:RB, SAVE_L
884 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
885 |.if X64WIN
886 | lea CARG3, [BASE+RD*8]
887 | lea CARG2, [BASE+RA*8]
888 |.else
889 | lea CARG2, [BASE+RA*8]
890 | lea CARG3, [BASE+RD*8]
891 |.endif
892 | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
893 | movzx CARG4d, PC_OP
894 | mov SAVE_PC, PC
895 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
896 | // 0/1 or TValue * (metamethod) returned in eax (RC).
897 |3: // Shared result handling, also entered from vmeta_equal/vmeta_equal_cd.
898 | mov BASE, L:RB->base
899 | cmp RC, 1
900 | ja ->vmeta_binop // Result above 1 is a TValue *: call the metamethod.
901 |4: // Entered with flags set: 'below' skips the branch, otherwise it is taken.
902 | lea PC, [PC+4] // lea leaves the flags from the preceding cmp intact.
903 | jb >6
904 |5: // Take the branch encoded in PC_RD.
905 | movzx RDd, PC_RD
906 | branchPC RD
907 |6:
908 | ins_next
909 |
910 |->cont_condt: // BASE = base, RC = result
911 | add PC, 4
912 | mov ITYPE, [RC] // Extract the type tag of the result (upper bits after sar 47).
913 | sar ITYPE, 47
914 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
915 | jb <5
916 | jmp <6
917 |
918 |->cont_condf: // BASE = base, RC = result
919 | mov ITYPE, [RC] // Extract the type tag of the result (upper bits after sar 47).
920 | sar ITYPE, 47
921 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
922 | jmp <4 // Label 4 consumes the flags set by the cmp above.
923 |
924 |->vmeta_equal: // Equality fallback: calls lj_meta_equal(L, RA, RD, RBd).
925 | cleartp TAB:RD
926 | sub PC, 4
927 |.if X64WIN
928 | mov CARG3, RD
929 | mov CARG4d, RBd
930 | mov L:RB, SAVE_L
931 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
932 | mov CARG2, RA
933 | mov CARG1, L:RB // Caveat: CARG1 == RA.
934 |.else
935 | mov CARG2, RA
936 | mov CARG4d, RBd // Caveat: CARG4 == RA.
937 | mov L:RB, SAVE_L
938 | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
939 | mov CARG3, RD
940 | mov CARG1, L:RB
941 |.endif
942 | mov SAVE_PC, PC
943 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
944 | // 0/1 or TValue * (metamethod) returned in eax (RC).
945 | jmp <3 // Reuse the result handling at the preceding label 3.
946 |
947 |->vmeta_equal_cd: // Equality fallback via lj_meta_equal_cd; body is only assembled when FFI is enabled.
948 |.if FFI
949 | sub PC, 4
950 | mov L:RB, SAVE_L
951 | mov L:RB->base, BASE
952 | mov CARG1, L:RB
953 | mov CARG2d, dword [PC-4] // Pass the 32 bit instruction word at PC-4.
954 | mov SAVE_PC, PC
955 | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
956 | // 0/1 or TValue * (metamethod) returned in eax (RC).
957 | jmp <3 // Reuse the result handling at the preceding label 3.
958 |.endif
959 |
960 |->vmeta_istype: // Type check fallback: calls lj_meta_istype(L, RAd, RDd), then resumes at label 6.
961 | mov L:RB, SAVE_L
962 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
963 | mov CARG2d, RAd
964 | mov CARG3d, RDd
965 | mov L:CARG1, L:RB
966 | mov SAVE_PC, PC
967 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
968 | mov BASE, L:RB->base
969 | jmp <6
970 |
971 |//-- Arithmetic metamethods ---------------------------------------------
972 |
973 |->vmeta_arith_vno: // Arithmetic fallback entries; the DUALNUM variants reload RB/RC from the instruction first.
974 |.if DUALNUM
975 | movzx RBd, PC_RB
976 | movzx RCd, PC_RC
977 |.endif
978 |->vmeta_arith_vn: // Left operand is stack slot RB, right operand is constant RC (KBASE-relative).
979 | lea RC, [KBASE+RC*8]
980 | jmp >1
981 |
982 |->vmeta_arith_nvo:
983 |.if DUALNUM
984 | movzx RBd, PC_RB
985 | movzx RCd, PC_RC
986 |.endif
987 |->vmeta_arith_nv: // Operands swapped: RB = constant RC (KBASE-relative), RC = stack slot RB.
988 | lea TMPR, [KBASE+RC*8]
989 | lea RC, [BASE+RB*8]
990 | mov RB, TMPR
991 | jmp >2
992 |
993 |->vmeta_unm: // Unary minus: both operands point at stack slot RD.
994 | lea RC, [BASE+RD*8]
995 | mov RB, RC
996 | jmp >2
997 |
998 |->vmeta_arith_vvo:
999 |.if DUALNUM
1000 | movzx RBd, PC_RB
1001 | movzx RCd, PC_RC
1002 |.endif
1003 |->vmeta_arith_vv: // Both operands are stack slots.
1004 | lea RC, [BASE+RC*8]
1005 |1:
1006 | lea RB, [BASE+RB*8]
1007 |2: // Common tail: RB/RC = operand pointers, RA becomes the destination slot pointer.
1008 | lea RA, [BASE+RA*8]
1009 |.if X64WIN
1010 | mov CARG3, RB
1011 | mov CARG4, RC
1012 | movzx RCd, PC_OP
1013 | mov ARG5d, RCd
1014 | mov L:RB, SAVE_L
1015 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1016 | mov CARG2, RA
1017 | mov CARG1, L:RB // Caveat: CARG1 == RA.
1018 |.else
1019 | movzx CARG5d, PC_OP
1020 | mov CARG2, RA
1021 | mov CARG4, RC // Caveat: CARG4 == RA.
1022 | mov L:CARG1, SAVE_L
1023 | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1024 | mov CARG3, RB
1025 | mov L:RB, L:CARG1
1026 |.endif
1027 | mov SAVE_PC, PC
1028 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1029 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1030 | mov BASE, L:RB->base
1031 | test RC, RC
1032 | jz ->cont_nop // Finished in C; otherwise fall through with RC = new base.
1033 |
1034 | // Call metamethod for binary op.
1035 |->vmeta_binop:
1036 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1037 | mov RA, RC // RA = new base.
1038 | sub RC, BASE // RC = new base - BASE.
1039 | mov [RA-24], PC // [cont|PC]
1040 | lea PC, [RC+FRAME_CONT] // PC = frame delta + FRAME_CONT.
1041 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1042 | jmp ->vm_call_dispatch
1043 |
1044 |->vmeta_len: // Length fallback: calls lj_meta_len on stack slot RD.
1045 | movzx RDd, PC_RD
1046 | mov L:RB, SAVE_L
1047 | mov L:RB->base, BASE
1048 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1049 | mov L:CARG1, L:RB
1050 | mov SAVE_PC, PC
1051 | call extern lj_meta_len // (lua_State *L, TValue *o)
1052 | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1053 | mov BASE, L:RB->base
1054#if LJ_52
1055 | test RC, RC
1056 | jne ->vmeta_binop // Binop call for compatibility.
1057 | movzx RDd, PC_RD // NULL result: reload the table from slot RD and retry at BC_LEN_Z.
1058 | mov TAB:CARG1, [BASE+RD*8]
1059 | cleartp TAB:CARG1
1060 | jmp ->BC_LEN_Z
1061#else
1062 | jmp ->vmeta_binop // Binop call for compatibility.
1063#endif
1064 |
1065 |//-- Call metamethod ----------------------------------------------------
1066 |
1067 |->vmeta_call_ra: // Entry with RA as a slot number: convert to the new base pointer first.
1068 | lea RA, [BASE+RA*8+16]
1069 |->vmeta_call: // Resolve and call __call metamethod.
1070 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1071 | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1072 | mov RB, RA
1073 |.if X64WIN
1074 | mov L:TMPR, SAVE_L
1075 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1076 | lea CARG2, [RA-16]
1077 | lea CARG3, [RA+NARGS:RD*8-8]
1078 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1079 |.else
1080 | mov L:CARG1, SAVE_L
1081 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1082 | lea CARG2, [RA-16]
1083 | lea CARG3, [RA+NARGS:RD*8-8]
1084 |.endif
1085 | mov SAVE_PC, PC
1086 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1087 | mov RA, RB // Restore the new base saved in RB.
1088 | mov L:RB, SAVE_L
1089 | mov BASE, L:RB->base
1090 | mov NARGS:RDd, TMP1d
1091 | mov LFUNC:RB, [RA-16] // Load the function now sitting in the func slot.
1092 | cleartp LFUNC:RB
1093 | add NARGS:RDd, 1 // One more argument than before the call.
1094 | // This is fragile. L->base must not move, KBASE must always be defined.
1095 | cmp KBASE, BASE // Continue with CALLT if flag set.
1096 | je ->BC_CALLT_Z
1097 | mov BASE, RA
1098 | ins_call // Otherwise call resolved metamethod.
1099 |
1100 |//-- Argument coercion for 'for' statement ------------------------------
1101 |
1102 |->vmeta_for: // Calls lj_meta_for(L, RA), then re-decodes and re-dispatches the instruction at PC-4.
1103 | mov L:RB, SAVE_L
1104 | mov L:RB->base, BASE
1105 | mov CARG2, RA // Caveat: CARG2 == BASE
1106 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1107 | mov SAVE_PC, PC
1108 | call extern lj_meta_for // (lua_State *L, TValue *base)
1109 | mov BASE, L:RB->base
1110 | mov RCd, [PC-4] // Reload the 32 bit instruction word.
1111 | movzx RAd, RCH // Second byte -> RA.
1112 | movzx OP, RCL // Low byte -> opcode.
1113 | shr RCd, 16 // Upper 16 bits remain in RC.
1114 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1115 |
1116 |//-----------------------------------------------------------------------
1117 |//-- Fast functions -----------------------------------------------------
1118 |//-----------------------------------------------------------------------
1119 |
1120 |.macro .ffunc, name // Emit the ff_<name> entry label, no argument count check.
1121 |->ff_ .. name:
1122 |.endmacro
1123 |
1124 |.macro .ffunc_1, name // Entry label plus check for at least 1 argument (NARGS holds nargs+1).
1125 |->ff_ .. name:
1126 | cmp NARGS:RDd, 1+1; jb ->fff_fallback
1127 |.endmacro
1128 |
1129 |.macro .ffunc_2, name // Entry label plus check for at least 2 arguments.
1130 |->ff_ .. name:
1131 | cmp NARGS:RDd, 2+1; jb ->fff_fallback
1132 |.endmacro
1133 |
1134 |.macro .ffunc_n, name, op // As .ffunc_1, then require a number at [BASE] and apply 'op xmm0, [BASE]'.
1135 | .ffunc_1 name
1136 | checknumtp [BASE], ->fff_fallback
1137 | op xmm0, qword [BASE]
1138 |.endmacro
1139 |
1140 |.macro .ffunc_n, name // One-argument form: just loads the number into xmm0 with movsd.
1141 | .ffunc_n name, movsd
1142 |.endmacro
1143 |
1144 |.macro .ffunc_nn, name // As .ffunc_2, then require numbers in the first two slots.
1145 | .ffunc_2 name
1146 | checknumtp [BASE], ->fff_fallback
1147 | checknumtp [BASE+8], ->fff_fallback
1148 | movsd xmm0, qword [BASE] // xmm0 = first argument.
1149 | movsd xmm1, qword [BASE+8] // xmm1 = second argument.
1150 |.endmacro
1151 |
1152 |// Inlined GC threshold check. Caveat: uses label 1.
1153 |.macro ffgccheck // Clobbers RB.
1154 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1155 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1156 | jb >1 // gc.total below gc.threshold: skip the GC step.
1157 | call ->fff_gcstep
1158 |1:
1159 |.endmacro
1160 |
1161 |//-- Base library: checks -----------------------------------------------
1162 |
1163 |.ffunc_1 assert // Fast path for a true condition: shift all arguments down 16 bytes to become the results.
1164 | mov ITYPE, [BASE]
1165 | mov RB, ITYPE
1166 | sar ITYPE, 47
1167 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback // False/nil condition is handled by the fallback.
1168 | mov PC, [BASE-8]
1169 | mov MULTRES, RDd
1170 | mov RB, [BASE]
1171 | mov [BASE-16], RB // First result = first argument.
1172 | sub RDd, 2
1173 | jz >2 // Only one argument: nothing more to copy.
1174 | mov RA, BASE
1175 |1: // Copy the remaining arguments.
1176 | add RA, 8
1177 | mov RB, [RA]
1178 | mov [RA-16], RB
1179 | sub RDd, 1
1180 | jnz <1
1181 |2:
1182 | mov RDd, MULTRES // Restore nresults+1 (same as nargs+1).
1183 | jmp ->fff_res_
1184 |
1185 |.ffunc_1 type // Returns a string picked from the C function's upvalue array, indexed by the inverted type tag.
1186 | mov RC, [BASE]
1187 | sar RC, 47 // RC = type tag.
1188 | mov RBd, LJ_TISNUM
1189 | cmp RCd, RBd
1190 | cmovb RCd, RBd // Unsigned clamp: tags below LJ_TISNUM are replaced by LJ_TISNUM.
1191 | not RCd // Invert the tag to get the upvalue index.
1192 |2:
1193 | mov CFUNC:RB, [BASE-16]
1194 | cleartp CFUNC:RB
1195 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1196 | mov PC, [BASE-8]
1197 | settp STR:RC, LJ_TSTR
1198 | mov [BASE-16], STR:RC
1199 | jmp ->fff_res1
1200 |
1201 |//-- Base library: getters and setters ---------------------------------
1202 |
1203 |.ffunc_1 getmetatable // Returns mt.__metatable if present and non-nil, else the metatable, else nil.
1204 | mov TAB:RB, [BASE]
1205 | mov PC, [BASE-8]
1206 | checktab TAB:RB, >6
1207 |1: // Field metatable must be at same offset for GCtab and GCudata!
1208 | mov TAB:RB, TAB:RB->metatable
1209 |2:
1210 | test TAB:RB, TAB:RB
1211 | mov aword [BASE-16], LJ_TNIL // Default result nil; mov leaves the flags from test intact.
1212 | jz ->fff_res1
1213 | settp TAB:RC, TAB:RB, LJ_TTAB
1214 | mov [BASE-16], TAB:RC // Store metatable as default result.
1215 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
1216 | mov RAd, TAB:RB->hmask
1217 | and RAd, STR:RC->hash // Hash slot index = hmask & hash.
1218 | settp STR:RC, LJ_TSTR
1219 | imul RAd, #NODE
1220 | add NODE:RA, TAB:RB->node // NODE:RA = head of the hash chain.
1221 |3: // Rearranged logic, because we expect _not_ to find the key.
1222 | cmp NODE:RA->key, STR:RC
1223 | je >5
1224 |4:
1225 | mov NODE:RA, NODE:RA->next
1226 | test NODE:RA, NODE:RA
1227 | jnz <3
1228 | jmp ->fff_res1 // Not found, keep default result.
1229 |5:
1230 | mov RB, NODE:RA->val
1231 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1232 | mov [BASE-16], RB // Return value of mt.__metatable.
1233 | jmp ->fff_res1
1234 |
1235 |6: // Not a table: userdata uses its own metatable field, other types use the base metatable.
1236 | cmp ITYPEd, LJ_TUDATA; je <1
1237 | cmp ITYPEd, LJ_TISNUM; ja >7
1238 | mov ITYPEd, LJ_TISNUM // Tags not above LJ_TISNUM are replaced by LJ_TISNUM.
1239 |7:
1240 | not ITYPEd // Inverted tag indexes gcroot[GCROOT_BASEMT + ...].
1241 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1242 | jmp <2
1243 |
1244 |.ffunc_2 setmetatable // Fast path only: table without metatable, second argument is a table.
1245 | mov TAB:RB, [BASE]
1246 | mov TAB:TMPR, TAB:RB // Keep the original first argument for the result.
1247 | checktab TAB:RB, ->fff_fallback
1248 | // Fast path: no mt for table yet and not clearing the mt.
1249 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1250 | mov TAB:RC, [BASE+8]
1251 | checktab TAB:RC, ->fff_fallback
1252 | mov TAB:RB->metatable, TAB:RC
1253 | mov PC, [BASE-8]
1254 | mov [BASE-16], TAB:TMPR // Return original table.
1255 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1256 | jz >1
1257 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1258 | barrierback TAB:RB, RC
1259 |1:
1260 | jmp ->fff_res1
1261 |
1262 |.ffunc_2 rawget // rawget(t, k): calls lj_tab_get with the key at [BASE+8] and returns the slot contents.
1263 |.if X64WIN
1264 | mov TAB:RA, [BASE]
1265 | checktab TAB:RA, ->fff_fallback
1266 | mov RB, BASE // Save BASE.
1267 | lea CARG3, [BASE+8]
1268 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1269 | mov CARG1, SAVE_L
1270 |.else
1271 | mov TAB:CARG2, [BASE]
1272 | checktab TAB:CARG2, ->fff_fallback
1273 | mov RB, BASE // Save BASE.
1274 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1275 | mov CARG1, SAVE_L
1276 |.endif
1277 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1278 | // cTValue * returned in eax (RD).
1279 | mov BASE, RB // Restore BASE.
1280 | // Copy table slot.
1281 | mov RB, [RD]
1282 | mov PC, [BASE-8]
1283 | mov [BASE-16], RB
1284 | jmp ->fff_res1
1285 |
1286 |//-- Base library: conversions ------------------------------------------
1287 |
1288 |.ffunc tonumber // Returns the argument unchanged if it already is a number; everything else falls back.
1289 | // Only handles the number case inline (without a base argument).
1290 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1291 | mov RB, [BASE]
1292 | checknumber RB, ->fff_fallback
1293 | mov PC, [BASE-8]
1294 | mov [BASE-16], RB
1295 | jmp ->fff_res1
1296 |
1297 |.ffunc_1 tostring // Strings are returned as-is; numbers are formatted by a C call.
1298 | // Only handles the string or number case inline.
1299 | mov PC, [BASE-8]
1300 | mov STR:RB, [BASE]
1301 | checktp_nc STR:RB, LJ_TSTR, >3
1302 | // A __tostring method in the string base metatable is ignored.
1303 |2:
1304 | mov [BASE-16], STR:RB
1305 | jmp ->fff_res1
1306 |3: // Handle numbers inline, unless a number base metatable is present.
1307 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1308 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1309 | jne ->fff_fallback
1310 | ffgccheck // Caveat: uses label 1.
1311 | mov L:RB, SAVE_L
1312 | mov L:RB->base, BASE // Add frame since C call can throw.
1313 | mov SAVE_PC, PC // Redundant (but a defined value).
1314 |.if not X64WIN
1315 | mov CARG2, BASE // Otherwise: CARG2 == BASE
1316 |.endif
1317 | mov L:CARG1, L:RB
1318 |.if DUALNUM
1319 | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1320 |.else
1321 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1322 |.endif
1323 | // GCstr returned in eax (RD).
1324 | mov BASE, L:RB->base
1325 | settp STR:RB, RD, LJ_TSTR // Tag the returned string and store it via label 2.
1326 | jmp <2
1327 |
1328 |//-- Base library: iterators -------------------------------------------
1329 |
1330 |.ffunc_1 next // next(t [,k]): calls lj_tab_next; returns key/value or nil at the end.
1331 | je >2 // Missing 2nd arg?
1332 |1:
1333 |.if X64WIN
1334 | mov RA, [BASE]
1335 | checktab RA, ->fff_fallback
1336 |.else
1337 | mov CARG2, [BASE]
1338 | checktab CARG2, ->fff_fallback
1339 |.endif
1340 | mov L:RB, SAVE_L
1341 | mov L:RB->base, BASE // Add frame since C call can throw.
1342 | mov L:RB->top, BASE // Dummy frame length is ok.
1343 | mov PC, [BASE-8]
1344 |.if X64WIN
1345 | lea CARG3, [BASE+8]
1346 | mov CARG2, RA // Caveat: CARG2 == BASE.
1347 | mov CARG1, L:RB
1348 |.else
1349 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1350 | mov CARG1, L:RB
1351 |.endif
1352 | mov SAVE_PC, PC // Needed for ITERN fallback.
1353 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1354 | // Flag returned in eax (RD).
1355 | mov BASE, L:RB->base
1356 | test RDd, RDd; jz >3 // End of traversal?
1357 | // Copy key and value to results.
1358 | mov RB, [BASE+8]
1359 | mov RD, [BASE+16]
1360 | mov [BASE-16], RB
1361 | mov [BASE-8], RD
1362 |->fff_res2: // Shared exit: return two results.
1363 | mov RDd, 1+2
1364 | jmp ->fff_res
1365 |2: // Set missing 2nd arg to nil.
1366 | mov aword [BASE+8], LJ_TNIL
1367 | jmp <1
1368 |3: // End of traversal: return nil.
1369 | mov aword [BASE-16], LJ_TNIL
1370 | jmp ->fff_res1
1371 |
1372 |.ffunc_1 pairs // Returns three values: upvalue[0] of this function (tagged as function), the table, nil.
1373 | mov TAB:RB, [BASE]
1374 | mov TMPR, TAB:RB // Keep the tagged table value for the second result.
1375 | checktab TAB:RB, ->fff_fallback
1376#if LJ_52
1377 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1378#endif
1379 | mov CFUNC:RD, [BASE-16]
1380 | cleartp CFUNC:RD
1381 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1382 | settp CFUNC:RD, LJ_TFUNC
1383 | mov PC, [BASE-8] // Load PC before the slot is overwritten below.
1384 | mov [BASE-16], CFUNC:RD
1385 | mov [BASE-8], TMPR
1386 | mov aword [BASE], LJ_TNIL
1387 | mov RDd, 1+3
1388 | jmp ->fff_res
1389 |
1390 |.ffunc_2 ipairs_aux // Iterator step: increments the index and returns (index, t[index]), or nothing at the end.
1391 | mov TAB:RB, [BASE]
1392 | checktab TAB:RB, ->fff_fallback
1393 |.if DUALNUM
1394 | mov RA, [BASE+8]
1395 | checkint RA, ->fff_fallback
1396 |.else
1397 | checknumtp [BASE+8], ->fff_fallback
1398 | movsd xmm0, qword [BASE+8]
1399 |.endif
1400 | mov PC, [BASE-8]
1401 |.if DUALNUM
1402 | add RAd, 1
1403 | setint ITYPE, RA
1404 | mov [BASE-16], ITYPE // First result: the incremented index.
1405 |.else
1406 | sseconst_1 xmm1, TMPR
1407 | addsd xmm0, xmm1
1408 | cvttsd2si RAd, xmm0
1409 | movsd qword [BASE-16], xmm0 // First result: the incremented index.
1410 |.endif
1411 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1412 | mov RD, TAB:RB->array
1413 | lea RD, [RD+RA*8]
1414 |1: // RD = pointer to the slot for this index.
1415 | cmp aword [RD], LJ_TNIL; je ->fff_res0
1416 | // Copy array slot.
1417 | mov RB, [RD]
1418 | mov [BASE-8], RB
1419 | jmp ->fff_res2
1420 |2: // Check for empty hash part first. Otherwise call C function.
1421 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1422 | mov CARG1, TAB:RB
1423 | mov RB, BASE // Save BASE.
1424 | mov CARG2d, RAd // Caveat: CARG2 == BASE
1425 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1426 | // cTValue * or NULL returned in eax (RD).
1427 | mov BASE, RB
1428 | test RD, RD
1429 | jnz <1
1430 |->fff_res0: // Shared exit: return no results.
1431 | mov RDd, 1+0
1432 | jmp ->fff_res
1433 |
1434 |.ffunc_1 ipairs // Returns three values: upvalue[0] of this function (tagged as function), the table, zero.
1435 | mov TAB:RB, [BASE]
1436 | mov TMPR, TAB:RB // Keep the tagged table value for the second result.
1437 | checktab TAB:RB, ->fff_fallback
1438#if LJ_52
1439 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1440#endif
1441 | mov CFUNC:RD, [BASE-16]
1442 | cleartp CFUNC:RD
1443 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1444 | settp CFUNC:RD, LJ_TFUNC
1445 | mov PC, [BASE-8] // Load PC before the slot is overwritten below.
1446 | mov [BASE-16], CFUNC:RD
1447 | mov [BASE-8], TMPR
1448 |.if DUALNUM
1449 | mov64 RD, ((int64_t)LJ_TISNUM<<47) // Tagged integer with a zero payload.
1450 | mov [BASE], RD
1451 |.else
1452 | mov qword [BASE], 0 // All-zero bits, i.e. the double 0.0.
1453 |.endif
1454 | mov RDd, 1+3
1455 | jmp ->fff_res
1456 |
1457 |//-- Base library: catch errors ----------------------------------------
1458 |
1459 |.ffunc_1 pcall // Sets up a FRAME_PCALL frame delta in PC and dispatches a call to the first argument.
1460 | lea RA, [BASE+16] // RA = new base.
1461 | sub NARGS:RDd, 1 // The called function itself is not an argument.
1462 | mov PCd, 16+FRAME_PCALL
1463 |1: // Shared with xpcall.
1464 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1465 | shr RB, HOOK_ACTIVE_SHIFT
1466 | and RB, 1
1467 | add PC, RB // Remember active hook before pcall.
1468 | // Note: this does a (harmless) copy of the function to the PC slot, too.
1469 | mov KBASE, RD
1470 |2: // Shift the slots up by 8 bytes, highest first. NOTE(review): no check against L->maxstack is visible here -- confirm headroom is guaranteed.
1471 | mov RB, [RA+KBASE*8-24]
1472 | mov [RA+KBASE*8-16], RB
1473 | sub KBASE, 1
1474 | ja <2
1475 | jmp ->vm_call_dispatch
1476 |
1477 |.ffunc_2 xpcall // Requires a function as second argument, swaps the first two slots, then shares pcall's tail.
1478 | mov LFUNC:RA, [BASE+8]
1479 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
1480 | mov LFUNC:RB, [BASE] // Swap function and traceback.
1481 | mov [BASE], LFUNC:RA
1482 | mov [BASE+8], LFUNC:RB
1483 | lea RA, [BASE+24] // RA = new base.
1484 | sub NARGS:RDd, 2 // Neither of the first two arguments is passed on as an argument.
1485 | mov PCd, 24+FRAME_PCALL
1486 | jmp <1 // Continue at label 1 of the preceding pcall code.
1487 |
1488 |//-- Coroutine library --------------------------------------------------
1489 |
1490 |.macro coroutine_resume_wrap, resume // resume=1 emits ff_coroutine_resume, resume=0 emits ff_coroutine_wrap_aux.
1491 |.if resume
1492 |.ffunc_1 coroutine_resume
1493 | mov L:RB, [BASE] // Coroutine is the first argument (type checked below).
1494 | cleartp L:RB
1495 |.else
1496 |.ffunc coroutine_wrap_aux
1497 | mov CFUNC:RB, [BASE-16]
1498 | cleartp CFUNC:RB
1499 | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine is upvalue[0] of the wrapper function.
1500 | cleartp L:RB
1501 |.endif
1502 | mov PC, [BASE-8]
1503 | mov SAVE_PC, PC
1504 | mov TMP1, L:RB // TMP1 = coroutine, kept across the calls below.
1505 |.if resume
1506 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1507 |.endif
1508 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1509 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1510 | mov RA, L:RB->top // mov leaves the flags from the status compare intact.
1511 | je >1 // Status != LUA_YIELD (i.e. 0)?
1512 | cmp RA, L:RB->base // Check for presence of initial func.
1513 | je ->fff_fallback
1514 | mov PC, [RA-8] // Move initial function up.
1515 | mov [RA], PC
1516 | add RA, 8
1517 |1:
1518 |.if resume
1519 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1520 |.else
1521 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1522 |.endif
1523 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1524 | mov L:RB->top, PC // New top of the coroutine stack.
1525 |
1526 | mov L:RB, SAVE_L
1527 | mov L:RB->base, BASE
1528 |.if resume
1529 | add BASE, 8 // Keep resumed thread in stack for GC.
1530 |.endif
1531 | mov L:RB->top, BASE
1532 |.if resume
1533 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1534 |.else
1535 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1536 |.endif
1537 | sub RB, PC // Relative to PC.
1538 |
1539 | cmp PC, RA
1540 | je >3 // Nothing to move.
1541 |2: // Move args to coroutine.
1542 | mov RC, [PC+RB]
1543 | mov [PC-8], RC
1544 | sub PC, 8
1545 | cmp PC, RA
1546 | jne <2
1547 |3:
1548 | mov CARG2, RA
1549 | mov CARG1, TMP1
1550 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1551 |
1552 | mov L:RB, SAVE_L
1553 | mov L:PC, TMP1 // L:PC = coroutine.
1554 | mov BASE, L:RB->base
1555 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1556 | set_vmstate INTERP
1557 |
1558 | cmp eax, LUA_YIELD
1559 | ja >8 // Status above LUA_YIELD: error case.
1560 |4:
1561 | mov RA, L:PC->base
1562 | mov KBASE, L:PC->top
1563 | mov L:PC->top, RA // Clear coroutine stack.
1564 | mov PC, KBASE
1565 | sub PC, RA // PC = size of the results in bytes.
1566 | je >6 // No results?
1567 | lea RD, [BASE+PC]
1568 | shr PCd, 3 // PC = number of results.
1569 | cmp RD, L:RB->maxstack
1570 | ja >9 // Need to grow stack?
1571 |
1572 | mov RB, BASE
1573 | sub RB, RA // RB = destination offset relative to the source pointer.
1574 |5: // Move results from coroutine.
1575 | mov RD, [RA]
1576 | mov [RA+RB], RD
1577 | add RA, 8
1578 | cmp RA, KBASE
1579 | jne <5
1580 |6:
1581 |.if resume
1582 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1583 | mov_true ITYPE // Prepend true to results.
1584 | mov [BASE-8], ITYPE
1585 |.else
1586 | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1587 |.endif
1588 |7:
1589 | mov PC, SAVE_PC
1590 | mov MULTRES, RDd
1591 |.if resume
1592 | mov RA, -8 // Results start at BASE-8 (the prepended status).
1593 |.else
1594 | xor RAd, RAd // Results start at BASE.
1595 |.endif
1596 | test PCd, FRAME_TYPE
1597 | jz ->BC_RET_Z
1598 | jmp ->vm_return
1599 |
1600 |8: // Coroutine returned with error (at co->top-1).
1601 |.if resume
1602 | mov_false ITYPE // Prepend false to results.
1603 | mov [BASE-8], ITYPE
1604 | mov RA, L:PC->top
1605 | sub RA, 8
1606 | mov L:PC->top, RA // Clear error from coroutine stack.
1607 | // Copy error message.
1608 | mov RD, [RA]
1609 | mov [BASE], RD
1610 | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1611 | jmp <7
1612 |.else
1613 | mov CARG2, L:PC
1614 | mov CARG1, L:RB
1615 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1616 | // Error function does not return.
1617 |.endif
1618 |
1619 |9: // Handle stack expansion on return from yield.
1620 | mov L:RA, TMP1
1621 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1622 | mov CARG2, PC // n = number of results computed above.
1623 | mov CARG1, L:RB
1624 | call extern lj_state_growstack // (lua_State *L, int n)
1625 | mov L:PC, TMP1
1626 | mov BASE, L:RB->base
1627 | jmp <4 // Retry the stack move.
1628 |.endmacro
1629 |
1630 | coroutine_resume_wrap 1 // coroutine.resume
1631 | coroutine_resume_wrap 0 // coroutine.wrap
1632 |
1633 |.ffunc coroutine_yield // Fast path: only taken when the CFRAME_RESUME bit is set in L->cframe.
1634 | mov L:RB, SAVE_L
1635 | test aword L:RB->cframe, CFRAME_RESUME
1636 | jz ->fff_fallback
1637 | mov L:RB->base, BASE
1638 | lea RD, [BASE+NARGS:RD*8-8] // One past the last argument.
1639 | mov L:RB->top, RD
1640 | xor RDd, RDd
1641 | mov aword L:RB->cframe, RD // Clear L->cframe.
1642 | mov al, LUA_YIELD // Status is both stored in L->status and left in al.
1643 | mov byte L:RB->status, al
1644 | jmp ->vm_leave_unw
1645 |
1646 |//-- Math library -------------------------------------------------------
1647 |
1648 | .ffunc_1 math_abs // math.abs: negates integers (DUALNUM), clears the sign bit of doubles.
1649 | mov RB, [BASE]
1650 |.if DUALNUM
1651 | checkint RB, >3
1652 | cmp RBd, 0; jns ->fff_resi // Non-negative integer: return as-is.
1653 | neg RBd; js >2 // Still negative after neg: the result does not fit an int32.
1654 |->fff_resbit:
1655 |->fff_resi: // Shared exit: tag the int32 in RB and return it.
1656 | setint RB
1657 |->fff_resRB: // Shared exit: return the TValue in RB.
1658 | mov PC, [BASE-8]
1659 | mov [BASE-16], RB
1660 | jmp ->fff_res1
1661 |2:
1662 | mov64 RB, U64x(41e00000,00000000) // 2^31.
1663 | jmp ->fff_resRB
1664 |3:
1665 | ja ->fff_fallback
1666 |.else
1667 | checknum RB, ->fff_fallback
1668 |.endif
1669 | shl RB, 1 // Shift the top (sign) bit out and back in as zero.
1670 | shr RB, 1
1671 | mov PC, [BASE-8]
1672 | mov [BASE-16], RB
1673 | jmp ->fff_res1
1674 |
1675 |.ffunc_n math_sqrt, sqrtsd // xmm0 = sqrtsd of the number at [BASE].
1676 |->fff_resxmm0: // Shared exit: return the double in xmm0 as the single result.
1677 | mov PC, [BASE-8]
1678 | movsd qword [BASE-16], xmm0
1679 | // fallthrough
1680 |
1681 |->fff_res1: // Shared fast-function return: one result at BASE-16.
1682 | mov RDd, 1+1
1683 |->fff_res: // RD = nresults+1, results start at BASE-16, PC = return PC.
1684 | mov MULTRES, RDd
1685 |->fff_res_: // Same, but MULTRES has already been set.
1686 | test PCd, FRAME_TYPE
1687 | jnz >7
1688 |5:
1689 | cmp PC_RB, RDL // More results expected?
1690 | ja >6
1691 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1692 | movzx RAd, PC_RA
1693 | neg RA
1694 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1695 | ins_next
1696 |
1697 |6: // Fill up results with nil.
1698 | mov aword [BASE+RD*8-24], LJ_TNIL
1699 | add RD, 1
1700 | jmp <5
1701 |
1702 |7: // Non-standard return case.
1703 | mov RA, -16 // Results start at BASE+RA = BASE-16.
1704 | jmp ->vm_return
1705 |
1706 |.macro math_round, func // Emits ff_math_<func>, which rounds through vm_<func>_sse.
1707 | .ffunc math_ .. func
1708 |.if DUALNUM
1709 | mov RB, [BASE]
1710 | checknumx RB, ->fff_resRB, je
1711 | ja ->fff_fallback
1712 |.else
1713 | checknumtp [BASE], ->fff_fallback
1714 |.endif
1715 | movsd xmm0, qword [BASE]
1716 | call ->vm_ .. func .. _sse
1717 |.if DUALNUM
1718 | cvttsd2si RBd, xmm0
1719 | cmp RBd, 0x80000000 // 0x80000000 may come from a failed conversion: verify below.
1720 | jne ->fff_resi
1721 | cvtsi2sd xmm1, RBd
1722 | ucomisd xmm0, xmm1 // Round-trip compare: equal means the value really is this int32.
1723 | jp ->fff_resxmm0
1724 | je ->fff_resi
1725 |.endif
1726 | jmp ->fff_resxmm0
1727 |.endmacro
1728 |
1729 | math_round floor
1730 | math_round ceil
1731 |
1732 |.ffunc math_log // Single-argument math.log via C log(); other argument counts fall back.
1733 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1734 | checknumtp [BASE], ->fff_fallback
1735 | movsd xmm0, qword [BASE]
1736 | mov RB, BASE // Save BASE across the C call.
1737 | call extern log
1738 | mov BASE, RB
1739 | jmp ->fff_resxmm0
1740 |
1741 |.macro math_extern, func // Emits ff_math_<func>: calls the C function <func> with xmm0.
1742 | .ffunc_n math_ .. func
1743 | mov RB, BASE // Save BASE across the C call.
1744 | call extern func
1745 | mov BASE, RB
1746 | jmp ->fff_resxmm0
1747 |.endmacro
1748 |
1749 |.macro math_extern2, func // Two-argument variant: arguments in xmm0 and xmm1.
1750 | .ffunc_nn math_ .. func
1751 | mov RB, BASE // Save BASE across the C call.
1752 | call extern func
1753 | mov BASE, RB
1754 | jmp ->fff_resxmm0
1755 |.endmacro
1756 |
1757 | math_extern log10
1758 | math_extern exp
1759 | math_extern sin
1760 | math_extern cos
1761 | math_extern tan
1762 | math_extern asin
1763 | math_extern acos
1764 | math_extern atan
1765 | math_extern sinh
1766 | math_extern cosh
1767 | math_extern tanh
1768 | math_extern2 pow
1769 | math_extern2 atan2
1770 | math_extern2 fmod
1771 |
1772 |.ffunc_2 math_ldexp // math.ldexp(x, e) computed on the x87 FPU with fscale.
1773 | checknumtp [BASE], ->fff_fallback
1774 | checknumtp [BASE+8], ->fff_fallback
1775 | fld qword [BASE+8] // Push the exponent first, ...
1776 | fld qword [BASE] // ... then x, so x is on top of the FPU stack.
1777 | fscale
1778 | fpop1
1779 | mov PC, [BASE-8]
1780 | fstp qword [BASE-16] // Store the result and pop.
1781 | jmp ->fff_res1
1782 |
1783 |.ffunc_n math_frexp // math.frexp(x): calls C frexp(x, &TMP1); returns mantissa (xmm0) and exponent.
1784 | mov RB, BASE // Save BASE first: on X64WIN the pointer argument register is BASE.
|.if X64WIN
| lea CARG2, TMP1 // Win64 args are positional: x is slot 1 (xmm0), int * is slot 2. Caveat: CARG2 == BASE.
|.else
1785 | lea CARG1, TMP1 // POSIX: int * is the first integer argument.
|.endif
1786 | call extern frexp
1787 | mov BASE, RB // Restore BASE.
1788 | mov RBd, TMP1d // RB = exponent written by frexp.
1789 | mov PC, [BASE-8] // Load PC before the slot is overwritten below.
1790 | movsd qword [BASE-16], xmm0 // First result: mantissa.
1791 |.if DUALNUM
1792 | setint RB
1793 | mov [BASE-8], RB // Second result: exponent as tagged integer.
1794 |.else
1795 | cvtsi2sd xmm1, RBd
1796 | movsd qword [BASE-8], xmm1 // Second result: exponent as double.
1797 |.endif
1798 | mov RDd, 1+2
1799 | jmp ->fff_res
1800 |
1801 |.ffunc_n math_modf // math.modf(x): calls C modf(x, &[BASE-16]); returns integral and fractional part.
1804 | mov RB, BASE // Save BASE first: on X64WIN the pointer argument register is BASE.
|.if X64WIN
| lea CARG2, [BASE-16] // Win64 args are positional: x is slot 1 (xmm0), double * is slot 2. Caveat: CARG2 == BASE.
|.else
1802 | lea CARG1, [BASE-16] // POSIX: double * is the first integer argument.
|.endif
1805 | call extern modf // Writes the integral part straight into the first result slot.
1806 | mov BASE, RB // Restore BASE.
1807 | mov PC, [BASE-8] // Load PC before the slot is overwritten below (modf only wrote [BASE-16]).
1808 | movsd qword [BASE-8], xmm0 // Second result: fractional part.
1809 | mov RDd, 1+2
1810 | jmp ->fff_res
1811 |
1812 |.macro math_minmax, name, cmovop, sseop // Emits ff_<name>: folds all arguments with cmovop (integers) or sseop (doubles).
1813 | .ffunc name
1814 | mov RAd, 2 // RA = 1-based index of the next argument to fold, compared against nargs+1 in RD.
1815 |.if DUALNUM
1816 | mov RB, [BASE]
1817 | checkint RB, >4
1818 |1: // Handle integers.
1819 | cmp RAd, RDd; jae ->fff_resRB
1820 | mov TMPR, [BASE+RA*8-8]
1821 | checkint TMPR, >3
1822 | cmp RBd, TMPRd
1823 | cmovop RB, TMPR
1824 | add RAd, 1
1825 | jmp <1
1826 |3:
1827 | ja ->fff_fallback
1828 | // Convert intermediate result to number and continue below.
1829 | cvtsi2sd xmm0, RBd
1830 | jmp >6
1831 |4:
1832 | ja ->fff_fallback
1833 |.else
1834 | checknumtp [BASE], ->fff_fallback
1835 |.endif
1836 |
1837 | movsd xmm0, qword [BASE] // xmm0 = running result.
1838 |5: // Handle numbers or integers.
1839 | cmp RAd, RDd; jae ->fff_resxmm0
1840 |.if DUALNUM
1841 | mov RB, [BASE+RA*8-8]
1842 | checknumx RB, >6, jb
1843 | ja ->fff_fallback
1844 | cvtsi2sd xmm1, RBd // Integer argument: convert to double.
1845 | jmp >7
1846 |.else
1847 | checknumtp [BASE+RA*8-8], ->fff_fallback
1848 |.endif
1849 |6:
1850 | movsd xmm1, qword [BASE+RA*8-8]
1851 |7:
1852 | sseop xmm0, xmm1
1853 | add RAd, 1
1854 | jmp <5
1855 |.endmacro
1856 |
1857 | math_minmax math_min, cmovg, minsd
1858 | math_minmax math_max, cmovl, maxsd
1859 |
1860 |//-- String library -----------------------------------------------------
1861 |
1862 |.ffunc string_byte // Only handle the 1-arg case here.
1863 | cmp NARGS:RDd, 1+1; jne ->fff_fallback
1864 | mov STR:RB, [BASE]
1865 | checkstr STR:RB, ->fff_fallback
1866 | mov PC, [BASE-8]
1867 | cmp dword STR:RB->len, 1
1868 | jb ->fff_res0 // Return no results for empty string.
1869 | movzx RBd, byte STR:RB[1] // Load the byte located right after the GCstr header.
1870 |.if DUALNUM
1871 | jmp ->fff_resi
1872 |.else
1873 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
1874 |.endif
1875 |
1876 |.ffunc string_char // Only handle the 1-arg case here.
1877 | ffgccheck
1878 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1879 |.if DUALNUM
1880 | mov RB, [BASE]
1881 | checkint RB, ->fff_fallback
1882 |.else
1883 | checknumtp [BASE], ->fff_fallback
1884 | cvttsd2si RBd, qword [BASE]
1885 |.endif
1886 | cmp RBd, 255; ja ->fff_fallback // Unsigned compare: also rejects negative values.
1887 | mov TMP1d, RBd
1888 | mov TMPRd, 1 // Length 1.
1889 | lea RD, TMP1 // Points to stack. Little-endian.
1890 |->fff_newstr: // Shared: RD = char *, TMPRd = length, PC = return PC.
1891 | mov L:RB, SAVE_L
1892 | mov L:RB->base, BASE
1893 | mov CARG3d, TMPRd // Zero-extended to size_t.
1894 | mov CARG2, RD
1895 | mov CARG1, L:RB
1896 | mov SAVE_PC, PC
1897 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1898 |->fff_resstr: // Shared: tag the string in RD and return it as the single result.
1899 | // GCstr * returned in eax (RD).
1900 | mov BASE, L:RB->base
1901 | mov PC, [BASE-8]
1902 | settp STR:RD, LJ_TSTR
1903 | mov [BASE-16], STR:RD
1904 | jmp ->fff_res1
1905 |
1906 |.ffunc string_sub // string.sub(s, start [,end]): clamps the range and creates the substring via fff_newstr.
1907 | ffgccheck
1908 | mov TMPRd, -1 // Default end = -1.
1909 | cmp NARGS:RDd, 1+2; jb ->fff_fallback
1910 | jna >1 // Exactly two arguments: keep the default end.
1911 |.if DUALNUM
1912 | mov TMPR, [BASE+16]
1913 | checkint TMPR, ->fff_fallback
1914 |.else
1915 | checknumtp [BASE+16], ->fff_fallback
1916 | cvttsd2si TMPRd, qword [BASE+16]
1917 |.endif
1918 |1:
1919 | mov STR:RB, [BASE]
1920 | checkstr STR:RB, ->fff_fallback
1921 |.if DUALNUM
1922 | mov ITYPE, [BASE+8]
1923 | mov RAd, ITYPEd // Must clear hiword for lea below.
1924 | sar ITYPE, 47
1925 | cmp ITYPEd, LJ_TISNUM
1926 | jne ->fff_fallback
1927 |.else
1928 | checknumtp [BASE+8], ->fff_fallback
1929 | cvttsd2si RAd, qword [BASE+8]
1930 |.endif
1931 | mov RCd, STR:RB->len
1932 | cmp RCd, TMPRd // len < end? (unsigned compare)
1933 | jb >5
1934 |2:
1935 | test RAd, RAd // start <= 0?
1936 | jle >7
1937 |3:
1938 | sub TMPRd, RAd // start > end?
1939 | jl ->fff_emptystr
1940 | lea RD, [STR:RB+RAd+#STR-1] // RD = address of character 'start' (1-based) in the string data.
1941 | add TMPRd, 1 // Length = end - start + 1.
1942 |4:
1943 | jmp ->fff_newstr
1944 |
1945 |5: // Negative end or overflow.
1946 | jl >6
1947 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1948 | jmp <2
1949 |6: // Overflow.
1950 | mov TMPRd, RCd // end = len
1951 | jmp <2
1952 |
1953 |7: // Negative start or underflow.
1954 | je >8
1955 | add RAd, RCd // start = start+(len+1)
1956 | add RAd, 1
1957 | jg <3 // start > 0?
1958 |8: // Underflow.
1959 | mov RAd, 1 // start = 1
1960 | jmp <3
1961 |
1962 |->fff_emptystr: // Range underflow.
1963 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1964 | jmp <4
1965 |
1966 |.macro ffstring_op, name // Emits fast function string_<name> that calls lj_buf_putstr_<name> on the temp buffer.
1967 | .ffunc_1 string_ .. name
1968 | ffgccheck
1969 |.if X64WIN
1970 | mov STR:TMPR, [BASE] // Go through TMPR here; CARG2 aliases BASE on this target (see caveat below).
1971 | checkstr STR:TMPR, ->fff_fallback
1972 |.else
1973 | mov STR:CARG2, [BASE]
1974 | checkstr STR:CARG2, ->fff_fallback
1975 |.endif
1976 | mov L:RB, SAVE_L
1977 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] // CARG1 = address of the global tmpbuf.
1978 | mov L:RB->base, BASE // Save BASE to L before BASE may be overwritten.
1979 |.if X64WIN
1980 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
1981 |.endif
1982 | mov RC, SBUF:CARG1->b
1983 | mov SBUF:CARG1->L, L:RB // Set the buffer's L field.
1984 | mov SBUF:CARG1->p, RC // Set the buffer's p field to its b field.
1985 | mov SAVE_PC, PC
1986 | call extern lj_buf_putstr_ .. name
1987 | mov CARG1, rax // Pass the returned value on as the argument to lj_buf_tostr.
1988 | call extern lj_buf_tostr
1989 | jmp ->fff_resstr // Result is handled by the shared string-result tail.
1990 |.endmacro
1991 |
1992 |ffstring_op reverse
1993 |ffstring_op lower
1994 |ffstring_op upper
1995 |
1996 |//-- Bit library --------------------------------------------------------
1997 |
1998 |.macro .ffunc_bit, name, kind, fdef // Prologue for bit.* fast functions: leaves the first argument as an integer in RBd.
1999 | fdef name
2000 |.if kind == 2
2001 | sseconst_tobit xmm1, RB // kind 2: load the tobit constant up front; xmm1 is reused by the caller's loop.
2002 |.endif
2003 |.if DUALNUM
2004 | mov RB, [BASE]
2005 | checkint RB, >1 // Not an integer: try the number conversion below.
2006 |.if kind > 0
2007 | jmp >2 // Integer argument: skip the conversion.
2008 |.else
2009 | jmp ->fff_resbit // kind 0: an integer argument is already the result.
2010 |.endif
2011 |1:
2012 | ja ->fff_fallback // Flags from checkint: neither integer nor number.
2013 | movd xmm0, RB
2014 |.else
2015 | checknumtp [BASE], ->fff_fallback
2016 | movsd xmm0, qword [BASE]
2017 |.endif
2018 |.if kind < 2
2019 | sseconst_tobit xmm1, RB
2020 |.endif
2021 | addsd xmm0, xmm1 // Add the tobit constant; the low 32 bits then hold the integer result.
2022 | movd RBd, xmm0
2023 |2:
2024 |.endmacro
2025 |
2026 |.macro .ffunc_bit, name, kind // Two-argument form: defaults fdef to .ffunc_1.
2027 | .ffunc_bit name, kind, .ffunc_1
2028 |.endmacro
2029 |
2030 |.ffunc_bit bit_tobit, 0
2031 | jmp ->fff_resbit
2032 |
2033 |.macro .ffunc_bit_op, name, ins // Variadic bit op: folds all arguments into RBd with instruction 'ins'.
2034 | .ffunc_bit name, 2
2035 | mov TMPRd, NARGS:RDd // Save for fallback.
2036 | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument slot.
2037 |1:
2038 | cmp RD, BASE
2039 | jbe ->fff_resbit // Walked back down to the first argument: done.
2040 |.if DUALNUM
2041 | mov RA, [RD]
2042 | checkint RA, >2
2043 | ins RBd, RAd
2044 | sub RD, 8 // Step to the previous argument.
2045 | jmp <1
2046 |2:
2047 | ja ->fff_fallback_bit_op // Neither integer nor number.
2048 | movd xmm0, RA
2049 |.else
2050 | checknumtp [RD], ->fff_fallback_bit_op
2051 | movsd xmm0, qword [RD]
2052 |.endif
2053 | addsd xmm0, xmm1 // xmm1 still holds the tobit constant loaded by .ffunc_bit (kind 2).
2054 | movd RAd, xmm0
2055 | ins RBd, RAd
2056 | sub RD, 8 // Step to the previous argument.
2057 | jmp <1
2058 |.endmacro
2059 |
2060 |.ffunc_bit_op bit_band, and
2061 |.ffunc_bit_op bit_bor, or
2062 |.ffunc_bit_op bit_bxor, xor
2063 |
2064 |.ffunc_bit bit_bswap, 1 // bit.bswap: byte-swap the 32 bit argument.
2065 | bswap RBd
2066 | jmp ->fff_resbit
2067 |
2068 |.ffunc_bit bit_bnot, 1 // bit.bnot: bitwise complement of the 32 bit argument.
2069 | not RBd
2070 |.if DUALNUM
2071 | jmp ->fff_resbit
2072 |.else
2073 |->fff_resbit: // Non-DUALNUM result tail: return the integer in RBd as a double.
2074 | cvtsi2sd xmm0, RBd
2075 | jmp ->fff_resxmm0
2076 |.endif
2077 |
2078 |->fff_fallback_bit_op: // Fallback for .ffunc_bit_op, which reuses RD as a pointer.
2079 | mov NARGS:RDd, TMPRd // Restore for fallback
2080 | jmp ->fff_fallback
2081 |
2082 |.macro .ffunc_bit_sh, name, ins // Shift/rotate fast function: applies 'ins' to RBd with the count in cl.
2083 |.if DUALNUM
2084 | .ffunc_bit name, 1, .ffunc_2
2085 | // Note: no inline conversion from number for 2nd argument!
2086 | mov RA, [BASE+8]
2087 | checkint RA, ->fff_fallback
2088 |.else
2089 | .ffunc_nn name
2090 | sseconst_tobit xmm2, RB
2091 | addsd xmm0, xmm2 // Convert both arguments with the tobit constant.
2092 | addsd xmm1, xmm2
2093 | movd RBd, xmm0 // RBd = value to shift.
2094 | movd RAd, xmm1 // RAd = shift count.
2095 |.endif
2096 | ins RBd, cl // Assumes RA is ecx.
2097 | jmp ->fff_resbit
2098 |.endmacro
2099 |
2100 |.ffunc_bit_sh bit_lshift, shl
2101 |.ffunc_bit_sh bit_rshift, shr
2102 |.ffunc_bit_sh bit_arshift, sar
2103 |.ffunc_bit_sh bit_rol, rol
2104 |.ffunc_bit_sh bit_ror, ror
2105 |
2106 |//-----------------------------------------------------------------------
2107 |
2108 |->fff_fallback_2: // Entry for 2-arg fast functions: force nargs+1 = 3.
2109 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2110 | jmp ->fff_fallback
2111 |->fff_fallback_1: // Entry for 1-arg fast functions: force nargs+1 = 2.
2112 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2113 |->fff_fallback: // Call fast function fallback handler.
2114 | // BASE = new base, RD = nargs+1
2115 | mov L:RB, SAVE_L
2116 | mov PC, [BASE-8] // Fallback may overwrite PC.
2117 | mov SAVE_PC, PC // Redundant (but a defined value).
2118 | mov L:RB->base, BASE
2119 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, stored as L->top below.
2120 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2121 | mov L:RB->top, RD
2122 | mov CFUNC:RD, [BASE-16] // Function object from the slot at BASE-16.
2123 | cleartp CFUNC:RD
2124 | cmp RA, L:RB->maxstack
2125 | ja >5 // Need to grow stack.
2126 | mov CARG1, L:RB
2127 | call aword CFUNC:RD->f // (lua_State *L)
2128 | mov BASE, L:RB->base
2129 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2130 | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2131 |1:
2132 | mov RA, L:RB->top
2133 | sub RA, BASE
2134 | shr RAd, 3 // Byte distance to slot count.
2135 | test RDd, RDd // Set flags first: the lea and mov below leave them intact for the jne.
2136 | lea NARGS:RDd, [RAd+1] // Recompute nargs+1 from L->top.
2137 | mov LFUNC:RB, [BASE-16]
2138 | jne ->vm_call_tail // Returned -1?
2139 | cleartp LFUNC:RB
2140 | ins_callt // Returned 0: retry fast path.
2141 |
2142 |// Reconstruct previous base for vmeta_call during tailcall.
2143 |->vm_call_tail:
2144 | mov RA, BASE
2145 | test PCd, FRAME_TYPE
2146 | jnz >3 // Frame type bits set: PC holds a frame delta, not a bytecode PC.
2147 | movzx RBd, PC_RA // Otherwise take the RA operand of the instruction addressed via PC.
2148 | neg RB
2149 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2150 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2151 |3:
2152 | mov RB, PC
2153 | and RB, -8 // Mask off the low three bits to get the delta.
2154 | sub BASE, RB
2155 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2156 |
2157 |5: // Grow stack for fallback handler.
2158 | mov CARG2d, LUA_MINSTACK
2159 | mov CARG1, L:RB
2160 | call extern lj_state_growstack // (lua_State *L, int n)
2161 | mov BASE, L:RB->base // The stack may have been reallocated: reload BASE.
2162 | xor RDd, RDd // Simulate a return 0.
2163 | jmp <1 // Dumb retry (goes through ff first).
2164 |
2165 |->fff_gcstep: // Call GC step function.
2166 | // BASE = new base, RD = nargs+1
2167 | pop RB // Must keep stack at same level.
2168 | mov TMP1, RB // Save return address
2169 | mov L:RB, SAVE_L
2170 | mov SAVE_PC, PC // Redundant (but a defined value).
2171 | mov L:RB->base, BASE
2172 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, stored as L->top below.
2173 | mov CARG1, L:RB
2174 | mov L:RB->top, RD
2175 | call extern lj_gc_step // (lua_State *L)
2176 | mov BASE, L:RB->base // Reload BASE and recompute nargs+1 from L after the call.
2177 | mov RD, L:RB->top
2178 | sub RD, BASE
2179 | shr RDd, 3 // Byte distance to slot count.
2180 | add NARGS:RDd, 1
2181 | mov RB, TMP1
2182 | push RB // Restore return address.
2183 | ret
2184 |
2185 |//-----------------------------------------------------------------------
2186 |//-- Special dispatch targets -------------------------------------------
2187 |//-----------------------------------------------------------------------
2188 |
2189 |->vm_record: // Dispatch target for recording phase.
2190 |.if JIT
2191 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2192 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2193 | jnz >5
2194 | // Decrement the hookcount for consistency, but always do the call.
2195 | test RDL, HOOK_ACTIVE
2196 | jnz >1
2197 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2198 | jz >1
2199 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2200 | jmp >1
2201 |.endif
2202 |
2203 |->vm_rethook: // Dispatch target for return hooks.
2204 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2205 | test RDL, HOOK_ACTIVE // Hook already active?
2206 | jnz >5
2207 | jmp >1 // Otherwise always call lj_dispatch_ins.
2208 |
2209 |->vm_inshook: // Dispatch target for instr/line hooks.
2210 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2211 | test RDL, HOOK_ACTIVE // Hook already active?
2212 | jnz >5
2213 |
2214 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2215 | jz >5 // Neither line nor count hook is set: just re-dispatch.
2216 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2217 | jz >1 // Count reached zero: call the dispatcher.
2218 | test RDL, LUA_MASKLINE
2219 | jz >5 // No line hook: just re-dispatch.
2220 |1: // Common path: call lj_dispatch_ins, then re-dispatch.
2221 | mov L:RB, SAVE_L
2222 | mov L:RB->base, BASE
2223 | mov CARG2, PC // Caveat: CARG2 == BASE
2224 | mov CARG1, L:RB
2225 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2226 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2227 |3:
2228 | mov BASE, L:RB->base // Reload BASE from L after the C call.
2229 |4:
2230 | movzx RAd, PC_RA // Re-decode the operands of the current instruction.
2231 |5:
2232 | movzx OP, PC_OP
2233 | movzx RDd, PC_RD
2234 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2235 |
2236 |->cont_hook: // Continue from hook yield.
2237 | add PC, 4
2238 | mov RA, [RB-40]
2239 | mov MULTRES, RAd // Restore MULTRES for *M ins.
2240 | jmp <4
2241 |
2242 |->vm_hotloop: // Hot loop counter underflow.
2243 |.if JIT
2244 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2245 | cleartp LFUNC:RB
2246 | mov RB, LFUNC:RB->pc
2247 | movzx RDd, byte [RB+PC2PROTO(framesize)]
2248 | lea RD, [BASE+RD*8] // RD = BASE + framesize*8, stored as L->top below.
2249 | mov L:RB, SAVE_L
2250 | mov L:RB->base, BASE
2251 | mov L:RB->top, RD
2252 | mov CARG2, PC
2253 | lea CARG1, [DISPATCH+GG_DISP2J]
2254 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2255 | mov SAVE_PC, PC
2256 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2257 | jmp <3 // Reload BASE and re-dispatch through the hook tail above.
2258 |.endif
2259 |
2260 |->vm_callhook: // Dispatch target for call hooks.
2261 | mov SAVE_PC, PC
2262 |.if JIT
2263 | jmp >1 // Skip the hot-call marker below.
2264 |.endif
2265 |
2266 |->vm_hotcall: // Hot call counter underflow.
2267 |.if JIT
2268 | mov SAVE_PC, PC
2269 | or PC, 1 // Marker for hot call.
2270 |1:
2271 |.endif
2272 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, stored as L->top below.
2273 | mov L:RB, SAVE_L
2274 | mov L:RB->base, BASE
2275 | mov L:RB->top, RD
2276 | mov CARG2, PC
2277 | mov CARG1, L:RB
2278 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2279 | // ASMFunction returned in eax/rax (RD).
2280 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2281 |.if JIT
2282 | and PC, -2 // Clear the hot call marker bit again.
2283 |.endif
2284 | mov BASE, L:RB->base
2285 | mov RA, RD // Keep the returned function pointer while RD is recomputed.
2286 | mov RD, L:RB->top
2287 | sub RD, BASE
2288 | mov RB, RA // RB = jump target.
2289 | movzx RAd, PC_RA
2290 | shr RDd, 3 // Byte distance to slot count.
2291 | add NARGS:RDd, 1 // RD = nargs+1 again.
2292 | jmp RB // Continue at the returned ASMFunction.
2293 |
2294 |->cont_stitch: // Trace stitching.
2295 |.if JIT
2296 | // BASE = base, RC = result, RB = mbase
2297 | mov ITYPEd, [RB-24] // Save previous trace number.
2298 | mov TMPRd, MULTRES
2299 | movzx RAd, PC_RA
2300 | lea RA, [BASE+RA*8] // Call base.
2301 | sub TMPRd, 1 // MULTRES holds nresults+1.
2302 | jz >2 // No results to move.
2303 |1: // Move results down.
2304 | mov RB, [RC]
2305 | mov [RA], RB
2306 | add RC, 8
2307 | add RA, 8
2308 | sub TMPRd, 1
2309 | jnz <1
2310 |2:
2311 | movzx RCd, PC_RA
2312 | movzx RBd, PC_RB
2313 | add RC, RB
2314 | lea RC, [BASE+RC*8-8] // RC = BASE + (RA operand + RB operand - 1)*8, the bound for the nil fill below.
2315 |3:
2316 | cmp RC, RA
2317 | ja >9 // More results wanted?
2318 |
2319 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2320 | mov TRACE:RD, [RA+ITYPE*8] // Look up the previous trace by its number.
2321 | test TRACE:RD, TRACE:RD
2322 | jz ->cont_nop
2323 | movzx RDd, word TRACE:RD->link
2324 | cmp RDd, RBd
2325 | je ->cont_nop // Blacklisted.
2326 | test RDd, RDd
2327 | jne =>BC_JLOOP // Jump to stitched trace.
2328 |
2329 | // Stitch a new trace to the previous trace.
2330 | mov [DISPATCH+DISPATCH_J(exitno)], RBd // 32 bit store: a full-width RB store would write 4 bytes past the 32 bit exitno field.
2331 | mov L:RB, SAVE_L
2332 | mov L:RB->base, BASE
2333 | mov CARG2, PC
2334 | lea CARG1, [DISPATCH+GG_DISP2J]
2335 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2336 | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2337 | mov BASE, L:RB->base
2338 | jmp ->cont_nop
2339 |
2340 |9: // Fill up results with nil.
2341 | mov aword [RA], LJ_TNIL
2342 | add RA, 8
2343 | jmp <3
2344 |.endif
2345 |
2346 |->vm_profhook: // Dispatch target for profiler hook.
2347#if LJ_HASPROFILE
2348 | mov L:RB, SAVE_L
2349 | mov L:RB->base, BASE // Publish BASE in L before calling into C.
2350 | mov CARG2, PC // Caveat: CARG2 == BASE
2351 | mov CARG1, L:RB
2352 | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2353 | mov BASE, L:RB->base // Reload BASE, which CARG2 may have clobbered.
2354 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2355 | sub PC, 4 // Step PC back by one 4-byte instruction before cont_nop.
2356 | jmp ->cont_nop
2357#endif
2358 |
2359 |//-----------------------------------------------------------------------
2360 |//-- Trace exit handler -------------------------------------------------
2361 |//-----------------------------------------------------------------------
2362 |
2363 |// Called from an exit stub with the exit number on the stack.
2364 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2365 |->vm_exit_handler: // Saves all registers into an ExitState on the stack and calls lj_trace_exit.
2366 |.if JIT
2367 | push r13; push r12
2368 | push r11; push r10; push r9; push r8
2369 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
2370 | push rbx; push rdx; push rcx; push rax
2371 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2372 | mov RCH, byte [rbp-16]
2373 | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit number slots to save r15/r14.
2374 | // Caveat: DISPATCH is rbx.
2375 | mov DISPATCH, [ebp] // NOTE(review): relies on the exit stub having stored DISPATCH in this slot -- confirm for GC64.
2376 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. 32 bit load: vmstate is a 32 bit field.
2377 | set_vmstate EXIT
2378 | mov [DISPATCH+DISPATCH_J(exitno)], RCd // 32 bit store: exitno is a 32 bit field.
2379 | mov [DISPATCH+DISPATCH_J(parent)], RAd // 32 bit store: a 64 bit store would spill into the following field.
2380 |.if X64WIN
2381 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2382 |.else
2383 | sub rsp, 16*8 // Room for SSE regs.
2384 |.endif
2385 | add rbp, -128
2386 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2387 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2388 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2389 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2390 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2391 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2392 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2393 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2394 | // Caveat: RB is rbp.
2395 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2396 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2397 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2398 | mov L:RB->base, BASE
2399 |.if X64WIN
2400 | lea CARG2, [rsp+4*8]
2401 |.else
2402 | mov CARG2, rsp
2403 |.endif
2404 | lea CARG1, [DISPATCH+GG_DISP2J]
2405 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 // jit_base is read as a full 64 bit pointer above, so clear all of it.
2406 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2407 | // MULTRES or negated error code returned in eax (RD).
2408 | mov RA, L:RB->cframe
2409 | and RA, CFRAME_RAWMASK
2410 | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2411 | mov BASE, L:RB->base
2412 | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2413 | jmp >1 // Continue in vm_exit_interp with RA = C frame address.
2414 |.endif
2415 |->vm_exit_interp: // Return from a trace to the interpreter; also the tail of vm_exit_handler.
2416 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2417 |.if JIT
2418 | // Restore additional callee-save registers only used in compiled code.
2419 |.if X64WIN
2420 | lea RA, [rsp+10*16+4*8]
2421 |1:
2422 | movdqa xmm15, [RA-10*16]
2423 | movdqa xmm14, [RA-9*16]
2424 | movdqa xmm13, [RA-8*16]
2425 | movdqa xmm12, [RA-7*16]
2426 | movdqa xmm11, [RA-6*16]
2427 | movdqa xmm10, [RA-5*16]
2428 | movdqa xmm9, [RA-4*16]
2429 | movdqa xmm8, [RA-3*16]
2430 | movdqa xmm7, [RA-2*16]
2431 | mov rsp, RA // Reposition stack to C frame.
2432 | movdqa xmm6, [RA-1*16]
2433 | mov r15, CSAVE_1
2434 | mov r14, CSAVE_2
2435 | mov r13, CSAVE_3
2436 | mov r12, CSAVE_4
2437 |.else
2438 | lea RA, [rsp+16]
2439 |1:
2440 | mov r13, [RA-8]
2441 | mov r12, [RA]
2442 | mov rsp, RA // Reposition stack to C frame.
2443 |.endif
2444 | test RDd, RDd; js >9 // Check for error from exit.
2445 | mov L:RB, SAVE_L
2446 | mov MULTRES, RDd
2447 | mov LFUNC:KBASE, [BASE-16]
2448 | cleartp LFUNC:KBASE
2449 | mov KBASE, LFUNC:KBASE->pc
2450 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = constants of the current function's prototype.
2451 | mov L:RB->base, BASE
2452 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 // jit_base holds a 64 bit pointer here; a dword store would leave its upper half set.
2453 | set_vmstate INTERP
2454 | // Modified copy of ins_next which handles function header dispatch, too.
2455 | mov RCd, [PC]
2456 | movzx RAd, RCH
2457 | movzx OP, RCL
2458 | add PC, 4
2459 | shr RCd, 16
2460 | cmp OP, BC_FUNCF // Function header?
2461 | jb >3
2462 | cmp OP, BC_FUNCC+2 // Fast function?
2463 | jae >4
2464 |2:
2465 | mov RCd, MULTRES // RC/RD holds nres+1.
2466 |3:
2467 | jmp aword [DISPATCH+OP*8]
2468 |
2469 |4: // Check frame below fast function.
2470 | mov RC, [BASE-8]
2471 | test RCd, FRAME_TYPE
2472 | jnz <2 // Trace stitching continuation?
2473 | // Otherwise set KBASE for Lua function below fast function.
2474 | movzx RCd, byte [RC-3]
2475 | neg RC
2476 | mov LFUNC:KBASE, [BASE+RC*8-24]
2477 | cleartp LFUNC:KBASE
2478 | mov KBASE, LFUNC:KBASE->pc
2479 | mov KBASE, [KBASE+PC2PROTO(k)]
2480 | jmp <2
2481 |
2482 |9: // Rethrow error from the right C frame.
2483 | neg RD // Turn the negated error code back into a positive one.
2484 | mov CARG1, L:RB
2485 | mov CARG2, RD
2486 | call extern lj_err_throw // (lua_State *L, int errcode)
2487 |.endif
2488 |
2489 |//-----------------------------------------------------------------------
2490 |//-- Math helper functions ----------------------------------------------
2491 |//-----------------------------------------------------------------------
2492 |
2493 |// FP value rounding. Called by math.floor/math.ceil fast functions
2494 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2495 |.macro vm_round, name, mode, cond // mode: 0 = floor, 1 = ceil, 2 = trunc. 'cond' is not referenced in the body.
2496 |->name:
2497 |->name .. _sse:
2498 | sseconst_abs xmm2, RD
2499 | sseconst_2p52 xmm3, RD
2500 | movaps xmm1, xmm0
2501 | andpd xmm1, xmm2 // |x|
2502 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2503 | jbe >1 // Also taken for NaN (unordered sets CF and ZF): x is returned unchanged.
2504 | andnpd xmm2, xmm0 // Isolate sign bit.
2505 |.if mode == 2 // trunc(x)?
2506 | movaps xmm0, xmm1
2507 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2508 | subsd xmm1, xmm3
2509 | sseconst_1 xmm3, RD
2510 | cmpsd xmm0, xmm1, 1 // |x| < result?
2511 | andpd xmm0, xmm3 // Compare mask selects either the sseconst_1 value or zero.
2512 | subsd xmm1, xmm0 // If yes, subtract 1 (the constant loaded by sseconst_1).
2513 | orpd xmm1, xmm2 // Merge sign bit back in.
2514 |.else
2515 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2516 | subsd xmm1, xmm3
2517 | orpd xmm1, xmm2 // Merge sign bit back in.
2518 | .if mode == 1 // ceil(x)?
2519 | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0.
2520 | cmpsd xmm0, xmm1, 6 // x > result?
2521 | .else // floor(x)?
2522 | sseconst_1 xmm2, RD
2523 | cmpsd xmm0, xmm1, 1 // x < result?
2524 | .endif
2525 | andpd xmm0, xmm2 // Compare mask selects either the correction constant or zero.
2526 | subsd xmm1, xmm0 // If yes, subtract +-1.
2527 |.endif
2528 | movaps xmm0, xmm1 // Return the rounded value in xmm0.
2529 |1:
2530 | ret
2531 |.endmacro
2532 |
2533 | vm_round vm_floor, 0, 1
2534 | vm_round vm_ceil, 1, JIT
2535 | vm_round vm_trunc, 2, JIT
2536 |
2537 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2538 |->vm_mod:
2539 |// Args in xmm0/xmm1, return value in xmm0.
2540 |// Caveat: xmm0-xmm5 and RC (eax) modified!
2541 | movaps xmm5, xmm0 // Keep x for the final subtraction.
2542 | divsd xmm0, xmm1 // xmm0 = x/y
2543 | sseconst_abs xmm2, RD
2544 | sseconst_2p52 xmm3, RD
2545 | movaps xmm4, xmm0
2546 | andpd xmm4, xmm2 // |x/y|
2547 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2548 | jbe >1
2549 | andnpd xmm2, xmm0 // Isolate sign bit.
2550 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2551 | subsd xmm4, xmm3
2552 | orpd xmm4, xmm2 // Merge sign bit back in.
2553 | sseconst_1 xmm2, RD
2554 | cmpsd xmm0, xmm4, 1 // x/y < result?
2555 | andpd xmm0, xmm2
2556 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2557 | movaps xmm0, xmm5
2558 | mulsd xmm1, xmm4 // y * floor(x/y)
2559 | subsd xmm0, xmm1 // x - y*floor(x/y)
2560 | ret
2561 |1: // Quotient is used as-is (no rounding step).
2562 | mulsd xmm1, xmm0 // y * (x/y)
2563 | movaps xmm0, xmm5
2564 | subsd xmm0, xmm1 // x - y*(x/y)
2565 | ret
2566 |
2567 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2568 |->vm_powi_sse: // Computes x^i (x in xmm0, integer i in eax) by repeated squaring.
2569 | cmp eax, 1; jle >6 // i<=1?
2570 | // Now 1 < (unsigned)i <= 0x80000000.
2571 |1: // Handle leading zeros.
2572 | test eax, 1; jnz >2
2573 | mulsd xmm0, xmm0 // Low bit clear: square x and halve i.
2574 | shr eax, 1
2575 | jmp <1
2576 |2:
2577 | shr eax, 1; jz >5 // No bits left: xmm0 is the result.
2578 | movaps xmm1, xmm0 // xmm1 accumulates the product of the set-bit factors.
2579 |3: // Handle trailing bits.
2580 | mulsd xmm0, xmm0
2581 | shr eax, 1; jz >4
2582 | jnc <3 // Bit shifted out was clear: only square.
2583 | mulsd xmm1, xmm0
2584 | jmp <3
2585 |4:
2586 | mulsd xmm0, xmm1 // Last bit: combine with the accumulated factors.
2587 |5:
2588 | ret
2589 |6: // Flags still come from the cmp eax, 1 at the top.
2590 | je <5 // x^1 ==> x
2591 | jb >7 // x^0 ==> 1
2592 | neg eax // Negative exponent: compute x^(-i), then take the reciprocal.
2593 | call <1
2594 | sseconst_1 xmm1, RD
2595 | divsd xmm1, xmm0
2596 | movaps xmm0, xmm1
2597 | ret
2598 |7:
2599 | sseconst_1 xmm0, RD
2600 | ret
2601 |
2602 |//-----------------------------------------------------------------------
2603 |//-- Miscellaneous functions --------------------------------------------
2604 |//-----------------------------------------------------------------------
2605 |
2606 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2607 |->vm_cpuid: // Executes cpuid for leaf f and stores eax, ebx, ecx, edx into res[0..3].
2608 | mov eax, CARG1d
2609 | .if X64WIN; push rsi; mov rsi, CARG2; .endif // On X64WIN, save rsi and load it with res.
2610 | push rbx // cpuid overwrites ebx: save and restore rbx around it.
2611 | cpuid
2612 | mov [rsi], eax
2613 | mov [rsi+4], ebx
2614 | mov [rsi+8], ecx
2615 | mov [rsi+12], edx
2616 | pop rbx
2617 | .if X64WIN; pop rsi; .endif
2618 | ret
2619 |
2620 |//-----------------------------------------------------------------------
2621 |//-- Assertions ---------------------------------------------------------
2622 |//-----------------------------------------------------------------------
2623 |
2624 |->assert_bad_for_arg_type:
2625#ifdef LUA_USE_ASSERT
2626 | int3
2627#endif
2628 | int3
2629 |
2630 |//-----------------------------------------------------------------------
2631 |//-- FFI helper functions -----------------------------------------------
2632 |//-----------------------------------------------------------------------
2633 |
2634 |// Handler for callback functions. Callback slot number in ah/al.
2635 |->vm_ffi_callback:
2636 |.if FFI
2637 |.type CTSTATE, CTState, PC
2638 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
2639 | lea DISPATCH, [ebp+GG_G2DISP] // Derive DISPATCH from the global_State pointer.
2640 | mov CTSTATE, GL:ebp->ctype_state
2641 | movzx eax, ax // Keep only the 16 bit slot number.
2642 | mov CTSTATE->cb.slot, eax
2643 | mov CTSTATE->cb.gpr[0], CARG1 // Save the incoming integer and FP argument registers in cts->cb.
2644 | mov CTSTATE->cb.gpr[1], CARG2
2645 | mov CTSTATE->cb.gpr[2], CARG3
2646 | mov CTSTATE->cb.gpr[3], CARG4
2647 | movsd qword CTSTATE->cb.fpr[0], xmm0
2648 | movsd qword CTSTATE->cb.fpr[1], xmm1
2649 | movsd qword CTSTATE->cb.fpr[2], xmm2
2650 | movsd qword CTSTATE->cb.fpr[3], xmm3
2651 |.if X64WIN
2652 | lea rax, [rsp+CFRAME_SIZE+4*8] // Start of the caller's stack arguments (X64WIN adds 4*8 bytes).
2653 |.else
2654 | lea rax, [rsp+CFRAME_SIZE] // Start of the caller's stack arguments.
2655 | mov CTSTATE->cb.gpr[4], CARG5
2656 | mov CTSTATE->cb.gpr[5], CARG6
2657 | movsd qword CTSTATE->cb.fpr[4], xmm4
2658 | movsd qword CTSTATE->cb.fpr[5], xmm5
2659 | movsd qword CTSTATE->cb.fpr[6], xmm6
2660 | movsd qword CTSTATE->cb.fpr[7], xmm7
2661 |.endif
2662 | mov CTSTATE->cb.stack, rax
2663 | mov CARG2, rsp
2664 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2665 | mov CARG1, CTSTATE
2666 | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2667 | // lua_State * returned in eax (RD).
2668 | set_vmstate INTERP
2669 | mov BASE, L:RD->base
2670 | mov RD, L:RD->top
2671 | sub RD, BASE
2672 | mov LFUNC:RB, [BASE-16]
2673 | cleartp LFUNC:RB
2674 | shr RD, 3 // Byte distance to slot count.
2675 | add RD, 1 // RD = nargs+1.
2676 | ins_callt
2677 |.endif
2678 |
2679 |->cont_ffi_callback: // Return from FFI callback.
2680 |.if FFI
2681 | mov L:RA, SAVE_L
2682 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
2683 | mov aword CTSTATE->L, L:RA
2684 | mov L:RA->base, BASE // Store base and top in L before calling into C.
2685 | mov L:RA->top, RB
2686 | mov CARG1, CTSTATE
2687 | mov CARG2, RC
2688 | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2689 | mov rax, CTSTATE->cb.gpr[0] // Load the return registers from cts->cb.
2690 | movsd xmm0, qword CTSTATE->cb.fpr[0]
2691 | jmp ->vm_leave_unw
2692 |.endif
2693 |
2694 |->vm_ffi_call: // Call C function via FFI.
2695 | // Caveat: needs special frame unwinding, see below.
2696 |.if FFI
2697 | .type CCSTATE, CCallState, rbx
2698 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Set up the frame; rbx holds the CCallState pointer.
2699 |
2700 | // Readjust stack.
2701 | mov eax, CCSTATE->spadj
2702 | sub rsp, rax
2703 |
2704 | // Copy stack slots.
2705 | movzx ecx, byte CCSTATE->nsp
2706 | sub ecx, 1
2707 | js >2 // No stack slots to copy.
2708 |1: // Copy from the highest slot index down to 0.
2709 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
2710 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
2711 | sub ecx, 1
2712 | jns <1
2713 |2:
2714 |
2715 | movzx eax, byte CCSTATE->nfpr // eax = nfpr; still set when the target function is called.
2716 | mov CARG1, CCSTATE->gpr[0]
2717 | mov CARG2, CCSTATE->gpr[1]
2718 | mov CARG3, CCSTATE->gpr[2]
2719 | mov CARG4, CCSTATE->gpr[3]
2720 |.if not X64WIN
2721 | mov CARG5, CCSTATE->gpr[4]
2722 | mov CARG6, CCSTATE->gpr[5]
2723 |.endif
2724 | test eax, eax; jz >5 // Skip all FP register loads if nfpr is zero.
2725 | movaps xmm0, CCSTATE->fpr[0]
2726 | movaps xmm1, CCSTATE->fpr[1]
2727 | movaps xmm2, CCSTATE->fpr[2]
2728 | movaps xmm3, CCSTATE->fpr[3]
2729 |.if not X64WIN
2730 | cmp eax, 4; jbe >5 // At most four FP registers in use: skip xmm4-xmm7.
2731 | movaps xmm4, CCSTATE->fpr[4]
2732 | movaps xmm5, CCSTATE->fpr[5]
2733 | movaps xmm6, CCSTATE->fpr[6]
2734 | movaps xmm7, CCSTATE->fpr[7]
2735 |.endif
2736 |5:
2737 |
2738 | call aword CCSTATE->func
2739 |
2740 | mov CCSTATE->gpr[0], rax // Store the return registers back into the CCallState.
2741 | movaps CCSTATE->fpr[0], xmm0
2742 |.if not X64WIN
2743 | mov CCSTATE->gpr[1], rdx
2744 | movaps CCSTATE->fpr[1], xmm1
2745 |.endif
2746 |
2747 | mov rbx, [rbp-8]; leave; ret // Restore rbx from its save slot and tear down the frame.
2748 |.endif
2749 |// Note: vm_ffi_call must be the last function in this object file!
2750 |
2751 |//-----------------------------------------------------------------------
2752}
2753
2754/* Generate the code for a single instruction. */
2755static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2756{
2757 int vk = 0;
2758 |// Note: aligning all instructions does not pay off.
2759 |=>defop:
2760
2761 switch (op) {
2762
2763 /* -- Comparison ops ---------------------------------------------------- */
2764
2765 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2766
2767 |.macro jmp_comp, lt, ge, le, gt, target
2768 ||switch (op) {
2769 ||case BC_ISLT:
2770 | lt target
2771 ||break;
2772 ||case BC_ISGE:
2773 | ge target
2774 ||break;
2775 ||case BC_ISLE:
2776 | le target
2777 ||break;
2778 ||case BC_ISGT:
2779 | gt target
2780 ||break;
2781 ||default: break; /* Shut up GCC. */
2782 ||}
2783 |.endmacro
2784
2785 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2786 | // RA = src1, RD = src2, JMP with RD = target
2787 | ins_AD
2788 | mov ITYPE, [BASE+RA*8]
2789 | mov RB, [BASE+RD*8]
2790 | mov RA, ITYPE
2791 | mov RD, RB
2792 | sar ITYPE, 47
2793 | sar RB, 47
2794 |.if DUALNUM
2795 | cmp ITYPEd, LJ_TISNUM; jne >7
2796 | cmp RBd, LJ_TISNUM; jne >8
2797 | add PC, 4
2798 | cmp RAd, RDd
2799 | jmp_comp jge, jl, jg, jle, >9
2800 |6:
2801 | movzx RDd, PC_RD
2802 | branchPC RD
2803 |9:
2804 | ins_next
2805 |
2806 |7: // RA is not an integer.
2807 | ja ->vmeta_comp
2808 | // RA is a number.
2809 | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
2810 | // RA is a number, RD is an integer.
2811 | cvtsi2sd xmm0, RDd
2812 | jmp >2
2813 |
2814 |8: // RA is an integer, RD is not an integer.
2815 | ja ->vmeta_comp
2816 | // RA is an integer, RD is a number.
2817 | cvtsi2sd xmm1, RAd
2818 | movd xmm0, RD
2819 | jmp >3
2820 |.else
2821 | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
2822 | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
2823 |.endif
2824 |1:
2825 | movd xmm0, RD
2826 |2:
2827 | movd xmm1, RA
2828 |3:
2829 | add PC, 4
2830 | ucomisd xmm0, xmm1
2831 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2832 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2833 |.if DUALNUM
2834 | jmp_comp jbe, ja, jb, jae, <9
2835 | jmp <6
2836 |.else
2837 | jmp_comp jbe, ja, jb, jae, >1
2838 | movzx RDd, PC_RD
2839 | branchPC RD
2840 |1:
2841 | ins_next
2842 |.endif
2843 break;
2844
2845 case BC_ISEQV: case BC_ISNEV:
2846 vk = op == BC_ISEQV;
2847 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2848 | mov RB, [BASE+RD*8]
2849 | mov ITYPE, [BASE+RA*8]
2850 | add PC, 4
2851 | mov RD, RB
2852 | mov RA, ITYPE
2853 | sar RB, 47
2854 | sar ITYPE, 47
2855 |.if DUALNUM
2856 | cmp RBd, LJ_TISNUM; jne >7
2857 | cmp ITYPEd, LJ_TISNUM; jne >8
2858 | cmp RDd, RAd
2859 if (vk) {
2860 | jne >9
2861 } else {
2862 | je >9
2863 }
2864 | movzx RDd, PC_RD
2865 | branchPC RD
2866 |9:
2867 | ins_next
2868 |
2869 |7: // RD is not an integer.
2870 | ja >5
2871 | // RD is a number.
2872 | movd xmm1, RD
2873 | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2874 | // RD is a number, RA is an integer.
2875 | cvtsi2sd xmm0, RAd
2876 | jmp >2
2877 |
2878 |8: // RD is an integer, RA is not an integer.
2879 | ja >5
2880 | // RD is an integer, RA is a number.
2881 | cvtsi2sd xmm1, RDd
2882 | jmp >1
2883 |
2884 |.else
2885 | cmp RBd, LJ_TISNUM; jae >5
2886 | cmp ITYPEd, LJ_TISNUM; jae >5
2887 | movd xmm1, RD
2888 |.endif
2889 |1:
2890 | movd xmm0, RA
2891 |2:
2892 | ucomisd xmm0, xmm1
2893 |4:
2894 iseqne_fp:
2895 if (vk) {
2896 | jp >2 // Unordered means not equal.
2897 | jne >2
2898 } else {
2899 | jp >2 // Unordered means not equal.
2900 | je >1
2901 }
2902 iseqne_end:
2903 if (vk) {
2904 |1: // EQ: Branch to the target.
2905 | movzx RDd, PC_RD
2906 | branchPC RD
2907 |2: // NE: Fallthrough to next instruction.
2908 |.if not FFI
2909 |3:
2910 |.endif
2911 } else {
2912 |.if not FFI
2913 |3:
2914 |.endif
2915 |2: // NE: Branch to the target.
2916 | movzx RDd, PC_RD
2917 | branchPC RD
2918 |1: // EQ: Fallthrough to next instruction.
2919 }
2920 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2921 op == BC_ISEQN || op == BC_ISNEN)) {
2922 | jmp <9
2923 } else {
2924 | ins_next
2925 }
2926 |
2927 if (op == BC_ISEQV || op == BC_ISNEV) {
2928 |5: // Either or both types are not numbers.
2929 |.if FFI
2930 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
2931 | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
2932 |.endif
2933 | cmp RA, RD
2934 | je <1 // Same GCobjs or pvalues?
2935 | cmp RBd, ITYPEd
2936 | jne <2 // Not the same type?
2937 | cmp RBd, LJ_TISTABUD
2938 | ja <2 // Different objects and not table/ud?
2939 |
2940 | // Different tables or userdatas. Need to check __eq metamethod.
2941 | // Field metatable must be at same offset for GCtab and GCudata!
2942 | cleartp TAB:RA
2943 | mov TAB:RB, TAB:RA->metatable
2944 | test TAB:RB, TAB:RB
2945 | jz <2 // No metatable?
2946 | test byte TAB:RB->nomm, 1<<MM_eq
2947 | jnz <2 // Or 'no __eq' flag set?
2948 if (vk) {
2949 | xor RBd, RBd // ne = 0
2950 } else {
2951 | mov RBd, 1 // ne = 1
2952 }
2953 | jmp ->vmeta_equal // Handle __eq metamethod.
2954 } else {
2955 |.if FFI
2956 |3:
2957 | cmp ITYPEd, LJ_TCDATA
2958 if (LJ_DUALNUM && vk) {
2959 | jne <9
2960 } else {
2961 | jne <2
2962 }
2963 | jmp ->vmeta_equal_cd
2964 |.endif
2965 }
2966 break;
2967 case BC_ISEQS: case BC_ISNES:
2968 vk = op == BC_ISEQS;
2969 | ins_AND // RA = src, RD = str const, JMP with RD = target
2970 | mov RB, [BASE+RA*8]
2971 | add PC, 4
2972 | checkstr RB, >3
2973 | cmp RB, [KBASE+RD*8]
2974 iseqne_test:
2975 if (vk) {
2976 | jne >2
2977 } else {
2978 | je >1
2979 }
2980 goto iseqne_end;
2981 case BC_ISEQN: case BC_ISNEN:
2982 vk = op == BC_ISEQN;
2983 | ins_AD // RA = src, RD = num const, JMP with RD = target
2984 | mov RB, [BASE+RA*8]
2985 | add PC, 4
2986 |.if DUALNUM
2987 | checkint RB, >7
2988 | mov RD, [KBASE+RD*8]
2989 | checkint RD, >8
2990 | cmp RBd, RDd
2991 if (vk) {
2992 | jne >9
2993 } else {
2994 | je >9
2995 }
2996 | movzx RDd, PC_RD
2997 | branchPC RD
2998 |9:
2999 | ins_next
3000 |
3001 |7: // RA is not an integer.
3002 | ja >3
3003 | // RA is a number.
3004 | mov RD, [KBASE+RD*8]
3005 | checkint RD, >1
3006 | // RA is a number, RD is an integer.
3007 | cvtsi2sd xmm0, RDd
3008 | jmp >2
3009 |
3010 |8: // RA is an integer, RD is a number.
3011 | cvtsi2sd xmm0, RBd
3012 | movd xmm1, RD
3013 | ucomisd xmm0, xmm1
3014 | jmp >4
3015 |1:
3016 | movd xmm0, RD
3017 |.else
3018 | checknum RB, >3
3019 |1:
3020 | movsd xmm0, qword [KBASE+RD*8]
3021 |.endif
3022 |2:
3023 | ucomisd xmm0, qword [BASE+RA*8]
3024 |4:
3025 goto iseqne_fp;
3026 case BC_ISEQP: case BC_ISNEP:
3027 vk = op == BC_ISEQP;
3028 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3029 | mov RB, [BASE+RA*8]
3030 | sar RB, 47
3031 | add PC, 4
3032 | cmp RBd, RDd
3033 if (!LJ_HASFFI) goto iseqne_test;
3034 if (vk) {
3035 | jne >3
3036 | movzx RDd, PC_RD
3037 | branchPC RD
3038 |2:
3039 | ins_next
3040 |3:
3041 | cmp RBd, LJ_TCDATA; jne <2
3042 | jmp ->vmeta_equal_cd
3043 } else {
3044 | je >2
3045 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3046 | movzx RDd, PC_RD
3047 | branchPC RD
3048 |2:
3049 | ins_next
3050 }
3051 break;
3052
3053 /* -- Unary test and copy ops ------------------------------------------- */
3054
3055 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3056 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3057 | mov ITYPE, [BASE+RD*8] // Load the whole 64 bit source slot.
3058 | add PC, 4 // Advance PC by one 4 byte instruction (the JMP named above).
3059 if (op == BC_ISTC || op == BC_ISFC) {
3060 | mov RB, ITYPE // Copy variants: keep the unshifted value for the store below.
3061 }
3062 | sar ITYPE, 47 // Reduce the slot to its type tag (bits 47 and up).
3063 | cmp ITYPEd, LJ_TISTRUECOND
3064 if (op == BC_IST || op == BC_ISTC) {
3065 | jae >1 // IST/ISTC: tag at or above LJ_TISTRUECOND (unsigned) -> do not branch.
3066 } else {
3067 | jb >1 // ISF/ISFC: tag below LJ_TISTRUECOND (unsigned) -> do not branch.
3068 }
3069 if (op == BC_ISTC || op == BC_ISFC) {
3070 | mov [BASE+RA*8], RB // Copy only happens on the branch-taken path.
3071 }
3072 | movzx RDd, PC_RD // Fetch the branch target operand.
3073 | branchPC RD
3074 |1: // Fallthrough to the next instruction.
3075 | ins_next
3076 break;
3077
3078 case BC_ISTYPE:
3079 | ins_AD // RA = src, RD = -type
3080 | mov RB, [BASE+RA*8]
3081 | sar RB, 47
3082 | add RBd, RDd
3083 | jne ->vmeta_istype
3084 | ins_next
3085 break;
3086 case BC_ISNUM:
3087 | ins_AD // RA = src, RD = -(TISNUM-1)
3088 | checknumtp [BASE+RA*8], ->vmeta_istype
3089 | ins_next
3090 break;
3091
3092 /* -- Unary ops --------------------------------------------------------- */
3093
3094 case BC_MOV:
3095 | ins_AD // RA = dst, RD = src
3096 | mov RB, [BASE+RD*8]
3097 | mov [BASE+RA*8], RB
3098 | ins_next_
3099 break;
3100 case BC_NOT:
3101 | ins_AD // RA = dst, RD = src
3102 | mov RB, [BASE+RD*8]
3103 | sar RB, 47 // RB = sign-extended type tag of the source.
3104 | mov RCd, 2
3105 | cmp RB, LJ_TISTRUECOND // Carry set iff RB is below LJ_TISTRUECOND (unsigned).
3106 | sbb RCd, 0 // RC = 2 - carry, i.e. 1 when below, 2 otherwise.
3107 | shl RC, 47 // Same shl/not encoding as BC_KPRI uses for a primitive type code.
3108 | not RC // NOTE(review): presumably 1 encodes false and 2 encodes true -- confirm.
3109 | mov [BASE+RA*8], RC
3110 | ins_next
3111 break;
3112 case BC_UNM:
3113 | ins_AD // RA = dst, RD = src
3114 | mov RB, [BASE+RD*8]
3115 |.if DUALNUM
3116 | checkint RB, >5
3117 | neg RBd
3118 | jo >4
3119 | setint RB
3120 |9:
3121 | mov [BASE+RA*8], RB
3122 | ins_next
3123 |4:
3124 | mov64 RB, U64x(41e00000,00000000) // 2^31.
3125 | jmp <9
3126 |5:
3127 | ja ->vmeta_unm
3128 |.else
3129 | checknum RB, ->vmeta_unm
3130 |.endif
3131 | mov64 RD, U64x(80000000,00000000)
3132 | xor RB, RD
3133 |.if DUALNUM
3134 | jmp <9
3135 |.else
3136 | mov [BASE+RA*8], RB
3137 | ins_next
3138 |.endif
3139 break;
3140 case BC_LEN:
3141 | ins_AD // RA = dst, RD = src
3142 | mov RD, [BASE+RD*8]
3143 | checkstr RD, >2
3144 |.if DUALNUM
3145 | mov RDd, dword STR:RD->len
3146 |1:
3147 | setint RD
3148 | mov [BASE+RA*8], RD
3149 |.else
3150 | xorps xmm0, xmm0
3151 | cvtsi2sd xmm0, dword STR:RD->len
3152 |1:
3153 | movsd qword [BASE+RA*8], xmm0
3154 |.endif
3155 | ins_next
3156 |2:
3157 | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
3158 | mov TAB:CARG1, TAB:RD
3159#if LJ_52
3160 | mov TAB:RB, TAB:RD->metatable
3161 | cmp TAB:RB, 0
3162 | jnz >9
3163 |3:
3164#endif
3165 |->BC_LEN_Z:
3166 | mov RB, BASE // Save BASE.
3167 | call extern lj_tab_len // (GCtab *t)
3168 | // Length of table returned in eax (RD).
3169 |.if DUALNUM
3170 | // Nothing to do.
3171 |.else
3172 | cvtsi2sd xmm0, RDd
3173 |.endif
3174 | mov BASE, RB // Restore BASE.
3175 | movzx RAd, PC_RA
3176 | jmp <1
3177#if LJ_52
3178 |9: // Check for __len.
3179 | test byte TAB:RB->nomm, 1<<MM_len
3180 | jnz <3
3181 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3182#endif
3183 break;
3184
3185 /* -- Binary ops -------------------------------------------------------- */
3186
3187 |.macro ins_arithpre, sseins, ssereg
3188 | ins_ABC
3189 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3190 ||switch (vk) {
3191 ||case 0:
3192 | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3193 | .if DUALNUM
3194 | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
3195 | .endif
3196 | movsd xmm0, qword [BASE+RB*8]
3197 | sseins ssereg, qword [KBASE+RC*8]
3198 || break;
3199 ||case 1:
3200 | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3201 | .if DUALNUM
3202 | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3203 | .endif
3204 | movsd xmm0, qword [KBASE+RC*8]
3205 | sseins ssereg, qword [BASE+RB*8]
3206 || break;
3207 ||default:
3208 | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3209 | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3210 | movsd xmm0, qword [BASE+RB*8]
3211 | sseins ssereg, qword [BASE+RC*8]
3212 || break;
3213 ||}
3214 |.endmacro
3215 |
3216 |.macro ins_arithdn, intins
3217 | ins_ABC
3218 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3219 ||switch (vk) {
3220 ||case 0:
3221 | mov RB, [BASE+RB*8]
3222 | mov RC, [KBASE+RC*8]
3223 | checkint RB, ->vmeta_arith_vno
3224 | checkint RC, ->vmeta_arith_vno
3225 | intins RBd, RCd; jo ->vmeta_arith_vno
3226 || break;
3227 ||case 1:
3228 | mov RB, [BASE+RB*8]
3229 | mov RC, [KBASE+RC*8]
3230 | checkint RB, ->vmeta_arith_nvo
3231 | checkint RC, ->vmeta_arith_nvo
3232 | intins RCd, RBd; jo ->vmeta_arith_nvo
3233 || break;
3234 ||default:
3235 | mov RB, [BASE+RB*8]
3236 | mov RC, [BASE+RC*8]
3237 | checkint RB, ->vmeta_arith_vvo
3238 | checkint RC, ->vmeta_arith_vvo
3239 | intins RBd, RCd; jo ->vmeta_arith_vvo
3240 || break;
3241 ||}
3242 ||if (vk == 1) {
3243 | setint RC
3244 | mov [BASE+RA*8], RC
3245 ||} else {
3246 | setint RB
3247 | mov [BASE+RA*8], RB
3248 ||}
3249 | ins_next
3250 |.endmacro
3251 |
3252 |.macro ins_arithpost
3253 | movsd qword [BASE+RA*8], xmm0
3254 |.endmacro
3255 |
3256 |.macro ins_arith, sseins
3257 | ins_arithpre sseins, xmm0
3258 | ins_arithpost
3259 | ins_next
3260 |.endmacro
3261 |
3262 |.macro ins_arith, intins, sseins
3263 |.if DUALNUM
3264 | ins_arithdn intins
3265 |.else
3266 | ins_arith, sseins
3267 |.endif
3268 |.endmacro
3269
3270 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3271 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3272 | ins_arith add, addsd
3273 break;
3274 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3275 | ins_arith sub, subsd
3276 break;
3277 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3278 | ins_arith imul, mulsd
3279 break;
3280 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3281 | ins_arith divsd
3282 break;
3283 case BC_MODVN:
3284 | ins_arithpre movsd, xmm1
3285 |->BC_MODVN_Z:
3286 | call ->vm_mod
3287 | ins_arithpost
3288 | ins_next
3289 break;
3290 case BC_MODNV: case BC_MODVV:
3291 | ins_arithpre movsd, xmm1
3292 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3293 break;
3294 case BC_POW:
3295 | ins_arithpre movsd, xmm1
3296 | mov RB, BASE
3297 | call extern pow
3298 | movzx RAd, PC_RA
3299 | mov BASE, RB
3300 | ins_arithpost
3301 | ins_next
3302 break;
3303
3304 case BC_CAT:
3305 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3306 | mov L:CARG1, SAVE_L
3307 | mov L:CARG1->base, BASE
3308 | lea CARG2, [BASE+RC*8]
3309 | mov CARG3d, RCd
3310 | sub CARG3d, RBd
3311 |->BC_CAT_Z:
3312 | mov L:RB, L:CARG1
3313 | mov SAVE_PC, PC
3314 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3315 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3316 | mov BASE, L:RB->base
3317 | test RC, RC
3318 | jnz ->vmeta_binop
3319 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3320 | movzx RAd, PC_RA
3321 | mov RC, [BASE+RB*8]
3322 | mov [BASE+RA*8], RC
3323 | ins_next
3324 break;
3325
3326 /* -- Constant ops ------------------------------------------------------ */
3327
3328 case BC_KSTR:
3329 | ins_AND // RA = dst, RD = str const (~)
3330 | mov RD, [KBASE+RD*8]
3331 | settp RD, LJ_TSTR
3332 | mov [BASE+RA*8], RD
3333 | ins_next
3334 break;
3335 case BC_KCDATA:
3336 |.if FFI
3337 | ins_AND // RA = dst, RD = cdata const (~)
3338 | mov RD, [KBASE+RD*8]
3339 | settp RD, LJ_TCDATA
3340 | mov [BASE+RA*8], RD
3341 | ins_next
3342 |.endif
3343 break;
3344 case BC_KSHORT:
3345 | ins_AD // RA = dst, RD = signed int16 literal
3346 |.if DUALNUM
3347 | movsx RDd, RDW
3348 | setint RD
3349 | mov [BASE+RA*8], RD
3350 |.else
3351 | movsx RDd, RDW // Sign-extend literal.
3352 | cvtsi2sd xmm0, RDd
3353 | movsd qword [BASE+RA*8], xmm0
3354 |.endif
3355 | ins_next
3356 break;
3357 case BC_KNUM:
3358 | ins_AD // RA = dst, RD = num const
3359 | movsd xmm0, qword [KBASE+RD*8]
3360 | movsd qword [BASE+RA*8], xmm0
3361 | ins_next
3362 break;
3363 case BC_KPRI:
3364 | ins_AD // RA = dst, RD = primitive type (~)
3365 | shl RD, 47 // Move the type code up into the tag position (bit 47 and above).
3366 | not RD // Invert: tag bits become ~code, the low 47 bits become all ones.
3367 | mov [BASE+RA*8], RD // Store the finished 64 bit value into the destination slot.
3368 | ins_next
3369 break;
3370 case BC_KNIL:
3371 | ins_AD // RA = dst_start, RD = dst_end
3372 | lea RA, [BASE+RA*8+8] // RA = address of slot dst_start+1.
3373 | lea RD, [BASE+RD*8] // RD = address of slot dst_end (inclusive bound).
3374 | mov RB, LJ_TNIL
3375 | mov [RA-8], RB // Sets minimum 2 slots.
3376 |1: // Store nil, then repeat while the cursor is still <= dst_end.
3377 | mov [RA], RB
3378 | add RA, 8
3379 | cmp RA, RD
3380 | jbe <1 // Unsigned address compare.
3381 | ins_next
3382 break;
3383
3384 /* -- Upvalue and function ops ------------------------------------------ */
3385
3386 case BC_UGET:
3387 | ins_AD // RA = dst, RD = upvalue #
3388 | mov LFUNC:RB, [BASE-16]
3389 | cleartp LFUNC:RB
3390 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]
3391 | mov RB, UPVAL:RB->v
3392 | mov RD, [RB]
3393 | mov [BASE+RA*8], RD
3394 | ins_next
3395 break;
3396 case BC_USETV:
3397#define TV2MARKOFS \
3398 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3399 | ins_AD // RA = upvalue #, RD = src
3400 | mov LFUNC:RB, [BASE-16]
3401 | cleartp LFUNC:RB
3402 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3403 | cmp byte UPVAL:RB->closed, 0
3404 | mov RB, UPVAL:RB->v
3405 | mov RA, [BASE+RD*8]
3406 | mov [RB], RA
3407 | jz >1
3408 | // Check barrier for closed upvalue.
3409 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3410 | jnz >2
3411 |1:
3412 | ins_next
3413 |
3414 |2: // Upvalue is black. Check if new value is collectable and white.
3415 | sub RD, LJ_TISGCV
3416 | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3417 | jbe <1
3418 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3419 | jz <1
3420 | // Crossed a write barrier. Move the barrier forward.
3421 |.if not X64WIN
3422 | mov CARG2, RB
3423 | mov RB, BASE // Save BASE.
3424 |.else
3425 | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3426 |.endif
3427 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3428 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3429 | mov BASE, RB // Restore BASE.
3430 | jmp <1
3431 break;
3432#undef TV2MARKOFS
3433 case BC_USETS:
3434 | ins_AND // RA = upvalue #, RD = str const (~)
3435 | mov LFUNC:RB, [BASE-16]
3436 | cleartp LFUNC:RB
3437 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3438 | mov STR:RA, [KBASE+RD*8] // Untagged GCstr * from the constant table.
3439 | mov RD, UPVAL:RB->v
3440 | settp STR:ITYPE, STR:RA, LJ_TSTR // Build the tagged value in ITYPE; RA must stay a plain pointer.
3441 | mov [RD], STR:ITYPE
3442 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3443 | jnz >2
3444 |1:
3445 | ins_next
3446 |
3447 |2: // Check if string is white and ensure upvalue is closed.
3448 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3449 | jz <1
3450 | cmp byte UPVAL:RB->closed, 0
3451 | jz <1
3452 | // Crossed a write barrier. Move the barrier forward.
3453 | mov RB, BASE // Save BASE (CARG2 == BASE).
3454 | mov CARG2, RD
3455 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3456 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3457 | mov BASE, RB // Restore BASE.
3458 | jmp <1
3459 break;
3460 case BC_USETN:
3461 | ins_AD // RA = upvalue #, RD = num const
3462 | mov LFUNC:RB, [BASE-16]
3463 | cleartp LFUNC:RB
3464 | movsd xmm0, qword [KBASE+RD*8]
3465 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3466 | mov RA, UPVAL:RB->v
3467 | movsd qword [RA], xmm0
3468 | ins_next
3469 break;
3470 case BC_USETP:
3471 | ins_AD // RA = upvalue #, RD = primitive type (~)
3472 | mov LFUNC:RB, [BASE-16] // Tagged function value stored two slots below BASE.
3473 | cleartp LFUNC:RB // Strip the tag to get a usable pointer.
3474 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] // uvptr[RA]
3475 | shl RD, 47 // Build the primitive value exactly as BC_KPRI does:
3476 | not RD // shift the type code to bit 47, then invert all bits.
3477 | mov RA, UPVAL:RB->v // RA = address the upvalue's value lives at.
3478 | mov [RA], RD // Plain store; this arm has no GC barrier check.
3479 | ins_next
3480 break;
3481 case BC_UCLO:
3482 | ins_AD // RA = level, RD = target
3483 | branchPC RD // Do this first to free RD.
3484 | mov L:RB, SAVE_L
3485 | cmp aword L:RB->openupval, 0 // Pointer-width test: a dword compare would miss a head whose low 32 bits are 0.
3486 | je >1 // No open upvalues: nothing to close.
3487 | mov L:RB->base, BASE
3488 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3489 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3490 | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3491 | mov BASE, L:RB->base // Reload BASE after the C call.
3492 |1:
3493 | ins_next
3494 break;
3495
3496 case BC_FNEW:
3497 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3498 | mov L:RB, SAVE_L
3499 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3500 | mov CARG3, [BASE-16]
3501 | cleartp CARG3
3502 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3503 | mov CARG1, L:RB
3504 | mov SAVE_PC, PC
3505 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3506 | call extern lj_func_newL_gc
3507 | // GCfuncL * returned in eax (RC).
3508 | mov BASE, L:RB->base
3509 | movzx RAd, PC_RA
3510 | settp LFUNC:RC, LJ_TFUNC
3511 | mov [BASE+RA*8], LFUNC:RC
3512 | ins_next
3513 break;
3514
3515 /* -- Table ops --------------------------------------------------------- */
3516
3517 case BC_TNEW:
3518 | ins_AD // RA = dst, RD = hbits|asize
3519 | mov L:RB, SAVE_L
3520 | mov L:RB->base, BASE
3521 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3522 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3523 | mov SAVE_PC, PC
3524 | jae >5
3525 |1:
3526 | mov CARG3d, RDd
3527 | and RDd, 0x7ff
3528 | shr CARG3d, 11
3529 | cmp RDd, 0x7ff
3530 | je >3
3531 |2:
3532 | mov L:CARG1, L:RB
3533 | mov CARG2d, RDd
3534 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3535 | // Table * returned in eax (RC).
3536 | mov BASE, L:RB->base
3537 | movzx RAd, PC_RA
3538 | settp TAB:RC, LJ_TTAB
3539 | mov [BASE+RA*8], TAB:RC
3540 | ins_next
3541 |3: // Turn 0x7ff into 0x801.
3542 | mov RDd, 0x801
3543 | jmp <2
3544 |5:
3545 | mov L:CARG1, L:RB
3546 | call extern lj_gc_step_fixtop // (lua_State *L)
3547 | movzx RDd, PC_RD
3548 | jmp <1
3549 break;
3550 case BC_TDUP:
3551 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3552 | mov L:RB, SAVE_L
3553 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3554 | mov SAVE_PC, PC
3555 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3556 | mov L:RB->base, BASE
3557 | jae >3
3558 |2:
3559 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3560 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3561 | call extern lj_tab_dup // (lua_State *L, Table *kt)
3562 | // Table * returned in eax (RC).
3563 | mov BASE, L:RB->base
3564 | movzx RAd, PC_RA
3565 | settp TAB:RC, LJ_TTAB
3566 | mov [BASE+RA*8], TAB:RC
3567 | ins_next
3568 |3:
3569 | mov L:CARG1, L:RB
3570 | call extern lj_gc_step_fixtop // (lua_State *L)
3571 | movzx RDd, PC_RD // Need to reload RD.
3572 | not RD
3573 | jmp <2
3574 break;
3575
3576 case BC_GGET:
3577 | ins_AND // RA = dst, RD = str const (~)
3578 | mov LFUNC:RB, [BASE-16]
3579 | cleartp LFUNC:RB
3580 | mov TAB:RB, LFUNC:RB->env
3581 | mov STR:RC, [KBASE+RD*8]
3582 | jmp ->BC_TGETS_Z
3583 break;
3584 case BC_GSET:
3585 | ins_AND // RA = src, RD = str const (~)
3586 | mov LFUNC:RB, [BASE-16]
3587 | cleartp LFUNC:RB
3588 | mov TAB:RB, LFUNC:RB->env
3589 | mov STR:RC, [KBASE+RD*8]
3590 | jmp ->BC_TSETS_Z
3591 break;
3592
3593 case BC_TGETV:
3594 | ins_ABC // RA = dst, RB = table, RC = key
3595 | mov TAB:RB, [BASE+RB*8]
3596 | mov RC, [BASE+RC*8]
3597 | checktab TAB:RB, ->vmeta_tgetv
3598 |
3599 | // Integer key?
3600 |.if DUALNUM
3601 | checkint RC, >5
3602 |.else
3603 | // Convert number to int and back and compare.
3604 | checknum RC, >5
3605 | movd xmm0, RC
3606 | cvttsd2si RCd, xmm0
3607 | cvtsi2sd xmm1, RCd
3608 | ucomisd xmm0, xmm1
3609 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3610 |.endif
3611 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3612 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3613 | shl RCd, 3
3614 | add RC, TAB:RB->array
3615 | // Get array slot.
3616 | mov ITYPE, [RC]
3617 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3618 | je >2
3619 |1:
3620 | mov [BASE+RA*8], ITYPE
3621 | ins_next
3622 |
3623 |2: // Check for __index if table value is nil.
3624 | mov TAB:TMPR, TAB:RB->metatable
3625 | test TAB:TMPR, TAB:TMPR
3626 | jz <1
3627 | test byte TAB:TMPR->nomm, 1<<MM_index
3628 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3629 | jmp <1
3630 |
3631 |5: // String key?
3632 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
3633 | cleartp STR:RC
3634 | jmp ->BC_TGETS_Z
3635 break;
3636 case BC_TGETS:
3637 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3638 | mov TAB:RB, [BASE+RB*8]
3639 | not RC
3640 | mov STR:RC, [KBASE+RC*8]
3641 | checktab TAB:RB, ->vmeta_tgets
3642 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
3643 | mov TMPRd, TAB:RB->hmask
3644 | and TMPRd, STR:RC->hash
3645 | imul TMPRd, #NODE
3646 | add NODE:TMPR, TAB:RB->node
3647 | settp ITYPE, STR:RC, LJ_TSTR
3648 |1:
3649 | cmp NODE:TMPR->key, ITYPE
3650 | jne >4
3651 | // Get node value.
3652 | mov ITYPE, NODE:TMPR->val
3653 | cmp ITYPE, LJ_TNIL
3654 | je >5 // Key found, but nil value?
3655 |2:
3656 | mov [BASE+RA*8], ITYPE
3657 | ins_next
3658 |
3659 |4: // Follow hash chain.
3660 | mov NODE:TMPR, NODE:TMPR->next
3661 | test NODE:TMPR, NODE:TMPR
3662 | jnz <1
3663 | // End of hash chain: key not found, nil result.
3664 | mov ITYPE, LJ_TNIL
3665 |
3666 |5: // Check for __index if table value is nil.
3667 | mov TAB:TMPR, TAB:RB->metatable
3668 | test TAB:TMPR, TAB:TMPR
3669 | jz <2 // No metatable: done.
3670 | test byte TAB:TMPR->nomm, 1<<MM_index
3671 | jnz <2 // 'no __index' flag set: done.
3672 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3673 break;
3674 case BC_TGETB:
3675 | ins_ABC // RA = dst, RB = table, RC = byte literal
3676 | mov TAB:RB, [BASE+RB*8]
3677 | checktab TAB:RB, ->vmeta_tgetb
3678 | cmp RCd, TAB:RB->asize
3679 | jae ->vmeta_tgetb
3680 | shl RCd, 3
3681 | add RC, TAB:RB->array
3682 | // Get array slot.
3683 | mov ITYPE, [RC]
3684 | cmp ITYPE, LJ_TNIL
3685 | je >2
3686 |1:
3687 | mov [BASE+RA*8], ITYPE
3688 | ins_next
3689 |
3690 |2: // Check for __index if table value is nil.
3691 | mov TAB:TMPR, TAB:RB->metatable
3692 | test TAB:TMPR, TAB:TMPR
3693 | jz <1
3694 | test byte TAB:TMPR->nomm, 1<<MM_index
3695 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3696 | jmp <1
3697 break;
3698 case BC_TGETR:
3699 | ins_ABC // RA = dst, RB = table, RC = key
3700 | mov TAB:RB, [BASE+RB*8]
3701 | cleartp TAB:RB
3702 |.if DUALNUM
3703 | mov RCd, dword [BASE+RC*8]
3704 |.else
3705 | cvttsd2si RCd, qword [BASE+RC*8]
3706 |.endif
3707 | cmp RCd, TAB:RB->asize
3708 | jae ->vmeta_tgetr // Not in array part? Use fallback.
3709 | shl RCd, 3
3710 | add RC, TAB:RB->array
3711 | // Get array slot.
3712 |->BC_TGETR_Z:
3713 | mov ITYPE, [RC]
3714 |->BC_TGETR2_Z:
3715 | mov [BASE+RA*8], ITYPE
3716 | ins_next
3717 break;
3718
3719 case BC_TSETV:
3720 | ins_ABC // RA = src, RB = table, RC = key
3721 | mov TAB:RB, [BASE+RB*8]
3722 | mov RC, [BASE+RC*8]
3723 | checktab TAB:RB, ->vmeta_tsetv
3724 |
3725 | // Integer key?
3726 |.if DUALNUM
3727 | checkint RC, >5
3728 |.else
3729 | // Convert number to int and back and compare.
3730 | checknum RC, >5
3731 | movd xmm0, RC
3732 | cvttsd2si RCd, xmm0
3733 | cvtsi2sd xmm1, RCd
3734 | ucomisd xmm0, xmm1
3735 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3736 |.endif
3737 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3738 | jae ->vmeta_tsetv
3739 | shl RCd, 3
3740 | add RC, TAB:RB->array
3741 | cmp aword [RC], LJ_TNIL
3742 | je >3 // Previous value is nil?
3743 |1:
3744 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3745 | jnz >7
3746 |2: // Set array slot.
3747 | mov RB, [BASE+RA*8]
3748 | mov [RC], RB
3749 | ins_next
3750 |
3751 |3: // Check for __newindex if previous value is nil.
3752 | mov TAB:TMPR, TAB:RB->metatable
3753 | test TAB:TMPR, TAB:TMPR
3754 | jz <1
3755 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3756 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3757 | jmp <1
3758 |
3759 |5: // String key?
3760 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
3761 | cleartp STR:RC
3762 | jmp ->BC_TSETS_Z
3763 |
3764 |7: // Possible table write barrier for the value. Skip valiswhite check.
3765 | barrierback TAB:RB, TMPR
3766 | jmp <2
3767 break;
3768 case BC_TSETS:
3769 | ins_ABC // RA = src, RB = table, RC = str const (~)
3770 | mov TAB:RB, [BASE+RB*8]
3771 | not RC
3772 | mov STR:RC, [KBASE+RC*8]
3773 | checktab TAB:RB, ->vmeta_tsets
3774 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3775 | mov TMPRd, TAB:RB->hmask
3776 | and TMPRd, STR:RC->hash
3777 | imul TMPRd, #NODE
3778 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3779 | add NODE:TMPR, TAB:RB->node
3780 | settp ITYPE, STR:RC, LJ_TSTR
3781 |1:
3782 | cmp NODE:TMPR->key, ITYPE
3783 | jne >5
3784 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3785 | cmp aword [TMPR], LJ_TNIL
3786 | je >4 // Previous value is nil?
3787 |2:
3788 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3789 | jnz >7
3790 |3: // Set node value.
3791 | mov ITYPE, [BASE+RA*8]
3792 | mov [TMPR], ITYPE
3793 | ins_next
3794 |
3795 |4: // Check for __newindex if previous value is nil.
3796 | mov TAB:ITYPE, TAB:RB->metatable
3797 | test TAB:ITYPE, TAB:ITYPE
3798 | jz <2
3799 | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3800 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3801 | jmp <2
3802 |
3803 |5: // Follow hash chain.
3804 | mov NODE:TMPR, NODE:TMPR->next
3805 | test NODE:TMPR, NODE:TMPR
3806 | jnz <1
3807 | // End of hash chain: key not found, add a new one.
3808 |
3809 | // But check for __newindex first.
3810 | mov TAB:TMPR, TAB:RB->metatable
3811 | test TAB:TMPR, TAB:TMPR
3812 | jz >6 // No metatable: continue.
3813 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3814 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3815 |6:
3816 | mov TMP1, ITYPE
3817 | mov L:CARG1, SAVE_L
3818 | mov L:CARG1->base, BASE
3819 | lea CARG3, TMP1
3820 | mov CARG2, TAB:RB
3821 | mov SAVE_PC, PC
3822 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3823 | // Handles write barrier for the new key. TValue * returned in eax (RC).
3824 | mov L:CARG1, SAVE_L
3825 | mov BASE, L:CARG1->base
3826 | mov TMPR, rax
3827 | movzx RA, PC_RA
3828 | jmp <2 // Must check write barrier for value.
3829 |
3830 |7: // Possible table write barrier for the value. Skip valiswhite check.
3831 | barrierback TAB:RB, ITYPE
3832 | jmp <3
3833 break;
3834 case BC_TSETB:
3835 | ins_ABC // RA = src, RB = table, RC = byte literal
3836 | mov TAB:RB, [BASE+RB*8]
3837 | checktab TAB:RB, ->vmeta_tsetb
3838 | cmp RCd, TAB:RB->asize
3839 | jae ->vmeta_tsetb
3840 | shl RCd, 3
3841 | add RC, TAB:RB->array
3842 | cmp aword [RC], LJ_TNIL
3843 | je >3 // Previous value is nil?
3844 |1:
3845 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3846 | jnz >7
3847 |2: // Set array slot.
3848 | mov ITYPE, [BASE+RA*8]
3849 | mov [RC], ITYPE
3850 | ins_next
3851 |
3852 |3: // Check for __newindex if previous value is nil.
3853 | mov TAB:TMPR, TAB:RB->metatable
3854 | test TAB:TMPR, TAB:TMPR
3855 | jz <1
3856 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3857 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3858 | jmp <1
3859 |
3860 |7: // Possible table write barrier for the value. Skip valiswhite check.
3861 | barrierback TAB:RB, TMPR
3862 | jmp <2
3863 break;
3864 case BC_TSETR:
3865 | ins_ABC // RA = src, RB = table, RC = key
3866 | mov TAB:RB, [BASE+RB*8]
3867 | cleartp TAB:RB
3868 |.if DUALNUM
3869 | mov RC, [BASE+RC*8]
3870 |.else
3871 | cvttsd2si RCd, qword [BASE+RC*8]
3872 |.endif
3873 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3874 | jnz >7
3875 |2:
3876 | cmp RCd, TAB:RB->asize
3877 | jae ->vmeta_tsetr
3878 | shl RCd, 3
3879 | add RC, TAB:RB->array
3880 | // Set array slot.
3881 |->BC_TSETR_Z:
3882 | mov ITYPE, [BASE+RA*8]
3883 | mov [RC], ITYPE
3884 | ins_next
3885 |
3886 |7: // Possible table write barrier for the value. Skip valiswhite check.
3887 | barrierback TAB:RB, TMPR
3888 | jmp <2
3889 break;
3890
3891 case BC_TSETM:
3892 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3893 |1:
3894 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3895 | lea RA, [BASE+RA*8]
3896 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3897 | cleartp TAB:RB
3898 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3899 | jnz >7
3900 |2:
3901 | mov RDd, MULTRES
3902 | sub RDd, 1
3903 | jz >4 // Nothing to copy?
3904 | add RDd, TMPRd // Compute needed size.
3905 | cmp RDd, TAB:RB->asize
3906 | ja >5 // Doesn't fit into array part?
3907 | sub RDd, TMPRd
3908 | shl TMPRd, 3
3909 | add TMPR, TAB:RB->array
3910 |3: // Copy result slots to table.
3911 | mov RB, [RA]
3912 | add RA, 8
3913 | mov [TMPR], RB
3914 | add TMPR, 8
3915 | sub RDd, 1
3916 | jnz <3
3917 |4:
3918 | ins_next
3919 |
3920 |5: // Need to resize array part.
3921 | mov L:CARG1, SAVE_L
3922 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3923 | mov CARG2, TAB:RB
3924 | mov CARG3d, RDd
3925 | mov L:RB, L:CARG1
3926 | mov SAVE_PC, PC
3927 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3928 | mov BASE, L:RB->base
3929 | movzx RAd, PC_RA // Restore RA.
3930 | movzx RDd, PC_RD // Restore RD.
3931 | jmp <1 // Retry.
3932 |
3933 |7: // Possible table write barrier for any value. Skip valiswhite check.
3934 | barrierback TAB:RB, RD
3935 | jmp <2
3936 break;
3937
3938 /* -- Calls and vararg handling ----------------------------------------- */
3939
3940 case BC_CALL: case BC_CALLM:
3941 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3942 if (op == BC_CALLM) {
3943 | add NARGS:RDd, MULTRES
3944 }
3945 | mov LFUNC:RB, [BASE+RA*8]
3946 | checkfunc LFUNC:RB, ->vmeta_call_ra
3947 | lea BASE, [BASE+RA*8+16]
3948 | ins_call
3949 break;
3950
3951 case BC_CALLMT:
3952 | ins_AD // RA = base, RD = extra_nargs
3953 | add NARGS:RDd, MULTRES
3954 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
3955 break;
3956 case BC_CALLT:
3957 | ins_AD // RA = base, RD = nargs+1
3958 | lea RA, [BASE+RA*8+16]
3959 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
3960 | mov LFUNC:RB, [RA-16]
3961 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
3962 |->BC_CALLT_Z:
3963 | mov PC, [BASE-8]
3964 | test PCd, FRAME_TYPE
3965 | jnz >7
3966 |1:
3967 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
3968 | mov MULTRES, NARGS:RDd
3969 | sub NARGS:RDd, 1
3970 | jz >3
3971 |2: // Move args down.
3972 | mov RB, [RA]
3973 | add RA, 8
3974 | mov [KBASE], RB
3975 | add KBASE, 8
3976 | sub NARGS:RDd, 1
3977 | jnz <2
3978 |
3979 | mov LFUNC:RB, [BASE-16]
3980 |3:
3981 | cleartp LFUNC:RB
3982 | mov NARGS:RDd, MULTRES
3983 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
3984 | ja >5
3985 |4:
3986 | ins_callt
3987 |
3988 |5: // Tailcall to a fast function.
3989 | test PCd, FRAME_TYPE // Lua frame below?
3990 | jnz <4
3991 | movzx RAd, PC_RA
3992 | neg RA
3993 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
3994 | cleartp LFUNC:KBASE
3995 | mov KBASE, LFUNC:KBASE->pc
3996 | mov KBASE, [KBASE+PC2PROTO(k)]
3997 | jmp <4
3998 |
3999 |7: // Tailcall from a vararg function.
4000 | sub PC, FRAME_VARG
4001 | test PCd, FRAME_TYPEP
4002 | jnz >8 // Vararg frame below?
4003 | sub BASE, PC // Need to relocate BASE/KBASE down.
4004 | mov KBASE, BASE
4005 | mov PC, [BASE-8]
4006 | jmp <1
4007 |8:
4008 | add PCd, FRAME_VARG
4009 | jmp <1
4010 break;
4011
4012 case BC_ITERC:
4013 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4014 | lea RA, [BASE+RA*8+16] // fb = base+2
4015 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4016 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4017 | mov [RA], RB
4018 | mov [RA+8], RC
4019 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-1] = fb[-5]
4020 | mov [RA-16], LFUNC:RB
4021 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4022 | checkfunc LFUNC:RB, ->vmeta_call
4023 | mov BASE, RA
4024 | ins_call
4025 break;
4026
4027 case BC_ITERN:
4028 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4029 |.if JIT
4030 | // NYI: add hotloop, record BC_ITERN.
4031 |.endif
4032 | mov TAB:RB, [BASE+RA*8-16]
4033 | cleartp TAB:RB
4034 | mov RCd, [BASE+RA*8-8] // Get index from control var.
4035 | mov TMPRd, TAB:RB->asize
4036 | add PC, 4
4037 | mov ITYPE, TAB:RB->array
4038 |1: // Traverse array part.
4039 | cmp RCd, TMPRd; jae >5 // Index points after array part?
4040 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
4041 |.if not DUALNUM
4042 | cvtsi2sd xmm0, RCd
4043 |.endif
4044 | // Copy array slot to returned value.
4045 | mov RB, [ITYPE+RC*8]
4046 | mov [BASE+RA*8+8], RB
4047 | // Return array index as a numeric key.
4048 |.if DUALNUM
4049 | setint ITYPE, RC
4050 | mov [BASE+RA*8], ITYPE
4051 |.else
4052 | movsd qword [BASE+RA*8], xmm0
4053 |.endif
4054 | add RCd, 1
4055 | mov [BASE+RA*8-8], RCd // Update control var.
4056 |2:
4057 | movzx RDd, PC_RD // Get target from ITERL.
4058 | branchPC RD
4059 |3:
4060 | ins_next
4061 |
4062 |4: // Skip holes in array part.
4063 | add RCd, 1
4064 | jmp <1
4065 |
4066 |5: // Traverse hash part.
4067 | sub RCd, TMPRd
4068 |6:
4069 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4070 | imul ITYPEd, RCd, #NODE
4071 | add NODE:ITYPE, TAB:RB->node
4072 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
4073 | lea TMPRd, [RCd+TMPRd+1]
4074 | // Copy key and value from hash slot.
4075 | mov RB, NODE:ITYPE->key
4076 | mov RC, NODE:ITYPE->val
4077 | mov [BASE+RA*8], RB
4078 | mov [BASE+RA*8+8], RC
4079 | mov [BASE+RA*8-8], TMPRd
4080 | jmp <2
4081 |
4082 |7: // Skip holes in hash part.
4083 | add RCd, 1
4084 | jmp <6
4085 break;
4086
4087 case BC_ISNEXT:
4088 | ins_AD // RA = base, RD = target (points to ITERN)
4089 | mov CFUNC:RB, [BASE+RA*8-24] // Slot base-3: the iterator callable.
4090 | checkfunc CFUNC:RB, >5
4091 | checktptp [BASE+RA*8-16], LJ_TTAB, >5 // Slot base-2 must hold a table.
4092 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 // Slot base-1 (control var) must be nil.
4093 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 // Callable must have fast function id FF_next_N.
4094 | branchPC RD
4095 | mov64 TMPR, U64x(fffe7fff, 00000000) // Low dword 0 is the start index that ITERN reads back.
4096 | mov [BASE+RA*8-8], TMPR // Initialize control var.
4097 |1:
4098 | ins_next
4099 |5: // Despecialize bytecode if any of the checks fail.
4100 | mov PC_OP, BC_JMP // Overwrite this instruction's opcode with BC_JMP.
4101 | branchPC RD // Move PC to the target instruction.
4102 | mov byte [PC], BC_ITERC // Overwrite the opcode byte at the target with BC_ITERC.
4103 | jmp <1
4104 break;
4105
4106 case BC_VARG:
4107 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4108 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4109 | lea RA, [BASE+RA*8]
4110 | sub TMPR, [BASE-8]
4111 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4112 | test RB, RB
4113 | jz >5 // Copy all varargs?
4114 | lea RB, [RA+RB*8-8]
4115 | cmp TMPR, BASE // No vararg slots?
4116 | jnb >2
4117 |1: // Copy vararg slots to destination slots.
4118 | mov RC, [TMPR-16]
4119 | add TMPR, 8
4120 | mov [RA], RC
4121 | add RA, 8
4122 | cmp RA, RB // All destination slots filled?
4123 | jnb >3
4124 | cmp TMPR, BASE // No more vararg slots?
4125 | jb <1
4126 |2: // Fill up remainder with nil.
4127 | mov aword [RA], LJ_TNIL
4128 | add RA, 8
4129 | cmp RA, RB
4130 | jb <2
4131 |3:
4132 | ins_next
4133 |
4134 |5: // Copy all varargs.
4135 | mov MULTRES, 1 // MULTRES = 0+1
4136 | mov RC, BASE
4137 | sub RC, TMPR
4138 | jbe <3 // No vararg slots?
4139 | mov RBd, RCd
4140 | shr RBd, 3
4141 | add RBd, 1
4142 | mov MULTRES, RBd // MULTRES = #varargs+1
4143 | mov L:RB, SAVE_L
4144 | add RC, RA
4145 | cmp RC, L:RB->maxstack
4146 | ja >7 // Need to grow stack?
4147 |6: // Copy all vararg slots.
4148 | mov RC, [TMPR-16]
4149 | add TMPR, 8
4150 | mov [RA], RC
4151 | add RA, 8
4152 | cmp TMPR, BASE // No more vararg slots?
4153 | jb <6
4154 | jmp <3
4155 |
4156 |7: // Grow stack for varargs.
4157 | mov L:RB->base, BASE
4158 | mov L:RB->top, RA
4159 | mov SAVE_PC, PC
4160 | sub TMPR, BASE // Need delta, because BASE may change.
4161 | mov TMP1hi, TMPRd
4162 | mov CARG2d, MULTRES
4163 | sub CARG2d, 1
4164 | mov CARG1, L:RB
4165 | call extern lj_state_growstack // (lua_State *L, int n)
4166 | mov BASE, L:RB->base
4167 | movsxd TMPR, TMP1hi
4168 | mov RA, L:RB->top
4169 | add TMPR, BASE
4170 | jmp <6
4171 break;
4172
4173 /* -- Returns ----------------------------------------------------------- */
4174
4175 case BC_RETM:
4176 | ins_AD // RA = results, RD = extra_nresults
4177 | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4178 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4179 break;
4180
4181 case BC_RET: case BC_RET0: case BC_RET1:
4182 | ins_AD // RA = results, RD = nresults+1
4183 if (op != BC_RET0) {
4184 | shl RAd, 3
4185 }
4186 |1:
4187 | mov PC, [BASE-8]
4188 | mov MULTRES, RDd // Save nresults+1.
4189 | test PCd, FRAME_TYPE // Check frame type marker.
4190 | jnz >7 // Not returning to a fixarg Lua func?
4191 switch (op) {
4192 case BC_RET:
4193 |->BC_RET_Z:
4194 | mov KBASE, BASE // Use KBASE for result move.
4195 | sub RDd, 1
4196 | jz >3
4197 |2: // Move results down.
4198 | mov RB, [KBASE+RA]
4199 | mov [KBASE-16], RB
4200 | add KBASE, 8
4201 | sub RDd, 1
4202 | jnz <2
4203 |3:
4204 | mov RDd, MULTRES // Note: MULTRES may be >255.
4205 | movzx RBd, PC_RB // So cannot compare with RDL!
4206 |5:
4207 | cmp RBd, RDd // More results expected?
4208 | ja >6
4209 break;
4210 case BC_RET1:
4211 | mov RB, [BASE+RA]
4212 | mov [BASE-16], RB
4213 /* fallthrough */
4214 case BC_RET0:
4215 |5:
4216 | cmp PC_RB, RDL // More results expected?
4217 | ja >6
4218 default:
4219 break;
4220 }
4221 | movzx RAd, PC_RA
4222 | neg RA
4223 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4224 | mov LFUNC:KBASE, [BASE-16]
4225 | cleartp LFUNC:KBASE
4226 | mov KBASE, LFUNC:KBASE->pc
4227 | mov KBASE, [KBASE+PC2PROTO(k)]
4228 | ins_next
4229 |
4230 |6: // Fill up results with nil.
4231 if (op == BC_RET) {
4232 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4233 | add KBASE, 8
4234 } else {
4235 | mov aword [BASE+RD*8-24], LJ_TNIL
4236 }
4237 | add RD, 1
4238 | jmp <5
4239 |
4240 |7: // Non-standard return case.
4241 | lea RB, [PC-FRAME_VARG]
4242 | test RBd, FRAME_TYPEP
4243 | jnz ->vm_return
4244 | // Return from vararg function: relocate BASE down and RA up.
4245 | sub BASE, RB
4246 if (op != BC_RET0) {
4247 | add RA, RB
4248 }
4249 | jmp <1
4250 break;
4251
4252 /* -- Loops and branches ------------------------------------------------ */
4253
4254 |.define FOR_IDX, [RA]
4255 |.define FOR_STOP, [RA+8]
4256 |.define FOR_STEP, [RA+16]
4257 |.define FOR_EXT, [RA+24]
4258
4259 case BC_FORL:
4260 |.if JIT
4261 | hotloop RBd
4262 |.endif
4263 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4264 break;
4265
4266 case BC_JFORI:
4267 case BC_JFORL:
4268#if !LJ_HASJIT
4269 break;
4270#endif
4271 case BC_FORI:
4272 case BC_IFORL:
4273 vk = (op == BC_IFORL || op == BC_JFORL);
4274 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4275 | lea RA, [BASE+RA*8]
4276 if (LJ_DUALNUM) {
4277 | mov RB, FOR_IDX
4278 | checkint RB, >9
4279 | mov TMPR, FOR_STOP
4280 if (!vk) {
4281 | checkint TMPR, ->vmeta_for
4282 | mov ITYPE, FOR_STEP
4283 | test ITYPEd, ITYPEd; js >5
4284 | sar ITYPE, 47;
4285 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4286 } else {
4287#ifdef LUA_USE_ASSERT
4288 | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4289 | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4290#endif
4291 | mov ITYPE, FOR_STEP
4292 | test ITYPEd, ITYPEd; js >5
4293 | add RBd, ITYPEd; jo >1
4294 | setint RB
4295 | mov FOR_IDX, RB
4296 }
4297 | cmp RBd, TMPRd
4298 | mov FOR_EXT, RB
4299 if (op == BC_FORI) {
4300 | jle >7
4301 |1:
4302 |6:
4303 | branchPC RD
4304 } else if (op == BC_JFORI) {
4305 | branchPC RD
4306 | movzx RDd, PC_RD
4307 | jle =>BC_JLOOP
4308 |1:
4309 |6:
4310 } else if (op == BC_IFORL) {
4311 | jg >7
4312 |6:
4313 | branchPC RD
4314 |1:
4315 } else {
4316 | jle =>BC_JLOOP
4317 |1:
4318 |6:
4319 }
4320 |7:
4321 | ins_next
4322 |
4323 |5: // Invert check for negative step.
4324 if (!vk) {
4325 | sar ITYPE, 47;
4326 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4327 } else {
4328 | add RBd, ITYPEd; jo <1
4329 | setint RB
4330 | mov FOR_IDX, RB
4331 }
4332 | cmp RBd, TMPRd
4333 | mov FOR_EXT, RB
4334 if (op == BC_FORI) {
4335 | jge <7
4336 } else if (op == BC_JFORI) {
4337 | branchPC RD
4338 | movzx RDd, PC_RD
4339 | jge =>BC_JLOOP
4340 } else if (op == BC_IFORL) {
4341 | jl <7
4342 } else {
4343 | jge =>BC_JLOOP
4344 }
4345 | jmp <6
4346 |9: // Fallback to FP variant.
4347 if (!vk) {
4348 | jae ->vmeta_for
4349 }
4350 } else if (!vk) {
4351 | checknumtp FOR_IDX, ->vmeta_for
4352 }
4353 if (!vk) {
4354 | checknumtp FOR_STOP, ->vmeta_for
4355 } else {
4356#ifdef LUA_USE_ASSERT
4357 | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4358 | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4359#endif
4360 }
4361 | mov RB, FOR_STEP
4362 if (!vk) {
4363 | checknum RB, ->vmeta_for
4364 }
4365 | movsd xmm0, qword FOR_IDX
4366 | movsd xmm1, qword FOR_STOP
4367 if (vk) {
4368 | addsd xmm0, qword FOR_STEP
4369 | movsd qword FOR_IDX, xmm0
4370 | test RB, RB; js >3
4371 } else {
4372 | jl >3
4373 }
4374 | ucomisd xmm1, xmm0
4375 |1:
4376 | movsd qword FOR_EXT, xmm0
4377 if (op == BC_FORI) {
4378 |.if DUALNUM
4379 | jnb <7
4380 |.else
4381 | jnb >2
4382 | branchPC RD
4383 |.endif
4384 } else if (op == BC_JFORI) {
4385 | branchPC RD
4386 | movzx RDd, PC_RD
4387 | jnb =>BC_JLOOP
4388 } else if (op == BC_IFORL) {
4389 |.if DUALNUM
4390 | jb <7
4391 |.else
4392 | jb >2
4393 | branchPC RD
4394 |.endif
4395 } else {
4396 | jnb =>BC_JLOOP
4397 }
4398 |.if DUALNUM
4399 | jmp <6
4400 |.else
4401 |2:
4402 | ins_next
4403 |.endif
4404 |
4405 |3: // Invert comparison if step is negative.
4406 | ucomisd xmm0, xmm1
4407 | jmp <1
4408 break;
4409
4410 case BC_ITERL:
4411 |.if JIT
4412 | hotloop RBd
4413 |.endif
4414 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4415 break;
4416
4417 case BC_JITERL:
4418#if !LJ_HASJIT
4419 break;
4420#endif
4421 case BC_IITERL:
4422 | ins_AJ // RA = base, RD = target
4423 | lea RA, [BASE+RA*8]
4424 | mov RB, [RA]
4425 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4426 if (op == BC_JITERL) {
4427 | mov [RA-8], RB
4428 | jmp =>BC_JLOOP
4429 } else {
4430 | branchPC RD // Otherwise save control var + branch.
4431 | mov [RA-8], RB
4432 }
4433 |1:
4434 | ins_next
4435 break;
4436
4437 case BC_LOOP:
4438 | ins_A // RA = base, RD = target (loop extent)
4439 | // Note: RA/RD is only used by trace recorder to determine scope/extent
4440 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
4441 |.if JIT
4442 | hotloop RBd
4443 |.endif
4444 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4445 break;
4446
4447 case BC_ILOOP:
4448 | ins_A // RA = base, RD = target (loop extent)
4449 | ins_next
4450 break;
4451
4452 case BC_JLOOP:
4453 |.if JIT
4454 | ins_AD // RA = base (ignored), RD = traceno
4455 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4456 | mov TRACE:RD, [RA+RD*8]
4457 | mov RD, TRACE:RD->mcode
4458 | mov L:RB, SAVE_L
4459 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4460 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4461 | // Save additional callee-save registers only used in compiled code.
4462 |.if X64WIN
4463 | mov CSAVE_4, r12
4464 | mov CSAVE_3, r13
4465 | mov CSAVE_2, r14
4466 | mov CSAVE_1, r15
4467 | mov RA, rsp
4468 | sub rsp, 10*16+4*8
4469 | movdqa [RA-1*16], xmm6
4470 | movdqa [RA-2*16], xmm7
4471 | movdqa [RA-3*16], xmm8
4472 | movdqa [RA-4*16], xmm9
4473 | movdqa [RA-5*16], xmm10
4474 | movdqa [RA-6*16], xmm11
4475 | movdqa [RA-7*16], xmm12
4476 | movdqa [RA-8*16], xmm13
4477 | movdqa [RA-9*16], xmm14
4478 | movdqa [RA-10*16], xmm15
4479 |.else
4480 | sub rsp, 16
4481 | mov [rsp+16], r12
4482 | mov [rsp+8], r13
4483 |.endif
4484 | jmp RD
4485 |.endif
4486 break;
4487
4488 case BC_JMP:
4489 | ins_AJ // RA = unused, RD = target
4490 | branchPC RD
4491 | ins_next
4492 break;
4493
4494 /* -- Function headers -------------------------------------------------- */
4495
4496 /*
4497 ** Reminder: A function may be called with func/args above L->maxstack,
4498 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4499 ** too. This means all FUNC* ops (including fast functions) must check
4500 ** for stack overflow _before_ adding more slots!
4501 */
4502
4503 case BC_FUNCF:
4504 |.if JIT
4505 | hotcall RBd
4506 |.endif
4507 case BC_FUNCV: /* NYI: compiled vararg functions. */
4508 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4509 break;
4510
4511 case BC_JFUNCF:
4512#if !LJ_HASJIT
4513 break;
4514#endif
4515 case BC_IFUNCF:
4516 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4517 | mov KBASE, [PC-4+PC2PROTO(k)]
4518 | mov L:RB, SAVE_L
4519 | lea RA, [BASE+RA*8] // Top of frame.
4520 | cmp RA, L:RB->maxstack
4521 | ja ->vm_growstack_f
4522 | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4523 | cmp NARGS:RDd, RAd // Check for missing parameters.
4524 | jbe >3
4525 |2:
4526 if (op == BC_JFUNCF) {
4527 | movzx RDd, PC_RD
4528 | jmp =>BC_JLOOP
4529 } else {
4530 | ins_next
4531 }
4532 |
4533 |3: // Clear missing parameters.
4534 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4535 | add NARGS:RDd, 1
4536 | cmp NARGS:RDd, RAd
4537 | jbe <3
4538 | jmp <2
4539 break;
4540
4541 case BC_JFUNCV:
4542#if !LJ_HASJIT
4543 break;
4544#endif
4545 | int3 // NYI: compiled vararg functions
4546 break; /* NYI: compiled vararg functions. */
4547
4548 case BC_IFUNCV:
4549 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4550 | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4551 | lea RD, [BASE+NARGS:RD*8+8]
4552 | mov LFUNC:KBASE, [BASE-16]
4553 | mov [RD-8], RB // Store delta + FRAME_VARG.
4554 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4555 | mov L:RB, SAVE_L
4556 | lea RA, [RD+RA*8]
4557 | cmp RA, L:RB->maxstack
4558 | ja ->vm_growstack_v // Need to grow stack.
4559 | mov RA, BASE
4560 | mov BASE, RD
4561 | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4562 | test RBd, RBd
4563 | jz >2
4564 | add RA, 8
4565 |1: // Copy fixarg slots up to new frame.
4566 | add RA, 8
4567 | cmp RA, BASE
4568 | jnb >3 // Less args than parameters?
4569 | mov KBASE, [RA-16]
4570 | mov [RD], KBASE
4571 | add RD, 8
4572 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4573 | sub RBd, 1
4574 | jnz <1
4575 |2:
4576 if (op == BC_JFUNCV) {
4577 | movzx RDd, PC_RD
4578 | jmp =>BC_JLOOP
4579 } else {
4580 | mov KBASE, [PC-4+PC2PROTO(k)]
4581 | ins_next
4582 }
4583 |
4584 |3: // Clear missing parameters.
4585 | mov aword [RD], LJ_TNIL
4586 | add RD, 8
4587 | sub RBd, 1
4588 | jnz <3
4589 | jmp <2
4590 break;
4591
4592 case BC_FUNCC:
4593 case BC_FUNCCW:
4594 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4595 | mov CFUNC:RB, [BASE-16]
4596 | cleartp CFUNC:RB
4597 | mov KBASE, CFUNC:RB->f
4598 | mov L:RB, SAVE_L
4599 | lea RD, [BASE+NARGS:RD*8-8]
4600 | mov L:RB->base, BASE
4601 | lea RA, [RD+8*LUA_MINSTACK]
4602 | cmp RA, L:RB->maxstack
4603 | mov L:RB->top, RD
4604 if (op == BC_FUNCC) {
4605 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4606 } else {
4607 | mov CARG2, KBASE
4608 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4609 }
4610 | ja ->vm_growstack_c // Need to grow stack.
4611 | set_vmstate C
4612 if (op == BC_FUNCC) {
4613 | call KBASE // (lua_State *L)
4614 } else {
4615 | // (lua_State *L, lua_CFunction f)
4616 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4617 }
4618 | // nresults returned in eax (RD).
4619 | mov BASE, L:RB->base
4620 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4621 | set_vmstate INTERP
4622 | lea RA, [BASE+RD*8]
4623 | neg RA
4624 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4625 | mov PC, [BASE-8] // Fetch PC of caller.
4626 | jmp ->vm_returnc
4627 break;
4628
4629 /* ---------------------------------------------------------------------- */
4630
4631 default:
4632 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4633 exit(2);
4634 break;
4635 }
4636}
4637
4638static int build_backend(BuildCtx *ctx)
4639{
4640 int op;
4641 dasm_growpc(Dst, BC__MAX);
4642 build_subroutines(ctx);
4643 |.code_op
4644 for (op = 0; op < BC__MAX; op++)
4645 build_ins(ctx, (BCOp)op, op);
4646 return BC__MAX;
4647}
4648
4649/* Emit pseudo frame-info for all assembler functions. */
4650static void emit_asm_debug(BuildCtx *ctx)
4651{
4652 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4653 switch (ctx->mode) {
4654 case BUILD_elfasm:
4655 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
4656 fprintf(ctx->fp,
4657 ".Lframe0:\n"
4658 "\t.long .LECIE0-.LSCIE0\n"
4659 ".LSCIE0:\n"
4660 "\t.long 0xffffffff\n"
4661 "\t.byte 0x1\n"
4662 "\t.string \"\"\n"
4663 "\t.uleb128 0x1\n"
4664 "\t.sleb128 -8\n"
4665 "\t.byte 0x10\n"
4666 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4667 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4668 "\t.align 8\n"
4669 ".LECIE0:\n\n");
4670 fprintf(ctx->fp,
4671 ".LSFDE0:\n"
4672 "\t.long .LEFDE0-.LASFDE0\n"
4673 ".LASFDE0:\n"
4674 "\t.long .Lframe0\n"
4675 "\t.quad .Lbegin\n"
4676 "\t.quad %d\n"
4677 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4678 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4679 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4680 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4681 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4682 "\t.align 8\n"
4683 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4684#if LJ_HASFFI
4685 fprintf(ctx->fp,
4686 ".LSFDE1:\n"
4687 "\t.long .LEFDE1-.LASFDE1\n"
4688 ".LASFDE1:\n"
4689 "\t.long .Lframe0\n"
4690 "\t.quad lj_vm_ffi_call\n"
4691 "\t.quad %d\n"
4692 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4693 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4694 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4695 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4696 "\t.align 8\n"
4697 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4698#endif
4699#if (defined(__sun__) && defined(__svr4__))
4700 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4701#else
4702 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4703#endif
4704 fprintf(ctx->fp,
4705 ".Lframe1:\n"
4706 "\t.long .LECIE1-.LSCIE1\n"
4707 ".LSCIE1:\n"
4708 "\t.long 0\n"
4709 "\t.byte 0x1\n"
4710 "\t.string \"zPR\"\n"
4711 "\t.uleb128 0x1\n"
4712 "\t.sleb128 -8\n"
4713 "\t.byte 0x10\n"
4714 "\t.uleb128 6\n" /* augmentation length */
4715 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4716 "\t.long lj_err_unwind_dwarf-.\n"
4717 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4718 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4719 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4720 "\t.align 8\n"
4721 ".LECIE1:\n\n");
4722 fprintf(ctx->fp,
4723 ".LSFDE2:\n"
4724 "\t.long .LEFDE2-.LASFDE2\n"
4725 ".LASFDE2:\n"
4726 "\t.long .LASFDE2-.Lframe1\n"
4727 "\t.long .Lbegin-.\n"
4728 "\t.long %d\n"
4729 "\t.uleb128 0\n" /* augmentation length */
4730 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4731 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4732 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4733 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4734 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4735 "\t.align 8\n"
4736 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4737#if LJ_HASFFI
4738 fprintf(ctx->fp,
4739 ".Lframe2:\n"
4740 "\t.long .LECIE2-.LSCIE2\n"
4741 ".LSCIE2:\n"
4742 "\t.long 0\n"
4743 "\t.byte 0x1\n"
4744 "\t.string \"zR\"\n"
4745 "\t.uleb128 0x1\n"
4746 "\t.sleb128 -8\n"
4747 "\t.byte 0x10\n"
4748 "\t.uleb128 1\n" /* augmentation length */
4749 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4750 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4751 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4752 "\t.align 8\n"
4753 ".LECIE2:\n\n");
4754 fprintf(ctx->fp,
4755 ".LSFDE3:\n"
4756 "\t.long .LEFDE3-.LASFDE3\n"
4757 ".LASFDE3:\n"
4758 "\t.long .LASFDE3-.Lframe2\n"
4759 "\t.long lj_vm_ffi_call-.\n"
4760 "\t.long %d\n"
4761 "\t.uleb128 0\n" /* augmentation length */
4762 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4763 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4764 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4765 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4766 "\t.align 8\n"
4767 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4768#endif
4769 break;
4770 /* Mental note: never let Apple design an assembler.
4771 ** Or a linker. Or a plastic case. But I digress.
4772 */
4773 case BUILD_machasm: {
4774#if LJ_HASFFI
4775 int fcsize = 0;
4776#endif
4777 int i;
4778 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4779 fprintf(ctx->fp,
4780 "EH_frame1:\n"
4781 "\t.set L$set$x,LECIEX-LSCIEX\n"
4782 "\t.long L$set$x\n"
4783 "LSCIEX:\n"
4784 "\t.long 0\n"
4785 "\t.byte 0x1\n"
4786 "\t.ascii \"zPR\\0\"\n"
4787 "\t.byte 0x1\n"
4788 "\t.byte 128-8\n"
4789 "\t.byte 0x10\n"
4790 "\t.byte 6\n" /* augmentation length */
4791 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4792 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4793 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4794 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4795 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4796 "\t.align 3\n"
4797 "LECIEX:\n\n");
4798 for (i = 0; i < ctx->nsym; i++) {
4799 const char *name = ctx->sym[i].name;
4800 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
4801 if (size == 0) continue;
4802#if LJ_HASFFI
4803 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4804#endif
4805 fprintf(ctx->fp,
4806 "%s.eh:\n"
4807 "LSFDE%d:\n"
4808 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4809 "\t.long L$set$%d\n"
4810 "LASFDE%d:\n"
4811 "\t.long LASFDE%d-EH_frame1\n"
4812 "\t.long %s-.\n"
4813 "\t.long %d\n"
4814 "\t.byte 0\n" /* augmentation length */
4815 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4816 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4817 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4818 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4819 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4820 "\t.align 3\n"
4821 "LEFDE%d:\n\n",
4822 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4823 }
4824#if LJ_HASFFI
4825 if (fcsize) {
4826 fprintf(ctx->fp,
4827 "EH_frame2:\n"
4828 "\t.set L$set$y,LECIEY-LSCIEY\n"
4829 "\t.long L$set$y\n"
4830 "LSCIEY:\n"
4831 "\t.long 0\n"
4832 "\t.byte 0x1\n"
4833 "\t.ascii \"zR\\0\"\n"
4834 "\t.byte 0x1\n"
4835 "\t.byte 128-8\n"
4836 "\t.byte 0x10\n"
4837 "\t.byte 1\n" /* augmentation length */
4838 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4839 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4840 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4841 "\t.align 3\n"
4842 "LECIEY:\n\n");
4843 fprintf(ctx->fp,
4844 "_lj_vm_ffi_call.eh:\n"
4845 "LSFDEY:\n"
4846 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4847 "\t.long L$set$yy\n"
4848 "LASFDEY:\n"
4849 "\t.long LASFDEY-EH_frame2\n"
4850 "\t.long _lj_vm_ffi_call-.\n"
4851 "\t.long %d\n"
4852 "\t.byte 0\n" /* augmentation length */
4853 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4854 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4855 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4856 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4857 "\t.align 3\n"
4858 "LEFDEY:\n\n", fcsize);
4859 }
4860#endif
4861 fprintf(ctx->fp, ".subsections_via_symbols\n");
4862 }
4863 break;
4864 default: /* Difficult for other modes. */
4865 break;
4866 }
4867}
4868