aboutsummaryrefslogtreecommitdiff
path: root/src/vm_x64.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_x64.dasc')
-rw-r--r--src/vm_x64.dasc4909
1 files changed, 4909 insertions, 0 deletions
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
new file mode 100644
index 00000000..a5749b17
--- /dev/null
+++ b/src/vm_x64.dasc
@@ -0,0 +1,4909 @@
1|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch x64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|//-----------------------------------------------------------------------
14|
15|.if WIN // WIN is not defined in the visible source; presumably supplied by the build.
16|.define X64WIN, 1 // Windows/x64 calling conventions.
17|.endif // Without WIN, X64WIN stays undefined and the .else (POSIX) branches below apply.
18|
19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, rdx // Not C callee-save, refetched anyway.
22|.if X64WIN
23|.define KBASE, rdi // Must be C callee-save.
24|.define PC, rsi // Must be C callee-save.
25|.define DISPATCH, rbx // Must be C callee-save.
26|.define KBASEd, edi // 32 bit view of KBASE.
27|.define PCd, esi // 32 bit view of PC.
28|.define DISPATCHd, ebx // 32 bit view of DISPATCH.
29|.else
30|.define KBASE, r15 // Must be C callee-save.
31|.define PC, rbx // Must be C callee-save.
32|.define DISPATCH, r14 // Must be C callee-save.
33|.define KBASEd, r15d // 32 bit view of KBASE.
34|.define PCd, ebx // 32 bit view of PC.
35|.define DISPATCHd, r14d // 32 bit view of DISPATCH.
36|.endif
37|
38|.define RA, rcx // Receives operand A during decode (see ins_NEXT).
39|.define RAd, ecx // 32 bit view of RA.
40|.define RAH, ch // Bits 8-15 of RA.
41|.define RAL, cl // Bits 0-7 of RA.
42|.define RB, rbp // Must be rbp (C callee-save).
43|.define RBd, ebp // 32 bit view of RB.
44|.define RC, rax // Must be rax.
45|.define RCd, eax // 32 bit view of RC.
46|.define RCW, ax // Bits 0-15 of RC.
47|.define RCH, ah // Bits 8-15 of RC.
48|.define RCL, al // Bits 0-7 of RC.
49|.define OP, RBd // Opcode index used for dispatch; shares its register with RB.
50|.define RD, RC // RD is an alias of RC; the next three lines alias its sub-registers.
51|.define RDd, RCd
52|.define RDW, RCW
53|.define RDL, RCL
54|.define TMPR, r10 // Scratch register.
55|.define TMPRd, r10d // 32 bit view of TMPR.
56|.define ITYPE, r11 // Scratch register for type tags; overwritten by two-operand settp and the check* macros.
57|.define ITYPEd, r11d // 32 bit view of ITYPE.
58|
59|.if X64WIN
60|.define CARG1, rcx // x64/WIN64 C call arguments. Note: same register as RA.
61|.define CARG2, rdx // Same register as BASE.
62|.define CARG3, r8
63|.define CARG4, r9
64|.define CARG1d, ecx // 32 bit views of the argument registers follow.
65|.define CARG2d, edx
66|.define CARG3d, r8d
67|.define CARG4d, r9d
68|.else
69|.define CARG1, rdi // x64/POSIX C call arguments.
70|.define CARG2, rsi
71|.define CARG3, rdx // Same register as BASE.
72|.define CARG4, rcx // Same register as RA.
73|.define CARG5, r8
74|.define CARG6, r9
75|.define CARG1d, edi // 32 bit views of the argument registers follow.
76|.define CARG2d, esi
77|.define CARG3d, edx
78|.define CARG4d, ecx
79|.define CARG5d, r8d
80|.define CARG6d, r9d
81|.endif // CARG5/CARG6 exist only in the POSIX branch; the Windows code uses the ARG5 stack slot instead.
82|
83|// Type definitions. Some of these are only used for documentation.
84|.type L, lua_State // Enables the L:reg->field operand form, e.g. L:RB->base.
85|.type GL, global_State // Used as GL:RB->vmstate in vm_unwind_c_eh.
86|.type TVALUE, TValue
87|.type GCOBJ, GCobj
88|.type STR, GCstr // Used as an annotation only in the code visible here (STR:RC).
89|.type TAB, GCtab // Used for field access in barrierback (tab->marked, tab->gclist).
90|.type LFUNC, GCfuncL // Used as LFUNC:RB->pc in ins_callt.
91|.type CFUNC, GCfuncC
92|.type PROTO, GCproto
93|.type UPVAL, GCupval
94|.type NODE, Node
95|.type NARGS, int // Annotates a register holding an argument count, e.g. NARGS:RD.
96|.type TRACE, GCtrace
97|.type SBUF, SBuf
98|
99|// Stack layout while in interpreter. Must match with lj_frame.h.
100|//-----------------------------------------------------------------------
101|.if X64WIN // x64/Windows stack layout
102|
103|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
104|.macro saveregs_ // Push all saved registers except rbp, then reserve CFRAME_SPACE.
105| push rdi; push rsi; push rbx
106| sub rsp, CFRAME_SPACE
107|.endmacro
108|.macro saveregs // Full save: rbp first, then saveregs_.
109| push rbp; saveregs_
110|.endmacro
111|.macro restoreregs // Exact inverse of saveregs.
112| add rsp, CFRAME_SPACE
113| pop rbx; pop rsi; pop rdi; pop rbp
114|.endmacro
115|
116|.define SAVE_CFRAME, aword [rsp+aword*13]
117|.define SAVE_PC, aword [rsp+aword*12]
118|.define SAVE_L, aword [rsp+aword*11]
119|.define SAVE_ERRF, dword [rsp+dword*21] // Upper dword of the aword*10 slot.
120|.define SAVE_NRES, dword [rsp+dword*20] // Lower dword of the aword*10 slot.
121|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123|.define SAVE_R4, aword [rsp+aword*8] // rbp (pushed first by saveregs).
124|.define SAVE_R3, aword [rsp+aword*7] // rdi
125|.define SAVE_R2, aword [rsp+aword*6] // rsi
126|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. Holds rbx (pushed last).
127|.define ARG5, aword [rsp+aword*4]
128|.define CSAVE_4, aword [rsp+aword*3]
129|.define CSAVE_3, aword [rsp+aword*2]
130|.define CSAVE_2, aword [rsp+aword*1]
131|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
133|
134|.define ARG5d, dword [rsp+dword*8] // Lower dword of ARG5 (same byte offset as aword*4).
135|.define TMP1, ARG5 // TMP1 overlaps ARG5
136|.define TMP1d, ARG5d
137|.define TMP1hi, dword [rsp+dword*9] // Upper dword of TMP1.
138|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
139|
140|//-----------------------------------------------------------------------
141|.else // x64/POSIX stack layout
142|
143|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
144|.macro saveregs_ // Push all saved registers except rbp, then reserve CFRAME_SPACE.
145| push rbx; push r15; push r14
146|.if NO_UNWIND
147| push r13; push r12 // Two extra saves in this configuration (the SAVE_RU* slots below).
148|.endif
149| sub rsp, CFRAME_SPACE
150|.endmacro
151|.macro saveregs // Full save: rbp first, then saveregs_.
152| push rbp; saveregs_
153|.endmacro
154|.macro restoreregs // Exact inverse of saveregs.
155| add rsp, CFRAME_SPACE
156|.if NO_UNWIND
157| pop r12; pop r13
158|.endif
159| pop r14; pop r15; pop rbx; pop rbp
160|.endmacro
161|
162|//----- 16 byte aligned,
163|.if NO_UNWIND
164|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
165|.define SAVE_R4, aword [rsp+aword*10] // rbp
166|.define SAVE_R3, aword [rsp+aword*9] // rbx
167|.define SAVE_R2, aword [rsp+aword*8] // r15
168|.define SAVE_R1, aword [rsp+aword*7] // r14
169|.define SAVE_RU2, aword [rsp+aword*6] // r13
170|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. Holds r12.
171|.else
172|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
173|.define SAVE_R4, aword [rsp+aword*8] // rbp
174|.define SAVE_R3, aword [rsp+aword*7] // rbx
175|.define SAVE_R2, aword [rsp+aword*6] // r15
176|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. Holds r14.
177|.endif
178|.define SAVE_CFRAME, aword [rsp+aword*4]
179|.define SAVE_PC, aword [rsp+aword*3]
180|.define SAVE_L, aword [rsp+aword*2]
181|.define SAVE_ERRF, dword [rsp+dword*3] // Upper dword of the aword*1 slot.
182|.define SAVE_NRES, dword [rsp+dword*2] // Lower dword of the aword*1 slot.
183|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
184|//----- 16 byte aligned
185|
186|.define TMP1d, dword [rsp] // Lower dword of TMP1.
187|.define TMP1hi, dword [rsp+dword*1] // Upper dword of TMP1.
188|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
189|
190|.endif
191|
192|//-----------------------------------------------------------------------
193|
194|// Instruction headers. They finish operand decoding after ins_NEXT left A in RA and the upper 16 instruction bits in RC/RD.
195|.macro ins_A; .endmacro // Nothing further to decode.
196|.macro ins_AD; .endmacro // D is already in RD.
197|.macro ins_AJ; .endmacro // Same register state as ins_AD.
198|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro // Split RC: high byte -> RB, low byte -> RC.
199|.macro ins_AB_; movzx RBd, RCH; .endmacro // Only extract the high byte into RB; RC is left unsplit.
200|.macro ins_A_C; movzx RCd, RCL; .endmacro // Only keep the low byte in RC.
201|.macro ins_AND; not RD; .endmacro // Bitwise complement of RD.
202|
203|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
204|.macro ins_NEXT
205| mov RCd, [PC] // Fetch the 32 bit instruction word.
206| movzx RAd, RCH // RA = bits 8-15 of the instruction.
207| movzx OP, RCL // OP = bits 0-7 of the instruction.
208| add PC, 4 // PC now points past the fetched instruction.
209| shr RCd, 16 // RC/RD = bits 16-31 of the instruction.
210| jmp aword [DISPATCH+OP*8] // Indirect jump via the table at DISPATCH (8 bytes per entry).
211|.endmacro
212|
213|// Instruction footer. ins_next ends an instruction; ins_next_ marks the one place holding the shared dispatch in the common variant.
214|.if 1
215| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
216| .define ins_next, ins_NEXT
217| .define ins_next_, ins_NEXT
218|.else
219| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
220| // Affects only certain kinds of benchmarks (and only with -j off).
221| // Around 10%-30% slower on Core2, a lot slower on P4.
222| .macro ins_next
223| jmp ->ins_next
224| .endmacro
225| .macro ins_next_
226| ->ins_next:
227| ins_NEXT
228| .endmacro
229|.endif
230|
231|// Call decode and dispatch.
232|.macro ins_callt
233| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
234| mov PC, LFUNC:RB->pc // PC = the pc field of the function in RB.
235| mov RAd, [PC] // Fetch the instruction it points to.
236| movzx OP, RAL // OP = opcode byte. OP is RBd, so this overwrites RB.
237| movzx RAd, RAH // RA = bits 8-15 of the instruction.
238| add PC, 4
239| jmp aword [DISPATCH+OP*8] // RD is not touched here, so nargs+1 reaches the target unchanged.
240|.endmacro
241|
242|.macro ins_call
243| // BASE = new base, RB = LFUNC, RD = nargs+1
244| mov [BASE-8], PC // Store PC in the frame slot that ins_callt expects to be set already.
245| ins_callt
246|.endmacro
247|
248|//-----------------------------------------------------------------------
249|
250|// Macros to clear or set tags. The tag occupies the bits from bit 47 upwards.
251|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro // Zero the top 17 bits, keep bits 0-46.
252|.macro settp, reg, tp // Two operand form: reg |= tp<<47. Overwrites ITYPE.
253| mov64 ITYPE, ((uint64_t)tp<<47)
254| or reg, ITYPE
255|.endmacro
256|.macro settp, dst, reg, tp // Three operand form: dst = (tp<<47) | reg. reg is not modified.
257| mov64 dst, ((uint64_t)tp<<47)
258| or dst, reg
259|.endmacro
260|.macro setint, reg // settp with the LJ_TISNUM tag (overwrites ITYPE).
261| settp reg, LJ_TISNUM
262|.endmacro
263|.macro setint, dst, reg // Three operand settp with the LJ_TISNUM tag.
264| settp dst, reg, LJ_TISNUM
265|.endmacro
266|
267|// Macros to test operand types. All of them overwrite ITYPE and the flags.
268|.macro checktp_nc, reg, tp, target // Jump to target unless the tag of reg equals tp. reg is left intact.
269| mov ITYPE, reg
270| sar ITYPE, 47 // Arithmetic shift: ITYPE = sign-extended tag.
271| cmp ITYPEd, tp
272| jne target
273|.endmacro
274|.macro checktp, reg, tp, target // Like checktp_nc, but also strips the tag from reg.
275| mov ITYPE, reg
276| cleartp reg // Runs before the branch, so reg is stripped on both paths.
277| sar ITYPE, 47
278| cmp ITYPEd, tp
279| jne target
280|.endmacro
281|.macro checktptp, src, tp, target // Same instruction sequence as checktp_nc.
282| mov ITYPE, src
283| sar ITYPE, 47
284| cmp ITYPEd, tp
285| jne target
286|.endmacro
287|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
288|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
289|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
290|
291|.macro checknumx, reg, target, jump // Compare the tag of reg with LJ_TISNUM; 'jump' is the branch mnemonic to use.
292| mov ITYPE, reg
293| sar ITYPE, 47
294| cmp ITYPEd, LJ_TISNUM
295| jump target
296|.endmacro
297|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro // Branch if tag != LJ_TISNUM.
298|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro // Same expansion as checkint.
299|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro // Branch if tag >= LJ_TISNUM (unsigned).
300|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro // Same expansion as checknum.
301|.macro checknumber, src, target; checknumx src, target, ja; .endmacro // Branch if tag > LJ_TISNUM (unsigned).
302|
303|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro // reg = all ones except bit 47.
304|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro // reg = all ones except bit 48.
305|
306|// These operands must be used with movzx. They address the instruction just before PC (PC was advanced by 4 in ins_NEXT).
307|.define PC_OP, byte [PC-4]
308|.define PC_RA, byte [PC-3]
309|.define PC_RB, byte [PC-1]
310|.define PC_RC, byte [PC-2]
311|.define PC_RD, word [PC-2] // Covers the PC_RC and PC_RB bytes.
312|
313|.macro branchPC, reg // PC += (reg - BCBIAS_J) * 4. Uses lea, so the flags are preserved.
314| lea PC, [PC+reg*4-BCBIAS_J*4]
315|.endmacro
316|
317|// Assumes DISPATCH is relative to GL. Both macros yield the displacement to add to DISPATCH to reach the named field.
318#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
319#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
320|// PC2PROTO: offset of a GCproto field measured from the end of the GCproto struct; the code below adds it to a bytecode pointer.
321#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
322|
323|// Decrement hashed hotcount and trigger trace recorder when the subtraction borrows.
324|.macro hotloop, reg // reg is a 32 bit scratch register and is overwritten.
325| mov reg, PCd // Hash input: low 32 bits of PC.
326| shr reg, 1
327| and reg, HOTCOUNT_PCMASK // reg = byte offset of the counter.
328| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP // 16 bit counter, GG_DISP2HOT bytes from DISPATCH.
329| jb ->vm_hotloop // Taken when the counter was below HOTCOUNT_LOOP before the subtraction.
330|.endmacro
331|
332|.macro hotcall, reg // Same scheme as hotloop, with HOTCOUNT_CALL and vm_hotcall.
333| mov reg, PCd
334| shr reg, 1
335| and reg, HOTCOUNT_PCMASK
336| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
337| jb ->vm_hotcall
338|.endmacro
339|
340|// Set current VM state: stores the complement of LJ_VMST_<st> into the vmstate field of the global state.
341|.macro set_vmstate, st
342| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
343|.endmacro
344|
345|.macro fpop1; fstp st1; .endmacro // x87: store st0 into st1 and pop, i.e. drop the second stack entry.
346|
347|// Synthesize SSE FP constants. Each macro builds the bit pattern in GPR 'tmp' and moves it into SSE register 'reg'; tmp is overwritten.
348|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
349| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
350|.endmacro
351|
352|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
353| mov64 tmp, U64x(val,00000000); movd reg, tmp
354|.endmacro
355|
356|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
357| sseconst_hi reg, tmp, 80000000
358|.endmacro
359|.macro sseconst_1, reg, tmp // Synthesize 1.0.
360| sseconst_hi reg, tmp, 3ff00000
361|.endmacro
362|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
363| sseconst_hi reg, tmp, bff00000
364|.endmacro
365|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
366| sseconst_hi reg, tmp, 43300000
367|.endmacro
368|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
369| sseconst_hi reg, tmp, 43380000
370|.endmacro
371|
372|// Move table write barrier back. Overwrites reg. Clears the black bit of tab and pushes tab onto the gc.grayagain list.
373|.macro barrierback, tab, reg
374| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
375| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = old list head.
376| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new head.
377| mov tab->gclist, reg // Link tab to the old head.
378|.endmacro
379|
380|//-----------------------------------------------------------------------
381
382/* Generate subroutines used by opcodes and other parts of the VM. */
383/* The .code_sub section should be last to help static branch prediction. */
384static void build_subroutines(BuildCtx *ctx)
385{
386 |.code_sub
387 |
388 |//-----------------------------------------------------------------------
389 |//-- Return handling ----------------------------------------------------
390 |//-----------------------------------------------------------------------
391 |// vm_returnp: return through a frame with FRAME_P set in PC; other frames go to cont_dispatch. Falls through into vm_returnc.
392 |->vm_returnp:
393 | test PCd, FRAME_P
394 | jz ->cont_dispatch // FRAME_P clear: let the continuation dispatcher handle it.
395 |
396 | // Return from pcall or xpcall fast func.
397 | and PC, -8 // Clear the low 3 bits of PC, leaving the frame delta.
398 | sub BASE, PC // Restore caller base.
399 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
400 | mov PC, [BASE-8] // Fetch PC of previous frame.
401 | // Prepending may overwrite the pcall frame, so do it at the end.
402 | mov_true ITYPE
403 | mov aword [BASE+RA], ITYPE // Prepend true to results.
404 |// vm_returnc: entered with RD = nresults; stores nresults+1 in MULTRES. Falls through into vm_return unless the frame is a Lua frame.
405 |->vm_returnc:
406 | add RDd, 1 // RD = nresults+1
407 | jz ->vm_unwind_yield // The increment wrapped to zero, i.e. RDd was -1 on entry.
408 | mov MULTRES, RDd
409 | test PC, FRAME_TYPE
410 | jz ->BC_RET_Z // Handle regular return to Lua.
411 |// vm_return: return to the frame described by PC. A C frame is left via vm_leave_cp; anything else goes back to vm_returnp.
412 |->vm_return:
413 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
414 | xor PC, FRAME_C // After this flip a FRAME_C frame has zero frame type bits.
415 | test PCd, FRAME_TYPE
416 | jnz ->vm_returnp // Not a FRAME_C frame.
417 |
418 | // Return to C.
419 | set_vmstate C
420 | and PC, -8 // PC = frame delta.
421 | sub PC, BASE
422 | neg PC // Previous base = BASE - delta.
423 |
424 | sub RDd, 1 // RD = number of results.
425 | jz >2 // Nothing to move.
426 |1: // Move results down.
427 | mov RB, [BASE+RA]
428 | mov [BASE-16], RB // Destination is biased by -16 relative to BASE.
429 | add BASE, 8 // BASE doubles as the running copy pointer.
430 | sub RDd, 1
431 | jnz <1
432 |2:
433 | mov L:RB, SAVE_L
434 | mov L:RB->base, PC // L->base = previous base computed above.
435 |3:
436 | mov RDd, MULTRES // RD = nresults+1 provided (label 8 updates MULTRES before coming back here).
437 | mov RAd, SAVE_NRES // RA = wanted nresults+1
438 |4:
439 | cmp RAd, RDd
440 | jne >6 // More/less results wanted?
441 |5:
442 | sub BASE, 16 // Remove the -16 bias: BASE = slot after the last result.
443 | mov L:RB->top, BASE
444 |
445 |->vm_leave_cp:
446 | mov RA, SAVE_CFRAME // Restore previous C frame.
447 | mov L:RB->cframe, RA
448 | xor eax, eax // Ok return status for vm_pcall.
449 |
450 |->vm_leave_unw:
451 | restoreregs
452 | ret // eax holds the status on return to the C caller.
453 |
454 |6:
455 | jb >7 // Less results wanted?
456 | // More results wanted. Check stack size and fill up results with nil.
457 | cmp BASE, L:RB->maxstack
458 | ja >8
459 | mov aword [BASE-16], LJ_TNIL
460 | add BASE, 8
461 | add RDd, 1 // One more result is now present.
462 | jmp <4 // Re-compare wanted vs. provided.
463 |
464 |7: // Less results wanted.
465 | test RAd, RAd
466 | jz <5 // But check for LUA_MULTRET+1.
467 | sub RA, RD // Negative result!
468 | lea BASE, [BASE+RA*8] // Correct top.
469 | jmp <5
470 |
471 |8: // Corner case: need to grow stack for filling up results.
472 | // This can happen if:
473 | // - A C function grows the stack (a lot).
474 | // - The GC shrinks the stack in between.
475 | // - A return back from a lua_call() with (high) nresults adjustment.
476 | mov L:RB->top, BASE // Save current top held in BASE (yes).
477 | mov MULTRES, RDd // Need to fill only remainder with nil.
478 | mov CARG2d, RAd // n = wanted nresults+1.
479 | mov CARG1, L:RB // May overwrite RA (CARG1 is RA with X64WIN); RA is reloaded at label 3.
480 | call extern lj_state_growstack // (lua_State *L, int n)
481 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
482 | jmp <3
483 |// vm_unwind_yield: reached from vm_returnc when RDd wrapped to zero; leaves with LUA_YIELD as the status.
484 |->vm_unwind_yield:
485 | mov al, LUA_YIELD // Only al is written; eax (= RDd) is zero when arriving from vm_returnc.
486 | jmp ->vm_unwind_c_eh
487 |// vm_unwind_c: reset rsp to the given C frame and return errcode to the caller of that frame.
488 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
489 | // (void *cframe, int errcode)
490 | mov eax, CARG2d // Error return status for vm_pcall.
491 | mov rsp, CARG1 // From here on the SAVE_* slots refer to the target C frame.
492 |->vm_unwind_c_eh: // Landing pad for external unwinder.
493 | mov L:RB, SAVE_L
494 | mov GL:RB, L:RB->glref
495 | mov dword GL:RB->vmstate, ~LJ_VMST_C // Same value as set_vmstate C, but via GL since DISPATCH is not set up here.
496 | jmp ->vm_leave_unw // Restore registers and return; eax is untouched by the code above.
497 |// vm_unwind_rethrow: on non-Windows builds, drop this C frame and tail-call lj_err_throw(L, eax).
498 |->vm_unwind_rethrow:
499 |.if not X64WIN
500 | mov CARG1, SAVE_L
501 | mov CARG2d, eax // The error code is taken from eax.
502 | restoreregs // Must come after the SAVE_L read, since it moves rsp.
503 | jmp extern lj_err_throw // (lua_State *L, int errcode)
504 |.endif // With X64WIN the label has no body of its own and is directly followed by vm_unwind_ff.
505 |// vm_unwind_ff: reset rsp to the given C frame and return (false, errmsg) from the frame at L->base via vm_returnc.
506 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
507 | // (void *cframe)
508 | and CARG1, CFRAME_RAWMASK // Mask the pointer with CFRAME_RAWMASK before using it as rsp.
509 | mov rsp, CARG1
510 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
511 | mov L:RB, SAVE_L
512 | mov RDd, 1+1 // Really 1+2 results, incr. later.
513 | mov BASE, L:RB->base
514 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
515 | add DISPATCH, GG_G2DISP
516 | mov PC, [BASE-8] // Fetch PC of previous frame.
517 | mov_false RA
518 | mov RB, [BASE] // RB = value at [BASE] (the error message, per the comment below).
519 | mov [BASE-16], RA // Prepend false to error message.
520 | mov [BASE-8], RB // PC was already fetched from this slot above.
521 | mov RA, -16 // Results start at BASE+RA = BASE-16.
522 | set_vmstate INTERP
523 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
524 |
525 |//-----------------------------------------------------------------------
526 |//-- Grow stack for calls -----------------------------------------------
527 |//-----------------------------------------------------------------------
528 |// Stack growth entry points. All three end up at the lj_state_growstack call at label 2 and then retry the call.
529 |->vm_growstack_c: // Grow stack for C function.
530 | mov CARG2d, LUA_MINSTACK
531 | jmp >2 // Skips the L->base/L->top stores; label 2 expects them to be set already.
532 |
533 |->vm_growstack_v: // Grow stack for vararg Lua function.
534 | sub RD, 16 // LJ_FR2
535 | jmp >1 // RD is stored as L->top at label 1.
536 |
537 |->vm_growstack_f: // Grow stack for fixarg Lua function.
538 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
539 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8.
540 |1:
541 | movzx RAd, byte [PC-4+PC2PROTO(framesize)] // RA = framesize byte of the prototype.
542 | add PC, 4 // Must point after first instruction.
543 | mov L:RB->base, BASE
544 | mov L:RB->top, RD
545 | mov SAVE_PC, PC
546 | mov CARG2, RA // n = framesize.
547 |2:
548 | // RB = L, L->base = new base, L->top = top
549 | mov CARG1, L:RB
550 | call extern lj_state_growstack // (lua_State *L, int n)
551 | mov BASE, L:RB->base // Reload after the call: BASE is not callee-save.
552 | mov RD, L:RB->top
553 | mov LFUNC:RB, [BASE-16] // Reload the function from the frame; this replaces L in RB.
554 | cleartp LFUNC:RB
555 | sub RD, BASE
556 | shr RDd, 3
557 | add NARGS:RDd, 1 // RD = (top - base)/8 + 1.
558 | // BASE = new base, RB = LFUNC, RD = nargs+1
559 | ins_callt // Just retry the call.
560 |
561 |//-----------------------------------------------------------------------
562 |//-- Entry points into the assembler VM ---------------------------------
563 |//-----------------------------------------------------------------------
564 |// vm_resume: set up a C frame tagged with CFRAME_RESUME, then either start a call (L->status == 0) or deliver results to the yielded frame.
565 |->vm_resume: // Setup C frame and resume thread.
566 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
567 | saveregs
568 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
569 | mov SAVE_L, CARG1
570 | mov RA, CARG2
571 | mov PCd, FRAME_CP
572 | xor RDd, RDd // RD = 0; used below as the zero source for the stores and the status compare.
573 | lea KBASE, [rsp+CFRAME_RESUME] // Tagged C frame pointer. Use the full 64 bit rsp, like vm_call/vm_cpcall do for L->cframe.
574 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
575 | add DISPATCH, GG_G2DISP
576 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
577 | mov SAVE_CFRAME, RD // No previous C frame is recorded for a resume.
578 | mov SAVE_NRES, RDd
579 | mov SAVE_ERRF, RDd
580 | mov L:RB->cframe, KBASE
581 | cmp byte L:RB->status, RDL // status == 0?
582 | je >2 // Initial resume (like a call).
583 |
584 | // Resume after yield (like a return).
585 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
586 | set_vmstate INTERP
587 | mov byte L:RB->status, RDL // Clear the status (RDL is still zero).
588 | mov BASE, L:RB->base
589 | mov RD, L:RB->top
590 | sub RD, RA
591 | shr RDd, 3
592 | add RDd, 1 // RD = nresults+1
593 | sub RA, BASE // RA = resultofs
594 | mov PC, [BASE-8]
595 | mov MULTRES, RDd
596 | test PCd, FRAME_TYPE
597 | jz ->BC_RET_Z // Lua frame: regular return to Lua.
598 | jmp ->vm_return
599 |// vm_pcall / vm_call: C entry points. They differ only in the frame type put in PC and in vm_pcall storing the error function.
600 |->vm_pcall: // Setup protected C frame and enter VM.
601 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
602 | saveregs
603 | mov PCd, FRAME_CP
604 | mov SAVE_ERRF, CARG4d // Only the low 32 bits of ef are stored.
605 | jmp >1
606 |
607 |->vm_call: // Setup C frame and enter VM.
608 | // (lua_State *L, TValue *base, int nres1)
609 | saveregs
610 | mov PCd, FRAME_C // SAVE_ERRF is not written on this path.
611 |
612 |1: // Entry point for vm_pcall above (PC = ftype).
613 | mov SAVE_NRES, CARG3d
614 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
615 | mov SAVE_L, CARG1
616 | mov RA, CARG2
617 |
618 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
619 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
620 | mov SAVE_CFRAME, KBASE
621 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
622 | add DISPATCH, GG_G2DISP
623 | mov L:RB->cframe, rsp
624 |
625 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
626 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
627 | set_vmstate INTERP
628 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
629 | add PC, RA
630 | sub PC, BASE // PC = frame delta + frame type
631 |
632 | mov RD, L:RB->top
633 | sub RD, RA
634 | shr NARGS:RDd, 3
635 | add NARGS:RDd, 1 // RD = nargs+1
636 |
637 |->vm_call_dispatch:
638 | mov LFUNC:RB, [RA-16] // Load the slot at new base - 16 (the value being called).
639 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
640 |
641 |->vm_call_dispatch_f:
642 | mov BASE, RA // Switch to the new base.
643 | ins_call // Does not return here (ends in an indirect jump).
644 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
645 |// vm_cpcall: set up a protected C frame and call cp(L, func, ud); if it returns a new base, continue at label 2 of vm_call.
646 |->vm_cpcall: // Setup protected C frame, call C.
647 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
648 | saveregs
649 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
650 | mov SAVE_L, CARG1
651 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
652 |
653 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
654 | sub KBASE, L:RB->top
655 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
656 | mov SAVE_ERRF, 0 // No error function.
657 | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
658 | add DISPATCH, GG_G2DISP
659 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
660 |
661 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
662 | mov SAVE_CFRAME, KBASE
663 | mov L:RB->cframe, rsp
664 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
665 |
666 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
667 | // TValue * (new base) or NULL returned in eax (RC).
668 | test RC, RC
669 | jz ->vm_leave_cp // No base? Just remove C frame.
670 | mov RA, RC // RA = new base, as expected at label 2.
671 | mov PCd, FRAME_CP
672 | jmp <2 // Else continue with the call.
673 |
674 |//-----------------------------------------------------------------------
675 |//-- Metamethod handling ------------------------------------------------
676 |//-----------------------------------------------------------------------
677 |
678 |//-- Continuation dispatch ----------------------------------------------
679 |// cont_dispatch: return from a continuation frame. Restores the caller's BASE/PC/KBASE and jumps to the stored continuation.
680 |->cont_dispatch:
681 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
682 | add RA, BASE // RA = address of the first result.
683 | and PC, -8 // PC = frame delta.
684 | mov RB, BASE // Keep the meta base; BASE changes on the next line.
685 | sub BASE, PC // Restore caller BASE.
686 | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
687 | mov RC, RA // ... in [RC]
688 | mov PC, [RB-24] // Restore PC from [cont|PC].
689 | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
690 |.if FFI
691 | cmp RA, 1
692 | jbe >1 // cont is 0 or 1: special cases at label 1.
693 |.endif
694 | mov LFUNC:KBASE, [BASE-16] // Function of the restored frame.
695 | cleartp LFUNC:KBASE
696 | mov KBASE, LFUNC:KBASE->pc
697 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = the k field of its prototype.
698 | // BASE = base, RC = result, RB = meta base
699 | jmp RA // Jump to continuation.
700 |
701 |.if FFI
702 |1:
703 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
704 | // cont = 0: Tail call from C function.
705 | sub RB, BASE
706 | shr RBd, 3
707 | lea RDd, [RBd-3] // RD = (meta base - base)/8 - 3.
708 | jmp ->vm_call_tail
709 |.endif
710 |// cont_cat: continuation that either finishes via cont_ra or stores the result at mbase-32 and re-enters BC_CAT_Z.
711 |->cont_cat: // BASE = base, RC = result, RB = mbase
712 | movzx RAd, PC_RB
713 | sub RB, 32
714 | lea RA, [BASE+RA*8] // RA = address of slot B.
715 | sub RA, RB
716 | je ->cont_ra // mbase-32 is slot B itself: nothing left to do.
717 | neg RA
718 | shr RAd, 3 // RA = (mbase-32 - slot B address) / 8.
719 |.if X64WIN
720 | mov CARG3d, RAd
721 | mov L:CARG1, SAVE_L // CARG1 is RA with X64WIN, hence RA is copied out first.
722 | mov L:CARG1->base, BASE
723 | mov RC, [RC]
724 | mov [RB], RC // Copy the result value to [mbase-32].
725 | mov CARG2, RB // CARG2 is BASE with X64WIN; BASE was stored to L->base above.
726 |.else
727 | mov L:CARG1, SAVE_L
728 | mov L:CARG1->base, BASE // Must come before CARG3 is written: CARG3 is BASE here.
729 | mov CARG3d, RAd
730 | mov RA, [RC]
731 | mov [RB], RA // Copy the result value to [mbase-32].
732 | mov CARG2, RB
733 |.endif
734 | jmp ->BC_CAT_Z // Continues with CARG1..CARG3 already set up.
735 |
736 |//-- Table indexing metamethods -----------------------------------------
737 |// vmeta_tgets/tgetb/tgetv: build TValue pointers for table (RB) and key (RC), then call lj_meta_tget at label 2.
738 |->vmeta_tgets:
739 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
740 | mov TMP1, STR:RC // Materialize the tagged key in the TMP1 stack slot.
741 | lea RC, TMP1 // RC = &TMP1.
742 | cmp PC_OP, BC_GGET
743 | jne >1 // Not BC_GGET: reload the table operand from slot B.
744 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
745 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
746 | mov [RB], TAB:RA
747 | jmp >2
748 |
749 |->vmeta_tgetb:
750 | movzx RCd, PC_RC // Key is the byte in operand C.
751 |.if DUALNUM
752 | setint RC
753 | mov TMP1, RC
754 |.else
755 | cvtsi2sd xmm0, RCd // Without DUALNUM the key is stored as a double.
756 | movsd TMP1, xmm0
757 |.endif
758 | lea RC, TMP1 // RC = &TMP1.
759 | jmp >1
760 |
761 |->vmeta_tgetv:
762 | movzx RCd, PC_RC // Reload TValue *k from RC.
763 | lea RC, [BASE+RC*8]
764 |1:
765 | movzx RBd, PC_RB // Reload TValue *t from RB.
766 | lea RB, [BASE+RB*8]
767 |2:
768 | mov L:CARG1, SAVE_L
769 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
770 | mov CARG2, RB
771 | mov CARG3, RC
772 | mov L:RB, L:CARG1 // Keep L in callee-save RB across the call.
773 | mov SAVE_PC, PC
774 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
775 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
776 | mov BASE, L:RB->base
777 | test RC, RC
778 | jz >3
779 |->cont_ra: // BASE = base, RC = result
780 | movzx RAd, PC_RA
781 | mov RB, [RC]
782 | mov [BASE+RA*8], RB // Copy the result into slot A.
783 | ins_next
784 |
785 |3: // Call __index metamethod.
786 | // BASE = base, L->top = new base, stack = cont/func/t/k
787 | mov RA, L:RB->top
788 | mov [RA-24], PC // [cont|PC]
789 | lea PC, [RA+FRAME_CONT]
790 | sub PC, BASE // PC = frame delta + FRAME_CONT.
791 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
792 | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
793 | cleartp LFUNC:RB
794 | jmp ->vm_call_dispatch_f
795 |// vmeta_tgetr: call lj_tab_getinth(t, key) and continue in BC_TGETR_Z (hit) or BC_TGETR2_Z with nil (miss).
796 |->vmeta_tgetr:
797 | mov CARG1, TAB:RB
798 | mov RB, BASE // Save BASE.
799 | mov CARG2d, RCd // Caveat: CARG2 == BASE
800 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
801 | // cTValue * or NULL returned in eax (RC).
802 | movzx RAd, PC_RA // Reload operand A; RA is not preserved across the call.
803 | mov BASE, RB // Restore BASE.
804 | test RC, RC
805 | jnz ->BC_TGETR_Z
806 | mov ITYPE, LJ_TNIL
807 | jmp ->BC_TGETR2_Z
808 |
809 |//-----------------------------------------------------------------------
810 |// vmeta_tsets/tsetb/tsetv: build TValue pointers for table (RB) and key (RC), then call lj_meta_tset at label 2.
811 |->vmeta_tsets:
812 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
813 | mov TMP1, STR:RC // Materialize the tagged key in the TMP1 stack slot.
814 | lea RC, TMP1 // RC = &TMP1.
815 | cmp PC_OP, BC_GSET
816 | jne >1 // Not BC_GSET: reload the table operand from slot B.
817 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
818 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
819 | mov [RB], TAB:RA
820 | jmp >2
821 |
822 |->vmeta_tsetb:
823 | movzx RCd, PC_RC // Key is the byte in operand C.
824 |.if DUALNUM
825 | setint RC
826 | mov TMP1, RC
827 |.else
828 | cvtsi2sd xmm0, RCd // Without DUALNUM the key is stored as a double.
829 | movsd TMP1, xmm0
830 |.endif
831 | lea RC, TMP1 // RC = &TMP1.
832 | jmp >1
833 |
834 |->vmeta_tsetv:
835 | movzx RCd, PC_RC // Reload TValue *k from RC.
836 | lea RC, [BASE+RC*8]
837 |1:
838 | movzx RBd, PC_RB // Reload TValue *t from RB.
839 | lea RB, [BASE+RB*8]
840 |2:
841 | mov L:CARG1, SAVE_L
842 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
843 | mov CARG2, RB
844 | mov CARG3, RC
845 | mov L:RB, L:CARG1 // Keep L in callee-save RB across the call.
846 | mov SAVE_PC, PC
847 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
848 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
849 | mov BASE, L:RB->base
850 | test RC, RC
851 | jz >3
852 | // NOBARRIER: lj_meta_tset ensures the table is not black.
853 | movzx RAd, PC_RA
854 | mov RB, [BASE+RA*8]
855 | mov [RC], RB // Store the value from slot A into the returned TValue.
856 |->cont_nop: // BASE = base, (RC = result)
857 | ins_next
858 |
859 |3: // Call __newindex metamethod.
860 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
861 | mov RA, L:RB->top
862 | mov [RA-24], PC // [cont|PC]
863 | movzx RCd, PC_RA
864 | // Copy value to third argument.
865 | mov RB, [BASE+RC*8]
866 | mov [RA+16], RB
867 | lea PC, [RA+FRAME_CONT]
868 | sub PC, BASE // PC = frame delta + FRAME_CONT.
869 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
870 | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
871 | cleartp LFUNC:RB
872 | jmp ->vm_call_dispatch_f
873 |// vmeta_tsetr: call lj_tab_setinth(L, t, key) and continue in BC_TSETR_Z with the returned TValue * in RC.
874 |->vmeta_tsetr:
875 |.if X64WIN
876 | mov L:CARG1, SAVE_L
877 | mov CARG3d, RCd
878 | mov L:CARG1->base, BASE
879 | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE.
880 |.else
881 | mov L:CARG1, SAVE_L
882 | mov CARG2, TAB:RB
883 | mov L:CARG1->base, BASE
884 | mov RB, BASE // Save BASE.
885 | mov CARG3d, RCd // Caveat: CARG3 == BASE.
886 |.endif // On both paths: RB = old BASE, and the table is in CARG2.
887 | mov SAVE_PC, PC
888 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
889 | // TValue * returned in eax (RC).
890 | movzx RAd, PC_RA // Reload operand A; RA is not preserved across the call.
891 | mov BASE, RB // Restore BASE.
892 | jmp ->BC_TSETR_Z
893 |
894 |//-- Comparison metamethods ---------------------------------------------
895 |// vmeta_comp: call lj_meta_comp and act on its result. Labels 3-6 are also jumped to from vmeta_equal*, vmeta_istype and cont_cond*.
896 |->vmeta_comp:
897 | movzx RDd, PC_RD
898 | movzx RAd, PC_RA
899 | mov L:RB, SAVE_L
900 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
901 |.if X64WIN
902 | lea CARG3, [BASE+RD*8] // CARG2 is BASE here, so it must be written last.
903 | lea CARG2, [BASE+RA*8]
904 |.else
905 | lea CARG2, [BASE+RA*8]
906 | lea CARG3, [BASE+RD*8] // CARG3 is BASE here, so it must be written last.
907 |.endif
908 | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
909 | movzx CARG4d, PC_OP
910 | mov SAVE_PC, PC
911 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
912 | // 0/1 or TValue * (metamethod) returned in eax (RC).
913 |3:
914 | mov BASE, L:RB->base
915 | cmp RC, 1
916 | ja ->vmeta_binop // RC > 1: a TValue * was returned, call the metamethod.
917 |4:
918 | lea PC, [PC+4] // lea keeps the flags of the preceding cmp alive for the jb below.
919 | jb >6 // Below (RC == 0 when coming from label 3): do not branch.
920 |5:
921 | movzx RDd, PC_RD
922 | branchPC RD // Take the jump encoded in the instruction before PC.
923 |6:
924 | ins_next
925 |
926 |->cont_condt: // BASE = base, RC = result
927 | add PC, 4
928 | mov ITYPE, [RC]
929 | sar ITYPE, 47
930 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
931 | jb <5
932 | jmp <6
933 |
934 |->cont_condf: // BASE = base, RC = result
935 | mov ITYPE, [RC]
936 | sar ITYPE, 47
937 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
938 | jmp <4 // Label 4 advances PC and tests these flags with jb.
939 |// vmeta_equal: call lj_meta_equal(L, o1 = RA, o2 = RD, ne = RB) and rejoin vmeta_comp at label 3.
940 |->vmeta_equal:
941 | cleartp TAB:RD
942 | sub PC, 4 // Step PC back by one instruction before saving it.
943 |.if X64WIN
944 | mov CARG3, RD
945 | mov CARG4d, RBd
946 | mov L:RB, SAVE_L
947 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
948 | mov CARG2, RA
949 | mov CARG1, L:RB // Caveat: CARG1 == RA.
950 |.else
951 | mov CARG2, RA
952 | mov CARG4d, RBd // Caveat: CARG4 == RA.
953 | mov L:RB, SAVE_L
954 | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
955 | mov CARG3, RD
956 | mov CARG1, L:RB
957 |.endif
958 | mov SAVE_PC, PC
959 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
960 | // 0/1 or TValue * (metamethod) returned in eax (RC).
961 | jmp <3 // Label 3 of vmeta_comp: reload BASE and act on RC.
962 |// vmeta_equal_cd: with FFI, call lj_meta_equal_cd(L, ins) and rejoin vmeta_comp at label 3.
963 |->vmeta_equal_cd:
964 |.if FFI
965 | sub PC, 4 // Step PC back by one instruction.
966 | mov L:RB, SAVE_L
967 | mov L:RB->base, BASE
968 | mov CARG1, L:RB
969 | mov CARG2d, dword [PC-4] // The full instruction word just before the adjusted PC.
970 | mov SAVE_PC, PC
971 | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
972 | // 0/1 or TValue * (metamethod) returned in eax (RC).
973 | jmp <3
974 |.endif // Without FFI the label has no body of its own and is directly followed by vmeta_istype.
975 |// vmeta_istype: call lj_meta_istype(L, ra, tp), then dispatch the next instruction via label 6 of vmeta_comp.
976 |->vmeta_istype:
977 | mov L:RB, SAVE_L
978 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
979 | mov CARG2d, RAd
980 | mov CARG3d, RDd
981 | mov L:CARG1, L:RB
982 | mov SAVE_PC, PC
983 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
984 | mov BASE, L:RB->base // The return value in RC is not examined.
985 | jmp <6
986 |
987 |//-- Arithmetic metamethods ---------------------------------------------
988 |
989 |->vmeta_arith_vno:
990 |.if DUALNUM
991 | movzx RBd, PC_RB
992 | movzx RCd, PC_RC
993 |.endif
994 |->vmeta_arith_vn:
995 | lea RC, [KBASE+RC*8]
996 | jmp >1
997 |
998 |->vmeta_arith_nvo:
999 |.if DUALNUM
1000 | movzx RBd, PC_RB
1001 | movzx RCd, PC_RC
1002 |.endif
1003 |->vmeta_arith_nv:
1004 | lea TMPR, [KBASE+RC*8]
1005 | lea RC, [BASE+RB*8]
1006 | mov RB, TMPR
1007 | jmp >2
1008 |
1009 |->vmeta_unm:
1010 | lea RC, [BASE+RD*8]
1011 | mov RB, RC
1012 | jmp >2
1013 |
1014 |->vmeta_arith_vvo:
1015 |.if DUALNUM
1016 | movzx RBd, PC_RB
1017 | movzx RCd, PC_RC
1018 |.endif
1019 |->vmeta_arith_vv:
1020 | lea RC, [BASE+RC*8]
1021 |1:
1022 | lea RB, [BASE+RB*8]
1023 |2:
1024 | lea RA, [BASE+RA*8]
1025 |.if X64WIN
1026 | mov CARG3, RB
1027 | mov CARG4, RC
1028 | movzx RCd, PC_OP
1029 | mov ARG5d, RCd
1030 | mov L:RB, SAVE_L
1031 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1032 | mov CARG2, RA
1033 | mov CARG1, L:RB // Caveat: CARG1 == RA.
1034 |.else
1035 | movzx CARG5d, PC_OP
1036 | mov CARG2, RA
1037 | mov CARG4, RC // Caveat: CARG4 == RA.
1038 | mov L:CARG1, SAVE_L
1039 | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1040 | mov CARG3, RB
1041 | mov L:RB, L:CARG1
1042 |.endif
1043 | mov SAVE_PC, PC
1044 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1045 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1046 | mov BASE, L:RB->base
1047 | test RC, RC
1048 | jz ->cont_nop
1049 |
1050 | // Call metamethod for binary op.
1051 |->vmeta_binop:
1052 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1053 | mov RA, RC // RA = new base.
1054 | sub RC, BASE // RC = byte offset from the current base to the new base.
1055 | mov [RA-24], PC // [cont|PC]
1056 | lea PC, [RC+FRAME_CONT] // PC = that offset plus the FRAME_CONT tag.
1057 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1058 | jmp ->vm_call_dispatch
1059 |
1060 |->vmeta_len: // Calls lj_meta_len(L, o) for the operand slot selected by PC_RD.
1061 | movzx RDd, PC_RD
1062 | mov L:RB, SAVE_L
1063 | mov L:RB->base, BASE
1064 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1065 | mov L:CARG1, L:RB
1066 | mov SAVE_PC, PC
1067 | call extern lj_meta_len // (lua_State *L, TValue *o)
1068 | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1069 | mov BASE, L:RB->base
1070#if LJ_52
1071 | test RC, RC
1072 | jne ->vmeta_binop // Binop call for compatibility.
1073 | movzx RDd, PC_RD // NULL (retry): reload the operand index (RD aliases RC, which held the result).
1074 | mov TAB:CARG1, [BASE+RD*8]
1075 | cleartp TAB:CARG1
1076 | jmp ->BC_LEN_Z // Retry at BC_LEN_Z with the cleartp'd table value in CARG1.
1077#else
1078 | jmp ->vmeta_binop // Binop call for compatibility.
1079#endif
1080 |
1081 |//-- Call metamethod ----------------------------------------------------
1082 |
1083 |->vmeta_call_ra: // Entry with RA as a slot index: convert it to the new base address first.
1084 | lea RA, [BASE+RA*8+16]
1085 |->vmeta_call: // Resolve and call __call metamethod.
1086 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1087 | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1088 | mov RB, RA // RA is kept in callee-save RB across the C call.
1089 |.if X64WIN
1090 | mov L:TMPR, SAVE_L
1091 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1092 | lea CARG2, [RA-16]
1093 | lea CARG3, [RA+NARGS:RD*8-8]
1094 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1095 |.else
1096 | mov L:CARG1, SAVE_L
1097 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1098 | lea CARG2, [RA-16]
1099 | lea CARG3, [RA+NARGS:RD*8-8]
1100 |.endif
1101 | mov SAVE_PC, PC
1102 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1103 | mov RA, RB // Restore the new base saved before the call.
1104 | mov L:RB, SAVE_L
1105 | mov BASE, L:RB->base
1106 | mov NARGS:RDd, TMP1d
1107 | mov LFUNC:RB, [RA-16] // Reload the function slot below the new base after lj_meta_call.
1108 | add NARGS:RDd, 1 // One more argument than before the call.
1109 | // This is fragile. L->base must not move, KBASE must always be defined.
1110 | cmp KBASE, BASE // Continue with CALLT if flag set.
1111 | je ->BC_CALLT_Z
1112 | cleartp LFUNC:RB
1113 | mov BASE, RA
1114 | ins_call // Otherwise call resolved metamethod.
1115 |
1116 |//-- Argument coercion for 'for' statement ------------------------------
1117 |
1118 |->vmeta_for: // Calls lj_meta_for(L, base) with RA as the base pointer, then re-dispatches.
1119 | mov L:RB, SAVE_L
1120 | mov L:RB->base, BASE
1121 | mov CARG2, RA // Caveat: CARG2 == BASE
1122 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1123 | mov SAVE_PC, PC
1124 | call extern lj_meta_for // (lua_State *L, TValue *base)
1125 | mov BASE, L:RB->base
1126 | mov RCd, [PC-4] // Reload the 32 bit instruction word just before PC.
1127 | movzx RAd, RCH // RA = second byte of the instruction word.
1128 | movzx OP, RCL // OP = lowest byte of the instruction word.
1129 | shr RCd, 16 // RC (= RD) = upper 16 bits of the instruction word.
1130 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1131 |
1132 |//-----------------------------------------------------------------------
1133 |//-- Fast functions -----------------------------------------------------
1134 |//-----------------------------------------------------------------------
1135 |
1136 |.macro .ffunc, name // Emits only the ->ff_<name> entry label; no argument count check.
1137 |->ff_ .. name:
1138 |.endmacro
1139 |
1140 |.macro .ffunc_1, name // Entry label; falls back if fewer than 1 argument (RD = nargs+1).
1141 |->ff_ .. name:
1142 | cmp NARGS:RDd, 1+1; jb ->fff_fallback
1143 |.endmacro
1144 |
1145 |.macro .ffunc_2, name // Entry label; falls back if fewer than 2 arguments.
1146 |->ff_ .. name:
1147 | cmp NARGS:RDd, 2+1; jb ->fff_fallback
1148 |.endmacro
1149 |
1150 |.macro .ffunc_n, name, op // .ffunc_1 + number check of arg 1, then 'op xmm0, arg1'.
1151 | .ffunc_1 name
1152 | checknumtp [BASE], ->fff_fallback
1153 | op xmm0, qword [BASE]
1154 |.endmacro
1155 |
1156 |.macro .ffunc_n, name // One-operand form: loads arg 1 into xmm0 with movsd.
1157 | .ffunc_n name, movsd
1158 |.endmacro
1159 |
1160 |.macro .ffunc_nn, name // .ffunc_2 + number checks; arg 1 -> xmm0, arg 2 -> xmm1.
1161 | .ffunc_2 name
1162 | checknumtp [BASE], ->fff_fallback
1163 | checknumtp [BASE+8], ->fff_fallback
1164 | movsd xmm0, qword [BASE]
1165 | movsd xmm1, qword [BASE+8]
1166 |.endmacro
1167 |
1168 |// Inlined GC threshold check. Caveat: uses label 1.
1169 |.macro ffgccheck
1170 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] // Clobbers RB.
1171 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1172 | jb >1 // gc.total below gc.threshold (unsigned): skip the GC step.
1173 | call ->fff_gcstep
1174 |1:
1175 |.endmacro
1176 |
1177 |//-- Base library: checks -----------------------------------------------
1178 |
1179 |.ffunc_1 assert // Fast path: pass all arguments through as results; otherwise fall back.
1180 | mov ITYPE, [BASE]
1181 | mov RB, ITYPE // Note: RB is reloaded from [BASE] below before it is used.
1182 | sar ITYPE, 47 // Shift the tag bits of arg 1 down into ITYPEd.
1183 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback
1184 | mov PC, [BASE-8]
1185 | mov MULTRES, RDd // Remember nargs+1; it becomes nresults+1 at label 2.
1186 | mov RB, [BASE]
1187 | mov [BASE-16], RB // First result = first argument.
1188 | sub RDd, 2
1189 | jz >2 // Exactly one argument: nothing else to copy.
1190 | mov RA, BASE
1191 |1: // Copy each remaining argument down by two slots (16 bytes).
1192 | add RA, 8
1193 | mov RB, [RA]
1194 | mov [RA-16], RB
1195 | sub RDd, 1
1196 | jnz <1
1197 |2:
1198 | mov RDd, MULTRES
1199 | jmp ->fff_res_ // MULTRES is already set, so enter below the store in fff_res.
1200 |
1201 |.ffunc_1 type // Returns the string held in upvalue[~tag] of the called C function.
1202 | mov RC, [BASE]
1203 | sar RC, 47 // Shift the tag bits of arg 1 down into RCd.
1204 | mov RBd, LJ_TISNUM
1205 | cmp RCd, RBd
1206 | cmovb RCd, RBd // Clamp every tag below LJ_TISNUM (unsigned compare) to LJ_TISNUM.
1207 | not RCd // RC = ~tag, used as the upvalue index below.
1208 |2:
1209 | mov CFUNC:RB, [BASE-16] // Load the called function from the frame slot at BASE-16.
1210 | cleartp CFUNC:RB
1211 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1212 | mov PC, [BASE-8]
1213 | settp STR:RC, LJ_TSTR
1214 | mov [BASE-16], STR:RC
1215 | jmp ->fff_res1
1216 |
1217 |//-- Base library: getters and setters ---------------------------------
1218 |
1219 |.ffunc_1 getmetatable // Returns mt.__metatable if that key exists and is non-nil, else the metatable (or nil).
1220 | mov TAB:RB, [BASE]
1221 | mov PC, [BASE-8]
1222 | checktab TAB:RB, >6 // Not a table: select the metatable at label 6.
1223 |1: // Field metatable must be at same offset for GCtab and GCudata!
1224 | mov TAB:RB, TAB:RB->metatable
1225 |2:
1226 | test TAB:RB, TAB:RB
1227 | mov aword [BASE-16], LJ_TNIL // mov leaves the flags of the test above intact.
1228 | jz ->fff_res1 // No metatable: the result is nil.
1229 | settp TAB:RC, TAB:RB, LJ_TTAB
1230 | mov [BASE-16], TAB:RC // Store metatable as default result.
1231 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
1232 | mov RAd, TAB:RB->hmask
1233 | and RAd, STR:RC->hash // RA = hash of the key string masked by hmask.
1234 | settp STR:RC, LJ_TSTR // Tag the key so it can be compared against NODE->key directly.
1235 | imul RAd, #NODE
1236 | add NODE:RA, TAB:RB->node // RA = address of the first node of the hash chain.
1237 |3: // Rearranged logic, because we expect _not_ to find the key.
1238 | cmp NODE:RA->key, STR:RC
1239 | je >5
1240 |4:
1241 | mov NODE:RA, NODE:RA->next
1242 | test NODE:RA, NODE:RA
1243 | jnz <3
1244 | jmp ->fff_res1 // Not found, keep default result.
1245 |5:
1246 | mov RB, NODE:RA->val
1247 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1248 | mov [BASE-16], RB // Return value of mt.__metatable.
1249 | jmp ->fff_res1
1250 |
1251 |6: // Non-table argument. ITYPEd is read here as the tag left by checktab.
1252 | cmp ITYPEd, LJ_TUDATA; je <1
1253 | cmp ITYPEd, LJ_TISNUM; ja >7
1254 | mov ITYPEd, LJ_TISNUM // Tags up to LJ_TISNUM (unsigned) all use the LJ_TISNUM entry.
1255 |7:
1256 | not ITYPEd // ~tag indexes the gcroot[GCROOT_BASEMT] array.
1257 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1258 | jmp <2
1259 |
1260 |.ffunc_2 setmetatable // Inline only for: table without a metatable, new metatable is a table.
1261 | mov TAB:RB, [BASE]
1262 | mov TAB:TMPR, TAB:RB // Keep the original (tagged) value for the result store.
1263 | checktab TAB:RB, ->fff_fallback
1264 | // Fast path: no mt for table yet and not clearing the mt.
1265 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1266 | mov TAB:RA, [BASE+8]
1267 | checktab TAB:RA, ->fff_fallback
1268 | mov TAB:RB->metatable, TAB:RA
1269 | mov PC, [BASE-8]
1270 | mov [BASE-16], TAB:TMPR // Return original table.
1271 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1272 | jz >1
1273 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1274 | barrierback TAB:RB, RC
1275 |1:
1276 | jmp ->fff_res1
1277 |
1278 |.ffunc_2 rawget // Calls lj_tab_get(L, t, key) and returns a copy of the slot it points to.
1279 |.if X64WIN
1280 | mov TAB:RA, [BASE]
1281 | checktab TAB:RA, ->fff_fallback
1282 | mov RB, BASE // Save BASE.
1283 | lea CARG3, [BASE+8]
1284 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1285 | mov CARG1, SAVE_L
1286 |.else
1287 | mov TAB:CARG2, [BASE]
1288 | checktab TAB:CARG2, ->fff_fallback
1289 | mov RB, BASE // Save BASE.
1290 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1291 | mov CARG1, SAVE_L
1292 |.endif
1293 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1294 | // cTValue * returned in eax (RD).
1295 | mov BASE, RB // Restore BASE.
1296 | // Copy table slot.
1297 | mov RB, [RD]
1298 | mov PC, [BASE-8]
1299 | mov [BASE-16], RB
1300 | jmp ->fff_res1
1301 |
1302 |//-- Base library: conversions ------------------------------------------
1303 |
1304 |.ffunc tonumber // Uses plain .ffunc: the argument count is checked explicitly below.
1305 | // Only handles the number case inline (without a base argument).
1306 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1307 | mov RB, [BASE]
1308 | checknumber RB, ->fff_fallback
1309 | mov PC, [BASE-8]
1310 | mov [BASE-16], RB // Result is the argument itself, unchanged.
1311 | jmp ->fff_res1
1312 |
1313 |.ffunc_1 tostring
1314 | // Only handles the string or number case inline.
1315 | mov PC, [BASE-8]
1316 | mov STR:RB, [BASE]
1317 | checktp_nc STR:RB, LJ_TSTR, >3
1318 | // A __tostring method in the string base metatable is ignored.
1319 |2: // RB holds the tagged string value to return.
1320 | mov [BASE-16], STR:RB
1321 | jmp ->fff_res1
1322 |3: // Handle numbers inline, unless a number base metatable is present.
1323 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1324 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1325 | jne ->fff_fallback
1326 | ffgccheck // Caveat: uses label 1.
1327 | mov L:RB, SAVE_L
1328 | mov L:RB->base, BASE // Add frame since C call can throw.
1329 | mov SAVE_PC, PC // Redundant (but a defined value).
1330 |.if not X64WIN
1331 | mov CARG2, BASE // Otherwise: CARG2 == BASE
1332 |.endif
1333 | mov L:CARG1, L:RB
1334 |.if DUALNUM
1335 | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1336 |.else
1337 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1338 |.endif
1339 | // GCstr returned in eax (RD).
1340 | mov BASE, L:RB->base
1341 | settp STR:RB, RD, LJ_TSTR // RB = returned string tagged as LJ_TSTR.
1342 | jmp <2
1343 |
1344 |//-- Base library: iterators -------------------------------------------
1345 |
1346 |.ffunc_1 next
1347 | je >2 // Missing 2nd arg?
1348 |1:
1349 |.if X64WIN
1350 | mov RA, [BASE]
1351 | checktab RA, ->fff_fallback
1352 |.else
1353 | mov CARG2, [BASE]
1354 | checktab CARG2, ->fff_fallback
1355 |.endif
1356 | mov L:RB, SAVE_L
1357 | mov L:RB->base, BASE // Add frame since C call can throw.
1358 | mov L:RB->top, BASE // Dummy frame length is ok.
1359 | mov PC, [BASE-8]
1360 |.if X64WIN
1361 | lea CARG3, [BASE+8]
1362 | mov CARG2, RA // Caveat: CARG2 == BASE.
1363 | mov CARG1, L:RB
1364 |.else
1365 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1366 | mov CARG1, L:RB
1367 |.endif
1368 | mov SAVE_PC, PC // Needed for ITERN fallback.
1369 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1370 | // Flag returned in eax (RD).
1371 | mov BASE, L:RB->base
1372 | test RDd, RDd; jz >3 // End of traversal?
1373 | // Copy key and value to results.
1374 | mov RB, [BASE+8]
1375 | mov RD, [BASE+16]
1376 | mov [BASE-16], RB
1377 | mov [BASE-8], RD
1378 |->fff_res2: // Shared exit: return two results.
1379 | mov RDd, 1+2
1380 | jmp ->fff_res
1381 |2: // Set missing 2nd arg to nil.
1382 | mov aword [BASE+8], LJ_TNIL
1383 | jmp <1
1384 |3: // End of traversal: return nil.
1385 | mov aword [BASE-16], LJ_TNIL
1386 | jmp ->fff_res1
1387 |
1388 |.ffunc_1 pairs // Returns three results: upvalue[0] of this function, the table, nil.
1389 | mov TAB:RB, [BASE]
1390 | mov TMPR, TAB:RB // Keep the original (tagged) table value for the result.
1391 | checktab TAB:RB, ->fff_fallback
1392#if LJ_52
1393 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1394#endif
1395 | mov CFUNC:RD, [BASE-16]
1396 | cleartp CFUNC:RD
1397 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1398 | settp CFUNC:RD, LJ_TFUNC
1399 | mov PC, [BASE-8] // Read PC before [BASE-8] is overwritten with the 2nd result.
1400 | mov [BASE-16], CFUNC:RD
1401 | mov [BASE-8], TMPR
1402 | mov aword [BASE], LJ_TNIL
1403 | mov RDd, 1+3
1404 | jmp ->fff_res
1405 |
1406 |.ffunc_2 ipairs_aux // Returns (i+1, t[i+1]), or no results when the slot is nil or absent.
1407 | mov TAB:RB, [BASE]
1408 | checktab TAB:RB, ->fff_fallback
1409 |.if DUALNUM
1410 | mov RA, [BASE+8]
1411 | checkint RA, ->fff_fallback
1412 |.else
1413 | checknumtp [BASE+8], ->fff_fallback
1414 | movsd xmm0, qword [BASE+8]
1415 |.endif
1416 | mov PC, [BASE-8]
1417 |.if DUALNUM
1418 | add RAd, 1
1419 | setint ITYPE, RA
1420 | mov [BASE-16], ITYPE // First result: the incremented index as an integer.
1421 |.else
1422 | sseconst_1 xmm1, TMPR
1423 | addsd xmm0, xmm1
1424 | cvttsd2si RAd, xmm0 // RA = integer form of the incremented index.
1425 | movsd qword [BASE-16], xmm0 // First result: the incremented index as a double.
1426 |.endif
1427 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1428 | mov RD, TAB:RB->array
1429 | lea RD, [RD+RA*8]
1430 |1: // RD points to the value slot.
1431 | cmp aword [RD], LJ_TNIL; je ->fff_res0
1432 | // Copy array slot.
1433 | mov RB, [RD]
1434 | mov [BASE-8], RB
1435 | jmp ->fff_res2
1436 |2: // Check for empty hash part first. Otherwise call C function.
1437 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1438 |.if X64WIN
1439 | mov TMPR, BASE
1440 | mov CARG2d, RAd
1441 | mov CARG1, TAB:RB
1442 | mov RB, TMPR // Saved BASE ends up in callee-save RB.
1443 |.else
1444 | mov CARG1, TAB:RB
1445 | mov RB, BASE // Save BASE.
1446 | mov CARG2d, RAd // Caveat: CARG2 == BASE
1447 |.endif
1448 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1449 | // cTValue * or NULL returned in eax (RD).
1450 | mov BASE, RB
1451 | test RD, RD
1452 | jnz <1
1453 |->fff_res0: // Shared exit: return no results.
1454 | mov RDd, 1+0
1455 | jmp ->fff_res
1456 |
1457 |.ffunc_1 ipairs // Returns three results: upvalue[0] of this function, the table, zero.
1458 | mov TAB:RB, [BASE]
1459 | mov TMPR, TAB:RB // Keep the original (tagged) table value for the result.
1460 | checktab TAB:RB, ->fff_fallback
1461#if LJ_52
1462 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1463#endif
1464 | mov CFUNC:RD, [BASE-16]
1465 | cleartp CFUNC:RD
1466 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1467 | settp CFUNC:RD, LJ_TFUNC
1468 | mov PC, [BASE-8] // Read PC before [BASE-8] is overwritten with the 2nd result.
1469 | mov [BASE-16], CFUNC:RD
1470 | mov [BASE-8], TMPR
1471 |.if DUALNUM
1472 | mov64 RD, ((uint64_t)LJ_TISNUM<<47) // LJ_TISNUM tag with a zero payload.
1473 | mov [BASE], RD
1474 |.else
1475 | mov qword [BASE], 0 // All-zero bit pattern, i.e. the double 0.0.
1476 |.endif
1477 | mov RDd, 1+3
1478 | jmp ->fff_res
1479 |
1480 |//-- Base library: catch errors ----------------------------------------
1481 |
1482 |.ffunc_1 pcall
1483 | lea RA, [BASE+16] // RA = new base, two slots above the current one.
1484 | sub NARGS:RDd, 1 // The called function itself is not counted as an argument.
1485 | mov PCd, 16+FRAME_PCALL // PC = frame delta 16 plus the FRAME_PCALL tag.
1486 |1: // Shared with xpcall, which enters here via 'jmp <1'.
1487 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1488 | shr RB, HOOK_ACTIVE_SHIFT
1489 | and RB, 1
1490 | add PC, RB // Remember active hook before pcall.
1491 | // Note: this does a (harmless) copy of the function to the PC slot, too.
1492 | mov KBASE, RD // KBASE serves as the loop counter here.
1493 |2: // Move the slots up by one, highest index first.
1494 | mov RB, [RA+KBASE*8-24]
1495 | mov [RA+KBASE*8-16], RB
1496 | sub KBASE, 1
1497 | ja <2
1498 | jmp ->vm_call_dispatch
1499 |
1500 |.ffunc_2 xpcall
1501 | mov LFUNC:RA, [BASE+8]
1502 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback // Arg 2 must be a function.
1503 | mov LFUNC:RB, [BASE] // Swap function and traceback.
1504 | mov [BASE], LFUNC:RA
1505 | mov [BASE+8], LFUNC:RB
1506 | lea RA, [BASE+24] // RA = new base, three slots above the current one.
1507 | sub NARGS:RDd, 2
1508 | mov PCd, 24+FRAME_PCALL // PC = frame delta 24 plus the FRAME_PCALL tag.
1509 | jmp <1 // Continue in the shared tail of pcall (its label 1).
1510 |
1511 |//-- Coroutine library --------------------------------------------------
1512 |
1513 |.macro coroutine_resume_wrap, resume // resume=1 emits ff_coroutine_resume, resume=0 emits ff_coroutine_wrap_aux.
1514 |.if resume
1515 |.ffunc_1 coroutine_resume
1516 | mov L:RB, [BASE]
1517 | cleartp L:RB
1518 |.else
1519 |.ffunc coroutine_wrap_aux
1520 | mov CFUNC:RB, [BASE-16]
1521 | cleartp CFUNC:RB
1522 | mov L:RB, CFUNC:RB->upvalue[0].gcr
1523 | cleartp L:RB
1524 |.endif
1525 | mov PC, [BASE-8]
1526 | mov SAVE_PC, PC
1527 | mov TMP1, L:RB // TMP1 keeps the coroutine's lua_State for later reloads.
1528 |.if resume
1529 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1530 |.endif
1531 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1532 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1533 | mov RA, L:RB->top // mov keeps the flags of the status compare for the je below.
1534 | je >1 // Skip if status == LUA_YIELD. Otherwise status is below LUA_YIELD (i.e. 0).
1535 | cmp RA, L:RB->base // Check for presence of initial func.
1536 | je ->fff_fallback
1537 | mov PC, [RA-8] // Move initial function up.
1538 | mov [RA], PC
1539 | add RA, 8
1540 |1:
1541 |.if resume
1542 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1543 |.else
1544 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1545 |.endif
1546 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1547 | mov L:RB->top, PC // New top of the coroutine stack, after the arguments.
1548 |
1549 | mov L:RB, SAVE_L // From here on RB is the current (resuming) lua_State.
1550 | mov L:RB->base, BASE
1551 |.if resume
1552 | add BASE, 8 // Keep resumed thread in stack for GC.
1553 |.endif
1554 | mov L:RB->top, BASE
1555 |.if resume
1556 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1557 |.else
1558 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1559 |.endif
1560 | sub RB, PC // Relative to PC.
1561 |
1562 | cmp PC, RA
1563 | je >3 // Nothing to move.
1564 |2: // Move args to coroutine.
1565 | mov RC, [PC+RB]
1566 | mov [PC-8], RC
1567 | sub PC, 8
1568 | cmp PC, RA
1569 | jne <2
1570 |3:
1571 | mov CARG2, RA
1572 | mov CARG1, TMP1
1573 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1574 |
1575 | mov L:RB, SAVE_L
1576 | mov L:PC, TMP1 // PC temporarily holds the coroutine's lua_State.
1577 | mov BASE, L:RB->base
1578 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1579 | set_vmstate INTERP
1580 |
1581 | cmp eax, LUA_YIELD
1582 | ja >8 // Status above LUA_YIELD: error path.
1583 |4:
1584 | mov RA, L:PC->base
1585 | mov KBASE, L:PC->top
1586 | mov L:PC->top, RA // Clear coroutine stack.
1587 | mov PC, KBASE
1588 | sub PC, RA // PC = size of the results in bytes.
1589 | je >6 // No results?
1590 | lea RD, [BASE+PC]
1591 | shr PCd, 3 // PC = number of results.
1592 | cmp RD, L:RB->maxstack
1593 | ja >9 // Need to grow stack?
1594 |
1595 | mov RB, BASE
1596 | sub RB, RA // RB = offset from a coroutine slot to its destination slot.
1597 |5: // Move results from coroutine.
1598 | mov RD, [RA]
1599 | mov [RA+RB], RD
1600 | add RA, 8
1601 | cmp RA, KBASE
1602 | jne <5
1603 |6:
1604 |.if resume
1605 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1606 | mov_true ITYPE // Prepend true to results.
1607 | mov [BASE-8], ITYPE
1608 |.else
1609 | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1610 |.endif
1611 |7:
1612 | mov PC, SAVE_PC
1613 | mov MULTRES, RDd
1614 |.if resume
1615 | mov RA, -8 // Results start at BASE-8 (BASE was advanced by 8 above).
1616 |.else
1617 | xor RAd, RAd // Results start at BASE.
1618 |.endif
1619 | test PCd, FRAME_TYPE
1620 | jz ->BC_RET_Z
1621 | jmp ->vm_return
1622 |
1623 |8: // Coroutine returned with error (at co->top-1).
1624 |.if resume
1625 | mov_false ITYPE // Prepend false to results.
1626 | mov [BASE-8], ITYPE
1627 | mov RA, L:PC->top
1628 | sub RA, 8
1629 | mov L:PC->top, RA // Clear error from coroutine stack.
1630 | // Copy error message.
1631 | mov RD, [RA]
1632 | mov [BASE], RD
1633 | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1634 | jmp <7
1635 |.else
1636 | mov CARG2, L:PC
1637 | mov CARG1, L:RB
1638 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1639 | // Error function does not return.
1640 |.endif
1641 |
1642 |9: // Handle stack expansion on return from yield.
1643 | mov L:RA, TMP1
1644 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1645 | mov CARG2, PC
1646 | mov CARG1, L:RB
1647 | call extern lj_state_growstack // (lua_State *L, int n)
1648 | mov L:PC, TMP1
1649 | mov BASE, L:RB->base
1650 | jmp <4 // Retry the stack move.
1651 |.endmacro
1652 |
1653 | coroutine_resume_wrap 1 // coroutine.resume
1654 | coroutine_resume_wrap 0 // coroutine.wrap
1655 |
1656 |.ffunc coroutine_yield
1657 | mov L:RB, SAVE_L
1658 | test aword L:RB->cframe, CFRAME_RESUME
1659 | jz ->fff_fallback // CFRAME_RESUME bit not set in L->cframe: let the fallback handle it.
1660 | mov L:RB->base, BASE
1661 | lea RD, [BASE+NARGS:RD*8-8]
1662 | mov L:RB->top, RD // L->top = end of the arguments.
1663 | xor RDd, RDd
1664 | mov aword L:RB->cframe, RD // L->cframe = 0.
1665 | mov al, LUA_YIELD // al is the low byte of RD/RC (rax).
1666 | mov byte L:RB->status, al
1667 | jmp ->vm_leave_unw
1668 |
1669 |//-- Math library -------------------------------------------------------
1670 |
1671 | .ffunc_1 math_abs
1672 | mov RB, [BASE]
1673 |.if DUALNUM
1674 | checkint RB, >3
1675 | cmp RBd, 0; jns ->fff_resi // Non-negative integer: return it as is.
1676 | neg RBd; js >2 // Still negative after neg: only possible for -2^31.
1677 |->fff_resbit: // Shared exits: return the integer in RBd ...
1678 |->fff_resi:
1679 | setint RB
1680 |->fff_resRB: // ... or the complete TValue in RB.
1681 | mov PC, [BASE-8]
1682 | mov [BASE-16], RB
1683 | jmp ->fff_res1
1684 |2:
1685 | mov64 RB, U64x(41e00000,00000000) // 2^31.
1686 | jmp ->fff_resRB
1687 |3:
1688 | ja ->fff_fallback
1689 |.else
1690 | checknum RB, ->fff_fallback
1691 |.endif
1692 | shl RB, 1 // Shift the top bit (the sign of the double) out ...
1693 | shr RB, 1 // ... and shift a zero back in.
1694 | mov PC, [BASE-8]
1695 | mov [BASE-16], RB
1696 | jmp ->fff_res1
1697 |
1698 |.ffunc_n math_sqrt, sqrtsd // Expands to 'sqrtsd xmm0, qword [BASE]' after the checks.
1699 |->fff_resxmm0: // Shared exit: return the double in xmm0 as the single result.
1700 | mov PC, [BASE-8]
1701 | movsd qword [BASE-16], xmm0
1702 | // fallthrough
1703 |
1704 |->fff_res1: // Shared exit: one result, already stored at [BASE-16].
1705 | mov RDd, 1+1
1706 |->fff_res: // Shared exit: RD = nresults+1, results start at BASE-16.
1707 | mov MULTRES, RDd
1708 |->fff_res_: // Entry for callers that have already set MULTRES.
1709 | test PCd, FRAME_TYPE
1710 | jnz >7
1711 |5:
1712 | cmp PC_RB, RDL // More results expected?
1713 | ja >6
1714 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1715 | movzx RAd, PC_RA
1716 | neg RA
1717 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1718 | ins_next
1719 |
1720 |6: // Fill up results with nil.
1721 | mov aword [BASE+RD*8-24], LJ_TNIL
1722 | add RD, 1
1723 | jmp <5
1724 |
1725 |7: // Non-standard return case.
1726 | mov RA, -16 // Results start at BASE+RA = BASE-16.
1727 | jmp ->vm_return
1728 |
1729 |.macro math_round, func // Emits ff_math_<func>; rounds via ->vm_<func>_sse.
1730 | .ffunc_1 math_ .. func // Needs 1+ args: [BASE] is read unconditionally below, so a missing argument must go to the fallback.
1731 |.if DUALNUM
1732 | mov RB, [BASE]
1733 | checknumx RB, ->fff_resRB, je // Integer argument: return it unchanged.
1734 | ja ->fff_fallback
1735 |.else
1736 | checknumtp [BASE], ->fff_fallback
1737 |.endif
1738 | movsd xmm0, qword [BASE]
1739 | call ->vm_ .. func .. _sse
1740 |.if DUALNUM
1741 | cvttsd2si RBd, xmm0
1742 | cmp RBd, 0x80000000 // 0x80000000 is what cvttsd2si yields when the value is out of int32 range.
1743 | jne ->fff_resi
1744 | cvtsi2sd xmm1, RBd // Ambiguous result: convert back and compare to tell -2^31 from overflow.
1745 | ucomisd xmm0, xmm1
1746 | jp ->fff_resxmm0 // Unordered (NaN): return as a double.
1747 | je ->fff_resi
1748 |.endif
1749 | jmp ->fff_resxmm0
1750 |.endmacro
1751 |
1752 | math_round floor
1753 | math_round ceil
1754 |
1755 |.ffunc math_log // Uses plain .ffunc: the argument count is checked explicitly below.
1756 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1757 | checknumtp [BASE], ->fff_fallback
1758 | movsd xmm0, qword [BASE]
1759 | mov RB, BASE // Save BASE in callee-save RB across the C call.
1760 | call extern log
1761 | mov BASE, RB
1762 | jmp ->fff_resxmm0
1763 |
1764 |.macro math_extern, func // Emits ff_math_<func>: calls extern func with arg 1 in xmm0.
1765 | .ffunc_n math_ .. func
1766 | mov RB, BASE // Save BASE in callee-save RB across the C call.
1767 | call extern func
1768 | mov BASE, RB
1769 | jmp ->fff_resxmm0
1770 |.endmacro
1771 |
1772 |.macro math_extern2, func // Emits ff_math_<func>: calls extern func with args in xmm0 and xmm1.
1773 | .ffunc_nn math_ .. func
1774 | mov RB, BASE // Save BASE in callee-save RB across the C call.
1775 | call extern func
1776 | mov BASE, RB
1777 | jmp ->fff_resxmm0
1778 |.endmacro
1779 |
1780 | math_extern log10
1781 | math_extern exp
1782 | math_extern sin
1783 | math_extern cos
1784 | math_extern tan
1785 | math_extern asin
1786 | math_extern acos
1787 | math_extern atan
1788 | math_extern sinh
1789 | math_extern cosh
1790 | math_extern tanh
1791 | math_extern2 pow
1792 | math_extern2 atan2
1793 | math_extern2 fmod
1794 |
1795 |.ffunc_2 math_ldexp // Computed with x87 fscale instead of a C call.
1796 | checknumtp [BASE], ->fff_fallback
1797 | checknumtp [BASE+8], ->fff_fallback
1798 | fld qword [BASE+8] // Push arg 2 first, so it ends up in st1.
1799 | fld qword [BASE] // Arg 1 is in st0.
1800 | fscale // st0 = st0 scaled by a power of two taken from st1.
1801 | fpop1
1802 | mov PC, [BASE-8]
1803 | fstp qword [BASE-16] // Store the result and pop it off the x87 stack.
1804 | jmp ->fff_res1
1805 |
1806 |.ffunc_n math_frexp // Calls extern frexp; returns two results.
1807 | mov RB, BASE // Save BASE in callee-save RB across the C call.
1808 |.if X64WIN
1809 | lea CARG2, TMP1 // Caveat: CARG2 == BASE
1810 |.else
1811 | lea CARG1, TMP1
1812 |.endif
1813 | call extern frexp
1814 | mov BASE, RB
1815 | mov RBd, TMP1d // RB = the int that frexp stored at TMP1.
1816 | mov PC, [BASE-8] // Read PC before [BASE-8] is overwritten with the 2nd result.
1817 | movsd qword [BASE-16], xmm0
1818 |.if DUALNUM
1819 | setint RB
1820 | mov [BASE-8], RB
1821 |.else
1822 | cvtsi2sd xmm1, RBd
1823 | movsd qword [BASE-8], xmm1
1824 |.endif
1825 | mov RDd, 1+2
1826 | jmp ->fff_res
1827 |
1828 |.ffunc_n math_modf // Calls extern modf; returns two results.
1829 | mov RB, BASE // Save BASE in callee-save RB across the C call.
1830 |.if X64WIN
1831 | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE
1832 |.else
1833 | lea CARG1, [BASE-16]
1834 |.endif
1835 | call extern modf // The pointer argument is BASE-16, so modf writes straight into the 1st result slot.
1836 | mov BASE, RB
1837 | mov PC, [BASE-8] // Read PC before [BASE-8] is overwritten with the 2nd result.
1838 | movsd qword [BASE-8], xmm0
1839 | mov RDd, 1+2
1840 | jmp ->fff_res
1841 |
1842 |.macro math_minmax, name, cmovop, sseop // Emits ff_<name>; cmovop handles integers, sseop handles doubles.
1843 | .ffunc_1 name // Needs 1+ args: [BASE] is read unconditionally below, so a missing argument must go to the fallback.
1844 | mov RAd, 2 // RA = index+1 of the next argument; compared against RD = nargs+1.
1845 |.if DUALNUM
1846 | mov RB, [BASE]
1847 | checkint RB, >4
1848 |1: // Handle integers.
1849 | cmp RAd, RDd; jae ->fff_resRB // All arguments consumed: return the accumulator in RB.
1850 | mov TMPR, [BASE+RA*8-8]
1851 | checkint TMPR, >3
1852 | cmp RBd, TMPRd
1853 | cmovop RB, TMPR
1854 | add RAd, 1
1855 | jmp <1
1856 |3:
1857 | ja ->fff_fallback
1858 | // Convert intermediate result to number and continue below.
1859 | cvtsi2sd xmm0, RBd
1860 | jmp >6
1861 |4:
1862 | ja ->fff_fallback
1863 |.else
1864 | checknumtp [BASE], ->fff_fallback
1865 |.endif
1866 |
1867 | movsd xmm0, qword [BASE]
1868 |5: // Handle numbers or integers.
1869 | cmp RAd, RDd; jae ->fff_resxmm0 // All arguments consumed: return the accumulator in xmm0.
1870 |.if DUALNUM
1871 | mov RB, [BASE+RA*8-8]
1872 | checknumx RB, >6, jb
1873 | ja ->fff_fallback
1874 | cvtsi2sd xmm1, RBd
1875 | jmp >7
1876 |.else
1877 | checknumtp [BASE+RA*8-8], ->fff_fallback
1878 |.endif
1879 |6:
1880 | movsd xmm1, qword [BASE+RA*8-8]
1881 |7:
1882 | sseop xmm0, xmm1
1883 | add RAd, 1
1884 | jmp <5
1885 |.endmacro
1886 |
1887 | math_minmax math_min, cmovg, minsd
1888 | math_minmax math_max, cmovl, maxsd
1889 |
1890 |//-- String library -----------------------------------------------------
1891 |
1892 |.ffunc string_byte // Only handle the 1-arg case here.
1893 | cmp NARGS:RDd, 1+1; jne ->fff_fallback
1894 | mov STR:RB, [BASE]
1895 | checkstr STR:RB, ->fff_fallback
1896 | mov PC, [BASE-8]
1897 | cmp dword STR:RB->len, 1
1898 | jb ->fff_res0 // Return no results for empty string.
1899 | movzx RBd, byte STR:RB[1] // Load one byte from just past the GCstr header, zero-extended.
1900 |.if DUALNUM
1901 | jmp ->fff_resi
1902 |.else
1903 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
1904 |.endif
1905 |
1906 |.ffunc string_char // Only handle the 1-arg case here.
1907 | ffgccheck
1908 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1909 |.if DUALNUM
1910 | mov RB, [BASE]
1911 | checkint RB, ->fff_fallback
1912 |.else
1913 | checknumtp [BASE], ->fff_fallback
1914 | cvttsd2si RBd, qword [BASE]
1915 |.endif
1916 | cmp RBd, 255; ja ->fff_fallback // Unsigned compare: negative values fall back, too.
1917 | mov TMP1d, RBd
1918 | mov TMPRd, 1 // Length 1.
1919 | lea RD, TMP1 // Points to stack. Little-endian.
1920 |->fff_newstr: // Shared: create a string from RD = char pointer, TMPRd = length.
1921 | mov L:RB, SAVE_L
1922 | mov L:RB->base, BASE
1923 | mov CARG3d, TMPRd // Zero-extended to size_t.
1924 | mov CARG2, RD
1925 | mov CARG1, L:RB
1926 | mov SAVE_PC, PC
1927 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1928 |->fff_resstr: // Shared exit: return the GCstr in RD; expects L in RB.
1929 | // GCstr * returned in eax (RD).
1930 | mov BASE, L:RB->base
1931 | mov PC, [BASE-8]
1932 | settp STR:RD, LJ_TSTR
1933 | mov [BASE-16], STR:RD
1934 | jmp ->fff_res1
1935 |
1936 |.ffunc string_sub
1937 | ffgccheck
1938 | mov TMPRd, -1 // Default end = -1, used when no 3rd argument is given.
1939 | cmp NARGS:RDd, 1+2; jb ->fff_fallback
1940 | jna >1 // Exactly two arguments: keep the default end.
1941 |.if DUALNUM
1942 | mov TMPR, [BASE+16]
1943 | checkint TMPR, ->fff_fallback
1944 |.else
1945 | checknumtp [BASE+16], ->fff_fallback
1946 | cvttsd2si TMPRd, qword [BASE+16]
1947 |.endif
1948 |1:
1949 | mov STR:RB, [BASE]
1950 | checkstr STR:RB, ->fff_fallback
1951 |.if DUALNUM
1952 | mov ITYPE, [BASE+8]
1953 | mov RAd, ITYPEd // Must clear hiword for lea below.
1954 | sar ITYPE, 47
1955 | cmp ITYPEd, LJ_TISNUM
1956 | jne ->fff_fallback
1957 |.else
1958 | checknumtp [BASE+8], ->fff_fallback
1959 | cvttsd2si RAd, qword [BASE+8]
1960 |.endif
1961 | mov RCd, STR:RB->len
1962 | cmp RCd, TMPRd // len < end? (unsigned compare)
1963 | jb >5
1964 |2:
1965 | test RAd, RAd // start <= 0?
1966 | jle >7
1967 |3:
1968 | sub TMPRd, RAd // start > end?
1969 | jl ->fff_emptystr
1970 | lea RD, [STR:RB+RAd+#STR-1] // RD = address of the start'th character (1-based) after the GCstr header.
1971 | add TMPRd, 1 // Length = end-start+1.
1972 |4:
1973 | jmp ->fff_newstr
1974 |
1975 |5: // Negative end or overflow.
1976 | jl >6
1977 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1978 | jmp <2
1979 |6: // Overflow.
1980 | mov TMPRd, RCd // end = len
1981 | jmp <2
1982 |
1983 |7: // Negative start or underflow.
1984 | je >8
1985 | add RAd, RCd // start = start+(len+1)
1986 | add RAd, 1
1987 | jg <3 // start > 0?
1988 |8: // Underflow.
1989 | mov RAd, 1 // start = 1
1990 | jmp <3
1991 |
1992 |->fff_emptystr: // Range underflow.
1993 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1994 | jmp <4
1995 |
1996 |.macro ffstring_op, name // Emits ff_string_<name> on top of lj_buf_putstr_<name> and lj_buf_tostr.
1997 | .ffunc_1 string_ .. name
1998 | ffgccheck
1999 |.if X64WIN
2000 | mov STR:TMPR, [BASE]
2001 | checkstr STR:TMPR, ->fff_fallback
2002 |.else
2003 | mov STR:CARG2, [BASE]
2004 | checkstr STR:CARG2, ->fff_fallback
2005 |.endif
2006 | mov L:RB, SAVE_L
2007 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2008 | mov L:RB->base, BASE
2009 |.if X64WIN
2010 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
2011 |.endif
2012 | mov RC, SBUF:CARG1->b
2013 | mov SBUF:CARG1->L, L:RB
2014 | mov SBUF:CARG1->p, RC // p = b: reset the buffer's p pointer to its b pointer.
2015 | mov SAVE_PC, PC
2016 | call extern lj_buf_putstr_ .. name
2017 | mov CARG1, rax // Pass the first call's return value on to lj_buf_tostr.
2018 | call extern lj_buf_tostr
2019 | jmp ->fff_resstr // L is still in callee-save RB, as fff_resstr requires.
2020 |.endmacro
2021 |
2022 |ffstring_op reverse
2023 |ffstring_op lower
2024 |ffstring_op upper
2025 |
2026 |//-- Bit library --------------------------------------------------------
2027 |
2028 |.macro .ffunc_bit, name, kind, fdef // Prologue: leaves arg 1 as a 32 bit integer in RBd. fdef is the entry macro to use.
2029 | fdef name
2030 |.if kind == 2
2031 | sseconst_tobit xmm1, RB // kind 2: load the constant into xmm1 up front (label 2 skips the later load).
2032 |.endif
2033 |.if DUALNUM
2034 | mov RB, [BASE]
2035 | checkint RB, >1
2036 |.if kind > 0
2037 | jmp >2 // Integer argument: already in RBd, skip the conversion.
2038 |.else
2039 | jmp ->fff_resbit // kind 0: an integer argument is returned directly.
2040 |.endif
2041 |1:
2042 | ja ->fff_fallback
2043 | movd xmm0, RB
2044 |.else
2045 | checknumtp [BASE], ->fff_fallback
2046 | movsd xmm0, qword [BASE]
2047 |.endif
2048 |.if kind < 2
2049 | sseconst_tobit xmm1, RB
2050 |.endif
2051 | addsd xmm0, xmm1 // Add the constant, then take the low 32 bits of the sum.
2052 | movd RBd, xmm0
2053 |2:
2054 |.endmacro
2055 |
2056 |.macro .ffunc_bit, name, kind // Two-operand form: uses .ffunc_1 as the entry macro.
2057 | .ffunc_bit name, kind, .ffunc_1
2058 |.endmacro
2059 |
2060 |.ffunc_bit bit_tobit, 0
2061 | jmp ->fff_resbit
2062 |
2063 |.macro .ffunc_bit_op, name, ins // Emits ff_<name>: folds 'ins' over all arguments, accumulating in RBd.
2064 | .ffunc_bit name, 2
2065 | mov TMPRd, NARGS:RDd // Save for fallback.
2066 | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument.
2067 |1:
2068 | cmp RD, BASE
2069 | jbe ->fff_resbit // Back at arg 1 (already in RBd): done.
2070 |.if DUALNUM
2071 | mov RA, [RD]
2072 | checkint RA, >2
2073 | ins RBd, RAd
2074 | sub RD, 8
2075 | jmp <1
2076 |2:
2077 | ja ->fff_fallback_bit_op
2078 | movd xmm0, RA
2079 |.else
2080 | checknumtp [RD], ->fff_fallback_bit_op
2081 | movsd xmm0, qword [RD]
2082 |.endif
2083 | addsd xmm0, xmm1 // xmm1 still holds the constant loaded by .ffunc_bit (kind 2).
2084 | movd RAd, xmm0
2085 | ins RBd, RAd
2086 | sub RD, 8 // Walk the arguments from last to first.
2087 | jmp <1
2088 |.endmacro
2089 |
2090 |.ffunc_bit_op bit_band, and
2091 |.ffunc_bit_op bit_bor, or
2092 |.ffunc_bit_op bit_bxor, xor
2093 |
2094 |.ffunc_bit bit_bswap, 1
2095 | bswap RBd
2096 | jmp ->fff_resbit
2097 |
2098 |.ffunc_bit bit_bnot, 1
2099 | not RBd
2100 |.if DUALNUM
2101 | jmp ->fff_resbit
2102 |.else
2103 |->fff_resbit: // Non-DUALNUM definition of the shared exit: return RBd converted to a double.
2104 | cvtsi2sd xmm0, RBd
2105 | jmp ->fff_resxmm0
2106 |.endif
2107 |
2108 |->fff_fallback_bit_op: // Fallback entry for .ffunc_bit_op, which has overwritten RD.
2109 | mov NARGS:RDd, TMPRd // Restore for fallback
2110 | jmp ->fff_fallback
2111 |
2112 |.macro .ffunc_bit_sh, name, ins // Emits ff_<name>: applies shift/rotate 'ins' to arg 1 by arg 2.
2113 |.if DUALNUM
2114 | .ffunc_bit name, 1, .ffunc_2
2115 | // Note: no inline conversion from number for 2nd argument!
2116 | mov RA, [BASE+8]
2117 | checkint RA, ->fff_fallback
2118 |.else
2119 | .ffunc_nn name
2120 | sseconst_tobit xmm2, RB
2121 | addsd xmm0, xmm2
2122 | addsd xmm1, xmm2
2123 | movd RBd, xmm0 // RB = value to shift.
2124 | movd RAd, xmm1 // RA = shift count.
2125 |.endif
2126 | ins RBd, cl // Assumes RA is ecx.
2127 | jmp ->fff_resbit
2128 |.endmacro
2129 |
2130 |.ffunc_bit_sh bit_lshift, shl
2131 |.ffunc_bit_sh bit_rshift, shr
2132 |.ffunc_bit_sh bit_arshift, sar
2133 |.ffunc_bit_sh bit_rol, rol
2134 |.ffunc_bit_sh bit_ror, ror
2135 |
2136 |//-----------------------------------------------------------------------
2137 |
2138 |->fff_fallback_2: // Fallback entry that forces the argument count to 2.
2139 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2140 | jmp ->fff_fallback
2141 |->fff_fallback_1: // Fallback entry that forces the argument count to 1.
2142 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2143 |->fff_fallback: // Call fast function fallback handler.
2144 | // BASE = new base, RD = nargs+1
2145 | mov L:RB, SAVE_L
2146 | mov PC, [BASE-8] // Fallback may overwrite PC.
2147 | mov SAVE_PC, PC // Redundant (but a defined value).
2148 | mov L:RB->base, BASE
2149 | lea RD, [BASE+NARGS:RD*8-8] // RD = end of the arguments.
2150 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2151 | mov L:RB->top, RD
2152 | mov CFUNC:RD, [BASE-16] // Load the called function from the frame slot at BASE-16.
2153 | cleartp CFUNC:RD
2154 | cmp RA, L:RB->maxstack
2155 | ja >5 // Need to grow stack.
2156 | mov CARG1, L:RB
2157 | call aword CFUNC:RD->f // (lua_State *L)
2158 | mov BASE, L:RB->base
2159 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2160 | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2161 |1:
2162 | mov RA, L:RB->top
2163 | sub RA, BASE
2164 | shr RAd, 3 // RA = number of slots between base and top.
2165 | test RDd, RDd
2166 | lea NARGS:RDd, [RAd+1] // lea and the mov below keep the flags of the test for the jne.
2167 | mov LFUNC:RB, [BASE-16]
2168 | jne ->vm_call_tail // Returned -1?
2169 | cleartp LFUNC:RB
2170 | ins_callt // Returned 0: retry fast path.
2171 |
2172 |// Reconstruct previous base for vmeta_call during tailcall.
2173 |->vm_call_tail:
2174 | mov RA, BASE
2175 | test PCd, FRAME_TYPE
2176 | jnz >3
2177 | movzx RBd, PC_RA
2178 | neg RB
2179 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2180 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2181 |3:
2182 | mov RB, PC
2183 | and RB, -8 // Mask off the low three bits of PC to get the frame delta.
2184 | sub BASE, RB
2185 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2186 |
2187 |5: // Grow stack for fallback handler.
2188 | mov CARG2d, LUA_MINSTACK
2189 | mov CARG1, L:RB
2190 | call extern lj_state_growstack // (lua_State *L, int n)
2191 | mov BASE, L:RB->base
2192 | xor RDd, RDd // Simulate a return 0.
2193 | jmp <1 // Dumb retry (goes through ff first).
2194 |
2195 |->fff_gcstep: // Call GC step function.
2196 | // BASE = new base, RD = nargs+1
2197 | pop RB // Must keep stack at same level.
2198 | mov TMP1, RB // Save return address
2199 | mov L:RB, SAVE_L
2200 | mov SAVE_PC, PC // Redundant (but a defined value).
2201 | mov L:RB->base, BASE
2202 | lea RD, [BASE+NARGS:RD*8-8]
2203 | mov CARG1, L:RB
2204 | mov L:RB->top, RD // L->top = end of the arguments.
2205 | call extern lj_gc_step // (lua_State *L)
2206 | mov BASE, L:RB->base
2207 | mov RD, L:RB->top
2208 | sub RD, BASE
2209 | shr RDd, 3
2210 | add NARGS:RDd, 1 // Recompute RD = nargs+1 from L->top - L->base.
2211 | mov RB, TMP1
2212 | push RB // Restore return address.
2213 | ret
2214 |
2215 |//-----------------------------------------------------------------------
2216 |//-- Special dispatch targets -------------------------------------------
2217 |//-----------------------------------------------------------------------
2218 |
2219 |->vm_record: // Dispatch target for recording phase.
2220 |.if JIT
2221 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2222 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2223 | jnz >5 // Label 5 (in vm_inshook below) re-dispatches without calling out.
2224 | // Decrement the hookcount for consistency, but always do the call.
2225 | test RDL, HOOK_ACTIVE
2226 | jnz >1 // Label 1 (in vm_inshook below) calls lj_dispatch_ins.
2227 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2228 | jz >1 // No line/count hook set: leave hookcount untouched.
2229 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2230 | jmp >1
2231 |.endif
2232 |
2233 |->vm_rethook: // Dispatch target for return hooks.
2234 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2235 | test RDL, HOOK_ACTIVE // Hook already active?
2236 | jnz >5 // Yes: skip the call and re-dispatch (label 5 in vm_inshook).
2237 | jmp >1 // No: go straight to the lj_dispatch_ins call (label 1 in vm_inshook).
2238 |
2239 |->vm_inshook: // Dispatch target for instr/line hooks.
2240 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2241 | test RDL, HOOK_ACTIVE // Hook already active?
2242 | jnz >5
2243 |
2244 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2245 | jz >5 // Neither line nor count hook set: just re-dispatch.
2246 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2247 | jz >1 // Hook count reached zero: call the handler.
2248 | test RDL, LUA_MASKLINE
2249 | jz >5 // No line hook: just re-dispatch.
2250 |1: // Shared call site; vm_record and vm_rethook jump here too.
2251 | mov L:RB, SAVE_L
2252 | mov L:RB->base, BASE
2253 | mov CARG2, PC // Caveat: CARG2 == BASE
2254 | mov CARG1, L:RB
2255 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2256 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2257 |3: // vm_hotloop rejoins here after its own call.
2258 | mov BASE, L:RB->base
2259 |4: // cont_hook rejoins here.
2260 | movzx RAd, PC_RA
2261 |5: // Decode OP and RD, then jump through the static dispatch table.
2262 | movzx OP, PC_OP
2263 | movzx RDd, PC_RD
2264 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2265 |
2266 |->cont_hook: // Continue from hook yield.
2267 | add PC, 4 // Step PC forward by 4 bytes.
2268 | mov RA, [RB-40] // NOTE(review): presumably the saved MULTRES slot relative to RB -- confirm frame layout.
2269 | mov MULTRES, RAd // Restore MULTRES for *M ins.
2270 | jmp <4 // Rejoin vm_inshook at label 4: reload RA and re-dispatch.
2271 |
2272 |->vm_hotloop: // Hot loop counter underflow.
2273 |.if JIT
2274 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2275 | cleartp LFUNC:RB
2276 | mov RB, LFUNC:RB->pc
2277 | movzx RDd, byte [RB+PC2PROTO(framesize)]
2278 | lea RD, [BASE+RD*8] // RD = BASE + framesize*8; becomes L->top below.
2279 | mov L:RB, SAVE_L
2280 | mov L:RB->base, BASE
2281 | mov L:RB->top, RD
2282 | mov CARG2, PC
2283 | lea CARG1, [DISPATCH+GG_DISP2J] // CARG1 = jit_State pointer, computed relative to DISPATCH.
2284 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L.
2285 | mov SAVE_PC, PC
2286 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2287 | jmp <3 // Rejoin vm_inshook at label 3: reload BASE and re-dispatch.
2288 |.endif
2289 |
2290 |->vm_callhook: // Dispatch target for call hooks.
2291 | mov SAVE_PC, PC
2292 |.if JIT
2293 | jmp >1 // Skip vm_hotcall's marker setup; without JIT this falls through instead.
2294 |.endif
2295 |
2296 |->vm_hotcall: // Hot call counter underflow.
2297 |.if JIT
2298 | mov SAVE_PC, PC
2299 | or PC, 1 // Marker for hot call.
2300 |1: // vm_callhook joins here in JIT builds, without the marker bit.
2301 |.endif
2302 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8; becomes L->top below.
2303 | mov L:RB, SAVE_L
2304 | mov L:RB->base, BASE
2305 | mov L:RB->top, RD
2306 | mov CARG2, PC
2307 | mov CARG1, L:RB
2308 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2309 | // ASMFunction returned in eax/rax (RD).
2310 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2311 |.if JIT
2312 | and PC, -2 // Clear bit 0 of PC again (the hot call marker).
2313 |.endif
2314 | mov BASE, L:RB->base
2315 | mov RA, RD // Stash the returned ASMFunction; RD is reused below.
2316 | mov RD, L:RB->top
2317 | sub RD, BASE
2318 | mov RB, RA // RB = jump target, since RA is reloaded from the instruction next.
2319 | movzx RAd, PC_RA
2320 | shr RDd, 3
2321 | add NARGS:RDd, 1 // RD = (L->top - L->base)/8 + 1 = nargs+1.
2322 | jmp RB // Jump to the returned ASMFunction.
2323 |
2324 |->cont_stitch: // Trace stitching.
2325 |.if JIT
2326 | // BASE = base, RC = result, RB = mbase
2327 | mov TRACE:ITYPE, [RB-40] // Save previous trace.
2328 | cleartp TRACE:ITYPE
2329 | mov TMPRd, MULTRES
2330 | movzx RAd, PC_RA
2331 | lea RA, [BASE+RA*8] // Call base.
2332 | sub TMPRd, 1
2333 | jz >2 // MULTRES-1 == 0: nothing to copy.
2334 |1: // Move results down.
2335 | mov RB, [RC]
2336 | mov [RA], RB
2337 | add RC, 8
2338 | add RA, 8
2339 | sub TMPRd, 1
2340 | jnz <1
2341 |2:
2342 | movzx RCd, PC_RA
2343 | movzx RBd, PC_RB
2344 | add RC, RB
2345 | lea RC, [BASE+RC*8-8] // RC = BASE + (RA+RB-1)*8: limit for the nil-fill loop.
2346 |3:
2347 | cmp RC, RA
2348 | ja >9 // More results wanted?
2349 |
2350 | test TRACE:ITYPE, TRACE:ITYPE
2351 | jz ->cont_nop // Null trace pointer: just continue.
2352 | movzx RBd, word TRACE:ITYPE->traceno
2353 | movzx RDd, word TRACE:ITYPE->link
2354 | cmp RDd, RBd
2355 | je ->cont_nop // Blacklisted.
2356 | test RDd, RDd
2357 | jne =>BC_JLOOP // Jump to stitched trace.
2358 |
2359 | // Stitch a new trace to the previous trace.
2360 | mov [DISPATCH+DISPATCH_J(exitno)], RB // J->exitno = traceno loaded above.
2361 | mov L:RB, SAVE_L
2362 | mov L:RB->base, BASE
2363 | mov CARG2, PC
2364 | lea CARG1, [DISPATCH+GG_DISP2J]
2365 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2366 | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2367 | mov BASE, L:RB->base
2368 | jmp ->cont_nop
2369 |
2370 |9: // Fill up results with nil.
2371 | mov aword [RA], LJ_TNIL
2372 | add RA, 8
2373 | jmp <3 // Re-check whether more slots need filling.
2374 |.endif
2375 |
2376 |->vm_profhook: // Dispatch target for profiler hook.
2377#if LJ_HASPROFILE
2378 | mov L:RB, SAVE_L
2379 | mov L:RB->base, BASE // Publish BASE to L->base before the C call.
2380 | mov CARG2, PC // Caveat: CARG2 == BASE
2381 | mov CARG1, L:RB
2382 | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2383 | mov BASE, L:RB->base
2384 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2385 | sub PC, 4 // Step PC back by 4 bytes before continuing at cont_nop.
2386 | jmp ->cont_nop
2387#endif
2388 |
2389 |//-----------------------------------------------------------------------
2390 |//-- Trace exit handler -------------------------------------------------
2391 |//-----------------------------------------------------------------------
2392 |
2393 |// Called from an exit stub with the exit number on the stack.
2394 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2395 |->vm_exit_handler: // Saves GPRs and SSE regs on the stack, then calls lj_trace_exit.
2396 |.if JIT
2397 | push r13; push r12
2398 | push r11; push r10; push r9; push r8
2399 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp // rbp = rsp + 9 pushes*8 + 2 exit-number slots*8.
2400 | push rbx; push rdx; push rcx; push rax
2401 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2402 | mov RCH, byte [rbp-16] // Second pushed byte becomes bits 8..15.
2403 | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit-number slots to store r15/r14.
2404 | // DISPATCH is preserved on-trace in LJ_GC64 mode.
2405 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2406 | set_vmstate EXIT
2407 | mov [DISPATCH+DISPATCH_J(exitno)], RCd
2408 | mov [DISPATCH+DISPATCH_J(parent)], RAd
2409 |.if X64WIN
2410 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2411 |.else
2412 | sub rsp, 16*8 // Room for SSE regs.
2413 |.endif
2414 | add rbp, -128 // rbp now equals rsp as it was before the sub above (16 slots of 8 bytes below the old rbp).
2415 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2416 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2417 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2418 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2419 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2420 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2421 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2422 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2423 | // Caveat: RB is rbp.
2424 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2425 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2426 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2427 | mov L:RB->base, BASE
2428 |.if X64WIN
2429 | lea CARG2, [rsp+4*8] // Skip the 4*8-byte save area reserved above.
2430 |.else
2431 | mov CARG2, rsp // The saved register block starts at rsp.
2432 |.endif
2433 | lea CARG1, [DISPATCH+GG_DISP2J]
2434 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
2435 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2436 | // MULTRES or negated error code returned in eax (RD).
2437 | mov RA, L:RB->cframe
2438 | and RA, CFRAME_RAWMASK
2439 | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2440 | mov BASE, L:RB->base
2441 | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2442 | jmp >1 // Enter vm_exit_interp at label 1 with RA = C frame address.
2443 |.endif
2444 |->vm_exit_interp: // Restores callee-saved registers and resumes interpretation at PC.
2445 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2446 |.if JIT
2447 | // Restore additional callee-save registers only used in compiled code.
2448 |.if X64WIN
2449 | lea RA, [rsp+10*16+4*8]
2450 |1: // vm_exit_handler jumps here with RA already set from L->cframe.
2451 | movdqa xmm15, [RA-10*16]
2452 | movdqa xmm14, [RA-9*16]
2453 | movdqa xmm13, [RA-8*16]
2454 | movdqa xmm12, [RA-7*16]
2455 | movdqa xmm11, [RA-6*16]
2456 | movdqa xmm10, [RA-5*16]
2457 | movdqa xmm9, [RA-4*16]
2458 | movdqa xmm8, [RA-3*16]
2459 | movdqa xmm7, [RA-2*16]
2460 | mov rsp, RA // Reposition stack to C frame.
2461 | movdqa xmm6, [RA-1*16]
2462 | mov r15, CSAVE_1
2463 | mov r14, CSAVE_2
2464 | mov r13, CSAVE_3
2465 | mov r12, CSAVE_4
2466 |.else
2467 | lea RA, [rsp+16]
2468 |1: // vm_exit_handler jumps here with RA already set from L->cframe.
2469 | mov r13, [RA-8]
2470 | mov r12, [RA]
2471 | mov rsp, RA // Reposition stack to C frame.
2472 |.endif
2473 | test RDd, RDd; js >9 // Check for error from exit.
2474 | mov L:RB, SAVE_L
2475 | mov MULTRES, RDd
2476 | mov LFUNC:KBASE, [BASE-16]
2477 | cleartp LFUNC:KBASE
2478 | mov KBASE, LFUNC:KBASE->pc
2479 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = constants pointer read via the function's pc.
2480 | mov L:RB->base, BASE
2481 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
2482 | set_vmstate INTERP
2483 | // Modified copy of ins_next which handles function header dispatch, too.
2484 | mov RCd, [PC] // Fetch the 32-bit instruction word.
2485 | movzx RAd, RCH // RA = byte 1 of the instruction.
2486 | movzx OP, RCL // OP = byte 0 of the instruction.
2487 | add PC, 4
2488 | shr RCd, 16 // RC/RD = upper 16 bits of the instruction.
2489 | cmp OP, BC_FUNCF // Function header?
2490 | jb >3 // Below BC_FUNCF: dispatch with the decoded RD.
2491 | cmp OP, BC_FUNCC+2 // Fast function?
2492 | jae >4
2493 |2:
2494 | mov RCd, MULTRES // RC/RD holds nres+1.
2495 |3:
2496 | jmp aword [DISPATCH+OP*8]
2497 |
2498 |4: // Check frame below fast function.
2499 | mov RC, [BASE-8]
2500 | test RCd, FRAME_TYPE
2501 | jnz <2 // Trace stitching continuation?
2502 | // Otherwise set KBASE for Lua function below fast function.
2503 | movzx RCd, byte [RC-3] // Byte at [RC-3], negated below and used as a slot offset from BASE.
2504 | neg RC
2505 | mov LFUNC:KBASE, [BASE+RC*8-32]
2506 | cleartp LFUNC:KBASE
2507 | mov KBASE, LFUNC:KBASE->pc
2508 | mov KBASE, [KBASE+PC2PROTO(k)]
2509 | jmp <2
2510 |
2511 |9: // Rethrow error from the right C frame.
2512 | neg RD // Undo the negation of the error code.
2513 | mov CARG1, L:RB
2514 | mov CARG2, RD
2515 | call extern lj_err_throw // (lua_State *L, int errcode)
2516 |.endif
2517 |
2518 |//-----------------------------------------------------------------------
2519 |//-- Math helper functions ----------------------------------------------
2520 |//-----------------------------------------------------------------------
2521 |
2522 |// FP value rounding. Called by math.floor/math.ceil fast functions
2523 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2524 |.macro vm_round, name, mode, cond // mode: 0 = floor, 1 = ceil, 2 = trunc. cond is not referenced in the body.
2525 |->name:
2526 |->name .. _sse:
2527 | sseconst_abs xmm2, RD
2528 | sseconst_2p52 xmm3, RD
2529 | movaps xmm1, xmm0
2530 | andpd xmm1, xmm2 // |x|
2531 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2532 | jbe >1 // Returns x unchanged in xmm0.
2533 | andnpd xmm2, xmm0 // Isolate sign bit.
2534 |.if mode == 2 // trunc(x)?
2535 | movaps xmm0, xmm1
2536 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2537 | subsd xmm1, xmm3
2538 | sseconst_1 xmm3, RD
2539 | cmpsd xmm0, xmm1, 1 // |x| < result?
2540 | andpd xmm0, xmm3 // Compare mask AND constant: the constant if true, else +0.
2541 | subsd xmm1, xmm0 // If yes, subtract 1.
2542 | orpd xmm1, xmm2 // Merge sign bit back in.
2543 |.else
2544 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2545 | subsd xmm1, xmm3
2546 | orpd xmm1, xmm2 // Merge sign bit back in.
2547 | .if mode == 1 // ceil(x)?
2548 | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0.
2549 | cmpsd xmm0, xmm1, 6 // x > result?
2550 | .else // floor(x)?
2551 | sseconst_1 xmm2, RD
2552 | cmpsd xmm0, xmm1, 1 // x < result?
2553 | .endif
2554 | andpd xmm0, xmm2 // Compare mask AND constant: the constant if true, else +0.
2555 | subsd xmm1, xmm0 // If yes, subtract +-1.
2556 |.endif
2557 | movaps xmm0, xmm1 // Result goes back to xmm0.
2558 |1:
2559 | ret
2560 |.endmacro
2561 |
2562 | vm_round vm_floor, 0, 1 // Emits vm_floor and vm_floor_sse.
2563 | vm_round vm_ceil, 1, JIT // Emits vm_ceil and vm_ceil_sse.
2564 | vm_round vm_trunc, 2, JIT // Emits vm_trunc and vm_trunc_sse.
2565 |
2566 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2567 |->vm_mod: // Computes x - floor(x/y)*y with x in xmm0 and y in xmm1.
2568 |// Args in xmm0/xmm1, return value in xmm0.
2569 |// Caveat: xmm0-xmm5 and RC (eax) modified!
2570 | movaps xmm5, xmm0 // Keep x in xmm5.
2571 | divsd xmm0, xmm1 // xmm0 = x/y.
2572 | sseconst_abs xmm2, RD
2573 | sseconst_2p52 xmm3, RD
2574 | movaps xmm4, xmm0
2575 | andpd xmm4, xmm2 // |x/y|
2576 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2577 | jbe >1
2578 | andnpd xmm2, xmm0 // Isolate sign bit.
2579 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2580 | subsd xmm4, xmm3
2581 | orpd xmm4, xmm2 // Merge sign bit back in.
2582 | sseconst_1 xmm2, RD
2583 | cmpsd xmm0, xmm4, 1 // x/y < result?
2584 | andpd xmm0, xmm2
2585 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2586 | movaps xmm0, xmm5 // xmm0 = x again.
2587 | mulsd xmm1, xmm4 // xmm1 = y * floor(x/y).
2588 | subsd xmm0, xmm1 // xmm0 = x - y*floor(x/y).
2589 | ret
2590 |1: // No rounding needed: use x/y as is.
2591 | mulsd xmm1, xmm0 // xmm1 = y * (x/y).
2592 | movaps xmm0, xmm5
2593 | subsd xmm0, xmm1
2594 | ret
2595 |
2596 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2597 |->vm_powi_sse: // x^i by repeated squaring; negative i is handled via 1/x^(-i).
2598 | cmp eax, 1; jle >6 // i<=1?
2599 | // Now 1 < (unsigned)i <= 0x80000000.
2600 |1: // Handle leading zeros.
2601 | test eax, 1; jnz >2
2602 | mulsd xmm0, xmm0 // Square x for each low zero bit shifted out.
2603 | shr eax, 1
2604 | jmp <1
2605 |2:
2606 | shr eax, 1; jz >5 // Only one bit was set: xmm0 already holds the result.
2607 | movaps xmm1, xmm0 // xmm1 accumulates the product of selected squares.
2608 |3: // Handle trailing bits.
2609 | mulsd xmm0, xmm0
2610 | shr eax, 1; jz >4 // No bits left after this shift: finish at 4.
2611 | jnc <3 // Shifted-out bit was 0: square only.
2612 | mulsd xmm1, xmm0
2613 | jmp <3
2614 |4:
2615 | mulsd xmm0, xmm1
2616 |5:
2617 | ret
2618 |6: // i <= 1; flags are still those of the cmp eax, 1 above.
2619 | je <5 // x^1 ==> x
2620 | jb >7 // x^0 ==> 1
2621 | neg eax // i is negative here: use -i.
2622 | call <1
2623 | sseconst_1 xmm1, RD
2624 | divsd xmm1, xmm0 // xmm1 = constant / x^(-i).
2625 | movaps xmm0, xmm1
2626 | ret
2627 |7:
2628 | sseconst_1 xmm0, RD
2629 | ret
2630 |
2631 |//-----------------------------------------------------------------------
2632 |//-- Miscellaneous functions --------------------------------------------
2633 |//-----------------------------------------------------------------------
2634 |
2635 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2636 |->vm_cpuid: // Runs cpuid for leaf f (subleaf ecx = 0) and stores eax, ebx, ecx, edx into res[0..3].
2637 | mov eax, CARG1d // eax = f, the cpuid leaf.
2638 | .if X64WIN; push rsi; mov rsi, CARG2; .endif // Windows: save rsi and use it for res.
2639 | push rbx // cpuid overwrites ebx; rbx is restored below.
2640 | xor ecx, ecx
2641 | cpuid
2642 | mov [rsi], eax
2643 | mov [rsi+4], ebx
2644 | mov [rsi+8], ecx
2645 | mov [rsi+12], edx
2646 | pop rbx
2647 | .if X64WIN; pop rsi; .endif
2648 | ret // eax still holds the value cpuid left in it.
2649 |
2650 |//-----------------------------------------------------------------------
2651 |//-- Assertions ---------------------------------------------------------
2652 |//-----------------------------------------------------------------------
2653 |
2654 |->assert_bad_for_arg_type: // Trap target: executes int3.
2655#ifdef LUA_USE_ASSERT
2656 | int3
2657#endif
2658 | int3 // Emitted unconditionally, so the label is never empty.
2659 |
2660 |//-----------------------------------------------------------------------
2661 |//-- FFI helper functions -----------------------------------------------
2662 |//-----------------------------------------------------------------------
2663 |
2664 |// Handler for callback functions. Callback slot number in ah/al.
2665 |->vm_ffi_callback: // Stores the C argument registers into CTState.cb, then enters the VM.
2666 |.if FFI
2667 |.type CTSTATE, CTState, PC
2668 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
2669 | lea DISPATCH, [ebp+GG_G2DISP] // NOTE(review): base is written as 32-bit ebp in 64-bit code -- confirm address-size handling.
2670 | mov CTSTATE, GL:ebp->ctype_state
2671 | movzx eax, ax // Keep only the 16-bit slot number from ah/al.
2672 | mov CTSTATE->cb.slot, eax
2673 | mov CTSTATE->cb.gpr[0], CARG1
2674 | mov CTSTATE->cb.gpr[1], CARG2
2675 | mov CTSTATE->cb.gpr[2], CARG3
2676 | mov CTSTATE->cb.gpr[3], CARG4
2677 | movsd qword CTSTATE->cb.fpr[0], xmm0
2678 | movsd qword CTSTATE->cb.fpr[1], xmm1
2679 | movsd qword CTSTATE->cb.fpr[2], xmm2
2680 | movsd qword CTSTATE->cb.fpr[3], xmm3
2681 |.if X64WIN
2682 | lea rax, [rsp+CFRAME_SIZE+4*8] // Windows: additionally skip 4*8 bytes.
2683 |.else
2684 | lea rax, [rsp+CFRAME_SIZE]
2685 | mov CTSTATE->cb.gpr[4], CARG5
2686 | mov CTSTATE->cb.gpr[5], CARG6
2687 | movsd qword CTSTATE->cb.fpr[4], xmm4
2688 | movsd qword CTSTATE->cb.fpr[5], xmm5
2689 | movsd qword CTSTATE->cb.fpr[6], xmm6
2690 | movsd qword CTSTATE->cb.fpr[7], xmm7
2691 |.endif
2692 | mov CTSTATE->cb.stack, rax // Address computed above, just past the C frame area.
2693 | mov CARG2, rsp
2694 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2695 | mov CARG1, CTSTATE
2696 | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2697 | // lua_State * returned in eax (RD).
2698 | set_vmstate INTERP
2699 | mov BASE, L:RD->base
2700 | mov RD, L:RD->top
2701 | sub RD, BASE
2702 | mov LFUNC:RB, [BASE-16]
2703 | cleartp LFUNC:RB
2704 | shr RD, 3
2705 | add RD, 1 // RD = (L->top - L->base)/8 + 1 = nargs+1.
2706 | ins_callt
2707 |.endif
2708 |
2709 |->cont_ffi_callback: // Return from FFI callback.
2710 |.if FFI
2711 | mov L:RA, SAVE_L
2712 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
2713 | mov aword CTSTATE->L, L:RA
2714 | mov L:RA->base, BASE
2715 | mov L:RA->top, RB
2716 | mov CARG1, CTSTATE
2717 | mov CARG2, RC
2718 | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2719 | mov rax, CTSTATE->cb.gpr[0] // Load the C return registers from the callback state.
2720 | movsd xmm0, qword CTSTATE->cb.fpr[0]
2721 | jmp ->vm_leave_unw
2722 |.endif
2723 |
2724 |->vm_ffi_call: // Call C function via FFI.
2725 | // Caveat: needs special frame unwinding, see below.
2726 |.if FFI
2727 | .type CCSTATE, CCallState, rbx
2728 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Frame-pointer prologue; rbx holds the CCallState.
2729 |
2730 | // Readjust stack.
2731 | mov eax, CCSTATE->spadj
2732 | sub rsp, rax
2733 |
2734 | // Copy stack slots.
2735 | movzx ecx, byte CCSTATE->nsp
2736 | sub ecx, 1
2737 | js >2 // nsp == 0: nothing to copy.
2738 |1: // Copy from the highest slot index down to 0.
2739 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
2740 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
2741 | sub ecx, 1
2742 | jns <1
2743 |2:
2744 |
2745 | movzx eax, byte CCSTATE->nfpr // eax = number of FP argument registers used.
2746 | mov CARG1, CCSTATE->gpr[0]
2747 | mov CARG2, CCSTATE->gpr[1]
2748 | mov CARG3, CCSTATE->gpr[2]
2749 | mov CARG4, CCSTATE->gpr[3]
2750 |.if not X64WIN
2751 | mov CARG5, CCSTATE->gpr[4]
2752 | mov CARG6, CCSTATE->gpr[5]
2753 |.endif
2754 | test eax, eax; jz >5 // No FP arguments: skip loading xmm registers.
2755 | movaps xmm0, CCSTATE->fpr[0]
2756 | movaps xmm1, CCSTATE->fpr[1]
2757 | movaps xmm2, CCSTATE->fpr[2]
2758 | movaps xmm3, CCSTATE->fpr[3]
2759 |.if not X64WIN
2760 | cmp eax, 4; jbe >5 // At most 4 FP arguments: xmm4-xmm7 are not loaded.
2761 | movaps xmm4, CCSTATE->fpr[4]
2762 | movaps xmm5, CCSTATE->fpr[5]
2763 | movaps xmm6, CCSTATE->fpr[6]
2764 | movaps xmm7, CCSTATE->fpr[7]
2765 |.endif
2766 |5:
2767 |
2768 | call aword CCSTATE->func
2769 |
2770 | mov CCSTATE->gpr[0], rax // Store the return registers back into the call state.
2771 | movaps CCSTATE->fpr[0], xmm0
2772 |.if not X64WIN
2773 | mov CCSTATE->gpr[1], rdx
2774 | movaps CCSTATE->fpr[1], xmm1
2775 |.endif
2776 |
2777 | mov rbx, [rbp-8]; leave; ret // Reload the rbx pushed in the prologue, then tear down the frame.
2778 |.endif
2779 |// Note: vm_ffi_call must be the last function in this object file!
2780 |
2781 |//-----------------------------------------------------------------------
2782}
2783
2784/* Generate the code for a single instruction. */
2785static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2786{
2787 int vk = 0;
2788 |// Note: aligning all instructions does not pay off.
2789 |=>defop:
2790
2791 switch (op) {
2792
2793 /* -- Comparison ops ---------------------------------------------------- */
2794
2795 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2796
2797 |.macro jmp_comp, lt, ge, le, gt, target // Emits one jump to target; the mnemonic is picked by the C variable op.
2798 ||switch (op) {
2799 ||case BC_ISLT:
2800 | lt target
2801 ||break;
2802 ||case BC_ISGE:
2803 | ge target
2804 ||break;
2805 ||case BC_ISLE:
2806 | le target
2807 ||break;
2808 ||case BC_ISGT:
2809 | gt target
2810 ||break;
2811 ||default: break; /* Shut up GCC. */
2812 ||}
2813 |.endmacro
2814
2815 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2816 | // RA = src1, RD = src2, JMP with RD = target
2817 | ins_AD
2818 | mov ITYPE, [BASE+RA*8]
2819 | mov RB, [BASE+RD*8]
2820 | mov RA, ITYPE // RA = full src1 value; ITYPE is reduced to its tag below.
2821 | mov RD, RB // RD = full src2 value; RB is reduced to its tag below.
2822 | sar ITYPE, 47
2823 | sar RB, 47
2824 |.if DUALNUM
2825 | cmp ITYPEd, LJ_TISNUM; jne >7
2826 | cmp RBd, LJ_TISNUM; jne >8
2827 | add PC, 4
2828 | cmp RAd, RDd // Both tags equal LJ_TISNUM: compare the low 32 bits.
2829 | jmp_comp jge, jl, jg, jle, >9 // Inverse condition skips the branch.
2830 |6:
2831 | movzx RDd, PC_RD
2832 | branchPC RD
2833 |9:
2834 | ins_next
2835 |
2836 |7: // RA is not an integer.
2837 | ja ->vmeta_comp
2838 | // RA is a number.
2839 | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
2840 | // RA is a number, RD is an integer.
2841 | cvtsi2sd xmm0, RDd
2842 | jmp >2
2843 |
2844 |8: // RA is an integer, RD is not an integer.
2845 | ja ->vmeta_comp
2846 | // RA is an integer, RD is a number.
2847 | cvtsi2sd xmm1, RAd
2848 | movd xmm0, RD
2849 | jmp >3
2850 |.else
2851 | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
2852 | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
2853 |.endif
2854 |1:
2855 | movd xmm0, RD // xmm0 = src2.
2856 |2:
2857 | movd xmm1, RA // xmm1 = src1.
2858 |3:
2859 | add PC, 4
2860 | ucomisd xmm0, xmm1 // Compares src2 (xmm0) against src1 (xmm1).
2861 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2862 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2863 |.if DUALNUM
2864 | jmp_comp jbe, ja, jb, jae, <9
2865 | jmp <6 // Share the branch code at label 6 above.
2866 |.else
2867 | jmp_comp jbe, ja, jb, jae, >1
2868 | movzx RDd, PC_RD
2869 | branchPC RD
2870 |1:
2871 | ins_next
2872 |.endif
2873 break;
2874
2875 case BC_ISEQV: case BC_ISNEV:
2876 vk = op == BC_ISEQV; /* vk != 0 selects the "equal" variant of each branch below. */
2877 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2878 | mov RB, [BASE+RD*8]
2879 | mov ITYPE, [BASE+RA*8]
2880 | add PC, 4
2881 | mov RD, RB // RD = full src2 value; RB is reduced to its tag below.
2882 | mov RA, ITYPE // RA = full src1 value; ITYPE is reduced to its tag below.
2883 | sar RB, 47
2884 | sar ITYPE, 47
2885 |.if DUALNUM
2886 | cmp RBd, LJ_TISNUM; jne >7
2887 | cmp ITYPEd, LJ_TISNUM; jne >8
2888 | cmp RDd, RAd // Both tags equal LJ_TISNUM: compare the low 32 bits.
2889 if (vk) {
2890 | jne >9
2891 } else {
2892 | je >9
2893 }
2894 | movzx RDd, PC_RD
2895 | branchPC RD
2896 |9:
2897 | ins_next
2898 |
2899 |7: // RD is not an integer.
2900 | ja >5
2901 | // RD is a number.
2902 | movd xmm1, RD
2903 | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2904 | // RD is a number, RA is an integer.
2905 | cvtsi2sd xmm0, RAd
2906 | jmp >2
2907 |
2908 |8: // RD is an integer, RA is not an integer.
2909 | ja >5
2910 | // RD is an integer, RA is a number.
2911 | cvtsi2sd xmm1, RDd
2912 | jmp >1
2913 |
2914 |.else
2915 | cmp RBd, LJ_TISNUM; jae >5
2916 | cmp ITYPEd, LJ_TISNUM; jae >5
2917 | movd xmm1, RD
2918 |.endif
2919 |1:
2920 | movd xmm0, RA
2921 |2:
2922 | ucomisd xmm0, xmm1
2923 |4:
2924 iseqne_fp: /* C label: BC_ISEQN/BC_ISNEN enter here via goto. */
2925 if (vk) {
2926 | jp >2 // Unordered means not equal.
2927 | jne >2
2928 } else {
2929 | jp >2 // Unordered means not equal.
2930 | je >1
2931 }
2932 iseqne_end: /* C label: BC_ISEQS/BC_ISNES enter here via goto. */
2933 if (vk) {
2934 |1: // EQ: Branch to the target.
2935 | movzx RDd, PC_RD
2936 | branchPC RD
2937 |2: // NE: Fallthrough to next instruction.
2938 |.if not FFI
2939 |3:
2940 |.endif
2941 } else {
2942 |.if not FFI
2943 |3:
2944 |.endif
2945 |2: // NE: Branch to the target.
2946 | movzx RDd, PC_RD
2947 | branchPC RD
2948 |1: // EQ: Fallthrough to next instruction.
2949 }
2950 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2951 op == BC_ISEQN || op == BC_ISNEN)) {
2952 | jmp <9 // Reuse the ins_next already emitted at label 9.
2953 } else {
2954 | ins_next
2955 }
2956 |
2957 if (op == BC_ISEQV || op == BC_ISNEV) {
2958 |5: // Either or both types are not numbers.
2959 |.if FFI
2960 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
2961 | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
2962 |.endif
2963 | cmp RA, RD
2964 | je <1 // Same GCobjs or pvalues?
2965 | cmp RBd, ITYPEd
2966 | jne <2 // Not the same type?
2967 | cmp RBd, LJ_TISTABUD
2968 | ja <2 // Different objects and not table/ud?
2969 |
2970 | // Different tables or userdatas. Need to check __eq metamethod.
2971 | // Field metatable must be at same offset for GCtab and GCudata!
2972 | cleartp TAB:RA
2973 | mov TAB:RB, TAB:RA->metatable
2974 | test TAB:RB, TAB:RB
2975 | jz <2 // No metatable?
2976 | test byte TAB:RB->nomm, 1<<MM_eq
2977 | jnz <2 // Or 'no __eq' flag set?
2978 if (vk) {
2979 | xor RBd, RBd // ne = 0
2980 } else {
2981 | mov RBd, 1 // ne = 1
2982 }
2983 | jmp ->vmeta_equal // Handle __eq metamethod.
2984 } else {
2985 |.if FFI
2986 |3: // Emitted for the ops that reach iseqne_end via goto.
2987 | cmp ITYPEd, LJ_TCDATA
2988 if (LJ_DUALNUM && vk) {
2989 | jne <9
2990 } else {
2991 | jne <2
2992 }
2993 | jmp ->vmeta_equal_cd
2994 |.endif
2995 }
2996 break;
2997 case BC_ISEQS: case BC_ISNES:
2998 vk = op == BC_ISEQS;
2999 | ins_AND // RA = src, RD = str const, JMP with RD = target
3000 | mov RB, [BASE+RA*8]
3001 | add PC, 4
3002 | checkstr RB, >3 // Not a string: go to label 3, emitted with the shared tail.
3003 | cmp RB, [KBASE+RD*8] // Compares RB with the constant slot as 64-bit values.
3004 iseqne_test: /* C label: BC_ISEQP/BC_ISNEP enter here via goto when !LJ_HASFFI. */
3005 if (vk) {
3006 | jne >2
3007 } else {
3008 | je >1
3009 }
3010 goto iseqne_end; /* Emit the shared branch/fallthrough tail. */
3011 case BC_ISEQN: case BC_ISNEN:
3012 vk = op == BC_ISEQN;
3013 | ins_AD // RA = src, RD = num const, JMP with RD = target
3014 | mov RB, [BASE+RA*8]
3015 | add PC, 4
3016 |.if DUALNUM
3017 | checkint RB, >7
3018 | mov RD, [KBASE+RD*8]
3019 | checkint RD, >8
3020 | cmp RBd, RDd // Both passed checkint: compare the low 32 bits.
3021 if (vk) {
3022 | jne >9
3023 } else {
3024 | je >9
3025 }
3026 | movzx RDd, PC_RD
3027 | branchPC RD
3028 |9:
3029 | ins_next
3030 |
3031 |7: // RA is not an integer.
3032 | ja >3
3033 | // RA is a number.
3034 | mov RD, [KBASE+RD*8]
3035 | checkint RD, >1
3036 | // RA is a number, RD is an integer.
3037 | cvtsi2sd xmm0, RDd
3038 | jmp >2
3039 |
3040 |8: // RA is an integer, RD is a number.
3041 | cvtsi2sd xmm0, RBd
3042 | movd xmm1, RD
3043 | ucomisd xmm0, xmm1
3044 | jmp >4
3045 |1:
3046 | movd xmm0, RD
3047 |.else
3048 | checknum RB, >3
3049 |1:
3050 | movsd xmm0, qword [KBASE+RD*8]
3051 |.endif
3052 |2:
3053 | ucomisd xmm0, qword [BASE+RA*8] // Compare the constant (xmm0) with the stack slot.
3054 |4:
3055 goto iseqne_fp; /* Emit the shared FP flag tests and branch tail. */
3056 case BC_ISEQP: case BC_ISNEP:
3057 vk = op == BC_ISEQP;
3058 | ins_AND // RA = src, RD = primitive type (~)
3059 | mov RB, [BASE+RA*8]
3060 | sar RB, 47 // RB = type tag of src.
3061 | add PC, 4
3062 | cmp RBd, RDd // Compare the tag with the operand.
3063 if (!LJ_HASFFI) goto iseqne_test; /* Without FFI the generic tail suffices. */
3064 if (vk) {
3065 | jne >3
3066 | movzx RDd, PC_RD
3067 | branchPC RD
3068 |2:
3069 | ins_next
3070 |3:
3071 | cmp RBd, LJ_TCDATA; jne <2 // Mismatch and not cdata: fall through to next instruction.
3072 | jmp ->vmeta_equal_cd
3073 } else {
3074 | je >2
3075 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3076 | movzx RDd, PC_RD
3077 | branchPC RD
3078 |2:
3079 | ins_next
3080 }
3081 break;
3082
3083 /* -- Unary test and copy ops ------------------------------------------- */
3084
3085 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3086 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3087 | mov ITYPE, [BASE+RD*8]
3088 | add PC, 4
3089 if (op == BC_ISTC || op == BC_ISFC) {
3090 | mov RB, ITYPE // Keep the full value for the copy below.
3091 }
3092 | sar ITYPE, 47
3093 | cmp ITYPEd, LJ_TISTRUECOND
3094 if (op == BC_IST || op == BC_ISTC) {
3095 | jae >1 // IST/ISTC: skip the branch when tag >= LJ_TISTRUECOND (unsigned).
3096 } else {
3097 | jb >1 // ISF/ISFC: skip the branch when tag < LJ_TISTRUECOND (unsigned).
3098 }
3099 if (op == BC_ISTC || op == BC_ISFC) {
3100 | mov [BASE+RA*8], RB // Copy src to dst only when the branch is taken.
3101 }
3102 | movzx RDd, PC_RD
3103 | branchPC RD
3104 |1: // Fallthrough to the next instruction.
3105 | ins_next
3106 break;
3107
3108 case BC_ISTYPE:
3109 | ins_AD // RA = src, RD = -type
3110 | mov RB, [BASE+RA*8]
3111 | sar RB, 47 // RB = type tag of src.
3112 | add RBd, RDd // tag + (-type) is zero exactly when the tag matches.
3113 | jne ->vmeta_istype
3114 | ins_next
3115 break;
3116 case BC_ISNUM:
3117 | ins_AD // RA = src, RD = -(TISNUM-1)
3118 | checknumtp [BASE+RA*8], ->vmeta_istype // Check fails: go to vmeta_istype. RD is not used here.
3119 | ins_next
3120 break;
3121
3122 /* -- Unary ops --------------------------------------------------------- */
3123
3124 case BC_MOV:
3125 | ins_AD // RA = dst, RD = src
3126 | mov RB, [BASE+RD*8] // Copy the whole 64-bit slot.
3127 | mov [BASE+RA*8], RB
3128 | ins_next_
3129 break;
3130 case BC_NOT:
3131 | ins_AD // RA = dst, RD = src
3132 | mov RB, [BASE+RD*8]
3133 | sar RB, 47 // RB = type tag of src.
3134 | mov RCd, 2
3135 | cmp RB, LJ_TISTRUECOND
3136 | sbb RCd, 0 // RC = 1 if the cmp set carry (RB below LJ_TISTRUECOND, unsigned), else 2.
3137 | shl RC, 47
3138 | not RC // Result slot = ~(RC << 47), the same encoding BC_KPRI builds.
3139 | mov [BASE+RA*8], RC
3140 | ins_next
3141 break;
3142 case BC_UNM:
3143 | ins_AD // RA = dst, RD = src
3144 | mov RB, [BASE+RD*8]
3145 |.if DUALNUM
3146 | checkint RB, >5
3147 | neg RBd
3148 | jo >4 // Overflow when negating the 32-bit integer.
3149 | setint RB
3150 |9:
3151 | mov [BASE+RA*8], RB
3152 | ins_next
3153 |4:
3154 | mov64 RB, U64x(41e00000,00000000) // 2^31.
3155 | jmp <9
3156 |5:
3157 | ja ->vmeta_unm
3158 |.else
3159 | checknum RB, ->vmeta_unm
3160 |.endif
3161 | mov64 RD, U64x(80000000,00000000) // Mask with only bit 63 set.
3162 | xor RB, RD // Flip bit 63 of the value.
3163 |.if DUALNUM
3164 | jmp <9
3165 |.else
3166 | mov [BASE+RA*8], RB
3167 | ins_next
3168 |.endif
3169 break;
3170 case BC_LEN:
3171 | ins_AD // RA = dst, RD = src
3172 | mov RD, [BASE+RD*8]
3173 | checkstr RD, >2
3174 |.if DUALNUM
3175 | mov RDd, dword STR:RD->len
3176 |1: // The table path jumps back here with the length in RD.
3177 | setint RD
3178 | mov [BASE+RA*8], RD
3179 |.else
3180 | xorps xmm0, xmm0 // Clear xmm0 before the conversion writes its low half.
3181 | cvtsi2sd xmm0, dword STR:RD->len
3182 |1: // The table path jumps back here with the length in xmm0.
3183 | movsd qword [BASE+RA*8], xmm0
3184 |.endif
3185 | ins_next
3186 |2: // Not a string. NOTE(review): relies on checkstr (not shown here) leaving the tag in ITYPE -- confirm.
3187 | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
3188 | mov TAB:CARG1, TAB:RD
3189#if LJ_52
3190 | mov TAB:RB, TAB:RD->metatable
3191 | cmp TAB:RB, 0
3192 | jnz >9 // Has a metatable: check for __len at label 9.
3193 |3:
3194#endif
3195 |->BC_LEN_Z:
3196 | mov RB, BASE // Save BASE.
3197 | call extern lj_tab_len // (GCtab *t)
3198 | // Length of table returned in eax (RD).
3199 |.if DUALNUM
3200 | // Nothing to do.
3201 |.else
3202 | cvtsi2sd xmm0, RDd
3203 |.endif
3204 | mov BASE, RB // Restore BASE.
3205 | movzx RAd, PC_RA // Reload RA from the instruction after the call.
3206 | jmp <1
3207#if LJ_52
3208 |9: // Check for __len.
3209 | test byte TAB:RB->nomm, 1<<MM_len
3210 | jnz <3 // 'no __len' flag set: use the plain table length.
3211 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3212#endif
3213 break;
3214
3215 /* -- Binary ops -------------------------------------------------------- */
3216
3217 |.macro ins_arithpre, sseins, ssereg // Type-checks both operands, loads the first into xmm0, then emits "sseins ssereg, <second operand>".
3218 | ins_ABC
3219 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3220 ||switch (vk) {
3221 ||case 0:
3222 | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3223 | .if DUALNUM
3224 | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
3225 | .endif
3226 | movsd xmm0, qword [BASE+RB*8] // First operand: stack slot RB.
3227 | sseins ssereg, qword [KBASE+RC*8] // Second operand: constant RC.
3228 || break;
3229 ||case 1:
3230 | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3231 | .if DUALNUM
3232 | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3233 | .endif
3234 | movsd xmm0, qword [KBASE+RC*8] // First operand: constant RC.
3235 | sseins ssereg, qword [BASE+RB*8] // Second operand: stack slot RB.
3236 || break;
3237 ||default:
3238 | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3239 | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3240 | movsd xmm0, qword [BASE+RB*8] // First operand: stack slot RB.
3241 | sseins ssereg, qword [BASE+RC*8] // Second operand: stack slot RC.
3242 || break;
3243 ||}
3244 |.endmacro
3245 |
3246 |.macro ins_arithdn, intins // Integer arithmetic: both operands must pass checkint; overflow jumps to the *o target.
3247 | ins_ABC
3248 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3249 ||switch (vk) {
3250 ||case 0:
3251 | mov RB, [BASE+RB*8]
3252 | mov RC, [KBASE+RC*8]
3253 | checkint RB, ->vmeta_arith_vno
3254 | checkint RC, ->vmeta_arith_vno
3255 | intins RBd, RCd; jo ->vmeta_arith_vno // Result is left in RB.
3256 || break;
3257 ||case 1:
3258 | mov RB, [BASE+RB*8]
3259 | mov RC, [KBASE+RC*8]
3260 | checkint RB, ->vmeta_arith_nvo
3261 | checkint RC, ->vmeta_arith_nvo
3262 | intins RCd, RBd; jo ->vmeta_arith_nvo // Operands swapped: result is left in RC.
3263 || break;
3264 ||default:
3265 | mov RB, [BASE+RB*8]
3266 | mov RC, [BASE+RC*8]
3267 | checkint RB, ->vmeta_arith_vvo
3268 | checkint RC, ->vmeta_arith_vvo
3269 | intins RBd, RCd; jo ->vmeta_arith_vvo // Result is left in RB.
3270 || break;
3271 ||}
3272 ||if (vk == 1) {
3273 | setint RC
3274 | mov [BASE+RA*8], RC
3275 ||} else {
3276 | setint RB
3277 | mov [BASE+RA*8], RB
3278 ||}
3279 | ins_next
3280 |.endmacro
3281 |
3282 |.macro ins_arithpost // Stores the FP result from xmm0 into stack slot RA.
3283 | movsd qword [BASE+RA*8], xmm0
3284 |.endmacro
3285 |
3286 |.macro ins_arith, sseins // One-argument form: FP arithmetic only.
3287 | ins_arithpre sseins, xmm0
3288 | ins_arithpost
3289 | ins_next
3290 |.endmacro
3291 |
3292 |.macro ins_arith, intins, sseins // Two-argument form: integer path under DUALNUM, FP path otherwise.
3293 |.if DUALNUM
3294 | ins_arithdn intins
3295 |.else
3296 | ins_arith, sseins
3297 |.endif
3298 |.endmacro
3299
3300 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3301 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3302 | ins_arith add, addsd // Integer add under DUALNUM, addsd otherwise.
3303 break;
3304 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3305 | ins_arith sub, subsd // Integer sub under DUALNUM, subsd otherwise.
3306 break;
3307 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3308 | ins_arith imul, mulsd // Integer imul under DUALNUM, mulsd otherwise.
3309 break;
3310 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3311 | ins_arith divsd // One-argument form: division always uses the FP path.
3312 break;
3313 case BC_MODVN:
3314 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand.
3315 |->BC_MODVN_Z:
3316 | call ->vm_mod // Result returned in xmm0.
3317 | ins_arithpost
3318 | ins_next
3319 break;
3320 case BC_MODNV: case BC_MODVV:
3321 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand.
3322 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3323 break;
3324 case BC_POW:
3325 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand.
3326 | mov RB, BASE // Keep BASE in RB across the C call.
3327 | call extern pow
3328 | movzx RAd, PC_RA // Reload RA from the instruction after the call.
3329 | mov BASE, RB
3330 | ins_arithpost
3331 | ins_next
3332 break;
3333
3334 case BC_CAT:
3335 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3336 | mov L:CARG1, SAVE_L
3337 | mov L:CARG1->base, BASE
3338 | lea CARG2, [BASE+RC*8] // top = address of slot src_end.
3339 | mov CARG3d, RCd
3340 | sub CARG3d, RBd // left = src_end - src_start.
3341 |->BC_CAT_Z:
3342 | mov L:RB, L:CARG1 // Keep L in RB across the call.
3343 | mov SAVE_PC, PC
3344 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3345 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3346 | mov BASE, L:RB->base
3347 | test RC, RC
3348 | jnz ->vmeta_binop // Non-NULL return: continue at vmeta_binop.
3349 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3350 | movzx RAd, PC_RA
3351 | mov RC, [BASE+RB*8]
3352 | mov [BASE+RA*8], RC
3353 | ins_next
3354 break;
3355
3356 /* -- Constant ops ------------------------------------------------------ */
3357
3358 case BC_KSTR: /* Stk[RA] = string constant, tagged as LJ_TSTR. */
3359 | ins_AND // RA = dst, RD = str const (~)
3360 | mov RD, [KBASE+RD*8] // Fetch the constant slot.
3361 | settp RD, LJ_TSTR // Apply the string type tag.
3362 | mov [BASE+RA*8], RD
3363 | ins_next
3364 break;
3365 case BC_KCDATA: /* Stk[RA] = cdata constant; the body is only assembled when FFI is set. */
3366 |.if FFI
3367 | ins_AND // RA = dst, RD = cdata const (~)
3368 | mov RD, [KBASE+RD*8] // Fetch the constant slot.
3369 | settp RD, LJ_TCDATA // Apply the cdata type tag.
3370 | mov [BASE+RA*8], RD
3371 | ins_next
3372 |.endif
3373 break;
3374 case BC_KSHORT: /* Stk[RA] = 16 bit signed literal, as a tagged integer (DUALNUM) or a double. */
3375 | ins_AD // RA = dst, RD = signed int16 literal
3376 |.if DUALNUM
3377 | movsx RDd, RDW // Sign-extend literal.
3378 | setint RD // Apply the integer tag.
3379 | mov [BASE+RA*8], RD
3380 |.else
3381 | movsx RDd, RDW // Sign-extend literal.
3382 | cvtsi2sd xmm0, RDd // Convert to double.
3383 | movsd qword [BASE+RA*8], xmm0
3384 |.endif
3385 | ins_next
3386 break;
3387 case BC_KNUM: /* Stk[RA] = number constant, copied as 8 bytes through xmm0. */
3388 | ins_AD // RA = dst, RD = num const
3389 | movsd xmm0, qword [KBASE+RD*8]
3390 | movsd qword [BASE+RA*8], xmm0
3391 | ins_next
3392 break;
3393 case BC_KPRI: /* Stk[RA] = primitive value built from the operand: ~(RD << 47). */
3394 | ins_AD // RA = dst, RD = primitive type (~)
3395 | shl RD, 47 // Move the operand up to bit 47.
3396 | not RD // Invert all bits to form the stored value.
3397 | mov [BASE+RA*8], RD
3398 | ins_next
3399 break;
3400 case BC_KNIL: /* Set Stk[RA] through Stk[RD] (inclusive) to nil. */
3401 | ins_AD // RA = dst_start, RD = dst_end
3402 | lea RA, [BASE+RA*8+8] // RA = address of the second slot.
3403 | lea RD, [BASE+RD*8] // RD = address of the last slot.
3404 | mov RB, LJ_TNIL
3405 | mov [RA-8], RB // Sets minimum 2 slots.
3406 |1:
3407 | mov [RA], RB
3408 | add RA, 8
3409 | cmp RA, RD
3410 | jbe <1 // Unsigned compare: loop while RA <= last slot.
3411 | ins_next
3412 break;
3413
3414 /* -- Upvalue and function ops ------------------------------------------ */
3415
3416 case BC_UGET: /* Stk[RA] = current value of upvalue RD of the running function. */
3417 | ins_AD // RA = dst, RD = upvalue #
3418 | mov LFUNC:RB, [BASE-16] // Tagged function object from the frame slot below BASE.
3419 | cleartp LFUNC:RB // Strip the type tag to get the pointer.
3420 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]
3421 | mov RB, UPVAL:RB->v // Pointer to the upvalue's value slot.
3422 | mov RD, [RB]
3423 | mov [BASE+RA*8], RD
3424 | ins_next
3425 break;
3426 case BC_USETV: /* Upvalue RA = Stk[RD], followed by a GC write barrier check for closed upvalues. */
3427#define TV2MARKOFS \
3428 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3429 | ins_AD // RA = upvalue #, RD = src
3430 | mov LFUNC:RB, [BASE-16]
3431 | cleartp LFUNC:RB
3432 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3433 | cmp byte UPVAL:RB->closed, 0 // Flags are consumed by the jz below; the movs in between keep them.
3434 | mov RB, UPVAL:RB->v
3435 | mov RA, [BASE+RD*8]
3436 | mov [RB], RA // Store the new value first, then check the barrier.
3437 | jz >1 // closed == 0: no barrier check.
3438 | // Check barrier for closed upvalue.
3439 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3440 | jnz >2
3441 |1:
3442 | ins_next
3443 |
3444 |2: // Upvalue is black. Check if new value is collectable and white.
3445 | mov RD, RA
3446 | sar RD, 47 // Extract the type tag bits of the stored value.
3447 | sub RDd, LJ_TISGCV
3448 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3449 | jbe <1 // Not a GC value: done.
3450 | cleartp GCOBJ:RA
3451 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3452 | jz <1 // Value is not white: done.
3453 | // Crossed a write barrier. Move the barrier forward.
3454 |.if not X64WIN
3455 | mov CARG2, RB
3456 | mov RB, BASE // Save BASE.
3457 |.else
3458 | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3459 |.endif
3460 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3461 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3462 | mov BASE, RB // Restore BASE.
3463 | jmp <1
3464 break;
3465#undef TV2MARKOFS
3466 case BC_USETS: /* Upvalue RA = string constant, followed by a GC write barrier check. */
3467 | ins_AND // RA = upvalue #, RD = str const (~)
3468 | mov LFUNC:RB, [BASE-16]
3469 | cleartp LFUNC:RB
3470 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3471 | mov STR:RA, [KBASE+RD*8] // Untagged string pointer, kept for the barrier check.
3472 | mov RD, UPVAL:RB->v
3473 | settp STR:ITYPE, STR:RA, LJ_TSTR // ITYPE = tagged copy of the string pointer.
3474 | mov [RD], STR:ITYPE
3475 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3476 | jnz >2
3477 |1:
3478 | ins_next
3479 |
3480 |2: // Check if string is white and ensure upvalue is closed.
3481 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3482 | jz <1
3483 | cmp byte UPVAL:RB->closed, 0
3484 | jz <1
3485 | // Crossed a write barrier. Move the barrier forward.
3486 | mov RB, BASE // Save BASE (CARG2 == BASE).
3487 | mov CARG2, RD // tv = the value slot that was just written.
3488 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3489 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3490 | mov BASE, RB // Restore BASE.
3491 | jmp <1
3492 break;
3493 case BC_USETN: /* Upvalue RA = number constant; no barrier code is emitted for this op. */
3494 | ins_AD // RA = upvalue #, RD = num const
3495 | mov LFUNC:RB, [BASE-16]
3496 | cleartp LFUNC:RB
3497 | movsd xmm0, qword [KBASE+RD*8] // Load the constant.
3498 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3499 | mov RA, UPVAL:RB->v // Pointer to the upvalue's value slot.
3500 | movsd qword [RA], xmm0
3501 | ins_next
3502 break;
3503 case BC_USETP: /* Upvalue RA = primitive value ~(RD << 47); no barrier code is emitted for this op. */
3504 | ins_AD // RA = upvalue #, RD = primitive type (~)
3505 | mov LFUNC:RB, [BASE-16]
3506 | cleartp LFUNC:RB
3507 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3508 | shl RD, 47 // Same encoding steps as in BC_KPRI.
3509 | not RD
3510 | mov RA, UPVAL:RB->v // Pointer to the upvalue's value slot.
3511 | mov [RA], RD
3512 | ins_next
3513 break;
3514 case BC_UCLO: /* Branch to RD; if L has open upvalues, call lj_func_closeuv with level = &Stk[RA]. */
3515 | ins_AD // RA = level, RD = target
3516 | branchPC RD // Do this first to free RD.
3517 | mov L:RB, SAVE_L
3518 | cmp aword L:RB->openupval, 0
3519 | je >1 // No open upvalues: skip the call.
3520 | mov L:RB->base, BASE
3521 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3522 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3523 | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3524 | mov BASE, L:RB->base // Reload BASE after the call.
3525 |1:
3526 | ins_next
3527 break;
3528
3529 case BC_FNEW: /* Stk[RA] = new closure from a prototype constant, with the current function as parent. */
3530 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3531 | mov L:RB, SAVE_L
3532 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3533 | mov CARG3, [BASE-16] // parent = current function (tagged).
3534 | cleartp CARG3
3535 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3536 | mov CARG1, L:RB
3537 | mov SAVE_PC, PC
3538 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3539 | call extern lj_func_newL_gc
3540 | // GCfuncL * returned in rax (RC).
3541 | mov BASE, L:RB->base
3542 | movzx RAd, PC_RA // Reload RA after the call.
3543 | settp LFUNC:RC, LJ_TFUNC // Apply the function type tag.
3544 | mov [BASE+RA*8], LFUNC:RC
3545 | ins_next
3546 break;
3547
3548 /* -- Table ops --------------------------------------------------------- */
3549
3550 case BC_TNEW: /* Stk[RA] = new table sized from RD, after a GC step if the threshold is reached. */
3551 | ins_AD // RA = dst, RD = hbits|asize
3552 | mov L:RB, SAVE_L
3553 | mov L:RB->base, BASE
3554 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3555 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] // Flags are used by the jae below; mov keeps them.
3556 | mov SAVE_PC, PC
3557 | jae >5 // gc.total >= gc.threshold: run a GC step first.
3558 |1:
3559 | mov CARG3d, RDd
3560 | and RDd, 0x7ff // asize = low 11 bits of RD.
3561 | shr CARG3d, 11 // hbits = the bits above them.
3562 | cmp RDd, 0x7ff
3563 | je >3
3564 |2:
3565 | mov L:CARG1, L:RB
3566 | mov CARG2d, RDd
3567 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3568 | // Table * returned in rax (RC).
3569 | mov BASE, L:RB->base
3570 | movzx RAd, PC_RA // Reload RA after the call.
3571 | settp TAB:RC, LJ_TTAB // Apply the table type tag.
3572 | mov [BASE+RA*8], TAB:RC
3573 | ins_next
3574 |3: // Turn 0x7ff into 0x801.
3575 | mov RDd, 0x801
3576 | jmp <2
3577 |5: // GC step, then reload RD and retry.
3578 | mov L:CARG1, L:RB
3579 | call extern lj_gc_step_fixtop // (lua_State *L)
3580 | movzx RDd, PC_RD
3581 | jmp <1
3582 break;
3583 case BC_TDUP: /* Stk[RA] = duplicate of a template table constant, after a GC step if needed. */
3584 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3585 | mov L:RB, SAVE_L
3586 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3587 | mov SAVE_PC, PC
3588 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] // Flags are used by the jae below; mov keeps them.
3589 | mov L:RB->base, BASE
3590 | jae >3 // gc.total >= gc.threshold: run a GC step first.
3591 |2:
3592 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3593 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3594 | call extern lj_tab_dup // (lua_State *L, Table *kt)
3595 | // Table * returned in rax (RC).
3596 | mov BASE, L:RB->base
3597 | movzx RAd, PC_RA // Reload RA after the call.
3598 | settp TAB:RC, LJ_TTAB // Apply the table type tag.
3599 | mov [BASE+RA*8], TAB:RC
3600 | ins_next
3601 |3: // GC step, then rebuild RD and retry.
3602 | mov L:CARG1, L:RB
3603 | call extern lj_gc_step_fixtop // (lua_State *L)
3604 | movzx RDd, PC_RD // Need to reload RD.
3605 | not RD // Redo the operand inversion for the reloaded RD.
3606 | jmp <2
3607 break;
3608
3609 case BC_GGET: /* Look up a string constant in the function's env table; reuses the BC_TGETS lookup. */
3610 | ins_AND // RA = dst, RD = str const (~)
3611 | mov LFUNC:RB, [BASE-16]
3612 | cleartp LFUNC:RB
3613 | mov TAB:RB, LFUNC:RB->env // RB = env table, as BC_TGETS_Z expects.
3614 | mov STR:RC, [KBASE+RD*8] // RC = key string, as BC_TGETS_Z expects.
3615 | jmp ->BC_TGETS_Z
3616 break;
3617 case BC_GSET: /* Store into the function's env table under a string constant; reuses the BC_TSETS path. */
3618 | ins_AND // RA = src, RD = str const (~)
3619 | mov LFUNC:RB, [BASE-16]
3620 | cleartp LFUNC:RB
3621 | mov TAB:RB, LFUNC:RB->env // RB = env table, as BC_TSETS_Z expects.
3622 | mov STR:RC, [KBASE+RD*8] // RC = key string, as BC_TSETS_Z expects.
3623 | jmp ->BC_TSETS_Z
3624 break;
3625
3626 case BC_TGETV: /* Stk[RA] = table[key]: array fast path for integer keys, BC_TGETS path for strings. */
3627 | ins_ABC // RA = dst, RB = table, RC = key
3628 | mov TAB:RB, [BASE+RB*8]
3629 | mov RC, [BASE+RC*8]
3630 | checktab TAB:RB, ->vmeta_tgetv // Not a table: use fallback.
3631 |
3632 | // Integer key?
3633 |.if DUALNUM
3634 | checkint RC, >5
3635 |.else
3636 | // Convert number to int and back and compare.
3637 | checknum RC, >5
3638 | movd xmm0, RC
3639 | cvttsd2si RCd, xmm0
3640 | cvtsi2sd xmm1, RCd
3641 | ucomisd xmm0, xmm1
3642 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3643 |.endif
3644 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3645 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3646 | shl RCd, 3 // Index to byte offset (8 bytes per slot).
3647 | add RC, TAB:RB->array
3648 | // Get array slot.
3649 | mov ITYPE, [RC]
3650 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3651 | je >2
3652 |1:
3653 | mov [BASE+RA*8], ITYPE
3654 | ins_next
3655 |
3656 |2: // Check for __index if table value is nil.
3657 | mov TAB:TMPR, TAB:RB->metatable
3658 | test TAB:TMPR, TAB:TMPR
3659 | jz <1 // No metatable: store the nil.
3660 | test byte TAB:TMPR->nomm, 1<<MM_index
3661 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3662 | jmp <1
3663 |
3664 |5: // String key?
3665 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
3666 | cleartp STR:RC // BC_TGETS_Z expects an untagged GCstr * in RC.
3667 | jmp ->BC_TGETS_Z
3668 break;
3669 case BC_TGETS: /* Stk[RA] = table[str]: walk the hash chain for a string constant key. */
3670 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3671 | mov TAB:RB, [BASE+RB*8]
3672 | not RC // Invert the operand to get the constant index.
3673 | mov STR:RC, [KBASE+RC*8]
3674 | checktab TAB:RB, ->vmeta_tgets
3675 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
3676 | mov TMPRd, TAB:RB->hmask
3677 | and TMPRd, STR:RC->hash // Node index = hash & hmask.
3678 | imul TMPRd, #NODE // Scale by the node size.
3679 | add NODE:TMPR, TAB:RB->node
3680 | settp ITYPE, STR:RC, LJ_TSTR // ITYPE = tagged key, used for the compare below.
3681 |1:
3682 | cmp NODE:TMPR->key, ITYPE
3683 | jne >4
3684 | // Get node value.
3685 | mov ITYPE, NODE:TMPR->val
3686 | cmp ITYPE, LJ_TNIL
3687 | je >5 // Key found, but nil value?
3688 |2:
3689 | mov [BASE+RA*8], ITYPE
3690 | ins_next
3691 |
3692 |4: // Follow hash chain.
3693 | mov NODE:TMPR, NODE:TMPR->next
3694 | test NODE:TMPR, NODE:TMPR
3695 | jnz <1
3696 | // End of hash chain: key not found, nil result.
3697 | mov ITYPE, LJ_TNIL
3698 |
3699 |5: // Check for __index if table value is nil.
3700 | mov TAB:TMPR, TAB:RB->metatable
3701 | test TAB:TMPR, TAB:TMPR
3702 | jz <2 // No metatable: done.
3703 | test byte TAB:TMPR->nomm, 1<<MM_index
3704 | jnz <2 // 'no __index' flag set: done.
3705 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3706 break;
3707 case BC_TGETB: /* Stk[RA] = table[byte literal]: array part only, fallback otherwise. */
3708 | ins_ABC // RA = dst, RB = table, RC = byte literal
3709 | mov TAB:RB, [BASE+RB*8]
3710 | checktab TAB:RB, ->vmeta_tgetb
3711 | cmp RCd, TAB:RB->asize
3712 | jae ->vmeta_tgetb // Not in array part: use fallback.
3713 | shl RCd, 3 // Index to byte offset (8 bytes per slot).
3714 | add RC, TAB:RB->array
3715 | // Get array slot.
3716 | mov ITYPE, [RC]
3717 | cmp ITYPE, LJ_TNIL
3718 | je >2
3719 |1:
3720 | mov [BASE+RA*8], ITYPE
3721 | ins_next
3722 |
3723 |2: // Check for __index if table value is nil.
3724 | mov TAB:TMPR, TAB:RB->metatable
3725 | test TAB:TMPR, TAB:TMPR
3726 | jz <1 // No metatable: store the nil.
3727 | test byte TAB:TMPR->nomm, 1<<MM_index
3728 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3729 | jmp <1
3730 break;
3731 case BC_TGETR: /* Array get without a table type check or a nil/__index check on this path. */
3732 | ins_ABC // RA = dst, RB = table, RC = key
3733 | mov TAB:RB, [BASE+RB*8]
3734 | cleartp TAB:RB // Tag is stripped without being checked.
3735 |.if DUALNUM
3736 | mov RCd, dword [BASE+RC*8] // Key taken from the low 32 bits of the slot.
3737 |.else
3738 | cvttsd2si RCd, qword [BASE+RC*8] // Key converted from double by truncation.
3739 |.endif
3740 | cmp RCd, TAB:RB->asize
3741 | jae ->vmeta_tgetr // Not in array part? Use fallback.
3742 | shl RCd, 3
3743 | add RC, TAB:RB->array
3744 | // Get array slot.
3745 |->BC_TGETR_Z: // Entry with RC = slot address; users are outside this view.
3746 | mov ITYPE, [RC]
3747 |->BC_TGETR2_Z: // Entry with ITYPE = value to store; users are outside this view.
3748 | mov [BASE+RA*8], ITYPE
3749 | ins_next
3750 break;
3751
3752 case BC_TSETV: /* table[key] = Stk[RA]: array fast path for integer keys, BC_TSETS path for strings. */
3753 | ins_ABC // RA = src, RB = table, RC = key
3754 | mov TAB:RB, [BASE+RB*8]
3755 | mov RC, [BASE+RC*8]
3756 | checktab TAB:RB, ->vmeta_tsetv // Not a table: use fallback.
3757 |
3758 | // Integer key?
3759 |.if DUALNUM
3760 | checkint RC, >5
3761 |.else
3762 | // Convert number to int and back and compare.
3763 | checknum RC, >5
3764 | movd xmm0, RC
3765 | cvttsd2si RCd, xmm0
3766 | cvtsi2sd xmm1, RCd
3767 | ucomisd xmm0, xmm1
3768 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3769 |.endif
3770 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3771 | jae ->vmeta_tsetv // Not in array part: use fallback.
3772 | shl RCd, 3 // Index to byte offset (8 bytes per slot).
3773 | add RC, TAB:RB->array
3774 | cmp aword [RC], LJ_TNIL
3775 | je >3 // Previous value is nil?
3776 |1:
3777 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3778 | jnz >7
3779 |2: // Set array slot.
3780 | mov RB, [BASE+RA*8]
3781 | mov [RC], RB
3782 | ins_next
3783 |
3784 |3: // Check for __newindex if previous value is nil.
3785 | mov TAB:TMPR, TAB:RB->metatable
3786 | test TAB:TMPR, TAB:TMPR
3787 | jz <1 // No metatable: go on with the store.
3788 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3789 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3790 | jmp <1
3791 |
3792 |5: // String key?
3793 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
3794 | cleartp STR:RC // BC_TSETS_Z expects an untagged GCstr * in RC.
3795 | jmp ->BC_TSETS_Z
3796 |
3797 |7: // Possible table write barrier for the value. Skip valiswhite check.
3798 | barrierback TAB:RB, TMPR
3799 | jmp <2
3800 break;
3801 case BC_TSETS: /* table[str] = Stk[RA]: walk the hash chain, add a new key through lj_tab_newkey if missing. */
3802 | ins_ABC // RA = src, RB = table, RC = str const (~)
3803 | mov TAB:RB, [BASE+RB*8]
3804 | not RC // Invert the operand to get the constant index.
3805 | mov STR:RC, [KBASE+RC*8]
3806 | checktab TAB:RB, ->vmeta_tsets
3807 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3808 | mov TMPRd, TAB:RB->hmask
3809 | and TMPRd, STR:RC->hash // Node index = hash & hmask.
3810 | imul TMPRd, #NODE // Scale by the node size.
3811 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3812 | add NODE:TMPR, TAB:RB->node
3813 | settp ITYPE, STR:RC, LJ_TSTR // ITYPE = tagged key, used for the compare below.
3814 |1:
3815 | cmp NODE:TMPR->key, ITYPE
3816 | jne >5
3817 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3818 | cmp aword [TMPR], LJ_TNIL
3819 | je >4 // Previous value is nil?
3820 |2:
3821 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3822 | jnz >7
3823 |3: // Set node value.
3824 | mov ITYPE, [BASE+RA*8]
3825 | mov [TMPR], ITYPE
3826 | ins_next
3827 |
3828 |4: // Check for __newindex if previous value is nil.
3829 | mov TAB:ITYPE, TAB:RB->metatable
3830 | test TAB:ITYPE, TAB:ITYPE
3831 | jz <2 // No metatable: go on with the store.
3832 | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3833 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3834 | jmp <2
3835 |
3836 |5: // Follow hash chain.
3837 | mov NODE:TMPR, NODE:TMPR->next
3838 | test NODE:TMPR, NODE:TMPR
3839 | jnz <1
3840 | // End of hash chain: key not found, add a new one.
3841 |
3842 | // But check for __newindex first.
3843 | mov TAB:TMPR, TAB:RB->metatable
3844 | test TAB:TMPR, TAB:TMPR
3845 | jz >6 // No metatable: continue.
3846 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3847 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3848 |6:
3849 | mov TMP1, ITYPE // Spill the tagged key so it can be passed by address.
3850 | mov L:CARG1, SAVE_L
3851 | mov L:CARG1->base, BASE
3852 | lea CARG3, TMP1 // k = address of the spilled key.
3853 | mov CARG2, TAB:RB
3854 | mov SAVE_PC, PC
3855 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3856 | // Handles write barrier for the new key. TValue * returned in rax (RC).
3857 | mov L:CARG1, SAVE_L
3858 | mov BASE, L:CARG1->base
3859 | mov TMPR, rax // Label 3 stores through TMPR.
3860 | movzx RAd, PC_RA // Reload RA after the call.
3861 | jmp <2 // Must check write barrier for value.
3862 |
3863 |7: // Possible table write barrier for the value. Skip valiswhite check.
3864 | barrierback TAB:RB, ITYPE
3865 | jmp <3
3866 break;
3867 case BC_TSETB: /* table[byte literal] = Stk[RA]: array part only, fallback otherwise. */
3868 | ins_ABC // RA = src, RB = table, RC = byte literal
3869 | mov TAB:RB, [BASE+RB*8]
3870 | checktab TAB:RB, ->vmeta_tsetb
3871 | cmp RCd, TAB:RB->asize
3872 | jae ->vmeta_tsetb // Not in array part: use fallback.
3873 | shl RCd, 3 // Index to byte offset (8 bytes per slot).
3874 | add RC, TAB:RB->array
3875 | cmp aword [RC], LJ_TNIL
3876 | je >3 // Previous value is nil?
3877 |1:
3878 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3879 | jnz >7
3880 |2: // Set array slot.
3881 | mov ITYPE, [BASE+RA*8]
3882 | mov [RC], ITYPE
3883 | ins_next
3884 |
3885 |3: // Check for __newindex if previous value is nil.
3886 | mov TAB:TMPR, TAB:RB->metatable
3887 | test TAB:TMPR, TAB:TMPR
3888 | jz <1 // No metatable: go on with the store.
3889 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3890 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3891 | jmp <1
3892 |
3893 |7: // Possible table write barrier for the value. Skip valiswhite check.
3894 | barrierback TAB:RB, TMPR
3895 | jmp <2
3896 break;
3897 case BC_TSETR: /* Array set without a table type check or a __newindex check on this path. */
3898 | ins_ABC // RA = src, RB = table, RC = key
3899 | mov TAB:RB, [BASE+RB*8]
3900 | cleartp TAB:RB // Tag is stripped without being checked.
3901 |.if DUALNUM
3902 | mov RC, [BASE+RC*8] // Only the low 32 bits (RCd) are used below.
3903 |.else
3904 | cvttsd2si RCd, qword [BASE+RC*8] // Key converted from double by truncation.
3905 |.endif
3906 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3907 | jnz >7
3908 |2:
3909 | cmp RCd, TAB:RB->asize
3910 | jae ->vmeta_tsetr // Not in array part: use fallback.
3911 | shl RCd, 3
3912 | add RC, TAB:RB->array
3913 | // Set array slot.
3914 |->BC_TSETR_Z: // Entry with RC = slot address; users are outside this view.
3915 | mov ITYPE, [BASE+RA*8]
3916 | mov [RC], ITYPE
3917 | ins_next
3918 |
3919 |7: // Possible table write barrier for the value. Skip valiswhite check.
3920 | barrierback TAB:RB, TMPR
3921 | jmp <2
3922 break;
3923
3924 case BC_TSETM: /* Copy MULTRES-1 slots starting at Stk[RA] into the array part of the table at Stk[RA-1]. */
3925 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3926 |1:
3927 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3928 | lea RA, [BASE+RA*8]
3929 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3930 | cleartp TAB:RB
3931 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3932 | jnz >7
3933 |2:
3934 | mov RDd, MULTRES
3935 | sub RDd, 1 // RD = number of slots to copy.
3936 | jz >4 // Nothing to copy?
3937 | add RDd, TMPRd // Compute needed size.
3938 | cmp RDd, TAB:RB->asize
3939 | ja >5 // Doesn't fit into array part?
3940 | sub RDd, TMPRd // Back to the plain slot count.
3941 | shl TMPRd, 3 // Start index to byte offset.
3942 | add TMPR, TAB:RB->array
3943 |3: // Copy result slots to table.
3944 | mov RB, [RA]
3945 | add RA, 8
3946 | mov [TMPR], RB
3947 | add TMPR, 8
3948 | sub RDd, 1
3949 | jnz <3
3950 |4:
3951 | ins_next
3952 |
3953 |5: // Need to resize array part.
3954 | mov L:CARG1, SAVE_L
3955 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3956 | mov CARG2, TAB:RB
3957 | mov CARG3d, RDd // nasize = needed size computed above.
3958 | mov L:RB, L:CARG1 // Keep L in RB (C callee-save) across the call.
3959 | mov SAVE_PC, PC
3960 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3961 | mov BASE, L:RB->base
3962 | movzx RAd, PC_RA // Restore RA.
3963 | movzx RDd, PC_RD // Restore RD.
3964 | jmp <1 // Retry.
3965 |
3966 |7: // Possible table write barrier for any value. Skip valiswhite check.
3967 | barrierback TAB:RB, RD
3968 | jmp <2
3969 break;
3970
3971 /* -- Calls and vararg handling ----------------------------------------- */
3972
3973 case BC_CALL: case BC_CALLM: /* Call Stk[RA]; CALLM adds MULTRES to the argument count first. */
3974 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3975 if (op == BC_CALLM) {
3976 | add NARGS:RDd, MULTRES
3977 }
3978 | mov LFUNC:RB, [BASE+RA*8]
3979 | checkfunc LFUNC:RB, ->vmeta_call_ra // Not a function: continue at vmeta_call_ra.
3980 | lea BASE, [BASE+RA*8+16] // New BASE is two slots above the function slot.
3981 | ins_call
3982 break;
3983
3984 case BC_CALLMT: /* Adds MULTRES to the argument count; the emitted code runs on into BC_CALLT. */
3985 | ins_AD // RA = base, RD = extra_nargs
3986 | add NARGS:RDd, MULTRES
3987 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
3988 break;
3989 case BC_CALLT: /* Tail call: move the function and its arguments down to the current frame, then call. */
3990 | ins_AD // RA = base, RD = nargs+1
3991 | lea RA, [BASE+RA*8+16] // RA = address of the first argument.
3992 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
3993 | mov LFUNC:RB, [RA-16]
3994 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
3995 |->BC_CALLT_Z: // Re-entry point; its other users are outside this view.
3996 | mov PC, [BASE-8] // Frame link word of the current frame.
3997 | test PCd, FRAME_TYPE
3998 | jnz >7
3999 |1:
4000 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
4001 | mov MULTRES, NARGS:RDd // Park nargs+1 while RD counts down.
4002 | sub NARGS:RDd, 1
4003 | jz >3 // No arguments to move.
4004 |2: // Move args down.
4005 | mov RB, [RA]
4006 | add RA, 8
4007 | mov [KBASE], RB
4008 | add KBASE, 8
4009 | sub NARGS:RDd, 1
4010 | jnz <2
4011 |
4012 | mov LFUNC:RB, [BASE-16] // RB was used as copy scratch: reload the function.
4013 |3:
4014 | cleartp LFUNC:RB
4015 | mov NARGS:RDd, MULTRES // Restore nargs+1.
4016 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4017 | ja >5
4018 |4:
4019 | ins_callt
4020 |
4021 |5: // Tailcall to a fast function.
4022 | test PCd, FRAME_TYPE // Lua frame below?
4023 | jnz <4
4024 | movzx RAd, PC_RA
4025 | neg RA
4026 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
4027 | cleartp LFUNC:KBASE
4028 | mov KBASE, LFUNC:KBASE->pc
4029 | mov KBASE, [KBASE+PC2PROTO(k)]
4030 | jmp <4
4031 |
4032 |7: // Tailcall from a vararg function.
4033 | sub PC, FRAME_VARG
4034 | test PCd, FRAME_TYPEP
4035 | jnz >8 // Vararg frame below?
4036 | sub BASE, PC // Need to relocate BASE/KBASE down.
4037 | mov KBASE, BASE
4038 | mov PC, [BASE-8]
4039 | jmp <1
4040 |8:
4041 | add PCd, FRAME_VARG // Undo the subtraction above.
4042 | jmp <1
4043 break;
4044
4045 case BC_ITERC: /* Generic iterator call: copy callable, state and control var up, then call with 2 args. */
4046 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4047 | lea RA, [BASE+RA*8+16] // fb = base+2
4048 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4049 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4050 | mov [RA], RB
4051 | mov [RA+8], RC
4052 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5]
4053 | mov [RA-16], LFUNC:RB
4054 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4055 | checkfunc LFUNC:RB, ->vmeta_call // Not a function: continue at vmeta_call.
4056 | mov BASE, RA
4057 | ins_call
4058 break;
4059
4060 case BC_ITERN: /* Inline table traversal: array part first, then hash part; index kept in the control var. */
4061 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4062 |.if JIT
4063 | // NYI: add hotloop, record BC_ITERN.
4064 |.endif
4065 | mov TAB:RB, [BASE+RA*8-16] // Table from the state slot (tag is not checked here).
4066 | cleartp TAB:RB
4067 | mov RCd, [BASE+RA*8-8] // Get index from control var.
4068 | mov TMPRd, TAB:RB->asize
4069 | add PC, 4 // Step PC past the next instruction slot (the ITERL, per the comments below).
4070 | mov ITYPE, TAB:RB->array
4071 |1: // Traverse array part.
4072 | cmp RCd, TMPRd; jae >5 // Index points after array part?
4073 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
4074 |.if not DUALNUM
4075 | cvtsi2sd xmm0, RCd
4076 |.endif
4077 | // Copy array slot to returned value.
4078 | mov RB, [ITYPE+RC*8]
4079 | mov [BASE+RA*8+8], RB
4080 | // Return array index as a numeric key.
4081 |.if DUALNUM
4082 | setint ITYPE, RC
4083 | mov [BASE+RA*8], ITYPE
4084 |.else
4085 | movsd qword [BASE+RA*8], xmm0
4086 |.endif
4087 | add RCd, 1
4088 | mov [BASE+RA*8-8], RCd // Update control var.
4089 |2:
4090 | movzx RDd, PC_RD // Get target from ITERL.
4091 | branchPC RD
4092 |3:
4093 | ins_next
4094 |
4095 |4: // Skip holes in array part.
4096 | add RCd, 1
4097 | jmp <1
4098 |
4099 |5: // Traverse hash part.
4100 | sub RCd, TMPRd // Hash index = control index - asize.
4101 |6:
4102 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4103 | imul ITYPEd, RCd, #NODE
4104 | add NODE:ITYPE, TAB:RB->node
4105 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
4106 | lea TMPRd, [RCd+TMPRd+1] // Next control index = hash index + asize + 1.
4107 | // Copy key and value from hash slot.
4108 | mov RB, NODE:ITYPE->key
4109 | mov RC, NODE:ITYPE->val
4110 | mov [BASE+RA*8], RB
4111 | mov [BASE+RA*8+8], RC
4112 | mov [BASE+RA*8-8], TMPRd // Update control var.
4113 | jmp <2
4114 |
4115 |7: // Skip holes in hash part.
4116 | add RCd, 1
4117 | jmp <6
4118 break;
4119
4120 case BC_ISNEXT: /* Check that the iterator triple fits the ITERN fast path; otherwise patch the bytecode. */
4121 | ins_AD // RA = base, RD = target (points to ITERN)
4122 | mov CFUNC:RB, [BASE+RA*8-24]
4123 | checkfunc CFUNC:RB, >5 // Iterator must be a function.
4124 | checktptp [BASE+RA*8-16], LJ_TTAB, >5 // State must be a table.
4125 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 // Control var must be nil.
4126 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 // Function id must be FF_next_N.
4127 | branchPC RD
4128 | mov64 TMPR, U64x(fffe7fff, 00000000)
4129 | mov [BASE+RA*8-8], TMPR // Initialize control var.
4130 |1:
4131 | ins_next
4132 |5: // Despecialize bytecode if any of the checks fail.
4133 | mov PC_OP, BC_JMP // Rewrite an opcode to BC_JMP through PC_OP (presumably this instruction).
4134 | branchPC RD
4135 | mov byte [PC], BC_ITERC // Rewrite the opcode byte at the branch target to BC_ITERC.
4136 | jmp <1
4137 break;
4138
4139 case BC_VARG: /* Copy vararg slots to Stk[RA..]: a fixed count padded with nil, or all of them if RB is 0. */
4140 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4141 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4142 | lea RA, [BASE+RA*8]
4143 | sub TMPR, [BASE-8] // Subtract the frame link word of the current frame.
4144 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4145 | test RB, RB
4146 | jz >5 // Copy all varargs?
4147 | lea RB, [RA+RB*8-8] // RB = end of the destination range.
4148 | cmp TMPR, BASE // No vararg slots?
4149 | jnb >2
4150 |1: // Copy vararg slots to destination slots.
4151 | mov RC, [TMPR-16]
4152 | add TMPR, 8
4153 | mov [RA], RC
4154 | add RA, 8
4155 | cmp RA, RB // All destination slots filled?
4156 | jnb >3
4157 | cmp TMPR, BASE // No more vararg slots?
4158 | jb <1
4159 |2: // Fill up remainder with nil.
4160 | mov aword [RA], LJ_TNIL
4161 | add RA, 8
4162 | cmp RA, RB
4163 | jb <2
4164 |3:
4165 | ins_next
4166 |
4167 |5: // Copy all varargs.
4168 | mov MULTRES, 1 // MULTRES = 0+1
4169 | mov RC, BASE
4170 | sub RC, TMPR // RC = size of the vararg area in bytes.
4171 | jbe <3 // No vararg slots?
4172 | mov RBd, RCd
4173 | shr RBd, 3 // Bytes to slot count.
4174 | add RBd, 1
4175 | mov MULTRES, RBd // MULTRES = #varargs+1
4176 | mov L:RB, SAVE_L
4177 | add RC, RA // RC = end of the destination range.
4178 | cmp RC, L:RB->maxstack
4179 | ja >7 // Need to grow stack?
4180 |6: // Copy all vararg slots.
4181 | mov RC, [TMPR-16]
4182 | add TMPR, 8
4183 | mov [RA], RC
4184 | add RA, 8
4185 | cmp TMPR, BASE // No more vararg slots?
4186 | jb <6
4187 | jmp <3
4188 |
4189 |7: // Grow stack for varargs.
4190 | mov L:RB->base, BASE
4191 | mov L:RB->top, RA
4192 | mov SAVE_PC, PC
4193 | sub TMPR, BASE // Need delta, because BASE may change.
4194 | mov TMP1hi, TMPRd // Spill the 32 bit delta across the call.
4195 | mov CARG2d, MULTRES
4196 | sub CARG2d, 1 // n = #varargs
4197 | mov CARG1, L:RB
4198 | call extern lj_state_growstack // (lua_State *L, int n)
4199 | mov BASE, L:RB->base
4200 | movsxd TMPR, TMP1hi // Reload the delta, sign-extended.
4201 | mov RA, L:RB->top
4202 | add TMPR, BASE // Rebuild the source pointer from the new BASE.
4203 | jmp <6
4204 break;
4205
4206 /* -- Returns ----------------------------------------------------------- */
4207
4208 case BC_RETM: /* Adds MULTRES to the result count; the emitted code runs on into BC_RET. */
4209 | ins_AD // RA = results, RD = extra_nresults
4210 | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4211 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4212 break;
4213
4214 case BC_RET: case BC_RET0: case BC_RET1: /* Return: move results down, pad with nil, restore the caller's BASE and KBASE. */
4215 | ins_AD // RA = results, RD = nresults+1
4216 if (op != BC_RET0) {
4217 | shl RAd, 3 // RA becomes a byte offset from BASE.
4218 }
4219 |1:
4220 | mov PC, [BASE-8] // Frame link word of the current frame.
4221 | mov MULTRES, RDd // Save nresults+1.
4222 | test PCd, FRAME_TYPE // Check frame type marker.
4223 | jnz >7 // Not returning to a fixarg Lua func?
4224 switch (op) {
4225 case BC_RET:
4226 |->BC_RET_Z: // Re-entry point; its other users are outside this view.
4227 | mov KBASE, BASE // Use KBASE for result move.
4228 | sub RDd, 1
4229 | jz >3 // No results to move.
4230 |2: // Move results down.
4231 | mov RB, [KBASE+RA]
4232 | mov [KBASE-16], RB
4233 | add KBASE, 8
4234 | sub RDd, 1
4235 | jnz <2
4236 |3:
4237 | mov RDd, MULTRES // Note: MULTRES may be >255.
4238 | movzx RBd, PC_RB // So cannot compare with RDL!
4239 |5:
4240 | cmp RBd, RDd // More results expected?
4241 | ja >6
4242 break;
4243 case BC_RET1:
4244 | mov RB, [BASE+RA] // Single result: copy it straight to the result slot.
4245 | mov [BASE-16], RB
4246 /* fallthrough */
4247 case BC_RET0:
4248 |5:
4249 | cmp PC_RB, RDL // More results expected?
4250 | ja >6
4251 default:
4252 break;
4253 }
4254 | movzx RAd, PC_RA
4255 | neg RA
4256 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4257 | mov LFUNC:KBASE, [BASE-16] // Function of the frame being returned to.
4258 | cleartp LFUNC:KBASE
4259 | mov KBASE, LFUNC:KBASE->pc
4260 | mov KBASE, [KBASE+PC2PROTO(k)] // Restore KBASE from the k field reached through that pc.
4261 | ins_next
4262 |
4263 |6: // Fill up results with nil.
4264 if (op == BC_RET) {
4265 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4266 | add KBASE, 8
4267 } else {
4268 | mov aword [BASE+RD*8-24], LJ_TNIL
4269 }
4270 | add RD, 1
4271 | jmp <5
4272 |
4273 |7: // Non-standard return case.
4274 | lea RB, [PC-FRAME_VARG]
4275 | test RBd, FRAME_TYPEP
4276 | jnz ->vm_return
4277 | // Return from vararg function: relocate BASE down and RA up.
4278 | sub BASE, RB
4279 if (op != BC_RET0) {
4280 | add RA, RB
4281 }
4282 | jmp <1
4283 break;
4284
4285 /* -- Loops and branches ------------------------------------------------ */
4286
4287 |.define FOR_IDX, [RA] // Loop index slot (RA is turned into a slot address by the FOR ops).
4288 |.define FOR_STOP, [RA+8] // Loop limit slot.
4289 |.define FOR_STEP, [RA+16] // Loop step slot.
4290 |.define FOR_EXT, [RA+24] // Slot that receives a copy of the index on each pass.
4291
4292 case BC_FORL: /* Expands hotloop when JIT is set; the emitted code then runs on into BC_IFORL. */
4293 |.if JIT
4294 | hotloop RBd
4295 |.endif
4296 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4297 break;
4298
/* Numeric for loop. vk is set for the iteration ops (IFORL/JFORL), which add the step to the index. */
/* The init ops (FORI/JFORI) type-check the control slots and leave through vmeta_for on a mismatch. */
4299 case BC_JFORI:
4300 case BC_JFORL:
4301#if !LJ_HASJIT
4302 break; /* Without JIT support the two J* ops emit nothing. */
4303#endif
4304 case BC_FORI:
4305 case BC_IFORL:
4306 vk = (op == BC_IFORL || op == BC_JFORL);
4307 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4308 | lea RA, [BASE+RA*8] // RA = address of the control slots (see FOR_IDX etc.).
4309 if (LJ_DUALNUM) {
4310 | mov RB, FOR_IDX
4311 | checkint RB, >9 // Index is not an integer: use the FP variant.
4312 | mov TMPR, FOR_STOP
4313 if (!vk) {
4314 | checkint TMPR, ->vmeta_for
4315 | mov ITYPE, FOR_STEP
4316 | test ITYPEd, ITYPEd; js >5 // Negative step: inverted check at label 5.
4317 | sar ITYPE, 47;
4318 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for // Step must be an integer, too.
4319 } else {
4320#ifdef LUA_USE_ASSERT
4321 | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4322 | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4323#endif
4324 | mov ITYPE, FOR_STEP
4325 | test ITYPEd, ITYPEd; js >5 // Negative step: inverted check at label 5.
4326 | add RBd, ITYPEd; jo >1 // idx += step; on overflow go to label 1.
4327 | setint RB
4328 | mov FOR_IDX, RB
4329 }
4330 | cmp RBd, TMPRd // Signed compare of index and limit; mov keeps the flags.
4331 | mov FOR_EXT, RB
4332 if (op == BC_FORI) {
4333 | jle >7
4334 |1:
4335 |6:
4336 | branchPC RD
4337 } else if (op == BC_JFORI) {
4338 | branchPC RD
4339 | movzx RDd, PC_RD
4340 | jle =>BC_JLOOP
4341 |1:
4342 |6:
4343 } else if (op == BC_IFORL) {
4344 | jg >7
4345 |6:
4346 | branchPC RD
4347 |1:
4348 } else {
4349 | jle =>BC_JLOOP
4350 |1:
4351 |6:
4352 }
4353 |7:
4354 | ins_next
4355 |
4356 |5: // Invert check for negative step.
4357 if (!vk) {
4358 | sar ITYPE, 47;
4359 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4360 } else {
4361 | add RBd, ITYPEd; jo <1
4362 | setint RB
4363 | mov FOR_IDX, RB
4364 }
4365 | cmp RBd, TMPRd
4366 | mov FOR_EXT, RB
4367 if (op == BC_FORI) {
4368 | jge <7
4369 } else if (op == BC_JFORI) {
4370 | branchPC RD
4371 | movzx RDd, PC_RD
4372 | jge =>BC_JLOOP
4373 } else if (op == BC_IFORL) {
4374 | jl <7
4375 } else {
4376 | jge =>BC_JLOOP
4377 }
4378 | jmp <6
4379 |9: // Fallback to FP variant.
4380 if (!vk) {
4381 | jae ->vmeta_for
4382 }
4383 } else if (!vk) {
4384 | checknumtp FOR_IDX, ->vmeta_for
4385 }
4386 if (!vk) {
4387 | checknumtp FOR_STOP, ->vmeta_for
4388 } else {
4389#ifdef LUA_USE_ASSERT
4390 | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4391 | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4392#endif
4393 }
4394 | mov RB, FOR_STEP
4395 if (!vk) {
4396 | checknum RB, ->vmeta_for
4397 }
4398 | movsd xmm0, qword FOR_IDX
4399 | movsd xmm1, qword FOR_STOP
4400 if (vk) {
4401 | addsd xmm0, qword FOR_STEP // idx += step
4402 | movsd qword FOR_IDX, xmm0
4403 | test RB, RB; js >3 // Sign bit of the step selects the compare direction.
4404 } else {
4405 | jl >3 // Uses flags left by checknum above; the movsd loads in between keep them.
4406 }
4407 | ucomisd xmm1, xmm0 // Positive step: compare limit against index.
4408 |1:
4409 | movsd qword FOR_EXT, xmm0 // Flags from ucomisd stay valid across the store.
4410 if (op == BC_FORI) {
4411 |.if DUALNUM
4412 | jnb <7
4413 |.else
4414 | jnb >2
4415 | branchPC RD
4416 |.endif
4417 } else if (op == BC_JFORI) {
4418 | branchPC RD
4419 | movzx RDd, PC_RD
4420 | jnb =>BC_JLOOP
4421 } else if (op == BC_IFORL) {
4422 |.if DUALNUM
4423 | jb <7
4424 |.else
4425 | jb >2
4426 | branchPC RD
4427 |.endif
4428 } else {
4429 | jnb =>BC_JLOOP
4430 }
4431 |.if DUALNUM
4432 | jmp <6
4433 |.else
4434 |2:
4435 | ins_next
4436 |.endif
4437 |
4438 |3: // Invert comparison if step is negative.
4439 | ucomisd xmm0, xmm1
4440 | jmp <1
4441 break;
4442
4443 case BC_ITERL: /* Expands hotloop when JIT is set; the emitted code then runs on into BC_IITERL. */
4444 |.if JIT
4445 | hotloop RBd
4446 |.endif
4447 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4448 break;
4449
4450 case BC_JITERL: /* Iterator loop tail: continue while Stk[RA] is not nil. */
4451#if !LJ_HASJIT
4452 break; /* Without JIT support BC_JITERL emits nothing. */
4453#endif
4454 case BC_IITERL:
4455 | ins_AJ // RA = base, RD = target
4456 | lea RA, [BASE+RA*8]
4457 | mov RB, [RA]
4458 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4459 if (op == BC_JITERL) {
4460 | mov [RA-8], RB // Save control var, then continue at BC_JLOOP.
4461 | jmp =>BC_JLOOP
4462 } else {
4463 | branchPC RD // Otherwise save control var + branch.
4464 | mov [RA-8], RB
4465 }
4466 |1:
4467 | ins_next
4468 break;
4469
4470 case BC_LOOP: /* Expands hotloop when JIT is set; the emitted code then runs on into BC_ILOOP. */
4471 | ins_A // RA = base, RD = target (loop extent)
4472 | // Note: RA/RD is only used by trace recorder to determine scope/extent
4473 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
4474 |.if JIT
4475 | hotloop RBd
4476 |.endif
4477 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4478 break;
4479
4480 case BC_ILOOP: /* Emits only the operand decode and ins_next. */
4481 | ins_A // RA = base, RD = target (loop extent)
4482 | ins_next
4483 break;
4484
4485 case BC_JLOOP: /* Jump to the machine code of trace RD; the body is only assembled when JIT is set. */
4486 |.if JIT
4487 | ins_AD // RA = base (ignored), RD = traceno
4488 | mov RA, [DISPATCH+DISPATCH_J(trace)] // Trace table.
4489 | mov TRACE:RD, [RA+RD*8] // Trace object for this trace number.
4490 | mov RD, TRACE:RD->mcode // Its machine code entry address.
4491 | mov L:RB, SAVE_L
4492 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4493 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4494 | // Save additional callee-save registers only used in compiled code.
4495 |.if X64WIN
4496 | mov CSAVE_4, r12
4497 | mov CSAVE_3, r13
4498 | mov CSAVE_2, r14
4499 | mov CSAVE_1, r15
4500 | mov RA, rsp // Keep the old rsp as the base for the xmm saves.
4501 | sub rsp, 10*16+4*8 // Room for ten 16 byte xmm saves plus 4*8 bytes.
4502 | movdqa [RA-1*16], xmm6
4503 | movdqa [RA-2*16], xmm7
4504 | movdqa [RA-3*16], xmm8
4505 | movdqa [RA-4*16], xmm9
4506 | movdqa [RA-5*16], xmm10
4507 | movdqa [RA-6*16], xmm11
4508 | movdqa [RA-7*16], xmm12
4509 | movdqa [RA-8*16], xmm13
4510 | movdqa [RA-9*16], xmm14
4511 | movdqa [RA-10*16], xmm15
4512 |.else
4513 | sub rsp, 16
4514 | mov [rsp+16], r12
4515 | mov [rsp+8], r13
4516 |.endif
4517 | jmp RD // Enter the trace.
4518 |.endif
4519 break;
4520
4521 case BC_JMP:
4522 | ins_AJ // RA = unused, RD = target
4523 | branchPC RD
4524 | ins_next
4525 break;
4526
4527 /* -- Function headers -------------------------------------------------- */
4528
4529 /*
4530 ** Reminder: A function may be called with func/args above L->maxstack,
4531 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4532 ** too. This means all FUNC* ops (including fast functions) must check
4533 ** for stack overflow _before_ adding more slots!
4534 */
4535
4536 case BC_FUNCF:
4537 |.if JIT
4538 | hotcall RBd
4539 |.endif
4540 case BC_FUNCV: /* NYI: compiled vararg functions. */
4541 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4542 break;
4543
4544 case BC_JFUNCF:
4545#if !LJ_HASJIT
4546 break;
4547#endif
4548 case BC_IFUNCF:
4549 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4550 | mov KBASE, [PC-4+PC2PROTO(k)]
4551 | mov L:RB, SAVE_L
4552 | lea RA, [BASE+RA*8] // Top of frame.
4553 | cmp RA, L:RB->maxstack
4554 | ja ->vm_growstack_f
4555 | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4556 | cmp NARGS:RDd, RAd // Check for missing parameters.
4557 | jbe >3
4558 |2:
4559 if (op == BC_JFUNCF) {
4560 | movzx RDd, PC_RD
4561 | jmp =>BC_JLOOP
4562 } else {
4563 | ins_next
4564 }
4565 |
4566 |3: // Clear missing parameters.
4567 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4568 | add NARGS:RDd, 1
4569 | cmp NARGS:RDd, RAd
4570 | jbe <3
4571 | jmp <2
4572 break;
4573
4574 case BC_JFUNCV:
4575#if !LJ_HASJIT
4576 break;
4577#endif
4578 | int3 // NYI: compiled vararg functions
4579 break; /* NYI: compiled vararg functions. */
4580
4581 case BC_IFUNCV:
4582 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4583 | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4584 | lea RD, [BASE+NARGS:RD*8+8]
4585 | mov LFUNC:KBASE, [BASE-16]
4586 | mov [RD-8], RB // Store delta + FRAME_VARG.
4587 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4588 | mov L:RB, SAVE_L
4589 | lea RA, [RD+RA*8]
4590 | cmp RA, L:RB->maxstack
4591 | ja ->vm_growstack_v // Need to grow stack.
4592 | mov RA, BASE
4593 | mov BASE, RD
4594 | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4595 | test RBd, RBd
4596 | jz >2
4597 | add RA, 8
4598 |1: // Copy fixarg slots up to new frame.
4599 | add RA, 8
4600 | cmp RA, BASE
4601 | jnb >3 // Less args than parameters?
4602 | mov KBASE, [RA-16]
4603 | mov [RD], KBASE
4604 | add RD, 8
4605 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4606 | sub RBd, 1
4607 | jnz <1
4608 |2:
4609 if (op == BC_JFUNCV) {
4610 | movzx RDd, PC_RD
4611 | jmp =>BC_JLOOP
4612 } else {
4613 | mov KBASE, [PC-4+PC2PROTO(k)]
4614 | ins_next
4615 }
4616 |
4617 |3: // Clear missing parameters.
4618 | mov aword [RD], LJ_TNIL
4619 | add RD, 8
4620 | sub RBd, 1
4621 | jnz <3
4622 | jmp <2
4623 break;
4624
4625 case BC_FUNCC:
4626 case BC_FUNCCW:
4627 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4628 | mov CFUNC:RB, [BASE-16]
4629 | cleartp CFUNC:RB
4630 | mov KBASE, CFUNC:RB->f
4631 | mov L:RB, SAVE_L
4632 | lea RD, [BASE+NARGS:RD*8-8]
4633 | mov L:RB->base, BASE
4634 | lea RA, [RD+8*LUA_MINSTACK]
4635 | cmp RA, L:RB->maxstack
4636 | mov L:RB->top, RD
4637 if (op == BC_FUNCC) {
4638 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4639 } else {
4640 | mov CARG2, KBASE
4641 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4642 }
4643 | ja ->vm_growstack_c // Need to grow stack.
4644 | set_vmstate C
4645 if (op == BC_FUNCC) {
4646 | call KBASE // (lua_State *L)
4647 } else {
4648 | // (lua_State *L, lua_CFunction f)
4649 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4650 }
4651 | // nresults returned in eax (RD).
4652 | mov BASE, L:RB->base
4653 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4654 | set_vmstate INTERP
4655 | lea RA, [BASE+RD*8]
4656 | neg RA
4657 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4658 | mov PC, [BASE-8] // Fetch PC of caller.
4659 | jmp ->vm_returnc
4660 break;
4661
4662 /* ---------------------------------------------------------------------- */
4663
4664 default:
4665 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4666 exit(2);
4667 break;
4668 }
4669}
4670
4671static int build_backend(BuildCtx *ctx)
4672{
4673 int op;
4674 dasm_growpc(Dst, BC__MAX);
4675 build_subroutines(ctx);
4676 |.code_op
4677 for (op = 0; op < BC__MAX; op++)
4678 build_ins(ctx, (BCOp)op, op);
4679 return BC__MAX;
4680}
4681
4682/* Emit pseudo frame-info for all assembler functions. */
4683static void emit_asm_debug(BuildCtx *ctx)
4684{
4685 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4686 switch (ctx->mode) {
4687 case BUILD_elfasm:
4688 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
4689 fprintf(ctx->fp,
4690 ".Lframe0:\n"
4691 "\t.long .LECIE0-.LSCIE0\n"
4692 ".LSCIE0:\n"
4693 "\t.long 0xffffffff\n"
4694 "\t.byte 0x1\n"
4695 "\t.string \"\"\n"
4696 "\t.uleb128 0x1\n"
4697 "\t.sleb128 -8\n"
4698 "\t.byte 0x10\n"
4699 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4700 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4701 "\t.align 8\n"
4702 ".LECIE0:\n\n");
4703 fprintf(ctx->fp,
4704 ".LSFDE0:\n"
4705 "\t.long .LEFDE0-.LASFDE0\n"
4706 ".LASFDE0:\n"
4707 "\t.long .Lframe0\n"
4708 "\t.quad .Lbegin\n"
4709 "\t.quad %d\n"
4710 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4711 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4712 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4713 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4714 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4715#if LJ_NO_UNWIND
4716 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4717 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4718#endif
4719 "\t.align 8\n"
4720 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4721#if LJ_HASFFI
4722 fprintf(ctx->fp,
4723 ".LSFDE1:\n"
4724 "\t.long .LEFDE1-.LASFDE1\n"
4725 ".LASFDE1:\n"
4726 "\t.long .Lframe0\n"
4727 "\t.quad lj_vm_ffi_call\n"
4728 "\t.quad %d\n"
4729 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4730 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4731 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4732 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4733 "\t.align 8\n"
4734 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4735#endif
4736#if !LJ_NO_UNWIND
4737#if (defined(__sun__) && defined(__svr4__))
4738 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4739#else
4740 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4741#endif
4742 fprintf(ctx->fp,
4743 ".Lframe1:\n"
4744 "\t.long .LECIE1-.LSCIE1\n"
4745 ".LSCIE1:\n"
4746 "\t.long 0\n"
4747 "\t.byte 0x1\n"
4748 "\t.string \"zPR\"\n"
4749 "\t.uleb128 0x1\n"
4750 "\t.sleb128 -8\n"
4751 "\t.byte 0x10\n"
4752 "\t.uleb128 6\n" /* augmentation length */
4753 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4754 "\t.long lj_err_unwind_dwarf-.\n"
4755 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4756 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4757 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4758 "\t.align 8\n"
4759 ".LECIE1:\n\n");
4760 fprintf(ctx->fp,
4761 ".LSFDE2:\n"
4762 "\t.long .LEFDE2-.LASFDE2\n"
4763 ".LASFDE2:\n"
4764 "\t.long .LASFDE2-.Lframe1\n"
4765 "\t.long .Lbegin-.\n"
4766 "\t.long %d\n"
4767 "\t.uleb128 0\n" /* augmentation length */
4768 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4769 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4770 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4771 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4772 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4773 "\t.align 8\n"
4774 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4775#if LJ_HASFFI
4776 fprintf(ctx->fp,
4777 ".Lframe2:\n"
4778 "\t.long .LECIE2-.LSCIE2\n"
4779 ".LSCIE2:\n"
4780 "\t.long 0\n"
4781 "\t.byte 0x1\n"
4782 "\t.string \"zR\"\n"
4783 "\t.uleb128 0x1\n"
4784 "\t.sleb128 -8\n"
4785 "\t.byte 0x10\n"
4786 "\t.uleb128 1\n" /* augmentation length */
4787 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4788 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4789 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4790 "\t.align 8\n"
4791 ".LECIE2:\n\n");
4792 fprintf(ctx->fp,
4793 ".LSFDE3:\n"
4794 "\t.long .LEFDE3-.LASFDE3\n"
4795 ".LASFDE3:\n"
4796 "\t.long .LASFDE3-.Lframe2\n"
4797 "\t.long lj_vm_ffi_call-.\n"
4798 "\t.long %d\n"
4799 "\t.uleb128 0\n" /* augmentation length */
4800 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4801 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4802 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4803 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4804 "\t.align 8\n"
4805 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4806#endif
4807#endif
4808 break;
4809#if !LJ_NO_UNWIND
4810 /* Mental note: never let Apple design an assembler.
4811 ** Or a linker. Or a plastic case. But I digress.
4812 */
4813 case BUILD_machasm: {
4814#if LJ_HASFFI
4815 int fcsize = 0;
4816#endif
4817 int i;
4818 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4819 fprintf(ctx->fp,
4820 "EH_frame1:\n"
4821 "\t.set L$set$x,LECIEX-LSCIEX\n"
4822 "\t.long L$set$x\n"
4823 "LSCIEX:\n"
4824 "\t.long 0\n"
4825 "\t.byte 0x1\n"
4826 "\t.ascii \"zPR\\0\"\n"
4827 "\t.byte 0x1\n"
4828 "\t.byte 128-8\n"
4829 "\t.byte 0x10\n"
4830 "\t.byte 6\n" /* augmentation length */
4831 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4832 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4833 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4834 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4835 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4836 "\t.align 3\n"
4837 "LECIEX:\n\n");
4838 for (i = 0; i < ctx->nsym; i++) {
4839 const char *name = ctx->sym[i].name;
4840 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
4841 if (size == 0) continue;
4842#if LJ_HASFFI
4843 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4844#endif
4845 fprintf(ctx->fp,
4846 "%s.eh:\n"
4847 "LSFDE%d:\n"
4848 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4849 "\t.long L$set$%d\n"
4850 "LASFDE%d:\n"
4851 "\t.long LASFDE%d-EH_frame1\n"
4852 "\t.long %s-.\n"
4853 "\t.long %d\n"
4854 "\t.byte 0\n" /* augmentation length */
4855 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4856 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4857 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4858 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4859 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4860 "\t.align 3\n"
4861 "LEFDE%d:\n\n",
4862 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4863 }
4864#if LJ_HASFFI
4865 if (fcsize) {
4866 fprintf(ctx->fp,
4867 "EH_frame2:\n"
4868 "\t.set L$set$y,LECIEY-LSCIEY\n"
4869 "\t.long L$set$y\n"
4870 "LSCIEY:\n"
4871 "\t.long 0\n"
4872 "\t.byte 0x1\n"
4873 "\t.ascii \"zR\\0\"\n"
4874 "\t.byte 0x1\n"
4875 "\t.byte 128-8\n"
4876 "\t.byte 0x10\n"
4877 "\t.byte 1\n" /* augmentation length */
4878 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4879 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4880 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4881 "\t.align 3\n"
4882 "LECIEY:\n\n");
4883 fprintf(ctx->fp,
4884 "_lj_vm_ffi_call.eh:\n"
4885 "LSFDEY:\n"
4886 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4887 "\t.long L$set$yy\n"
4888 "LASFDEY:\n"
4889 "\t.long LASFDEY-EH_frame2\n"
4890 "\t.long _lj_vm_ffi_call-.\n"
4891 "\t.long %d\n"
4892 "\t.byte 0\n" /* augmentation length */
4893 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4894 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4895 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4896 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4897 "\t.align 3\n"
4898 "LEFDEY:\n\n", fcsize);
4899 }
4900#endif
4901 fprintf(ctx->fp, ".subsections_via_symbols\n");
4902 }
4903 break;
4904#endif
4905 default: /* Difficult for other modes. */
4906 break;
4907 }
4908}
4909