summaryrefslogtreecommitdiff
path: root/src/vm_x64.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_x64.dasc')
-rw-r--r--src/vm_x64.dasc4946
1 files changed, 4946 insertions, 0 deletions
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
new file mode 100644
index 00000000..a8649b4e
--- /dev/null
+++ b/src/vm_x64.dasc
@@ -0,0 +1,4946 @@
1|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch x64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|//-----------------------------------------------------------------------
14|
15|.if WIN
16|.define X64WIN, 1 // Windows/x64 calling conventions.
17|.endif
18|
19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, rdx // Not C callee-save, refetched anyway.
22|.if X64WIN
23|.define KBASE, rdi // Must be C callee-save.
24|.define PC, rsi // Must be C callee-save.
25|.define DISPATCH, rbx // Must be C callee-save.
26|.define KBASEd, edi
27|.define PCd, esi
28|.define DISPATCHd, ebx
29|.else
30|.define KBASE, r15 // Must be C callee-save.
31|.define PC, rbx // Must be C callee-save.
32|.define DISPATCH, r14 // Must be C callee-save.
33|.define KBASEd, r15d
34|.define PCd, ebx
35|.define DISPATCHd, r14d
36|.endif
37|
38|.define RA, rcx
39|.define RAd, ecx
40|.define RAH, ch
41|.define RAL, cl
42|.define RB, rbp // Must be rbp (C callee-save).
43|.define RBd, ebp
44|.define RC, rax // Must be rax.
45|.define RCd, eax
46|.define RCW, ax
47|.define RCH, ah
48|.define RCL, al
49|.define OP, RBd
50|.define RD, RC
51|.define RDd, RCd
52|.define RDW, RCW
53|.define RDL, RCL
54|.define TMPR, r10
55|.define TMPRd, r10d
56|.define ITYPE, r11
57|.define ITYPEd, r11d
58|
59|.if X64WIN
60|.define CARG1, rcx // x64/WIN64 C call arguments.
61|.define CARG2, rdx
62|.define CARG3, r8
63|.define CARG4, r9
64|.define CARG1d, ecx
65|.define CARG2d, edx
66|.define CARG3d, r8d
67|.define CARG4d, r9d
68|.else
69|.define CARG1, rdi // x64/POSIX C call arguments.
70|.define CARG2, rsi
71|.define CARG3, rdx
72|.define CARG4, rcx
73|.define CARG5, r8
74|.define CARG6, r9
75|.define CARG1d, edi
76|.define CARG2d, esi
77|.define CARG3d, edx
78|.define CARG4d, ecx
79|.define CARG5d, r8d
80|.define CARG6d, r9d
81|.endif
82|
83|// Type definitions. Some of these are only used for documentation.
84|.type L, lua_State
85|.type GL, global_State
86|.type TVALUE, TValue
87|.type GCOBJ, GCobj
88|.type STR, GCstr
89|.type TAB, GCtab
90|.type LFUNC, GCfuncL
91|.type CFUNC, GCfuncC
92|.type PROTO, GCproto
93|.type UPVAL, GCupval
94|.type NODE, Node
95|.type NARGS, int
96|.type TRACE, GCtrace
97|.type SBUF, SBuf
98|
99|// Stack layout while in interpreter. Must match with lj_frame.h.
100|//-----------------------------------------------------------------------
101|.if X64WIN // x64/Windows stack layout
102|
103|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
104|.macro saveregs_ // Push rdi, rsi, rbx, then reserve CFRAME_SPACE bytes of stack.
105| push rdi; push rsi; push rbx
106| sub rsp, CFRAME_SPACE
107|.endmacro
108|.macro saveregs // Full save: push rbp first, then do everything saveregs_ does.
109| push rbp; saveregs_
110|.endmacro
111|.macro restoreregs // Inverse of saveregs: release CFRAME_SPACE, pop in reverse push order.
112| add rsp, CFRAME_SPACE
113| pop rbx; pop rsi; pop rdi; pop rbp
114|.endmacro
115|
116|.define SAVE_CFRAME, aword [rsp+aword*13]
117|.define SAVE_PC, aword [rsp+aword*12]
118|.define SAVE_L, aword [rsp+aword*11]
119|.define SAVE_ERRF, dword [rsp+dword*21]
120|.define SAVE_NRES, dword [rsp+dword*20]
121|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123|.define SAVE_R4, aword [rsp+aword*8]
124|.define SAVE_R3, aword [rsp+aword*7]
125|.define SAVE_R2, aword [rsp+aword*6]
126|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
127|.define ARG5, aword [rsp+aword*4]
128|.define CSAVE_4, aword [rsp+aword*3]
129|.define CSAVE_3, aword [rsp+aword*2]
130|.define CSAVE_2, aword [rsp+aword*1]
131|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
133|
134|.define ARG5d, dword [rsp+dword*8]
135|.define TMP1, ARG5 // TMP1 overlaps ARG5
136|.define TMP1d, ARG5d
137|.define TMP1hi, dword [rsp+dword*9]
138|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
139|
140|//-----------------------------------------------------------------------
141|.else // x64/POSIX stack layout
142|
143|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
144|.macro saveregs_ // Push rbx, r15, r14 (plus r13, r12 if NO_UNWIND), then reserve CFRAME_SPACE.
145| push rbx; push r15; push r14
146|.if NO_UNWIND
147| push r13; push r12
148|.endif
149| sub rsp, CFRAME_SPACE
150|.endmacro
151|.macro saveregs // Full save: push rbp first, then do everything saveregs_ does.
152| push rbp; saveregs_
153|.endmacro
154|.macro restoreregs // Inverse of saveregs: release CFRAME_SPACE, pop in reverse push order.
155| add rsp, CFRAME_SPACE
156|.if NO_UNWIND
157| pop r12; pop r13
158|.endif
159| pop r14; pop r15; pop rbx; pop rbp
160|.endmacro
161|
162|//----- 16 byte aligned,
163|.if NO_UNWIND
164|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
165|.define SAVE_R4, aword [rsp+aword*10]
166|.define SAVE_R3, aword [rsp+aword*9]
167|.define SAVE_R2, aword [rsp+aword*8]
168|.define SAVE_R1, aword [rsp+aword*7]
169|.define SAVE_RU2, aword [rsp+aword*6]
170|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
171|.else
172|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
173|.define SAVE_R4, aword [rsp+aword*8]
174|.define SAVE_R3, aword [rsp+aword*7]
175|.define SAVE_R2, aword [rsp+aword*6]
176|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
177|.endif
178|.define SAVE_CFRAME, aword [rsp+aword*4]
179|.define SAVE_PC, aword [rsp+aword*3]
180|.define SAVE_L, aword [rsp+aword*2]
181|.define SAVE_ERRF, dword [rsp+dword*3]
182|.define SAVE_NRES, dword [rsp+dword*2]
183|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
184|//----- 16 byte aligned
185|
186|.define TMP1d, dword [rsp]
187|.define TMP1hi, dword [rsp+dword*1]
188|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
189|
190|.endif
191|
192|//-----------------------------------------------------------------------
193|
194|// Instruction headers.
195|.macro ins_A; .endmacro // Emits nothing: no further operand decoding.
196|.macro ins_AD; .endmacro // Emits nothing: no further operand decoding.
197|.macro ins_AJ; .endmacro // Emits nothing: no further operand decoding.
198|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro // RB = bits 8-15 of RC, then RC = bits 0-7 of RC.
199|.macro ins_AB_; movzx RBd, RCH; .endmacro // RB = bits 8-15 of RC. RC itself is left as is.
200|.macro ins_A_C; movzx RCd, RCL; .endmacro // RC = bits 0-7 of RC.
201|.macro ins_AND; not RD; .endmacro // RD = ~RD (bitwise complement of the full register).
202|
203|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
204|.macro ins_NEXT
205| mov RCd, [PC] // Fetch the 32 bit instruction word at PC.
206| movzx RAd, RCH // RA = bits 8-15 of the instruction.
207| movzx OP, RCL // OP = bits 0-7 of the instruction. Note: OP is RBd.
208| add PC, 4 // PC now points after the fetched instruction.
209| shr RCd, 16 // RC (= RD) = bits 16-31 of the instruction.
210| jmp aword [DISPATCH+OP*8] // Indirect jump through entry OP of the table at DISPATCH (8 bytes per entry).
211|.endmacro
212|
213|// Instruction footer.
214|.if 1
215| // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use.
216| .define ins_next, ins_NEXT
217| .define ins_next_, ins_NEXT
218|.else
219| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
220| // Affects only certain kinds of benchmarks (and only with -j off).
221| // Around 10%-30% slower on Core2, much slower on P4.
222| .macro ins_next
223| jmp ->ins_next
224| .endmacro
225| .macro ins_next_
226| ->ins_next:
227| ins_NEXT
228| .endmacro
229|.endif
230|
231|// Call decode and dispatch.
232|.macro ins_callt
233| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
234| mov PC, LFUNC:RB->pc // Load PC from the function object in RB.
235| mov RAd, [PC] // Fetch the 32 bit instruction word at the new PC.
236| movzx OP, RAL // OP = bits 0-7. This overwrites RB, since OP is RBd.
237| movzx RAd, RAH // RA = bits 8-15.
238| add PC, 4 // PC now points after the fetched instruction.
239| jmp aword [DISPATCH+OP*8] // Dispatch. Unlike ins_NEXT, RD is not touched and still holds nargs+1.
240|.endmacro
241|
242|.macro ins_call
243| // BASE = new base, RB = LFUNC, RD = nargs+1
244| mov [BASE-8], PC // Store the current PC in the slot below the new base.
245| ins_callt
246|.endmacro
247|
248|//-----------------------------------------------------------------------
249|
250|// Macros to clear or set tags.
251|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro // Zero the top 17 bits of reg. Changes the flags.
252|.macro settp, reg, tp // In-place form: reg |= tp<<47. Overwrites ITYPE.
253| mov64 ITYPE, ((uint64_t)tp<<47)
254| or reg, ITYPE
255|.endmacro
256|.macro settp, dst, reg, tp // Copying form: dst = reg | tp<<47. dst is written first, so it must differ from reg.
257| mov64 dst, ((uint64_t)tp<<47)
258| or dst, reg
259|.endmacro
260|.macro setint, reg // In-place settp with tp = LJ_TISNUM. Overwrites ITYPE.
261| settp reg, LJ_TISNUM
262|.endmacro
263|.macro setint, dst, reg // Copying settp with tp = LJ_TISNUM. dst must differ from reg.
264| settp dst, reg, LJ_TISNUM
265|.endmacro
266|
267|// Macros to test operand types.
268|.macro checktp_nc, reg, tp, target // Jump to target unless (reg>>47) == tp. reg is unchanged, ITYPE is overwritten.
269| mov ITYPE, reg
270| sar ITYPE, 47 // Arithmetic shift: ITYPE = sign-extended top 17 bits of reg.
271| cmp ITYPEd, tp
272| jne target
273|.endmacro
274|.macro checktp, reg, tp, target // Same test as checktp_nc, but the top 17 bits of reg are cleared, even if the jump is taken.
275| mov ITYPE, reg
276| cleartp reg // Must precede the cmp, since cleartp changes the flags.
277| sar ITYPE, 47
278| cmp ITYPEd, tp
279| jne target
280|.endmacro
281|.macro checktptp, src, tp, target // Same instruction sequence as checktp_nc.
282| mov ITYPE, src
283| sar ITYPE, 47
284| cmp ITYPEd, tp
285| jne target
286|.endmacro
287|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro // checktp against LJ_TSTR.
288|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro // checktp against LJ_TTAB.
289|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro // checktp against LJ_TFUNC.
290|
291|.macro checknumx, reg, target, jump // Compare (reg>>47) with LJ_TISNUM, then emit the given jump to target. Overwrites ITYPE.
292| mov ITYPE, reg
293| sar ITYPE, 47
294| cmp ITYPEd, LJ_TISNUM
295| jump target
296|.endmacro
297|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro // Jump if tag != LJ_TISNUM.
298|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro // Same expansion as checkint.
299|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro // Jump if tag >= LJ_TISNUM (unsigned compare).
300|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro // Same expansion as checknum.
301|.macro checknumber, src, target; checknumx src, target, ja; .endmacro // Jump if tag > LJ_TISNUM (unsigned compare).
302|
303|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro
304|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro
305|
306|// These operands must be used with movzx.
307|.define PC_OP, byte [PC-4]
308|.define PC_RA, byte [PC-3]
309|.define PC_RB, byte [PC-1]
310|.define PC_RC, byte [PC-2]
311|.define PC_RD, word [PC-2]
312|
313|.macro branchPC, reg
314| lea PC, [PC+reg*4-BCBIAS_J*4]
315|.endmacro
316|
317|// Assumes DISPATCH is relative to GL.
318#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
319#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
320|
321#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
322|
323|// Decrement hashed hotcount and trigger trace recorder if zero.
324|.macro hotloop, reg
325| mov reg, PCd // Hash input: low 32 bits of PC. reg is overwritten.
326| shr reg, 1
327| and reg, HOTCOUNT_PCMASK // reg = (PC>>1) & HOTCOUNT_PCMASK, used as byte offset below.
328| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP // Decrement the 16 bit counter by HOTCOUNT_LOOP.
329| jb ->vm_hotloop // Taken on borrow, i.e. the counter was below HOTCOUNT_LOOP.
330|.endmacro
331|
332|.macro hotcall, reg
333| mov reg, PCd // Hash input: low 32 bits of PC. reg is overwritten.
334| shr reg, 1
335| and reg, HOTCOUNT_PCMASK // reg = (PC>>1) & HOTCOUNT_PCMASK, used as byte offset below.
336| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL // Decrement the 16 bit counter by HOTCOUNT_CALL.
337| jb ->vm_hotcall // Taken on borrow, i.e. the counter was below HOTCOUNT_CALL.
338|.endmacro
339|
340|// Set current VM state.
341|.macro set_vmstate, st
342| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
343|.endmacro
344|
345|.macro fpop1; fstp st1; .endmacro
346|
347|// Synthesize SSE FP constants.
348|.macro sseconst_abs, reg, tmp // Synthesize abs mask. Overwrites tmp.
349| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp // Build the 64 bit constant in tmp, then copy it to reg.
350|.endmacro
351|
352|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. Overwrites tmp.
353| mov64 tmp, U64x(val,00000000); movd reg, tmp // val is given as hex digits without a 0x prefix.
354|.endmacro
355|
356|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
357| sseconst_hi reg, tmp, 80000000
358|.endmacro
359|.macro sseconst_1, reg, tmp // Synthesize 1.0.
360| sseconst_hi reg, tmp, 3ff00000
361|.endmacro
362|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
363| sseconst_hi reg, tmp, 43300000
364|.endmacro
365|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
366| sseconst_hi reg, tmp, 43380000
367|.endmacro
368|
369|// Move table write barrier back. Overwrites reg.
370|.macro barrierback, tab, reg
371| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
372| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = previous value of g->gc.grayagain.
373| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // g->gc.grayagain now points to tab.
374| mov tab->gclist, reg // tab->gclist links to the previous grayagain value.
375|.endmacro
376|
377|//-----------------------------------------------------------------------
378
379/* Generate subroutines used by opcodes and other parts of the VM. */
380/* The .code_sub section should be last to help static branch prediction. */
381static void build_subroutines(BuildCtx *ctx)
382{
383 |.code_sub
384 |
385 |//-----------------------------------------------------------------------
386 |//-- Return handling ----------------------------------------------------
387 |//-----------------------------------------------------------------------
388 |
389 |->vm_returnp:
390 | test PCd, FRAME_P
391 | jz ->cont_dispatch
392 |
393 | // Return from pcall or xpcall fast func.
394 | and PC, -8
395 | sub BASE, PC // Restore caller base.
396 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
397 | mov PC, [BASE-8] // Fetch PC of previous frame.
398 | // Prepending may overwrite the pcall frame, so do it at the end.
399 | mov_true ITYPE
400 | mov aword [BASE+RA], ITYPE // Prepend true to results.
401 |
402 |->vm_returnc:
403 | add RDd, 1 // RD = nresults+1
404 | jz ->vm_unwind_yield
405 | mov MULTRES, RDd
406 | test PC, FRAME_TYPE
407 | jz ->BC_RET_Z // Handle regular return to Lua.
408 |
409 |->vm_return:
410 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
411 | xor PC, FRAME_C
412 | test PCd, FRAME_TYPE
413 | jnz ->vm_returnp
414 |
415 | // Return to C.
416 | set_vmstate C
417 | and PC, -8
418 | sub PC, BASE
419 | neg PC // Previous base = BASE - delta.
420 |
421 | sub RDd, 1
422 | jz >2
423 |1: // Move results down.
424 | mov RB, [BASE+RA]
425 | mov [BASE-16], RB
426 | add BASE, 8
427 | sub RDd, 1
428 | jnz <1
429 |2:
430 | mov L:RB, SAVE_L
431 | mov L:RB->base, PC
432 |3:
433 | mov RDd, MULTRES
434 | mov RAd, SAVE_NRES // RA = wanted nresults+1
435 |4:
436 | cmp RAd, RDd
437 | jne >6 // More/less results wanted?
438 |5:
439 | sub BASE, 16
440 | mov L:RB->top, BASE
441 |
442 |->vm_leave_cp:
443 | mov RA, SAVE_CFRAME // Restore previous C frame.
444 | mov L:RB->cframe, RA
445 | xor eax, eax // Ok return status for vm_pcall.
446 |
447 |->vm_leave_unw:
448 | restoreregs
449 | ret
450 |
451 |6:
452 | jb >7 // Less results wanted?
453 | // More results wanted. Check stack size and fill up results with nil.
454 | cmp BASE, L:RB->maxstack
455 | ja >8
456 | mov aword [BASE-16], LJ_TNIL
457 | add BASE, 8
458 | add RDd, 1
459 | jmp <4
460 |
461 |7: // Less results wanted.
462 | test RAd, RAd
463 | jz <5 // But check for LUA_MULTRET+1.
464 | sub RA, RD // Negative result!
465 | lea BASE, [BASE+RA*8] // Correct top.
466 | jmp <5
467 |
468 |8: // Corner case: need to grow stack for filling up results.
469 | // This can happen if:
470 | // - A C function grows the stack (a lot).
471 | // - The GC shrinks the stack in between.
472 | // - A return back from a lua_call() with (high) nresults adjustment.
473 | mov L:RB->top, BASE // Save current top held in BASE (yes).
474 | mov MULTRES, RDd // Need to fill only remainder with nil.
475 | mov CARG2d, RAd
476 | mov CARG1, L:RB
477 | call extern lj_state_growstack // (lua_State *L, int n)
478 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
479 | jmp <3
480 |
481 |->vm_unwind_yield:
482 | mov al, LUA_YIELD
483 | jmp ->vm_unwind_c_eh
484 |
485 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
486 | // (void *cframe, int errcode)
487 | mov eax, CARG2d // Error return status for vm_pcall.
488 | mov rsp, CARG1
489 |->vm_unwind_c_eh: // Landing pad for external unwinder.
490 | mov L:RB, SAVE_L
491 | mov GL:RB, L:RB->glref
492 | mov dword GL:RB->vmstate, ~LJ_VMST_C
493 | jmp ->vm_leave_unw
494 |
495 |->vm_unwind_rethrow: // Code is only emitted for non-Windows targets; on X64WIN the label has no body of its own.
496 |.if not X64WIN
497 | mov CARG1, SAVE_L // Must be read before restoreregs, since SAVE_L is rsp-relative.
498 | mov CARG2d, eax // errcode = current value of eax.
499 | restoreregs
500 | jmp extern lj_err_throw // (lua_State *L, int errcode)
501 |.endif
502 |
503 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
504 | // (void *cframe)
505 | and CARG1, CFRAME_RAWMASK
506 | mov rsp, CARG1
507 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
508 | mov L:RB, SAVE_L
509 | mov RDd, 1+1 // Really 1+2 results, incr. later.
510 | mov BASE, L:RB->base
511 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
512 | add DISPATCH, GG_G2DISP
513 | mov PC, [BASE-8] // Fetch PC of previous frame.
514 | mov_false RA
515 | mov RB, [BASE]
516 | mov [BASE-16], RA // Prepend false to error message.
517 | mov [BASE-8], RB
518 | mov RA, -16 // Results start at BASE+RA = BASE-16.
519 | set_vmstate INTERP
520 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
521 |
522 |//-----------------------------------------------------------------------
523 |//-- Grow stack for calls -----------------------------------------------
524 |//-----------------------------------------------------------------------
525 |
526 |->vm_growstack_c: // Grow stack for C function.
527 | mov CARG2d, LUA_MINSTACK // n = LUA_MINSTACK.
528 | jmp >2
529 |
530 |->vm_growstack_v: // Grow stack for vararg Lua function.
531 | sub RD, 16 // LJ_FR2
532 | jmp >1
533 |
534 |->vm_growstack_f: // Grow stack for fixarg Lua function.
535 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
536 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8. Stored to L->top below.
537 |1:
538 | movzx RAd, byte [PC-4+PC2PROTO(framesize)] // RA = framesize byte, read relative to PC.
539 | add PC, 4 // Must point after first instruction.
540 | mov L:RB->base, BASE
541 | mov L:RB->top, RD
542 | mov SAVE_PC, PC
543 | mov CARG2, RA // n = framesize.
544 |2:
545 | // RB = L, L->base = new base, L->top = top
546 | mov CARG1, L:RB
547 | call extern lj_state_growstack // (lua_State *L, int n)
548 | mov BASE, L:RB->base // Reload BASE and top from L after the call.
549 | mov RD, L:RB->top
550 | mov LFUNC:RB, [BASE-16] // Reload the function from the slot at BASE-16. RB no longer holds L.
551 | cleartp LFUNC:RB
552 | sub RD, BASE
553 | shr RDd, 3 // Byte distance -> number of 8 byte slots.
554 | add NARGS:RDd, 1
555 | // BASE = new base, RB = LFUNC, RD = nargs+1
556 | ins_callt // Just retry the call.
557 |
558 |//-----------------------------------------------------------------------
559 |//-- Entry points into the assembler VM ---------------------------------
560 |//-----------------------------------------------------------------------
561 |
562 |->vm_resume: // Setup C frame and resume thread.
563 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
564 | saveregs
565 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
566 | mov SAVE_L, CARG1
567 | mov RA, CARG2
568 | mov PCd, FRAME_CP
569 | xor RDd, RDd // RD = 0. Used as the zero source for all stores and compares below.
570 | lea KBASE, [rsp+CFRAME_RESUME] // cframe = rsp + CFRAME_RESUME. Use the 64 bit rsp, like the other cframe stores.
571 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
572 | add DISPATCH, GG_G2DISP
573 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
574 | mov SAVE_CFRAME, RD // No previous C frame is recorded for a resume.
575 | mov SAVE_NRES, RDd
576 | mov SAVE_ERRF, RDd
577 | mov L:RB->cframe, KBASE
578 | cmp byte L:RB->status, RDL // L->status == 0?
579 | je >2 // Initial resume (like a call).
580 |
581 | // Resume after yield (like a return).
582 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
583 | set_vmstate INTERP
584 | mov byte L:RB->status, RDL // Reset L->status to 0.
585 | mov BASE, L:RB->base
586 | mov RD, L:RB->top
587 | sub RD, RA
588 | shr RDd, 3 // Byte distance -> number of 8 byte slots.
589 | add RDd, 1 // RD = nresults+1
590 | sub RA, BASE // RA = resultofs
591 | mov PC, [BASE-8] // Fetch the frame link/PC stored below BASE.
592 | mov MULTRES, RDd
593 | test PCd, FRAME_TYPE
594 | jz ->BC_RET_Z // No frame type bits set: take the BC_RET_Z path.
595 | jmp ->vm_return
596 |
597 |->vm_pcall: // Setup protected C frame and enter VM.
598 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
599 | saveregs
600 | mov PCd, FRAME_CP
601 | mov SAVE_ERRF, CARG4d
602 | jmp >1
603 |
604 |->vm_call: // Setup C frame and enter VM.
605 | // (lua_State *L, TValue *base, int nres1)
606 | saveregs
607 | mov PCd, FRAME_C
608 |
609 |1: // Entry point for vm_pcall above (PC = ftype).
610 | mov SAVE_NRES, CARG3d
611 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
612 | mov SAVE_L, CARG1
613 | mov RA, CARG2
614 |
615 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
616 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
617 | mov SAVE_CFRAME, KBASE
618 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
619 | add DISPATCH, GG_G2DISP
620 | mov L:RB->cframe, rsp
621 |
622 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
623 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
624 | set_vmstate INTERP
625 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
626 | add PC, RA
627 | sub PC, BASE // PC = frame delta + frame type
628 |
629 | mov RD, L:RB->top
630 | sub RD, RA
631 | shr NARGS:RDd, 3
632 | add NARGS:RDd, 1 // RD = nargs+1
633 |
634 |->vm_call_dispatch:
635 | mov LFUNC:RB, [RA-16]
636 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
637 |
638 |->vm_call_dispatch_f:
639 | mov BASE, RA
640 | ins_call
641 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
642 |
643 |->vm_cpcall: // Setup protected C frame, call C.
644 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
645 | saveregs
646 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
647 | mov SAVE_L, CARG1
648 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
649 |
650 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
651 | sub KBASE, L:RB->top
652 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
653 | mov SAVE_ERRF, 0 // No error function.
654 | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
655 | add DISPATCH, GG_G2DISP
656 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
657 |
658 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
659 | mov SAVE_CFRAME, KBASE
660 | mov L:RB->cframe, rsp
661 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
662 |
663 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
664 | // TValue * (new base) or NULL returned in rax (RC).
665 | test RC, RC
666 | jz ->vm_leave_cp // No base? Just remove C frame.
667 | mov RA, RC // RA = new base, as expected at the shared entry point.
668 | mov PCd, FRAME_CP
669 | jmp <2 // Else continue with the call.
670 |
671 |//-----------------------------------------------------------------------
672 |//-- Metamethod handling ------------------------------------------------
673 |//-----------------------------------------------------------------------
674 |
675 |//-- Continuation dispatch ----------------------------------------------
676 |
677 |->cont_dispatch:
678 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
679 | add RA, BASE
680 | and PC, -8
681 | mov RB, BASE
682 | sub BASE, PC // Restore caller BASE.
683 | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
684 | mov RC, RA // ... in [RC]
685 | mov PC, [RB-24] // Restore PC from [cont|PC].
686 | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
687 |.if FFI
688 | cmp RA, 1
689 | jbe >1
690 |.endif
691 | mov LFUNC:KBASE, [BASE-16]
692 | cleartp LFUNC:KBASE
693 | mov KBASE, LFUNC:KBASE->pc
694 | mov KBASE, [KBASE+PC2PROTO(k)]
695 | // BASE = base, RC = result, RB = meta base
696 | jmp RA // Jump to continuation.
697 |
698 |.if FFI
699 |1:
700 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
701 | // cont = 0: Tail call from C function.
702 | sub RB, BASE
703 | shr RBd, 3
704 | lea RDd, [RBd-3]
705 | jmp ->vm_call_tail
706 |.endif
707 |
708 |->cont_cat: // BASE = base, RC = result, RB = mbase
709 | movzx RAd, PC_RB
710 | sub RB, 32
711 | lea RA, [BASE+RA*8]
712 | sub RA, RB
713 | je ->cont_ra
714 | neg RA
715 | shr RAd, 3
716 |.if X64WIN
717 | mov CARG3d, RAd
718 | mov L:CARG1, SAVE_L
719 | mov L:CARG1->base, BASE
720 | mov RC, [RC]
721 | mov [RB], RC
722 | mov CARG2, RB
723 |.else
724 | mov L:CARG1, SAVE_L
725 | mov L:CARG1->base, BASE
726 | mov CARG3d, RAd
727 | mov RA, [RC]
728 | mov [RB], RA
729 | mov CARG2, RB
730 |.endif
731 | jmp ->BC_CAT_Z
732 |
733 |//-- Table indexing metamethods -----------------------------------------
734 |
735 |->vmeta_tgets:
736 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
737 | mov TMP1, STR:RC
738 | lea RC, TMP1
739 | cmp PC_OP, BC_GGET
740 | jne >1
741 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
742 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
743 | mov [RB], TAB:RA
744 | jmp >2
745 |
746 |->vmeta_tgetb:
747 | movzx RCd, PC_RC
748 |.if DUALNUM
749 | setint RC
750 | mov TMP1, RC
751 |.else
752 | cvtsi2sd xmm0, RCd
753 | movsd TMP1, xmm0
754 |.endif
755 | lea RC, TMP1
756 | jmp >1
757 |
758 |->vmeta_tgetv:
759 | movzx RCd, PC_RC // Reload TValue *k from RC.
760 | lea RC, [BASE+RC*8]
761 |1:
762 | movzx RBd, PC_RB // Reload TValue *t from RB.
763 | lea RB, [BASE+RB*8]
764 |2:
765 | mov L:CARG1, SAVE_L
766 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
767 | mov CARG2, RB
768 | mov CARG3, RC
769 | mov L:RB, L:CARG1
770 | mov SAVE_PC, PC
771 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
772 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
773 | mov BASE, L:RB->base
774 | test RC, RC
775 | jz >3
776 |->cont_ra: // BASE = base, RC = result
777 | movzx RAd, PC_RA
778 | mov RB, [RC]
779 | mov [BASE+RA*8], RB
780 | ins_next
781 |
782 |3: // Call __index metamethod.
783 | // BASE = base, L->top = new base, stack = cont/func/t/k
784 | mov RA, L:RB->top
785 | mov [RA-24], PC // [cont|PC]
786 | lea PC, [RA+FRAME_CONT]
787 | sub PC, BASE
788 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
789 | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
790 | cleartp LFUNC:RB
791 | jmp ->vm_call_dispatch_f
792 |
793 |->vmeta_tgetr:
794 | mov CARG1, TAB:RB
795 | mov RB, BASE // Save BASE.
796 | mov CARG2d, RCd // Caveat: CARG2 == BASE
797 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
798 | // cTValue * or NULL returned in rax (RC).
799 | movzx RAd, PC_RA // Reload the A operand from the instruction before PC.
800 | mov BASE, RB // Restore BASE.
801 | test RC, RC
802 | jnz ->BC_TGETR_Z // Non-NULL result: continue at BC_TGETR_Z with RC = result pointer.
803 | mov ITYPE, LJ_TNIL // NULL result: continue at BC_TGETR2_Z with ITYPE = LJ_TNIL.
804 | jmp ->BC_TGETR2_Z
805 |
806 |//-----------------------------------------------------------------------
807 |
808 |->vmeta_tsets:
809 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
810 | mov TMP1, STR:RC
811 | lea RC, TMP1
812 | cmp PC_OP, BC_GSET
813 | jne >1
814 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
815 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
816 | mov [RB], TAB:RA
817 | jmp >2
818 |
819 |->vmeta_tsetb:
820 | movzx RCd, PC_RC
821 |.if DUALNUM
822 | setint RC
823 | mov TMP1, RC
824 |.else
825 | cvtsi2sd xmm0, RCd
826 | movsd TMP1, xmm0
827 |.endif
828 | lea RC, TMP1
829 | jmp >1
830 |
831 |->vmeta_tsetv:
832 | movzx RCd, PC_RC // Reload TValue *k from RC.
833 | lea RC, [BASE+RC*8]
834 |1:
835 | movzx RBd, PC_RB // Reload TValue *t from RB.
836 | lea RB, [BASE+RB*8]
837 |2:
838 | mov L:CARG1, SAVE_L
839 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
840 | mov CARG2, RB
841 | mov CARG3, RC
842 | mov L:RB, L:CARG1
843 | mov SAVE_PC, PC
844 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
845 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
846 | mov BASE, L:RB->base
847 | test RC, RC
848 | jz >3
849 | // NOBARRIER: lj_meta_tset ensures the table is not black.
850 | movzx RAd, PC_RA
851 | mov RB, [BASE+RA*8]
852 | mov [RC], RB
853 |->cont_nop: // BASE = base, (RC = result)
854 | ins_next
855 |
856 |3: // Call __newindex metamethod.
857 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
858 | mov RA, L:RB->top
859 | mov [RA-24], PC // [cont|PC]
860 | movzx RCd, PC_RA
861 | // Copy value to third argument.
862 | mov RB, [BASE+RC*8]
863 | mov [RA+16], RB
864 | lea PC, [RA+FRAME_CONT]
865 | sub PC, BASE
866 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
867 | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
868 | cleartp LFUNC:RB
869 | jmp ->vm_call_dispatch_f
870 |
871 |->vmeta_tsetr:
872 |.if X64WIN
873 | mov L:CARG1, SAVE_L
874 | mov CARG3d, RCd
875 | mov L:CARG1->base, BASE
876 | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. After the swap: CARG2 = table, RB = BASE.
877 |.else
878 | mov L:CARG1, SAVE_L
879 | mov CARG2, TAB:RB
880 | mov L:CARG1->base, BASE
881 | mov RB, BASE // Save BASE.
882 | mov CARG3d, RCd // Caveat: CARG3 == BASE.
883 |.endif
884 | mov SAVE_PC, PC
885 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
886 | // TValue * returned in rax (RC).
887 | movzx RAd, PC_RA // Reload the A operand from the instruction before PC.
888 | mov BASE, RB // Restore BASE.
889 | jmp ->BC_TSETR_Z
890 |
891 |//-- Comparison metamethods ---------------------------------------------
892 |
893 |->vmeta_comp:
894 | movzx RDd, PC_RD
895 | movzx RAd, PC_RA
896 | mov L:RB, SAVE_L
897 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
898 |.if X64WIN
899 | lea CARG3, [BASE+RD*8]
900 | lea CARG2, [BASE+RA*8]
901 |.else
902 | lea CARG2, [BASE+RA*8]
903 | lea CARG3, [BASE+RD*8]
904 |.endif
905 | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
906 | movzx CARG4d, PC_OP
907 | mov SAVE_PC, PC
908 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
909 | // 0/1 or TValue * (metamethod) returned in eax (RC).
910 |3:
911 | mov BASE, L:RB->base
912 | cmp RC, 1
913 | ja ->vmeta_binop
914 |4:
915 | lea PC, [PC+4]
916 | jb >6
917 |5:
918 | movzx RDd, PC_RD
919 | branchPC RD
920 |6:
921 | ins_next
922 |
923 |->cont_condt: // BASE = base, RC = result
924 | add PC, 4
925 | mov ITYPE, [RC]
926 | sar ITYPE, 47
927 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
928 | jb <5
929 | jmp <6
930 |
931 |->cont_condf: // BASE = base, RC = result
932 | mov ITYPE, [RC]
933 | sar ITYPE, 47
934 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
935 | jmp <4
936 |
937 |->vmeta_equal:
938 | cleartp TAB:RD
939 | sub PC, 4
940 |.if X64WIN
941 | mov CARG3, RD
942 | mov CARG4d, RBd
943 | mov L:RB, SAVE_L
944 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
945 | mov CARG2, RA
946 | mov CARG1, L:RB // Caveat: CARG1 == RA.
947 |.else
948 | mov CARG2, RA
949 | mov CARG4d, RBd // Caveat: CARG4 == RA.
950 | mov L:RB, SAVE_L
951 | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
952 | mov CARG3, RD
953 | mov CARG1, L:RB
954 |.endif
955 | mov SAVE_PC, PC
956 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
957 | // 0/1 or TValue * (metamethod) returned in eax (RC).
958 | jmp <3
959 |
960 |->vmeta_equal_cd:
961 |.if FFI
962 | sub PC, 4
963 | mov L:RB, SAVE_L
964 | mov L:RB->base, BASE
965 | mov CARG1, L:RB
966 | mov CARG2d, dword [PC-4]
967 | mov SAVE_PC, PC
968 | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
969 | // 0/1 or TValue * (metamethod) returned in eax (RC).
970 | jmp <3
971 |.endif
972 |
973 |->vmeta_istype:
974 | mov L:RB, SAVE_L
975 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
976 | mov CARG2d, RAd
977 | mov CARG3d, RDd
978 | mov L:CARG1, L:RB
979 | mov SAVE_PC, PC
980 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
981 | mov BASE, L:RB->base
982 | jmp <6
983 |
984 |//-- Arithmetic metamethods ---------------------------------------------
985 |
986 |->vmeta_arith_vno: // Entry: with DUALNUM, RB/RC are re-decoded from the instruction, then falls into vmeta_arith_vn.
987 |.if DUALNUM
988 | movzx RBd, PC_RB
989 | movzx RCd, PC_RC
990 |.endif
991 |->vmeta_arith_vn:
992 | lea RC, [KBASE+RC*8] // RC = address of slot RC relative to KBASE.
993 | jmp >1 // Label 1 turns RB into a BASE-relative address.
994 |
995 |->vmeta_arith_nvo: // Entry: like vmeta_arith_nv, but RB/RC are re-decoded first with DUALNUM.
996 |.if DUALNUM
997 | movzx RBd, PC_RB
998 | movzx RCd, PC_RC
999 |.endif
1000 |->vmeta_arith_nv:
1001 | lea TMPR, [KBASE+RC*8]
1002 | lea RC, [BASE+RB*8]
1003 | mov RB, TMPR // Operands swapped: RB = KBASE-relative address, RC = BASE-relative address.
1004 | jmp >2
1005 |
1006 |->vmeta_unm: // Unary entry: both operand pointers refer to the same slot BASE+RD*8.
1007 | lea RC, [BASE+RD*8]
1008 | mov RB, RC
1009 | jmp >2
1010 |
1011 |->vmeta_arith_vvo: // Entry: like vmeta_arith_vv, but RB/RC are re-decoded first with DUALNUM.
1012 |.if DUALNUM
1013 | movzx RBd, PC_RB
1014 | movzx RCd, PC_RC
1015 |.endif
1016 |->vmeta_arith_vv:
1017 | lea RC, [BASE+RC*8]
1018 |1:
1019 | lea RB, [BASE+RB*8]
1020 |2: // Common tail: RB and RC hold operand addresses, RA is still a slot index.
1021 | lea RA, [BASE+RA*8]
1022 |.if X64WIN
1023 | mov CARG3, RB
1024 | mov CARG4, RC
1025 | movzx RCd, PC_OP
1026 | mov ARG5d, RCd
1027 | mov L:RB, SAVE_L
1028 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1029 | mov CARG2, RA
1030 | mov CARG1, L:RB // Caveat: CARG1 == RA.
1031 |.else
1032 | movzx CARG5d, PC_OP
1033 | mov CARG2, RA
1034 | mov CARG4, RC // Caveat: CARG4 == RA.
1035 | mov L:CARG1, SAVE_L
1036 | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1037 | mov CARG3, RB
1038 | mov L:RB, L:CARG1 // Keep L in RB so L->base can be reloaded after the call.
1039 |.endif
1040 | mov SAVE_PC, PC
1041 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1042 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1043 | mov BASE, L:RB->base
1044 | test RC, RC
1045 | jz ->cont_nop // NULL result: nothing left to call.
1046 |
1047 | // Call metamethod for binary op.
1048 |->vmeta_binop:
1049 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1050 | mov RA, RC
1051 | sub RC, BASE // RC = byte offset of the new base from the current BASE.
1052 | mov [RA-24], PC // [cont|PC]
1053 | lea PC, [RC+FRAME_CONT]
1054 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1055 | jmp ->vm_call_dispatch
1056 |
1057 |->vmeta_len: // Out-of-line path: calls lj_meta_len(L, &BASE[RD]) and dispatches on its result.
1058 | movzx RDd, PC_RD
1059 | mov L:RB, SAVE_L
1060 | mov L:RB->base, BASE
1061 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1062 | mov L:CARG1, L:RB
1063 | mov SAVE_PC, PC
1064 | call extern lj_meta_len // (lua_State *L, TValue *o)
1065 | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1066 | mov BASE, L:RB->base
1067#if LJ_52
1068 | test RC, RC
1069 | jne ->vmeta_binop // Binop call for compatibility.
1070 | movzx RDd, PC_RD // NULL result: re-decode RD and retry via BC_LEN_Z with the untagged table pointer.
1071 | mov TAB:CARG1, [BASE+RD*8]
1072 | cleartp TAB:CARG1
1073 | jmp ->BC_LEN_Z
1074#else
1075 | jmp ->vmeta_binop // Binop call for compatibility.
1076#endif
1077 |
1078 |//-- Call metamethod ----------------------------------------------------
1079 |
1080 |->vmeta_call_ra: // Entry with RA as a slot index: converts it to the new base address first.
1081 | lea RA, [BASE+RA*8+16]
1082 |->vmeta_call: // Resolve and call __call metamethod.
1083 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1084 | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1085 | mov RB, RA
1086 |.if X64WIN
1087 | mov L:TMPR, SAVE_L
1088 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1089 | lea CARG2, [RA-16]
1090 | lea CARG3, [RA+NARGS:RD*8-8]
1091 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1092 |.else
1093 | mov L:CARG1, SAVE_L
1094 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1095 | lea CARG2, [RA-16]
1096 | lea CARG3, [RA+NARGS:RD*8-8]
1097 |.endif
1098 | mov SAVE_PC, PC
1099 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1100 | mov RA, RB // Restore the new base saved in RB before the call.
1101 | mov L:RB, SAVE_L
1102 | mov BASE, L:RB->base
1103 | mov NARGS:RDd, TMP1d
1104 | mov LFUNC:RB, [RA-16] // Reload the function slot after lj_meta_call returned.
1105 | add NARGS:RDd, 1 // One more argument than the saved count.
1106 | // This is fragile. L->base must not move, KBASE must always be defined.
1107 | cmp KBASE, BASE // Continue with CALLT if flag set.
1108 | je ->BC_CALLT_Z
1109 | cleartp LFUNC:RB
1110 | mov BASE, RA
1111 | ins_call // Otherwise call resolved metamethod.
1112 |
1113 |//-- Argument coercion for 'for' statement ------------------------------
1114 |
1115 |->vmeta_for: // Calls lj_meta_for(L, RA), then re-decodes and re-dispatches the previous instruction.
1116 | mov L:RB, SAVE_L
1117 | mov L:RB->base, BASE
1118 | mov CARG2, RA // Caveat: CARG2 == BASE
1119 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1120 | mov SAVE_PC, PC
1121 | call extern lj_meta_for // (lua_State *L, TValue *base)
1122 | mov BASE, L:RB->base
1123 | mov RCd, [PC-4] // Reload the 32 bit instruction word just before PC.
1124 | movzx RAd, RCH // RA operand = bits 8..15.
1125 | movzx OP, RCL // Opcode = bits 0..7.
1126 | shr RCd, 16 // Remaining operand = bits 16..31.
1127 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1128 |
1129 |//-----------------------------------------------------------------------
1130 |//-- Fast functions -----------------------------------------------------
1131 |//-----------------------------------------------------------------------
1132 |
1133 |.macro .ffunc, name // Declares the global label ff_<name>; no argument checks.
1134 |->ff_ .. name:
1135 |.endmacro
1136 |
1137 |.macro .ffunc_1, name // As .ffunc, but branches to fff_fallback with fewer than 1 argument.
1138 |->ff_ .. name:
1139 | cmp NARGS:RDd, 1+1; jb ->fff_fallback
1140 |.endmacro
1141 |
1142 |.macro .ffunc_2, name // As .ffunc, but branches to fff_fallback with fewer than 2 arguments.
1143 |->ff_ .. name:
1144 | cmp NARGS:RDd, 2+1; jb ->fff_fallback
1145 |.endmacro
1146 |
1147 |.macro .ffunc_n, name, op // As .ffunc_1, plus a number check on arg 1; applies 'op' to load it into xmm0.
1148 | .ffunc_1 name
1149 | checknumtp [BASE], ->fff_fallback
1150 | op xmm0, qword [BASE]
1151 |.endmacro
1152 |
1153 |.macro .ffunc_n, name // One-parameter form: loads arg 1 into xmm0 with movsd.
1154 | .ffunc_n name, movsd
1155 |.endmacro
1156 |
1157 |.macro .ffunc_nn, name // As .ffunc_2, plus number checks on args 1 and 2; loads them into xmm0/xmm1.
1158 | .ffunc_2 name
1159 | checknumtp [BASE], ->fff_fallback
1160 | checknumtp [BASE+8], ->fff_fallback
1161 | movsd xmm0, qword [BASE]
1162 | movsd xmm1, qword [BASE+8]
1163 |.endmacro
1164 |
1165 |// Inlined GC threshold check. Caveat: uses label 1.
1166 |.macro ffgccheck // Clobbers RB; calls fff_gcstep unless gc.total is below gc.threshold.
1167 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1168 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1169 | jb >1 // Unsigned compare: total < threshold skips the step.
1170 | call ->fff_gcstep
1171 |1:
1172 |.endmacro
1173 |
1174 |//-- Base library: checks -----------------------------------------------
1175 |
1176 |.ffunc_1 assert // Fast path: returns all arguments unchanged when arg 1 passes the LJ_TISTRUECOND test.
1177 | mov ITYPE, [BASE]
1178 | mov RB, ITYPE // NOTE(review): RB is reloaded from [BASE] below before any visible use.
1179 | sar ITYPE, 47 // Arithmetic shift by 47 leaves the type tag in ITYPE.
1180 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback
1181 | mov PC, [BASE-8]
1182 | mov MULTRES, RDd // Keep nargs+1 for the result count restored at label 2.
1183 | mov RB, [BASE]
1184 | mov [BASE-16], RB // First result goes to BASE-16.
1185 | sub RDd, 2
1186 | jz >2 // Exactly one argument: nothing more to copy.
1187 | mov RA, BASE
1188 |1: // Copy the remaining arguments down by 16 bytes, one slot per iteration.
1189 | add RA, 8
1190 | mov RB, [RA]
1191 | mov [RA-16], RB
1192 | sub RDd, 1
1193 | jnz <1
1194 |2:
1195 | mov RDd, MULTRES
1196 | jmp ->fff_res_ // Enters after the MULTRES store, which was already done above.
1197 |
1198 |.ffunc_1 type // Returns a string fetched from this C function's upvalue array, indexed by the argument's type.
1199 | mov RC, [BASE]
1200 | sar RC, 47 // RC = type tag of the argument.
1201 | mov RBd, LJ_TISNUM
1202 | cmp RCd, RBd
1203 | cmovb RCd, RBd // Tags below LJ_TISNUM (unsigned) are clamped to LJ_TISNUM.
1204 | not RCd // Bitwise complement of the tag gives the upvalue index.
1205 |2:
1206 | mov CFUNC:RB, [BASE-16]
1207 | cleartp CFUNC:RB
1208 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1209 | mov PC, [BASE-8]
1210 | settp STR:RC, LJ_TSTR
1211 | mov [BASE-16], STR:RC
1212 | jmp ->fff_res1
1213 |
1214 |//-- Base library: getters and setters ---------------------------------
1215 |
1216 |.ffunc_1 getmetatable // Returns mt.__metatable if present and non-nil, else the metatable, else nil.
1217 | mov TAB:RB, [BASE]
1218 | mov PC, [BASE-8]
1219 | checktab TAB:RB, >6 // Non-table arguments are handled at label 6.
1220 |1: // Field metatable must be at same offset for GCtab and GCudata!
1221 | mov TAB:RB, TAB:RB->metatable
1222 |2:
1223 | test TAB:RB, TAB:RB
1224 | mov aword [BASE-16], LJ_TNIL // mov keeps the flags from the test above.
1225 | jz ->fff_res1 // No metatable: result is nil.
1226 | settp TAB:RC, TAB:RB, LJ_TTAB
1227 | mov [BASE-16], TAB:RC // Store metatable as default result.
1228 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
1229 | mov RAd, TAB:RB->hmask
1230 | and RAd, STR:RC->sid // Node index = hmask & key string's sid.
1231 | settp STR:RC, LJ_TSTR
1232 | imul RAd, #NODE
1233 | add NODE:RA, TAB:RB->node // RA = address of the first node in the chain.
1234 |3: // Rearranged logic, because we expect _not_ to find the key.
1235 | cmp NODE:RA->key, STR:RC
1236 | je >5
1237 |4:
1238 | mov NODE:RA, NODE:RA->next
1239 | test NODE:RA, NODE:RA
1240 | jnz <3
1241 | jmp ->fff_res1 // Not found, keep default result.
1242 |5:
1243 | mov RB, NODE:RA->val
1244 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1245 | mov [BASE-16], RB // Return value of mt.__metatable.
1246 | jmp ->fff_res1
1247 |
1248 |6: // Not a table. ITYPEd is presumably the type tag left behind by checktab -- confirm in the macro.
1249 | cmp ITYPEd, LJ_TUDATA; je <1
1250 | cmp ITYPEd, LJ_TISNUM; ja >7
1251 | mov ITYPEd, LJ_TISNUM // Tags not above LJ_TISNUM (unsigned) share the LJ_TISNUM entry.
1252 |7:
1253 | not ITYPEd
1254 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])] // Base metatable indexed by ~tag.
1255 | jmp <2
1256 |
1257 |.ffunc_2 setmetatable // Fast path only: table without a metatable, new metatable is a table.
1258 | mov TAB:RB, [BASE]
1259 | mov TAB:TMPR, TAB:RB // Keep the original tagged value for the result.
1260 | checktab TAB:RB, ->fff_fallback
1261 | // Fast path: no mt for table yet and not clearing the mt.
1262 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1263 | mov TAB:RA, [BASE+8]
1264 | checktab TAB:RA, ->fff_fallback
1265 | mov TAB:RB->metatable, TAB:RA
1266 | mov PC, [BASE-8]
1267 | mov [BASE-16], TAB:TMPR // Return original table.
1268 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1269 | jz >1
1270 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1271 | barrierback TAB:RB, RC
1272 |1:
1273 | jmp ->fff_res1
1274 |
1275 |.ffunc_2 rawget // Returns the slot found by lj_tab_get(L, t, &key); falls back if arg 1 is not a table.
1276 |.if X64WIN
1277 | mov TAB:RA, [BASE]
1278 | checktab TAB:RA, ->fff_fallback
1279 | mov RB, BASE // Save BASE.
1280 | lea CARG3, [BASE+8]
1281 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1282 | mov CARG1, SAVE_L
1283 |.else
1284 | mov TAB:CARG2, [BASE]
1285 | checktab TAB:CARG2, ->fff_fallback
1286 | mov RB, BASE // Save BASE.
1287 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1288 | mov CARG1, SAVE_L
1289 |.endif
1290 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1291 | // cTValue * returned in eax (RD).
1292 | mov BASE, RB // Restore BASE.
1293 | // Copy table slot.
1294 | mov RB, [RD]
1295 | mov PC, [BASE-8]
1296 | mov [BASE-16], RB
1297 | jmp ->fff_res1
1298 |
1299 |//-- Base library: conversions ------------------------------------------
1300 |
1301 |.ffunc tonumber // Returns the argument unchanged when it passes checknumber; otherwise falls back.
1302 | // Only handles the number case inline (without a base argument).
1303 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1304 | mov RB, [BASE]
1305 | checknumber RB, ->fff_fallback
1306 | mov PC, [BASE-8]
1307 | mov [BASE-16], RB
1308 | jmp ->fff_res1
1309 |
1310 |.ffunc_1 tostring // Strings are returned as-is; numbers are formatted via a C call.
1311 | // Only handles the string or number case inline.
1312 | mov PC, [BASE-8]
1313 | mov STR:RB, [BASE]
1314 | checktp_nc STR:RB, LJ_TSTR, >3
1315 | // A __tostring method in the string base metatable is ignored.
1316 |2:
1317 | mov [BASE-16], STR:RB
1318 | jmp ->fff_res1
1319 |3: // Handle numbers inline, unless a number base metatable is present.
1320 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1321 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1322 | jne ->fff_fallback
1323 | ffgccheck // Caveat: uses label 1.
1324 | mov L:RB, SAVE_L
1325 | mov L:RB->base, BASE // Add frame since C call can throw.
1326 | mov SAVE_PC, PC // Redundant (but a defined value).
1327 |.if not X64WIN
1328 | mov CARG2, BASE // Otherwise: CARG2 == BASE
1329 |.endif
1330 | mov L:CARG1, L:RB
1331 |.if DUALNUM
1332 | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1333 |.else
1334 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1335 |.endif
1336 | // GCstr returned in eax (RD).
1337 | mov BASE, L:RB->base
1338 | settp STR:RB, RD, LJ_TSTR // Tag the returned string and store it via label 2.
1339 | jmp <2
1340 |
1341 |//-- Base library: iterators -------------------------------------------
1342 |
1343 |.ffunc_1 next // Calls lj_tab_next(t, &key, BASE-16) and returns per its 1/0/-1 result.
1344 | je >2 // Missing 2nd arg?
1345 |1:
1346 | mov CARG1, [BASE]
1347 | mov PC, [BASE-8]
1348 | checktab CARG1, ->fff_fallback
1349 | mov RB, BASE // Save BASE.
1350 |.if X64WIN
1351 | lea CARG3, [BASE-16]
1352 | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE.
1353 |.else
1354 | lea CARG2, [BASE+8]
1355 | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE.
1356 |.endif
1357 | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1358 | // 1=found, 0=end, -1=error returned in eax (RD).
1359 | mov BASE, RB // Restore BASE.
1360 | test RDd, RDd; jg ->fff_res2 // Found key/value.
1361 | js ->fff_fallback_2 // Invalid key.
1362 | // End of traversal: return nil.
1363 | mov aword [BASE-16], LJ_TNIL
1364 | jmp ->fff_res1
1365 |2: // Set missing 2nd arg to nil.
1366 | mov aword [BASE+8], LJ_TNIL
1367 | jmp <1
1368 |
1369 |.ffunc_1 pairs // Returns three values: upvalue[0] as a function, the table, and nil.
1370 | mov TAB:RB, [BASE]
1371 | mov TMPR, TAB:RB // Keep the tagged table value for the second result.
1372 | checktab TAB:RB, ->fff_fallback
1373#if LJ_52
1374 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1375#endif
1376 | mov CFUNC:RD, [BASE-16]
1377 | cleartp CFUNC:RD
1378 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1379 | settp CFUNC:RD, LJ_TFUNC
1380 | mov PC, [BASE-8] // Must be loaded before BASE-8 is overwritten below.
1381 | mov [BASE-16], CFUNC:RD
1382 | mov [BASE-8], TMPR
1383 | mov aword [BASE], LJ_TNIL
1384 | mov RDd, 1+3
1385 | jmp ->fff_res
1386 |
1387 |.ffunc_2 ipairs_aux // Returns (i+1, t[i+1]), or no results when that slot is nil or missing.
1388 | mov TAB:RB, [BASE]
1389 | checktab TAB:RB, ->fff_fallback
1390 |.if DUALNUM
1391 | mov RA, [BASE+8]
1392 | checkint RA, ->fff_fallback
1393 |.else
1394 | checknumtp [BASE+8], ->fff_fallback
1395 | movsd xmm0, qword [BASE+8]
1396 |.endif
1397 | mov PC, [BASE-8]
1398 |.if DUALNUM
1399 | add RAd, 1
1400 | setint ITYPE, RA
1401 | mov [BASE-16], ITYPE // First result: incremented index as an integer.
1402 |.else
1403 | sseconst_1 xmm1, TMPR
1404 | addsd xmm0, xmm1
1405 | cvttsd2si RAd, xmm0 // RA = truncated integer index for the array lookup.
1406 | movsd qword [BASE-16], xmm0 // First result: incremented index as a number.
1407 |.endif
1408 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1409 | mov RD, TAB:RB->array
1410 | lea RD, [RD+RA*8]
1411 |1:
1412 | cmp aword [RD], LJ_TNIL; je ->fff_res0
1413 | // Copy array slot.
1414 | mov RB, [RD]
1415 | mov [BASE-8], RB
1416 |->fff_res2: // Shared exit: return two results.
1417 | mov RDd, 1+2
1418 | jmp ->fff_res
1419 |2: // Check for empty hash part first. Otherwise call C function.
1420 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1421 |.if X64WIN
1422 | mov TMPR, BASE
1423 | mov CARG2d, RAd
1424 | mov CARG1, TAB:RB
1425 | mov RB, TMPR // BASE is saved in RB by way of TMPR.
1426 |.else
1427 | mov CARG1, TAB:RB
1428 | mov RB, BASE // Save BASE.
1429 | mov CARG2d, RAd // Caveat: CARG2 == BASE
1430 |.endif
1431 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1432 | // cTValue * or NULL returned in eax (RD).
1433 | mov BASE, RB
1434 | test RD, RD
1435 | jnz <1 // Non-NULL slot: check and copy it like an array slot.
1436 |->fff_res0: // Shared exit: return no results.
1437 | mov RDd, 1+0
1438 | jmp ->fff_res
1439 |
1440 |.ffunc_1 ipairs // Returns three values: upvalue[0] as a function, the table, and zero.
1441 | mov TAB:RB, [BASE]
1442 | mov TMPR, TAB:RB // Keep the tagged table value for the second result.
1443 | checktab TAB:RB, ->fff_fallback
1444#if LJ_52
1445 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1446#endif
1447 | mov CFUNC:RD, [BASE-16]
1448 | cleartp CFUNC:RD
1449 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1450 | settp CFUNC:RD, LJ_TFUNC
1451 | mov PC, [BASE-8] // Must be loaded before BASE-8 is overwritten below.
1452 | mov [BASE-16], CFUNC:RD
1453 | mov [BASE-8], TMPR
1454 |.if DUALNUM
1455 | mov64 RD, ((uint64_t)LJ_TISNUM<<47) // Integer 0: LJ_TISNUM tag with a zero payload.
1456 | mov [BASE], RD
1457 |.else
1458 | mov qword [BASE], 0 // All-zero bit pattern, i.e. the double +0.0.
1459 |.endif
1460 | mov RDd, 1+3
1461 | jmp ->fff_res
1462 |
1463 |//-- Base library: catch errors ----------------------------------------
1464 |
1465 |.ffunc_1 pcall // Builds a FRAME_PCALL frame 16 bytes up and calls arg 1 with the remaining args.
1466 | lea RA, [BASE+16]
1467 | sub NARGS:RDd, 1
1468 | mov PCd, 16+FRAME_PCALL
1469 |1: // Shared with xpcall, which jumps here with its own RA, RD and PC.
1470 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1471 | shr RB, HOOK_ACTIVE_SHIFT
1472 | and RB, 1
1473 | add PC, RB // Remember active hook before pcall.
1474 | // Note: this does a (harmless) copy of the function to the PC slot, too.
1475 | mov KBASE, RD // KBASE is used as the loop counter here.
1476 |2: // Move each slot up by 8 bytes, from the highest index down.
1477 | mov RB, [RA+KBASE*8-24]
1478 | mov [RA+KBASE*8-16], RB
1479 | sub KBASE, 1
1480 | ja <2
1481 | jmp ->vm_call_dispatch
1482 |
1483 |.ffunc_2 xpcall // Requires arg 2 to be a function; swaps args 1 and 2 and joins pcall at label 1.
1484 | mov LFUNC:RA, [BASE+8]
1485 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
1486 | mov LFUNC:RB, [BASE] // Swap function and traceback.
1487 | mov [BASE], LFUNC:RA
1488 | mov [BASE+8], LFUNC:RB
1489 | lea RA, [BASE+24]
1490 | sub NARGS:RDd, 2
1491 | mov PCd, 24+FRAME_PCALL
1492 | jmp <1 // Label 1 is inside the pcall fast function above.
1493 |
1494 |//-- Coroutine library --------------------------------------------------
1495 |
1496 |.macro coroutine_resume_wrap, resume // resume=1 emits ff_coroutine_resume, resume=0 emits ff_coroutine_wrap_aux.
1497 |.if resume
1498 |.ffunc_1 coroutine_resume
1499 | mov L:RB, [BASE] // Coroutine comes from arg 1; its type is checked below.
1500 | cleartp L:RB
1501 |.else
1502 |.ffunc coroutine_wrap_aux
1503 | mov CFUNC:RB, [BASE-16]
1504 | cleartp CFUNC:RB
1505 | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine comes from the function's upvalue 0.
1506 | cleartp L:RB
1507 |.endif
1508 | mov PC, [BASE-8]
1509 | mov SAVE_PC, PC
1510 | mov TMP1, L:RB // TMP1 holds the coroutine across the calls below.
1511 |.if resume
1512 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1513 |.endif
1514 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1515 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1516 | mov RA, L:RB->top // mov keeps the flags of the status compare for the je below.
1517 | je >1 // Status != LUA_YIELD (i.e. 0)?
1518 | cmp RA, L:RB->base // Check for presence of initial func.
1519 | je ->fff_fallback
1520 | mov PC, [RA-8] // Move initial function up.
1521 | mov [RA], PC
1522 | add RA, 8
1523 |1:
1524 |.if resume
1525 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1526 |.else
1527 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1528 |.endif
1529 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1530 | mov L:RB->top, PC // New top of the coroutine stack.
1531 |
1532 | mov L:RB, SAVE_L // From here on RB is the calling lua_State.
1533 | mov L:RB->base, BASE
1534 |.if resume
1535 | add BASE, 8 // Keep resumed thread in stack for GC.
1536 |.endif
1537 | mov L:RB->top, BASE
1538 |.if resume
1539 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1540 |.else
1541 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1542 |.endif
1543 | sub RB, PC // Relative to PC.
1544 |
1545 | cmp PC, RA
1546 | je >3 // Nothing to move.
1547 |2: // Move args to coroutine.
1548 | mov RC, [PC+RB]
1549 | mov [PC-8], RC
1550 | sub PC, 8
1551 | cmp PC, RA
1552 | jne <2
1553 |3:
1554 | mov CARG2, RA
1555 | mov CARG1, TMP1
1556 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1557 |
1558 | mov L:RB, SAVE_L
1559 | mov L:PC, TMP1 // PC now holds the coroutine's lua_State.
1560 | mov BASE, L:RB->base
1561 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1562 | set_vmstate INTERP
1563 |
1564 | cmp eax, LUA_YIELD
1565 | ja >8 // Status above LUA_YIELD: error path.
1566 |4:
1567 | mov RA, L:PC->base
1568 | mov KBASE, L:PC->top
1569 | mov L:PC->top, RA // Clear coroutine stack.
1570 | mov PC, KBASE
1571 | sub PC, RA // PC = size of the results in bytes.
1572 | je >6 // No results?
1573 | lea RD, [BASE+PC]
1574 | shr PCd, 3 // PC = number of results.
1575 | cmp RD, L:RB->maxstack
1576 | ja >9 // Need to grow stack?
1577 |
1578 | mov RB, BASE
1579 | sub RB, RA // RB = displacement from the coroutine stack to BASE.
1580 |5: // Move results from coroutine.
1581 | mov RD, [RA]
1582 | mov [RA+RB], RD
1583 | add RA, 8
1584 | cmp RA, KBASE
1585 | jne <5
1586 |6:
1587 |.if resume
1588 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1589 | mov_true ITYPE // Prepend true to results.
1590 | mov [BASE-8], ITYPE
1591 |.else
1592 | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1593 |.endif
1594 |7:
1595 | mov PC, SAVE_PC
1596 | mov MULTRES, RDd
1597 |.if resume
1598 | mov RA, -8 // Results start at BASE-8 (the prepended boolean).
1599 |.else
1600 | xor RAd, RAd // Results start at BASE.
1601 |.endif
1602 | test PCd, FRAME_TYPE
1603 | jz ->BC_RET_Z
1604 | jmp ->vm_return
1605 |
1606 |8: // Coroutine returned with error (at co->top-1).
1607 |.if resume
1608 | mov_false ITYPE // Prepend false to results.
1609 | mov [BASE-8], ITYPE
1610 | mov RA, L:PC->top
1611 | sub RA, 8
1612 | mov L:PC->top, RA // Clear error from coroutine stack.
1613 | // Copy error message.
1614 | mov RD, [RA]
1615 | mov [BASE], RD
1616 | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1617 | jmp <7
1618 |.else
1619 | mov CARG2, L:PC
1620 | mov CARG1, L:RB
1621 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1622 | // Error function does not return.
1623 |.endif
1624 |
1625 |9: // Handle stack expansion on return from yield.
1626 | mov L:RA, TMP1
1627 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1628 | mov CARG2, PC // PC holds the result count computed at label 4.
1629 | mov CARG1, L:RB
1630 | call extern lj_state_growstack // (lua_State *L, int n)
1631 | mov L:PC, TMP1
1632 | mov BASE, L:RB->base
1633 | jmp <4 // Retry the stack move.
1634 |.endmacro
1635 |
1636 | coroutine_resume_wrap 1 // coroutine.resume
1637 | coroutine_resume_wrap 0 // coroutine.wrap
1638 |
1639 |.ffunc coroutine_yield // Falls back unless L->cframe has CFRAME_RESUME set; otherwise leaves with LUA_YIELD.
1640 | mov L:RB, SAVE_L
1641 | test aword L:RB->cframe, CFRAME_RESUME
1642 | jz ->fff_fallback
1643 | mov L:RB->base, BASE
1644 | lea RD, [BASE+NARGS:RD*8-8]
1645 | mov L:RB->top, RD // L->top = one past the last argument.
1646 | xor RDd, RDd
1647 | mov aword L:RB->cframe, RD // Clear L->cframe.
1648 | mov al, LUA_YIELD // RD was zeroed above, so eax = LUA_YIELD.
1649 | mov byte L:RB->status, al
1650 | jmp ->vm_leave_unw
1651 |
1652 |//-- Math library -------------------------------------------------------
1653 |
1654 | .ffunc_1 math_abs // Absolute value; also hosts the shared fff_resbit/fff_resi/fff_resRB exits with DUALNUM.
1655 | mov RB, [BASE]
1656 |.if DUALNUM
1657 | checkint RB, >3
1658 | cmp RBd, 0; jns ->fff_resi // Non-negative integer: return as-is.
1659 | neg RBd; js >2 // Still negative after neg: take the 2^31 path.
1660 |->fff_resbit:
1661 |->fff_resi: // Shared exit: return the integer in RBd.
1662 | setint RB
1663 |->fff_resRB: // Shared exit: return the tagged value in RB.
1664 | mov PC, [BASE-8]
1665 | mov [BASE-16], RB
1666 | jmp ->fff_res1
1667 |2:
1668 | mov64 RB, U64x(41e00000,00000000) // 2^31.
1669 | jmp ->fff_resRB
1670 |3:
1671 | ja ->fff_fallback // Uses the flags left by checkint.
1672 |.else
1673 | checknum RB, ->fff_fallback
1674 |.endif
1675 | shl RB, 1 // Shift left then right by one clears bit 63 (the sign bit of a double).
1676 | shr RB, 1
1677 | mov PC, [BASE-8]
1678 | mov [BASE-16], RB
1679 | jmp ->fff_res1
1680 |
1681 |.ffunc_n math_sqrt, sqrtsd // sqrtsd is the load op, so xmm0 = sqrt(arg 1) on entry to fff_resxmm0.
1682 |->fff_resxmm0: // Shared exit: return the number in xmm0 as the single result.
1683 | mov PC, [BASE-8]
1684 | movsd qword [BASE-16], xmm0
1685 | // fallthrough
1686 |
1687 |->fff_res1: // Shared exit: one result, already stored at BASE-16.
1688 | mov RDd, 1+1
1689 |->fff_res: // Shared exit: RD = nresults+1, results start at BASE-16.
1690 | mov MULTRES, RDd
1691 |->fff_res_: // Entry for callers that have already set MULTRES.
1692 | test PCd, FRAME_TYPE
1693 | jnz >7
1694 |5:
1695 | cmp PC_RB, RDL // More results expected?
1696 | ja >6
1697 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1698 | movzx RAd, PC_RA
1699 | neg RA
1700 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1701 | ins_next
1702 |
1703 |6: // Fill up results with nil.
1704 | mov aword [BASE+RD*8-24], LJ_TNIL
1705 | add RD, 1
1706 | jmp <5
1707 |
1708 |7: // Non-standard return case.
1709 | mov RA, -16 // Results start at BASE+RA = BASE-16.
1710 | jmp ->vm_return
1711 |
1712 |.macro math_round, func // Emits ff_math_<func>, which rounds arg 1 via vm_<func>_sse.
1713 | .ffunc_1 math_ .. func // Needs >= 1 argument: [BASE] is only defined when an argument was passed.
1714 |.if DUALNUM
1715 | mov RB, [BASE]
1716 | checknumx RB, ->fff_resRB, je // Integer argument: returned unchanged.
1717 | ja ->fff_fallback
1718 |.else
1719 | checknumtp [BASE], ->fff_fallback
1720 |.endif
1721 | movsd xmm0, qword [BASE]
1722 | call ->vm_ .. func .. _sse
1723 |.if DUALNUM
1724 | cvttsd2si RBd, xmm0
1725 | cmp RBd, 0x80000000 // 0x80000000 is what cvttsd2si yields when the value does not fit.
1726 | jne ->fff_resi
1727 | cvtsi2sd xmm1, RBd // Ambiguous case: convert back and compare against the original.
1728 | ucomisd xmm0, xmm1
1729 | jp ->fff_resxmm0 // Unordered (NaN): return as a number.
1730 | je ->fff_resi // Exactly -2^31: return as an integer.
1731 |.endif
1732 | jmp ->fff_resxmm0
1733 |.endmacro
1734 |
1735 | math_round floor
1736 | math_round ceil
1737 |
1738 |.ffunc math_log // Inline only for exactly one number argument; calls the C function log.
1739 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1740 | checknumtp [BASE], ->fff_fallback
1741 | movsd xmm0, qword [BASE]
1742 | mov RB, BASE // Save BASE across the C call.
1743 | call extern log
1744 | mov BASE, RB
1745 | jmp ->fff_resxmm0
1746 |
1747 |.macro math_extern, func // Emits ff_math_<func>: calls the C function <func> with arg 1 in xmm0.
1748 | .ffunc_n math_ .. func
1749 | mov RB, BASE // Save BASE across the C call.
1750 | call extern func
1751 | mov BASE, RB
1752 | jmp ->fff_resxmm0
1753 |.endmacro
1754 |
1755 |.macro math_extern2, func // Emits ff_math_<func>: calls the C function <func> with args in xmm0/xmm1.
1756 | .ffunc_nn math_ .. func
1757 | mov RB, BASE // Save BASE across the C call.
1758 | call extern func
1759 | mov BASE, RB
1760 | jmp ->fff_resxmm0
1761 |.endmacro
1762 |
1763 | math_extern log10
1764 | math_extern exp
1765 | math_extern sin
1766 | math_extern cos
1767 | math_extern tan
1768 | math_extern asin
1769 | math_extern acos
1770 | math_extern atan
1771 | math_extern sinh
1772 | math_extern cosh
1773 | math_extern tanh
1774 | math_extern2 pow
1775 | math_extern2 atan2
1776 | math_extern2 fmod
1777 |
1778 |.ffunc_2 math_ldexp // Scales arg 1 by 2^arg 2 with x87 fscale; both args must be numbers.
1779 | checknumtp [BASE], ->fff_fallback
1780 | checknumtp [BASE+8], ->fff_fallback
1781 | fld qword [BASE+8] // Exponent is pushed first, so it ends up in st1.
1782 | fld qword [BASE] // Value in st0.
1783 | fscale
1784 | fpop1
1785 | mov PC, [BASE-8]
1786 | fstp qword [BASE-16]
1787 | jmp ->fff_res1
1788 |
1789 |.ffunc_n math_frexp // Calls the C function frexp with TMP1 as the int out-parameter; returns two values.
1790 | mov RB, BASE // Save BASE across the C call.
1791 |.if X64WIN
1792 | lea CARG2, TMP1 // Caveat: CARG2 == BASE
1793 |.else
1794 | lea CARG1, TMP1
1795 |.endif
1796 | call extern frexp
1797 | mov BASE, RB
1798 | mov RBd, TMP1d // RB = exponent written by frexp.
1799 | mov PC, [BASE-8] // Must be loaded before BASE-8 is overwritten below.
1800 | movsd qword [BASE-16], xmm0
1801 |.if DUALNUM
1802 | setint RB
1803 | mov [BASE-8], RB
1804 |.else
1805 | cvtsi2sd xmm1, RBd
1806 | movsd qword [BASE-8], xmm1
1807 |.endif
1808 | mov RDd, 1+2
1809 | jmp ->fff_res
1810 |
1811 |.ffunc_n math_modf // Calls the C function modf; the pointer argument targets the first result slot.
1812 | mov RB, BASE // Save BASE across the C call.
1813 |.if X64WIN
1814 | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE
1815 |.else
1816 | lea CARG1, [BASE-16]
1817 |.endif
1818 | call extern modf
1819 | mov BASE, RB
1820 | mov PC, [BASE-8] // Must be loaded before BASE-8 is overwritten below.
1821 | movsd qword [BASE-8], xmm0 // Second result: the value returned by modf.
1822 | mov RDd, 1+2
1823 | jmp ->fff_res
1824 |
1825 |.macro math_minmax, name, cmovop, sseop // Emits ff_<name>: folds all args with cmovop (integers) or sseop (numbers).
1826 | .ffunc_1 name
1827 | mov RAd, 2 // RA = nargs+1 style index of the next argument to fold.
1828 |.if DUALNUM
1829 | mov RB, [BASE]
1830 | checkint RB, >4
1831 |1: // Handle integers.
1832 | cmp RAd, RDd; jae ->fff_resRB // All arguments consumed: return RB.
1833 | mov TMPR, [BASE+RA*8-8]
1834 | checkint TMPR, >3
1835 | cmp RBd, TMPRd
1836 | cmovop RB, TMPR
1837 | add RAd, 1
1838 | jmp <1
1839 |3:
1840 | ja ->fff_fallback
1841 | // Convert intermediate result to number and continue below.
1842 | cvtsi2sd xmm0, RBd
1843 | jmp >6
1844 |4:
1845 | ja ->fff_fallback
1846 |.else
1847 | checknumtp [BASE], ->fff_fallback
1848 |.endif
1849 |
1850 | movsd xmm0, qword [BASE]
1851 |5: // Handle numbers or integers.
1852 | cmp RAd, RDd; jae ->fff_resxmm0 // All arguments consumed: return xmm0.
1853 |.if DUALNUM
1854 | mov RB, [BASE+RA*8-8]
1855 | checknumx RB, >6, jb
1856 | ja ->fff_fallback
1857 | cvtsi2sd xmm1, RBd // Integer argument: convert to a number first.
1858 | jmp >7
1859 |.else
1860 | checknumtp [BASE+RA*8-8], ->fff_fallback
1861 |.endif
1862 |6:
1863 | movsd xmm1, qword [BASE+RA*8-8]
1864 |7:
1865 | sseop xmm0, xmm1
1866 | add RAd, 1
1867 | jmp <5
1868 |.endmacro
1869 |
1870 | math_minmax math_min, cmovg, minsd
1871 | math_minmax math_max, cmovl, maxsd
1872 |
1873 |//-- String library -----------------------------------------------------
1874 |
1875 |.ffunc string_byte // Only handle the 1-arg case here.
1876 | cmp NARGS:RDd, 1+1; jne ->fff_fallback
1877 | mov STR:RB, [BASE]
1878 | checkstr STR:RB, ->fff_fallback
1879 | mov PC, [BASE-8]
1880 | cmp dword STR:RB->len, 1
1881 | jb ->fff_res0 // Return no results for empty string.
1882 | movzx RBd, byte STR:RB[1] // First character byte of the string.
1883 |.if DUALNUM
1884 | jmp ->fff_resi
1885 |.else
1886 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
1887 |.endif
1888 |
1889 |.ffunc string_char // Only handle the 1-arg case here.
1890 | ffgccheck
1891 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1892 |.if DUALNUM
1893 | mov RB, [BASE]
1894 | checkint RB, ->fff_fallback
1895 |.else
1896 | checknumtp [BASE], ->fff_fallback
1897 | cvttsd2si RBd, qword [BASE]
1898 |.endif
1899 | cmp RBd, 255; ja ->fff_fallback // Unsigned compare also rejects negative values.
1900 | mov TMP1d, RBd
1901 | mov TMPRd, 1 // Length of the new string.
1902 | lea RD, TMP1 // Points to stack. Little-endian.
1903 |->fff_newstr: // Shared tail: RD = char pointer, TMPRd = length; creates and returns a string.
1904 | mov L:RB, SAVE_L
1905 | mov L:RB->base, BASE
1906 | mov CARG3d, TMPRd // Zero-extended to size_t.
1907 | mov CARG2, RD
1908 | mov CARG1, L:RB
1909 | mov SAVE_PC, PC
1910 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1911 |->fff_resstr: // Shared tail: tag the GCstr in RD and return it as the single result.
1912 | // GCstr * returned in eax (RD).
1913 | mov BASE, L:RB->base
1914 | mov PC, [BASE-8]
1915 | settp STR:RD, LJ_TSTR
1916 | mov [BASE-16], STR:RD
1917 | jmp ->fff_res1
1918 |
1919 |.ffunc string_sub // Computes a clamped [start, end] range and creates the substring via fff_newstr.
1920 | ffgccheck
1921 | mov TMPRd, -1 // Default end = -1 when only two arguments are given.
1922 | cmp NARGS:RDd, 1+2; jb ->fff_fallback
1923 | jna >1 // Exactly two arguments: keep the default end.
1924 |.if DUALNUM
1925 | mov TMPR, [BASE+16]
1926 | checkint TMPR, ->fff_fallback
1927 |.else
1928 | checknumtp [BASE+16], ->fff_fallback
1929 | cvttsd2si TMPRd, qword [BASE+16]
1930 |.endif
1931 |1:
1932 | mov STR:RB, [BASE]
1933 | checkstr STR:RB, ->fff_fallback
1934 |.if DUALNUM
1935 | mov ITYPE, [BASE+8]
1936 | mov RAd, ITYPEd // Must clear hiword for lea below.
1937 | sar ITYPE, 47
1938 | cmp ITYPEd, LJ_TISNUM
1939 | jne ->fff_fallback
1940 |.else
1941 | checknumtp [BASE+8], ->fff_fallback
1942 | cvttsd2si RAd, qword [BASE+8]
1943 |.endif
1944 | mov RCd, STR:RB->len
1945 | cmp RCd, TMPRd // len < end? (unsigned compare)
1946 | jb >5
1947 |2:
1948 | test RAd, RAd // start <= 0?
1949 | jle >7
1950 |3:
1951 | sub TMPRd, RAd // start > end?
1952 | jl ->fff_emptystr
1953 | lea RD, [STR:RB+RAd+#STR-1] // RD = address of the character at position start.
1954 | add TMPRd, 1 // Length = end - start + 1.
1955 |4:
1956 | jmp ->fff_newstr
1957 |
1958 |5: // Negative end or overflow.
1959 | jl >6 // Signed view of the same compare: len < end means overflow.
1960 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1961 | jmp <2
1962 |6: // Overflow.
1963 | mov TMPRd, RCd // end = len
1964 | jmp <2
1965 |
1966 |7: // Negative start or underflow.
1967 | je >8 // start == 0 is treated like underflow.
1968 | add RAd, RCd // start = start+(len+1)
1969 | add RAd, 1
1970 | jg <3 // start > 0?
1971 |8: // Underflow.
1972 | mov RAd, 1 // start = 1
1973 | jmp <3
1974 |
1975 |->fff_emptystr: // Range underflow.
1976 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1977 | jmp <4
1978 |
1979 |.macro ffstring_op, name // Emits ff_string_<name>: runs lj_buf_putstr_<name> on the global tmpbuf and returns the result string.
1980 | .ffunc_1 string_ .. name
1981 | ffgccheck
1982 |.if X64WIN
1983 | mov STR:TMPR, [BASE]
1984 | checkstr STR:TMPR, ->fff_fallback
1985 |.else
1986 | mov STR:CARG2, [BASE]
1987 | checkstr STR:CARG2, ->fff_fallback
1988 |.endif
1989 | mov L:RB, SAVE_L
1990 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
1991 | mov L:RB->base, BASE
1992 |.if X64WIN
1993 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
1994 |.endif
1995 | mov RC, SBUF:CARG1->b
1996 | mov SBUF:CARG1->L, L:RB
1997 | mov SBUF:CARG1->w, RC // Reset the buffer: w = b.
1998 | mov SAVE_PC, PC
1999 | call extern lj_buf_putstr_ .. name
2000 | mov CARG1, rax // Pass the first call's return value on to lj_buf_tostr.
2001 | call extern lj_buf_tostr
2002 | jmp ->fff_resstr
2003 |.endmacro
2004 |
2005 |ffstring_op reverse
2006 |ffstring_op lower
2007 |ffstring_op upper
2008 |
2009 |//-- Bit library --------------------------------------------------------
2010 |
2011 |.macro .ffunc_bit, name, kind, fdef // Prologue for bit.* functions: leaves arg 1 as a 32 bit integer in RBd.
2012 | fdef name
2013 |.if kind == 2
2014 | sseconst_tobit xmm1, RB // kind 2 loads the conversion constant up front, before RB holds arg 1.
2015 |.endif
2016 |.if DUALNUM
2017 | mov RB, [BASE]
2018 | checkint RB, >1
2019 |.if kind > 0
2020 | jmp >2 // Integer argument: skip the number conversion.
2021 |.else
2022 | jmp ->fff_resbit // kind 0: an integer argument is returned directly.
2023 |.endif
2024 |1:
2025 | ja ->fff_fallback
2026 | movd xmm0, RB
2027 |.else
2028 | checknumtp [BASE], ->fff_fallback
2029 | movsd xmm0, qword [BASE]
2030 |.endif
2031 |.if kind < 2
2032 | sseconst_tobit xmm1, RB
2033 |.endif
2034 | addsd xmm0, xmm1 // Add the constant; the low 32 bits of the sum are taken as the integer.
2035 | movd RBd, xmm0
2036 |2:
2037 |.endmacro
2038 |
2039 |.macro .ffunc_bit, name, kind // Two-parameter form: uses .ffunc_1 as the entry macro.
2040 | .ffunc_bit name, kind, .ffunc_1
2041 |.endmacro
2042 |
2043 |.ffunc_bit bit_tobit, 0 // The prologue already did the conversion; just return RBd.
2044 | jmp ->fff_resbit
2045 |
2046 |.macro .ffunc_bit_op, name, ins // Emits ff_<name>: folds all args into RBd with 'ins', from last to first.
2047 | .ffunc_bit name, 2
2048 | mov TMPRd, NARGS:RDd // Save for fallback.
2049 | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument.
2050 |1:
2051 | cmp RD, BASE
2052 | jbe ->fff_resbit // Back at arg 1 (already in RBd): done.
2053 |.if DUALNUM
2054 | mov RA, [RD]
2055 | checkint RA, >2
2056 | ins RBd, RAd
2057 | sub RD, 8
2058 | jmp <1
2059 |2:
2060 | ja ->fff_fallback_bit_op
2061 | movd xmm0, RA
2062 |.else
2063 | checknumtp [RD], ->fff_fallback_bit_op
2064 | movsd xmm0, qword [RD]
2065 |.endif
2066 | addsd xmm0, xmm1 // xmm1 still holds the constant loaded by the kind 2 prologue.
2067 | movd RAd, xmm0
2068 | ins RBd, RAd
2069 | sub RD, 8
2070 | jmp <1
2071 |.endmacro
2072 |
2073 |.ffunc_bit_op bit_band, and
2074 |.ffunc_bit_op bit_bor, or
2075 |.ffunc_bit_op bit_bxor, xor
2076 |
2077 |.ffunc_bit bit_bswap, 1 // Byte-swaps the 32 bit value of arg 1.
2078 | bswap RBd
2079 | jmp ->fff_resbit
2080 |
2081 |.ffunc_bit bit_bnot, 1 // Bitwise complement of the 32 bit value of arg 1.
2082 | not RBd
2083 |.if DUALNUM
2084 | jmp ->fff_resbit
2085 |.else
2086 |->fff_resbit: // Without DUALNUM the shared exit lives here: convert RBd to a number and return it.
2087 | cvtsi2sd xmm0, RBd
2088 | jmp ->fff_resxmm0
2089 |.endif
2090 |
2091 |->fff_fallback_bit_op: // Fallback for .ffunc_bit_op, which uses RD as a pointer.
2092 | mov NARGS:RDd, TMPRd // Restore for fallback
2093 | jmp ->fff_fallback
2094 |
2095 |.macro .ffunc_bit_sh, name, ins // Emits ff_<name>: applies shift/rotate 'ins' to arg 1 by arg 2 (count in cl).
2096 |.if DUALNUM
2097 | .ffunc_bit name, 1, .ffunc_2
2098 | // Note: no inline conversion from number for 2nd argument!
2099 | mov RA, [BASE+8]
2100 | checkint RA, ->fff_fallback
2101 |.else
2102 | .ffunc_nn name
2103 | sseconst_tobit xmm2, RB
2104 | addsd xmm0, xmm2 // Convert both arguments with the same constant.
2105 | addsd xmm1, xmm2
2106 | movd RBd, xmm0
2107 | movd RAd, xmm1
2108 |.endif
2109 | ins RBd, cl // Assumes RA is ecx.
2110 | jmp ->fff_resbit
2111 |.endmacro
2112 |
2113 |.ffunc_bit_sh bit_lshift, shl
2114 |.ffunc_bit_sh bit_rshift, shr
2115 |.ffunc_bit_sh bit_arshift, sar
2116 |.ffunc_bit_sh bit_rol, rol
2117 |.ffunc_bit_sh bit_ror, ror
2118 |
2119 |//-----------------------------------------------------------------------
2120 |
2121 |->fff_fallback_2: // Entry that forces nargs+1 = 3 before the common fallback.
2122 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2123 | jmp ->fff_fallback
2124 |->fff_fallback_1: // Entry that forces nargs+1 = 2 before the common fallback.
2125 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2126 |->fff_fallback: // Call fast function fallback handler.
2127 | // BASE = new base, RD = nargs+1
2128 | mov L:RB, SAVE_L
2129 | mov PC, [BASE-8] // Fallback may overwrite PC.
2130 | mov SAVE_PC, PC // Redundant (but a defined value).
2131 | mov L:RB->base, BASE
2132 | lea RD, [BASE+NARGS:RD*8-8]
2133 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2134 | mov L:RB->top, RD
2135 | mov CFUNC:RD, [BASE-16]
2136 | cleartp CFUNC:RD
2137 | cmp RA, L:RB->maxstack
2138 | ja >5 // Need to grow stack.
2139 | mov CARG1, L:RB
2140 | call aword CFUNC:RD->f // (lua_State *L)
2141 | mov BASE, L:RB->base
2142 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2143 | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2144 |1:
2145 | mov RA, L:RB->top
2146 | sub RA, BASE
2147 | shr RAd, 3 // RA = number of slots between BASE and L->top.
2148 | test RDd, RDd
2149 | lea NARGS:RDd, [RAd+1] // lea keeps the flags of the test above for the jne below.
2150 | mov LFUNC:RB, [BASE-16]
2151 | jne ->vm_call_tail // Returned -1?
2152 | cleartp LFUNC:RB
2153 | ins_callt // Returned 0: retry fast path.
2154 |
2155 |// Reconstruct previous base for vmeta_call during tailcall.
2156 |->vm_call_tail:
2157 | mov RA, BASE
2158 | test PCd, FRAME_TYPE
2159 | jnz >3
2160 | movzx RBd, PC_RA
2161 | neg RB
2162 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2163 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2164 |3: // FRAME_TYPE bits set: PC with its low three bits masked off is the byte delta to the previous base.
2165 | mov RB, PC
2166 | and RB, -8
2167 | sub BASE, RB
2168 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2169 |
2170 |5: // Grow stack for fallback handler.
2171 | mov CARG2d, LUA_MINSTACK
2172 | mov CARG1, L:RB
2173 | call extern lj_state_growstack // (lua_State *L, int n)
2174 | mov BASE, L:RB->base
2175 | xor RDd, RDd // Simulate a return 0.
2176 | jmp <1 // Dumb retry (goes through ff first).
2177 |
2178 |->fff_gcstep: // Call GC step function.
2179 | // BASE = new base, RD = nargs+1
2180 | pop RB // Must keep stack at same level.
2181 | mov TMP1, RB // Save return address
2182 | mov L:RB, SAVE_L
2183 | mov SAVE_PC, PC // Redundant (but a defined value).
2184 | mov L:RB->base, BASE
2185 | lea RD, [BASE+NARGS:RD*8-8]
2186 | mov CARG1, L:RB
2187 | mov L:RB->top, RD // L->top = one past the last argument.
2188 | call extern lj_gc_step // (lua_State *L)
2189 | mov BASE, L:RB->base
2190 | mov RD, L:RB->top
2191 | sub RD, BASE
2192 | shr RDd, 3
2193 | add NARGS:RDd, 1 // Recompute nargs+1 from L->top - L->base.
2194 | mov RB, TMP1
2195 | push RB // Restore return address.
2196 | ret
2197 |
2198 |//-----------------------------------------------------------------------
2199 |//-- Special dispatch targets -------------------------------------------
2200 |//-----------------------------------------------------------------------
2201 |
2202 |->vm_record: // Dispatch target for recording phase.
2203 |.if JIT
2204 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] // RD = hookmask byte.
2205 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2206 | jnz >5 // Re-dispatch only (label 5 in vm_inshook).
2207 | // Decrement the hookcount for consistency, but always do the call.
2208 | test RDL, HOOK_ACTIVE
2209 | jnz >1 // Hook active: leave hookcount untouched.
2210 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2211 | jz >1 // No line/count hook set: leave hookcount untouched.
2212 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2213 | jmp >1 // Continue at the lj_dispatch_ins call (label 1 in vm_inshook).
2214 |.endif
2215 |
2216 |->vm_rethook: // Dispatch target for return hooks.
2217 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] // RD = hookmask byte.
2218 | test RDL, HOOK_ACTIVE // Hook already active?
2219 | jnz >5 // Yes: re-dispatch without calling out (label 5 in vm_inshook).
2220 | jmp >1 // No: do the lj_dispatch_ins call (label 1 in vm_inshook).
2221 |
2222 |->vm_inshook: // Dispatch target for instr/line hooks.
2223 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] // RD = hookmask byte.
2224 | test RDL, HOOK_ACTIVE // Hook already active?
2225 | jnz >5 // Yes: skip the call.
2226 |
2227 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2228 | jz >5 // Neither line nor count hook set: skip the call.
2229 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2230 | jz >1 // hookcount reached zero: do the call.
2231 | test RDL, LUA_MASKLINE
2232 | jz >5 // Otherwise only call when the line hook bit is set.
2233 |1: // Shared call path, also reached from vm_record and vm_rethook.
2234 | mov L:RB, SAVE_L
2235 | mov L:RB->base, BASE
2236 | mov CARG2, PC // Caveat: CARG2 == BASE
2237 | mov CARG1, L:RB
2238 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2239 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2240 |3: // vm_hotloop rejoins here.
2241 | mov BASE, L:RB->base
2242 |4: // cont_hook rejoins here.
2243 | movzx RAd, PC_RA
2244 |5: // Paths that skip the call rejoin here.
2245 | movzx OP, PC_OP
2246 | movzx RDd, PC_RD
2247 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2248 |
2249 |->cont_hook: // Continue from hook yield.
2250 | add PC, 4 // Advance PC by 4 bytes.
2251 | mov RA, [RB-40] // Fetch the value saved at [RB-40] ...
2252 | mov MULTRES, RAd // Restore MULTRES for *M ins.
2253 | jmp <4 // Reload RA and re-dispatch (label 4 in vm_inshook).
2254 |
2255 |->vm_hotloop: // Hot loop counter underflow.
2256 |.if JIT
2257 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2258 | cleartp LFUNC:RB
2259 | mov RB, LFUNC:RB->pc
2260 | movzx RDd, byte [RB+PC2PROTO(framesize)] // RD = framesize byte.
2261 | lea RD, [BASE+RD*8] // RD = BASE + framesize*8 ...
2262 | mov L:RB, SAVE_L
2263 | mov L:RB->base, BASE
2264 | mov L:RB->top, RD // ... which is stored as L->top.
2265 | mov CARG2, PC
2266 | lea CARG1, [DISPATCH+GG_DISP2J] // First argument: the jit_State pointer.
2267 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L
2268 | mov SAVE_PC, PC
2269 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2270 | jmp <3 // Reload BASE and re-dispatch (label 3 in vm_inshook).
2271 |.endif
2272 |
2273 |->vm_callhook: // Dispatch target for call hooks.
2274 | mov SAVE_PC, PC
2275 |.if JIT
2276 | jmp >1 // Skip vm_hotcall's marker setup and join at its label 1.
2277 |.endif
2278 |
2279 |->vm_hotcall: // Hot call counter underflow.
2280 |.if JIT
2281 | mov SAVE_PC, PC
2282 | or PC, 1 // Marker for hot call.
2283 |1: // vm_callhook joins here (PC left unmarked).
2284 |.endif
2285 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + RD*8 - 8 ...
2286 | mov L:RB, SAVE_L
2287 | mov L:RB->base, BASE
2288 | mov L:RB->top, RD // ... which is stored as L->top.
2289 | mov CARG2, PC
2290 | mov CARG1, L:RB
2291 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2292 | // ASMFunction returned in eax/rax (RD).
2293 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2294 |.if JIT
2295 | and PC, -2 // Clear the hot call marker bit again.
2296 |.endif
2297 | mov BASE, L:RB->base
2298 | mov RA, RD // Park the returned ASMFunction in RA.
2299 | mov RD, L:RB->top
2300 | sub RD, BASE
2301 | mov RB, RA // RB = ASMFunction (L is no longer needed in RB).
2302 | movzx RAd, PC_RA
2303 | shr RDd, 3
2304 | add NARGS:RDd, 1 // RD = ((top - base) >> 3) + 1
2305 | jmp RB // Continue at the returned ASMFunction.
2306 |
2307 |->cont_stitch: // Trace stitching.
2308 |.if JIT
2309 | // BASE = base, RC = result, RB = mbase
2310 | mov TRACE:ITYPE, [RB-40] // Save previous trace.
2311 | cleartp TRACE:ITYPE
2312 | mov TMPRd, MULTRES
2313 | movzx RAd, PC_RA
2314 | lea RA, [BASE+RA*8] // Call base.
2315 | sub TMPRd, 1 // TMPR = MULTRES-1 = number of slots to copy.
2316 | jz >2 // Nothing to copy.
2317 |1: // Move results down.
2318 | mov RB, [RC]
2319 | mov [RA], RB
2320 | add RC, 8
2321 | add RA, 8
2322 | sub TMPRd, 1
2323 | jnz <1
2324 |2:
2325 | movzx RCd, PC_RA
2326 | movzx RBd, PC_RB
2327 | add RC, RB
2328 | lea RC, [BASE+RC*8-8] // RC = BASE + (PC_RA + PC_RB - 1)*8
2329 |3:
2330 | cmp RC, RA
2331 | ja >9 // More results wanted?
2332 |
2333 | test TRACE:ITYPE, TRACE:ITYPE
2334 | jz ->cont_nop // No previous trace.
2335 | movzx RBd, word TRACE:ITYPE->traceno // Zero-extended: upper bits of RB are 0.
2336 | movzx RDd, word TRACE:ITYPE->link
2337 | cmp RDd, RBd
2338 | je ->cont_nop // Blacklisted.
2339 | test RDd, RDd
2340 | jne =>BC_JLOOP // Jump to stitched trace.
2341 |
2342 | // Stitch a new trace to the previous trace.
2343 | mov [DISPATCH+DISPATCH_J(exitno)], RBd // 32 bit store, same width as in vm_exit_handler.
2344 | mov L:RB, SAVE_L
2345 | mov L:RB->base, BASE
2346 | mov CARG2, PC
2347 | lea CARG1, [DISPATCH+GG_DISP2J] // First argument: the jit_State pointer.
2348 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L
2349 | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2350 | mov BASE, L:RB->base
2351 | jmp ->cont_nop
2352 |
2353 |9: // Fill up results with nil.
2354 | mov aword [RA], LJ_TNIL
2355 | add RA, 8
2356 | jmp <3 // Re-check whether more slots are wanted.
2357 |.endif
2358 |
2359 |->vm_profhook: // Dispatch target for profiler hook.
2360#if LJ_HASPROFILE
2361 | mov L:RB, SAVE_L
2362 | mov L:RB->base, BASE // Publish BASE to L->base for the C call.
2363 | mov CARG2, PC // Caveat: CARG2 == BASE
2364 | mov CARG1, L:RB
2365 | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2366 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
2367 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2368 | sub PC, 4 // Step PC back by 4 bytes.
2369 | jmp ->cont_nop
2370#endif
2371 |
2372 |//-----------------------------------------------------------------------
2373 |//-- Trace exit handler -------------------------------------------------
2374 |//-----------------------------------------------------------------------
2375 |
2376 |// Called from an exit stub with the exit number on the stack.
2377 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2378 |->vm_exit_handler:
2379 |.if JIT
2380 | push r13; push r12
2381 | push r11; push r10; push r9; push r8
2382 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp // 9 pushes before the lea: rbp = entry rsp + 16.
2383 | push rbx; push rdx; push rcx; push rax
2384 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2385 | mov RCH, byte [rbp-16] // The other pushed byte becomes bits 8-15.
2386 | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit number slots for r15/r14.
2387 | // DISPATCH is preserved on-trace in LJ_GC64 mode.
2388 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2389 | set_vmstate EXIT
2390 | mov [DISPATCH+DISPATCH_J(exitno)], RCd
2391 | mov [DISPATCH+DISPATCH_J(parent)], RAd
2392 |.if X64WIN
2393 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2394 |.else
2395 | sub rsp, 16*8 // Room for SSE regs.
2396 |.endif
2397 | add rbp, -128 // rbp = entry rsp - 112: the slot of the last push (rax).
2398 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2399 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2400 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2401 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2402 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2403 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2404 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2405 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2406 | // Caveat: RB is rbp.
2407 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2408 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2409 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L
2410 | mov L:RB->base, BASE
2411 |.if X64WIN
2412 | lea CARG2, [rsp+4*8] // Skip the 4-slot save area reserved above.
2413 |.else
2414 | mov CARG2, rsp // Second argument: start of the saved register area.
2415 |.endif
2416 | lea CARG1, [DISPATCH+GG_DISP2J] // First argument: the jit_State pointer.
2417 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear jit_base before calling out.
2418 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2419 | // MULTRES or negated error code returned in eax (RD).
2420 | mov RA, L:RB->cframe
2421 | and RA, CFRAME_RAWMASK // RA = cframe pointer masked with CFRAME_RAWMASK.
2422 | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2423 | mov BASE, L:RB->base
2424 | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2425 | jmp >1 // Enter vm_exit_interp past its own RA setup.
2426 |.endif
2427 |->vm_exit_interp:
2428 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2429 |.if JIT
2430 | // Restore additional callee-save registers only used in compiled code.
2431 |.if X64WIN
2432 | lea RA, [rsp+10*16+4*8] // Direct entry: derive RA from rsp.
2433 |1: // vm_exit_handler enters here with RA already set.
2434 | movdqa xmm15, [RA-10*16]
2435 | movdqa xmm14, [RA-9*16]
2436 | movdqa xmm13, [RA-8*16]
2437 | movdqa xmm12, [RA-7*16]
2438 | movdqa xmm11, [RA-6*16]
2439 | movdqa xmm10, [RA-5*16]
2440 | movdqa xmm9, [RA-4*16]
2441 | movdqa xmm8, [RA-3*16]
2442 | movdqa xmm7, [RA-2*16]
2443 | mov rsp, RA // Reposition stack to C frame.
2444 | movdqa xmm6, [RA-1*16]
2445 | mov r15, CSAVE_1
2446 | mov r14, CSAVE_2
2447 | mov r13, CSAVE_3
2448 | mov r12, CSAVE_4
2449 |.else
2450 | lea RA, [rsp+16] // Direct entry: derive RA from rsp.
2451 |1: // vm_exit_handler enters here with RA already set.
2452 | mov r13, [RA-8]
2453 | mov r12, [RA]
2454 | mov rsp, RA // Reposition stack to C frame.
2455 |.endif
2456 | cmp RDd, -LUA_ERRERR; jae >9 // Check for error from exit.
2457 | mov L:RB, SAVE_L
2458 | mov MULTRES, RDd
2459 | mov LFUNC:KBASE, [BASE-16]
2460 | cleartp LFUNC:KBASE
2461 | mov KBASE, LFUNC:KBASE->pc
2462 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = k field of the current prototype.
2463 | mov L:RB->base, BASE
2464 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
2465 | set_vmstate INTERP
2466 | // Modified copy of ins_next which handles function header dispatch, too.
2467 | mov RCd, [PC] // Fetch the 32 bit instruction word.
2468 | movzx RAd, RCH // RA = bits 8-15.
2469 | movzx OP, RCL // OP = bits 0-7.
2470 | add PC, 4
2471 | shr RCd, 16 // RC/RD = bits 16-31.
2472 | cmp MULTRES, -17 // Static dispatch?
2473 | je >5
2474 | cmp OP, BC_FUNCF // Function header?
2475 | jb >3 // No: dispatch with RC still holding bits 16-31.
2476 | cmp OP, BC_FUNCC+2 // Fast function?
2477 | jae >4
2478 |2:
2479 | mov RCd, MULTRES // RC/RD holds nres+1.
2480 |3:
2481 | jmp aword [DISPATCH+OP*8]
2482 |
2483 |4: // Check frame below fast function.
2484 | mov RC, [BASE-8]
2485 | test RCd, FRAME_TYPE
2486 | jnz <2 // Trace stitching continuation?
2487 | // Otherwise set KBASE for Lua function below fast function.
2488 | movzx RCd, byte [RC-3] // Byte at offset -3 from the value loaded from [BASE-8].
2489 | neg RC // Negate, so the index below moves downwards.
2490 | mov LFUNC:KBASE, [BASE+RC*8-32]
2491 | cleartp LFUNC:KBASE
2492 | mov KBASE, LFUNC:KBASE->pc
2493 | mov KBASE, [KBASE+PC2PROTO(k)]
2494 | jmp <2
2495 |
2496 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
2497 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2498 | mov TRACE:RA, [RA+RD*8] // Index the trace array with RD (bits 16-31 of the ins).
2499 | mov RCd, TRACE:RA->startins
2500 | movzx RAd, RCH // Decode startins the same way as above.
2501 | movzx OP, RCL
2502 | shr RCd, 16
2503 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
2504 |
2505 |9: // Rethrow error from the right C frame.
2506 | mov CARG2d, RDd
2507 | mov CARG1, L:RB
2508 | neg CARG2d // RD held the negated error code: make it positive.
2509 | call extern lj_err_trace // (lua_State *L, int errcode)
2510 |.endif
2511 |
2512 |//-----------------------------------------------------------------------
2513 |//-- Math helper functions ----------------------------------------------
2514 |//-----------------------------------------------------------------------
2515 |
2516 |// FP value rounding. Called by math.floor/math.ceil fast functions
2517 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2518 |.macro vm_round, name, mode, cond // mode: 0 = floor, 1 = ceil, 2 = trunc. cond is not referenced in the body.
2519 |->name:
2520 |->name .. _sse:
2521 | sseconst_abs xmm2, RD
2522 | sseconst_2p52 xmm3, RD
2523 | movaps xmm1, xmm0
2524 | andpd xmm1, xmm2 // |x|
2525 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2526 | jbe >1 // Also taken for unordered (NaN): x is returned unchanged.
2527 | andnpd xmm2, xmm0 // Isolate sign bit.
2528 |.if mode == 2 // trunc(x)?
2529 | movaps xmm0, xmm1 // xmm0 = |x|
2530 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2531 | subsd xmm1, xmm3
2532 | sseconst_1 xmm3, RD
2533 | cmpsd xmm0, xmm1, 1 // |x| < result?
2534 | andpd xmm0, xmm3 // Turn the all-ones/zero mask into the constant or 0.
2535 | subsd xmm1, xmm0 // If yes, subtract 1.
2536 | orpd xmm1, xmm2 // Merge sign bit back in.
2537 |.else
2538 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2539 | subsd xmm1, xmm3
2540 | orpd xmm1, xmm2 // Merge sign bit back in.
2541 | sseconst_1 xmm3, RD
2542 | .if mode == 1 // ceil(x)?
2543 | cmpsd xmm0, xmm1, 6 // x > result?
2544 | andpd xmm0, xmm3 // Turn the all-ones/zero mask into the constant or 0.
2545 | addsd xmm1, xmm0 // If yes, add 1.
2546 | orpd xmm1, xmm2 // Merge sign bit back in (again).
2547 | .else // floor(x)?
2548 | cmpsd xmm0, xmm1, 1 // x < result?
2549 | andpd xmm0, xmm3 // Turn the all-ones/zero mask into the constant or 0.
2550 | subsd xmm1, xmm0 // If yes, subtract 1.
2551 | .endif
2552 |.endif
2553 | movaps xmm0, xmm1 // Return the rounded value in xmm0.
2554 |1:
2555 | ret
2556 |.endmacro
2557 |
2558 | vm_round vm_floor, 0, 1
2559 | vm_round vm_ceil, 1, JIT
2560 | vm_round vm_trunc, 2, JIT
2561 |
2562 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2563 |->vm_mod:
2564 |// Args in xmm0/xmm1, return value in xmm0.
2565 |// Caveat: xmm0-xmm5 and RC (eax) modified!
2566 | movaps xmm5, xmm0 // xmm5 = x, kept for the final subtraction.
2567 | divsd xmm0, xmm1 // xmm0 = x/y
2568 | sseconst_abs xmm2, RD
2569 | sseconst_2p52 xmm3, RD
2570 | movaps xmm4, xmm0
2571 | andpd xmm4, xmm2 // |x/y|
2572 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2573 | jbe >1 // Also taken for unordered (NaN).
2574 | andnpd xmm2, xmm0 // Isolate sign bit.
2575 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2576 | subsd xmm4, xmm3
2577 | orpd xmm4, xmm2 // Merge sign bit back in.
2578 | sseconst_1 xmm2, RD
2579 | cmpsd xmm0, xmm4, 1 // x/y < result?
2580 | andpd xmm0, xmm2 // Turn the all-ones/zero mask into the constant or 0.
2581 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2582 | movaps xmm0, xmm5
2583 | mulsd xmm1, xmm4 // y * floor(x/y)
2584 | subsd xmm0, xmm1 // x - y*floor(x/y)
2585 | ret
2586 |1: // Quotient used as-is (no rounding step).
2587 | mulsd xmm1, xmm0 // y * (x/y)
2588 | movaps xmm0, xmm5
2589 | subsd xmm0, xmm1 // x - y*(x/y)
2590 | ret
2591 |
2592 |//-----------------------------------------------------------------------
2593 |//-- Miscellaneous functions --------------------------------------------
2594 |//-----------------------------------------------------------------------
2595 |
2596 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2597 |->vm_cpuid:
2598 | mov eax, CARG1d // eax = f, the cpuid leaf.
2599 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
2600 | push rbx // cpuid writes ebx: save rbx around it.
2601 | xor ecx, ecx // ecx = 0 as cpuid input.
2602 | cpuid
2603 | mov [rsi], eax // res[0..3] = eax, ebx, ecx, edx; rsi is used as res (presumably CARG2 on non-Windows).
2604 | mov [rsi+4], ebx
2605 | mov [rsi+8], ecx
2606 | mov [rsi+12], edx
2607 | pop rbx
2608 | .if X64WIN; pop rsi; .endif
2609 | ret // eax is left as written by cpuid.
2610 |
2611 |.define NEXT_TAB, TAB:CARG1
2612 |.define NEXT_IDX, CARG2d
2613 |.define NEXT_IDXa, CARG2
2614 |.define NEXT_PTR, RC
2615 |.define NEXT_PTRd, RCd
2616 |.define NEXT_TMP, CARG3
2617 |.define NEXT_ASIZE, CARG4d
2618 |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
2619 |.if X64WIN
2620 |.define NEXT_RES_PTR, [rsp+aword*5]
2621 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
2622 |.else
2623 |.define NEXT_RES_PTR, [rsp+aword*1]
2624 |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
2625 |.endif
2626 |
2627 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
2628 |// Next idx returned in edx.
2629 |->vm_next:
2630 |.if JIT
2631 | mov NEXT_ASIZE, NEXT_TAB->asize // Cache t->asize.
2632 |1: // Traverse array part.
2633 | cmp NEXT_IDX, NEXT_ASIZE; jae >5 // idx >= asize: continue in the hash part.
2634 | mov NEXT_TMP, NEXT_TAB->array
2635 | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8] // NEXT_TMP = array[idx]
2636 | cmp NEXT_TMP, LJ_TNIL; je >2
2637 | lea NEXT_PTR, NEXT_RES_PTR // Return the address of the result slots.
2638 | mov qword [NEXT_PTR], NEXT_TMP // First slot: the array value.
2639 |.if DUALNUM
2640 | setint NEXT_TMP, NEXT_IDXa
2641 | mov qword [NEXT_PTR+qword*1], NEXT_TMP // Second slot: idx as the key.
2642 |.else
2643 | cvtsi2sd xmm0, NEXT_IDX
2644 | movsd qword [NEXT_PTR+qword*1], xmm0 // Second slot: idx as the key.
2645 |.endif
2646 | NEXT_RES_IDX 1 // Next idx = idx + 1.
2647 | ret
2648 |2: // Skip holes in array part.
2649 | add NEXT_IDX, 1
2650 | jmp <1
2651 |
2652 |5: // Traverse hash part.
2653 | sub NEXT_IDX, NEXT_ASIZE // idx is now relative to the hash part.
2654 |6:
2655 | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 // Past hmask: iteration is done.
2656 | imul NEXT_PTRd, NEXT_IDX, #NODE // Offset = idx * size of the NODE type.
2657 | add NODE:NEXT_PTR, NEXT_TAB->node // Return the address of the node itself.
2658 | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7
2659 | NEXT_RES_IDXL NEXT_ASIZE+1 // Next idx = hash idx + asize + 1.
2660 | ret
2661 |7: // Skip holes in hash part.
2662 | add NEXT_IDX, 1
2663 | jmp <6
2664 |
2665 |9: // End of iteration. Set the key to nil (not the value).
2666 | NEXT_RES_IDX NEXT_ASIZE // Undo the asize subtraction from label 5.
2667 | lea NEXT_PTR, NEXT_RES_PTR
2668 | mov qword [NEXT_PTR+qword*1], LJ_TNIL
2669 | ret
2670 |.endif
2671 |
2672 |//-----------------------------------------------------------------------
2673 |//-- Assertions ---------------------------------------------------------
2674 |//-----------------------------------------------------------------------
2675 |
2676 |->assert_bad_for_arg_type: // Executes int3 (twice if LUA_USE_ASSERT is defined).
2677#ifdef LUA_USE_ASSERT
2678 | int3
2679#endif
2680 | int3
2681 |
2682 |//-----------------------------------------------------------------------
2683 |//-- FFI helper functions -----------------------------------------------
2684 |//-----------------------------------------------------------------------
2685 |
2686 |// Handler for callback functions. Callback slot number in ah/al.
2687 |->vm_ffi_callback:
2688 |.if FFI
2689 |.type CTSTATE, CTState, PC
2690 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
2691 | lea DISPATCH, [ebp+GG_G2DISP] // NOTE(review): 32 bit base register in a GC64 file; confirm this is intended.
2692 | mov CTSTATE, GL:ebp->ctype_state
2693 | movzx eax, ax // eax = zero-extended 16 bit slot number.
2694 | mov CTSTATE->cb.slot, eax
2695 | mov CTSTATE->cb.gpr[0], CARG1 // Save the incoming argument registers.
2696 | mov CTSTATE->cb.gpr[1], CARG2
2697 | mov CTSTATE->cb.gpr[2], CARG3
2698 | mov CTSTATE->cb.gpr[3], CARG4
2699 | movsd qword CTSTATE->cb.fpr[0], xmm0
2700 | movsd qword CTSTATE->cb.fpr[1], xmm1
2701 | movsd qword CTSTATE->cb.fpr[2], xmm2
2702 | movsd qword CTSTATE->cb.fpr[3], xmm3
2703 |.if X64WIN
2704 | lea rax, [rsp+CFRAME_SIZE+4*8] // rax = rsp + CFRAME_SIZE + 4 extra slots.
2705 |.else
2706 | lea rax, [rsp+CFRAME_SIZE] // rax = rsp + CFRAME_SIZE.
2707 | mov CTSTATE->cb.gpr[4], CARG5 // Non-Windows: two more GPRs and four more FPRs.
2708 | mov CTSTATE->cb.gpr[5], CARG6
2709 | movsd qword CTSTATE->cb.fpr[4], xmm4
2710 | movsd qword CTSTATE->cb.fpr[5], xmm5
2711 | movsd qword CTSTATE->cb.fpr[6], xmm6
2712 | movsd qword CTSTATE->cb.fpr[7], xmm7
2713 |.endif
2714 | mov CTSTATE->cb.stack, rax
2715 | mov CARG2, rsp // Second argument: the current stack pointer (void *cf).
2716 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2717 | mov CARG1, CTSTATE
2718 | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2719 | // lua_State * returned in eax (RD).
2720 | set_vmstate INTERP
2721 | mov BASE, L:RD->base
2722 | mov RD, L:RD->top
2723 | sub RD, BASE
2724 | mov LFUNC:RB, [BASE-16]
2725 | cleartp LFUNC:RB
2726 | shr RD, 3
2727 | add RD, 1 // RD = ((top - base) >> 3) + 1
2728 | ins_callt
2729 |.endif
2730 |
2731 |->cont_ffi_callback: // Return from FFI callback.
2732 |.if FFI
2733 | mov L:RA, SAVE_L
2734 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
2735 | mov aword CTSTATE->L, L:RA // cts->L = L
2736 | mov L:RA->base, BASE
2737 | mov L:RA->top, RB
2738 | mov CARG1, CTSTATE
2739 | mov CARG2, RC // Second argument: RC (TValue *o).
2740 | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2741 | mov rax, CTSTATE->cb.gpr[0] // Load rax/xmm0 from cb.gpr[0]/cb.fpr[0] before leaving.
2742 | movsd xmm0, qword CTSTATE->cb.fpr[0]
2743 | jmp ->vm_leave_unw
2744 |.endif
2745 |
2746 |->vm_ffi_call: // Call C function via FFI.
2747 | // Caveat: needs special frame unwinding, see below.
2748 |.if FFI
2749 | .type CCSTATE, CCallState, rbx
2750 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // rbp frame; old rbx sits at [rbp-8].
2751 |
2752 | // Readjust stack.
2753 | mov eax, CCSTATE->spadj // 32 bit load zero-extends into rax.
2754 | sub rsp, rax
2755 |
2756 | // Copy stack slots.
2757 | movzx ecx, byte CCSTATE->nsp
2758 | sub ecx, 1
2759 | js >2 // nsp == 0: nothing to copy.
2760 |1: // Copy slots from index nsp-1 down to 0.
2761 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
2762 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
2763 | sub ecx, 1
2764 | jns <1
2765 |2:
2766 |
2767 | movzx eax, byte CCSTATE->nfpr // eax = nfpr; not modified again before the call.
2768 | mov CARG1, CCSTATE->gpr[0]
2769 | mov CARG2, CCSTATE->gpr[1]
2770 | mov CARG3, CCSTATE->gpr[2]
2771 | mov CARG4, CCSTATE->gpr[3]
2772 |.if not X64WIN
2773 | mov CARG5, CCSTATE->gpr[4]
2774 | mov CARG6, CCSTATE->gpr[5]
2775 |.endif
2776 | test eax, eax; jz >5 // nfpr == 0: skip loading the FP argument registers.
2777 | movaps xmm0, CCSTATE->fpr[0]
2778 | movaps xmm1, CCSTATE->fpr[1]
2779 | movaps xmm2, CCSTATE->fpr[2]
2780 | movaps xmm3, CCSTATE->fpr[3]
2781 |.if not X64WIN
2782 | cmp eax, 4; jbe >5 // nfpr <= 4: xmm4-xmm7 are not loaded.
2783 | movaps xmm4, CCSTATE->fpr[4]
2784 | movaps xmm5, CCSTATE->fpr[5]
2785 | movaps xmm6, CCSTATE->fpr[6]
2786 | movaps xmm7, CCSTATE->fpr[7]
2787 |.endif
2788 |5:
2789 |
2790 | call aword CCSTATE->func
2791 |
2792 | mov CCSTATE->gpr[0], rax // Store the return registers back into the CCallState.
2793 | movaps CCSTATE->fpr[0], xmm0
2794 |.if not X64WIN
2795 | mov CCSTATE->gpr[1], rdx
2796 | movaps CCSTATE->fpr[1], xmm1
2797 |.endif
2798 |
2799 | mov rbx, [rbp-8]; leave; ret // Restore rbx from its frame slot, then drop the frame.
2800 |.endif
2801 |// Note: vm_ffi_call must be the last function in this object file!
2802 |
2803 |//-----------------------------------------------------------------------
2804}
2805
2806/* Generate the code for a single instruction. */
2807static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2808{
2809 int vk = 0;
2810 |// Note: aligning all instructions does not pay off.
2811 |=>defop:
2812
2813 switch (op) {
2814
2815 /* -- Comparison ops ---------------------------------------------------- */
2816
2817 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2818
2819 |.macro jmp_comp, lt, ge, le, gt, target // Emits one jump to target, picked by the C-level op.
2820 ||switch (op) {
2821 ||case BC_ISLT:
2822 | lt target
2823 ||break;
2824 ||case BC_ISGE:
2825 | ge target
2826 ||break;
2827 ||case BC_ISLE:
2828 | le target
2829 ||break;
2830 ||case BC_ISGT:
2831 | gt target
2832 ||break;
2833 ||default: break; /* Shut up GCC. */
2834 ||}
2835 |.endmacro
2836
2837 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2838 | // RA = src1, RD = src2, JMP with RD = target
2839 | ins_AD
2840 | mov ITYPE, [BASE+RA*8]
2841 | mov RB, [BASE+RD*8]
2842 | mov RA, ITYPE // RA = raw src1 value.
2843 | mov RD, RB // RD = raw src2 value.
2844 | sar ITYPE, 47 // ITYPE = src1 >> 47, compared against LJ_TISNUM below.
2845 | sar RB, 47 // RB = src2 >> 47, likewise.
2846 |.if DUALNUM
2847 | cmp ITYPEd, LJ_TISNUM; jne >7
2848 | cmp RBd, LJ_TISNUM; jne >8
2849 | add PC, 4
2850 | cmp RAd, RDd // Both integers: signed 32 bit compare.
2851 | jmp_comp jge, jl, jg, jle, >9 // Inverted condition: skip the branch if the comparison is false.
2852 |6: // Comparison true: load the jump operand and branch.
2853 | movzx RDd, PC_RD
2854 | branchPC RD
2855 |9:
2856 | ins_next
2857 |
2858 |7: // RA is not an integer.
2859 | ja ->vmeta_comp
2860 | // RA is a number.
2861 | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
2862 | // RA is a number, RD is an integer.
2863 | cvtsi2sd xmm0, RDd
2864 | jmp >2
2865 |
2866 |8: // RA is an integer, RD is not an integer.
2867 | ja ->vmeta_comp
2868 | // RA is an integer, RD is a number.
2869 | cvtsi2sd xmm1, RAd
2870 | movd xmm0, RD
2871 | jmp >3
2872 |.else
2873 | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
2874 | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
2875 |.endif
2876 |1:
2877 | movd xmm0, RD // xmm0 = src2
2878 |2:
2879 | movd xmm1, RA // xmm1 = src1
2880 |3:
2881 | add PC, 4
2882 | ucomisd xmm0, xmm1 // Compares src2 with src1 (operands swapped).
2883 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2884 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2885 |.if DUALNUM
2886 | jmp_comp jbe, ja, jb, jae, <9
2887 | jmp <6
2888 |.else
2889 | jmp_comp jbe, ja, jb, jae, >1
2890 | movzx RDd, PC_RD
2891 | branchPC RD
2892 |1:
2893 | ins_next
2894 |.endif
2895 break;
2896
2897 case BC_ISEQV: case BC_ISNEV:
2898 vk = op == BC_ISEQV;
2899 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2900 | mov RB, [BASE+RD*8]
2901 | mov ITYPE, [BASE+RA*8]
2902 | add PC, 4
2903 | mov RD, RB // RD = raw src2 value.
2904 | mov RA, ITYPE // RA = raw src1 value.
2905 | sar RB, 47 // RB = src2 >> 47.
2906 | sar ITYPE, 47 // ITYPE = src1 >> 47.
2907 |.if DUALNUM
2908 | cmp RBd, LJ_TISNUM; jne >7
2909 | cmp ITYPEd, LJ_TISNUM; jne >8
2910 | cmp RDd, RAd // Both integers: compare the low 32 bits.
2911 if (vk) {
2912 | jne >9
2913 } else {
2914 | je >9
2915 }
2916 | movzx RDd, PC_RD
2917 | branchPC RD
2918 |9:
2919 | ins_next
2920 |
2921 |7: // RD is not an integer.
2922 | ja >5
2923 | // RD is a number.
2924 | movd xmm1, RD
2925 | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2926 | // RD is a number, RA is an integer.
2927 | cvtsi2sd xmm0, RAd
2928 | jmp >2
2929 |
2930 |8: // RD is an integer, RA is not an integer.
2931 | ja >5
2932 | // RD is an integer, RA is a number.
2933 | cvtsi2sd xmm1, RDd
2934 | jmp >1
2935 |
2936 |.else
2937 | cmp RBd, LJ_TISNUM; jae >5
2938 | cmp ITYPEd, LJ_TISNUM; jae >5
2939 | movd xmm1, RD
2940 |.endif
2941 |1:
2942 | movd xmm0, RA
2943 |2:
2944 | ucomisd xmm0, xmm1
2945 |4:
2946 iseqne_fp:
2947 if (vk) {
2948 | jp >2 // Unordered means not equal.
2949 | jne >2
2950 } else {
2951 | jp >2 // Unordered means not equal.
2952 | je >1
2953 }
2954 iseqne_end:
2955 if (vk) {
2956 |1: // EQ: Branch to the target.
2957 | movzx RDd, PC_RD
2958 | branchPC RD
2959 |2: // NE: Fallthrough to next instruction.
2960 |.if not FFI
2961 |3:
2962 |.endif
2963 } else {
2964 |.if not FFI
2965 |3:
2966 |.endif
2967 |2: // NE: Branch to the target.
2968 | movzx RDd, PC_RD
2969 | branchPC RD
2970 |1: // EQ: Fallthrough to next instruction.
2971 }
2972 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2973 op == BC_ISEQN || op == BC_ISNEN)) {
2974 | jmp <9 // Reuse the ins_next emitted at label 9.
2975 } else {
2976 | ins_next
2977 }
2978 |
2979 if (op == BC_ISEQV || op == BC_ISNEV) {
2980 |5: // Either or both types are not numbers.
2981 |.if FFI
2982 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
2983 | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
2984 |.endif
2985 | cmp RA, RD // Compare the full 64 bit values.
2986 | je <1 // Same GCobjs or pvalues?
2987 | cmp RBd, ITYPEd
2988 | jne <2 // Not the same type?
2989 | cmp RBd, LJ_TISTABUD
2990 | ja <2 // Different objects and not table/ud?
2991 |
2992 | // Different tables or userdatas. Need to check __eq metamethod.
2993 | // Field metatable must be at same offset for GCtab and GCudata!
2994 | cleartp TAB:RA
2995 | mov TAB:RB, TAB:RA->metatable
2996 | test TAB:RB, TAB:RB
2997 | jz <2 // No metatable?
2998 | test byte TAB:RB->nomm, 1<<MM_eq
2999 | jnz <2 // Or 'no __eq' flag set?
3000 if (vk) {
3001 | xor RBd, RBd // ne = 0
3002 } else {
3003 | mov RBd, 1 // ne = 1
3004 }
3005 | jmp ->vmeta_equal // Handle __eq metamethod.
3006 } else {
3007 |.if FFI
3008 |3: // Target of the type-check failures in the other ISEQ*/ISNE* ops.
3009 | cmp ITYPEd, LJ_TCDATA
3010 if (LJ_DUALNUM && vk) {
3011 | jne <9
3012 } else {
3013 | jne <2
3014 }
3015 | jmp ->vmeta_equal_cd
3016 |.endif
3017 }
3018 break;
3019 case BC_ISEQS: case BC_ISNES:
3020 vk = op == BC_ISEQS;
3021 | ins_AND // RA = src, RD = str const, JMP with RD = target
3022 | mov RB, [BASE+RA*8]
3023 | add PC, 4
3024 | checkstr RB, >3 // Not a string: handled at label 3.
3025 | cmp RB, [KBASE+RD*8] // Compare against the constant slot (64 bit compare).
3026 iseqne_test:
3027 if (vk) {
3028 | jne >2
3029 } else {
3030 | je >1
3031 }
3032 goto iseqne_end;
3033 case BC_ISEQN: case BC_ISNEN:
3034 vk = op == BC_ISEQN;
3035 | ins_AD // RA = src, RD = num const, JMP with RD = target
3036 | mov RB, [BASE+RA*8]
3037 | add PC, 4
3038 |.if DUALNUM
3039 | checkint RB, >7
3040 | mov RD, [KBASE+RD*8] // RD = the constant itself from here on.
3041 | checkint RD, >8
3042 | cmp RBd, RDd // Both integers: compare the low 32 bits.
3043 if (vk) {
3044 | jne >9
3045 } else {
3046 | je >9
3047 }
3048 | movzx RDd, PC_RD
3049 | branchPC RD
3050 |9:
3051 | ins_next
3052 |
3053 |7: // RA is not an integer.
3054 | ja >3
3055 | // RA is a number.
3056 | mov RD, [KBASE+RD*8]
3057 | checkint RD, >1
3058 | // RA is a number, RD is an integer.
3059 | cvtsi2sd xmm0, RDd
3060 | jmp >2
3061 |
3062 |8: // RA is an integer, RD is a number.
3063 | cvtsi2sd xmm0, RBd
3064 | movd xmm1, RD
3065 | ucomisd xmm0, xmm1
3066 | jmp >4 // Flags are set: skip the compare at label 2.
3067 |1:
3068 | movd xmm0, RD
3069 |.else
3070 | checknum RB, >3
3071 |1:
3072 | movsd xmm0, qword [KBASE+RD*8]
3073 |.endif
3074 |2:
3075 | ucomisd xmm0, qword [BASE+RA*8] // Compare the constant with the stack slot.
3076 |4:
3077 goto iseqne_fp;
3078 case BC_ISEQP: case BC_ISNEP:
3079 vk = op == BC_ISEQP;
3080 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3081 | mov RB, [BASE+RA*8]
3082 | sar RB, 47 // RB = src >> 47.
3083 | add PC, 4
3084 | cmp RBd, RDd // Compare it with the operand from the instruction.
3085 if (!LJ_HASFFI) goto iseqne_test;
3086 if (vk) {
3087 | jne >3
3088 | movzx RDd, PC_RD
3089 | branchPC RD
3090 |2:
3091 | ins_next
3092 |3: // Not equal: only cdata needs the metamethod path.
3093 | cmp RBd, LJ_TCDATA; jne <2
3094 | jmp ->vmeta_equal_cd
3095 } else {
3096 | je >2
3097 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3098 | movzx RDd, PC_RD
3099 | branchPC RD
3100 |2:
3101 | ins_next
3102 }
3103 break;
3104
3105 /* -- Unary test and copy ops ------------------------------------------- */
3106
3107 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3108 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3109 | mov ITYPE, [BASE+RD*8]
3110 | add PC, 4
3111 if (op == BC_ISTC || op == BC_ISFC) {
3112 | mov RB, ITYPE // Keep the full value for the copy below.
3113 }
3114 | sar ITYPE, 47 // ITYPE = src >> 47.
3115 | cmp ITYPEd, LJ_TISTRUECOND
3116 if (op == BC_IST || op == BC_ISTC) {
3117 | jae >1 // IST/ISTC: no jump when ITYPE >= LJ_TISTRUECOND (unsigned).
3118 } else {
3119 | jb >1 // ISF/ISFC: no jump when ITYPE < LJ_TISTRUECOND (unsigned).
3120 }
3121 if (op == BC_ISTC || op == BC_ISFC) {
3122 | mov [BASE+RA*8], RB // Copy src to dst only when the jump is taken.
3123 }
3124 | movzx RDd, PC_RD
3125 | branchPC RD
3126 |1: // Fallthrough to the next instruction.
3127 | ins_next
3128 break;
3129
3130 case BC_ISTYPE:
3131 | ins_AD // RA = src, RD = -type
3132 | mov RB, [BASE+RA*8]
3133 | sar RB, 47 // RB = src >> 47.
3134 | add RBd, RDd // Sum is zero only if RB equals the negated operand.
3135 | jne ->vmeta_istype
3136 | ins_next
3137 break;
3138 case BC_ISNUM:
3139 | ins_AD // RA = src, RD = -(TISNUM-1)
3140 | checknumtp [BASE+RA*8], ->vmeta_istype // Slot fails the number check: go to vmeta_istype.
3141 | ins_next
3142 break;
3143
3144 /* -- Unary ops --------------------------------------------------------- */
3145
3146 case BC_MOV:
3147 | ins_AD // RA = dst, RD = src
3148 | mov RB, [BASE+RD*8] // Copy the whole 64 bit slot.
3149 | mov [BASE+RA*8], RB
3150 | ins_next_
3151 break;
3152 case BC_NOT:
3153 | ins_AD // RA = dst, RD = src
3154 | mov RB, [BASE+RD*8]
3155 | sar RB, 47 // RB = src >> 47.
3156 | mov RCd, 2
3157 | cmp RB, LJ_TISTRUECOND // CF = (RB < LJ_TISTRUECOND), unsigned.
3158 | sbb RCd, 0 // RC = 2 - CF
3159 | shl RC, 47
3160 | not RC // Result = ~(RC << 47)
3161 | mov [BASE+RA*8], RC
3162 | ins_next
3163 break;
3164 case BC_UNM:
3165 | ins_AD // RA = dst, RD = src
3166 | mov RB, [BASE+RD*8]
3167 |.if DUALNUM
3168 | checkint RB, >5
3169 | neg RBd
3170 | jo >4 // Overflow: the result needs an FP value.
3171 | setint RB
3172 |9:
3173 | mov [BASE+RA*8], RB
3174 | ins_next
3175 |4:
3176 | mov64 RB, U64x(41e00000,00000000) // 2^31.
3177 | jmp <9
3178 |5:
3179 | ja ->vmeta_unm
3180 |.else
3181 | checknum RB, ->vmeta_unm
3182 |.endif
3183 | mov64 RD, U64x(80000000,00000000) // Mask with only bit 63 set.
3184 | xor RB, RD // Flip bit 63 of the value.
3185 |.if DUALNUM
3186 | jmp <9
3187 |.else
3188 | mov [BASE+RA*8], RB
3189 | ins_next
3190 |.endif
3191 break;
3192 case BC_LEN:
3193 | ins_AD // RA = dst, RD = src
3194 | mov RD, [BASE+RD*8]
3195 | checkstr RD, >2 // Not a string: try the table path.
3196 |.if DUALNUM
3197 | mov RDd, dword STR:RD->len
3198 |1: // Store the length; the table path rejoins here.
3199 | setint RD
3200 | mov [BASE+RA*8], RD
3201 |.else
3202 | xorps xmm0, xmm0
3203 | cvtsi2sd xmm0, dword STR:RD->len
3204 |1: // Store the length; the table path rejoins here.
3205 | movsd qword [BASE+RA*8], xmm0
3206 |.endif
3207 | ins_next
3208 |2:
3209 | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
3210 | mov TAB:CARG1, TAB:RD
3211#if LJ_52
3212 | mov TAB:RB, TAB:RD->metatable
3213 | cmp TAB:RB, 0
3214 | jnz >9 // Has a metatable: check the 'no __len' flag first.
3215 |3:
3216#endif
3217 |->BC_LEN_Z:
3218 | mov RB, BASE // Save BASE.
3219 | call extern lj_tab_len // (GCtab *t)
3220 | // Length of table returned in eax (RD).
3221 |.if DUALNUM
3222 | // Nothing to do.
3223 |.else
3224 | cvtsi2sd xmm0, RDd
3225 |.endif
3226 | mov BASE, RB // Restore BASE.
3227 | movzx RAd, PC_RA // Reload operand A after the call.
3228 | jmp <1
3229#if LJ_52
3230 |9: // Check for __len.
3231 | test byte TAB:RB->nomm, 1<<MM_len
3232 | jnz <3
3233 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3234#endif
3235 break;
3236
3237 /* -- Binary ops -------------------------------------------------------- */
3238
3239 |.macro ins_arithpre, sseins, ssereg // Type-checks both operands, loads the first into xmm0, applies sseins to ssereg with the second.
3240 | ins_ABC
3241 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3242 ||switch (vk) {
3243 ||case 0:
3244 | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3245 | .if DUALNUM
3246 | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
3247 | .endif
3248 | movsd xmm0, qword [BASE+RB*8] // First operand: stack slot RB.
3249 | sseins ssereg, qword [KBASE+RC*8] // Second operand: constant RC.
3250 || break;
3251 ||case 1:
3252 | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3253 | .if DUALNUM
3254 | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3255 | .endif
3256 | movsd xmm0, qword [KBASE+RC*8] // First operand: constant RC.
3257 | sseins ssereg, qword [BASE+RB*8] // Second operand: stack slot RB.
3258 || break;
3259 ||default:
3260 | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3261 | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3262 | movsd xmm0, qword [BASE+RB*8] // First operand: stack slot RB.
3263 | sseins ssereg, qword [BASE+RC*8] // Second operand: stack slot RC.
3264 || break;
3265 ||}
3266 |.endmacro
3267 |
3268 |.macro ins_arithdn, intins // Integer arithmetic; non-integers and overflow go to the vmeta_arith_*o targets.
3269 | ins_ABC
3270 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3271 ||switch (vk) {
3272 ||case 0:
3273 | mov RB, [BASE+RB*8]
3274 | mov RC, [KBASE+RC*8]
3275 | checkint RB, ->vmeta_arith_vno
3276 | checkint RC, ->vmeta_arith_vno
3277 | intins RBd, RCd; jo ->vmeta_arith_vno // Result in RB.
3278 || break;
3279 ||case 1:
3280 | mov RB, [BASE+RB*8]
3281 | mov RC, [KBASE+RC*8]
3282 | checkint RB, ->vmeta_arith_nvo
3283 | checkint RC, ->vmeta_arith_nvo
3284 | intins RCd, RBd; jo ->vmeta_arith_nvo // Operands swapped: result in RC.
3285 || break;
3286 ||default:
3287 | mov RB, [BASE+RB*8]
3288 | mov RC, [BASE+RC*8]
3289 | checkint RB, ->vmeta_arith_vvo
3290 | checkint RC, ->vmeta_arith_vvo
3291 | intins RBd, RCd; jo ->vmeta_arith_vvo // Result in RB.
3292 || break;
3293 ||}
3294 ||if (vk == 1) {
3295 | setint RC
3296 | mov [BASE+RA*8], RC
3297 ||} else {
3298 | setint RB
3299 | mov [BASE+RA*8], RB
3300 ||}
3301 | ins_next
3302 |.endmacro
3303 |
3304 |.macro ins_arithpost // Stores the FP result from xmm0 into stack slot RA.
3305 | movsd qword [BASE+RA*8], xmm0
3306 |.endmacro
3307 |
3308 |.macro ins_arith, sseins // One-argument form: FP operation only.
3309 | ins_arithpre sseins, xmm0
3310 | ins_arithpost
3311 | ins_next
3312 |.endmacro
3313 |
3314 |.macro ins_arith, intins, sseins // Two-argument form: integer path if DUALNUM, else FP.
3315 |.if DUALNUM
3316 | ins_arithdn intins
3317 |.else
3318 | ins_arith, sseins
3319 |.endif
3320 |.endmacro
3321
3322 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3323 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3324 | ins_arith add, addsd
3325 break;
3326 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3327 | ins_arith sub, subsd
3328 break;
3329 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3330 | ins_arith imul, mulsd
3331 break;
3332 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3333 | ins_arith divsd // One-argument form: no integer instruction is given.
3334 break;
3335 case BC_MODVN: /* Modulo: operands are placed in xmm0/xmm1 and the result is produced by vm_mod. */
3336 | ins_arithpre movsd, xmm1 // With movsd as the "operation", the second operand is simply loaded into xmm1.
3337 |->BC_MODVN_Z: // Shared tail, also reached from BC_MODNV/BC_MODVV.
3338 | call ->vm_mod
3339 | ins_arithpost // Stores xmm0 to slot RA.
3340 | ins_next
3341 break;
3342 case BC_MODNV: case BC_MODVV: /* Modulo for the NV and VV forms: load operands, then reuse the BC_MODVN tail. */
3343 | ins_arithpre movsd, xmm1
3344 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3345 break;
3346 case BC_POW: /* Exponentiation through the external C function pow. */
3347 | ins_arithpre movsd, xmm1 // Operands end up in xmm0 and xmm1.
3348 | mov RB, BASE // Keep BASE in RB (rbp, callee-save) across the C call.
3349 | call extern pow
3350 | movzx RAd, PC_RA // Re-read operand A after the call.
3351 | mov BASE, RB // Restore BASE.
3352 | ins_arithpost // Store xmm0 to slot RA.
3353 | ins_next
3354 break;
3355
3356 case BC_CAT: /* Concatenation: calls lj_meta_cat, then copies slot RB to slot RA unless a metamethod must run. */
3357 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3358 | mov L:CARG1, SAVE_L
3359 | mov L:CARG1->base, BASE
3360 | lea CARG2, [BASE+RC*8] // top = address of slot RC.
3361 | mov CARG3d, RCd
3362 | sub CARG3d, RBd // left = RC - RB.
3363 |->BC_CAT_Z: // Global entry label; expects CARG1 = L and the other arguments already set up.
3364 | mov L:RB, L:CARG1 // Keep L in RB for use after the call.
3365 | mov SAVE_PC, PC
3366 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3367 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3368 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
3369 | test RC, RC
3370 | jnz ->vmeta_binop // Non-NULL result: continue with the metamethod.
3371 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3372 | movzx RAd, PC_RA
3373 | mov RC, [BASE+RB*8]
3374 | mov [BASE+RA*8], RC
3375 | ins_next
3376 break;
3377
3378 /* -- Constant ops ------------------------------------------------------ */
3379
3380 case BC_KSTR: /* Store a string constant into slot RA. */
3381 | ins_AND // RA = dst, RD = str const (~)
3382 | mov RD, [KBASE+RD*8] // Fetch the constant at index RD relative to KBASE.
3383 | settp RD, LJ_TSTR // Apply the LJ_TSTR type tag.
3384 | mov [BASE+RA*8], RD
3385 | ins_next
3386 break;
3387 case BC_KCDATA: /* Store a cdata constant into slot RA; the body is only assembled when FFI is defined. */
3388 |.if FFI
3389 | ins_AND // RA = dst, RD = cdata const (~)
3390 | mov RD, [KBASE+RD*8] // Fetch the constant at index RD relative to KBASE.
3391 | settp RD, LJ_TCDATA // Apply the LJ_TCDATA type tag.
3392 | mov [BASE+RA*8], RD
3393 | ins_next
3394 |.endif
3395 break;
3396 case BC_KSHORT: /* Store a signed 16-bit literal into slot RA, as an integer (DUALNUM) or as a double. */
3397 | ins_AD // RA = dst, RD = signed int16 literal
3398 |.if DUALNUM
3399 | movsx RDd, RDW // Sign-extend the 16-bit literal to 32 bits.
3400 | setint RD
3401 | mov [BASE+RA*8], RD
3402 |.else
3403 | movsx RDd, RDW // Sign-extend literal.
3404 | cvtsi2sd xmm0, RDd // Convert the 32-bit integer to a double.
3405 | movsd qword [BASE+RA*8], xmm0
3406 |.endif
3407 | ins_next
3408 break;
3409 case BC_KNUM: /* Copy a number constant into slot RA. */
3410 | ins_AD // RA = dst, RD = num const
3411 | movsd xmm0, qword [KBASE+RD*8] // Load constant RD from KBASE.
3412 | movsd qword [BASE+RA*8], xmm0
3413 | ins_next
3414 break;
3415 case BC_KPRI: /* Store a primitive value into slot RA. */
3416 | ins_AD // RA = dst, RD = primitive type (~)
3417 | shl RD, 47 // Build the stored value as ~(RD << 47):
3418 | not RD // shift the operand up by 47 bits, then complement all 64 bits.
3419 | mov [BASE+RA*8], RD
3420 | ins_next
3421 break;
3422 case BC_KNIL: /* Fill slots RA..RD (inclusive) with LJ_TNIL. */
3423 | ins_AD // RA = dst_start, RD = dst_end
3424 | lea RA, [BASE+RA*8+8] // RA = address of the second destination slot.
3425 | lea RD, [BASE+RD*8] // RD = address of the last destination slot.
3426 | mov RB, LJ_TNIL
3427 | mov [RA-8], RB // Sets minimum 2 slots.
3428 |1:
3429 | mov [RA], RB
3430 | add RA, 8
3431 | cmp RA, RD
3432 | jbe <1 // Loop while RA <= RD (unsigned address compare).
3433 | ins_next
3434 break;
3435
3436 /* -- Upvalue and function ops ------------------------------------------ */
3437
3438 case BC_UGET: /* Copy the value of upvalue RD into slot RA. */
3439 | ins_AD // RA = dst, RD = upvalue #
3440 | mov LFUNC:RB, [BASE-16] // Function object is read from the frame slot at BASE-16.
3441 | cleartp LFUNC:RB
3442 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] // uvptr[RD]
3443 | mov RB, UPVAL:RB->v // Pointer to the upvalue's value.
3444 | mov RD, [RB]
3445 | mov [BASE+RA*8], RD
3446 | ins_next
3447 break;
3448 case BC_USETV: /* Store slot RD into upvalue RA; closed, black upvalues may need a GC barrier. */
3449#define TV2MARKOFS \
3450 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3451 | ins_AD // RA = upvalue #, RD = src
3452 | mov LFUNC:RB, [BASE-16] // Function object is read from the frame slot at BASE-16.
3453 | cleartp LFUNC:RB
3454 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] // uvptr[RA]
3455 | cmp byte UPVAL:RB->closed, 0 // Flags are consumed by the jz below; the movs in between leave them intact.
3456 | mov RB, UPVAL:RB->v
3457 | mov RA, [BASE+RD*8]
3458 | mov [RB], RA // Perform the store before any barrier check.
3459 | jz >1 // closed == 0: no barrier check.
3460 | // Check barrier for closed upvalue.
3461 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3462 | jnz >2
3463 |1:
3464 | ins_next
3465 |
3466 |2: // Upvalue is black. Check if new value is collectable and white.
3467 | mov RD, RA
3468 | sar RD, 47 // Extract the type tag from the upper bits of the value.
3469 | sub RDd, LJ_TISGCV
3470 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3471 | jbe <1
3472 | cleartp GCOBJ:RA
3473 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3474 | jz <1
3475 | // Crossed a write barrier. Move the barrier forward.
3476 |.if not X64WIN
3477 | mov CARG2, RB
3478 | mov RB, BASE // Save BASE.
3479 |.else
3480 | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3481 |.endif
3482 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3483 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3484 | mov BASE, RB // Restore BASE.
3485 | jmp <1
3486 break;
3487#undef TV2MARKOFS
3488 case BC_USETS: /* Store a string constant into upvalue RA, with a GC barrier check. */
3489 | ins_AND // RA = upvalue #, RD = str const (~)
3490 | mov LFUNC:RB, [BASE-16] // Function object is read from the frame slot at BASE-16.
3491 | cleartp LFUNC:RB
3492 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] // uvptr[RA]
3493 | mov STR:RA, [KBASE+RD*8]
3494 | mov RD, UPVAL:RB->v
3495 | settp STR:ITYPE, STR:RA, LJ_TSTR // Tagged copy goes to ITYPE; RA keeps the untagged pointer for the check at 2:.
3496 | mov [RD], STR:ITYPE
3497 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3498 | jnz >2
3499 |1:
3500 | ins_next
3501 |
3502 |2: // Check if string is white and ensure upvalue is closed.
3503 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3504 | jz <1
3505 | cmp byte UPVAL:RB->closed, 0
3506 | jz <1
3507 | // Crossed a write barrier. Move the barrier forward.
3508 | mov RB, BASE // Save BASE (CARG2 == BASE).
3509 | mov CARG2, RD // tv argument = address that was just written.
3510 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3511 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3512 | mov BASE, RB // Restore BASE.
3513 | jmp <1
3514 break;
3515 case BC_USETN: /* Store a number constant into upvalue RA; this handler performs no barrier check. */
3516 | ins_AD // RA = upvalue #, RD = num const
3517 | mov LFUNC:RB, [BASE-16] // Function object is read from the frame slot at BASE-16.
3518 | cleartp LFUNC:RB
3519 | movsd xmm0, qword [KBASE+RD*8] // Load constant RD from KBASE.
3520 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] // uvptr[RA]
3521 | mov RA, UPVAL:RB->v
3522 | movsd qword [RA], xmm0
3523 | ins_next
3524 break;
3525 case BC_USETP: /* Store a primitive value into upvalue RA; this handler performs no barrier check. */
3526 | ins_AD // RA = upvalue #, RD = primitive type (~)
3527 | mov LFUNC:RB, [BASE-16] // Function object is read from the frame slot at BASE-16.
3528 | cleartp LFUNC:RB
3529 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] // uvptr[RA]
3530 | shl RD, 47 // Build the stored value as ~(RD << 47), same encoding as in BC_KPRI.
3531 | not RD
3532 | mov RA, UPVAL:RB->v
3533 | mov [RA], RD
3534 | ins_next
3535 break;
3536 case BC_UCLO: /* Branch to the target and, if L->openupval is non-zero, call lj_func_closeuv for slot RA. */
3537 | ins_AD // RA = level, RD = target
3538 | branchPC RD // Do this first to free RD.
3539 | mov L:RB, SAVE_L
3540 | cmp aword L:RB->openupval, 0
3541 | je >1 // L->openupval is zero: skip the call.
3542 | mov L:RB->base, BASE
3543 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3544 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3545 | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3546 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
3547 |1:
3548 | ins_next
3549 break;
3550
3551 case BC_FNEW: /* Create a function object through lj_func_newL_gc and store it in slot RA. */
3552 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3553 | mov L:RB, SAVE_L
3554 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3555 | mov CARG3, [BASE-16] // parent = function object from the frame slot at BASE-16.
3556 | cleartp CARG3
3557 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3558 | mov CARG1, L:RB
3559 | mov SAVE_PC, PC
3560 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3561 | call extern lj_func_newL_gc
3562 | // GCfuncL * returned in eax (RC).
3563 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
3564 | movzx RAd, PC_RA // Re-read operand A after the call.
3565 | settp LFUNC:RC, LJ_TFUNC // Apply the LJ_TFUNC type tag to the returned pointer.
3566 | mov [BASE+RA*8], LFUNC:RC
3567 | ins_next
3568 break;
3569
3570 /* -- Table ops --------------------------------------------------------- */
3571
3572 case BC_TNEW: /* Create a table through lj_tab_new and store it in slot RA; runs a GC step first if gc.total >= gc.threshold. */
3573 | ins_AD // RA = dst, RD = hbits|asize
3574 | mov L:RB, SAVE_L
3575 | mov L:RB->base, BASE
3576 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3577 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] // Flags are consumed by the jae below; the mov in between leaves them intact.
3578 | mov SAVE_PC, PC
3579 | jae >5 // gc.total >= gc.threshold: GC step first.
3580 |1:
3581 | mov CARG3d, RDd
3582 | and RDd, 0x7ff // asize = low 11 bits of RD.
3583 | shr CARG3d, 11 // hbits = RD >> 11.
3584 | cmp RDd, 0x7ff
3585 | je >3
3586 |2:
3587 | mov L:CARG1, L:RB
3588 | mov CARG2d, RDd
3589 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3590 | // Table * returned in eax (RC).
3591 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
3592 | movzx RAd, PC_RA // Re-read operand A after the call.
3593 | settp TAB:RC, LJ_TTAB // Apply the LJ_TTAB type tag to the returned pointer.
3594 | mov [BASE+RA*8], TAB:RC
3595 | ins_next
3596 |3: // Turn 0x7ff into 0x801.
3597 | mov RDd, 0x801
3598 | jmp <2
3599 |5: // GC step, then retry from label 1.
3600 | mov L:CARG1, L:RB
3601 | call extern lj_gc_step_fixtop // (lua_State *L)
3602 | movzx RDd, PC_RD // Re-read operand D after the call.
3603 | jmp <1
3604 break;
3605 case BC_TDUP: /* Duplicate a template table through lj_tab_dup and store the copy in slot RA; runs a GC step first if gc.total >= gc.threshold. */
3606 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3607 | mov L:RB, SAVE_L
3608 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3609 | mov SAVE_PC, PC
3610 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] // Flags are consumed by the jae below; the mov in between leaves them intact.
3611 | mov L:RB->base, BASE
3612 | jae >3 // gc.total >= gc.threshold: GC step first.
3613 |2:
3614 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3615 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3616 | call extern lj_tab_dup // (lua_State *L, Table *kt)
3617 | // Table * returned in eax (RC).
3618 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
3619 | movzx RAd, PC_RA // Re-read operand A after the call.
3620 | settp TAB:RC, LJ_TTAB // Apply the LJ_TTAB type tag to the returned pointer.
3621 | mov [BASE+RA*8], TAB:RC
3622 | ins_next
3623 |3: // GC step, then retry from label 2.
3624 | mov L:CARG1, L:RB
3625 | call extern lj_gc_step_fixtop // (lua_State *L)
3626 | movzx RDd, PC_RD // Need to reload RD.
3627 | not RD // Complement the reloaded operand again (the operand is marked (~) above).
3628 | jmp <2
3629 break;
3630
3631 case BC_GGET: /* Read from the current function's env table with a string constant key; continues in BC_TGETS_Z. */
3632 | ins_AND // RA = dst, RD = str const (~)
3633 | mov LFUNC:RB, [BASE-16] // Function object is read from the frame slot at BASE-16.
3634 | cleartp LFUNC:RB
3635 | mov TAB:RB, LFUNC:RB->env
3636 | mov STR:RC, [KBASE+RD*8]
3637 | jmp ->BC_TGETS_Z // Entered with RB = table, RC = string key.
3638 break;
3639 case BC_GSET: /* Write to the current function's env table with a string constant key; continues in BC_TSETS_Z. */
3640 | ins_AND // RA = src, RD = str const (~)
3641 | mov LFUNC:RB, [BASE-16] // Function object is read from the frame slot at BASE-16.
3642 | cleartp LFUNC:RB
3643 | mov TAB:RB, LFUNC:RB->env
3644 | mov STR:RC, [KBASE+RD*8]
3645 | jmp ->BC_TSETS_Z // Entered with RB = table, RC = string key.
3646 break;
3647
3648 case BC_TGETV: /* Table read with a key from a stack slot: array fast path, string keys go to BC_TGETS_Z, the rest to vmeta_tgetv. */
3649 | ins_ABC // RA = dst, RB = table, RC = key
3650 | mov TAB:RB, [BASE+RB*8]
3651 | mov RC, [BASE+RC*8]
3652 | checktab TAB:RB, ->vmeta_tgetv
3653 |
3654 | // Integer key?
3655 |.if DUALNUM
3656 | checkint RC, >5
3657 |.else
3658 | // Convert number to int and back and compare.
3659 | checknum RC, >5
3660 | movd xmm0, RC
3661 | cvttsd2si RCd, xmm0
3662 | cvtsi2sd xmm1, RCd
3663 | ucomisd xmm0, xmm1
3664 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3665 |.endif
3666 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3667 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3668 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3669 | add RC, TAB:RB->array
3670 | // Get array slot.
3671 | mov ITYPE, [RC]
3672 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3673 | je >2
3674 |1:
3675 | mov [BASE+RA*8], ITYPE
3676 | ins_next
3677 |
3678 |2: // Check for __index if table value is nil.
3679 | mov TAB:TMPR, TAB:RB->metatable
3680 | test TAB:TMPR, TAB:TMPR
3681 | jz <1 // No metatable: store the nil.
3682 | test byte TAB:TMPR->nomm, 1<<MM_index
3683 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3684 | jmp <1
3685 |
3686 |5: // String key?
3687 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv // NOTE(review): ITYPE is presumably set by the check macro above - confirm.
3688 | cleartp STR:RC
3689 | jmp ->BC_TGETS_Z
3690 break;
3691 case BC_TGETS: /* Table read with a string constant key: walks the hash chain selected by hmask and the string's sid. */
3692 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3693 | mov TAB:RB, [BASE+RB*8]
3694 | not RC // Complement the operand (marked (~) above) before indexing KBASE.
3695 | mov STR:RC, [KBASE+RC*8]
3696 | checktab TAB:RB, ->vmeta_tgets
3697 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
3698 | mov TMPRd, TAB:RB->hmask
3699 | and TMPRd, STR:RC->sid // Node index = hmask & sid.
3700 | imul TMPRd, #NODE // Scale by the node size.
3701 | add NODE:TMPR, TAB:RB->node // TMPR = address of the first node in the chain.
3702 | settp ITYPE, STR:RC, LJ_TSTR // ITYPE = tagged key for comparison; RC stays untagged.
3703 |1:
3704 | cmp NODE:TMPR->key, ITYPE
3705 | jne >4
3706 | // Get node value.
3707 | mov ITYPE, NODE:TMPR->val
3708 | cmp ITYPE, LJ_TNIL
3709 | je >5 // Key found, but nil value?
3710 |2:
3711 | mov [BASE+RA*8], ITYPE
3712 | ins_next
3713 |
3714 |4: // Follow hash chain.
3715 | mov NODE:TMPR, NODE:TMPR->next
3716 | test NODE:TMPR, NODE:TMPR
3717 | jnz <1
3718 | // End of hash chain: key not found, nil result.
3719 | mov ITYPE, LJ_TNIL
3720 |
3721 |5: // Check for __index if table value is nil.
3722 | mov TAB:TMPR, TAB:RB->metatable
3723 | test TAB:TMPR, TAB:TMPR
3724 | jz <2 // No metatable: done.
3725 | test byte TAB:TMPR->nomm, 1<<MM_index
3726 | jnz <2 // 'no __index' flag set: done.
3727 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3728 break;
3729 case BC_TGETB: /* Table read with a byte literal index: array part only, everything else goes to vmeta_tgetb. */
3730 | ins_ABC // RA = dst, RB = table, RC = byte literal
3731 | mov TAB:RB, [BASE+RB*8]
3732 | checktab TAB:RB, ->vmeta_tgetb
3733 | cmp RCd, TAB:RB->asize
3734 | jae ->vmeta_tgetb // Index not below asize: fallback.
3735 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3736 | add RC, TAB:RB->array
3737 | // Get array slot.
3738 | mov ITYPE, [RC]
3739 | cmp ITYPE, LJ_TNIL
3740 | je >2
3741 |1:
3742 | mov [BASE+RA*8], ITYPE
3743 | ins_next
3744 |
3745 |2: // Check for __index if table value is nil.
3746 | mov TAB:TMPR, TAB:RB->metatable
3747 | test TAB:TMPR, TAB:TMPR
3748 | jz <1 // No metatable: store the nil.
3749 | test byte TAB:TMPR->nomm, 1<<MM_index
3750 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3751 | jmp <1
3752 break;
3753 case BC_TGETR: /* Table read from the array part with no type check on the table and no nil/__index check here. */
3754 | ins_ABC // RA = dst, RB = table, RC = key
3755 | mov TAB:RB, [BASE+RB*8]
3756 | cleartp TAB:RB // The tag is stripped without checking it first.
3757 |.if DUALNUM
3758 | mov RCd, dword [BASE+RC*8]
3759 |.else
3760 | cvttsd2si RCd, qword [BASE+RC*8]
3761 |.endif
3762 | cmp RCd, TAB:RB->asize
3763 | jae ->vmeta_tgetr // Not in array part? Use fallback.
3764 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3765 | add RC, TAB:RB->array
3766 | // Get array slot.
3767 |->BC_TGETR_Z: // Global entry: RC = address of the slot to read.
3768 | mov ITYPE, [RC]
3769 |->BC_TGETR2_Z: // Global entry: ITYPE = value to store in slot RA.
3770 | mov [BASE+RA*8], ITYPE
3771 | ins_next
3772 break;
3773
3774 case BC_TSETV: /* Table write with a key from a stack slot: array fast path, string keys go to BC_TSETS_Z, the rest to vmeta_tsetv. */
3775 | ins_ABC // RA = src, RB = table, RC = key
3776 | mov TAB:RB, [BASE+RB*8]
3777 | mov RC, [BASE+RC*8]
3778 | checktab TAB:RB, ->vmeta_tsetv
3779 |
3780 | // Integer key?
3781 |.if DUALNUM
3782 | checkint RC, >5
3783 |.else
3784 | // Convert number to int and back and compare.
3785 | checknum RC, >5
3786 | movd xmm0, RC
3787 | cvttsd2si RCd, xmm0
3788 | cvtsi2sd xmm1, RCd
3789 | ucomisd xmm0, xmm1
3790 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3791 |.endif
3792 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3793 | jae ->vmeta_tsetv // Not in array part? Use fallback.
3794 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3795 | add RC, TAB:RB->array
3796 | cmp aword [RC], LJ_TNIL
3797 | je >3 // Previous value is nil?
3798 |1:
3799 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3800 | jnz >7
3801 |2: // Set array slot.
3802 | mov RB, [BASE+RA*8]
3803 | mov [RC], RB
3804 | ins_next
3805 |
3806 |3: // Check for __newindex if previous value is nil.
3807 | mov TAB:TMPR, TAB:RB->metatable
3808 | test TAB:TMPR, TAB:TMPR
3809 | jz <1 // No metatable: proceed with the store.
3810 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3811 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3812 | jmp <1
3813 |
3814 |5: // String key?
3815 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv // NOTE(review): ITYPE is presumably set by the check macro above - confirm.
3816 | cleartp STR:RC
3817 | jmp ->BC_TSETS_Z
3818 |
3819 |7: // Possible table write barrier for the value. Skip valiswhite check.
3820 | barrierback TAB:RB, TMPR
3821 | jmp <2
3822 break;
3823 case BC_TSETS: /* Table write with a string constant key: hash chain lookup, new key insertion through lj_tab_newkey, write barrier. */
3824 | ins_ABC // RA = src, RB = table, RC = str const (~)
3825 | mov TAB:RB, [BASE+RB*8]
3826 | not RC // Complement the operand (marked (~) above) before indexing KBASE.
3827 | mov STR:RC, [KBASE+RC*8]
3828 | checktab TAB:RB, ->vmeta_tsets
3829 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3830 | mov TMPRd, TAB:RB->hmask
3831 | and TMPRd, STR:RC->sid // Node index = hmask & sid.
3832 | imul TMPRd, #NODE // Scale by the node size.
3833 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3834 | add NODE:TMPR, TAB:RB->node // TMPR = address of the first node in the chain.
3835 | settp ITYPE, STR:RC, LJ_TSTR // ITYPE = tagged key for comparison; RC stays untagged.
3836 |1:
3837 | cmp NODE:TMPR->key, ITYPE
3838 | jne >5
3839 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3840 | cmp aword [TMPR], LJ_TNIL
3841 | je >4 // Previous value is nil?
3842 |2:
3843 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3844 | jnz >7
3845 |3: // Set node value.
3846 | mov ITYPE, [BASE+RA*8]
3847 | mov [TMPR], ITYPE
3848 | ins_next
3849 |
3850 |4: // Check for __newindex if previous value is nil.
3851 | mov TAB:ITYPE, TAB:RB->metatable // ITYPE is reused as a scratch register here; TMPR still holds the node.
3852 | test TAB:ITYPE, TAB:ITYPE
3853 | jz <2
3854 | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3855 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3856 | jmp <2
3857 |
3858 |5: // Follow hash chain.
3859 | mov NODE:TMPR, NODE:TMPR->next
3860 | test NODE:TMPR, NODE:TMPR
3861 | jnz <1
3862 | // End of hash chain: key not found, add a new one.
3863 |
3864 | // But check for __newindex first.
3865 | mov TAB:TMPR, TAB:RB->metatable
3866 | test TAB:TMPR, TAB:TMPR
3867 | jz >6 // No metatable: continue.
3868 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3869 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3870 |6:
3871 | mov TMP1, ITYPE // Spill the tagged key to TMP1 so its address can be passed as the key.
3872 | mov L:CARG1, SAVE_L
3873 | mov L:CARG1->base, BASE
3874 | lea CARG3, TMP1
3875 | mov CARG2, TAB:RB
3876 | mov SAVE_PC, PC
3877 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3878 | // Handles write barrier for the new key. TValue * returned in eax (RC).
3879 | mov L:CARG1, SAVE_L
3880 | mov BASE, L:CARG1->base // Reload BASE from L->base after the call.
3881 | mov TMPR, rax // TMPR = returned slot address, as expected at labels 2 and 3.
3882 | movzx RAd, PC_RA // Re-read operand A after the call.
3883 | jmp <2 // Must check write barrier for value.
3884 |
3885 |7: // Possible table write barrier for the value. Skip valiswhite check.
3886 | barrierback TAB:RB, ITYPE
3887 | jmp <3
3888 break;
3889 case BC_TSETB: /* Table write with a byte literal index: array part only, everything else goes to vmeta_tsetb. */
3890 | ins_ABC // RA = src, RB = table, RC = byte literal
3891 | mov TAB:RB, [BASE+RB*8]
3892 | checktab TAB:RB, ->vmeta_tsetb
3893 | cmp RCd, TAB:RB->asize
3894 | jae ->vmeta_tsetb // Index not below asize: fallback.
3895 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3896 | add RC, TAB:RB->array
3897 | cmp aword [RC], LJ_TNIL
3898 | je >3 // Previous value is nil?
3899 |1:
3900 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3901 | jnz >7
3902 |2: // Set array slot.
3903 | mov ITYPE, [BASE+RA*8]
3904 | mov [RC], ITYPE
3905 | ins_next
3906 |
3907 |3: // Check for __newindex if previous value is nil.
3908 | mov TAB:TMPR, TAB:RB->metatable
3909 | test TAB:TMPR, TAB:TMPR
3910 | jz <1 // No metatable: proceed with the store.
3911 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3912 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3913 | jmp <1
3914 |
3915 |7: // Possible table write barrier for the value. Skip valiswhite check.
3916 | barrierback TAB:RB, TMPR
3917 | jmp <2
3918 break;
3919 case BC_TSETR: /* Table write to the array part with no type check on the table and no __newindex check here. */
3920 | ins_ABC // RA = src, RB = table, RC = key
3921 | mov TAB:RB, [BASE+RB*8]
3922 | cleartp TAB:RB // The tag is stripped without checking it first.
3923 |.if DUALNUM
3924 | mov RC, [BASE+RC*8] // Only RCd is used until the shl below, which writes the 32-bit register.
3925 |.else
3926 | cvttsd2si RCd, qword [BASE+RC*8]
3927 |.endif
3928 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3929 | jnz >7 // The barrier check runs before the bounds check.
3930 |2:
3931 | cmp RCd, TAB:RB->asize
3932 | jae ->vmeta_tsetr // Index not below asize: fallback.
3933 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3934 | add RC, TAB:RB->array
3935 | // Set array slot.
3936 |->BC_TSETR_Z: // Global entry: RC = address of the slot to write.
3937 | mov ITYPE, [BASE+RA*8]
3938 | mov [RC], ITYPE
3939 | ins_next
3940 |
3941 |7: // Possible table write barrier for the value. Skip valiswhite check.
3942 | barrierback TAB:RB, TMPR
3943 | jmp <2
3944 break;
3945
3946 case BC_TSETM: /* Copy MULTRES-1 result slots starting at RA into the array part of the table at RA-1, resizing if needed. */
3947 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3948 |1:
3949 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3950 | lea RA, [BASE+RA*8] // RA = address of the first source slot.
3951 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3952 | cleartp TAB:RB
3953 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3954 | jnz >7
3955 |2:
3956 | mov RDd, MULTRES
3957 | sub RDd, 1 // Number of slots to copy.
3958 | jz >4 // Nothing to copy?
3959 | add RDd, TMPRd // Compute needed size.
3960 | cmp RDd, TAB:RB->asize
3961 | ja >5 // Doesn't fit into array part?
3962 | sub RDd, TMPRd // Back to the slot count.
3963 | shl TMPRd, 3 // Start index * 8 = byte offset into the array part.
3964 | add TMPR, TAB:RB->array
3965 |3: // Copy result slots to table.
3966 | mov RB, [RA]
3967 | add RA, 8
3968 | mov [TMPR], RB
3969 | add TMPR, 8
3970 | sub RDd, 1
3971 | jnz <3
3972 |4:
3973 | ins_next
3974 |
3975 |5: // Need to resize array part.
3976 | mov L:CARG1, SAVE_L
3977 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3978 | mov CARG2, TAB:RB
3979 | mov CARG3d, RDd // nasize = needed size computed above.
3980 | mov L:RB, L:CARG1 // Keep L in RB for use after the call.
3981 | mov SAVE_PC, PC
3982 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3983 | mov BASE, L:RB->base
3984 | movzx RAd, PC_RA // Restore RA.
3985 | movzx RDd, PC_RD // Restore RD.
3986 | jmp <1 // Retry.
3987 |
3988 |7: // Possible table write barrier for any value. Skip valiswhite check.
3989 | barrierback TAB:RB, RD
3990 | jmp <2
3991 break;
3992
3993 /* -- Calls and vararg handling ----------------------------------------- */
3994
3995 case BC_CALL: case BC_CALLM: /* Call the function in slot RA; BC_CALLM adds MULTRES to the argument count. */
3996 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3997 if (op == BC_CALLM) {
3998 | add NARGS:RDd, MULTRES
3999 }
4000 | mov LFUNC:RB, [BASE+RA*8]
4001 | checkfunc LFUNC:RB, ->vmeta_call_ra
4002 | lea BASE, [BASE+RA*8+16] // New BASE is two slots above the function slot.
4003 | ins_call
4004 break;
4005
4006 case BC_CALLMT: /* Tail call with extra arguments: adds MULTRES to the count and falls through into BC_CALLT. */
4007 | ins_AD // RA = base, RD = extra_nargs
4008 | add NARGS:RDd, MULTRES
4009 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
4010 break;
4011 case BC_CALLT: /* Tail call: moves the function and its arguments down to the current frame, then dispatches with ins_callt. */
4012 | ins_AD // RA = base, RD = nargs+1
4013 | lea RA, [BASE+RA*8+16] // RA = address of the first argument slot.
4014 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
4015 | mov LFUNC:RB, [RA-16] // Function to call, two slots below the first argument.
4016 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
4017 |->BC_CALLT_Z:
4018 | mov PC, [BASE-8] // PC is loaded from the frame slot at BASE-8.
4019 | test PCd, FRAME_TYPE
4020 | jnz >7
4021 |1:
4022 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
4023 | mov MULTRES, NARGS:RDd // Save the argument count; RD is used as the loop counter.
4024 | sub NARGS:RDd, 1
4025 | jz >3 // No arguments to move.
4026 |2: // Move args down.
4027 | mov RB, [RA]
4028 | add RA, 8
4029 | mov [KBASE], RB
4030 | add KBASE, 8
4031 | sub NARGS:RDd, 1
4032 | jnz <2
4033 |
4034 | mov LFUNC:RB, [BASE-16] // RB was clobbered by the copy loop.
4035 |3:
4036 | cleartp LFUNC:RB
4037 | mov NARGS:RDd, MULTRES // Restore the argument count.
4038 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4039 | ja >5
4040 |4:
4041 | ins_callt
4042 |
4043 |5: // Tailcall to a fast function.
4044 | test PCd, FRAME_TYPE // Lua frame below?
4045 | jnz <4
4046 | movzx RAd, PC_RA
4047 | neg RA
4048 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
4049 | cleartp LFUNC:KBASE
4050 | mov KBASE, LFUNC:KBASE->pc
4051 | mov KBASE, [KBASE+PC2PROTO(k)]
4052 | jmp <4
4053 |
4054 |7: // Tailcall from a vararg function.
4055 | sub PC, FRAME_VARG
4056 | test PCd, FRAME_TYPEP
4057 | jnz >8 // Vararg frame below?
4058 | sub BASE, PC // Need to relocate BASE/KBASE down.
4059 | mov KBASE, BASE
4060 | mov PC, [BASE-8]
4061 | jmp <1
4062 |8:
4063 | add PCd, FRAME_VARG // Undo the subtraction at label 7.
4064 | jmp <1
4065 break;
4066
4067 case BC_ITERC: /* Iterator call: copies callable, state and control var up and performs a regular 2-argument call. */
4068 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4069 | lea RA, [BASE+RA*8+16] // fb = base+2
4070 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4071 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4072 | mov [RA], RB
4073 | mov [RA+8], RC
4074 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5]
4075 | mov [RA-16], LFUNC:RB
4076 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4077 | checkfunc LFUNC:RB, ->vmeta_call
4078 | mov BASE, RA // New BASE = fb.
4079 | ins_call
4080 break;
4081
4082 case BC_ITERN: /* Inline table traversal: walks the array part, then the hash part, using the index kept in the control var. */
4083 |.if JIT
4084 | hotloop RBd
4085 |.endif
4086 |->vm_IITERN:
4087 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4088 | mov TAB:RB, [BASE+RA*8-16] // Table is read from two slots below RA.
4089 | cleartp TAB:RB
4090 | mov RCd, [BASE+RA*8-8] // Get index from control var.
4091 | mov TMPRd, TAB:RB->asize
4092 | add PC, 4 // Advance PC by 4 bytes.
4093 | mov ITYPE, TAB:RB->array
4094 |1: // Traverse array part.
4095 | cmp RCd, TMPRd; jae >5 // Index points after array part?
4096 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4 // Nil slot: skip it.
4097 |.if not DUALNUM
4098 | cvtsi2sd xmm0, RCd // Index as a double, stored as the key below.
4099 |.endif
4100 | // Copy array slot to returned value.
4101 | mov RB, [ITYPE+RC*8]
4102 | mov [BASE+RA*8+8], RB
4103 | // Return array index as a numeric key.
4104 |.if DUALNUM
4105 | setint ITYPE, RC
4106 | mov [BASE+RA*8], ITYPE
4107 |.else
4108 | movsd qword [BASE+RA*8], xmm0
4109 |.endif
4110 | add RCd, 1
4111 | mov [BASE+RA*8-8], RCd // Update control var.
4112 |2:
4113 | movzx RDd, PC_RD // Get target from ITERL.
4114 | branchPC RD
4115 |3:
4116 | ins_next
4117 |
4118 |4: // Skip holes in array part.
4119 | add RCd, 1
4120 | jmp <1
4121 |
4122 |5: // Traverse hash part.
4123 | sub RCd, TMPRd // Hash index = control index - asize.
4124 |6:
4125 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4126 | imul ITYPEd, RCd, #NODE // Byte offset of node RC.
4127 | add NODE:ITYPE, TAB:RB->node
4128 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7 // Nil value: skip this node.
4129 | lea TMPRd, [RCd+TMPRd+1] // Next control index = hash index + asize + 1.
4130 | // Copy key and value from hash slot.
4131 | mov RB, NODE:ITYPE->key
4132 | mov RC, NODE:ITYPE->val
4133 | mov [BASE+RA*8], RB
4134 | mov [BASE+RA*8+8], RC
4135 | mov [BASE+RA*8-8], TMPRd // Update control var.
4136 | jmp <2
4137 |
4138 |7: // Skip holes in hash part.
4139 | add RCd, 1
4140 | jmp <6
4141 break;
4142
4143 case BC_ISNEXT: /* Verify the iterator triple matches the specialized form; otherwise patch the bytecode to the generic form. */
4144 | ins_AD // RA = base, RD = target (points to ITERN)
4145 | mov CFUNC:RB, [BASE+RA*8-24]
4146 | checkfunc CFUNC:RB, >5 // Slot RA-3 must hold a function ...
4147 | checktptp [BASE+RA*8-16], LJ_TTAB, >5 // ... slot RA-2 a table ...
4148 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 // ... slot RA-1 nil ...
4149 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 // ... and the function's ffid must be FF_next_N.
4150 | branchPC RD
4151 | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32)
4152 | mov [BASE+RA*8-8], TMPR // Initialize control var.
4153 |1:
4154 | ins_next
4155 |5: // Despecialize bytecode if any of the checks fail.
4156 | mov PC_OP, BC_JMP // Patch the opcode accessed through PC_OP to BC_JMP.
4157 | branchPC RD
4158 |.if JIT
4159 | cmp byte [PC], BC_ITERN
4160 | jne >6 // Target opcode is not BC_ITERN: handled at label 6.
4161 |.endif
4162 | mov byte [PC], BC_ITERC // Patch the target opcode to BC_ITERC.
4163 | jmp <1
4164 |.if JIT
4165 |6: // Unpatch JLOOP.
4166 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4167 | movzx RCd, word [PC+2] // 16-bit operand at PC+2 indexes the trace array.
4168 | mov TRACE:RA, [RA+RC*8]
4169 | mov eax, TRACE:RA->startins
4170 | mov al, BC_ITERC // Replace the opcode byte of the loaded startins.
4171 | mov dword [PC], eax
4172 | jmp <1
4173 |.endif
4174 break;
4175
4176 case BC_VARG: /* Copy vararg slots to slots starting at RA: a fixed count padded with nil (RB != 0) or all of them (RB == 0). */
4177 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4178 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4179 | lea RA, [BASE+RA*8] // RA = address of the first destination slot.
4180 | sub TMPR, [BASE-8] // Subtract the frame word stored at BASE-8.
4181 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4182 | test RB, RB
4183 | jz >5 // Copy all varargs?
4184 | lea RB, [RA+RB*8-8] // RB = end address of the destination range (exclusive).
4185 | cmp TMPR, BASE // No vararg slots?
4186 | jnb >2
4187 |1: // Copy vararg slots to destination slots.
4188 | mov RC, [TMPR-16]
4189 | add TMPR, 8
4190 | mov [RA], RC
4191 | add RA, 8
4192 | cmp RA, RB // All destination slots filled?
4193 | jnb >3
4194 | cmp TMPR, BASE // No more vararg slots?
4195 | jb <1
4196 |2: // Fill up remainder with nil.
4197 | mov aword [RA], LJ_TNIL
4198 | add RA, 8
4199 | cmp RA, RB
4200 | jb <2
4201 |3:
4202 | ins_next
4203 |
4204 |5: // Copy all varargs.
4205 | mov MULTRES, 1 // MULTRES = 0+1
4206 | mov RC, BASE
4207 | sub RC, TMPR // RC = byte size of the vararg area.
4208 | jbe <3 // No vararg slots?
4209 | mov RBd, RCd
4210 | shr RBd, 3 // Bytes / 8 = number of slots.
4211 | add RBd, 1
4212 | mov MULTRES, RBd // MULTRES = #varargs+1
4213 | mov L:RB, SAVE_L
4214 | add RC, RA // RC = end address of the destination range.
4215 | cmp RC, L:RB->maxstack
4216 | ja >7 // Need to grow stack?
4217 |6: // Copy all vararg slots.
4218 | mov RC, [TMPR-16]
4219 | add TMPR, 8
4220 | mov [RA], RC
4221 | add RA, 8
4222 | cmp TMPR, BASE // No more vararg slots?
4223 | jb <6
4224 | jmp <3
4225 |
4226 |7: // Grow stack for varargs.
4227 | mov L:RB->base, BASE
4228 | mov L:RB->top, RA
4229 | mov SAVE_PC, PC
4230 | sub TMPR, BASE // Need delta, because BASE may change.
4231 | mov TMP1hi, TMPRd // Keep the 32-bit delta in TMP1hi across the call.
4232 | mov CARG2d, MULTRES
4233 | sub CARG2d, 1 // n = number of varargs.
4234 | mov CARG1, L:RB
4235 | call extern lj_state_growstack // (lua_State *L, int n)
4236 | mov BASE, L:RB->base
4237 | movsxd TMPR, TMP1hi // Sign-extend the saved delta.
4238 | mov RA, L:RB->top
4239 | add TMPR, BASE // Rebase TMPR onto the reloaded BASE.
4240 | jmp <6
4241 break;
4242
4243 /* -- Returns ----------------------------------------------------------- */
4244
4245 case BC_RETM: /* Return with extra results: adds MULTRES to the count and falls through into BC_RET. */
4246 | ins_AD // RA = results, RD = extra_nresults
4247 | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4248 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4249 break;
4250
4251 case BC_RET: case BC_RET0: case BC_RET1: /* Return: move results down, pad missing ones with nil, restore the caller's BASE and KBASE. */
4252 | ins_AD // RA = results, RD = nresults+1
4253 if (op != BC_RET0) {
4254 | shl RAd, 3 // RA becomes a byte offset (slot index * 8).
4255 }
4256 |1:
4257 | mov PC, [BASE-8] // PC is loaded from the frame slot at BASE-8.
4258 | mov MULTRES, RDd // Save nresults+1.
4259 | test PCd, FRAME_TYPE // Check frame type marker.
4260 | jnz >7 // Not returning to a fixarg Lua func?
4261 switch (op) {
4262 case BC_RET:
4263 |->BC_RET_Z:
4264 | mov KBASE, BASE // Use KBASE for result move.
4265 | sub RDd, 1
4266 | jz >3 // No results to move.
4267 |2: // Move results down.
4268 | mov RB, [KBASE+RA]
4269 | mov [KBASE-16], RB
4270 | add KBASE, 8
4271 | sub RDd, 1
4272 | jnz <2
4273 |3:
4274 | mov RDd, MULTRES // Note: MULTRES may be >255.
4275 | movzx RBd, PC_RB // So cannot compare with RDL!
4276 |5:
4277 | cmp RBd, RDd // More results expected?
4278 | ja >6
4279 break;
4280 case BC_RET1:
4281 | mov RB, [BASE+RA] // Move the single result down to BASE-16.
4282 | mov [BASE-16], RB
4283 /* fallthrough */
4284 case BC_RET0:
4285 |5:
4286 | cmp PC_RB, RDL // More results expected?
4287 | ja >6
4288 default:
4289 break;
4290 }
4291 | movzx RAd, PC_RA
4292 | neg RA
4293 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4294 | mov LFUNC:KBASE, [BASE-16] // Reload KBASE through the function object at the new BASE-16.
4295 | cleartp LFUNC:KBASE
4296 | mov KBASE, LFUNC:KBASE->pc
4297 | mov KBASE, [KBASE+PC2PROTO(k)]
4298 | ins_next
4299 |
4300 |6: // Fill up results with nil.
4301 if (op == BC_RET) {
4302 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4303 | add KBASE, 8
4304 } else {
4305 | mov aword [BASE+RD*8-24], LJ_TNIL
4306 }
4307 | add RD, 1
4308 | jmp <5
4309 |
4310 |7: // Non-standard return case.
4311 | lea RB, [PC-FRAME_VARG]
4312 | test RBd, FRAME_TYPEP
4313 | jnz ->vm_return
4314 | // Return from vararg function: relocate BASE down and RA up.
4315 | sub BASE, RB
4316 if (op != BC_RET0) {
4317 | add RA, RB
4318 }
4319 | jmp <1
4320 break;
4321
4322 /* -- Loops and branches ------------------------------------------------ */
4323
4324 |.define FOR_IDX, [RA] // Loop slot 0, relative to RA.
4325 |.define FOR_STOP, [RA+8] // Loop slot 1.
4326 |.define FOR_STEP, [RA+16] // Loop slot 2.
4327 |.define FOR_EXT, [RA+24] // Loop slot 3; the FOR handlers store a copy of the index here.
4328
4329 case BC_FORL: /* Numeric loop back-edge with hot loop counting (JIT builds); falls through into BC_IFORL. */
4330 |.if JIT
4331 | hotloop RBd
4332 |.endif
4333 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4334 break;
4335
4336 case BC_JFORI: /* Numeric for loop, shared by FORI/JFORI (loop entry, vk == 0) and IFORL/JFORL (back-edge, vk == 1). */
4337 case BC_JFORL:
4338#if !LJ_HASJIT
4339 break;
4340#endif
4341 case BC_FORI:
4342 case BC_IFORL:
4343 vk = (op == BC_IFORL || op == BC_JFORL);
4344 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4345 | lea RA, [BASE+RA*8] // RA = address of the loop slots (FOR_IDX etc.).
4346 if (LJ_DUALNUM) {
4347 | mov RB, FOR_IDX
4348 | checkint RB, >9
4349 | mov TMPR, FOR_STOP
4350 if (!vk) {
4351 | checkint TMPR, ->vmeta_for
4352 | mov ITYPE, FOR_STEP
4353 | test ITYPEd, ITYPEd; js >5 // Negative step: inverted comparison at label 5.
4354 | sar ITYPE, 47;
4355 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4356 } else {
4357#ifdef LUA_USE_ASSERT
4358 | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4359 | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4360#endif
4361 | mov ITYPE, FOR_STEP
4362 | test ITYPEd, ITYPEd; js >5 // Negative step: inverted comparison at label 5.
4363 | add RBd, ITYPEd; jo >1 // idx += step; overflow goes to label 1.
4364 | setint RB
4365 | mov FOR_IDX, RB
4366 }
4367 | cmp RBd, TMPRd // Flags are consumed by the conditional jump below; the mov leaves them intact.
4368 | mov FOR_EXT, RB
4369 if (op == BC_FORI) {
4370 | jle >7
4371 |1:
4372 |6:
4373 | branchPC RD
4374 } else if (op == BC_JFORI) {
4375 | branchPC RD
4376 | movzx RDd, PC_RD
4377 | jle =>BC_JLOOP
4378 |1:
4379 |6:
4380 } else if (op == BC_IFORL) {
4381 | jg >7
4382 |6:
4383 | branchPC RD
4384 |1:
4385 } else {
4386 | jle =>BC_JLOOP
4387 |1:
4388 |6:
4389 }
4390 |7:
4391 | ins_next
4392 |
4393 |5: // Invert check for negative step.
4394 if (!vk) {
4395 | sar ITYPE, 47;
4396 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4397 } else {
4398 | add RBd, ITYPEd; jo <1 // idx += step; overflow goes to label 1.
4399 | setint RB
4400 | mov FOR_IDX, RB
4401 }
4402 | cmp RBd, TMPRd // Flags are consumed by the conditional jump below; the mov leaves them intact.
4403 | mov FOR_EXT, RB
4404 if (op == BC_FORI) {
4405 | jge <7
4406 } else if (op == BC_JFORI) {
4407 | branchPC RD
4408 | movzx RDd, PC_RD
4409 | jge =>BC_JLOOP
4410 } else if (op == BC_IFORL) {
4411 | jl <7
4412 } else {
4413 | jge =>BC_JLOOP
4414 }
4415 | jmp <6
4416 |9: // Fallback to FP variant.
4417 if (!vk) {
4418 | jae ->vmeta_for
4419 }
4420 } else if (!vk) {
4421 | checknumtp FOR_IDX, ->vmeta_for
4422 }
4423 if (!vk) {
4424 | checknumtp FOR_STOP, ->vmeta_for
4425 } else {
4426#ifdef LUA_USE_ASSERT
4427 | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4428 | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4429#endif
4430 }
4431 | mov RB, FOR_STEP
4432 if (!vk) {
4433 | checknum RB, ->vmeta_for
4434 }
4435 | movsd xmm0, qword FOR_IDX
4436 | movsd xmm1, qword FOR_STOP
4437 if (vk) {
4438 | addsd xmm0, qword FOR_STEP // idx += step.
4439 | movsd qword FOR_IDX, xmm0
4440 | test RB, RB; js >3 // Sign bit of the step set: inverted comparison at label 3.
4441 } else {
4442 | jl >3 // NOTE(review): relies on flags left by the checknum macro above - confirm.
4443 }
4444 | ucomisd xmm1, xmm0 // Compare stop against idx.
4445 |1:
4446 | movsd qword FOR_EXT, xmm0 // movsd leaves the comparison flags intact.
4447 if (op == BC_FORI) {
4448 |.if DUALNUM
4449 | jnb <7
4450 |.else
4451 | jnb >2
4452 | branchPC RD
4453 |.endif
4454 } else if (op == BC_JFORI) {
4455 | branchPC RD
4456 | movzx RDd, PC_RD
4457 | jnb =>BC_JLOOP
4458 } else if (op == BC_IFORL) {
4459 |.if DUALNUM
4460 | jb <7
4461 |.else
4462 | jb >2
4463 | branchPC RD
4464 |.endif
4465 } else {
4466 | jnb =>BC_JLOOP
4467 }
4468 |.if DUALNUM
4469 | jmp <6
4470 |.else
4471 |2:
4472 | ins_next
4473 |.endif
4474 |
4475 |3: // Invert comparison if step is negative.
4476 | ucomisd xmm0, xmm1
4477 | jmp <1
4478 break;
4479
4480 case BC_ITERL:
4481 |.if JIT
4482 | hotloop RBd
4483 |.endif
4484 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4485 break;
4486
4487 case BC_JITERL:
4488#if !LJ_HASJIT
4489 break;
4490#endif
4491 case BC_IITERL:
4492 | ins_AJ // RA = base, RD = target
4493 | lea RA, [BASE+RA*8]
4494 | mov RB, [RA]
4495 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4496 if (op == BC_JITERL) {
4497 | mov [RA-8], RB
4498 | jmp =>BC_JLOOP
4499 } else {
4500 | branchPC RD // Otherwise save control var + branch.
4501 | mov [RA-8], RB
4502 }
4503 |1:
4504 | ins_next
4505 break;
4506
4507 case BC_LOOP:
4508 | ins_A // RA = base, RD = target (loop extent)
4509 | // Note: RA/RD is only used by trace recorder to determine scope/extent
4510 | // This opcode does NOT jump; its only purpose is to detect a hot loop.
4511 |.if JIT
4512 | hotloop RBd
4513 |.endif
4514 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4515 break;
4516
4517 case BC_ILOOP:
4518 | ins_A // RA = base, RD = target (loop extent)
4519 | ins_next
4520 break;
4521
4522 case BC_JLOOP:
4523 |.if JIT
4524 | ins_AD // RA = base (ignored), RD = traceno
4525 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4526 | mov TRACE:RD, [RA+RD*8]
4527 | mov RD, TRACE:RD->mcode
4528 | mov L:RB, SAVE_L
4529 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4530 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4531 | // Save additional callee-save registers only used in compiled code.
4532 |.if X64WIN
4533 | mov CSAVE_4, r12
4534 | mov CSAVE_3, r13
4535 | mov CSAVE_2, r14
4536 | mov CSAVE_1, r15
4537 | mov RA, rsp
4538 | sub rsp, 10*16+4*8
4539 | movdqa [RA-1*16], xmm6
4540 | movdqa [RA-2*16], xmm7
4541 | movdqa [RA-3*16], xmm8
4542 | movdqa [RA-4*16], xmm9
4543 | movdqa [RA-5*16], xmm10
4544 | movdqa [RA-6*16], xmm11
4545 | movdqa [RA-7*16], xmm12
4546 | movdqa [RA-8*16], xmm13
4547 | movdqa [RA-9*16], xmm14
4548 | movdqa [RA-10*16], xmm15
4549 |.else
4550 | sub rsp, 16
4551 | mov [rsp+16], r12
4552 | mov [rsp+8], r13
4553 |.endif
4554 | jmp RD
4555 |.endif
4556 break;
4557
4558 case BC_JMP:
4559 | ins_AJ // RA = unused, RD = target
4560 | branchPC RD
4561 | ins_next
4562 break;
4563
4564 /* -- Function headers -------------------------------------------------- */
4565
4566 /*
4567 ** Reminder: A function may be called with func/args above L->maxstack,
4568 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4569 ** too. This means all FUNC* ops (including fast functions) must check
4570 ** for stack overflow _before_ adding more slots!
4571 */
4572
4573 case BC_FUNCF:
4574 |.if JIT
4575 | hotcall RBd
4576 |.endif
4577 case BC_FUNCV: /* NYI: compiled vararg functions. */
4578 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4579 break;
4580
4581 case BC_JFUNCF:
4582#if !LJ_HASJIT
4583 break;
4584#endif
4585 case BC_IFUNCF:
4586 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4587 | mov KBASE, [PC-4+PC2PROTO(k)]
4588 | mov L:RB, SAVE_L
4589 | lea RA, [BASE+RA*8] // Top of frame.
4590 | cmp RA, L:RB->maxstack
4591 | ja ->vm_growstack_f
4592 | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4593 | cmp NARGS:RDd, RAd // Check for missing parameters.
4594 | jbe >3
4595 |2:
4596 if (op == BC_JFUNCF) {
4597 | movzx RDd, PC_RD
4598 | jmp =>BC_JLOOP
4599 } else {
4600 | ins_next
4601 }
4602 |
4603 |3: // Clear missing parameters.
4604 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4605 | add NARGS:RDd, 1
4606 | cmp NARGS:RDd, RAd
4607 | jbe <3
4608 | jmp <2
4609 break;
4610
4611 case BC_JFUNCV:
4612#if !LJ_HASJIT
4613 break;
4614#endif
4615 | int3 // NYI: compiled vararg functions
4616 break; /* NYI: compiled vararg functions. */
4617
4618 case BC_IFUNCV:
4619 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4620 | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4621 | lea RD, [BASE+NARGS:RD*8+8]
4622 | mov LFUNC:KBASE, [BASE-16]
4623 | mov [RD-8], RB // Store delta + FRAME_VARG.
4624 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4625 | mov L:RB, SAVE_L
4626 | lea RA, [RD+RA*8]
4627 | cmp RA, L:RB->maxstack
4628 | ja ->vm_growstack_v // Need to grow stack.
4629 | mov RA, BASE
4630 | mov BASE, RD
4631 | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4632 | test RBd, RBd
4633 | jz >2
4634 | add RA, 8
4635 |1: // Copy fixarg slots up to new frame.
4636 | add RA, 8
4637 | cmp RA, BASE
4638 | jnb >3 // Less args than parameters?
4639 | mov KBASE, [RA-16]
4640 | mov [RD], KBASE
4641 | add RD, 8
4642 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4643 | sub RBd, 1
4644 | jnz <1
4645 |2:
4646 if (op == BC_JFUNCV) {
4647 | movzx RDd, PC_RD
4648 | jmp =>BC_JLOOP
4649 } else {
4650 | mov KBASE, [PC-4+PC2PROTO(k)]
4651 | ins_next
4652 }
4653 |
4654 |3: // Clear missing parameters.
4655 | mov aword [RD], LJ_TNIL
4656 | add RD, 8
4657 | sub RBd, 1
4658 | jnz <3
4659 | jmp <2
4660 break;
4661
4662 case BC_FUNCC:
4663 case BC_FUNCCW:
4664 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4665 | mov CFUNC:RB, [BASE-16]
4666 | cleartp CFUNC:RB
4667 | mov KBASE, CFUNC:RB->f
4668 | mov L:RB, SAVE_L
4669 | lea RD, [BASE+NARGS:RD*8-8]
4670 | mov L:RB->base, BASE
4671 | lea RA, [RD+8*LUA_MINSTACK]
4672 | cmp RA, L:RB->maxstack
4673 | mov L:RB->top, RD
4674 if (op == BC_FUNCC) {
4675 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4676 } else {
4677 | mov CARG2, KBASE
4678 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4679 }
4680 | ja ->vm_growstack_c // Need to grow stack.
4681 | set_vmstate C
4682 if (op == BC_FUNCC) {
4683 | call KBASE // (lua_State *L)
4684 } else {
4685 | // (lua_State *L, lua_CFunction f)
4686 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4687 }
4688 | // nresults returned in eax (RD).
4689 | mov BASE, L:RB->base
4690 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4691 | set_vmstate INTERP
4692 | lea RA, [BASE+RD*8]
4693 | neg RA
4694 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4695 | mov PC, [BASE-8] // Fetch PC of caller.
4696 | jmp ->vm_returnc
4697 break;
4698
4699 /* ---------------------------------------------------------------------- */
4700
4701 default:
4702 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4703 exit(2);
4704 break;
4705 }
4706}
4707
4708static int build_backend(BuildCtx *ctx)
4709{
4710 int op;
4711 dasm_growpc(Dst, BC__MAX);
4712 build_subroutines(ctx);
4713 |.code_op
4714 for (op = 0; op < BC__MAX; op++)
4715 build_ins(ctx, (BCOp)op, op);
4716 return BC__MAX;
4717}
4718
4719/* Emit pseudo frame-info for all assembler functions.
**
** Writes hand-encoded call frame tables to ctx->fp as assembler directives.
** Only two output modes are handled: BUILD_elfasm (always) and BUILD_machasm
** (only when LJ_NO_UNWIND is off). Every other ctx->mode emits nothing.
**
** The code of lj_vm_ffi_call gets its own frame description in both modes,
** separate from the one used for the rest of the generated code.
*/
4720static void emit_asm_debug(BuildCtx *ctx)
4721{
  /* Byte offset of the vm_ffi_call global from ctx->code. Used as the length
  ** of the first ELF FDE and subtracted from ctx->codesz for the FFI FDE.
  */
4722 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4723 switch (ctx->mode) {
4724 case BUILD_elfasm:
4725 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
  /* CIE for .debug_frame; the FDEs below refer to it as .Lframe0. */
4726 fprintf(ctx->fp,
4727 ".Lframe0:\n"
4728 "\t.long .LECIE0-.LSCIE0\n"
4729 ".LSCIE0:\n"
4730 "\t.long 0xffffffff\n"
4731 "\t.byte 0x1\n"
4732 "\t.string \"\"\n"
4733 "\t.uleb128 0x1\n"
4734 "\t.sleb128 -8\n"
4735 "\t.byte 0x10\n"
4736 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4737 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4738 "\t.align 8\n"
4739 ".LECIE0:\n\n");
  /* FDE for the range that starts at .Lbegin and is fcofs bytes long.
  ** The two %d slots are filled with fcofs and CFRAME_SIZE, in that order.
  */
4740 fprintf(ctx->fp,
4741 ".LSFDE0:\n"
4742 "\t.long .LEFDE0-.LASFDE0\n"
4743 ".LASFDE0:\n"
4744 "\t.long .Lframe0\n"
4745 "\t.quad .Lbegin\n"
4746 "\t.quad %d\n"
4747 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4748 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4749 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4750 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4751 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
  /* r13/r12 are only described in this FDE when LJ_NO_UNWIND is set. */
4752#if LJ_NO_UNWIND
4753 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4754 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4755#endif
4756 "\t.align 8\n"
4757 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4758#if LJ_HASFFI
  /* Separate FDE for lj_vm_ffi_call: ctx->codesz - fcofs bytes long, with
  ** rbp as the CFA register instead of a fixed CFRAME_SIZE offset.
  */
4759 fprintf(ctx->fp,
4760 ".LSFDE1:\n"
4761 "\t.long .LEFDE1-.LASFDE1\n"
4762 ".LASFDE1:\n"
4763 "\t.long .Lframe0\n"
4764 "\t.quad lj_vm_ffi_call\n"
4765 "\t.quad %d\n"
4766 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4767 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4768 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4769 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4770 "\t.align 8\n"
4771 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4772#endif
  /* The .eh_frame copy of the tables is only emitted when unwinding is on. */
4773#if !LJ_NO_UNWIND
  /* Solaris gets the @unwind section type, everything else @progbits. */
4774#if LJ_TARGET_SOLARIS
4775 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4776#else
4777 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4778#endif
  /* CIE .Lframe1: unlike .Lframe0 it carries a pc-relative reference to
  ** lj_err_unwind_dwarf (presumably the personality routine, going by the
  ** "zPR" augmentation string -- confirm against the .eh_frame spec).
  */
4779 fprintf(ctx->fp,
4780 ".Lframe1:\n"
4781 "\t.long .LECIE1-.LSCIE1\n"
4782 ".LSCIE1:\n"
4783 "\t.long 0\n"
4784 "\t.byte 0x1\n"
4785 "\t.string \"zPR\"\n"
4786 "\t.uleb128 0x1\n"
4787 "\t.sleb128 -8\n"
4788 "\t.byte 0x10\n"
4789 "\t.uleb128 6\n" /* augmentation length */
4790 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4791 "\t.long lj_err_unwind_dwarf-.\n"
4792 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4793 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4794 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4795 "\t.align 8\n"
4796 ".LECIE1:\n\n");
  /* FDE for the same .Lbegin range as .LSFDE0, but with pc-relative 32 bit
  ** fields and without the r12/r13 entries.
  */
4797 fprintf(ctx->fp,
4798 ".LSFDE2:\n"
4799 "\t.long .LEFDE2-.LASFDE2\n"
4800 ".LASFDE2:\n"
4801 "\t.long .LASFDE2-.Lframe1\n"
4802 "\t.long .Lbegin-.\n"
4803 "\t.long %d\n"
4804 "\t.uleb128 0\n" /* augmentation length */
4805 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4806 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4807 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4808 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4809 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4810 "\t.align 8\n"
4811 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4812#if LJ_HASFFI
  /* lj_vm_ffi_call gets its own CIE (.Lframe2, "zR") which has no reference
  ** to lj_err_unwind_dwarf, followed by its FDE.
  */
4813 fprintf(ctx->fp,
4814 ".Lframe2:\n"
4815 "\t.long .LECIE2-.LSCIE2\n"
4816 ".LSCIE2:\n"
4817 "\t.long 0\n"
4818 "\t.byte 0x1\n"
4819 "\t.string \"zR\"\n"
4820 "\t.uleb128 0x1\n"
4821 "\t.sleb128 -8\n"
4822 "\t.byte 0x10\n"
4823 "\t.uleb128 1\n" /* augmentation length */
4824 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4825 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4826 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4827 "\t.align 8\n"
4828 ".LECIE2:\n\n");
4829 fprintf(ctx->fp,
4830 ".LSFDE3:\n"
4831 "\t.long .LEFDE3-.LASFDE3\n"
4832 ".LASFDE3:\n"
4833 "\t.long .LASFDE3-.Lframe2\n"
4834 "\t.long lj_vm_ffi_call-.\n"
4835 "\t.long %d\n"
4836 "\t.uleb128 0\n" /* augmentation length */
4837 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4838 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4839 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4840 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4841 "\t.align 8\n"
4842 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4843#endif
4844#endif
4845 break;
4846#if !LJ_NO_UNWIND
4847 /* Mental note: never let Apple design an assembler.
4848 ** Or a linker. Or a plastic case. But I digress.
4849 */
4850 case BUILD_machasm: {
4851#if LJ_HASFFI
  /* Size of the _lj_vm_ffi_call symbol; stays 0 if the loop never sees it. */
4852 int fcsize = 0;
4853#endif
4854 int i;
4855 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
  /* CIE EH_frame1, shared by the per-symbol FDEs emitted in the loop below. */
4856 fprintf(ctx->fp,
4857 "EH_frame1:\n"
4858 "\t.set L$set$x,LECIEX-LSCIEX\n"
4859 "\t.long L$set$x\n"
4860 "LSCIEX:\n"
4861 "\t.long 0\n"
4862 "\t.byte 0x1\n"
4863 "\t.ascii \"zPR\\0\"\n"
4864 "\t.byte 0x1\n"
4865 "\t.byte 128-8\n"
4866 "\t.byte 0x10\n"
4867 "\t.byte 6\n" /* augmentation length */
4868 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4869 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4870 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4871 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4872 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4873 "\t.align 3\n"
4874 "LECIEX:\n\n");
  /* One FDE per symbol, labelled with the symbol index i. A symbol's size
  ** is the distance to the next symbol's offset; zero-sized ones are skipped.
  ** NOTE(review): the last iteration reads ctx->sym[ctx->nsym], so this
  ** assumes the table has one extra trailing entry -- confirm with the code
  ** that fills ctx->sym.
  */
4875 for (i = 0; i < ctx->nsym; i++) {
4876 const char *name = ctx->sym[i].name;
4877 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
4878 if (size == 0) continue;
4879#if LJ_HASFFI
  /* Only remember the size here; its FDE is emitted after the loop. */
4880 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4881#endif
4882 fprintf(ctx->fp,
4883 "%s.eh:\n"
4884 "LSFDE%d:\n"
4885 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4886 "\t.long L$set$%d\n"
4887 "LASFDE%d:\n"
4888 "\t.long LASFDE%d-EH_frame1\n"
4889 "\t.long %s-.\n"
4890 "\t.long %d\n"
4891 "\t.byte 0\n" /* augmentation length */
4892 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4893 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4894 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4895 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4896 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4897 "\t.align 3\n"
4898 "LEFDE%d:\n\n",
4899 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4900 }
4901#if LJ_HASFFI
  /* Emit the dedicated CIE (EH_frame2) and FDE for _lj_vm_ffi_call, but only
  ** if the loop above found that symbol with a non-zero size.
  */
4902 if (fcsize) {
4903 fprintf(ctx->fp,
4904 "EH_frame2:\n"
4905 "\t.set L$set$y,LECIEY-LSCIEY\n"
4906 "\t.long L$set$y\n"
4907 "LSCIEY:\n"
4908 "\t.long 0\n"
4909 "\t.byte 0x1\n"
4910 "\t.ascii \"zR\\0\"\n"
4911 "\t.byte 0x1\n"
4912 "\t.byte 128-8\n"
4913 "\t.byte 0x10\n"
4914 "\t.byte 1\n" /* augmentation length */
4915 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4916 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4917 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4918 "\t.align 3\n"
4919 "LECIEY:\n\n");
4920 fprintf(ctx->fp,
4921 "_lj_vm_ffi_call.eh:\n"
4922 "LSFDEY:\n"
4923 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4924 "\t.long L$set$yy\n"
4925 "LASFDEY:\n"
4926 "\t.long LASFDEY-EH_frame2\n"
4927 "\t.long _lj_vm_ffi_call-.\n"
4928 "\t.long %d\n"
4929 "\t.byte 0\n" /* augmentation length */
4930 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4931 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4932 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4933 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4934 "\t.align 3\n"
4935 "LEFDEY:\n\n", fcsize);
4936 }
4937#endif
4938 fprintf(ctx->fp, ".subsections_via_symbols\n");
4939 }
4940 break;
4941#endif
4942 default: /* Difficult for other modes. */
4943 break;
4944 }
4945}
4946