aboutsummaryrefslogtreecommitdiff
path: root/src/vm_x64.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/vm_x64.dasc')
-rw-r--r--src/vm_x64.dasc4971
1 files changed, 4971 insertions, 0 deletions
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
new file mode 100644
index 00000000..b222190a
--- /dev/null
+++ b/src/vm_x64.dasc
@@ -0,0 +1,4971 @@
1|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch x64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|//-----------------------------------------------------------------------
14|
15|.if WIN
16|.define X64WIN, 1 // Windows/x64 calling conventions.
17|.endif
18|
19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, rdx // Not C callee-save, refetched anyway.
22|.if X64WIN
23|.define KBASE, rdi // Must be C callee-save.
24|.define PC, rsi // Must be C callee-save.
25|.define DISPATCH, rbx // Must be C callee-save.
26|.define KBASEd, edi
27|.define PCd, esi
28|.define DISPATCHd, ebx
29|.else
30|.define KBASE, r15 // Must be C callee-save.
31|.define PC, rbx // Must be C callee-save.
32|.define DISPATCH, r14 // Must be C callee-save.
33|.define KBASEd, r15d
34|.define PCd, ebx
35|.define DISPATCHd, r14d
36|.endif
37|
38|.define RA, rcx
39|.define RAd, ecx
40|.define RAH, ch
41|.define RAL, cl
42|.define RB, rbp // Must be rbp (C callee-save).
43|.define RBd, ebp
44|.define RC, rax // Must be rax.
45|.define RCd, eax
46|.define RCW, ax
47|.define RCH, ah
48|.define RCL, al
49|.define OP, RBd
50|.define RD, RC
51|.define RDd, RCd
52|.define RDW, RCW
53|.define RDL, RCL
54|.define TMPR, r10
55|.define TMPRd, r10d
56|.define ITYPE, r11
57|.define ITYPEd, r11d
58|
59|.if X64WIN
60|.define CARG1, rcx // x64/WIN64 C call arguments.
61|.define CARG2, rdx
62|.define CARG3, r8
63|.define CARG4, r9
64|.define CARG1d, ecx
65|.define CARG2d, edx
66|.define CARG3d, r8d
67|.define CARG4d, r9d
68|.else
69|.define CARG1, rdi // x64/POSIX C call arguments.
70|.define CARG2, rsi
71|.define CARG3, rdx
72|.define CARG4, rcx
73|.define CARG5, r8
74|.define CARG6, r9
75|.define CARG1d, edi
76|.define CARG2d, esi
77|.define CARG3d, edx
78|.define CARG4d, ecx
79|.define CARG5d, r8d
80|.define CARG6d, r9d
81|.endif
82|
83|// Type definitions. Some of these are only used for documentation.
84|.type L, lua_State
85|.type GL, global_State
86|.type TVALUE, TValue
87|.type GCOBJ, GCobj
88|.type STR, GCstr
89|.type TAB, GCtab
90|.type LFUNC, GCfuncL
91|.type CFUNC, GCfuncC
92|.type PROTO, GCproto
93|.type UPVAL, GCupval
94|.type NODE, Node
95|.type NARGS, int
96|.type TRACE, GCtrace
97|.type SBUF, SBuf
98|
99|// Stack layout while in interpreter. Must match with lj_frame.h.
100|//-----------------------------------------------------------------------
101|.if X64WIN // x64/Windows stack layout
102|
103|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
104|.macro saveregs_ // Win64: push rdi/rsi/rbx, then reserve CFRAME_SPACE. rbp is pushed by saveregs.
105| push rdi; push rsi; push rbx
106| sub rsp, CFRAME_SPACE // rsp now addresses CSAVE_1 (see layout below).
107|.endmacro
108|.macro saveregs // Win64: full register save; rbp goes first so restoreregs pops it last.
109| push rbp; saveregs_
110|.endmacro
111|.macro restoreregs // Win64: exact inverse of saveregs.
112| add rsp, CFRAME_SPACE // Drop the frame area reserved by saveregs_.
113| pop rbx; pop rsi; pop rdi; pop rbp // Reverse of the push order in saveregs.
114|.endmacro
115|
116|.define SAVE_CFRAME, aword [rsp+aword*13]
117|.define SAVE_PC, aword [rsp+aword*12]
118|.define SAVE_L, aword [rsp+aword*11]
119|.define SAVE_ERRF, dword [rsp+dword*21]
120|.define SAVE_NRES, dword [rsp+dword*20]
121|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123|.define SAVE_R4, aword [rsp+aword*8]
124|.define SAVE_R3, aword [rsp+aword*7]
125|.define SAVE_R2, aword [rsp+aword*6]
126|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
127|.define ARG5, aword [rsp+aword*4]
128|.define CSAVE_4, aword [rsp+aword*3]
129|.define CSAVE_3, aword [rsp+aword*2]
130|.define CSAVE_2, aword [rsp+aword*1]
131|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
133|
134|.define ARG5d, dword [rsp+dword*8]
135|.define TMP1, ARG5 // TMP1 overlaps ARG5
136|.define TMP1d, ARG5d
137|.define TMP1hi, dword [rsp+dword*9]
138|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
139|
140|//-----------------------------------------------------------------------
141|.else // x64/POSIX stack layout
142|
143|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
144|.macro saveregs_ // POSIX: push rbx/r15/r14 (plus r13/r12 if NO_UNWIND), then reserve CFRAME_SPACE.
145| push rbx; push r15; push r14
146|.if NO_UNWIND
147| push r13; push r12 // Two extra slots; the NO_UNWIND layout below offsets SAVE_R* by two accordingly.
148|.endif
149| sub rsp, CFRAME_SPACE // rsp now addresses TMP1 (see layout below).
150|.endmacro
151|.macro saveregs // POSIX: full register save; rbp goes first so restoreregs pops it last.
152| push rbp; saveregs_
153|.endmacro
154|.macro restoreregs // POSIX: exact inverse of saveregs.
155| add rsp, CFRAME_SPACE // Drop the frame area reserved by saveregs_.
156|.if NO_UNWIND
157| pop r12; pop r13 // Reverse of the extra pushes in saveregs_.
158|.endif
159| pop r14; pop r15; pop rbx; pop rbp // Reverse of the remaining pushes.
160|.endmacro
161|
162|//----- 16 byte aligned,
163|.if NO_UNWIND
164|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
165|.define SAVE_R4, aword [rsp+aword*10]
166|.define SAVE_R3, aword [rsp+aword*9]
167|.define SAVE_R2, aword [rsp+aword*8]
168|.define SAVE_R1, aword [rsp+aword*7]
169|.define SAVE_RU2, aword [rsp+aword*6]
170|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
171|.else
172|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
173|.define SAVE_R4, aword [rsp+aword*8]
174|.define SAVE_R3, aword [rsp+aword*7]
175|.define SAVE_R2, aword [rsp+aword*6]
176|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
177|.endif
178|.define SAVE_CFRAME, aword [rsp+aword*4]
179|.define SAVE_PC, aword [rsp+aword*3]
180|.define SAVE_L, aword [rsp+aword*2]
181|.define SAVE_ERRF, dword [rsp+dword*3]
182|.define SAVE_NRES, dword [rsp+dword*2]
183|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
184|//----- 16 byte aligned
185|
186|.define TMP1d, dword [rsp]
187|.define TMP1hi, dword [rsp+dword*1]
188|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
189|
190|.endif
191|
192|//-----------------------------------------------------------------------
193|
194|// Instruction headers.
195|.macro ins_A; .endmacro // Expands to nothing: no extra operand decoding.
196|.macro ins_AD; .endmacro // Expands to nothing: RD is used as delivered by the dispatch code.
197|.macro ins_AJ; .endmacro // Expands to nothing.
198|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro // RB = bits 8-15 of RC, then RC = bits 0-7 of RC.
199|.macro ins_AB_; movzx RBd, RCH; .endmacro // RB = bits 8-15 of RC; RC itself is left unchanged.
200|.macro ins_A_C; movzx RCd, RCL; .endmacro // RC = bits 0-7 of RC.
201|.macro ins_AND; not RD; .endmacro // RD = bitwise complement of RD (64 bit).
202|
203|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
204|.macro ins_NEXT // Fetch the instruction at PC, split it into OP/RA/RC and jump to its handler.
205| mov RCd, [PC] // Load the 32-bit instruction word.
206| movzx RAd, RCH // RA = bits 8-15 of the instruction.
207| movzx OP, RCL // OP = bits 0-7; OP aliases RBd, so RB is overwritten.
208| add PC, 4 // PC now points past the fetched instruction.
209| shr RCd, 16 // RC (= RD) = upper 16 bits of the instruction.
210| jmp aword [DISPATCH+OP*8] // Indirect jump through the 8-byte entry OP at DISPATCH.
211|.endmacro
212|
213|// Instruction footer.
214|.if 1 // Constant condition: the replicated variant is always the one assembled.
215| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
216| .define ins_next, ins_NEXT
217| .define ins_next_, ins_NEXT
218|.else
219| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
220| // Affects only certain kinds of benchmarks (and only with -j off).
221| // Around 10%-30% slower on Core2, a lot slower on P4.
222| .macro ins_next // Jump to the single shared dispatch sequence.
223| jmp ->ins_next
224| .endmacro
225| .macro ins_next_ // Emits the shared dispatch sequence and defines its global label.
226| ->ins_next:
227| ins_NEXT
228| .endmacro
229|.endif
230|
231|// Call decode and dispatch.
232|.macro ins_callt // Dispatch to the first instruction of the function in RB.
233| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
234| mov PC, LFUNC:RB->pc // Load PC from the function object's pc field.
235| mov RAd, [PC] // Fetch the instruction word at that PC.
236| movzx OP, RAL // OP = bits 0-7; OP aliases RBd, so RB (LFUNC) is overwritten here.
237| movzx RAd, RAH // RA = bits 8-15.
238| add PC, 4 // PC now points past the fetched instruction.
239| jmp aword [DISPATCH+OP*8] // RD is not touched, so it still holds nargs+1.
240|.endmacro
241|
242|.macro ins_call // Like ins_callt, but first records the caller's PC in the new frame.
243| // BASE = new base, RB = LFUNC, RD = nargs+1
244| mov [BASE-8], PC // Establishes the [BASE-8] = PC precondition of ins_callt.
245| ins_callt
246|.endmacro
247|
248|//-----------------------------------------------------------------------
249|
250|// Macros to clear or set tags.
251|.macro cleartp, val; shl val, 17; shr val, 17; .endmacro // Zero the upper 17 bits of val, keeping bits 0-46.
252|.macro settp, val, tag // Two-operand form: val |= tag<<47, using ITYPE as scratch.
253| mov64 ITYPE, ((uint64_t)tag<<47)
254| or val, ITYPE
255|.endmacro
256|.macro settp, out, val, tag // Three-operand form: out = (tag<<47) | val; val is left intact.
257| mov64 out, ((uint64_t)tag<<47)
258| or out, val
259|.endmacro
260|.macro setint, reg // In-place form: tag reg with LJ_TISNUM (ITYPE is used as scratch by settp).
261| settp reg, LJ_TISNUM
262|.endmacro
263|.macro setint, dst, reg // dst = reg tagged with LJ_TISNUM; reg is left intact.
264| settp dst, reg, LJ_TISNUM
265|.endmacro
266|
267|// Macros to test operand types.
268|.macro checktp_nc, val, tag, miss // "No clear" check: val is preserved; go to miss unless val>>47 equals tag.
269| mov ITYPE, val
270| sar ITYPE, 47 // Arithmetic shift leaves the sign-extended tag in ITYPE.
271| cmp ITYPEd, tag
272| jne miss
273|.endmacro
274|.macro checktp, val, tag, miss // Strip the tag from val and go to miss unless the tag was tag.
275| mov ITYPE, val // Keep a tagged copy for the comparison.
276| cleartp val // val is untagged from here on, whether or not the check passes.
277| sar ITYPE, 47
278| cmp ITYPEd, tag
279| jne miss
280|.endmacro
281|.macro checktptp, operand, tag, miss // Same instruction sequence as checktp_nc; operand is only read.
282| mov ITYPE, operand
283| sar ITYPE, 47
284| cmp ITYPEd, tag
285| jne miss
286|.endmacro
287|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro // Untag reg; jump unless its tag was LJ_TSTR.
288|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro // Untag reg; jump unless its tag was LJ_TTAB.
289|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro // Untag reg; jump unless its tag was LJ_TFUNC.
290|
291|.macro checknumx, val, dest, jcc // Compare the tag of val with LJ_TISNUM, then branch to dest using jcc.
292| mov ITYPE, val
293| sar ITYPE, 47
294| cmp ITYPEd, LJ_TISNUM
295| jcc dest // jcc is the conditional-jump mnemonic supplied by the caller.
296|.endmacro
297|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro // Jump unless the tag equals LJ_TISNUM.
298|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro // Same expansion as checkint.
299|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro // Jump if the tag is >= LJ_TISNUM (unsigned).
300|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro // Same expansion as checknum.
301|.macro checknumber, src, target; checknumx src, target, ja; .endmacro // Jump if the tag is > LJ_TISNUM (unsigned).
302|
303|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro // reg = all ones except bit 47.
304|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro // reg = all ones except bit 48.
305|
306|// These operands must be used with movzx.
307|.define PC_OP, byte [PC-4]
308|.define PC_RA, byte [PC-3]
309|.define PC_RB, byte [PC-1]
310|.define PC_RC, byte [PC-2]
311|.define PC_RD, word [PC-2]
312|
313|.macro branchPC, reg // Apply a biased jump offset, counted in 4-byte instructions, to PC.
314| lea PC, [PC+reg*4-BCBIAS_J*4] // PC += (reg - BCBIAS_J)*4; lea leaves the flags untouched.
315|.endmacro
316|
317|// Assumes DISPATCH is relative to GL.
318#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
319#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
320|
321#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
322|
323|// Decrement hashed hotcount and trigger trace recorder on underflow.
324|.macro hotloop, slot // slot is a 32-bit scratch register; it is overwritten.
325| mov slot, PCd
326| shr slot, 1
327| and slot, HOTCOUNT_PCMASK // slot = (PC >> 1) masked down to a hotcount table offset.
328| sub word [DISPATCH+slot+GG_DISP2HOT], HOTCOUNT_LOOP
329| jb ->vm_hotloop // Taken when the 16-bit counter was below HOTCOUNT_LOOP.
330|.endmacro
331|
332|.macro hotcall, slot // Same as hotloop, but with HOTCOUNT_CALL and ->vm_hotcall; slot is overwritten.
333| mov slot, PCd
334| shr slot, 1
335| and slot, HOTCOUNT_PCMASK // slot = (PC >> 1) masked down to a hotcount table offset.
336| sub word [DISPATCH+slot+GG_DISP2HOT], HOTCOUNT_CALL
337| jb ->vm_hotcall // Taken when the 16-bit counter was below HOTCOUNT_CALL.
338|.endmacro
339|
340|// Set current VM state.
341|.macro set_vmstate, st // st is a bare suffix such as C or INTERP.
342| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st // Stores the complement of LJ_VMST_<st> in g->vmstate.
343|.endmacro
344|
345|.macro fpop1; fstp st1; .endmacro // Store st0 into st1 and pop: drops the old st1, keeping the old st0 on top.
346|
347|// Synthesize SSE FP constants.
348|.macro sseconst_abs, dst, gpr // dst = 0x7fffffffffffffff (every bit but the sign bit); gpr is clobbered.
349| mov64 gpr, U64x(7fffffff,ffffffff); movd dst, gpr
350|.endmacro
351|
352|.macro sseconst_hi, dst, gpr, hi32 // dst = constant whose upper 32 bits are hi32 (hex digits), lower 32 zero.
353| mov64 gpr, U64x(hi32,00000000); movd dst, gpr
354|.endmacro
355|
356|.macro sseconst_sign, reg, tmp // Synthesize sign mask (only bit 63 set).
357| sseconst_hi reg, tmp, 80000000
358|.endmacro
359|.macro sseconst_1, reg, tmp // Synthesize 1.0 (bit pattern 0x3ff0000000000000).
360| sseconst_hi reg, tmp, 3ff00000
361|.endmacro
362|.macro sseconst_m1, reg, tmp // Synthesize -1.0 (bit pattern 0xbff0000000000000).
363| sseconst_hi reg, tmp, bff00000
364|.endmacro
365|.macro sseconst_2p52, reg, tmp // Synthesize 2^52 (bit pattern 0x4330000000000000).
366| sseconst_hi reg, tmp, 43300000
367|.endmacro
368|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51 (bit pattern 0x4338000000000000).
369| sseconst_hi reg, tmp, 43380000
370|.endmacro
371|
372|// Move table write barrier back. Overwrites scratch.
373|.macro barrierback, tbl, scratch
374| and byte tbl->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tbl)
375| mov scratch, [DISPATCH+DISPATCH_GL(gc.grayagain)] // scratch = previous head of g->gc.grayagain.
376| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tbl // tbl becomes the new head ...
377| mov tbl->gclist, scratch // ... and links to the previous head.
378|.endmacro
379|
380|//-----------------------------------------------------------------------
381
382/* Generate subroutines used by opcodes and other parts of the VM. */
383/* The .code_sub section should be last to help static branch prediction. */
384static void build_subroutines(BuildCtx *ctx)
385{
386 |.code_sub
387 |
388 |//-----------------------------------------------------------------------
389 |//-- Return handling ----------------------------------------------------
390 |//-----------------------------------------------------------------------
391 |
392 |->vm_returnp: // Return through a non-C frame: a pcall frame, or else a continuation frame.
393 | test PCd, FRAME_P
394 | jz ->cont_dispatch // FRAME_P bit clear: hand over to the continuation dispatcher.
395 |
396 | // Return from pcall or xpcall fast func.
397 | and PC, -8 // Clear the low 3 bits of the frame link, leaving the frame delta.
398 | sub BASE, PC // Restore caller base.
399 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
400 | mov PC, [BASE-8] // Fetch PC of previous frame.
401 | // Prepending may overwrite the pcall frame, so do it at the end.
402 | mov_true ITYPE
403 | mov aword [BASE+RA], ITYPE // Prepend true to results. Falls through into vm_returnc.
404 |
405 |->vm_returnc: // Entry with RD = nresults; bumps it and picks the return path.
406 | add RDd, 1 // RD = nresults+1
407 | jz ->vm_unwind_yield // Sum is zero, i.e. RDd was -1 on entry.
408 | mov MULTRES, RDd
409 | test PC, FRAME_TYPE
410 | jz ->BC_RET_Z // Handle regular return to Lua. Otherwise falls through into vm_return.
411 |
412 |->vm_return: // Return to the frame described by PC; handles the C-frame case itself.
413 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
414 | xor PC, FRAME_C // The frame-type bits tested next end up zero only for a FRAME_C link.
415 | test PCd, FRAME_TYPE
416 | jnz ->vm_returnp
417 |
418 | // Return to C.
419 | set_vmstate C
420 | and PC, -8 // PC = frame delta.
421 | sub PC, BASE
422 | neg PC // Previous base = BASE - delta.
423 |
424 | sub RDd, 1 // RD = nresults; skip the copy loop if there are none.
425 | jz >2
426 |1: // Move results down.
427 | mov RB, [BASE+RA]
428 | mov [BASE-16], RB
429 | add BASE, 8 // BASE advances by one slot per copied result.
430 | sub RDd, 1
431 | jnz <1
432 |2:
433 | mov L:RB, SAVE_L
434 | mov L:RB->base, PC // PC holds the previous base computed above.
435 |3:
436 | mov RDd, MULTRES
437 | mov RAd, SAVE_NRES // RA = wanted nresults+1
438 |4:
439 | cmp RAd, RDd
440 | jne >6 // More/less results wanted?
441 |5:
442 | sub BASE, 16
443 | mov L:RB->top, BASE
444 |
445 |->vm_leave_cp: // Expects RB = L. Unlinks this C frame and returns status 0.
446 | mov RA, SAVE_CFRAME // Restore previous C frame.
447 | mov L:RB->cframe, RA
448 | xor eax, eax // Ok return status for vm_pcall.
449 |
450 |->vm_leave_unw: // Expects the return status in eax already.
451 | restoreregs
452 | ret
453 |
454 |6: // Flags are still those of cmp RAd, RDd at label 4.
455 | jb >7 // Less results wanted?
456 | // More results wanted. Check stack size and fill up results with nil.
457 | cmp BASE, L:RB->maxstack
458 | ja >8
459 | mov aword [BASE-16], LJ_TNIL
460 | add BASE, 8
461 | add RDd, 1
462 | jmp <4 // Re-compare after adding one nil.
463 |
464 |7: // Less results wanted.
465 | test RAd, RAd
466 | jz <5 // But check for LUA_MULTRET+1.
467 | sub RA, RD // Negative result!
468 | lea BASE, [BASE+RA*8] // Correct top.
469 | jmp <5
470 |
471 |8: // Corner case: need to grow stack for filling up results.
472 | // This can happen if:
473 | // - A C function grows the stack (a lot).
474 | // - The GC shrinks the stack in between.
475 | // - A return back from a lua_call() with (high) nresults adjustment.
476 | mov L:RB->top, BASE // Save current top held in BASE (yes).
477 | mov MULTRES, RDd // Need to fill only remainder with nil.
478 | mov CARG2d, RAd
479 | mov CARG1, L:RB
480 | call extern lj_state_growstack // (lua_State *L, int n)
481 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
482 | jmp <3 // Reload RD/RA from MULTRES/SAVE_NRES; both are caller-saved registers.
483 |
484 |->vm_unwind_yield: // Leave the VM with status LUA_YIELD.
485 | mov al, LUA_YIELD // Writes al only; when reached from vm_returnc, eax is 0 so eax becomes LUA_YIELD.
486 | jmp ->vm_unwind_c_eh
487 |
488 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
489 | // (void *cframe, int errcode)
490 | mov eax, CARG2d // Error return status for vm_pcall.
491 | mov rsp, CARG1 // From here the SAVE_* slots address the given cframe.
492 |->vm_unwind_c_eh: // Landing pad for external unwinder.
493 | mov L:RB, SAVE_L
494 | mov GL:RB, L:RB->glref // Reaches the global state via L->glref rather than DISPATCH.
495 | mov dword GL:RB->vmstate, ~LJ_VMST_C
496 | jmp ->vm_leave_unw // Bypasses vm_leave_cp: eax is kept and L->cframe is not rewritten here.
497 |
498 |->vm_unwind_rethrow: // Rethrow the error code in eax via lj_err_throw (non-Windows only).
499 |.if not X64WIN // On X64WIN this label has no body and falls through into vm_unwind_ff.
500 | mov CARG1, SAVE_L // Read SAVE_L before restoreregs moves rsp.
501 | mov CARG2d, eax
502 | restoreregs // Tear down our frame first; the jmp below is a tail call.
503 | jmp extern lj_err_throw // (lua_State *L, int errcode)
504 |.endif
505 |
506 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
507 | // (void *cframe)
508 | and CARG1, CFRAME_RAWMASK // Mask the cframe argument before using it as the stack pointer.
509 | mov rsp, CARG1
510 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
511 | mov L:RB, SAVE_L
512 | mov RDd, 1+1 // Really 1+2 results, incr. later.
513 | mov BASE, L:RB->base
514 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
515 | add DISPATCH, GG_G2DISP
516 | mov PC, [BASE-8] // Fetch PC of previous frame.
517 | mov_false RA
518 | mov RB, [BASE] // RB = value at [BASE]; it is moved to [BASE-8] below. L in RB is no longer needed.
519 | mov [BASE-16], RA // Prepend false to error message.
520 | mov [BASE-8], RB
521 | mov RA, -16 // Results start at BASE+RA = BASE-16.
522 | set_vmstate INTERP
523 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
524 |
525 |//-----------------------------------------------------------------------
526 |//-- Grow stack for calls -----------------------------------------------
527 |//-----------------------------------------------------------------------
528 |
529 |->vm_growstack_c: // Grow stack for C function.
530 | mov CARG2d, LUA_MINSTACK
531 | jmp >2 // L->base and L->top are expected to be set already (see label 2).
532 |
533 |->vm_growstack_v: // Grow stack for vararg Lua function.
534 | sub RD, 16 // LJ_FR2
535 | jmp >1 // RD is used as the top pointer from here (stored to L->top at label 1).
536 |
537 |->vm_growstack_f: // Grow stack for fixarg Lua function.
538 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
539 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, i.e. the slot just past the arguments.
540 |1:
541 | movzx RAd, byte [PC-4+PC2PROTO(framesize)] // RA = framesize byte of the GCproto, addressed relative to PC.
542 | add PC, 4 // Must point after first instruction.
543 | mov L:RB->base, BASE
544 | mov L:RB->top, RD
545 | mov SAVE_PC, PC
546 | mov CARG2, RA // Grow request n = framesize.
547 |2:
548 | // RB = L, L->base = new base, L->top = top
549 | mov CARG1, L:RB
550 | call extern lj_state_growstack // (lua_State *L, int n)
551 | mov BASE, L:RB->base // Reload base and top from L after the call.
552 | mov RD, L:RB->top
553 | mov LFUNC:RB, [BASE-16] // Refetch the function from the frame slot.
554 | cleartp LFUNC:RB
555 | sub RD, BASE // Recompute nargs+1 from top - base.
556 | shr RDd, 3
557 | add NARGS:RDd, 1
558 | // BASE = new base, RB = LFUNC, RD = nargs+1
559 | ins_callt // Just retry the call.
560 |
561 |//-----------------------------------------------------------------------
562 |//-- Entry points into the assembler VM ---------------------------------
563 |//-----------------------------------------------------------------------
564 |
565 |->vm_resume: // Setup C frame and resume thread.
566 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
567 | saveregs
568 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
569 | mov SAVE_L, CARG1
570 | mov RA, CARG2
571 | mov PCd, FRAME_CP
572 | xor RDd, RDd // RD = 0; used below as the zero source for several stores.
573 | lea KBASE, [rsp+CFRAME_RESUME] // KBASE = this C frame + CFRAME_RESUME. Uses the full rsp, like vm_call/vm_cpcall.
574 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
575 | add DISPATCH, GG_G2DISP
576 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
577 | mov SAVE_CFRAME, RD // No previous C frame is recorded for a resume.
578 | mov SAVE_NRES, RDd
579 | mov SAVE_ERRF, RDd
580 | mov L:RB->cframe, KBASE
581 | cmp byte L:RB->status, RDL // RDL is 0: is L->status zero?
582 | je >2 // Initial resume (like a call).
583 |
584 | // Resume after yield (like a return).
585 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
586 | set_vmstate INTERP
587 | mov byte L:RB->status, RDL // Reset L->status to 0.
588 | mov BASE, L:RB->base
589 | mov RD, L:RB->top
590 | sub RD, RA
591 | shr RDd, 3
592 | add RDd, 1 // RD = nresults+1
593 | sub RA, BASE // RA = resultofs
594 | mov PC, [BASE-8]
595 | mov MULTRES, RDd
596 | test PCd, FRAME_TYPE
597 | jz ->BC_RET_Z // Frame link has no type bits: regular return to Lua.
598 | jmp ->vm_return
599 |
600 |->vm_pcall: // Setup protected C frame and enter VM.
601 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
602 | saveregs
603 | mov PCd, FRAME_CP
604 | mov SAVE_ERRF, CARG4d // SAVE_ERRF is a dword slot, so only the low 32 bits of ef are stored.
605 | jmp >1 // Continue at the shared setup in vm_call.
606 |
607 |->vm_call: // Setup C frame and enter VM.
608 | // (lua_State *L, TValue *base, int nres1)
609 | saveregs
610 | mov PCd, FRAME_C
611 |
612 |1: // Entry point for vm_pcall above (PC = ftype).
613 | mov SAVE_NRES, CARG3d
614 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
615 | mov SAVE_L, CARG1
616 | mov RA, CARG2
617 |
618 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
619 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
620 | mov SAVE_CFRAME, KBASE
621 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
622 | add DISPATCH, GG_G2DISP
623 | mov L:RB->cframe, rsp // Our frame becomes the current C frame of L.
624 |
625 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
626 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
627 | set_vmstate INTERP
628 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
629 | add PC, RA
630 | sub PC, BASE // PC = frame delta + frame type
631 |
632 | mov RD, L:RB->top
633 | sub RD, RA // (top - new base) bytes ...
634 | shr NARGS:RDd, 3 // ... converted to a count of 8-byte slots.
635 | add NARGS:RDd, 1 // RD = nargs+1
636 |
637 |->vm_call_dispatch: // RA = new base, RD = nargs+1, PC = frame link; function is fetched from [RA-16].
638 | mov LFUNC:RB, [RA-16]
639 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
640 |
641 |->vm_call_dispatch_f: // As above, but RB already holds the untagged function.
642 | mov BASE, RA
643 | ins_call
644 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
645 |
646 |->vm_cpcall: // Setup protected C frame, call C.
647 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
648 | saveregs
649 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
650 | mov SAVE_L, CARG1
651 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
652 |
653 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
654 | sub KBASE, L:RB->top
655 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
656 | mov SAVE_ERRF, 0 // No error function.
657 | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
658 | add DISPATCH, GG_G2DISP
659 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
660 |
661 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
662 | mov SAVE_CFRAME, KBASE
663 | mov L:RB->cframe, rsp
664 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
665 |
666 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud); CARG1-CARG3 are still the incoming arguments.
667 | // TValue * (new base) or NULL returned in rax (RC).
668 | test RC, RC
669 | jz ->vm_leave_cp // No base? Just remove C frame.
670 | mov RA, RC
671 | mov PCd, FRAME_CP
672 | jmp <2 // Else continue with the call.
673 |
674 |//-----------------------------------------------------------------------
675 |//-- Metamethod handling ------------------------------------------------
676 |//-----------------------------------------------------------------------
677 |
678 |//-- Continuation dispatch ----------------------------------------------
679 |
680 |->cont_dispatch: // Return through a continuation frame: restore the caller and jump to the continuation.
681 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
682 | add RA, BASE // RA = absolute address of the first result.
683 | and PC, -8 // PC = frame delta.
684 | mov RB, BASE // RB keeps the meta base.
685 | sub BASE, PC // Restore caller BASE.
686 | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
687 | mov RC, RA // ... in [RC]
688 | mov PC, [RB-24] // Restore PC from [cont|PC].
689 | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
690 |.if FFI
691 | cmp RA, 1
692 | jbe >1 // cont value 0 or 1 is handled separately at label 1.
693 |.endif
694 | mov LFUNC:KBASE, [BASE-16]
695 | cleartp LFUNC:KBASE
696 | mov KBASE, LFUNC:KBASE->pc
697 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = k field of the caller's prototype.
698 | // BASE = base, RC = result, RB = meta base
699 | jmp RA // Jump to continuation.
700 |
701 |.if FFI
702 |1: // Flags are still those of cmp RA, 1 above.
703 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
704 | // cont = 0: Tail call from C function.
705 | sub RB, BASE
706 | shr RBd, 3
707 | lea RDd, [RBd-3] // RD = (meta base - base)/8 - 3.
708 | jmp ->vm_call_tail
709 |.endif
710 |
711 |->cont_cat: // BASE = base, RC = result, RB = mbase
712 | movzx RAd, PC_RB
713 | sub RB, 32 // RB = mbase-32: the slot that receives the result below.
714 | lea RA, [BASE+RA*8] // RA = address of stack slot PC_RB.
715 | sub RA, RB
716 | je ->cont_ra // That slot is the target slot itself: just store the result.
717 | neg RA
718 | shr RAd, 3 // RA = distance between the two slots, in 8-byte slots.
719 |.if X64WIN
720 | mov CARG3d, RAd // Must precede the write to CARG1, which is RA on this ABI.
721 | mov L:CARG1, SAVE_L
722 | mov L:CARG1->base, BASE
723 | mov RC, [RC] // Copy the result value into the slot at RB.
724 | mov [RB], RC
725 | mov CARG2, RB
726 |.else
727 | mov L:CARG1, SAVE_L
728 | mov L:CARG1->base, BASE // Save BASE before CARG3 (same register on this ABI) is written.
729 | mov CARG3d, RAd
730 | mov RA, [RC] // Copy the result value into the slot at RB.
731 | mov [RB], RA
732 | mov CARG2, RB
733 |.endif
734 | jmp ->BC_CAT_Z // Continue there with CARG1 = L, CARG2 = RB, CARG3 = slot count.
735 |
736 |//-- Table indexing metamethods -----------------------------------------
737 |
738 |->vmeta_tgets: // Index fallback with a string key; joins vmeta_tgetv at label 1 or 2.
739 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
740 | mov TMP1, STR:RC // Materialize the tagged key in the TMP1 stack slot.
741 | lea RC, TMP1 // RC = address of that key.
742 | cmp PC_OP, BC_GGET
743 | jne >1 // Not a BC_GGET: reload the table operand from PC_RB at label 1.
744 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
745 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
746 | mov [RB], TAB:RA
747 | jmp >2
748 |
749 |->vmeta_tgetb: // Index fallback with a byte-sized literal key taken from the instruction.
750 | movzx RCd, PC_RC
751 |.if DUALNUM
752 | setint RC // Key as a tagged integer.
753 | mov TMP1, RC
754 |.else
755 | cvtsi2sd xmm0, RCd // Key converted to a double.
756 | movsd TMP1, xmm0
757 |.endif
758 | lea RC, TMP1 // RC = address of the key stored in TMP1.
759 | jmp >1
760 |
761 |->vmeta_tgetv: // Index fallback with both operands in stack slots; calls lj_meta_tget.
762 | movzx RCd, PC_RC // Reload TValue *k from RC.
763 | lea RC, [BASE+RC*8]
764 |1:
765 | movzx RBd, PC_RB // Reload TValue *t from RB.
766 | lea RB, [BASE+RB*8]
767 |2: // RB = TValue *t, RC = TValue *k.
768 | mov L:CARG1, SAVE_L
769 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
770 | mov CARG2, RB
771 | mov CARG3, RC
772 | mov L:RB, L:CARG1 // Keep L in RB across the call.
773 | mov SAVE_PC, PC
774 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
775 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
776 | mov BASE, L:RB->base
777 | test RC, RC
778 | jz >3
779 |->cont_ra: // BASE = base, RC = result
780 | movzx RAd, PC_RA
781 | mov RB, [RC] // Copy the result value into stack slot PC_RA.
782 | mov [BASE+RA*8], RB
783 | ins_next
784 |
785 |3: // Call __index metamethod.
786 | // BASE = base, L->top = new base, stack = cont/func/t/k
787 | mov RA, L:RB->top
788 | mov [RA-24], PC // [cont|PC]
789 | lea PC, [RA+FRAME_CONT]
790 | sub PC, BASE // PC = frame delta + FRAME_CONT.
791 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
792 | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
793 | cleartp LFUNC:RB
794 | jmp ->vm_call_dispatch_f
795 |
796 |->vmeta_tgetr: // Fallback for an integer-key get: calls lj_tab_getinth.
797 | mov CARG1, TAB:RB
798 | mov RB, BASE // Save BASE.
799 | mov CARG2d, RCd // Caveat: CARG2 == BASE
800 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
801 | // cTValue * or NULL returned in rax (RC).
802 | movzx RAd, PC_RA // Reload RA from the instruction after the call.
803 | mov BASE, RB // Restore BASE.
804 | test RC, RC
805 | jnz ->BC_TGETR_Z
806 | mov ITYPE, LJ_TNIL // No slot found: continue with nil in ITYPE.
807 | jmp ->BC_TGETR2_Z
808 |
809 |//-----------------------------------------------------------------------
810 |
811 |->vmeta_tsets: // Store fallback with a string key; joins vmeta_tsetv at label 1 or 2.
812 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
813 | mov TMP1, STR:RC // Materialize the tagged key in the TMP1 stack slot.
814 | lea RC, TMP1 // RC = address of that key.
815 | cmp PC_OP, BC_GSET
816 | jne >1 // Not a BC_GSET: reload the table operand from PC_RB at label 1.
817 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
818 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
819 | mov [RB], TAB:RA
820 | jmp >2
821 |
822 |->vmeta_tsetb: // Store fallback with a byte-sized literal key taken from the instruction.
823 | movzx RCd, PC_RC
824 |.if DUALNUM
825 | setint RC // Key as a tagged integer.
826 | mov TMP1, RC
827 |.else
828 | cvtsi2sd xmm0, RCd // Key converted to a double.
829 | movsd TMP1, xmm0
830 |.endif
831 | lea RC, TMP1 // RC = address of the key stored in TMP1.
832 | jmp >1
833 |
834 |->vmeta_tsetv: // Store fallback with both operands in stack slots; calls lj_meta_tset.
835 | movzx RCd, PC_RC // Reload TValue *k from RC.
836 | lea RC, [BASE+RC*8]
837 |1:
838 | movzx RBd, PC_RB // Reload TValue *t from RB.
839 | lea RB, [BASE+RB*8]
840 |2: // RB = TValue *t, RC = TValue *k.
841 | mov L:CARG1, SAVE_L
842 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
843 | mov CARG2, RB
844 | mov CARG3, RC
845 | mov L:RB, L:CARG1 // Keep L in RB across the call.
846 | mov SAVE_PC, PC
847 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
848 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
849 | mov BASE, L:RB->base
850 | test RC, RC
851 | jz >3
852 | // NOBARRIER: lj_meta_tset ensures the table is not black.
853 | movzx RAd, PC_RA
854 | mov RB, [BASE+RA*8] // Value to store comes from stack slot PC_RA ...
855 | mov [RC], RB // ... and goes into the returned slot.
856 |->cont_nop: // BASE = base, (RC = result)
857 | ins_next
858 |
859 |3: // Call __newindex metamethod.
860 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
861 | mov RA, L:RB->top
862 | mov [RA-24], PC // [cont|PC]
863 | movzx RCd, PC_RA
864 | // Copy value to third argument.
865 | mov RB, [BASE+RC*8] // RB (L) is not needed any more and serves as scratch.
866 | mov [RA+16], RB
867 | lea PC, [RA+FRAME_CONT]
868 | sub PC, BASE // PC = frame delta + FRAME_CONT.
869 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
870 | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
871 | cleartp LFUNC:RB
872 | jmp ->vm_call_dispatch_f
873 |
874 |->vmeta_tsetr: // Fallback for an integer-key store: calls lj_tab_setinth.
875 |.if X64WIN
876 | mov L:CARG1, SAVE_L
877 | mov CARG3d, RCd
878 | mov L:CARG1->base, BASE
879 | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. The swap puts the table in CARG2 and BASE in RB.
880 |.else
881 | mov L:CARG1, SAVE_L
882 | mov CARG2, TAB:RB
883 | mov L:CARG1->base, BASE
884 | mov RB, BASE // Save BASE.
885 | mov CARG3d, RCd // Caveat: CARG3 == BASE.
886 |.endif
887 | mov SAVE_PC, PC
888 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
889 | // TValue * returned in rax (RC).
890 | movzx RAd, PC_RA // Reload RA from the instruction after the call.
891 | mov BASE, RB // Restore BASE.
892 | jmp ->BC_TSETR_Z
893 |
894 |//-- Comparison metamethods ---------------------------------------------
895 |
896 |->vmeta_comp: // Comparison fallback: calls lj_meta_comp, then branches or calls a metamethod.
897 | movzx RDd, PC_RD
898 | movzx RAd, PC_RA
899 | mov L:RB, SAVE_L
900 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
901 |.if X64WIN
902 | lea CARG3, [BASE+RD*8]
903 | lea CARG2, [BASE+RA*8] // Written last because CARG2 is BASE on this ABI.
904 |.else
905 | lea CARG2, [BASE+RA*8]
906 | lea CARG3, [BASE+RD*8] // Written last because CARG3 is BASE on this ABI.
907 |.endif
908 | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
909 | movzx CARG4d, PC_OP
910 | mov SAVE_PC, PC
911 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
912 | // 0/1 or TValue * (metamethod) returned in rax (RC).
913 |3: // Shared result handling; also entered from vmeta_equal and vmeta_equal_cd.
914 | mov BASE, L:RB->base
915 | cmp RC, 1 // Above 1: metamethod pointer. Equal: take the branch. Below: skip it.
916 | ja ->vmeta_binop
917 |4:
918 | lea PC, [PC+4] // lea keeps the flags, so the jb below still tests the last compare.
919 | jb >6
920 |5:
921 | movzx RDd, PC_RD
922 | branchPC RD
923 |6:
924 | ins_next
925 |
926 |->cont_condt: // BASE = base, RC = result
927 | add PC, 4
928 | mov ITYPE, [RC]
929 | sar ITYPE, 47 // ITYPE = tag of the result value.
930 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
931 | jb <5 // Below LJ_TISTRUECOND: take the branch at label 5.
932 | jmp <6 // Otherwise continue with the next instruction.
933 |
934 |->cont_condf: // BASE = base, RC = result
935 | mov ITYPE, [RC]
936 | sar ITYPE, 47 // ITYPE = tag of the result value.
937 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
938 | jmp <4 // Label 4 bumps PC with lea (flags kept) and skips the branch when below.
939 |
940 |->vmeta_equal: // Equality fallback: calls lj_meta_equal with o1 = RA, o2 = RD, ne = RB.
941 | cleartp TAB:RD
942 | sub PC, 4 // Step PC back by one instruction; label 4 adds 4 again on the 0/1 paths.
943 |.if X64WIN
944 | mov CARG3, RD
945 | mov CARG4d, RBd
946 | mov L:RB, SAVE_L
947 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
948 | mov CARG2, RA
949 | mov CARG1, L:RB // Caveat: CARG1 == RA.
950 |.else
951 | mov CARG2, RA
952 | mov CARG4d, RBd // Caveat: CARG4 == RA.
953 | mov L:RB, SAVE_L
954 | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
955 | mov CARG3, RD
956 | mov CARG1, L:RB
957 |.endif
958 | mov SAVE_PC, PC
959 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
960 | // 0/1 or TValue * (metamethod) returned in rax (RC).
961 | jmp <3 // Shares the result handling of vmeta_comp.
962 |
963 |->vmeta_equal_cd: // FFI-only equality fallback; the label has no body without FFI.
964 |.if FFI
965 | sub PC, 4 // Step PC back by one instruction; label 4 adds 4 again on the 0/1 paths.
966 | mov L:RB, SAVE_L
967 | mov L:RB->base, BASE
968 | mov CARG1, L:RB
969 | mov CARG2d, dword [PC-4] // Pass the instruction word just below the adjusted PC.
970 | mov SAVE_PC, PC
971 | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
972 | // 0/1 or TValue * (metamethod) returned in rax (RC).
973 | jmp <3 // Shares the result handling of vmeta_comp.
974 |.endif
975 |
976 |->vmeta_istype: // Type-check fallback: calls lj_meta_istype(L, ra, tp), then continues with the next instruction.
977 | mov L:RB, SAVE_L
978 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
979 | mov CARG2d, RAd // Read RA before CARG1 is written (CARG1 is RA on X64WIN).
980 | mov CARG3d, RDd
981 | mov L:CARG1, L:RB
982 | mov SAVE_PC, PC
983 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
984 | mov BASE, L:RB->base
985 | jmp <6 // Label 6 is the ins_next at the end of vmeta_comp.
986 |
987 |//-- Arithmetic metamethods ---------------------------------------------
988 |
989 |->vmeta_arith_vno: // Variant of vmeta_arith_vn that re-decodes RB/RC when DUALNUM is set.
990 |.if DUALNUM
991 | movzx RBd, PC_RB
992 | movzx RCd, PC_RC
993 |.endif
994 |->vmeta_arith_vn: // Left operand is stack slot RB, right operand is constant RC.
995 | lea RC, [KBASE+RC*8] // RC = address of the constant.
996 | jmp >1 // Label 1 turns RB into a stack slot address.
997 |
998 |->vmeta_arith_nvo: // Variant of vmeta_arith_nv that re-decodes RB/RC when DUALNUM is set.
999 |.if DUALNUM
1000 | movzx RBd, PC_RB
1001 | movzx RCd, PC_RC
1002 |.endif
1003 |->vmeta_arith_nv: // Operands swapped: the constant becomes the left operand.
1004 | lea TMPR, [KBASE+RC*8] // TMPR = address of constant RC.
1005 | lea RC, [BASE+RB*8] // RC = address of stack slot RB (right operand).
1006 | mov RB, TMPR // RB = address of the constant (left operand).
1007 | jmp >2 // Both operand addresses are final; skip label 1.
1008 |
1009 |->vmeta_unm: // Unary case: the single operand (stack slot RD) is passed as both rb and rc.
1010 | lea RC, [BASE+RD*8]
1011 | mov RB, RC
1012 | jmp >2
1013 |
1014 |->vmeta_arith_vvo: // Variant of vmeta_arith_vv that re-decodes RB/RC when DUALNUM is set.
1015 |.if DUALNUM
1016 | movzx RBd, PC_RB
1017 | movzx RCd, PC_RC
1018 |.endif
1019 |->vmeta_arith_vv: // Both operands are stack slots.
1020 | lea RC, [BASE+RC*8]
1021 |1:
1022 | lea RB, [BASE+RB*8]
1023 |2: // RB/RC = operand addresses; RA is still a slot index.
1024 | lea RA, [BASE+RA*8] // RA = address of the destination slot.
1025 |.if X64WIN
1026 | mov CARG3, RB
1027 | mov CARG4, RC
1028 | movzx RCd, PC_OP // RC is free again after the copy to CARG4.
1029 | mov ARG5d, RCd // Fifth argument is passed in the ARG5 stack slot.
1030 | mov L:RB, SAVE_L
1031 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1032 | mov CARG2, RA
1033 | mov CARG1, L:RB // Caveat: CARG1 == RA.
1034 |.else
1035 | movzx CARG5d, PC_OP
1036 | mov CARG2, RA
1037 | mov CARG4, RC // Caveat: CARG4 == RA.
1038 | mov L:CARG1, SAVE_L
1039 | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1040 | mov CARG3, RB
1041 | mov L:RB, L:CARG1 // Keep L in RB across the call.
1042 |.endif
1043 | mov SAVE_PC, PC
1044 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1045 | // NULL (finished) or TValue * (metamethod) returned in rax (RC).
1046 | mov BASE, L:RB->base
1047 | test RC, RC
1048 | jz ->cont_nop // Finished: go on with the next instruction. Otherwise falls through into vmeta_binop.
1049 |
1050 | // Call metamethod for binary op.
1051 |->vmeta_binop:
1052 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1053 | mov RA, RC
1054 | sub RC, BASE
1055 | mov [RA-24], PC // [cont|PC]
1056 | lea PC, [RC+FRAME_CONT]
1057 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1058 | jmp ->vm_call_dispatch
1059 |
1060 |->vmeta_len: // Out-of-line path for the length operator: calls lj_meta_len(L, o).
1061 | movzx RDd, PC_RD
1062 | mov L:RB, SAVE_L
1063 | mov L:RB->base, BASE
1064 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1065 | mov L:CARG1, L:RB
1066 | mov SAVE_PC, PC
1067 | call extern lj_meta_len // (lua_State *L, TValue *o)
1068 | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1069 | mov BASE, L:RB->base // Refetch BASE from L->base after the C call.
1070#if LJ_52
1071 | test RC, RC
1072 | jne ->vmeta_binop // Binop call for compatibility.
1073 | movzx RDd, PC_RD // NULL returned: reload the operand and retry via BC_LEN_Z.
1074 | mov TAB:CARG1, [BASE+RD*8]
1075 | cleartp TAB:CARG1
1076 | jmp ->BC_LEN_Z
1077#else
1078 | jmp ->vmeta_binop // Binop call for compatibility.
1079#endif
1080 |
1081 |//-- Call metamethod ----------------------------------------------------
1082 |
1083 |->vmeta_call_ra: // Entry with RA as a slot index: convert to new base BASE+RA*8+16.
1084 | lea RA, [BASE+RA*8+16]
1085 |->vmeta_call: // Resolve and call __call metamethod.
1086 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1087 | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1088 | mov RB, RA // RB is C callee-save: keeps the new base across the call.
1089 |.if X64WIN
1090 | mov L:TMPR, SAVE_L
1091 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1092 | lea CARG2, [RA-16]
1093 | lea CARG3, [RA+NARGS:RD*8-8]
1094 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1095 |.else
1096 | mov L:CARG1, SAVE_L
1097 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1098 | lea CARG2, [RA-16]
1099 | lea CARG3, [RA+NARGS:RD*8-8]
1100 |.endif
1101 | mov SAVE_PC, PC
1102 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1103 | mov RA, RB // Restore the new base.
1104 | mov L:RB, SAVE_L
1105 | mov BASE, L:RB->base
1106 | mov NARGS:RDd, TMP1d // Restore nargs+1.
1107 | mov LFUNC:RB, [RA-16] // Reload the function slot after the C call.
1108 | add NARGS:RDd, 1 // One more argument than before (presumably the original callee; confirm in lj_meta_call).
1109 | // This is fragile. L->base must not move, KBASE must always be defined.
1110 | cmp KBASE, BASE // Continue with CALLT if flag set.
1111 | je ->BC_CALLT_Z
1112 | cleartp LFUNC:RB
1113 | mov BASE, RA
1114 | ins_call // Otherwise call resolved metamethod.
1115 |
1116 |//-- Argument coercion for 'for' statement ------------------------------
1117 |
1118 |->vmeta_for: // Calls lj_meta_for(L, RA), then re-dispatches the instruction before PC.
1119 | mov L:RB, SAVE_L
1120 | mov L:RB->base, BASE
1121 | mov CARG2, RA // Caveat: CARG2 == BASE
1122 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1123 | mov SAVE_PC, PC
1124 | call extern lj_meta_for // (lua_State *L, TValue *base)
1125 | mov BASE, L:RB->base
1126 | mov RCd, [PC-4] // Reload the 32 bit instruction word preceding PC.
1127 | movzx RAd, RCH // RA = bits 8..15.
1128 | movzx OP, RCL // OP = bits 0..7.
1129 | shr RCd, 16 // RD = bits 16..31.
1130 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1131 |
1132 |//-----------------------------------------------------------------------
1133 |//-- Fast functions -----------------------------------------------------
1134 |//-----------------------------------------------------------------------
1135 |
1136 |.macro .ffunc, name // Define the entry label ff_<name> without any argument check.
1137 |->ff_ .. name:
1138 |.endmacro
1139 |
1140 |.macro .ffunc_1, name // Entry label plus check: falls back if fewer than 1 argument.
1141 |->ff_ .. name:
1142 | cmp NARGS:RDd, 1+1; jb ->fff_fallback
1143 |.endmacro
1144 |
1145 |.macro .ffunc_2, name // Entry label plus check: falls back if fewer than 2 arguments.
1146 |->ff_ .. name:
1147 | cmp NARGS:RDd, 2+1; jb ->fff_fallback
1148 |.endmacro
1149 |
1150 |.macro .ffunc_n, name, op // One number argument, combined into xmm0 with 'op'.
1151 | .ffunc_1 name
1152 | checknumtp [BASE], ->fff_fallback
1153 | op xmm0, qword [BASE]
1154 |.endmacro
1155 |
1156 |.macro .ffunc_n, name // One number argument, loaded into xmm0 with movsd.
1157 | .ffunc_n name, movsd
1158 |.endmacro
1159 |
1160 |.macro .ffunc_nn, name // Two number arguments, loaded into xmm0 and xmm1.
1161 | .ffunc_2 name
1162 | checknumtp [BASE], ->fff_fallback
1163 | checknumtp [BASE+8], ->fff_fallback
1164 | movsd xmm0, qword [BASE]
1165 | movsd xmm1, qword [BASE+8]
1166 |.endmacro
1167 |
1168 |// Inlined GC threshold check. Caveat: uses label 1. Clobbers RB.
1169 |.macro ffgccheck
1170 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1171 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1172 | jb >1 // gc.total < gc.threshold (unsigned): skip the GC step.
1173 | call ->fff_gcstep
1174 |1:
1175 |.endmacro
1176 |
1177 |//-- Base library: checks -----------------------------------------------
1178 |
1179 |.ffunc_1 assert // Fast path: returns all arguments; other cases go to the fallback.
1180 | mov ITYPE, [BASE]
1181 | mov RB, ITYPE
1182 | sar ITYPE, 47 // Shift down by 47 to get the type tag compared below.
1183 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback
1184 | mov PC, [BASE-8]
1185 | mov MULTRES, RDd // nresults+1 = nargs+1: every argument is returned.
1186 | mov RB, [BASE] // Same value as already held in RB (reloaded).
1187 | mov [BASE-16], RB // First result goes to BASE-16.
1188 | sub RDd, 2 // RD = number of remaining arguments to move.
1189 | jz >2
1190 | mov RA, BASE
1191 |1: // Move the remaining arguments down by two slots.
1192 | add RA, 8
1193 | mov RB, [RA]
1194 | mov [RA-16], RB
1195 | sub RDd, 1
1196 | jnz <1
1197 |2:
1198 | mov RDd, MULTRES // Restore nresults+1 clobbered by the loop counter.
1199 | jmp ->fff_res_
1200 |
1201 |.ffunc_1 type // Returns a string taken from the C function's upvalues, indexed by type tag.
1202 | mov RC, [BASE]
1203 | sar RC, 47 // Shift down by 47 to get the type tag.
1204 | mov RBd, LJ_TISNUM
1205 | cmp RCd, RBd
1206 | cmovb RCd, RBd // Clamp tags below LJ_TISNUM (unsigned) to LJ_TISNUM.
1207 | not RCd // ~tag is the index into the upvalue array.
1208 |2:
1209 | mov CFUNC:RB, [BASE-16]
1210 | cleartp CFUNC:RB
1211 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1212 | mov PC, [BASE-8]
1213 | settp STR:RC, LJ_TSTR
1214 | mov [BASE-16], STR:RC
1215 | jmp ->fff_res1
1216 |
1217 |//-- Base library: getters and setters ---------------------------------
1218 |
1219 |.ffunc_1 getmetatable // Returns mt.__metatable if present and non-nil, else the metatable, else nil.
1220 | mov TAB:RB, [BASE]
1221 | mov PC, [BASE-8]
1222 | checktab TAB:RB, >6 // Label 6 reads ITYPE; presumably checktab leaves the type tag there.
1223 |1: // Field metatable must be at same offset for GCtab and GCudata!
1224 | mov TAB:RB, TAB:RB->metatable
1225 |2:
1226 | test TAB:RB, TAB:RB
1227 | mov aword [BASE-16], LJ_TNIL // mov leaves the flags from 'test' intact.
1228 | jz ->fff_res1 // No metatable: return nil.
1229 | settp TAB:RC, TAB:RB, LJ_TTAB
1230 | mov [BASE-16], TAB:RC // Store metatable as default result.
1231 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
1232 | mov RAd, TAB:RB->hmask
1233 | and RAd, STR:RC->sid // Node index = hmask & sid of the key string.
1234 | settp STR:RC, LJ_TSTR
1235 | imul RAd, #NODE
1236 | add NODE:RA, TAB:RB->node
1237 |3: // Rearranged logic, because we expect _not_ to find the key.
1238 | cmp NODE:RA->key, STR:RC
1239 | je >5
1240 |4:
1241 | mov NODE:RA, NODE:RA->next // Follow the collision chain.
1242 | test NODE:RA, NODE:RA
1243 | jnz <3
1244 | jmp ->fff_res1 // Not found, keep default result.
1245 |5:
1246 | mov RB, NODE:RA->val
1247 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1248 | mov [BASE-16], RB // Return value of mt.__metatable.
1249 | jmp ->fff_res1
1250 |
1251 |6: // Not a table: userdata uses its own metatable, other types a base metatable.
1252 | cmp ITYPEd, LJ_TUDATA; je <1
1253 | cmp ITYPEd, LJ_TISNUM; ja >7
1254 | mov ITYPEd, LJ_TISNUM // Tags <= LJ_TISNUM (unsigned) share the LJ_TISNUM entry.
1255 |7:
1256 | not ITYPEd // ~tag indexes the base metatable array.
1257 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1258 | jmp <2
1259 |
1260 |.ffunc_2 setmetatable // Fast path only: table without a metatable gets a table as metatable.
1261 | mov TAB:RB, [BASE]
1262 | mov TAB:TMPR, TAB:RB // Keep the original tagged value for the result.
1263 | checktab TAB:RB, ->fff_fallback
1264 | // Fast path: no mt for table yet and not clearing the mt.
1265 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1266 | mov TAB:RA, [BASE+8]
1267 | checktab TAB:RA, ->fff_fallback
1268 | mov TAB:RB->metatable, TAB:RA
1269 | mov PC, [BASE-8]
1270 | mov [BASE-16], TAB:TMPR // Return original table.
1271 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1272 | jz >1
1273 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1274 | barrierback TAB:RB, RC
1275 |1:
1276 | jmp ->fff_res1
1277 |
1278 |.ffunc_2 rawget // rawget(t, k) via lj_tab_get; non-table first argument goes to the fallback.
1279 |.if X64WIN
1280 | mov TAB:RA, [BASE]
1281 | checktab TAB:RA, ->fff_fallback
1282 | mov RB, BASE // Save BASE.
1283 | lea CARG3, [BASE+8]
1284 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1285 | mov CARG1, SAVE_L
1286 |.else
1287 | mov TAB:CARG2, [BASE]
1288 | checktab TAB:CARG2, ->fff_fallback
1289 | mov RB, BASE // Save BASE.
1290 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1291 | mov CARG1, SAVE_L
1292 |.endif
1293 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1294 | // cTValue * returned in eax (RD).
1295 | mov BASE, RB // Restore BASE.
1296 | // Copy table slot.
1297 | mov RB, [RD]
1298 | mov PC, [BASE-8]
1299 | mov [BASE-16], RB
1300 | jmp ->fff_res1
1301 |
1302 |//-- Base library: conversions ------------------------------------------
1303 |
1304 |.ffunc tonumber // Returns the argument unchanged if it is already a number.
1305 | // Only handles the number case inline (without a base argument).
1306 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1307 | mov RB, [BASE]
1308 | checknumber RB, ->fff_fallback
1309 | mov PC, [BASE-8]
1310 | mov [BASE-16], RB
1311 | jmp ->fff_res1
1312 |
1313 |.ffunc_1 tostring // Strings are returned as-is; numbers are formatted by a C helper.
1314 | // Only handles the string or number case inline.
1315 | mov PC, [BASE-8]
1316 | mov STR:RB, [BASE]
1317 | checktp_nc STR:RB, LJ_TSTR, >3
1318 | // A __tostring method in the string base metatable is ignored.
1319 |2:
1320 | mov [BASE-16], STR:RB
1321 | jmp ->fff_res1
1322 |3: // Handle numbers inline, unless a number base metatable is present.
1323 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1324 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1325 | jne ->fff_fallback
1326 | ffgccheck // Caveat: uses label 1.
1327 | mov L:RB, SAVE_L
1328 | mov L:RB->base, BASE // Add frame since C call can throw.
1329 | mov SAVE_PC, PC // Redundant (but a defined value).
1330 |.if not X64WIN
1331 | mov CARG2, BASE // Otherwise: CARG2 == BASE
1332 |.endif
1333 | mov L:CARG1, L:RB
1334 |.if DUALNUM
1335 | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1336 |.else
1337 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1338 |.endif
1339 | // GCstr returned in eax (RD).
1340 | mov BASE, L:RB->base
1341 | settp STR:RB, RD, LJ_TSTR
1342 | jmp <2 // Store the tagged string as the result.
1343 |
1344 |//-- Base library: iterators -------------------------------------------
1345 |
1346 |.ffunc_1 next // next(t [, k]) via lj_tab_next.
1347 | je >2 // Missing 2nd arg?
1348 |1:
1349 | mov CARG1, [BASE]
1350 | mov PC, [BASE-8]
1351 | checktab CARG1, ->fff_fallback
1352 | mov RB, BASE // Save BASE.
1353 |.if X64WIN
1354 | lea CARG3, [BASE-16]
1355 | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE.
1356 |.else
1357 | lea CARG2, [BASE+8]
1358 | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE.
1359 |.endif
1360 | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1361 | // 1=found, 0=end, -1=error returned in eax (RD).
1362 | mov BASE, RB // Restore BASE.
1363 | test RDd, RDd; jg ->fff_res2 // Found key/value.
1364 | js ->fff_fallback_2 // Invalid key.
1365 | // End of traversal: return nil.
1366 | mov aword [BASE-16], LJ_TNIL
1367 | jmp ->fff_res1
1368 |2: // Set missing 2nd arg to nil.
1369 | mov aword [BASE+8], LJ_TNIL
1370 | jmp <1
1371 |
1372 |.ffunc_1 pairs // Returns upvalue[0] of this function, the table and nil.
1373 | mov TAB:RB, [BASE]
1374 | mov TMPR, TAB:RB // Keep the tagged table value for the second result.
1375 | checktab TAB:RB, ->fff_fallback
1376#if LJ_52
1377 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1378#endif
1379 | mov CFUNC:RD, [BASE-16]
1380 | cleartp CFUNC:RD
1381 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1382 | settp CFUNC:RD, LJ_TFUNC
1383 | mov PC, [BASE-8] // Load the PC slot before it is overwritten below.
1384 | mov [BASE-16], CFUNC:RD
1385 | mov [BASE-8], TMPR
1386 | mov aword [BASE], LJ_TNIL
1387 | mov RDd, 1+3 // nresults+1 for three results.
1388 | jmp ->fff_res
1389 |
1390 |.ffunc_2 ipairs_aux // ipairs iterator: returns i+1, t[i+1], or nothing at the end.
1391 | mov TAB:RB, [BASE]
1392 | checktab TAB:RB, ->fff_fallback
1393 |.if DUALNUM
1394 | mov RA, [BASE+8]
1395 | checkint RA, ->fff_fallback
1396 |.else
1397 | checknumtp [BASE+8], ->fff_fallback
1398 | movsd xmm0, qword [BASE+8]
1399 |.endif
1400 | mov PC, [BASE-8]
1401 |.if DUALNUM
1402 | add RAd, 1
1403 | setint ITYPE, RA
1404 | mov [BASE-16], ITYPE // First result: the incremented index.
1405 |.else
1406 | sseconst_1 xmm1, TMPR
1407 | addsd xmm0, xmm1
1408 | cvttsd2si RAd, xmm0 // RA = integer index for the array lookup.
1409 | movsd qword [BASE-16], xmm0 // First result: the incremented index.
1410 |.endif
1411 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1412 | mov RD, TAB:RB->array
1413 | lea RD, [RD+RA*8]
1414 |1:
1415 | cmp aword [RD], LJ_TNIL; je ->fff_res0
1416 | // Copy array slot.
1417 | mov RB, [RD]
1418 | mov [BASE-8], RB
1419 |->fff_res2: // Shared exit: return two results.
1420 | mov RDd, 1+2
1421 | jmp ->fff_res
1422 |2: // Check for empty hash part first. Otherwise call C function.
1423 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1424 |.if X64WIN
1425 | mov TMPR, BASE
1426 | mov CARG2d, RAd
1427 | mov CARG1, TAB:RB
1428 | mov RB, TMPR // Save BASE in callee-save RB.
1429 |.else
1430 | mov CARG1, TAB:RB
1431 | mov RB, BASE // Save BASE.
1432 | mov CARG2d, RAd // Caveat: CARG2 == BASE
1433 |.endif
1434 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1435 | // cTValue * or NULL returned in eax (RD).
1436 | mov BASE, RB
1437 | test RD, RD
1438 | jnz <1
1439 |->fff_res0: // Shared exit: return no results.
1440 | mov RDd, 1+0
1441 | jmp ->fff_res
1442 |
1443 |.ffunc_1 ipairs // Returns upvalue[0] of this function, the table and a zero start index.
1444 | mov TAB:RB, [BASE]
1445 | mov TMPR, TAB:RB // Keep the tagged table value for the second result.
1446 | checktab TAB:RB, ->fff_fallback
1447#if LJ_52
1448 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1449#endif
1450 | mov CFUNC:RD, [BASE-16]
1451 | cleartp CFUNC:RD
1452 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1453 | settp CFUNC:RD, LJ_TFUNC
1454 | mov PC, [BASE-8] // Load the PC slot before it is overwritten below.
1455 | mov [BASE-16], CFUNC:RD
1456 | mov [BASE-8], TMPR
1457 |.if DUALNUM
1458 | mov64 RD, ((uint64_t)LJ_TISNUM<<47) // Tag LJ_TISNUM with a zero payload.
1459 | mov [BASE], RD
1460 |.else
1461 | mov qword [BASE], 0 // All-zero bits: the double 0.0.
1462 |.endif
1463 | mov RDd, 1+3 // nresults+1 for three results.
1464 | jmp ->fff_res
1465 |
1466 |//-- Base library: catch errors ----------------------------------------
1467 |
1468 |.ffunc_1 pcall // pcall(f, ...): set up a FRAME_PCALL frame and call f.
| // The argument shift below writes one slot at the current top (BASE+nargs*8).
| // Bail out if that slot is beyond L->maxstack: fff_fallback grows the stack
| // and retries the fast function.
| mov L:RB, SAVE_L
| lea RA, [BASE+NARGS:RD*8]
| cmp RA, L:RB->maxstack; ja ->fff_fallback
1469 | lea RA, [BASE+16] // RA = new base for the called function.
1470 | sub NARGS:RDd, 1 // The function itself is not an argument of the call.
1471 | mov PCd, 16+FRAME_PCALL // Frame delta 16 plus frame type.
1472 |1: // Shared with xpcall: RA = new base, RD = nargs+1, PC = delta+type.
1473 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1474 | shr RB, HOOK_ACTIVE_SHIFT
1475 | and RB, 1
1476 | add PC, RB // Remember active hook before pcall.
1477 | // Note: this does a (harmless) copy of the function to the PC slot, too.
1478 | mov KBASE, RD // KBASE is only a loop counter here.
1479 |2: // Move the slots up by one, starting from the top.
1480 | mov RB, [RA+KBASE*8-24]
1481 | mov [RA+KBASE*8-16], RB
1482 | sub KBASE, 1
1483 | ja <2
1484 | jmp ->vm_call_dispatch
1485 |
1486 |.ffunc_2 xpcall // xpcall(f, handler, ...): like pcall, with f and handler swapped first.
| // Same stack check as for pcall: the shift at label 1 writes at the current top.
| mov L:RB, SAVE_L
| lea RA, [BASE+NARGS:RD*8]
| cmp RA, L:RB->maxstack; ja ->fff_fallback
1487 | mov LFUNC:RA, [BASE+8]
1488 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
1489 | mov LFUNC:RB, [BASE] // Swap function and traceback.
1490 | mov [BASE], LFUNC:RA
1491 | mov [BASE+8], LFUNC:RB
1492 | lea RA, [BASE+24] // RA = new base for the called function.
1493 | sub NARGS:RDd, 2 // Function and handler are not arguments of the call.
1494 | mov PCd, 24+FRAME_PCALL // Frame delta 24 plus frame type.
1495 | jmp <1
1496 |
1497 |//-- Coroutine library --------------------------------------------------
1498 |
1499 |.macro coroutine_resume_wrap, resume // resume=1: coroutine.resume, resume=0: coroutine.wrap helper.
1500 |.if resume
1501 |.ffunc_1 coroutine_resume
1502 | mov L:RB, [BASE] // Coroutine is the first argument (type checked below).
1503 | cleartp L:RB
1504 |.else
1505 |.ffunc coroutine_wrap_aux
1506 | mov CFUNC:RB, [BASE-16]
1507 | cleartp CFUNC:RB
1508 | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine is upvalue[0] of the wrapper function.
1509 | cleartp L:RB
1510 |.endif
1511 | mov PC, [BASE-8]
1512 | mov SAVE_PC, PC
1513 | mov TMP1, L:RB // TMP1 keeps the coroutine across calls.
1514 |.if resume
1515 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1516 |.endif
1517 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1518 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1519 | mov RA, L:RB->top // mov keeps the flags from the status compare.
1520 | je >1 // Status != LUA_YIELD (i.e. 0)?
1521 | cmp RA, L:RB->base // Check for presence of initial func.
1522 | je ->fff_fallback
1523 | mov PC, [RA-8] // Move initial function up.
1524 | mov [RA], PC
1525 | add RA, 8
1526 |1:
1527 |.if resume
1528 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1529 |.else
1530 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1531 |.endif
1532 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1533 | mov L:RB->top, PC // New top of the coroutine stack.
1534 |
1535 | mov L:RB, SAVE_L // From here on RB is the calling state again.
1536 | mov L:RB->base, BASE
1537 |.if resume
1538 | add BASE, 8 // Keep resumed thread in stack for GC.
1539 |.endif
1540 | mov L:RB->top, BASE
1541 |.if resume
1542 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1543 |.else
1544 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1545 |.endif
1546 | sub RB, PC // Relative to PC.
1547 |
1548 | cmp PC, RA
1549 | je >3 // Nothing to move.
1550 |2: // Move args to coroutine.
1551 | mov RC, [PC+RB]
1552 | mov [PC-8], RC
1553 | sub PC, 8
1554 | cmp PC, RA
1555 | jne <2
1556 |3:
1557 | mov CARG2, RA
1558 | mov CARG1, TMP1
1559 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1560 |
1561 | mov L:RB, SAVE_L
1562 | mov L:PC, TMP1 // PC register temporarily holds the coroutine.
1563 | mov BASE, L:RB->base
1564 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1565 | set_vmstate INTERP
1566 |
1567 | cmp eax, LUA_YIELD
1568 | ja >8 // Status above LUA_YIELD: error.
1569 |4:
1570 | mov RA, L:PC->base
1571 | mov KBASE, L:PC->top
1572 | mov L:PC->top, RA // Clear coroutine stack.
1573 | mov PC, KBASE
1574 | sub PC, RA // PC = size of the results in bytes.
1575 | je >6 // No results?
1576 | lea RD, [BASE+PC]
1577 | shr PCd, 3 // PC = number of results.
1578 | cmp RD, L:RB->maxstack
1579 | ja >9 // Need to grow stack?
1580 |
1581 | mov RB, BASE
1582 | sub RB, RA // RB = offset from coroutine slots to caller slots.
1583 |5: // Move results from coroutine.
1584 | mov RD, [RA]
1585 | mov [RA+RB], RD
1586 | add RA, 8
1587 | cmp RA, KBASE
1588 | jne <5
1589 |6:
1590 |.if resume
1591 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1592 | mov_true ITYPE // Prepend true to results.
1593 | mov [BASE-8], ITYPE
1594 |.else
1595 | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1596 |.endif
1597 |7:
1598 | mov PC, SAVE_PC
1599 | mov MULTRES, RDd
1600 |.if resume
1601 | mov RA, -8 // Results start at BASE-8 (the prepended boolean).
1602 |.else
1603 | xor RAd, RAd // Results start at BASE.
1604 |.endif
1605 | test PCd, FRAME_TYPE
1606 | jz ->BC_RET_Z
1607 | jmp ->vm_return
1608 |
1609 |8: // Coroutine returned with error (at co->top-1).
1610 |.if resume
1611 | mov_false ITYPE // Prepend false to results.
1612 | mov [BASE-8], ITYPE
1613 | mov RA, L:PC->top
1614 | sub RA, 8
1615 | mov L:PC->top, RA // Clear error from coroutine stack.
1616 | // Copy error message.
1617 | mov RD, [RA]
1618 | mov [BASE], RD
1619 | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1620 | jmp <7
1621 |.else
1622 | mov CARG2, L:PC
1623 | mov CARG1, L:RB
1624 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1625 | // Error function does not return.
1626 |.endif
1627 |
1628 |9: // Handle stack expansion on return from yield.
1629 | mov L:RA, TMP1
1630 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1631 | mov CARG2, PC // PC holds the number of results here.
1632 | mov CARG1, L:RB
1633 | call extern lj_state_growstack // (lua_State *L, int n)
1634 | mov L:PC, TMP1
1635 | mov BASE, L:RB->base
1636 | jmp <4 // Retry the stack move.
1637 |.endmacro
1638 |
1639 | coroutine_resume_wrap 1 // coroutine.resume
1640 | coroutine_resume_wrap 0 // coroutine.wrap
1641 |
1642 |.ffunc coroutine_yield // Fast yield: only taken if CFRAME_RESUME is set in L->cframe.
1643 | mov L:RB, SAVE_L
1644 | test aword L:RB->cframe, CFRAME_RESUME
1645 | jz ->fff_fallback
1646 | mov L:RB->base, BASE
1647 | lea RD, [BASE+NARGS:RD*8-8]
1648 | mov L:RB->top, RD // top = BASE + nargs*8.
1649 | xor RDd, RDd
1650 | mov aword L:RB->cframe, RD // L->cframe = 0.
1651 | mov al, LUA_YIELD // al is the low byte of RD: status is also left in eax.
1652 | mov byte L:RB->status, al
1653 | jmp ->vm_leave_unw
1654 |
1655 |//-- Math library -------------------------------------------------------
1656 |
1657 | .ffunc_1 math_abs // Absolute value; also defines the shared fff_resi/fff_resRB exits for DUALNUM.
1658 | mov RB, [BASE]
1659 |.if DUALNUM
1660 | checkint RB, >3
1661 | cmp RBd, 0; jns ->fff_resi
1662 | neg RBd; js >2 // Still negative after neg: result does not fit an int32.
1663 |->fff_resbit:
1664 |->fff_resi: // Shared exit: return the int32 in RBd.
1665 | setint RB
1666 |->fff_resRB: // Shared exit: return the tagged value in RB.
1667 | mov PC, [BASE-8]
1668 | mov [BASE-16], RB
1669 | jmp ->fff_res1
1670 |2:
1671 | mov64 RB, U64x(41e00000,00000000) // 2^31.
1672 | jmp ->fff_resRB
1673 |3:
1674 | ja ->fff_fallback
1675 |.else
1676 | checknum RB, ->fff_fallback
1677 |.endif
1678 | shl RB, 1 // Shift left then right by one: clears bit 63, the sign bit.
1679 | shr RB, 1
1680 | mov PC, [BASE-8]
1681 | mov [BASE-16], RB
1682 | jmp ->fff_res1
1683 |
1684 |.ffunc_n math_sqrt, sqrtsd // xmm0 = sqrt(arg), then falls into the shared result exits.
1685 |->fff_resxmm0: // Shared exit: return the double in xmm0.
1686 | mov PC, [BASE-8]
1687 | movsd qword [BASE-16], xmm0
1688 | // fallthrough
1689 |
1690 |->fff_res1: // Shared exit: one result already stored at BASE-16.
1691 | mov RDd, 1+1
1692 |->fff_res: // Shared exit: RD = nresults+1, results start at BASE-16.
1693 | mov MULTRES, RDd
1694 |->fff_res_: // Same, but MULTRES is already set.
1695 | test PCd, FRAME_TYPE
1696 | jnz >7
1697 |5:
1698 | cmp PC_RB, RDL // More results expected?
1699 | ja >6
1700 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1701 | movzx RAd, PC_RA
1702 | neg RA
1703 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1704 | ins_next
1705 |
1706 |6: // Fill up results with nil.
1707 | mov aword [BASE+RD*8-24], LJ_TNIL
1708 | add RD, 1
1709 | jmp <5
1710 |
1711 |7: // Non-standard return case.
1712 | mov RA, -16 // Results start at BASE+RA = BASE-16.
1713 | jmp ->vm_return
1714 |
1715 |.macro math_round, func // Defines ff_math_<func> on top of vm_<func>_sse.
1716 | .ffunc math_ .. func
1717 |.if DUALNUM
1718 | mov RB, [BASE]
1719 | checknumx RB, ->fff_resRB, je
1720 | ja ->fff_fallback
1721 |.else
1722 | checknumtp [BASE], ->fff_fallback
1723 |.endif
1724 | movsd xmm0, qword [BASE]
1725 | call ->vm_ .. func .. _sse
1726 |.if DUALNUM
1727 | cvttsd2si RBd, xmm0
1728 | cmp RBd, 0x80000000 // 0x80000000 is what cvttsd2si yields when the value is out of range.
1729 | jne ->fff_resi
1730 | cvtsi2sd xmm1, RBd
1731 | ucomisd xmm0, xmm1 // Distinguish a genuine -2^31 from the overflow marker.
1732 | jp ->fff_resxmm0
1733 | je ->fff_resi
1734 |.endif
1735 | jmp ->fff_resxmm0
1736 |.endmacro
1737 |
1738 | math_round floor
1739 | math_round ceil
1740 |
1741 |.ffunc math_log // One-argument case only: calls the C function log.
1742 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1743 | checknumtp [BASE], ->fff_fallback
1744 | movsd xmm0, qword [BASE]
1745 | mov RB, BASE // Save BASE in callee-save RB.
1746 | call extern log
1747 | mov BASE, RB // Restore BASE.
1748 | jmp ->fff_resxmm0
1749 |
1750 |.macro math_extern, func // ff_math_<func>: one number in xmm0, calls the C function func.
1751 | .ffunc_n math_ .. func
1752 | mov RB, BASE // Save BASE in callee-save RB.
1753 | call extern func
1754 | mov BASE, RB // Restore BASE.
1755 | jmp ->fff_resxmm0
1756 |.endmacro
1757 |
1758 |.macro math_extern2, func // ff_math_<func>: two numbers in xmm0/xmm1, calls the C function func.
1759 | .ffunc_nn math_ .. func
1760 | mov RB, BASE // Save BASE in callee-save RB.
1761 | call extern func
1762 | mov BASE, RB // Restore BASE.
1763 | jmp ->fff_resxmm0
1764 |.endmacro
1765 |
1766 | math_extern log10
1767 | math_extern exp
1768 | math_extern sin
1769 | math_extern cos
1770 | math_extern tan
1771 | math_extern asin
1772 | math_extern acos
1773 | math_extern atan
1774 | math_extern sinh
1775 | math_extern cosh
1776 | math_extern tanh
1777 | math_extern2 pow
1778 | math_extern2 atan2
1779 | math_extern2 fmod
1780 |
1781 |.ffunc_2 math_ldexp // Two number arguments, scaled with the x87 fscale instruction.
1782 | checknumtp [BASE], ->fff_fallback
1783 | checknumtp [BASE+8], ->fff_fallback
1784 | fld qword [BASE+8] // Second argument is pushed first, ends up in st1.
1785 | fld qword [BASE] // First argument in st0.
1786 | fscale
1787 | fpop1
1788 | mov PC, [BASE-8]
1789 | fstp qword [BASE-16] // Store the result and pop the x87 stack.
1790 | jmp ->fff_res1
1791 |
1792 |.ffunc_n math_frexp // Calls C frexp; returns xmm0 and the int written to TMP1.
1793 | mov RB, BASE // Save BASE in callee-save RB.
1794 |.if X64WIN
1795 | lea CARG2, TMP1 // Caveat: CARG2 == BASE
1796 |.else
1797 | lea CARG1, TMP1
1798 |.endif
1799 | call extern frexp
1800 | mov BASE, RB // Restore BASE.
1801 | mov RBd, TMP1d // Integer output written by frexp.
1802 | mov PC, [BASE-8] // Load the PC slot before it is overwritten below.
1803 | movsd qword [BASE-16], xmm0
1804 |.if DUALNUM
1805 | setint RB
1806 | mov [BASE-8], RB
1807 |.else
1808 | cvtsi2sd xmm1, RBd
1809 | movsd qword [BASE-8], xmm1
1810 |.endif
1811 | mov RDd, 1+2 // nresults+1 for two results.
1812 | jmp ->fff_res
1813 |
1814 |.ffunc_n math_modf // Calls C modf; its pointer argument is the first result slot.
1815 | mov RB, BASE // Save BASE in callee-save RB.
1816 |.if X64WIN
1817 | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE
1818 |.else
1819 | lea CARG1, [BASE-16]
1820 |.endif
1821 | call extern modf
1822 | mov BASE, RB // Restore BASE.
1823 | mov PC, [BASE-8] // Load the PC slot before it is overwritten below.
1824 | movsd qword [BASE-8], xmm0 // Second result: the value returned in xmm0.
1825 | mov RDd, 1+2 // nresults+1 for two results.
1826 | jmp ->fff_res
1827 |
1828 |.macro math_minmax, name, cmovop, sseop // Folds all arguments with cmovop (ints) or sseop (doubles).
1829 | .ffunc_1 name
1830 | mov RAd, 2 // RA = argument counter, compared against nargs+1 in RD.
1831 |.if DUALNUM
1832 | mov RB, [BASE]
1833 | checkint RB, >4
1834 |1: // Handle integers.
1835 | cmp RAd, RDd; jae ->fff_resRB
1836 | mov TMPR, [BASE+RA*8-8]
1837 | checkint TMPR, >3
1838 | cmp RBd, TMPRd
1839 | cmovop RB, TMPR
1840 | add RAd, 1
1841 | jmp <1
1842 |3:
1843 | ja ->fff_fallback
1844 | // Convert intermediate result to number and continue below.
1845 | cvtsi2sd xmm0, RBd
1846 | jmp >6
1847 |4:
1848 | ja ->fff_fallback
1849 |.else
1850 | checknumtp [BASE], ->fff_fallback
1851 |.endif
1852 |
1853 | movsd xmm0, qword [BASE]
1854 |5: // Handle numbers or integers.
1855 | cmp RAd, RDd; jae ->fff_resxmm0
1856 |.if DUALNUM
1857 | mov RB, [BASE+RA*8-8]
1858 | checknumx RB, >6, jb
1859 | ja ->fff_fallback
1860 | cvtsi2sd xmm1, RBd
1861 | jmp >7
1862 |.else
1863 | checknumtp [BASE+RA*8-8], ->fff_fallback
1864 |.endif
1865 |6:
1866 | movsd xmm1, qword [BASE+RA*8-8]
1867 |7:
1868 | sseop xmm0, xmm1 // Accumulate the running result in xmm0.
1869 | add RAd, 1
1870 | jmp <5
1871 |.endmacro
1872 |
1873 | math_minmax math_min, cmovg, minsd
1874 | math_minmax math_max, cmovl, maxsd
1875 |
1876 |//-- String library -----------------------------------------------------
1877 |
1878 |.ffunc string_byte // Only handle the 1-arg case here.
1879 | cmp NARGS:RDd, 1+1; jne ->fff_fallback
1880 | mov STR:RB, [BASE]
1881 | checkstr STR:RB, ->fff_fallback
1882 | mov PC, [BASE-8]
1883 | cmp dword STR:RB->len, 1
1884 | jb ->fff_res0 // Return no results for empty string.
1885 | movzx RBd, byte STR:RB[1] // First byte located right after the GCstr header.
1886 |.if DUALNUM
1887 | jmp ->fff_resi
1888 |.else
1889 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
1890 |.endif
1891 |
1892 |.ffunc string_char // Only handle the 1-arg case here.
1893 | ffgccheck
1894 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1895 |.if DUALNUM
1896 | mov RB, [BASE]
1897 | checkint RB, ->fff_fallback
1898 |.else
1899 | checknumtp [BASE], ->fff_fallback
1900 | cvttsd2si RBd, qword [BASE]
1901 |.endif
1902 | cmp RBd, 255; ja ->fff_fallback // Unsigned compare: also rejects negative values.
1903 | mov TMP1d, RBd
1904 | mov TMPRd, 1 // String length.
1905 | lea RD, TMP1 // Points to stack. Little-endian.
1906 |->fff_newstr: // Shared exit: create a string from RD (ptr) and TMPRd (length).
1907 | mov L:RB, SAVE_L
1908 | mov L:RB->base, BASE
1909 | mov CARG3d, TMPRd // Zero-extended to size_t.
1910 | mov CARG2, RD
1911 | mov CARG1, L:RB
1912 | mov SAVE_PC, PC
1913 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1914 |->fff_resstr: // Shared exit: return the string in RD; expects L in RB.
1915 | // GCstr * returned in eax (RD).
1916 | mov BASE, L:RB->base
1917 | mov PC, [BASE-8]
1918 | settp STR:RD, LJ_TSTR
1919 | mov [BASE-16], STR:RD
1920 | jmp ->fff_res1
1921 |
1922 |.ffunc string_sub // string.sub(s, start [, end]); end defaults to -1.
1923 | ffgccheck
1924 | mov TMPRd, -1 // Default end.
1925 | cmp NARGS:RDd, 1+2; jb ->fff_fallback
1926 | jna >1 // Exactly two arguments: keep the default end.
1927 |.if DUALNUM
1928 | mov TMPR, [BASE+16]
1929 | checkint TMPR, ->fff_fallback
1930 |.else
1931 | checknumtp [BASE+16], ->fff_fallback
1932 | cvttsd2si TMPRd, qword [BASE+16]
1933 |.endif
1934 |1:
1935 | mov STR:RB, [BASE]
1936 | checkstr STR:RB, ->fff_fallback
1937 |.if DUALNUM
1938 | mov ITYPE, [BASE+8]
1939 | mov RAd, ITYPEd // Must clear hiword for lea below.
1940 | sar ITYPE, 47
1941 | cmp ITYPEd, LJ_TISNUM
1942 | jne ->fff_fallback
1943 |.else
1944 | checknumtp [BASE+8], ->fff_fallback
1945 | cvttsd2si RAd, qword [BASE+8]
1946 |.endif
1947 | mov RCd, STR:RB->len
1948 | cmp RCd, TMPRd // len < end? (unsigned compare)
1949 | jb >5
1950 |2:
1951 | test RAd, RAd // start <= 0?
1952 | jle >7
1953 |3:
1954 | sub TMPRd, RAd // start > end?
1955 | jl ->fff_emptystr
1956 | lea RD, [STR:RB+RAd+#STR-1] // Pointer to the first character of the substring.
1957 | add TMPRd, 1 // Length = end - start + 1.
1958 |4:
1959 | jmp ->fff_newstr
1960 |
1961 |5: // Negative end or overflow.
1962 | jl >6 // Flags still from the cmp above: signed len < end means overflow.
1963 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1964 | jmp <2
1965 |6: // Overflow.
1966 | mov TMPRd, RCd // end = len
1967 | jmp <2
1968 |
1969 |7: // Negative start or underflow.
1970 | je >8 // Flags still from the test above: start == 0.
1971 | add RAd, RCd // start = start+(len+1)
1972 | add RAd, 1
1973 | jg <3 // start > 0?
1974 |8: // Underflow.
1975 | mov RAd, 1 // start = 1
1976 | jmp <3
1977 |
1978 |->fff_emptystr: // Range underflow.
1979 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1980 | jmp <4
1981 |
1982 |.macro ffstring_op, name // ff_string_<name>: lj_buf_putstr_<name> into tmpbuf, then lj_buf_tostr.
1983 | .ffunc_1 string_ .. name
1984 | ffgccheck
1985 |.if X64WIN
1986 | mov STR:TMPR, [BASE]
1987 | checkstr STR:TMPR, ->fff_fallback
1988 |.else
1989 | mov STR:CARG2, [BASE]
1990 | checkstr STR:CARG2, ->fff_fallback
1991 |.endif
1992 | mov L:RB, SAVE_L
1993 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
1994 | mov L:RB->base, BASE
1995 |.if X64WIN
1996 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
1997 |.endif
1998 | mov RC, SBUF:CARG1->b
1999 | mov SBUF:CARG1->L, L:RB
2000 | mov SBUF:CARG1->w, RC // Reset the buffer: w = b.
2001 | mov SAVE_PC, PC
2002 | call extern lj_buf_putstr_ .. name
2003 | mov CARG1, rax // Return value of the first call is the argument of the second.
2004 | call extern lj_buf_tostr
2005 | jmp ->fff_resstr
2006 |.endmacro
2007 |
2008 |ffstring_op reverse
2009 |ffstring_op lower
2010 |ffstring_op upper
2011 |
2012 |//-- Bit library --------------------------------------------------------
2013 |
2014 |.macro .ffunc_bit, name, kind, fdef // Entry via fdef; leaves the first argument as int32 in RBd.
2015 | fdef name
2016 |.if kind == 2
2017 | sseconst_tobit xmm1, RB // kind 2: load the constant up front, before RB is used.
2018 |.endif
2019 |.if DUALNUM
2020 | mov RB, [BASE]
2021 | checkint RB, >1
2022 |.if kind > 0
2023 | jmp >2 // Integer argument: continue after the macro.
2024 |.else
2025 | jmp ->fff_resbit // kind 0: an integer argument is already the result.
2026 |.endif
2027 |1:
2028 | ja ->fff_fallback
2029 | movd xmm0, RB
2030 |.else
2031 | checknumtp [BASE], ->fff_fallback
2032 | movsd xmm0, qword [BASE]
2033 |.endif
2034 |.if kind < 2
2035 | sseconst_tobit xmm1, RB
2036 |.endif
2037 | addsd xmm0, xmm1 // Add the constant; the low 32 bits of the sum are taken below.
2038 | movd RBd, xmm0
2039 |2:
2040 |.endmacro
2041 |
2042 |.macro .ffunc_bit, name, kind // Two-parameter form: uses .ffunc_1 for the entry.
2043 | .ffunc_bit name, kind, .ffunc_1
2044 |.endmacro
2045 |
2046 |.ffunc_bit bit_tobit, 0
2047 | jmp ->fff_resbit
2048 |
2049 |.macro .ffunc_bit_op, name, ins // Folds all arguments into RBd with 'ins', last argument first.
2050 | .ffunc_bit name, 2
2051 | mov TMPRd, NARGS:RDd // Save for fallback.
2052 | lea RD, [BASE+NARGS:RD*8-16] // RD = pointer to the last argument.
2053 |1:
2054 | cmp RD, BASE
2055 | jbe ->fff_resbit // Back at the first argument: done.
2056 |.if DUALNUM
2057 | mov RA, [RD]
2058 | checkint RA, >2
2059 | ins RBd, RAd
2060 | sub RD, 8
2061 | jmp <1
2062 |2:
2063 | ja ->fff_fallback_bit_op
2064 | movd xmm0, RA
2065 |.else
2066 | checknumtp [RD], ->fff_fallback_bit_op
2067 | movsd xmm0, qword [RD]
2068 |.endif
2069 | addsd xmm0, xmm1 // xmm1 still holds the constant loaded by .ffunc_bit (kind 2).
2070 | movd RAd, xmm0
2071 | ins RBd, RAd
2072 | sub RD, 8
2073 | jmp <1
2074 |.endmacro
2075 |
2076 |.ffunc_bit_op bit_band, and
2077 |.ffunc_bit_op bit_bor, or
2078 |.ffunc_bit_op bit_bxor, xor
2079 |
2080 |.ffunc_bit bit_bswap, 1
2081 | bswap RBd
2082 | jmp ->fff_resbit
2083 |
2084 |.ffunc_bit bit_bnot, 1
2085 | not RBd
2086 |.if DUALNUM
2087 | jmp ->fff_resbit
2088 |.else
2089 |->fff_resbit: // Non-DUALNUM definition of the shared exit: return RBd as a double.
2090 | cvtsi2sd xmm0, RBd
2091 | jmp ->fff_resxmm0
2092 |.endif
2093 |
2094 |->fff_fallback_bit_op: // Fallback for .ffunc_bit_op, which reuses RD as a pointer.
2095 | mov NARGS:RDd, TMPRd // Restore for fallback
2096 | jmp ->fff_fallback
2097 |
2098 |.macro .ffunc_bit_sh, name, ins // Shift/rotate the first argument by the second using 'ins'.
2099 |.if DUALNUM
2100 | .ffunc_bit name, 1, .ffunc_2
2101 | // Note: no inline conversion from number for 2nd argument!
2102 | mov RA, [BASE+8]
2103 | checkint RA, ->fff_fallback
2104 |.else
2105 | .ffunc_nn name
2106 | sseconst_tobit xmm2, RB
2107 | addsd xmm0, xmm2
2108 | addsd xmm1, xmm2
2109 | movd RBd, xmm0 // Value to shift.
2110 | movd RAd, xmm1 // Shift count.
2111 |.endif
2112 | ins RBd, cl // Assumes RA is ecx.
2113 | jmp ->fff_resbit
2114 |.endmacro
2115 |
2116 |.ffunc_bit_sh bit_lshift, shl
2117 |.ffunc_bit_sh bit_rshift, shr
2118 |.ffunc_bit_sh bit_arshift, sar
2119 |.ffunc_bit_sh bit_rol, rol
2120 |.ffunc_bit_sh bit_ror, ror
2121 |
2122 |//-----------------------------------------------------------------------
2123 |
2124 |->fff_fallback_2: // Fallback entry that forces nargs = 2.
2125 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2126 | jmp ->fff_fallback
2127 |->fff_fallback_1: // Fallback entry that forces nargs = 1.
2128 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2129 |->fff_fallback: // Call fast function fallback handler.
2130 | // BASE = new base, RD = nargs+1
2131 | mov L:RB, SAVE_L
2132 | mov PC, [BASE-8] // Fallback may overwrite PC.
2133 | mov SAVE_PC, PC // Redundant (but a defined value).
2134 | mov L:RB->base, BASE
2135 | lea RD, [BASE+NARGS:RD*8-8]
2136 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2137 | mov L:RB->top, RD
2138 | mov CFUNC:RD, [BASE-16]
2139 | cleartp CFUNC:RD
2140 | cmp RA, L:RB->maxstack
2141 | ja >5 // Need to grow stack.
2142 | mov CARG1, L:RB
2143 | call aword CFUNC:RD->f // (lua_State *L)
2144 | mov BASE, L:RB->base
2145 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2146 | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2147 |1:
2148 | mov RA, L:RB->top
2149 | sub RA, BASE
2150 | shr RAd, 3
2151 | test RDd, RDd
2152 | lea NARGS:RDd, [RAd+1] // lea and mov keep the flags from 'test' for the jne below.
2153 | mov LFUNC:RB, [BASE-16]
2154 | jne ->vm_call_tail // Returned -1?
2155 | cleartp LFUNC:RB
2156 | ins_callt // Returned 0: retry fast path.
2157 |
2158 |// Reconstruct previous base for vmeta_call during tailcall.
2159 |->vm_call_tail:
2160 | mov RA, BASE
2161 | test PCd, FRAME_TYPE
2162 | jnz >3
2163 | movzx RBd, PC_RA
2164 | neg RB
2165 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2166 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2167 |3:
2168 | mov RB, PC
2169 | and RB, -8 // Mask off the low 3 bits of PC to get the frame delta.
2170 | sub BASE, RB
2171 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2172 |
2173 |5: // Grow stack for fallback handler.
2174 | mov CARG2d, LUA_MINSTACK
2175 | mov CARG1, L:RB
2176 | call extern lj_state_growstack // (lua_State *L, int n)
2177 | mov BASE, L:RB->base
2178 | xor RDd, RDd // Simulate a return 0.
2179 | jmp <1 // Dumb retry (goes through ff first).
2180 |
2181 |->fff_gcstep: // Call GC step function.
2182 | // BASE = new base, RD = nargs+1
2183 | pop RB // Must keep stack at same level.
2184 | mov TMP1, RB // Save return address
2185 | mov L:RB, SAVE_L
2186 | mov SAVE_PC, PC // Redundant (but a defined value).
2187 | mov L:RB->base, BASE
2188 | lea RD, [BASE+NARGS:RD*8-8]
2189 | mov CARG1, L:RB
2190 | mov L:RB->top, RD // top = BASE + nargs*8.
2191 | call extern lj_gc_step // (lua_State *L)
2192 | mov BASE, L:RB->base
2193 | mov RD, L:RB->top
2194 | sub RD, BASE
2195 | shr RDd, 3
2196 | add NARGS:RDd, 1 // Recompute nargs+1 from L->top and L->base.
2197 | mov RB, TMP1
2198 | push RB // Restore return address.
2199 | ret
2200 |
2201 |//-----------------------------------------------------------------------
2202 |//-- Special dispatch targets -------------------------------------------
2203 |//-----------------------------------------------------------------------
2204 |
2205 |->vm_record: // Dispatch target for recording phase.
2206 |.if JIT
2207 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2208 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2209 | jnz >5 // Skip the C call; just re-dispatch statically.
2210 | // Decrement the hookcount for consistency, but always do the call.
2211 | test RDL, HOOK_ACTIVE
2212 | jnz >1
2213 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2214 | jz >1
2215 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2216 | jmp >1
2217 |.endif // Without JIT, vm_record has no body and falls through into vm_rethook.
2218 |
2219 |->vm_rethook: // Dispatch target for return hooks.
2220 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2221 | test RDL, HOOK_ACTIVE // Hook already active?
2222 | jnz >5
2223 | jmp >1 // Otherwise always call lj_dispatch_ins.
2224 |
2225 |->vm_inshook: // Dispatch target for instr/line hooks.
2226 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2227 | test RDL, HOOK_ACTIVE // Hook already active?
2228 | jnz >5
2229 |
2230 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2231 | jz >5 // Neither line nor count hook enabled: nothing to do.
2232 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2233 | jz >1 // Counter reached zero: call the dispatcher.
2234 | test RDL, LUA_MASKLINE
2235 | jz >5 // Count hook only and the counter is not yet zero.
2236 |1: // Common path: call lj_dispatch_ins(L, pc).
2237 | mov L:RB, SAVE_L
2238 | mov L:RB->base, BASE
2239 | mov CARG2, PC // Caveat: CARG2 == BASE
2240 | mov CARG1, L:RB
2241 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2242 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2243 |3: // Also entered from vm_hotloop after lj_trace_hot.
2244 | mov BASE, L:RB->base // Refetch BASE after the C call.
2245 |4: // Also entered from cont_hook.
2246 | movzx RAd, PC_RA // Re-decode operand A (RA is not preserved across the call).
2247 |5: // Entered directly when no C call was made; RA is left as it was on entry.
2248 | movzx OP, PC_OP
2249 | movzx RDd, PC_RD
2250 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2251 |
2252 |->cont_hook: // Continue from hook yield.
2253 | add PC, 4 // Advance PC by one 4-byte bytecode instruction.
2254 | mov RA, [RB-40] // Load the saved word stored 40 bytes below RB.
2255 | mov MULTRES, RAd // Restore MULTRES for *M ins.
2256 | jmp <4 // Re-decode and statically re-dispatch via label 4 of vm_inshook.
2257 |
2258 |->vm_hotloop: // Hot loop counter underflow.
2259 |.if JIT
2260 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2261 | cleartp LFUNC:RB
2262 | mov RB, LFUNC:RB->pc // RB = the function's bytecode start.
2263 | movzx RDd, byte [RB+PC2PROTO(framesize)] // RD = prototype framesize (in slots).
2264 | lea RD, [BASE+RD*8] // RD = BASE + framesize*8, used as L->top below.
2265 | mov L:RB, SAVE_L
2266 | mov L:RB->base, BASE
2267 | mov L:RB->top, RD
2268 | mov CARG2, PC
2269 | lea CARG1, [DISPATCH+GG_DISP2J] // CARG1 = jit_State *, addressed relative to DISPATCH.
2270 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L
2271 | mov SAVE_PC, PC
2272 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2273 | jmp <3 // Refetch BASE and re-dispatch via label 3 of vm_inshook.
2274 |.endif
2275 |
2276 |->vm_callhook: // Dispatch target for call hooks.
2277 | mov SAVE_PC, PC
2278 |.if JIT
2279 | jmp >1 // Skip the hot-call marker set in vm_hotcall.
2280 |.endif // Without JIT, execution falls straight into the shared code below.
2281 |
2282 |->vm_hotcall: // Hot call counter underflow.
2283 |.if JIT
2284 | mov SAVE_PC, PC
2285 | or PC, 1 // Marker for hot call.
2286 |1:
2287 |.endif
2288 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8 (RD held nargs+1), stored as L->top.
2289 | mov L:RB, SAVE_L
2290 | mov L:RB->base, BASE
2291 | mov L:RB->top, RD
2292 | mov CARG2, PC
2293 | mov CARG1, L:RB
2294 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2295 | // ASMFunction returned in eax/rax (RD).
2296 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2297 |.if JIT
2298 | and PC, -2 // Strip the hot-call marker bit again.
2299 |.endif
2300 | mov BASE, L:RB->base // Refetch BASE after the C call.
2301 | mov RA, RD // Park the returned function address; RD is recomputed below.
2302 | mov RD, L:RB->top
2303 | sub RD, BASE
2304 | mov RB, RA // RB = address to jump to (L is no longer needed in RB).
2305 | movzx RAd, PC_RA // Re-decode operand A.
2306 | shr RDd, 3 // Byte distance -> slot count.
2307 | add NARGS:RDd, 1 // RD = nargs+1, recomputed from L->top.
2308 | jmp RB // Tail-jump to the ASMFunction returned by lj_dispatch_call.
2309 |
2310 |->cont_stitch: // Trace stitching.
2311 |.if JIT
2312 | // BASE = base, RC = result, RB = mbase
2313 | mov TRACE:ITYPE, [RB-40] // Save previous trace.
2314 | cleartp TRACE:ITYPE
2315 | mov TMPRd, MULTRES
2316 | movzx RAd, PC_RA
2317 | lea RA, [BASE+RA*8] // Call base.
2318 | sub TMPRd, 1 // TMPR = MULTRES-1 = number of results to copy.
2319 | jz >2 // No results: skip the copy loop.
2320 |1: // Move results down.
2321 | mov RB, [RC]
2322 | mov [RA], RB
2323 | add RC, 8
2324 | add RA, 8
2325 | sub TMPRd, 1
2326 | jnz <1
2327 |2:
2328 | movzx RCd, PC_RA
2329 | movzx RBd, PC_RB
2330 | add RC, RB
2331 | lea RC, [BASE+RC*8-8] // RC = BASE + (A+B-1)*8: end of the wanted result slots.
2332 |3:
2333 | cmp RC, RA
2334 | ja >9 // More results wanted?
2335 |
2336 | test TRACE:ITYPE, TRACE:ITYPE
2337 | jz ->cont_nop // No previous trace: nothing to stitch.
2338 | movzx RBd, word TRACE:ITYPE->traceno
2339 | movzx RDd, word TRACE:ITYPE->link
2340 | cmp RDd, RBd
2341 | je ->cont_nop // Blacklisted.
2342 | test RDd, RDd
2343 | jne =>BC_JLOOP // Jump to stitched trace.
2344 |
2345 | // Stitch a new trace to the previous trace.
2346 | mov [DISPATCH+DISPATCH_J(exitno)], RBd // 32-bit store, same width as the exitno store in vm_exit_handler; a 64-bit store would also overwrite the following 4 bytes.
2347 | mov L:RB, SAVE_L
2348 | mov L:RB->base, BASE
2349 | mov CARG2, PC
2350 | lea CARG1, [DISPATCH+GG_DISP2J] // CARG1 = jit_State *, addressed relative to DISPATCH.
2351 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L
2352 | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2353 | mov BASE, L:RB->base // Refetch BASE after the C call.
2354 | jmp ->cont_nop
2355 |
2356 |9: // Fill up results with nil.
2357 | mov aword [RA], LJ_TNIL
2358 | add RA, 8
2359 | jmp <3
2360 |.endif
2361 |
2362 |->vm_profhook: // Dispatch target for profiler hook.
2363#if LJ_HASPROFILE
2364 | mov L:RB, SAVE_L
2365 | mov L:RB->base, BASE // Publish BASE to L before calling into C.
2366 | mov CARG2, PC // Caveat: CARG2 == BASE
2367 | mov CARG1, L:RB
2368 | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2369 | mov BASE, L:RB->base // Refetch BASE after the C call.
2370 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2371 | sub PC, 4 // Step PC back one 4-byte instruction before continuing at cont_nop.
2372 | jmp ->cont_nop
2373#endif
2374 |
2375 |//-----------------------------------------------------------------------
2376 |//-- Trace exit handler -------------------------------------------------
2377 |//-----------------------------------------------------------------------
2378 |
2379 |// Called from an exit stub with the exit number on the stack.
2380 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2381 |->vm_exit_handler:
2382 |.if JIT
2383 | push r13; push r12 // Save all GPRs (except r14/r15, stored below) for the exit state.
2384 | push r11; push r10; push r9; push r8
2385 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp // rbp = rsp + 9 pushes (72) + the two exit-number slots (16); that address is pushed too.
2386 | push rbx; push rdx; push rcx; push rax
2387 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2388 | mov RCH, byte [rbp-16] // Second slot supplies bits 8..15.
2389 | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit-number slots to hold r15/r14.
2390 | // DISPATCH is preserved on-trace in LJ_GC64 mode.
2391 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2392 | set_vmstate EXIT
2393 | mov [DISPATCH+DISPATCH_J(exitno)], RCd
2394 | mov [DISPATCH+DISPATCH_J(parent)], RAd
2395 |.if X64WIN
2396 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2397 |.else
2398 | sub rsp, 16*8 // Room for SSE regs.
2399 |.endif
2400 | add rbp, -128 // Step rbp down over the 16 GPR slots (16*8 bytes) saved above.
2401 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 // Low 64 bits of each xmm register only.
2402 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2403 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2404 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2405 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2406 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2407 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2408 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2409 | // Caveat: RB is rbp.
2410 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2411 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2412 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB // J->L = L
2413 | mov L:RB->base, BASE
2414 |.if X64WIN
2415 | lea CARG2, [rsp+4*8] // ExitState starts above the 4*8-byte save area.
2416 |.else
2417 | mov CARG2, rsp // ExitState starts at the current stack pointer.
2418 |.endif
2419 | lea CARG1, [DISPATCH+GG_DISP2J] // CARG1 = jit_State *, addressed relative to DISPATCH.
2420 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear g->jit_base before calling into C.
2421 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2422 | // MULTRES or negated error code returned in eax (RD).
2423 | mov RA, L:RB->cframe
2424 | and RA, CFRAME_RAWMASK // RA = raw C frame pointer with the flag bits masked off.
2425 | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2426 | mov BASE, L:RB->base
2427 | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2428 | jmp >1 // Enter vm_exit_interp at label 1, with RA already pointing at the C frame.
2429 |.endif
2430 |->vm_exit_interp:
2431 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2432 |.if JIT
2433 | // Restore additional callee-save registers only used in compiled code.
2434 |.if X64WIN
2435 | lea RA, [rsp+10*16+4*8] // RA = C frame address: above 10 saved xmm slots plus the 4*8-byte area.
2436 |1: // vm_exit_handler jumps here with RA already set to the C frame.
2437 | movdqa xmm15, [RA-10*16]
2438 | movdqa xmm14, [RA-9*16]
2439 | movdqa xmm13, [RA-8*16]
2440 | movdqa xmm12, [RA-7*16]
2441 | movdqa xmm11, [RA-6*16]
2442 | movdqa xmm10, [RA-5*16]
2443 | movdqa xmm9, [RA-4*16]
2444 | movdqa xmm8, [RA-3*16]
2445 | movdqa xmm7, [RA-2*16]
2446 | mov rsp, RA // Reposition stack to C frame.
2447 | movdqa xmm6, [RA-1*16]
2448 | mov r15, CSAVE_1
2449 | mov r14, CSAVE_2
2450 | mov r13, CSAVE_3
2451 | mov r12, CSAVE_4
2452 |.else
2453 | lea RA, [rsp+16] // RA = C frame address, 16 bytes above the current rsp.
2454 |1: // vm_exit_handler jumps here with RA already set to the C frame.
2455 | mov r13, [RA-8]
2456 | mov r12, [RA]
2457 | mov rsp, RA // Reposition stack to C frame.
2458 |.endif
2459 | test RDd, RDd; js >9 // Check for error from exit.
2460 | mov L:RB, SAVE_L
2461 | mov MULTRES, RDd
2462 | mov LFUNC:KBASE, [BASE-16] // Function object of the current frame.
2463 | cleartp LFUNC:KBASE
2464 | mov KBASE, LFUNC:KBASE->pc
2465 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = constants of the function's prototype.
2466 | mov L:RB->base, BASE
2467 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear g->jit_base before resuming the interpreter.
2468 | set_vmstate INTERP
2469 | // Modified copy of ins_next which handles function header dispatch, too.
2470 | mov RCd, [PC] // Fetch the 32-bit instruction word.
2471 | movzx RAd, RCH // Operand A = bits 8..15.
2472 | movzx OP, RCL // Opcode = bits 0..7.
2473 | add PC, 4
2474 | shr RCd, 16 // Operand D = bits 16..31.
2475 | cmp OP, BC_FUNCF // Function header?
2476 | jb >3 // Ordinary instruction: dispatch with the decoded D.
2477 | cmp OP, BC_FUNCC+2 // Fast function?
2478 | jae >4
2479 |2:
2480 | mov RCd, MULTRES // RC/RD holds nres+1.
2481 |3:
2482 | jmp aword [DISPATCH+OP*8] // Dispatch through the dynamic table.
2483 |
2484 |4: // Check frame below fast function.
2485 | mov RC, [BASE-8] // Frame link word of the current frame.
2486 | test RCd, FRAME_TYPE
2487 | jnz <2 // Trace stitching continuation?
2488 | // Otherwise set KBASE for Lua function below fast function.
2489 | movzx RCd, byte [RC-3] // Operand A of the instruction just before the saved PC.
2490 | neg RC
2491 | mov LFUNC:KBASE, [BASE+RC*8-32] // Function slot of the frame below.
2492 | cleartp LFUNC:KBASE
2493 | mov KBASE, LFUNC:KBASE->pc
2494 | mov KBASE, [KBASE+PC2PROTO(k)]
2495 | jmp <2
2496 |
2497 |9: // Rethrow error from the right C frame.
2498 | mov CARG2d, RDd
2499 | mov CARG1, L:RB
2500 | neg CARG2d // Error code was returned negated; make it positive.
2501 | call extern lj_err_trace // (lua_State *L, int errcode)
2502 |.endif
2503 |
2504 |//-----------------------------------------------------------------------
2505 |//-- Math helper functions ----------------------------------------------
2506 |//-----------------------------------------------------------------------
2507 |
2508 |// FP value rounding. Called by math.floor/math.ceil fast functions
2509 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2510 |.macro vm_round, name, mode, cond // mode: 0 = floor, 1 = ceil, 2 = trunc. 'cond' is not referenced in the body.
2511 |->name:
2512 |->name .. _sse: // Second global label at the same address, with an _sse suffix.
2513 | sseconst_abs xmm2, RD
2514 | sseconst_2p52 xmm3, RD
2515 | movaps xmm1, xmm0
2516 | andpd xmm1, xmm2 // |x|
2517 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2518 | jbe >1 // Also taken for NaN (unordered); xmm0 is returned unchanged.
2519 | andnpd xmm2, xmm0 // Isolate sign bit.
2520 |.if mode == 2 // trunc(x)?
2521 | movaps xmm0, xmm1
2522 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2523 | subsd xmm1, xmm3
2524 | sseconst_1 xmm3, RD
2525 | cmpsd xmm0, xmm1, 1 // |x| < result?
2526 | andpd xmm0, xmm3 // All-ones compare mask & 1.0 -> 1.0 or 0.0.
2527 | subsd xmm1, xmm0 // If yes, subtract 1.0.
2528 | orpd xmm1, xmm2 // Merge sign bit back in.
2529 |.else
2530 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2531 | subsd xmm1, xmm3
2532 | orpd xmm1, xmm2 // Merge sign bit back in.
2533 | .if mode == 1 // ceil(x)?
2534 | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0.
2535 | cmpsd xmm0, xmm1, 6 // x > result?
2536 | .else // floor(x)?
2537 | sseconst_1 xmm2, RD
2538 | cmpsd xmm0, xmm1, 1 // x < result?
2539 | .endif
2540 | andpd xmm0, xmm2 // All-ones compare mask & constant -> +-1.0 or 0.0.
2541 | subsd xmm1, xmm0 // If yes, subtract +-1.
2542 |.endif
2543 | movaps xmm0, xmm1 // Return the rounded value in xmm0.
2544 |1:
2545 | ret
2546 |.endmacro
2547 |
2548 | vm_round vm_floor, 0, 1
2549 | vm_round vm_ceil, 1, JIT
2550 | vm_round vm_trunc, 2, JIT
2551 |
2552 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2553 |->vm_mod:
2554 |// Args in xmm0/xmm1, return value in xmm0.
2555 |// Caveat: xmm0-xmm5 and RC (eax) modified!
2556 | movaps xmm5, xmm0 // Keep x for the final subtraction.
2557 | divsd xmm0, xmm1 // xmm0 = x/y
2558 | sseconst_abs xmm2, RD
2559 | sseconst_2p52 xmm3, RD
2560 | movaps xmm4, xmm0
2561 | andpd xmm4, xmm2 // |x/y|
2562 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2563 | jbe >1 // Also taken when the compare is unordered (NaN).
2564 | andnpd xmm2, xmm0 // Isolate sign bit.
2565 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2566 | subsd xmm4, xmm3
2567 | orpd xmm4, xmm2 // Merge sign bit back in.
2568 | sseconst_1 xmm2, RD
2569 | cmpsd xmm0, xmm4, 1 // x/y < result?
2570 | andpd xmm0, xmm2 // All-ones compare mask & 1.0 -> 1.0 or 0.0.
2571 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2572 | movaps xmm0, xmm5
2573 | mulsd xmm1, xmm4 // y * floor(x/y)
2574 | subsd xmm0, xmm1 // x - y*floor(x/y)
2575 | ret
2576 |1: // x/y needs no rounding: use the quotient as is.
2577 | mulsd xmm1, xmm0 // y * (x/y)
2578 | movaps xmm0, xmm5
2579 | subsd xmm0, xmm1 // x - y*(x/y)
2580 | ret
2581 |
2582 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2583 |->vm_powi_sse: // Computes x^i for a 32-bit integer exponent i by square-and-multiply.
2584 | cmp eax, 1; jle >6 // i<=1?
2585 | // Now 1 < (unsigned)i <= 0x80000000.
2586 |1: // Handle leading zeros.
2587 | test eax, 1; jnz >2 // Lowest exponent bit set?
2588 | mulsd xmm0, xmm0 // No: square the base and halve the exponent.
2589 | shr eax, 1
2590 | jmp <1
2591 |2:
2592 | shr eax, 1; jz >5 // Only a single bit was set: xmm0 is the result.
2593 | movaps xmm1, xmm0 // xmm1 accumulates the product of the selected powers.
2594 |3: // Handle trailing bits.
2595 | mulsd xmm0, xmm0
2596 | shr eax, 1; jz >4 // Last bit shifted out: do the final multiply at 4.
2597 | jnc <3 // Bit was clear: only square.
2598 | mulsd xmm1, xmm0 // Bit was set: fold this power into the accumulator.
2599 | jmp <3
2600 |4:
2601 | mulsd xmm0, xmm1
2602 |5:
2603 | ret
2604 |6: // Flags still come from 'cmp eax, 1'.
2605 | je <5 // x^1 ==> x
2606 | jb >7 // x^0 ==> 1
2607 | neg eax // Negative exponent: compute x^(-i), then take the reciprocal.
2608 | call <1
2609 | sseconst_1 xmm1, RD
2610 | divsd xmm1, xmm0 // 1 / x^(-i)
2611 | movaps xmm0, xmm1
2612 | ret
2613 |7:
2614 | sseconst_1 xmm0, RD
2615 | ret
2616 |
2617 |//-----------------------------------------------------------------------
2618 |//-- Miscellaneous functions --------------------------------------------
2619 |//-----------------------------------------------------------------------
2620 |
2621 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2622 |->vm_cpuid: // Runs CPUID leaf f (sub-leaf 0) and stores eax/ebx/ecx/edx to res[0..3].
2623 | mov eax, CARG1d // eax = leaf number f.
2624 | .if X64WIN; push rsi; mov rsi, CARG2; .endif // rsi is saved first on Windows, then loaded with res.
2625 | push rbx // cpuid overwrites ebx, so rbx is saved around it.
2626 | xor ecx, ecx // Sub-leaf 0.
2627 | cpuid
2628 | mov [rsi], eax // Non-Windows: res is addressed through rsi without a copy.
2629 | mov [rsi+4], ebx
2630 | mov [rsi+8], ecx
2631 | mov [rsi+12], edx
2632 | pop rbx
2633 | .if X64WIN; pop rsi; .endif
2634 | ret // eax is not modified after cpuid, so the function returns cpuid's eax.
2635 |
2636 |.define NEXT_TAB, TAB:CARG1 // Argument 1: the table.
2637 |.define NEXT_IDX, CARG2d // Argument 2: the 32-bit traversal index.
2638 |.define NEXT_IDXa, CARG2
2639 |.define NEXT_PTR, RC // Result pointer (returned in rax).
2640 |.define NEXT_PTRd, RCd
2641 |.define NEXT_TMP, CARG3 // Scratch.
2642 |.define NEXT_ASIZE, CARG4d // Scratch: holds t->asize.
2643 |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro // edx = idx + op2
2644 |.if X64WIN
2645 |.define NEXT_RES_PTR, [rsp+aword*5]
2646 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro // In-place add on Windows (presumably CARG2d is edx there).
2647 |.else
2648 |.define NEXT_RES_PTR, [rsp+aword*1]
2649 |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
2650 |.endif
2651 |
2652 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
2653 |// Next idx returned in edx.
2654 |->vm_next:
2655 |.if JIT
2656 | mov NEXT_ASIZE, NEXT_TAB->asize
2657 |1: // Traverse array part.
2658 | cmp NEXT_IDX, NEXT_ASIZE; jae >5 // Index past the array part: continue in the hash part.
2659 | mov NEXT_TMP, NEXT_TAB->array
2660 | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8] // NEXT_TMP = t->array[idx]
2661 | cmp NEXT_TMP, LJ_TNIL; je >2
2662 | lea NEXT_PTR, NEXT_RES_PTR // Result pair is built in a stack slot relative to rsp.
2663 | mov qword [NEXT_PTR], NEXT_TMP // First word of the pair: the array slot's value.
2664 |.if DUALNUM
2665 | setint NEXT_TMP, NEXT_IDXa
2666 | mov qword [NEXT_PTR+qword*1], NEXT_TMP // Second word: the index as an integer key.
2667 |.else
2668 | cvtsi2sd xmm0, NEXT_IDX
2669 | movsd qword [NEXT_PTR+qword*1], xmm0 // Second word: the index as a double key.
2670 |.endif
2671 | NEXT_RES_IDX 1 // Next idx = idx + 1.
2672 | ret
2673 |2: // Skip holes in array part.
2674 | add NEXT_IDX, 1
2675 | jmp <1
2676 |
2677 |5: // Traverse hash part.
2678 | sub NEXT_IDX, NEXT_ASIZE // Convert to a zero-based hash node index.
2679 |6:
2680 | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 // Past the last node: iteration is finished.
2681 | imul NEXT_PTRd, NEXT_IDX, #NODE // Byte offset = node index * sizeof(Node).
2682 | add NODE:NEXT_PTR, NEXT_TAB->node // NEXT_PTR = address of node[idx].
2683 | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7
2684 | NEXT_RES_IDXL NEXT_ASIZE+1 // Next idx = node index + asize + 1; the node pointer is returned.
2685 | ret
2686 |7: // Skip holes in hash part.
2687 | add NEXT_IDX, 1
2688 | jmp <6
2689 |
2690 |9: // End of iteration. Set the key to nil (not the value).
2691 | NEXT_RES_IDX NEXT_ASIZE // Returned idx = node index + asize.
2692 | lea NEXT_PTR, NEXT_RES_PTR
2693 | mov qword [NEXT_PTR+qword*1], LJ_TNIL
2694 | ret
2695 |.endif
2696 |
2697 |//-----------------------------------------------------------------------
2698 |//-- Assertions ---------------------------------------------------------
2699 |//-----------------------------------------------------------------------
2700 |
2701 |->assert_bad_for_arg_type: // Trap target; the body is only breakpoint instructions.
2702#ifdef LUA_USE_ASSERT
2703 | int3 // Extra breakpoint emitted only in LUA_USE_ASSERT builds.
2704#endif
2705 | int3 // Always emitted.
2706 |
2707 |//-----------------------------------------------------------------------
2708 |//-- FFI helper functions -----------------------------------------------
2709 |//-----------------------------------------------------------------------
2710 |
2711 |// Handler for callback functions. Callback slot number in ah/al.
2712 |->vm_ffi_callback:
2713 |.if FFI
2714 |.type CTSTATE, CTState, PC
2715 | saveregs_ // ebp/rbp already saved. rbp now holds global_State *.
2716 | lea DISPATCH, [rbp+GG_G2DISP] // Full 64-bit base: a 32-bit base would truncate a global_State * above 4 GB.
2717 | mov CTSTATE, GL:rbp->ctype_state
2718 | movzx eax, ax // Zero-extend the 16-bit slot number.
2719 | mov CTSTATE->cb.slot, eax
2720 | mov CTSTATE->cb.gpr[0], CARG1 // Capture the incoming integer argument registers.
2721 | mov CTSTATE->cb.gpr[1], CARG2
2722 | mov CTSTATE->cb.gpr[2], CARG3
2723 | mov CTSTATE->cb.gpr[3], CARG4
2724 | movsd qword CTSTATE->cb.fpr[0], xmm0 // Capture the incoming FP argument registers (low 64 bits).
2725 | movsd qword CTSTATE->cb.fpr[1], xmm1
2726 | movsd qword CTSTATE->cb.fpr[2], xmm2
2727 | movsd qword CTSTATE->cb.fpr[3], xmm3
2728 |.if X64WIN
2729 | lea rax, [rsp+CFRAME_SIZE+4*8] // Caller's stack arguments start 4*8 bytes further up on Windows.
2730 |.else
2731 | lea rax, [rsp+CFRAME_SIZE] // Caller's stack arguments start right above the C frame.
2732 | mov CTSTATE->cb.gpr[4], CARG5 // Non-Windows passes two more integer and four more FP register args.
2733 | mov CTSTATE->cb.gpr[5], CARG6
2734 | movsd qword CTSTATE->cb.fpr[4], xmm4
2735 | movsd qword CTSTATE->cb.fpr[5], xmm5
2736 | movsd qword CTSTATE->cb.fpr[6], xmm6
2737 | movsd qword CTSTATE->cb.fpr[7], xmm7
2738 |.endif
2739 | mov CTSTATE->cb.stack, rax
2740 | mov CARG2, rsp // cf = current C frame.
2741 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2742 | mov CARG1, CTSTATE
2743 | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2744 | // lua_State * returned in eax (RD).
2745 | set_vmstate INTERP
2746 | mov BASE, L:RD->base
2747 | mov RD, L:RD->top
2748 | sub RD, BASE
2749 | mov LFUNC:RB, [BASE-16] // Function to call, taken from the frame slot below BASE.
2750 | cleartp LFUNC:RB
2751 | shr RD, 3 // Byte distance -> slot count.
2752 | add RD, 1 // RD = nargs+1
2753 | ins_callt
2754 |.endif
2755 |
2756 |->cont_ffi_callback: // Return from FFI callback.
2757 |.if FFI
2758 | mov L:RA, SAVE_L
2759 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
2760 | mov aword CTSTATE->L, L:RA // cts->L = L
2761 | mov L:RA->base, BASE // Publish base/top to L before calling into C.
2762 | mov L:RA->top, RB
2763 | mov CARG1, CTSTATE
2764 | mov CARG2, RC // RC is passed as the 'o' argument.
2765 | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2766 | mov rax, CTSTATE->cb.gpr[0] // Load the C return registers from the callback state.
2767 | movsd xmm0, qword CTSTATE->cb.fpr[0]
2768 | jmp ->vm_leave_unw
2769 |.endif
2770 |
2771 |->vm_ffi_call: // Call C function via FFI.
2772 | // Caveat: needs special frame unwinding, see below.
2773 |.if FFI
2774 | .type CCSTATE, CCallState, rbx
2775 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Frame-pointer prologue; CCSTATE (rbx) survives the call.
2776 |
2777 | // Readjust stack.
2778 | mov eax, CCSTATE->spadj
2779 | sub rsp, rax // Reserve spadj bytes for outgoing stack arguments.
2780 |
2781 | // Copy stack slots.
2782 | movzx ecx, byte CCSTATE->nsp
2783 | sub ecx, 1
2784 | js >2 // nsp == 0: nothing to copy.
2785 |1: // Copy from the last slot down to slot 0.
2786 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
2787 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
2788 | sub ecx, 1
2789 | jns <1
2790 |2:
2791 |
2792 | movzx eax, byte CCSTATE->nfpr // eax = number of FP registers to load; still set at the call.
2793 | mov CARG1, CCSTATE->gpr[0] // Load the integer argument registers unconditionally.
2794 | mov CARG2, CCSTATE->gpr[1]
2795 | mov CARG3, CCSTATE->gpr[2]
2796 | mov CARG4, CCSTATE->gpr[3]
2797 |.if not X64WIN
2798 | mov CARG5, CCSTATE->gpr[4]
2799 | mov CARG6, CCSTATE->gpr[5]
2800 |.endif
2801 | test eax, eax; jz >5 // No FP arguments: skip the xmm loads.
2802 | movaps xmm0, CCSTATE->fpr[0]
2803 | movaps xmm1, CCSTATE->fpr[1]
2804 | movaps xmm2, CCSTATE->fpr[2]
2805 | movaps xmm3, CCSTATE->fpr[3]
2806 |.if not X64WIN
2807 | cmp eax, 4; jbe >5 // At most 4 FP arguments: xmm4-xmm7 are not needed.
2808 | movaps xmm4, CCSTATE->fpr[4]
2809 | movaps xmm5, CCSTATE->fpr[5]
2810 | movaps xmm6, CCSTATE->fpr[6]
2811 | movaps xmm7, CCSTATE->fpr[7]
2812 |.endif
2813 |5:
2814 |
2815 | call aword CCSTATE->func
2816 |
2817 | mov CCSTATE->gpr[0], rax // Store the return registers back into the call state.
2818 | movaps CCSTATE->fpr[0], xmm0
2819 |.if not X64WIN
2820 | mov CCSTATE->gpr[1], rdx // Second return register pair (non-Windows only).
2821 | movaps CCSTATE->fpr[1], xmm1
2822 |.endif
2823 |
2824 | mov rbx, [rbp-8]; leave; ret // Restore rbx from its save slot, then tear down the frame.
2825 |.endif
2826 |// Note: vm_ffi_call must be the last function in this object file!
2827 |
2828 |//-----------------------------------------------------------------------
2829}
2830
2831/* Generate the code for a single instruction. */
2832static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2833{
2834 int vk = 0;
2835 |// Note: aligning all instructions does not pay off.
2836 |=>defop:
2837
2838 switch (op) {
2839
2840 /* -- Comparison ops ---------------------------------------------------- */
2841
2842 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2843
2844 |.macro jmp_comp, lt, ge, le, gt, target // Emits exactly one of the four given jump mnemonics, chosen by the C variable 'op'.
2845 ||switch (op) {
2846 ||case BC_ISLT:
2847 | lt target // Jump used for BC_ISLT.
2848 ||break;
2849 ||case BC_ISGE:
2850 | ge target // Jump used for BC_ISGE.
2851 ||break;
2852 ||case BC_ISLE:
2853 | le target // Jump used for BC_ISLE.
2854 ||break;
2855 ||case BC_ISGT:
2856 | gt target // Jump used for BC_ISGT.
2857 ||break;
2858 ||default: break; /* Shut up GCC. */
2859 ||}
2860 |.endmacro
2861
2862 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2863 | // RA = src1, RD = src2, JMP with RD = target
2864 | ins_AD
2865 | mov ITYPE, [BASE+RA*8]
2866 | mov RB, [BASE+RD*8]
2867 | mov RA, ITYPE // RA = full 64-bit value of src1.
2868 | mov RD, RB // RD = full 64-bit value of src2.
2869 | sar ITYPE, 47 // ITYPE = type tag of src1 (upper bits, sign-extended).
2870 | sar RB, 47 // RB = type tag of src2.
2871 |.if DUALNUM
2872 | cmp ITYPEd, LJ_TISNUM; jne >7
2873 | cmp RBd, LJ_TISNUM; jne >8
2874 | add PC, 4 // Step over the following instruction word (the JMP).
2875 | cmp RAd, RDd // Both integers: signed 32-bit compare.
2876 | jmp_comp jge, jl, jg, jle, >9 // Inverted condition: skip the branch when the comparison is false.
2877 |6:
2878 | movzx RDd, PC_RD
2879 | branchPC RD
2880 |9:
2881 | ins_next
2882 |
2883 |7: // RA is not an integer.
2884 | ja ->vmeta_comp
2885 | // RA is a number.
2886 | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
2887 | // RA is a number, RD is an integer.
2888 | cvtsi2sd xmm0, RDd
2889 | jmp >2
2890 |
2891 |8: // RA is an integer, RD is not an integer.
2892 | ja ->vmeta_comp
2893 | // RA is an integer, RD is a number.
2894 | cvtsi2sd xmm1, RAd
2895 | movd xmm0, RD
2896 | jmp >3
2897 |.else
2898 | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
2899 | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
2900 |.endif
2901 |1:
2902 | movd xmm0, RD // xmm0 = src2
2903 |2:
2904 | movd xmm1, RA // xmm1 = src1
2905 |3:
2906 | add PC, 4
2907 | ucomisd xmm0, xmm1 // Compares src2 against src1, so the jump conditions below are mirrored.
2908 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2909 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2910 |.if DUALNUM
2911 | jmp_comp jbe, ja, jb, jae, <9
2912 | jmp <6 // Comparison true: take the branch via the shared code at 6.
2913 |.else
2914 | jmp_comp jbe, ja, jb, jae, >1
2915 | movzx RDd, PC_RD
2916 | branchPC RD
2917 |1:
2918 | ins_next
2919 |.endif
2920 break;
2921
2922 case BC_ISEQV: case BC_ISNEV:
2923 vk = op == BC_ISEQV; /* vk = 1 for the EQ form, 0 for the NE form. */
2924 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2925 | mov RB, [BASE+RD*8]
2926 | mov ITYPE, [BASE+RA*8]
2927 | add PC, 4 // Step over the following instruction word (the JMP).
2928 | mov RD, RB // RD = full 64-bit value of src2.
2929 | mov RA, ITYPE // RA = full 64-bit value of src1.
2930 | sar RB, 47 // RB = type tag of src2.
2931 | sar ITYPE, 47 // ITYPE = type tag of src1.
2932 |.if DUALNUM
2933 | cmp RBd, LJ_TISNUM; jne >7
2934 | cmp ITYPEd, LJ_TISNUM; jne >8
2935 | cmp RDd, RAd // Both integers: compare the low 32 bits.
2936 if (vk) {
2937 | jne >9
2938 } else {
2939 | je >9
2940 }
2941 | movzx RDd, PC_RD
2942 | branchPC RD
2943 |9:
2944 | ins_next
2945 |
2946 |7: // RD is not an integer.
2947 | ja >5
2948 | // RD is a number.
2949 | movd xmm1, RD
2950 | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2951 | // RD is a number, RA is an integer.
2952 | cvtsi2sd xmm0, RAd
2953 | jmp >2
2954 |
2955 |8: // RD is an integer, RA is not an integer.
2956 | ja >5
2957 | // RD is an integer, RA is a number.
2958 | cvtsi2sd xmm1, RDd
2959 | jmp >1
2960 |
2961 |.else
2962 | cmp RBd, LJ_TISNUM; jae >5
2963 | cmp ITYPEd, LJ_TISNUM; jae >5
2964 | movd xmm1, RD
2965 |.endif
2966 |1:
2967 | movd xmm0, RA
2968 |2:
2969 | ucomisd xmm0, xmm1
2970 |4:
2971 iseqne_fp: /* Shared FP tail: BC_ISEQN/ISNEN jump here with the flags from ucomisd. */
2972 if (vk) {
2973 | jp >2 // Unordered means not equal.
2974 | jne >2
2975 } else {
2976 | jp >2 // Unordered means not equal.
2977 | je >1
2978 }
2979 iseqne_end: /* Shared branch tail: entered via iseqne_test from BC_ISEQS/ISNES (and BC_ISEQP/ISNEP without FFI). */
2980 if (vk) {
2981 |1: // EQ: Branch to the target.
2982 | movzx RDd, PC_RD
2983 | branchPC RD
2984 |2: // NE: Fallthrough to next instruction.
2985 |.if not FFI
2986 |3:
2987 |.endif
2988 } else {
2989 |.if not FFI
2990 |3:
2991 |.endif
2992 |2: // NE: Branch to the target.
2993 | movzx RDd, PC_RD
2994 | branchPC RD
2995 |1: // EQ: Fallthrough to next instruction.
2996 }
2997 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2998 op == BC_ISEQN || op == BC_ISNEN)) {
2999 | jmp <9 // Reuse the ins_next already emitted at label 9.
3000 } else {
3001 | ins_next
3002 }
3003 |
3004 if (op == BC_ISEQV || op == BC_ISNEV) {
3005 |5: // Either or both types are not numbers.
3006 |.if FFI
3007 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3008 | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
3009 |.endif
3010 | cmp RA, RD
3011 | je <1 // Same GCobjs or pvalues?
3012 | cmp RBd, ITYPEd
3013 | jne <2 // Not the same type?
3014 | cmp RBd, LJ_TISTABUD
3015 | ja <2 // Different objects and not table/ud?
3016 |
3017 | // Different tables or userdatas. Need to check __eq metamethod.
3018 | // Field metatable must be at same offset for GCtab and GCudata!
3019 | cleartp TAB:RA
3020 | mov TAB:RB, TAB:RA->metatable
3021 | test TAB:RB, TAB:RB
3022 | jz <2 // No metatable?
3023 | test byte TAB:RB->nomm, 1<<MM_eq
3024 | jnz <2 // Or 'no __eq' flag set?
3025 if (vk) {
3026 | xor RBd, RBd // ne = 0
3027 } else {
3028 | mov RBd, 1 // ne = 1
3029 }
3030 | jmp ->vmeta_equal // Handle __eq metamethod.
3031 } else { /* Reached through the gotos from BC_ISEQS/ISNES and BC_ISEQN/ISNEN. */
3032 |.if FFI
3033 |3:
3034 | cmp ITYPEd, LJ_TCDATA
3035 if (LJ_DUALNUM && vk) {
3036 | jne <9
3037 } else {
3038 | jne <2
3039 }
3040 | jmp ->vmeta_equal_cd
3041 |.endif
3042 }
3043 break;
3044 case BC_ISEQS: case BC_ISNES:
3045 vk = op == BC_ISEQS; /* vk = 1 for the EQ form, 0 for the NE form. */
3046 | ins_AND // RA = src, RD = str const, JMP with RD = target
3047 | mov RB, [BASE+RA*8]
3048 | add PC, 4 // Step over the following instruction word (the JMP).
3049 | checkstr RB, >3 // Not a string: go to label 3, emitted in the shared iseqne_end tail.
3050 | cmp RB, [KBASE+RD*8] // Compare against the constant at KBASE+RD*8.
3051 iseqne_test: /* Also the entry point for BC_ISEQP/ISNEP when FFI is disabled. */
3052 if (vk) {
3053 | jne >2
3054 } else {
3055 | je >1
3056 }
3057 goto iseqne_end; /* Labels 1/2/3 are emitted by the shared tail in the BC_ISEQV case. */
3058 case BC_ISEQN: case BC_ISNEN:
3059 vk = op == BC_ISEQN; /* vk = 1 for the EQ form, 0 for the NE form. */
3060 | ins_AD // RA = src, RD = num const, JMP with RD = target
3061 | mov RB, [BASE+RA*8]
3062 | add PC, 4 // Step over the following instruction word (the JMP).
3063 |.if DUALNUM
3064 | checkint RB, >7
3065 | mov RD, [KBASE+RD*8] // RD = the number constant.
3066 | checkint RD, >8
3067 | cmp RBd, RDd // Both integers: compare the low 32 bits.
3068 if (vk) {
3069 | jne >9
3070 } else {
3071 | je >9
3072 }
3073 | movzx RDd, PC_RD
3074 | branchPC RD
3075 |9:
3076 | ins_next
3077 |
3078 |7: // RA is not an integer.
3079 | ja >3
3080 | // RA is a number.
3081 | mov RD, [KBASE+RD*8]
3082 | checkint RD, >1
3083 | // RA is a number, RD is an integer.
3084 | cvtsi2sd xmm0, RDd
3085 | jmp >2
3086 |
3087 |8: // RA is an integer, RD is a number.
3088 | cvtsi2sd xmm0, RBd
3089 | movd xmm1, RD
3090 | ucomisd xmm0, xmm1
3091 | jmp >4
3092 |1:
3093 | movd xmm0, RD
3094 |.else
3095 | checknum RB, >3
3096 |1:
3097 | movsd xmm0, qword [KBASE+RD*8]
3098 |.endif
3099 |2:
3100 | ucomisd xmm0, qword [BASE+RA*8] // Compare the constant (xmm0) with the stack slot.
3101 |4:
3102 goto iseqne_fp; /* Flag tests and branch tail are emitted in the BC_ISEQV case. */
3103 case BC_ISEQP: case BC_ISNEP:
3104 vk = op == BC_ISEQP; /* vk = 1 for the EQ form, 0 for the NE form. */
3105 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3106 | mov RB, [BASE+RA*8]
3107 | sar RB, 47 // RB = type tag of src.
3108 | add PC, 4 // Step over the following instruction word (the JMP).
3109 | cmp RBd, RDd // Compare the tag with the primitive type operand.
3110 if (!LJ_HASFFI) goto iseqne_test; /* Without FFI the shared tail suffices; no cdata check needed. */
3111 if (vk) {
3112 | jne >3
3113 | movzx RDd, PC_RD
3114 | branchPC RD
3115 |2:
3116 | ins_next
3117 |3:
3118 | cmp RBd, LJ_TCDATA; jne <2 // Tags differ and src is not cdata: fall through without branching.
3119 | jmp ->vmeta_equal_cd
3120 } else {
3121 | je >2
3122 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd // Tags differ but src is cdata: defer to the cdata handler.
3123 | movzx RDd, PC_RD
3124 | branchPC RD
3125 |2:
3126 | ins_next
3127 }
3128 break;
3129
3130 /* -- Unary test and copy ops ------------------------------------------- */
3131
3132 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3133 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3134 | mov ITYPE, [BASE+RD*8]
3135 | add PC, 4 // Step over the following instruction word (the JMP).
3136 if (op == BC_ISTC || op == BC_ISFC) {
3137 | mov RB, ITYPE // Copying forms: keep the full value for the store below.
3138 }
3139 | sar ITYPE, 47 // ITYPE = type tag of src.
3140 | cmp ITYPEd, LJ_TISTRUECOND
3141 if (op == BC_IST || op == BC_ISTC) {
3142 | jae >1 // Tag is not below LJ_TISTRUECOND: skip copy and branch.
3143 } else {
3144 | jb >1 // Tag is below LJ_TISTRUECOND: skip copy and branch.
3145 }
3146 if (op == BC_ISTC || op == BC_ISFC) {
3147 | mov [BASE+RA*8], RB // Copy src to dst only when the branch is taken.
3148 }
3149 | movzx RDd, PC_RD
3150 | branchPC RD
3151 |1: // Fallthrough to the next instruction.
3152 | ins_next
3153 break;
3154
3155 case BC_ISTYPE:
3156 | ins_AD // RA = src, RD = -type
3157 | mov RB, [BASE+RA*8]
3158 | sar RB, 47 // RB = type tag of src.
3159 | add RBd, RDd // tag + (-type) is zero exactly when the tag matches.
3160 | jne ->vmeta_istype
3161 | ins_next
3162 break;
3163 case BC_ISNUM:
3164 | ins_AD // RA = src, RD = -(TISNUM-1)
3165 | checknumtp [BASE+RA*8], ->vmeta_istype // Slot fails the number check: go to vmeta_istype.
3166 | ins_next
3167 break;
3168
3169 /* -- Unary ops --------------------------------------------------------- */
3170
3171 case BC_MOV:
3172 | ins_AD // RA = dst, RD = src
3173 | mov RB, [BASE+RD*8] // Copy the whole 64-bit slot.
3174 | mov [BASE+RA*8], RB
3175 | ins_next_
3176 break;
3177 case BC_NOT:
3178 | ins_AD // RA = dst, RD = src
3179 | mov RB, [BASE+RD*8]
3180 | sar RB, 47 // RB = type tag of src.
3181 | mov RCd, 2
3182 | cmp RB, LJ_TISTRUECOND // CF is set when the tag is below LJ_TISTRUECOND (unsigned).
3183 | sbb RCd, 0 // RC = 2 - CF, i.e. 1 or 2.
3184 | shl RC, 47 // Move the value into the tag position.
3185 | not RC // Complement gives the boxed result (presumably the true/false tag words).
3186 | mov [BASE+RA*8], RC
3187 | ins_next
3188 break;
3189 case BC_UNM:
3190 | ins_AD // RA = dst, RD = src
3191 | mov RB, [BASE+RD*8]
3192 |.if DUALNUM
3193 | checkint RB, >5
3194 | neg RBd
3195 | jo >4 // Overflow: the result does not fit an int32 (input was INT_MIN).
3196 | setint RB
3197 |9:
3198 | mov [BASE+RA*8], RB
3199 | ins_next
3200 |4:
3201 | mov64 RB, U64x(41e00000,00000000) // 2^31.
3202 | jmp <9
3203 |5:
3204 | ja ->vmeta_unm
3205 |.else
3206 | checknum RB, ->vmeta_unm
3207 |.endif
3208 | mov64 RD, U64x(80000000,00000000) // Sign-bit mask of a double.
3209 | xor RB, RD // Flip the sign bit to negate the double.
3210 |.if DUALNUM
3211 | jmp <9
3212 |.else
3213 | mov [BASE+RA*8], RB
3214 | ins_next
3215 |.endif
3216 break;
3217 case BC_LEN:
3218 | ins_AD // RA = dst, RD = src
3219 | mov RD, [BASE+RD*8]
3220 | checkstr RD, >2 // Not a string: try the table path at 2.
3221 |.if DUALNUM
3222 | mov RDd, dword STR:RD->len
3223 |1: // Also entered from BC_LEN_Z with the table length in RD.
3224 | setint RD
3225 | mov [BASE+RA*8], RD
3226 |.else
3227 | xorps xmm0, xmm0 // Clear xmm0 first; cvtsi2sd only writes its low 64 bits.
3228 | cvtsi2sd xmm0, dword STR:RD->len
3229 |1: // Also entered from BC_LEN_Z with the table length in xmm0.
3230 | movsd qword [BASE+RA*8], xmm0
3231 |.endif
3232 | ins_next
3233 |2:
3234 | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len // ITYPE is presumably left holding the tag by checkstr.
3235 | mov TAB:CARG1, TAB:RD
3236#if LJ_52
3237 | mov TAB:RB, TAB:RD->metatable
3238 | cmp TAB:RB, 0
3239 | jnz >9 // Table has a metatable: check for __len at 9.
3240 |3:
3241#endif
3242 |->BC_LEN_Z:
3243 | mov RB, BASE // Save BASE.
3244 | call extern lj_tab_len // (GCtab *t)
3245 | // Length of table returned in eax (RD).
3246 |.if DUALNUM
3247 | // Nothing to do.
3248 |.else
3249 | cvtsi2sd xmm0, RDd
3250 |.endif
3251 | mov BASE, RB // Restore BASE.
3252 | movzx RAd, PC_RA // Re-decode operand A after the C call.
3253 | jmp <1
3254#if LJ_52
3255 |9: // Check for __len.
3256 | test byte TAB:RB->nomm, 1<<MM_len
3257 | jnz <3 // 'no __len' flag set: use the raw length.
3258 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3259#endif
3260 break;
3261
3262 /* -- Binary ops -------------------------------------------------------- */
3263
3264 |.macro ins_arithpre, sseins, ssereg
3265 | ins_ABC
3266 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3267 ||switch (vk) {
3268 ||case 0:
3269 | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3270 | .if DUALNUM
3271 | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
3272 | .endif
3273 | movsd xmm0, qword [BASE+RB*8]
3274 | sseins ssereg, qword [KBASE+RC*8]
3275 || break;
3276 ||case 1:
3277 | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3278 | .if DUALNUM
3279 | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3280 | .endif
3281 | movsd xmm0, qword [KBASE+RC*8]
3282 | sseins ssereg, qword [BASE+RB*8]
3283 || break;
3284 ||default:
3285 | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3286 | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3287 | movsd xmm0, qword [BASE+RB*8]
3288 | sseins ssereg, qword [BASE+RC*8]
3289 || break;
3290 ||}
3291 |.endmacro
3292 |
3293 |.macro ins_arithdn, intins
3294 | ins_ABC
3295 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3296 ||switch (vk) {
3297 ||case 0:
3298 | mov RB, [BASE+RB*8]
3299 | mov RC, [KBASE+RC*8]
3300 | checkint RB, ->vmeta_arith_vno
3301 | checkint RC, ->vmeta_arith_vno
3302 | intins RBd, RCd; jo ->vmeta_arith_vno
3303 || break;
3304 ||case 1:
3305 | mov RB, [BASE+RB*8]
3306 | mov RC, [KBASE+RC*8]
3307 | checkint RB, ->vmeta_arith_nvo
3308 | checkint RC, ->vmeta_arith_nvo
3309 | intins RCd, RBd; jo ->vmeta_arith_nvo
3310 || break;
3311 ||default:
3312 | mov RB, [BASE+RB*8]
3313 | mov RC, [BASE+RC*8]
3314 | checkint RB, ->vmeta_arith_vvo
3315 | checkint RC, ->vmeta_arith_vvo
3316 | intins RBd, RCd; jo ->vmeta_arith_vvo
3317 || break;
3318 ||}
3319 ||if (vk == 1) {
3320 | setint RC
3321 | mov [BASE+RA*8], RC
3322 ||} else {
3323 | setint RB
3324 | mov [BASE+RA*8], RB
3325 ||}
3326 | ins_next
3327 |.endmacro
3328 |
3329 |.macro ins_arithpost // Store the FP result to the destination slot.
3330 | movsd qword [BASE+RA*8], xmm0 // Stk[RA] = xmm0.
3331 |.endmacro
3332 |
3333 |.macro ins_arith, sseins // One-argument form: FP-only arithmetic.
3334 | ins_arithpre sseins, xmm0 // xmm0 = first operand, then sseins xmm0, second operand.
3335 | ins_arithpost // Store xmm0 to Stk[RA].
3336 | ins_next
3337 |.endmacro
3338 |
3339 |.macro ins_arith, intins, sseins // Two-argument form: picks the integer or the FP implementation at build time.
3340 |.if DUALNUM
3341 | ins_arithdn intins // DUALNUM builds use the integer path with intins.
3342 |.else
3343 | ins_arith, sseins // Otherwise expand the one-argument FP form with sseins.
3344 |.endif
3345 |.endmacro
3346
3347 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3348 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: /* Addition for all three operand forms. */
3349 | ins_arith add, addsd // Integer add in DUALNUM builds, SSE addsd otherwise.
3350 break;
3351 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: /* Subtraction for all three operand forms. */
3352 | ins_arith sub, subsd // Integer sub in DUALNUM builds, SSE subsd otherwise.
3353 break;
3354 case BC_MULVN: case BC_MULNV: case BC_MULVV: /* Multiplication for all three operand forms. */
3355 | ins_arith imul, mulsd // Integer imul in DUALNUM builds, SSE mulsd otherwise.
3356 break;
3357 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: /* Division for all three operand forms. */
3358 | ins_arith divsd // One-argument macro form: always the FP path, also in DUALNUM builds.
3359 break;
3360 case BC_MODVN: /* Modulo, slot/constant form. Also hosts the tail shared with the other modulo forms. */
3361 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand.
3362 |->BC_MODVN_Z: // Shared tail, jumped to from BC_MODNV and BC_MODVV.
3363 | call ->vm_mod // The value stored below is taken from xmm0.
3364 | ins_arithpost
3365 | ins_next
3366 break;
3367 case BC_MODNV: case BC_MODVV: /* Modulo, remaining operand forms. */
3368 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand.
3369 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3370 break;
3371 case BC_POW: /* Power operator, computed by the C library pow function. */
3372 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand.
3373 | mov RB, BASE // Save BASE in RB (rbp, callee-save) across the C call.
3374 | call extern pow
3375 | movzx RAd, PC_RA // Reload RA, which is not preserved by the call.
3376 | mov BASE, RB // Restore BASE.
3377 | ins_arithpost
3378 | ins_next
3379 break;
3380
3381 case BC_CAT: /* Concatenation of a slot range, delegated to lj_meta_cat. */
3382 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3383 | mov L:CARG1, SAVE_L
3384 | mov L:CARG1->base, BASE
3385 | lea CARG2, [BASE+RC*8] // top = address of slot src_end.
3386 | mov CARG3d, RCd
3387 | sub CARG3d, RBd // left = src_end - src_start.
3388 |->BC_CAT_Z:
3389 | mov L:RB, L:CARG1 // Keep L in RB (callee-save) across the call.
3390 | mov SAVE_PC, PC
3391 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3392 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3393 | mov BASE, L:RB->base // Reload BASE from L after the call.
3394 | test RC, RC
3395 | jnz ->vmeta_binop // Non-NULL return: continue at vmeta_binop.
3396 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3397 | movzx RAd, PC_RA
3398 | mov RC, [BASE+RB*8]
3399 | mov [BASE+RA*8], RC
3400 | ins_next
3401 break;
3402
3403 /* -- Constant ops ------------------------------------------------------ */
3404
3405 case BC_KSTR: /* Load a string constant into a slot. */
3406 | ins_AND // RA = dst, RD = str const (~)
3407 | mov RD, [KBASE+RD*8] // Fetch the constant relative to KBASE.
3408 | settp RD, LJ_TSTR // Combine it with the LJ_TSTR tag.
3409 | mov [BASE+RA*8], RD // Stk[RA] = tagged string.
3410 | ins_next
3411 break;
3412 case BC_KCDATA: /* Load a cdata constant into a slot. */
3413 |.if FFI // No code is emitted for this opcode unless FFI is enabled.
3414 | ins_AND // RA = dst, RD = cdata const (~)
3415 | mov RD, [KBASE+RD*8] // Fetch the constant relative to KBASE.
3416 | settp RD, LJ_TCDATA // Combine it with the LJ_TCDATA tag.
3417 | mov [BASE+RA*8], RD // Stk[RA] = tagged cdata.
3418 | ins_next
3419 |.endif
3420 break;
3421 case BC_KSHORT: /* Load a signed 16-bit literal into a slot. */
3422 | ins_AD // RA = dst, RD = signed int16 literal
3423 |.if DUALNUM
3424 | movsx RDd, RDW // Sign-extend literal.
3425 | setint RD
3426 | mov [BASE+RA*8], RD // DUALNUM: stored after setint.
3427 |.else
3428 | movsx RDd, RDW // Sign-extend literal.
3429 | cvtsi2sd xmm0, RDd // Convert the integer to a double.
3430 | movsd qword [BASE+RA*8], xmm0 // Otherwise stored as a double.
3431 |.endif
3432 | ins_next
3433 break;
3434 case BC_KNUM: /* Load a number constant into a slot. */
3435 | ins_AD // RA = dst, RD = num const
3436 | movsd xmm0, qword [KBASE+RD*8] // Copy the 8-byte constant via xmm0.
3437 | movsd qword [BASE+RA*8], xmm0
3438 | ins_next
3439 break;
3440 case BC_KPRI: /* Store a primitive value built from the operand into a slot. */
3441 | ins_AD // RA = dst, RD = primitive type (~)
3442 | shl RD, 47 // Move the operand up to bit 47.
3443 | not RD // Invert all 64 bits to form the stored value.
3444 | mov [BASE+RA*8], RD
3445 | ins_next
3446 break;
3447 case BC_KNIL: /* Fill the slot range dst_start up to and including dst_end with nil. */
3448 | ins_AD // RA = dst_start, RD = dst_end
3449 | lea RA, [BASE+RA*8+8] // RA = address of the second slot of the range.
3450 | lea RD, [BASE+RD*8] // RD = address of the last slot of the range.
3451 | mov RB, LJ_TNIL
3452 | mov [RA-8], RB // Sets minimum 2 slots.
3453 |1:
3454 | mov [RA], RB
3455 | add RA, 8
3456 | cmp RA, RD
3457 | jbe <1 // Continue while RA has not passed the last slot.
3458 | ins_next
3459 break;
3460
3461 /* -- Upvalue and function ops ------------------------------------------ */
3462
3463 case BC_UGET: /* Copy the value of an upvalue into a slot. */
3464 | ins_AD // RA = dst, RD = upvalue #
3465 | mov LFUNC:RB, [BASE-16] // The function value is read from [BASE-16].
3466 | cleartp LFUNC:RB
3467 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] // RB = uvptr[RD].
3468 | mov RB, UPVAL:RB->v // RB = pointer held in the v field.
3469 | mov RD, [RB] // Load the value it points to.
3470 | mov [BASE+RA*8], RD
3471 | ins_next
3472 break;
3473 case BC_USETV: /* Store a slot value into an upvalue, with a GC write barrier check. */
3474#define TV2MARKOFS \
3475 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3476 | ins_AD // RA = upvalue #, RD = src
3477 | mov LFUNC:RB, [BASE-16]
3478 | cleartp LFUNC:RB
3479 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3480 | cmp byte UPVAL:RB->closed, 0 // Flags are consumed by the jz below. The mov instructions in between leave them unchanged.
3481 | mov RB, UPVAL:RB->v
3482 | mov RA, [BASE+RD*8]
3483 | mov [RB], RA // Store the new value through the v pointer.
3484 | jz >1 // closed == 0: no barrier check.
3485 | // Check barrier for closed upvalue.
3486 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3487 | jnz >2
3488 |1:
3489 | ins_next
3490 |
3491 |2: // Upvalue is black. Check if new value is collectable and white.
3492 | mov RD, RA
3493 | sar RD, 47 // Keep only the bits from bit 47 upwards.
3494 | sub RDd, LJ_TISGCV
3495 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3496 | jbe <1
3497 | cleartp GCOBJ:RA
3498 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3499 | jz <1
3500 | // Crossed a write barrier. Move the barrier forward.
3501 |.if not X64WIN
3502 | mov CARG2, RB
3503 | mov RB, BASE // Save BASE.
3504 |.else
3505 | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3506 |.endif
3507 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3508 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3509 | mov BASE, RB // Restore BASE.
3510 | jmp <1
3511 break;
3512#undef TV2MARKOFS
3513 case BC_USETS: /* Store a string constant into an upvalue, with a GC write barrier check. */
3514 | ins_AND // RA = upvalue #, RD = str const (~)
3515 | mov LFUNC:RB, [BASE-16]
3516 | cleartp LFUNC:RB
3517 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3518 | mov STR:RA, [KBASE+RD*8]
3519 | mov RD, UPVAL:RB->v
3520 | settp STR:ITYPE, STR:RA, LJ_TSTR // Tagged copy goes to ITYPE. RA keeps the untagged pointer for the check at label 2.
3521 | mov [RD], STR:ITYPE // Store the tagged string through the v pointer.
3522 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3523 | jnz >2
3524 |1:
3525 | ins_next
3526 |
3527 |2: // Check if string is white and ensure upvalue is closed.
3528 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3529 | jz <1
3530 | cmp byte UPVAL:RB->closed, 0
3531 | jz <1
3532 | // Crossed a write barrier. Move the barrier forward.
3533 | mov RB, BASE // Save BASE (CARG2 == BASE).
3534 | mov CARG2, RD // tv = the v pointer loaded above.
3535 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3536 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3537 | mov BASE, RB // Restore BASE.
3538 | jmp <1
3539 break;
3540 case BC_USETN: /* Store a number constant into an upvalue. No barrier code is emitted here. */
3541 | ins_AD // RA = upvalue #, RD = num const
3542 | mov LFUNC:RB, [BASE-16]
3543 | cleartp LFUNC:RB
3544 | movsd xmm0, qword [KBASE+RD*8] // Load the constant.
3545 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3546 | mov RA, UPVAL:RB->v
3547 | movsd qword [RA], xmm0 // Store it through the v pointer.
3548 | ins_next
3549 break;
3550 case BC_USETP: /* Store a primitive value into an upvalue. No barrier code is emitted here. */
3551 | ins_AD // RA = upvalue #, RD = primitive type (~)
3552 | mov LFUNC:RB, [BASE-16]
3553 | cleartp LFUNC:RB
3554 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3555 | shl RD, 47 // Move the operand up to bit 47.
3556 | not RD // Invert all 64 bits to form the stored value.
3557 | mov RA, UPVAL:RB->v
3558 | mov [RA], RD // Store it through the v pointer.
3559 | ins_next
3560 break;
3561 case BC_UCLO: /* Branch to the target, calling lj_func_closeuv first if L->openupval is non-zero. */
3562 | ins_AD // RA = level, RD = target
3563 | branchPC RD // Do this first to free RD.
3564 | mov L:RB, SAVE_L
3565 | cmp aword L:RB->openupval, 0
3566 | je >1 // openupval == 0: skip the call.
3567 | mov L:RB->base, BASE
3568 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3569 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3570 | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3571 | mov BASE, L:RB->base // Reload BASE from L after the call.
3572 |1:
3573 | ins_next
3574 break;
3575
3576 case BC_FNEW: /* Create a function object from a prototype constant via lj_func_newL_gc. */
3577 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3578 | mov L:RB, SAVE_L
3579 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3580 | mov CARG3, [BASE-16] // parent = function value read from [BASE-16].
3581 | cleartp CARG3
3582 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3583 | mov CARG1, L:RB
3584 | mov SAVE_PC, PC
3585 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3586 | call extern lj_func_newL_gc
3587 | // GCfuncL * returned in eax (RC).
3588 | mov BASE, L:RB->base // Reload BASE from L after the call.
3589 | movzx RAd, PC_RA // Reload RA, which is not preserved by the call.
3590 | settp LFUNC:RC, LJ_TFUNC
3591 | mov [BASE+RA*8], LFUNC:RC // Stk[RA] = tagged function.
3592 | ins_next
3593 break;
3594
3595 /* -- Table ops --------------------------------------------------------- */
3596
3597 case BC_TNEW: /* Create a new table via lj_tab_new, running a GC step first when the threshold is reached. */
3598 | ins_AD // RA = dst, RD = hbits|asize
3599 | mov L:RB, SAVE_L
3600 | mov L:RB->base, BASE
3601 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3602 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] // Flags are consumed by the jae below. The mov in between leaves them unchanged.
3603 | mov SAVE_PC, PC
3604 | jae >5 // gc.total >= gc.threshold: do a GC step at label 5.
3605 |1:
3606 | mov CARG3d, RDd
3607 | and RDd, 0x7ff // Low 11 bits of the operand: asize.
3608 | shr CARG3d, 11 // Remaining upper bits: hbits.
3609 | cmp RDd, 0x7ff
3610 | je >3
3611 |2:
3612 | mov L:CARG1, L:RB
3613 | mov CARG2d, RDd
3614 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3615 | // Table * returned in eax (RC).
3616 | mov BASE, L:RB->base // Reload BASE from L after the call.
3617 | movzx RAd, PC_RA // Reload RA, which is not preserved by the call.
3618 | settp TAB:RC, LJ_TTAB
3619 | mov [BASE+RA*8], TAB:RC // Stk[RA] = tagged table.
3620 | ins_next
3621 |3: // Turn 0x7ff into 0x801.
3622 | mov RDd, 0x801
3623 | jmp <2
3624 |5:
3625 | mov L:CARG1, L:RB
3626 | call extern lj_gc_step_fixtop // (lua_State *L)
3627 | movzx RDd, PC_RD // Reload RD, which is not preserved by the call.
3628 | jmp <1
3629 break;
3630 case BC_TDUP: /* Duplicate a template table constant via lj_tab_dup, running a GC step first when the threshold is reached. */
3631 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3632 | mov L:RB, SAVE_L
3633 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3634 | mov SAVE_PC, PC
3635 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] // Flags are consumed by the jae below. The mov in between leaves them unchanged.
3636 | mov L:RB->base, BASE
3637 | jae >3 // gc.total >= gc.threshold: do a GC step at label 3.
3638 |2:
3639 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3640 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3641 | call extern lj_tab_dup // (lua_State *L, Table *kt)
3642 | // Table * returned in eax (RC).
3643 | mov BASE, L:RB->base // Reload BASE from L after the call.
3644 | movzx RAd, PC_RA // Reload RA, which is not preserved by the call.
3645 | settp TAB:RC, LJ_TTAB
3646 | mov [BASE+RA*8], TAB:RC // Stk[RA] = tagged table.
3647 | ins_next
3648 |3:
3649 | mov L:CARG1, L:RB
3650 | call extern lj_gc_step_fixtop // (lua_State *L)
3651 | movzx RDd, PC_RD // Need to reload RD.
3652 | not RD // Invert the reloaded operand again before it is used as an index at label 2.
3653 | jmp <2
3654 break;
3655
3656 case BC_GGET: /* Read a string-keyed field of the env table of the function at [BASE-16]. */
3657 | ins_AND // RA = dst, RD = str const (~)
3658 | mov LFUNC:RB, [BASE-16]
3659 | cleartp LFUNC:RB
3660 | mov TAB:RB, LFUNC:RB->env // Table to index: the env field.
3661 | mov STR:RC, [KBASE+RD*8] // Key: the string constant.
3662 | jmp ->BC_TGETS_Z // Continue in the shared string-key lookup of BC_TGETS.
3663 break;
3664 case BC_GSET: /* Write a string-keyed field of the env table of the function at [BASE-16]. */
3665 | ins_AND // RA = src, RD = str const (~)
3666 | mov LFUNC:RB, [BASE-16]
3667 | cleartp LFUNC:RB
3668 | mov TAB:RB, LFUNC:RB->env // Table to index: the env field.
3669 | mov STR:RC, [KBASE+RD*8] // Key: the string constant.
3670 | jmp ->BC_TSETS_Z // Continue in the shared string-key store of BC_TSETS.
3671 break;
3672
3673 case BC_TGETV: /* Table read with a variable key. Fast path for in-range integer keys, string keys go to BC_TGETS_Z, everything else to vmeta_tgetv. */
3674 | ins_ABC // RA = dst, RB = table, RC = key
3675 | mov TAB:RB, [BASE+RB*8]
3676 | mov RC, [BASE+RC*8]
3677 | checktab TAB:RB, ->vmeta_tgetv
3678 |
3679 | // Integer key?
3680 |.if DUALNUM
3681 | checkint RC, >5
3682 |.else
3683 | // Convert number to int and back and compare.
3684 | checknum RC, >5
3685 | movd xmm0, RC
3686 | cvttsd2si RCd, xmm0
3687 | cvtsi2sd xmm1, RCd
3688 | ucomisd xmm0, xmm1
3689 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3690 |.endif
3691 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3692 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3693 | shl RCd, 3 // Scale the index by the 8-byte slot size.
3694 | add RC, TAB:RB->array // RC = address of the array slot.
3695 | // Get array slot.
3696 | mov ITYPE, [RC]
3697 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3698 | je >2
3699 |1:
3700 | mov [BASE+RA*8], ITYPE // Stk[RA] = loaded value.
3701 | ins_next
3702 |
3703 |2: // Check for __index if table value is nil.
3704 | mov TAB:TMPR, TAB:RB->metatable
3705 | test TAB:TMPR, TAB:TMPR
3706 | jz <1 // No metatable: store the nil.
3707 | test byte TAB:TMPR->nomm, 1<<MM_index
3708 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3709 | jmp <1
3710 |
3711 |5: // String key?
3712 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
3713 | cleartp STR:RC
3714 | jmp ->BC_TGETS_Z // Continue in the shared string-key lookup.
3715 break;
3716 case BC_TGETS: /* Table read with a string constant key: walks the hash chain selected by the string sid. */
3717 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3718 | mov TAB:RB, [BASE+RB*8]
3719 | not RC
3720 | mov STR:RC, [KBASE+RC*8]
3721 | checktab TAB:RB, ->vmeta_tgets
3722 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
3723 | mov TMPRd, TAB:RB->hmask
3724 | and TMPRd, STR:RC->sid // Node index = hmask & sid.
3725 | imul TMPRd, #NODE // Scale by the node size.
3726 | add NODE:TMPR, TAB:RB->node // TMPR = address of the first node of the chain.
3727 | settp ITYPE, STR:RC, LJ_TSTR // Tagged key, compared against the node keys below.
3728 |1:
3729 | cmp NODE:TMPR->key, ITYPE
3730 | jne >4
3731 | // Get node value.
3732 | mov ITYPE, NODE:TMPR->val
3733 | cmp ITYPE, LJ_TNIL
3734 | je >5 // Key found, but nil value?
3735 |2:
3736 | mov [BASE+RA*8], ITYPE // Stk[RA] = result.
3737 | ins_next
3738 |
3739 |4: // Follow hash chain.
3740 | mov NODE:TMPR, NODE:TMPR->next
3741 | test NODE:TMPR, NODE:TMPR
3742 | jnz <1
3743 | // End of hash chain: key not found, nil result.
3744 | mov ITYPE, LJ_TNIL
3745 |
3746 |5: // Check for __index if table value is nil.
3747 | mov TAB:TMPR, TAB:RB->metatable
3748 | test TAB:TMPR, TAB:TMPR
3749 | jz <2 // No metatable: done.
3750 | test byte TAB:TMPR->nomm, 1<<MM_index
3751 | jnz <2 // 'no __index' flag set: done.
3752 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3753 break;
3754 case BC_TGETB: /* Table read with a byte literal key. Only the array part is handled inline. */
3755 | ins_ABC // RA = dst, RB = table, RC = byte literal
3756 | mov TAB:RB, [BASE+RB*8]
3757 | checktab TAB:RB, ->vmeta_tgetb
3758 | cmp RCd, TAB:RB->asize
3759 | jae ->vmeta_tgetb // Index not below asize: use fallback.
3760 | shl RCd, 3 // Scale the index by the 8-byte slot size.
3761 | add RC, TAB:RB->array // RC = address of the array slot.
3762 | // Get array slot.
3763 | mov ITYPE, [RC]
3764 | cmp ITYPE, LJ_TNIL
3765 | je >2
3766 |1:
3767 | mov [BASE+RA*8], ITYPE // Stk[RA] = loaded value.
3768 | ins_next
3769 |
3770 |2: // Check for __index if table value is nil.
3771 | mov TAB:TMPR, TAB:RB->metatable
3772 | test TAB:TMPR, TAB:TMPR
3773 | jz <1 // No metatable: store the nil.
3774 | test byte TAB:TMPR->nomm, 1<<MM_index
3775 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3776 | jmp <1
3777 break;
3778 case BC_TGETR: /* Table read from the array part without a table type check or a nil/metatable check. */
3779 | ins_ABC // RA = dst, RB = table, RC = key
3780 | mov TAB:RB, [BASE+RB*8]
3781 | cleartp TAB:RB // No checktab here, unlike BC_TGETV and BC_TGETB.
3782 |.if DUALNUM
3783 | mov RCd, dword [BASE+RC*8] // DUALNUM: use the low 32 bits of the key slot.
3784 |.else
3785 | cvttsd2si RCd, qword [BASE+RC*8] // Otherwise truncate the double key to an integer.
3786 |.endif
3787 | cmp RCd, TAB:RB->asize
3788 | jae ->vmeta_tgetr // Not in array part? Use fallback.
3789 | shl RCd, 3 // Scale the index by the 8-byte slot size.
3790 | add RC, TAB:RB->array // RC = address of the array slot.
3791 | // Get array slot.
3792 |->BC_TGETR_Z:
3793 | mov ITYPE, [RC]
3794 |->BC_TGETR2_Z:
3795 | mov [BASE+RA*8], ITYPE // Stk[RA] = loaded value.
3796 | ins_next
3797 break;
3798
3799 case BC_TSETV: /* Table write with a variable key. Fast path for in-range integer keys, string keys go to BC_TSETS_Z, everything else to vmeta_tsetv. */
3800 | ins_ABC // RA = src, RB = table, RC = key
3801 | mov TAB:RB, [BASE+RB*8]
3802 | mov RC, [BASE+RC*8]
3803 | checktab TAB:RB, ->vmeta_tsetv
3804 |
3805 | // Integer key?
3806 |.if DUALNUM
3807 | checkint RC, >5
3808 |.else
3809 | // Convert number to int and back and compare.
3810 | checknum RC, >5
3811 | movd xmm0, RC
3812 | cvttsd2si RCd, xmm0
3813 | cvtsi2sd xmm1, RCd
3814 | ucomisd xmm0, xmm1
3815 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3816 |.endif
3817 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3818 | jae ->vmeta_tsetv // Not in array part? Use fallback.
3819 | shl RCd, 3 // Scale the index by the 8-byte slot size.
3820 | add RC, TAB:RB->array // RC = address of the array slot.
3821 | cmp aword [RC], LJ_TNIL
3822 | je >3 // Previous value is nil?
3823 |1:
3824 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3825 | jnz >7
3826 |2: // Set array slot.
3827 | mov RB, [BASE+RA*8] // RB is reused for the value. The table pointer is no longer needed.
3828 | mov [RC], RB
3829 | ins_next
3830 |
3831 |3: // Check for __newindex if previous value is nil.
3832 | mov TAB:TMPR, TAB:RB->metatable
3833 | test TAB:TMPR, TAB:TMPR
3834 | jz <1 // No metatable: plain store.
3835 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3836 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3837 | jmp <1
3838 |
3839 |5: // String key?
3840 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
3841 | cleartp STR:RC
3842 | jmp ->BC_TSETS_Z // Continue in the shared string-key store.
3843 |
3844 |7: // Possible table write barrier for the value. Skip valiswhite check.
3845 | barrierback TAB:RB, TMPR
3846 | jmp <2
3847 break;
3848 case BC_TSETS: /* Table write with a string constant key: walks the hash chain and adds a new key via lj_tab_newkey if none matches. */
3849 | ins_ABC // RA = src, RB = table, RC = str const (~)
3850 | mov TAB:RB, [BASE+RB*8]
3851 | not RC
3852 | mov STR:RC, [KBASE+RC*8]
3853 | checktab TAB:RB, ->vmeta_tsets
3854 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3855 | mov TMPRd, TAB:RB->hmask
3856 | and TMPRd, STR:RC->sid // Node index = hmask & sid.
3857 | imul TMPRd, #NODE // Scale by the node size.
3858 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3859 | add NODE:TMPR, TAB:RB->node // TMPR = address of the first node of the chain.
3860 | settp ITYPE, STR:RC, LJ_TSTR // Tagged key, compared against the node keys below.
3861 |1:
3862 | cmp NODE:TMPR->key, ITYPE
3863 | jne >5
3864 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3865 | cmp aword [TMPR], LJ_TNIL
3866 | je >4 // Previous value is nil?
3867 |2:
3868 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3869 | jnz >7
3870 |3: // Set node value.
3871 | mov ITYPE, [BASE+RA*8]
3872 | mov [TMPR], ITYPE // Store through TMPR, which points at the node value.
3873 | ins_next
3874 |
3875 |4: // Check for __newindex if previous value is nil.
3876 | mov TAB:ITYPE, TAB:RB->metatable
3877 | test TAB:ITYPE, TAB:ITYPE
3878 | jz <2 // No metatable: plain store.
3879 | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3880 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3881 | jmp <2
3882 |
3883 |5: // Follow hash chain.
3884 | mov NODE:TMPR, NODE:TMPR->next
3885 | test NODE:TMPR, NODE:TMPR
3886 | jnz <1
3887 | // End of hash chain: key not found, add a new one.
3888 |
3889 | // But check for __newindex first.
3890 | mov TAB:TMPR, TAB:RB->metatable
3891 | test TAB:TMPR, TAB:TMPR
3892 | jz >6 // No metatable: continue.
3893 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3894 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3895 |6:
3896 | mov TMP1, ITYPE // Spill the tagged key to TMP1 so that its address can be passed.
3897 | mov L:CARG1, SAVE_L
3898 | mov L:CARG1->base, BASE
3899 | lea CARG3, TMP1 // k = address of the spilled key.
3900 | mov CARG2, TAB:RB
3901 | mov SAVE_PC, PC
3902 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3903 | // Handles write barrier for the new key. TValue * returned in eax (RC).
3904 | mov L:CARG1, SAVE_L
3905 | mov BASE, L:CARG1->base // Reload BASE from L after the call.
3906 | mov TMPR, rax // The returned TValue * becomes the store target.
3907 | movzx RAd, PC_RA // Reload RA, which is not preserved by the call.
3908 | jmp <2 // Must check write barrier for value.
3909 |
3910 |7: // Possible table write barrier for the value. Skip valiswhite check.
3911 | barrierback TAB:RB, ITYPE
3912 | jmp <3
3913 break;
3914 case BC_TSETB: /* Table write with a byte literal key. Only the array part is handled inline. */
3915 | ins_ABC // RA = src, RB = table, RC = byte literal
3916 | mov TAB:RB, [BASE+RB*8]
3917 | checktab TAB:RB, ->vmeta_tsetb
3918 | cmp RCd, TAB:RB->asize
3919 | jae ->vmeta_tsetb // Index not below asize: use fallback.
3920 | shl RCd, 3 // Scale the index by the 8-byte slot size.
3921 | add RC, TAB:RB->array // RC = address of the array slot.
3922 | cmp aword [RC], LJ_TNIL
3923 | je >3 // Previous value is nil?
3924 |1:
3925 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3926 | jnz >7
3927 |2: // Set array slot.
3928 | mov ITYPE, [BASE+RA*8]
3929 | mov [RC], ITYPE
3930 | ins_next
3931 |
3932 |3: // Check for __newindex if previous value is nil.
3933 | mov TAB:TMPR, TAB:RB->metatable
3934 | test TAB:TMPR, TAB:TMPR
3935 | jz <1 // No metatable: plain store.
3936 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3937 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3938 | jmp <1
3939 |
3940 |7: // Possible table write barrier for the value. Skip valiswhite check.
3941 | barrierback TAB:RB, TMPR
3942 | jmp <2
3943 break;
3944 case BC_TSETR: /* Table write to the array part without a table type check or a nil/metatable check. */
3945 | ins_ABC // RA = src, RB = table, RC = key
3946 | mov TAB:RB, [BASE+RB*8]
3947 | cleartp TAB:RB // No checktab here, unlike BC_TSETV and BC_TSETB.
3948 |.if DUALNUM
3949 | mov RC, [BASE+RC*8] // DUALNUM: full slot is loaded, only RCd is used below.
3950 |.else
3951 | cvttsd2si RCd, qword [BASE+RC*8] // Otherwise truncate the double key to an integer.
3952 |.endif
3953 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3954 | jnz >7
3955 |2:
3956 | cmp RCd, TAB:RB->asize
3957 | jae ->vmeta_tsetr // Index not below asize: use fallback.
3958 | shl RCd, 3 // Scale the index by the 8-byte slot size. The 32-bit write also clears the upper half of RC.
3959 | add RC, TAB:RB->array // RC = address of the array slot.
3960 | // Set array slot.
3961 |->BC_TSETR_Z:
3962 | mov ITYPE, [BASE+RA*8]
3963 | mov [RC], ITYPE
3964 | ins_next
3965 |
3966 |7: // Possible table write barrier for the value. Skip valiswhite check.
3967 | barrierback TAB:RB, TMPR
3968 | jmp <2
3969 break;
3970
3971 case BC_TSETM: /* Copy MULTRES-1 slots starting at RA into the table array part, resizing the array via lj_tab_reasize when needed. */
3972 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3973 |1:
3974 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3975 | lea RA, [BASE+RA*8] // RA = address of the first source slot.
3976 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3977 | cleartp TAB:RB
3978 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3979 | jnz >7
3980 |2:
3981 | mov RDd, MULTRES
3982 | sub RDd, 1 // RD = number of slots to copy.
3983 | jz >4 // Nothing to copy?
3984 | add RDd, TMPRd // Compute needed size.
3985 | cmp RDd, TAB:RB->asize
3986 | ja >5 // Doesn't fit into array part?
3987 | sub RDd, TMPRd // Back to the number of slots to copy.
3988 | shl TMPRd, 3 // Scale the start index by the 8-byte slot size.
3989 | add TMPR, TAB:RB->array // TMPR = address of the first destination slot.
3990 |3: // Copy result slots to table.
3991 | mov RB, [RA]
3992 | add RA, 8
3993 | mov [TMPR], RB
3994 | add TMPR, 8
3995 | sub RDd, 1
3996 | jnz <3
3997 |4:
3998 | ins_next
3999 |
4000 |5: // Need to resize array part.
4001 | mov L:CARG1, SAVE_L
4002 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
4003 | mov CARG2, TAB:RB
4004 | mov CARG3d, RDd // nasize = needed size computed above.
4005 | mov L:RB, L:CARG1 // Keep L in RB (callee-save) across the call.
4006 | mov SAVE_PC, PC
4007 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
4008 | mov BASE, L:RB->base // Reload BASE from L after the call.
4009 | movzx RAd, PC_RA // Restore RA.
4010 | movzx RDd, PC_RD // Restore RD.
4011 | jmp <1 // Retry.
4012 |
4013 |7: // Possible table write barrier for any value. Skip valiswhite check.
4014 | barrierback TAB:RB, RD
4015 | jmp <2
4016 break;
4017
4018 /* -- Calls and vararg handling ----------------------------------------- */
4019
4020 case BC_CALL: case BC_CALLM: /* Regular call. BC_CALLM first adds MULTRES to the argument count. */
4021 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
4022 if (op == BC_CALLM) {
4023 | add NARGS:RDd, MULTRES
4024 }
4025 | mov LFUNC:RB, [BASE+RA*8] // Value to call: Stk[RA].
4026 | checkfunc LFUNC:RB, ->vmeta_call_ra
4027 | lea BASE, [BASE+RA*8+16] // New BASE is 16 bytes (two slots) above the called value.
4028 | ins_call
4029 break;
4030
4031 case BC_CALLMT: /* Tail call with extra arguments: only adjusts the argument count, the rest is the BC_CALLT code. */
4032 | ins_AD // RA = base, RD = extra_nargs
4033 | add NARGS:RDd, MULTRES // nargs+1 = extra_nargs + MULTRES.
4034 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
4035 break;
4036 case BC_CALLT: /* Tail call: moves the called value and its arguments down to the current frame, then continues with ins_callt. */
4037 | ins_AD // RA = base, RD = nargs+1
4038 | lea RA, [BASE+RA*8+16] // RA = address of the first argument.
4039 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
4040 | mov LFUNC:RB, [RA-16] // Value to call.
4041 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
4042 |->BC_CALLT_Z:
4043 | mov PC, [BASE-8] // PC = frame word stored at [BASE-8].
4044 | test PCd, FRAME_TYPE
4045 | jnz >7
4046 |1:
4047 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
4048 | mov MULTRES, NARGS:RDd // MULTRES is used as a temporary for nargs+1 here.
4049 | sub NARGS:RDd, 1
4050 | jz >3 // No arguments to move.
4051 |2: // Move args down.
4052 | mov RB, [RA]
4053 | add RA, 8
4054 | mov [KBASE], RB
4055 | add KBASE, 8
4056 | sub NARGS:RDd, 1
4057 | jnz <2
4058 |
4059 | mov LFUNC:RB, [BASE-16] // RB was used as the copy temporary: reload the function.
4060 |3:
4061 | cleartp LFUNC:RB
4062 | mov NARGS:RDd, MULTRES // Restore nargs+1.
4063 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4064 | ja >5
4065 |4:
4066 | ins_callt
4067 |
4068 |5: // Tailcall to a fast function.
4069 | test PCd, FRAME_TYPE // Lua frame below?
4070 | jnz <4
4071 | movzx RAd, PC_RA
4072 | neg RA
4073 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
4074 | cleartp LFUNC:KBASE
4075 | mov KBASE, LFUNC:KBASE->pc
4076 | mov KBASE, [KBASE+PC2PROTO(k)]
4077 | jmp <4
4078 |
4079 |7: // Tailcall from a vararg function.
4080 | sub PC, FRAME_VARG
4081 | test PCd, FRAME_TYPEP
4082 | jnz >8 // Vararg frame below?
4083 | sub BASE, PC // Need to relocate BASE/KBASE down.
4084 | mov KBASE, BASE
4085 | mov PC, [BASE-8] // Reload the frame word for the relocated BASE.
4086 | jmp <1
4087 |8:
4088 | add PCd, FRAME_VARG // Undo the subtraction done at label 7.
4089 | jmp <1
4090 break;
4091
4092 case BC_ITERC: /* Iterator call: copies callable, state and control var up and performs a regular 2-argument call. */
4093 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4094 | lea RA, [BASE+RA*8+16] // fb = base+2
4095 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4096 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4097 | mov [RA], RB
4098 | mov [RA+8], RC
4099 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5]
4100 | mov [RA-16], LFUNC:RB
4101 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4102 | checkfunc LFUNC:RB, ->vmeta_call
4103 | mov BASE, RA // New BASE = fb.
4104 | ins_call
4105 break;
4106
4107 case BC_ITERN: /* Inline table traversal step: array part first, then hash part, skipping nil entries. */
4108 |.if JIT
4109 | hotloop RBd
4110 |.endif
4111 |->vm_IITERN:
4112 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4113 | mov TAB:RB, [BASE+RA*8-16] // Table being traversed.
4114 | cleartp TAB:RB
4115 | mov RCd, [BASE+RA*8-8] // Get index from control var.
4116 | mov TMPRd, TAB:RB->asize
4117 | add PC, 4 // Advance PC by 4 bytes. Label 3 below is the exit that keeps this PC.
4118 | mov ITYPE, TAB:RB->array
4119 |1: // Traverse array part.
4120 | cmp RCd, TMPRd; jae >5 // Index points after array part?
4121 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
4122 |.if not DUALNUM
4123 | cvtsi2sd xmm0, RCd // Index as a double, stored as the key below.
4124 |.endif
4125 | // Copy array slot to returned value.
4126 | mov RB, [ITYPE+RC*8]
4127 | mov [BASE+RA*8+8], RB
4128 | // Return array index as a numeric key.
4129 |.if DUALNUM
4130 | setint ITYPE, RC
4131 | mov [BASE+RA*8], ITYPE
4132 |.else
4133 | movsd qword [BASE+RA*8], xmm0
4134 |.endif
4135 | add RCd, 1
4136 | mov [BASE+RA*8-8], RCd // Update control var.
4137 |2:
4138 | movzx RDd, PC_RD // Get target from ITERL.
4139 | branchPC RD
4140 |3:
4141 | ins_next
4142 |
4143 |4: // Skip holes in array part.
4144 | add RCd, 1
4145 | jmp <1
4146 |
4147 |5: // Traverse hash part.
4148 | sub RCd, TMPRd // RC = index into the hash part (control index minus asize).
4149 |6:
4150 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4151 | imul ITYPEd, RCd, #NODE // Scale by the node size.
4152 | add NODE:ITYPE, TAB:RB->node // ITYPE = address of the node.
4153 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
4154 | lea TMPRd, [RCd+TMPRd+1] // Next control index = hash index + asize + 1.
4155 | // Copy key and value from hash slot.
4156 | mov RB, NODE:ITYPE->key
4157 | mov RC, NODE:ITYPE->val
4158 | mov [BASE+RA*8], RB
4159 | mov [BASE+RA*8+8], RC
4160 | mov [BASE+RA*8-8], TMPRd // Update control var.
4161 | jmp <2
4162 |
4163 |7: // Skip holes in hash part.
4164 | add RCd, 1
4165 | jmp <6
4166 break;
4167
4168 case BC_ISNEXT: /* Verify the operands expected by the specialized ITERN loop. On failure the bytecode is patched back to the generic form. */
4169 | ins_AD // RA = base, RD = target (points to ITERN)
4170 | mov CFUNC:RB, [BASE+RA*8-24]
4171 | checkfunc CFUNC:RB, >5
4172 | checktptp [BASE+RA*8-16], LJ_TTAB, >5
4173 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
4174 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
4175 | branchPC RD
4176 | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32) // LJ_KEYINDEX in the upper 32 bits, zero in the lower 32 bits.
4177 | mov [BASE+RA*8-8], TMPR // Initialize control var.
4178 |1:
4179 | ins_next
4180 |5: // Despecialize bytecode if any of the checks fail.
4181 | mov PC_OP, BC_JMP // Rewrite the opcode addressed by PC_OP to BC_JMP.
4182 | branchPC RD
4183 |.if JIT
4184 | cmp byte [PC], BC_ITERN
4185 | jne >6 // Target opcode is not BC_ITERN: handle at label 6.
4186 |.endif
4187 | mov byte [PC], BC_ITERC // Rewrite the opcode byte at the target to BC_ITERC.
4188 | jmp <1
4189 |.if JIT
4190 |6: // Unpatch JLOOP.
4191 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4192 | movzx RCd, word [PC+2] // 16-bit operand at byte offset 2 of the target instruction, used as trace index.
4193 | mov TRACE:RA, [RA+RC*8]
4194 | mov eax, TRACE:RA->startins
4195 | mov al, BC_ITERC // Replace the low byte of the saved instruction with BC_ITERC.
4196 | mov dword [PC], eax // Write the 4-byte instruction back at the target.
4197 | jmp <1
4198 |.endif
4199 break;
4200
4201 case BC_VARG: /* Copy vararg slots to the destination: either a fixed number (padded with nil) or all of them (setting MULTRES). */
4202 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4203 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4204 | lea RA, [BASE+RA*8] // RA = address of the first destination slot.
4205 | sub TMPR, [BASE-8] // Subtract the frame word stored at [BASE-8].
4206 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4207 | test RB, RB
4208 | jz >5 // Copy all varargs?
4209 | lea RB, [RA+RB*8-8] // RB = end of the destination range (exclusive).
4210 | cmp TMPR, BASE // No vararg slots?
4211 | jnb >2
4212 |1: // Copy vararg slots to destination slots.
4213 | mov RC, [TMPR-16]
4214 | add TMPR, 8
4215 | mov [RA], RC
4216 | add RA, 8
4217 | cmp RA, RB // All destination slots filled?
4218 | jnb >3
4219 | cmp TMPR, BASE // No more vararg slots?
4220 | jb <1
4221 |2: // Fill up remainder with nil.
4222 | mov aword [RA], LJ_TNIL
4223 | add RA, 8
4224 | cmp RA, RB
4225 | jb <2
4226 |3:
4227 | ins_next
4228 |
4229 |5: // Copy all varargs.
4230 | mov MULTRES, 1 // MULTRES = 0+1
4231 | mov RC, BASE
4232 | sub RC, TMPR // RC = size of the vararg area in bytes.
4233 | jbe <3 // No vararg slots?
4234 | mov RBd, RCd
4235 | shr RBd, 3 // Bytes to slots.
4236 | add RBd, 1
4237 | mov MULTRES, RBd // MULTRES = #varargs+1
4238 | mov L:RB, SAVE_L
4239 | add RC, RA // RC = end address of the destination range.
4240 | cmp RC, L:RB->maxstack
4241 | ja >7 // Need to grow stack?
4242 |6: // Copy all vararg slots.
4243 | mov RC, [TMPR-16]
4244 | add TMPR, 8
4245 | mov [RA], RC
4246 | add RA, 8
4247 | cmp TMPR, BASE // No more vararg slots?
4248 | jb <6
4249 | jmp <3
4250 |
4251 |7: // Grow stack for varargs.
4252 | mov L:RB->base, BASE
4253 | mov L:RB->top, RA
4254 | mov SAVE_PC, PC
4255 | sub TMPR, BASE // Need delta, because BASE may change.
4256 | mov TMP1hi, TMPRd // Spill the 32-bit delta across the call.
4257 | mov CARG2d, MULTRES
4258 | sub CARG2d, 1 // n = #varargs.
4259 | mov CARG1, L:RB
4260 | call extern lj_state_growstack // (lua_State *L, int n)
4261 | mov BASE, L:RB->base // Reload BASE from L after the call.
4262 | movsxd TMPR, TMP1hi // Reload the delta, sign-extended to 64 bits.
4263 | mov RA, L:RB->top // Reload the destination pointer from L->top.
4264 | add TMPR, BASE // Rebuild the source pointer from the new BASE.
4265 | jmp <6
4266 break;
4267
4268 /* -- Returns ----------------------------------------------------------- */
4269
4270 case BC_RETM: /* Return with extra results: only adjusts the result count, the rest is the BC_RET code. */
4271 | ins_AD // RA = results, RD = extra_nresults
4272 | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4273 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4274 break;
4275
4276 case BC_RET: case BC_RET0: case BC_RET1: /* Return to the caller. Generated three times, with the result copy specialized for any, zero or one result. */
4277 | ins_AD // RA = results, RD = nresults+1
4278 if (op != BC_RET0) {
4279 | shl RAd, 3 // RA becomes a byte offset from BASE.
4280 }
4281 |1:
4282 | mov PC, [BASE-8] // PC = frame word stored at [BASE-8].
4283 | mov MULTRES, RDd // Save nresults+1.
4284 | test PCd, FRAME_TYPE // Check frame type marker.
4285 | jnz >7 // Not returning to a fixarg Lua func?
4286 switch (op) {
4287 case BC_RET:
4288 |->BC_RET_Z:
4289 | mov KBASE, BASE // Use KBASE for result move.
4290 | sub RDd, 1
4291 | jz >3 // No results to move.
4292 |2: // Move results down.
4293 | mov RB, [KBASE+RA]
4294 | mov [KBASE-16], RB
4295 | add KBASE, 8
4296 | sub RDd, 1
4297 | jnz <2
4298 |3:
4299 | mov RDd, MULTRES // Note: MULTRES may be >255.
4300 | movzx RBd, PC_RB // So cannot compare with RDL!
4301 |5:
4302 | cmp RBd, RDd // More results expected?
4303 | ja >6
4304 break;
4305 case BC_RET1:
4306 | mov RB, [BASE+RA] // Copy the single result down to [BASE-16].
4307 | mov [BASE-16], RB
4308 /* fallthrough */
4309 case BC_RET0:
4310 |5:
4311 | cmp PC_RB, RDL // More results expected?
4312 | ja >6
4313 default:
4314 break;
4315 }
4316 | movzx RAd, PC_RA
4317 | neg RA
4318 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4319 | mov LFUNC:KBASE, [BASE-16] // Function value of the frame returned to.
4320 | cleartp LFUNC:KBASE
4321 | mov KBASE, LFUNC:KBASE->pc
4322 | mov KBASE, [KBASE+PC2PROTO(k)] // Reload KBASE for that function.
4323 | ins_next
4324 |
4325 |6: // Fill up results with nil.
4326 if (op == BC_RET) {
4327 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4328 | add KBASE, 8
4329 } else {
4330 | mov aword [BASE+RD*8-24], LJ_TNIL
4331 }
4332 | add RD, 1
4333 | jmp <5 // Re-check whether more results are expected.
4334 |
4335 |7: // Non-standard return case.
4336 | lea RB, [PC-FRAME_VARG]
4337 | test RBd, FRAME_TYPEP
4338 | jnz ->vm_return
4339 | // Return from vararg function: relocate BASE down and RA up.
4340 | sub BASE, RB
4341 if (op != BC_RET0) {
4342 | add RA, RB
4343 }
4344 | jmp <1 // Retry with the relocated BASE.
4345 break;
4346
4347 /* -- Loops and branches ------------------------------------------------ */
4348
4349 |.define FOR_IDX, [RA] // Loop index slot (RA points at the loop base).
4350 |.define FOR_STOP, [RA+8] // Loop limit slot.
4351 |.define FOR_STEP, [RA+16] // Loop step slot.
4352 |.define FOR_EXT, [RA+24] // Slot that receives a copy of the index in the FORI/FORL code.
4353
4354 case BC_FORL: /* Numeric loop back-edge with hot loop counting (JIT builds only), then the BC_IFORL code. */
4355 |.if JIT
4356 | hotloop RBd
4357 |.endif
4358 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4359 break;
4360
4361 case BC_JFORI: /* Numeric for loop: FORI/JFORI are the loop-init forms, IFORL/JFORL the loop-back forms. One code template, specialized by the C conditions below. */
4362 case BC_JFORL:
4363#if !LJ_HASJIT
4364 break;
4365#endif
4366 case BC_FORI:
4367 case BC_IFORL:
4368 vk = (op == BC_IFORL || op == BC_JFORL); /* vk != 0: loop-back form, which adds the step. vk == 0: loop-init form, which type-checks the slots. */
4369 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4370 | lea RA, [BASE+RA*8] // RA = address of the loop base, used by the FOR_* defines.
4371 if (LJ_DUALNUM) {
4372 | mov RB, FOR_IDX
4373 | checkint RB, >9 // Index rejected by checkint: use the FP variant at label 9.
4374 | mov TMPR, FOR_STOP
4375 if (!vk) {
4376 | checkint TMPR, ->vmeta_for
4377 | mov ITYPE, FOR_STEP
4378 | test ITYPEd, ITYPEd; js >5 // Low 32 bits of the step negative: inverted check at label 5.
4379 | sar ITYPE, 47;
4380 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for // The bits from bit 47 upwards of the step must equal LJ_TISNUM.
4381 } else {
4382#ifdef LUA_USE_ASSERT
4383 | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4384 | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4385#endif
4386 | mov ITYPE, FOR_STEP
4387 | test ITYPEd, ITYPEd; js >5 // Low 32 bits of the step negative: inverted check at label 5.
4388 | add RBd, ITYPEd; jo >1 // idx += step. Signed overflow goes to label 1.
4389 | setint RB
4390 | mov FOR_IDX, RB // Write the updated index back.
4391 }
4392 | cmp RBd, TMPRd // Compare idx with stop. The mov below leaves the flags unchanged.
4393 | mov FOR_EXT, RB // Copy idx to the FOR_EXT slot.
4394 if (op == BC_FORI) {
4395 | jle >7 // idx <= stop: continue without branching.
4396 |1:
4397 |6:
4398 | branchPC RD
4399 } else if (op == BC_JFORI) {
4400 | branchPC RD
4401 | movzx RDd, PC_RD
4402 | jle =>BC_JLOOP
4403 |1:
4404 |6:
4405 } else if (op == BC_IFORL) {
4406 | jg >7 // idx > stop: continue without branching.
4407 |6:
4408 | branchPC RD
4409 |1:
4410 } else {
4411 | jle =>BC_JLOOP
4412 |1:
4413 |6:
4414 }
4415 |7:
4416 | ins_next
4417 |
4418 |5: // Invert check for negative step.
4419 if (!vk) {
4420 | sar ITYPE, 47;
4421 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4422 } else {
4423 | add RBd, ITYPEd; jo <1 // idx += step. Signed overflow goes to label 1.
4424 | setint RB
4425 | mov FOR_IDX, RB // Write the updated index back.
4426 }
4427 | cmp RBd, TMPRd // Compare idx with stop. The mov below leaves the flags unchanged.
4428 | mov FOR_EXT, RB // Copy idx to the FOR_EXT slot.
4429 if (op == BC_FORI) {
4430 | jge <7
4431 } else if (op == BC_JFORI) {
4432 | branchPC RD
4433 | movzx RDd, PC_RD
4434 | jge =>BC_JLOOP
4435 } else if (op == BC_IFORL) {
4436 | jl <7
4437 } else {
4438 | jge =>BC_JLOOP
4439 }
4440 | jmp <6
4441 |9: // Fallback to FP variant.
4442 if (!vk) {
4443 | jae ->vmeta_for // NOTE(review): consumes flags left by the checkint that jumped here, confirm against the checkint macro.
4444 }
4445 } else if (!vk) {
4446 | checknumtp FOR_IDX, ->vmeta_for
4447 }
4448 if (!vk) {
4449 | checknumtp FOR_STOP, ->vmeta_for
4450 } else {
4451#ifdef LUA_USE_ASSERT
4452 | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4453 | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4454#endif
4455 }
4456 | mov RB, FOR_STEP
4457 if (!vk) {
4458 | checknum RB, ->vmeta_for
4459 }
4460 | movsd xmm0, qword FOR_IDX
4461 | movsd xmm1, qword FOR_STOP
4462 if (vk) {
4463 | addsd xmm0, qword FOR_STEP // idx += step.
4464 | movsd qword FOR_IDX, xmm0 // Write the updated index back.
4465 | test RB, RB; js >3 // Sign bit of the step set: inverted comparison at label 3.
4466 } else {
4467 | jl >3 // NOTE(review): consumes flags left by the checknum RB above (the movsd loads leave flags unchanged), presumably the sign of the step, confirm against the checknum macro.
4468 }
4469 | ucomisd xmm1, xmm0 // Compare stop with idx.
4470 |1:
4471 | movsd qword FOR_EXT, xmm0 // Copy idx to the FOR_EXT slot. Flags stay unchanged.
4472 if (op == BC_FORI) {
4473 |.if DUALNUM
4474 | jnb <7
4475 |.else
4476 | jnb >2 // Loop condition holds: continue without branching.
4477 | branchPC RD
4478 |.endif
4479 } else if (op == BC_JFORI) {
4480 | branchPC RD
4481 | movzx RDd, PC_RD
4482 | jnb =>BC_JLOOP
4483 } else if (op == BC_IFORL) {
4484 |.if DUALNUM
4485 | jb <7
4486 |.else
4487 | jb >2 // Loop condition fails: continue without branching.
4488 | branchPC RD
4489 |.endif
4490 } else {
4491 | jnb =>BC_JLOOP
4492 }
4493 |.if DUALNUM
4494 | jmp <6
4495 |.else
4496 |2:
4497 | ins_next
4498 |.endif
4499 |
4500 |3: // Invert comparison if step is negative.
4501 | ucomisd xmm0, xmm1
4502 | jmp <1
4503 break;
4504
4505 case BC_ITERL:
4506 |.if JIT
4507 | hotloop RBd
4508 |.endif
4509 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4510 break;
4511
4512 case BC_JITERL:
4513#if !LJ_HASJIT
4514 break;
4515#endif
4516 case BC_IITERL:
4517 | ins_AJ // RA = base, RD = target
4518 | lea RA, [BASE+RA*8]
4519 | mov RB, [RA]
4520 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4521 if (op == BC_JITERL) {
4522 | mov [RA-8], RB
4523 | jmp =>BC_JLOOP
4524 } else {
4525 | branchPC RD // Otherwise save control var + branch.
4526 | mov [RA-8], RB
4527 }
4528 |1:
4529 | ins_next
4530 break;
4531
4532 case BC_LOOP:
4533 | ins_A // RA = base, RD = target (loop extent)
4534 | // Note: RA/RD is only used by trace recorder to determine scope/extent
4535 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
4536 |.if JIT
4537 | hotloop RBd
4538 |.endif
4539 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4540 break;
4541
4542 case BC_ILOOP:
4543 | ins_A // RA = base, RD = target (loop extent)
4544 | ins_next
4545 break;
4546
4547 case BC_JLOOP:
4548 |.if JIT
4549 | ins_AD // RA = base (ignored), RD = traceno
4550 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4551 | mov TRACE:RD, [RA+RD*8]
4552 | mov RD, TRACE:RD->mcode
4553 | mov L:RB, SAVE_L
4554 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4555 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4556 | // Save additional callee-save registers only used in compiled code.
4557 |.if X64WIN
4558 | mov CSAVE_4, r12
4559 | mov CSAVE_3, r13
4560 | mov CSAVE_2, r14
4561 | mov CSAVE_1, r15
4562 | mov RA, rsp
4563 | sub rsp, 10*16+4*8
4564 | movdqa [RA-1*16], xmm6
4565 | movdqa [RA-2*16], xmm7
4566 | movdqa [RA-3*16], xmm8
4567 | movdqa [RA-4*16], xmm9
4568 | movdqa [RA-5*16], xmm10
4569 | movdqa [RA-6*16], xmm11
4570 | movdqa [RA-7*16], xmm12
4571 | movdqa [RA-8*16], xmm13
4572 | movdqa [RA-9*16], xmm14
4573 | movdqa [RA-10*16], xmm15
4574 |.else
4575 | sub rsp, 16
4576 | mov [rsp+16], r12
4577 | mov [rsp+8], r13
4578 |.endif
4579 | jmp RD
4580 |.endif
4581 break;
4582
4583 case BC_JMP:
4584 | ins_AJ // RA = unused, RD = target
4585 | branchPC RD
4586 | ins_next
4587 break;
4588
4589 /* -- Function headers -------------------------------------------------- */
4590
4591 /*
4592 ** Reminder: A function may be called with func/args above L->maxstack,
4593 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4594 ** too. This means all FUNC* ops (including fast functions) must check
4595 ** for stack overflow _before_ adding more slots!
4596 */
4597
4598 case BC_FUNCF:
4599 |.if JIT
4600 | hotcall RBd
4601 |.endif
4602 case BC_FUNCV: /* NYI: compiled vararg functions. */
4603 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4604 break;
4605
4606 case BC_JFUNCF:
4607#if !LJ_HASJIT
4608 break;
4609#endif
4610 case BC_IFUNCF:
4611 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4612 | mov KBASE, [PC-4+PC2PROTO(k)]
4613 | mov L:RB, SAVE_L
4614 | lea RA, [BASE+RA*8] // Top of frame.
4615 | cmp RA, L:RB->maxstack
4616 | ja ->vm_growstack_f
4617 | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4618 | cmp NARGS:RDd, RAd // Check for missing parameters.
4619 | jbe >3
4620 |2:
4621 if (op == BC_JFUNCF) {
4622 | movzx RDd, PC_RD
4623 | jmp =>BC_JLOOP
4624 } else {
4625 | ins_next
4626 }
4627 |
4628 |3: // Clear missing parameters.
4629 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4630 | add NARGS:RDd, 1
4631 | cmp NARGS:RDd, RAd
4632 | jbe <3
4633 | jmp <2
4634 break;
4635
4636 case BC_JFUNCV:
4637#if !LJ_HASJIT
4638 break;
4639#endif
4640 | int3 // NYI: compiled vararg functions
4641 break; /* NYI: compiled vararg functions. */
4642
4643 case BC_IFUNCV:
4644 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4645 | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4646 | lea RD, [BASE+NARGS:RD*8+8]
4647 | mov LFUNC:KBASE, [BASE-16]
4648 | mov [RD-8], RB // Store delta + FRAME_VARG.
4649 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4650 | mov L:RB, SAVE_L
4651 | lea RA, [RD+RA*8]
4652 | cmp RA, L:RB->maxstack
4653 | ja ->vm_growstack_v // Need to grow stack.
4654 | mov RA, BASE
4655 | mov BASE, RD
4656 | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4657 | test RBd, RBd
4658 | jz >2
4659 | add RA, 8
4660 |1: // Copy fixarg slots up to new frame.
4661 | add RA, 8
4662 | cmp RA, BASE
4663 | jnb >3 // Less args than parameters?
4664 | mov KBASE, [RA-16]
4665 | mov [RD], KBASE
4666 | add RD, 8
4667 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4668 | sub RBd, 1
4669 | jnz <1
4670 |2:
4671 if (op == BC_JFUNCV) {
4672 | movzx RDd, PC_RD
4673 | jmp =>BC_JLOOP
4674 } else {
4675 | mov KBASE, [PC-4+PC2PROTO(k)]
4676 | ins_next
4677 }
4678 |
4679 |3: // Clear missing parameters.
4680 | mov aword [RD], LJ_TNIL
4681 | add RD, 8
4682 | sub RBd, 1
4683 | jnz <3
4684 | jmp <2
4685 break;
4686
4687 case BC_FUNCC:
4688 case BC_FUNCCW:
4689 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4690 | mov CFUNC:RB, [BASE-16]
4691 | cleartp CFUNC:RB
4692 | mov KBASE, CFUNC:RB->f
4693 | mov L:RB, SAVE_L
4694 | lea RD, [BASE+NARGS:RD*8-8]
4695 | mov L:RB->base, BASE
4696 | lea RA, [RD+8*LUA_MINSTACK]
4697 | cmp RA, L:RB->maxstack
4698 | mov L:RB->top, RD
4699 if (op == BC_FUNCC) {
4700 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4701 } else {
4702 | mov CARG2, KBASE
4703 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4704 }
4705 | ja ->vm_growstack_c // Need to grow stack.
4706 | set_vmstate C
4707 if (op == BC_FUNCC) {
4708 | call KBASE // (lua_State *L)
4709 } else {
4710 | // (lua_State *L, lua_CFunction f)
4711 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4712 }
4713 | // nresults returned in eax (RD).
4714 | mov BASE, L:RB->base
4715 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4716 | set_vmstate INTERP
4717 | lea RA, [BASE+RD*8]
4718 | neg RA
4719 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4720 | mov PC, [BASE-8] // Fetch PC of caller.
4721 | jmp ->vm_returnc
4722 break;
4723
4724 /* ---------------------------------------------------------------------- */
4725
4726 default:
4727 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4728 exit(2);
4729 break;
4730 }
4731}
4732
4733static int build_backend(BuildCtx *ctx)
4734{
4735 int op;
4736 dasm_growpc(Dst, BC__MAX);
4737 build_subroutines(ctx);
4738 |.code_op
4739 for (op = 0; op < BC__MAX; op++)
4740 build_ins(ctx, (BCOp)op, op);
4741 return BC__MAX;
4742}
4743
4744/* Emit pseudo frame-info for all assembler functions. */
4745static void emit_asm_debug(BuildCtx *ctx)
4746{
4747 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4748 switch (ctx->mode) {
4749 case BUILD_elfasm:
4750 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
4751 fprintf(ctx->fp,
4752 ".Lframe0:\n"
4753 "\t.long .LECIE0-.LSCIE0\n"
4754 ".LSCIE0:\n"
4755 "\t.long 0xffffffff\n"
4756 "\t.byte 0x1\n"
4757 "\t.string \"\"\n"
4758 "\t.uleb128 0x1\n"
4759 "\t.sleb128 -8\n"
4760 "\t.byte 0x10\n"
4761 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4762 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4763 "\t.align 8\n"
4764 ".LECIE0:\n\n");
4765 fprintf(ctx->fp,
4766 ".LSFDE0:\n"
4767 "\t.long .LEFDE0-.LASFDE0\n"
4768 ".LASFDE0:\n"
4769 "\t.long .Lframe0\n"
4770 "\t.quad .Lbegin\n"
4771 "\t.quad %d\n"
4772 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4773 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4774 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4775 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4776 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4777#if LJ_NO_UNWIND
4778 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4779 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4780#endif
4781 "\t.align 8\n"
4782 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4783#if LJ_HASFFI
4784 fprintf(ctx->fp,
4785 ".LSFDE1:\n"
4786 "\t.long .LEFDE1-.LASFDE1\n"
4787 ".LASFDE1:\n"
4788 "\t.long .Lframe0\n"
4789 "\t.quad lj_vm_ffi_call\n"
4790 "\t.quad %d\n"
4791 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4792 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4793 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4794 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4795 "\t.align 8\n"
4796 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4797#endif
4798#if !LJ_NO_UNWIND
4799#if LJ_TARGET_SOLARIS
4800 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4801#else
4802 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4803#endif
4804 fprintf(ctx->fp,
4805 ".Lframe1:\n"
4806 "\t.long .LECIE1-.LSCIE1\n"
4807 ".LSCIE1:\n"
4808 "\t.long 0\n"
4809 "\t.byte 0x1\n"
4810 "\t.string \"zPR\"\n"
4811 "\t.uleb128 0x1\n"
4812 "\t.sleb128 -8\n"
4813 "\t.byte 0x10\n"
4814 "\t.uleb128 6\n" /* augmentation length */
4815 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4816 "\t.long lj_err_unwind_dwarf-.\n"
4817 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4818 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4819 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4820 "\t.align 8\n"
4821 ".LECIE1:\n\n");
4822 fprintf(ctx->fp,
4823 ".LSFDE2:\n"
4824 "\t.long .LEFDE2-.LASFDE2\n"
4825 ".LASFDE2:\n"
4826 "\t.long .LASFDE2-.Lframe1\n"
4827 "\t.long .Lbegin-.\n"
4828 "\t.long %d\n"
4829 "\t.uleb128 0\n" /* augmentation length */
4830 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4831 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4832 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4833 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4834 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4835 "\t.align 8\n"
4836 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4837#if LJ_HASFFI
4838 fprintf(ctx->fp,
4839 ".Lframe2:\n"
4840 "\t.long .LECIE2-.LSCIE2\n"
4841 ".LSCIE2:\n"
4842 "\t.long 0\n"
4843 "\t.byte 0x1\n"
4844 "\t.string \"zR\"\n"
4845 "\t.uleb128 0x1\n"
4846 "\t.sleb128 -8\n"
4847 "\t.byte 0x10\n"
4848 "\t.uleb128 1\n" /* augmentation length */
4849 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4850 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4851 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4852 "\t.align 8\n"
4853 ".LECIE2:\n\n");
4854 fprintf(ctx->fp,
4855 ".LSFDE3:\n"
4856 "\t.long .LEFDE3-.LASFDE3\n"
4857 ".LASFDE3:\n"
4858 "\t.long .LASFDE3-.Lframe2\n"
4859 "\t.long lj_vm_ffi_call-.\n"
4860 "\t.long %d\n"
4861 "\t.uleb128 0\n" /* augmentation length */
4862 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4863 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4864 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4865 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4866 "\t.align 8\n"
4867 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4868#endif
4869#endif
4870 break;
4871#if !LJ_NO_UNWIND
4872 /* Mental note: never let Apple design an assembler.
4873 ** Or a linker. Or a plastic case. But I digress.
4874 */
4875 case BUILD_machasm: {
4876#if LJ_HASFFI
4877 int fcsize = 0;
4878#endif
4879 int i;
4880 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4881 fprintf(ctx->fp,
4882 "EH_frame1:\n"
4883 "\t.set L$set$x,LECIEX-LSCIEX\n"
4884 "\t.long L$set$x\n"
4885 "LSCIEX:\n"
4886 "\t.long 0\n"
4887 "\t.byte 0x1\n"
4888 "\t.ascii \"zPR\\0\"\n"
4889 "\t.byte 0x1\n"
4890 "\t.byte 128-8\n"
4891 "\t.byte 0x10\n"
4892 "\t.byte 6\n" /* augmentation length */
4893 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4894 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4895 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4896 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4897 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4898 "\t.align 3\n"
4899 "LECIEX:\n\n");
4900 for (i = 0; i < ctx->nsym; i++) {
4901 const char *name = ctx->sym[i].name;
4902 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
4903 if (size == 0) continue;
4904#if LJ_HASFFI
4905 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4906#endif
4907 fprintf(ctx->fp,
4908 "%s.eh:\n"
4909 "LSFDE%d:\n"
4910 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4911 "\t.long L$set$%d\n"
4912 "LASFDE%d:\n"
4913 "\t.long LASFDE%d-EH_frame1\n"
4914 "\t.long %s-.\n"
4915 "\t.long %d\n"
4916 "\t.byte 0\n" /* augmentation length */
4917 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4918 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4919 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4920 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4921 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4922 "\t.align 3\n"
4923 "LEFDE%d:\n\n",
4924 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4925 }
4926#if LJ_HASFFI
4927 if (fcsize) {
4928 fprintf(ctx->fp,
4929 "EH_frame2:\n"
4930 "\t.set L$set$y,LECIEY-LSCIEY\n"
4931 "\t.long L$set$y\n"
4932 "LSCIEY:\n"
4933 "\t.long 0\n"
4934 "\t.byte 0x1\n"
4935 "\t.ascii \"zR\\0\"\n"
4936 "\t.byte 0x1\n"
4937 "\t.byte 128-8\n"
4938 "\t.byte 0x10\n"
4939 "\t.byte 1\n" /* augmentation length */
4940 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4941 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4942 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4943 "\t.align 3\n"
4944 "LECIEY:\n\n");
4945 fprintf(ctx->fp,
4946 "_lj_vm_ffi_call.eh:\n"
4947 "LSFDEY:\n"
4948 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4949 "\t.long L$set$yy\n"
4950 "LASFDEY:\n"
4951 "\t.long LASFDEY-EH_frame2\n"
4952 "\t.long _lj_vm_ffi_call-.\n"
4953 "\t.long %d\n"
4954 "\t.byte 0\n" /* augmentation length */
4955 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4956 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4957 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4958 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4959 "\t.align 3\n"
4960 "LEFDEY:\n\n", fcsize);
4961 }
4962#endif
4963 fprintf(ctx->fp, ".subsections_via_symbols\n");
4964 }
4965 break;
4966#endif
4967 default: /* Difficult for other modes. */
4968 break;
4969 }
4970}
4971